summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/Makefile13
-rw-r--r--usr/src/Makefile.lint3
-rw-r--r--usr/src/Makefile.master25
-rw-r--r--usr/src/Targetdirs12
-rw-r--r--usr/src/cmd/Makefile21
-rw-r--r--usr/src/cmd/Makefile.check3
-rw-r--r--usr/src/cmd/cmd-inet/etc/sock2path.d/system%2Fkernel4
-rw-r--r--usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_door.c37
-rw-r--r--usr/src/cmd/cmd-inet/lib/ipmgmtd/net-ipmgmt7
-rw-r--r--usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c22
-rw-r--r--usr/src/cmd/cmd-inet/usr.lib/mdnsd/DNSCommon.c2
-rw-r--r--usr/src/cmd/cmd-inet/usr.lib/wanboot/p12split/Makefile2
-rw-r--r--usr/src/cmd/cmd-inet/usr.lib/wanboot/wanboot-cgi/Makefile2
-rw-r--r--usr/src/cmd/cmd-inet/usr.lib/wpad/Makefile2
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/routeadm/routeadm.c32
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/snoop/Makefile1
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.c10
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.h2
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c8
-rw-r--r--usr/src/cmd/column/Makefile43
-rw-r--r--usr/src/cmd/column/THIRDPARTYLICENSE26
-rw-r--r--usr/src/cmd/column/THIRDPARTYLICENSE.descrip1
-rw-r--r--usr/src/cmd/column/column.c335
-rw-r--r--usr/src/cmd/coreadm/coreadm.xml8
-rw-r--r--usr/src/cmd/cron/Makefile36
-rw-r--r--usr/src/cmd/cron/cron.c154
-rw-r--r--usr/src/cmd/cron/cron.h3
-rw-r--r--usr/src/cmd/cron/crontab.c38
-rw-r--r--usr/src/cmd/ctfdiff/Makefile44
-rw-r--r--usr/src/cmd/ctfdiff/ctfdiff.c220
-rw-r--r--usr/src/cmd/dd/dd.c5
-rw-r--r--usr/src/cmd/devfsadm/Makefile.com1
-rw-r--r--usr/src/cmd/devfsadm/devlink.tab.sh5
-rw-r--r--usr/src/cmd/devfsadm/i386/Makefile3
-rw-r--r--usr/src/cmd/devfsadm/i386/lx_link_i386.c86
-rw-r--r--usr/src/cmd/devfsadm/misc_link.c5
-rw-r--r--usr/src/cmd/dladm/dladm.c134
-rw-r--r--usr/src/cmd/dlmgmtd/dlmgmt_db.c189
-rw-r--r--usr/src/cmd/dlmgmtd/dlmgmt_door.c66
-rw-r--r--usr/src/cmd/dlmgmtd/dlmgmt_impl.h6
-rw-r--r--usr/src/cmd/dlmgmtd/dlmgmt_main.c66
-rw-r--r--usr/src/cmd/dlmgmtd/dlmgmt_util.c17
-rw-r--r--usr/src/cmd/dlmgmtd/svc-dlmgmtd9
-rw-r--r--usr/src/cmd/dlstat/dlstat.c70
-rw-r--r--usr/src/cmd/dtrace/test/cmd/jdtrace/Makefile3
-rw-r--r--usr/src/cmd/dtrace/test/cmd/scripts/dtest.pl2
-rw-r--r--usr/src/cmd/dtrace/test/tst/common/Makefile8
-rw-r--r--usr/src/cmd/dtrace/test/tst/common/java_api/Makefile3
-rw-r--r--usr/src/cmd/dtrace/test/tst/common/llquantize/tst.range.d4
-rw-r--r--usr/src/cmd/dtrace/test/tst/common/mdb/tst.postmort.ksh69
-rw-r--r--usr/src/cmd/dtrace/test/tst/common/ustack/tst.unpriv.c7
-rw-r--r--usr/src/cmd/dtrace/test/tst/common/ustack/tst.unpriv.ksh68
-rw-r--r--usr/src/cmd/dtrace/test/tst/common/ustack/unpriv_helper.d4
-rw-r--r--usr/src/cmd/flowadm/flowadm.c51
-rw-r--r--usr/src/cmd/flowstat/flowstat.c21
-rw-r--r--usr/src/cmd/fs.d/Makefile5
-rw-r--r--usr/src/cmd/fs.d/bootfs/Makefile21
-rw-r--r--usr/src/cmd/fs.d/bootfs/mount.c139
-rw-r--r--usr/src/cmd/fs.d/hyprlofs/Makefile42
-rw-r--r--usr/src/cmd/fs.d/hyprlofs/hlcfg/Makefile30
-rw-r--r--usr/src/cmd/fs.d/hyprlofs/hlcfg/hlcfg.c244
-rw-r--r--usr/src/cmd/fs.d/hyprlofs/mount/Makefile31
-rw-r--r--usr/src/cmd/fs.d/hyprlofs/mount/mount.c148
-rw-r--r--usr/src/cmd/fs.d/lxproc/Makefile32
-rw-r--r--usr/src/cmd/fs.d/lxproc/mount.c140
-rw-r--r--usr/src/cmd/fs.d/nfs/lib/smfcfg.c20
-rw-r--r--usr/src/cmd/fs.d/nfs/lib/smfcfg.h2
-rw-r--r--usr/src/cmd/fs.d/nfs/mount/mount.c2
-rw-r--r--usr/src/cmd/fs.d/nfs/mountd/mountd.c8
-rw-r--r--usr/src/cmd/fs.d/nfs/nfsd/nfsd.c8
-rw-r--r--usr/src/cmd/fs.d/nfs/svc/nfs-server4
-rw-r--r--usr/src/cmd/fs.d/nfs/umount/umount.c2
-rw-r--r--usr/src/cmd/halt/halt.c19
-rw-r--r--usr/src/cmd/ibd_upgrade/ibd_delete_link.c2
-rw-r--r--usr/src/cmd/init/Makefile10
-rw-r--r--usr/src/cmd/init/init.c40
-rw-r--r--usr/src/cmd/initpkg/mountall.sh5
-rw-r--r--usr/src/cmd/initpkg/shutdown.sh2
-rw-r--r--usr/src/cmd/initpkg/umountall.sh4
-rw-r--r--usr/src/cmd/ipf/lib/common/load_hash.c12
-rw-r--r--usr/src/cmd/ipf/lib/common/load_hashnode.c12
-rw-r--r--usr/src/cmd/ipf/lib/common/load_pool.c12
-rw-r--r--usr/src/cmd/ipf/lib/common/load_poolnode.c12
-rw-r--r--usr/src/cmd/ipf/tools/Makefile.tools15
-rw-r--r--usr/src/cmd/ipf/tools/ipf.c54
-rw-r--r--usr/src/cmd/ipf/tools/ipfs.c48
-rw-r--r--usr/src/cmd/ipf/tools/ipfstat.c52
-rw-r--r--usr/src/cmd/ipf/tools/ipfzone.c135
-rw-r--r--usr/src/cmd/ipf/tools/ipfzone.h17
-rw-r--r--usr/src/cmd/ipf/tools/ipmon.c49
-rw-r--r--usr/src/cmd/ipf/tools/ipnat.c31
-rw-r--r--usr/src/cmd/ipf/tools/ippool.c144
-rw-r--r--usr/src/cmd/krb5/kadmin/Makefile2
-rw-r--r--usr/src/cmd/ksh/Makefile.com2
-rw-r--r--usr/src/cmd/localedef/Makefile14
-rw-r--r--usr/src/cmd/lofiadm/main.c28
-rw-r--r--usr/src/cmd/lp/terminfo/40.ti68
-rw-r--r--usr/src/cmd/lp/terminfo/44x.ti57
-rw-r--r--usr/src/cmd/lp/terminfo/45x.ti42
-rw-r--r--usr/src/cmd/lp/terminfo/477.ti181
-rw-r--r--usr/src/cmd/lp/terminfo/47x.ti134
-rw-r--r--usr/src/cmd/lp/terminfo/495.ti78
-rw-r--r--usr/src/cmd/lp/terminfo/53x0.ti81
-rw-r--r--usr/src/cmd/lp/terminfo/57x.ti105
-rw-r--r--usr/src/cmd/lp/terminfo/58x.ti103
-rw-r--r--usr/src/cmd/lp/terminfo/593.ti73
-rw-r--r--usr/src/cmd/lp/terminfo/Makefile77
-rw-r--r--usr/src/cmd/lp/terminfo/PS.ti36
-rw-r--r--usr/src/cmd/lp/terminfo/citoh.ti127
-rw-r--r--usr/src/cmd/lp/terminfo/daisy.ti126
-rw-r--r--usr/src/cmd/lp/terminfo/dec.ti130
-rw-r--r--usr/src/cmd/lp/terminfo/epson.ti301
-rw-r--r--usr/src/cmd/lp/terminfo/hplaser.ti122
-rw-r--r--usr/src/cmd/lp/terminfo/ibm.ti353
-rw-r--r--usr/src/cmd/lp/terminfo/unknown.ti30
-rw-r--r--usr/src/cmd/mdb/Makefile.common4
-rw-r--r--usr/src/cmd/mdb/common/mdb/mdb_ctf.c1
-rw-r--r--usr/src/cmd/mdb/common/mdb/mdb_demangle.c11
-rw-r--r--usr/src/cmd/mdb/common/mdb/mdb_main.c13
-rw-r--r--usr/src/cmd/mdb/common/mdb/mdb_modapi.h27
-rw-r--r--usr/src/cmd/mdb/common/mdb/mdb_print.c45
-rw-r--r--usr/src/cmd/mdb/common/mdb/mdb_tab.c24
-rw-r--r--usr/src/cmd/mdb/common/modules/ctf/mdb_modctf.c85
-rw-r--r--usr/src/cmd/mdb/common/modules/dtrace/dtrace.c3
-rw-r--r--usr/src/cmd/mdb/common/modules/genunix/genunix.c108
-rw-r--r--usr/src/cmd/mdb/common/modules/genunix/netstack.c30
-rw-r--r--usr/src/cmd/mdb/common/modules/genunix/netstack.h3
-rw-r--r--usr/src/cmd/mdb/common/modules/genunix/zone.c67
-rw-r--r--usr/src/cmd/mdb/common/modules/genunix/zone.h4
-rw-r--r--usr/src/cmd/mdb/common/modules/libc/libc.c28
-rw-r--r--usr/src/cmd/mdb/common/modules/v8/mdb_v8.c4606
-rw-r--r--usr/src/cmd/mdb/common/modules/v8/mdb_v8_cfg.c728
-rw-r--r--usr/src/cmd/mdb/common/modules/v8/v8cfg.h55
-rw-r--r--usr/src/cmd/mdb/common/modules/v8/v8dbg.h83
-rw-r--r--usr/src/cmd/mdb/i86pc/modules/unix/unix.c141
-rw-r--r--usr/src/cmd/mdb/intel/amd64/libctf/Makefile26
-rw-r--r--usr/src/cmd/mdb/intel/amd64/libumem/Makefile1
-rw-r--r--usr/src/cmd/mdb/intel/amd64/v8/Makefile45
-rw-r--r--usr/src/cmd/mdb/intel/ia32/libctf/Makefile25
-rw-r--r--usr/src/cmd/mdb/intel/ia32/libumem/Makefile1
-rw-r--r--usr/src/cmd/mdb/intel/ia32/v8/Makefile44
-rw-r--r--usr/src/cmd/mdb/sparc/v7/libctf/Makefile25
-rw-r--r--usr/src/cmd/mdb/sparc/v7/libumem/Makefile1
-rw-r--r--usr/src/cmd/mdb/sparc/v9/libctf/Makefile26
-rw-r--r--usr/src/cmd/mdb/sparc/v9/libumem/Makefile1
-rwxr-xr-x[-rw-r--r--]usr/src/cmd/mdb/test/mtest.sh0
-rw-r--r--usr/src/cmd/mdb/test/typedef/tst.dellist.mdb.out0
-rw-r--r--usr/src/cmd/mdb/test/typedef/tst.emptylist.mdb.out0
-rw-r--r--usr/src/cmd/nicstat/Makefile31
-rw-r--r--usr/src/cmd/nicstat/nicstat.pl422
-rw-r--r--usr/src/cmd/nscd/Makefile1
-rw-r--r--usr/src/cmd/nscd/svc-nscd4
-rw-r--r--usr/src/cmd/passwd/Makefile2
-rw-r--r--usr/src/cmd/pgrep/pgrep.c8
-rw-r--r--usr/src/cmd/prstat/prstat.c48
-rw-r--r--usr/src/cmd/prstat/prstat.h2
-rw-r--r--usr/src/cmd/prstat/prutil.c3
-rw-r--r--usr/src/cmd/prtconf/prtconf.c52
-rw-r--r--usr/src/cmd/prtconf/prtconf.h1
-rw-r--r--usr/src/cmd/ptools/Makefile.bld41
-rw-r--r--usr/src/cmd/ptools/common/ptools_common.c50
-rw-r--r--usr/src/cmd/ptools/common/ptools_common.h36
-rw-r--r--usr/src/cmd/ptools/pargs/pargs.c7
-rw-r--r--usr/src/cmd/ptools/pfiles/pfiles.c33
-rw-r--r--usr/src/cmd/ptools/pmap/pmap.c10
-rw-r--r--usr/src/cmd/ptools/pmap/pmap_common.c5
-rw-r--r--usr/src/cmd/ptools/preap/preap.c8
-rw-r--r--usr/src/cmd/ptools/psig/psig.c33
-rw-r--r--usr/src/cmd/ptools/ptime/ptime.c118
-rw-r--r--usr/src/cmd/ptools/ptree/ptree.c17
-rw-r--r--usr/src/cmd/ptools/pwait/pwait.c29
-rw-r--r--usr/src/cmd/ptools/pwdx/pwdx.c7
-rw-r--r--usr/src/cmd/rcap/common/utils.c75
-rw-r--r--usr/src/cmd/rcap/common/utils.h2
-rw-r--r--usr/src/cmd/rcap/rcapadm/rcapadm.c16
-rw-r--r--usr/src/cmd/rcap/rcapd/rcapd_collection_zone.c141
-rw-r--r--usr/src/cmd/rcap/rcapd/rcapd_scanner.c3
-rw-r--r--usr/src/cmd/rcap/rcapstat/rcapstat.c7
-rw-r--r--usr/src/cmd/rcm_daemon/Makefile.com4
-rw-r--r--usr/src/cmd/sed/main.c27
-rw-r--r--usr/src/cmd/sendmail/src/Makefile2
-rw-r--r--usr/src/cmd/sgs/elfdump/Makefile.targ2
-rw-r--r--usr/src/cmd/sgs/elfdump/amd64/Makefile2
-rw-r--r--usr/src/cmd/sgs/elfdump/i386/Makefile2
-rw-r--r--usr/src/cmd/sgs/include/debug.h3
-rw-r--r--usr/src/cmd/sgs/include/rtld.h3
-rw-r--r--usr/src/cmd/sgs/lex/Makefile.targ2
-rw-r--r--usr/src/cmd/sgs/lex/common/main.c45
-rw-r--r--usr/src/cmd/sgs/lex/common/once.h12
-rw-r--r--usr/src/cmd/sgs/libconv/common/corenote.c24
-rw-r--r--usr/src/cmd/sgs/libconv/common/corenote.msg6
-rw-r--r--usr/src/cmd/sgs/liblddbg/common/liblddbg.msg2
-rw-r--r--usr/src/cmd/sgs/liblddbg/common/llib-llddbg2
-rw-r--r--usr/src/cmd/sgs/liblddbg/common/mapfile-vers2
-rw-r--r--usr/src/cmd/sgs/liblddbg/common/util.c13
-rw-r--r--usr/src/cmd/sgs/librtld_db/common/librtld_db.msg6
-rw-r--r--usr/src/cmd/sgs/librtld_db/common/rd_elf.c28
-rw-r--r--usr/src/cmd/sgs/rtld/common/_rtld.h5
-rw-r--r--usr/src/cmd/sgs/rtld/common/analyze.c50
-rw-r--r--usr/src/cmd/sgs/rtld/common/globals.c3
-rw-r--r--usr/src/cmd/sgs/rtld/common/rtld.msg7
-rw-r--r--usr/src/cmd/sgs/rtld/common/setup.c10
-rw-r--r--usr/src/cmd/sgs/rtld/common/util.c39
-rw-r--r--usr/src/cmd/sgs/yacc/Makefile.targ2
-rw-r--r--usr/src/cmd/sgs/yacc/common/dextern.h19
-rw-r--r--usr/src/cmd/sgs/yacc/common/y1.c36
-rw-r--r--usr/src/cmd/sgs/yacc/common/y2.c7
-rw-r--r--usr/src/cmd/ssh/Makefile2
-rw-r--r--usr/src/cmd/ssh/include/key.h15
-rw-r--r--usr/src/cmd/ssh/include/servconf.h1
-rw-r--r--usr/src/cmd/ssh/libssh/common/canohost.c35
-rw-r--r--usr/src/cmd/ssh/libssh/common/key.c17
-rw-r--r--usr/src/cmd/ssh/sftp-server/Makefile2
-rw-r--r--usr/src/cmd/ssh/sftp/Makefile2
-rw-r--r--usr/src/cmd/ssh/ssh-add/Makefile2
-rw-r--r--usr/src/cmd/ssh/ssh-agent/Makefile2
-rw-r--r--usr/src/cmd/ssh/ssh-keygen/Makefile2
-rw-r--r--usr/src/cmd/ssh/ssh-keygen/ssh-keygen.c277
-rw-r--r--usr/src/cmd/ssh/ssh-keyscan/Makefile2
-rw-r--r--usr/src/cmd/ssh/ssh-keysign/Makefile2
-rw-r--r--usr/src/cmd/ssh/ssh/Makefile2
-rw-r--r--usr/src/cmd/ssh/sshd/Makefile2
-rw-r--r--usr/src/cmd/ssh/sshd/auth2-pubkey.c109
-rw-r--r--usr/src/cmd/ssh/sshd/servconf.c9
-rw-r--r--usr/src/cmd/ssh/sshd/sshlogin.c3
-rw-r--r--usr/src/cmd/stat/Makefile11
-rw-r--r--usr/src/cmd/stat/arcstat/Makefile1
-rw-r--r--[-rwxr-xr-x]usr/src/cmd/stat/arcstat/arcstat.pl0
-rw-r--r--usr/src/cmd/stat/vfsstat/Makefile41
-rw-r--r--usr/src/cmd/stat/vfsstat/vfsstat.pl227
-rw-r--r--usr/src/cmd/stat/ziostat/Makefile41
-rwxr-xr-xusr/src/cmd/stat/ziostat/ziostat.pl204
-rw-r--r--usr/src/cmd/svc/configd/rc_node.c3
-rw-r--r--usr/src/cmd/svc/milestone/net-routing-setup23
-rw-r--r--usr/src/cmd/svc/milestone/network-location.xml2
-rw-r--r--usr/src/cmd/svc/milestone/network-routing-setup.xml14
-rw-r--r--usr/src/cmd/svc/milestone/network.xml8
-rw-r--r--usr/src/cmd/svc/milestone/single-user.xml2
-rw-r--r--usr/src/cmd/svc/shell/smf_include.sh6
-rw-r--r--usr/src/cmd/svc/startd/graph.c26
-rw-r--r--usr/src/cmd/svc/startd/method.c96
-rw-r--r--usr/src/cmd/svc/startd/startd.c130
-rw-r--r--usr/src/cmd/svc/startd/startd.h4
-rw-r--r--usr/src/cmd/svc/svcadm/Makefile7
-rw-r--r--usr/src/cmd/svc/svccfg/svccfg_libscf.c267
-rw-r--r--usr/src/cmd/svc/svcs/Makefile2
-rw-r--r--usr/src/cmd/svc/svcs/explain.c4
-rw-r--r--usr/src/cmd/svc/svcs/svcs.c94
-rw-r--r--usr/src/cmd/svr4pkg/pkgadd/Makefile2
-rw-r--r--usr/src/cmd/svr4pkg/pkgadm/Makefile2
-rw-r--r--usr/src/cmd/tail/Makefile5
-rw-r--r--usr/src/cmd/truss/codes.c4
-rw-r--r--usr/src/cmd/truss/print.c13
-rw-r--r--usr/src/cmd/truss/systable.c8
-rw-r--r--usr/src/cmd/vi/port/Makefile3
-rw-r--r--usr/src/cmd/vi/port/ex_cmdsub.c2
-rw-r--r--usr/src/cmd/vndadm/Makefile65
-rw-r--r--usr/src/cmd/vndadm/test/Makefile19
-rw-r--r--usr/src/cmd/vndadm/test/Makefile.com43
-rw-r--r--usr/src/cmd/vndadm/test/Makefile.subdirs29
-rw-r--r--usr/src/cmd/vndadm/test/Makefile.targ59
-rw-r--r--usr/src/cmd/vndadm/test/scripts/Makefile28
-rwxr-xr-xusr/src/cmd/vndadm/test/scripts/vndtest.ksh300
-rw-r--r--usr/src/cmd/vndadm/test/tst/Makefile18
-rw-r--r--usr/src/cmd/vndadm/test/tst/cmd/Makefile34
-rw-r--r--usr/src/cmd/vndadm/test/tst/cmd/cmd.common.ksh33
-rw-r--r--usr/src/cmd/vndadm/test/tst/cmd/create.list.ksh30
-rw-r--r--usr/src/cmd/vndadm/test/tst/cmd/create.list.ksh.out2
-rw-r--r--usr/src/cmd/vndadm/test/tst/cmd/create.sdev.ksh25
-rw-r--r--usr/src/cmd/vndadm/test/tst/cmd/create.setbuf.ksh34
-rw-r--r--usr/src/cmd/vndadm/test/tst/cmd/ecreate.destroy.ksh25
-rw-r--r--usr/src/cmd/vndadm/test/tst/cmd/ecreate.setbadprop.ksh24
-rw-r--r--usr/src/cmd/vndadm/test/tst/cmd/ecreate.setbadvalue.ksh24
-rw-r--r--usr/src/cmd/vndadm/test/tst/cmd/ecreate.setbuftoobig.ksh24
-rw-r--r--usr/src/cmd/vndadm/test/tst/cmd/ecreate.setrdonlyprop.ksh24
-rw-r--r--usr/src/cmd/vndadm/test/tst/dld/Makefile27
-rw-r--r--usr/src/cmd/vndadm/test/tst/dld/create.reuse.ksh31
-rw-r--r--usr/src/cmd/vndadm/test/tst/dld/dld.common.ksh29
-rw-r--r--usr/src/cmd/vndadm/test/tst/dld/ecreate.ipfirst.ksh27
-rw-r--r--usr/src/cmd/vndadm/test/tst/dld/ecreate.vndfirst.ksh27
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/Makefile49
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.attach.c63
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.attachnolink.c67
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.badlinkname.c119
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.doublelink.c82
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.gioctlattach.c69
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.link.c76
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.linkexists.c90
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.ngioctlfault.c96
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv1.c69
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv2.c69
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv3.c70
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv4.c75
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv5.c77
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.olink.c77
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.olinknopriv.c83
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.rmenolink.c69
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/tst.attachrdonly.c63
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/tst.badioctl.c79
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/tst.basicopenctl.c76
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/tst.gioctlfault.c78
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/tst.gioctlnattach.c100
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/tst.iocsize.ksh54
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/tst.openctlbadflags.c88
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/Makefile44
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/create.badlink.c39
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/create.badpropid.c76
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/create.badpropsize.c63
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/create.badzone.c43
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/create.basic.c49
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/create.enomem.c91
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/create.frameioeagain.c80
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/create.open.c56
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/create.propiter.c79
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/create.proprdonly.c63
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/err.badclose.c33
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/tst.badopen.c49
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/tst.strerror.c30
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/tst.strerror.exe.out37
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/tst.strsyserror.c50
-rw-r--r--usr/src/cmd/vndadm/vndadm.c872
-rw-r--r--usr/src/cmd/vndstat/Makefile33
-rw-r--r--usr/src/cmd/vndstat/vndstat.c542
-rw-r--r--usr/src/cmd/zfs/zfs_main.c142
-rw-r--r--usr/src/cmd/zlogin/zlogin.c68
-rw-r--r--usr/src/cmd/zoneadm/Makefile11
-rw-r--r--usr/src/cmd/zoneadm/svc-zones2
-rw-r--r--usr/src/cmd/zoneadm/zfs.c16
-rw-r--r--usr/src/cmd/zoneadm/zoneadm.c190
-rw-r--r--usr/src/cmd/zoneadm/zones.xml10
-rw-r--r--usr/src/cmd/zoneadmd/Makefile49
-rw-r--r--usr/src/cmd/zoneadmd/Makefile.com70
-rw-r--r--usr/src/cmd/zoneadmd/amd64/Makefile31
-rw-r--r--usr/src/cmd/zoneadmd/i386/Makefile30
-rw-r--r--usr/src/cmd/zoneadmd/mcap.c1193
-rw-r--r--usr/src/cmd/zoneadmd/vplat.c437
-rw-r--r--usr/src/cmd/zoneadmd/zcons.c131
-rw-r--r--usr/src/cmd/zoneadmd/zoneadmd.c583
-rw-r--r--usr/src/cmd/zoneadmd/zoneadmd.h20
-rw-r--r--usr/src/cmd/zonecfg/Makefile3
-rw-r--r--usr/src/cmd/zonecfg/zonecfg.c889
-rw-r--r--usr/src/cmd/zonecfg/zonecfg.h13
-rw-r--r--usr/src/cmd/zonecfg/zonecfg_grammar.y89
-rw-r--r--usr/src/cmd/zonecfg/zonecfg_lex.l29
-rw-r--r--usr/src/cmd/zonename/Makefile10
-rw-r--r--usr/src/cmd/zonestat/zonestatd/zonestatd.c11
-rw-r--r--usr/src/cmd/zpool/zpool_main.c1
-rw-r--r--usr/src/common/brand/lx/lx_signum.c244
-rw-r--r--usr/src/common/brand/lx/lx_signum.h81
-rw-r--r--usr/src/common/brand/lx/lx_syscall.h47
-rw-r--r--usr/src/common/ctf/ctf_create.c2
-rw-r--r--usr/src/common/ctf/ctf_hash.c160
-rw-r--r--usr/src/common/ctf/ctf_impl.h6
-rw-r--r--usr/src/common/ctf/ctf_types.c64
-rw-r--r--usr/src/common/fs/bootfsops.c329
-rw-r--r--usr/src/common/util/string.c7
-rw-r--r--usr/src/common/util/string.h1
-rw-r--r--usr/src/common/util/strtolctype.h4
-rw-r--r--usr/src/common/zfs/zfs_prop.c17
-rw-r--r--usr/src/common/zfs/zfs_prop.h2
-rw-r--r--usr/src/grub/Makefile6
-rw-r--r--usr/src/grub/Makefile.grub1
-rw-r--r--usr/src/grub/grub-0.97/stage2/boot.c13
-rw-r--r--usr/src/grub/grub-0.97/stage2/cmdline.c23
-rw-r--r--usr/src/head/Makefile2
-rw-r--r--usr/src/head/libzonecfg.h47
-rw-r--r--usr/src/head/netdb.h5
-rw-r--r--usr/src/head/regexp.h12
-rw-r--r--usr/src/head/resolv_joy.h472
-rw-r--r--usr/src/head/zdoor.h74
-rw-r--r--usr/src/head/zone.h4
-rw-r--r--usr/src/lib/Makefile35
-rw-r--r--usr/src/lib/Makefile.astmsg4
-rw-r--r--usr/src/lib/Makefile.lib12
-rw-r--r--usr/src/lib/Makefile.targ3
-rw-r--r--usr/src/lib/brand/Makefile5
-rw-r--r--usr/src/lib/brand/lx/Makefile55
-rw-r--r--usr/src/lib/brand/lx/Makefile.lx34
-rw-r--r--usr/src/lib/brand/lx/cmd/Makefile48
-rwxr-xr-xusr/src/lib/brand/lx/cmd/ifconfig.sh172
-rw-r--r--usr/src/lib/brand/lx/cmd/lx_isaexec_wrapper.sh59
-rw-r--r--usr/src/lib/brand/lx/cmd/lx_lockd.sh36
-rw-r--r--usr/src/lib/brand/lx/cmd/lx_native.sh29
-rw-r--r--usr/src/lib/brand/lx/cmd/lx_statd.sh36
-rw-r--r--usr/src/lib/brand/lx/cmd/lx_thunk.sh29
-rw-r--r--usr/src/lib/brand/lx/librtld_db/Makefile54
-rw-r--r--usr/src/lib/brand/lx/librtld_db/Makefile.com83
-rw-r--r--usr/src/lib/brand/lx/librtld_db/amd64/Makefile38
-rw-r--r--usr/src/lib/brand/lx/librtld_db/amd64/mapfile-vers44
-rw-r--r--usr/src/lib/brand/lx/librtld_db/common/lx_librtld_db.c851
-rw-r--r--usr/src/lib/brand/lx/librtld_db/common/mapfile-vers58
-rw-r--r--usr/src/lib/brand/lx/librtld_db/i386/Makefile33
-rw-r--r--usr/src/lib/brand/lx/lx_brand/Makefile49
-rw-r--r--usr/src/lib/brand/lx/lx_brand/Makefile.com103
-rw-r--r--usr/src/lib/brand/lx/lx_brand/amd64/Makefile51
-rw-r--r--usr/src/lib/brand/lx/lx_brand/amd64/lx_crt.s62
-rw-r--r--usr/src/lib/brand/lx/lx_brand/amd64/lx_handler.s428
-rw-r--r--usr/src/lib/brand/lx/lx_brand/amd64/lx_runexe.s46
-rw-r--r--usr/src/lib/brand/lx/lx_brand/amd64/offsets.in43
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/capabilities.c484
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/clock.c303
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/clone.c628
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/debug.c154
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/dir.c275
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/fcntl.c455
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/file.c918
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/fork.c72
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/id.c271
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/iovec.c283
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/lx_brand.c1775
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/lx_thunk_server.c1033
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/mapfile47
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/mapfile-vers47
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/mem.c381
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/misc.c1109
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/module.c90
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/mount.c748
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/open.c204
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/pgrp.c172
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/poll_select.c269
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/priority.c95
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/ptrace.c2357
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/rlimit.c566
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/sched.c624
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/sendfile.c125
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/signal.c2326
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/socket.c2144
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/stat.c551
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/statfs.c343
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/sysctl.c137
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/sysv_ipc.c947
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/time.c183
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/truncate.c173
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/wait.c329
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/xattr.c47
-rw-r--r--usr/src/lib/brand/lx/lx_brand/i386/Makefile57
-rw-r--r--usr/src/lib/brand/lx/lx_brand/i386/lx_crt.s65
-rw-r--r--usr/src/lib/brand/lx/lx_brand/i386/lx_handler.s384
-rw-r--r--usr/src/lib/brand/lx/lx_brand/i386/lx_runexe.s60
-rw-r--r--usr/src/lib/brand/lx/lx_brand/i386/offsets.in40
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_debug.h48
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_fcntl.h125
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h197
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_mount.h142
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_poll.h65
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h406
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_sigstack.h73
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_socket.h352
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_stat.h116
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_statfs.h81
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h396
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_sysv_ipc.h222
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h55
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_thunk_server.h143
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_types.h109
-rw-r--r--usr/src/lib/brand/lx/lx_nametoaddr/Makefile52
-rw-r--r--usr/src/lib/brand/lx/lx_nametoaddr/Makefile.com68
-rw-r--r--usr/src/lib/brand/lx/lx_nametoaddr/amd64/Makefile35
-rw-r--r--usr/src/lib/brand/lx/lx_nametoaddr/common/lx_nametoaddr.c479
-rw-r--r--usr/src/lib/brand/lx/lx_nametoaddr/common/mapfile-vers51
-rw-r--r--usr/src/lib/brand/lx/lx_nametoaddr/i386/Makefile34
-rw-r--r--usr/src/lib/brand/lx/lx_support/Makefile54
-rw-r--r--usr/src/lib/brand/lx/lx_support/lx_support.c542
-rw-r--r--usr/src/lib/brand/lx/lx_thunk/Makefile52
-rw-r--r--usr/src/lib/brand/lx/lx_thunk/Makefile.com74
-rw-r--r--usr/src/lib/brand/lx/lx_thunk/amd64/Makefile34
-rw-r--r--usr/src/lib/brand/lx/lx_thunk/common/lx_thunk.c1115
-rw-r--r--usr/src/lib/brand/lx/lx_thunk/common/mapfile-vers57
-rw-r--r--usr/src/lib/brand/lx/lx_thunk/i386/Makefile33
-rw-r--r--usr/src/lib/brand/lx/lx_thunk/sys/lx_thunk.h56
-rw-r--r--usr/src/lib/brand/lx/lx_vdso/Makefile39
-rw-r--r--usr/src/lib/brand/lx/lx_vdso/Makefile.com71
-rw-r--r--usr/src/lib/brand/lx/lx_vdso/amd64/Makefile34
-rw-r--r--usr/src/lib/brand/lx/lx_vdso/amd64/lx_vdso.s78
-rw-r--r--usr/src/lib/brand/lx/lx_vdso/common/mapfile-vers59
-rw-r--r--usr/src/lib/brand/lx/netfiles/Makefile48
-rw-r--r--usr/src/lib/brand/lx/netfiles/etc_netconfig38
-rw-r--r--usr/src/lib/brand/lx/testing/Makefile36
-rw-r--r--usr/src/lib/brand/lx/testing/Readme_ltp314
-rw-r--r--usr/src/lib/brand/lx/testing/ltp_skiplist256
-rw-r--r--usr/src/lib/brand/lx/zone/Makefile75
-rw-r--r--usr/src/lib/brand/lx/zone/SUNWlx.xml34
-rw-r--r--usr/src/lib/brand/lx/zone/SUNWlx26.xml35
-rw-r--r--usr/src/lib/brand/lx/zone/config.xml104
-rw-r--r--usr/src/lib/brand/lx/zone/distros/Makefile50
-rw-r--r--usr/src/lib/brand/lx/zone/distros/centos35.distro66
-rw-r--r--usr/src/lib/brand/lx/zone/distros/centos36.distro66
-rw-r--r--usr/src/lib/brand/lx/zone/distros/centos37.distro65
-rw-r--r--usr/src/lib/brand/lx/zone/distros/centos38.distro79
-rw-r--r--usr/src/lib/brand/lx/zone/distros/rhel35.distro98
-rw-r--r--usr/src/lib/brand/lx/zone/distros/rhel36.distro97
-rw-r--r--usr/src/lib/brand/lx/zone/distros/rhel37.distro96
-rw-r--r--usr/src/lib/brand/lx/zone/distros/rhel38.distro109
-rw-r--r--usr/src/lib/brand/lx/zone/distros/rhel_centos_common1016
-rw-r--r--usr/src/lib/brand/lx/zone/lx_boot.ksh212
-rw-r--r--usr/src/lib/brand/lx/zone/lx_boot_zone_ubuntu.ksh222
-rw-r--r--usr/src/lib/brand/lx/zone/lx_distro_install.ksh2772
-rw-r--r--usr/src/lib/brand/lx/zone/lx_init_zone.ksh325
-rw-r--r--usr/src/lib/brand/lx/zone/lx_init_zone_debian.ksh242
-rw-r--r--usr/src/lib/brand/lx/zone/lx_init_zone_redhat.ksh453
-rw-r--r--usr/src/lib/brand/lx/zone/lx_init_zone_ubuntu.ksh25
-rw-r--r--usr/src/lib/brand/lx/zone/lx_install.ksh579
-rw-r--r--usr/src/lib/brand/lx/zone/lx_networking.ksh74
-rw-r--r--usr/src/lib/brand/lx/zone/platform.xml141
-rw-r--r--usr/src/lib/brand/shared/zone/common.ksh98
-rw-r--r--usr/src/lib/brand/shared/zone/uninstall.ksh50
-rw-r--r--usr/src/lib/brand/sngl/Makefile51
-rw-r--r--usr/src/lib/brand/sngl/Makefile.sngl28
-rw-r--r--usr/src/lib/brand/sngl/sngl_brand/Makefile47
-rw-r--r--usr/src/lib/brand/sngl/sngl_brand/Makefile.com76
-rw-r--r--usr/src/lib/brand/sngl/sngl_brand/amd64/Makefile48
-rw-r--r--usr/src/lib/brand/sngl/sngl_brand/common/mapfile-vers48
-rw-r--r--usr/src/lib/brand/sngl/sngl_brand/common/sngl_brand.c429
-rw-r--r--usr/src/lib/brand/sngl/sngl_brand/i386/Makefile46
-rw-r--r--usr/src/lib/brand/sngl/zone/Makefile53
-rw-r--r--usr/src/lib/brand/sngl/zone/config.xml112
-rw-r--r--usr/src/lib/brand/sngl/zone/platform.xml161
-rw-r--r--usr/src/lib/brand/sngl/zone/sinstall.sh31
-rw-r--r--usr/src/lib/common/i386/crtn.s1
-rw-r--r--usr/src/lib/fm/topo/libtopo/common/libtopo.h5
-rw-r--r--usr/src/lib/fm/topo/libtopo/common/mapfile-vers2
-rw-r--r--usr/src/lib/fm/topo/libtopo/common/topo_mod.h5
-rw-r--r--usr/src/lib/fm/topo/libtopo/common/topo_string.c64
-rwxr-xr-x[-rw-r--r--]usr/src/lib/fm/topo/maps/Joyent,Joyent-Compute-Platform-1101/Joyent-Compute-Platform-1101-disk-hc-topology.xmlgenksh10
-rw-r--r--usr/src/lib/fm/topo/maps/i86pc/i86pc-legacy-hc-topology.xml4
-rw-r--r--usr/src/lib/fm/topo/modules/common/disk/disk_mptsas.c65
-rw-r--r--usr/src/lib/fm/topo/modules/common/fac_prov_mptsas/fac_prov_mptsas.c51
-rw-r--r--usr/src/lib/krb5/plugins/preauth/pkinit/Makefile.com2
-rw-r--r--usr/src/lib/libbe/common/be_list.c1
-rw-r--r--usr/src/lib/libbrand/common/libbrand.c8
-rw-r--r--usr/src/lib/libbsm/common/adt.c102
-rw-r--r--usr/src/lib/libc/amd64/Makefile4
-rw-r--r--usr/src/lib/libc/i386/Makefile.com4
-rw-r--r--usr/src/lib/libc/inc/thr_inlines.h12
-rw-r--r--usr/src/lib/libc/inc/thr_uberdata.h4
-rw-r--r--usr/src/lib/libc/port/gen/getauxv.c15
-rw-r--r--usr/src/lib/libc/port/gen/getlogin.c1
-rw-r--r--usr/src/lib/libc/port/gen/psiginfo.c34
-rw-r--r--usr/src/lib/libc/port/gen/psignal.c9
-rw-r--r--usr/src/lib/libc/port/mapfile-vers12
-rw-r--r--usr/src/lib/libc/port/sys/epoll.c207
-rw-r--r--usr/src/lib/libc/port/sys/inotify.c142
-rw-r--r--usr/src/lib/libc/port/sys/zone.c13
-rw-r--r--usr/src/lib/libc/port/threads/sigaction.c12
-rw-r--r--usr/src/lib/libc/port/threads/thr.c29
-rw-r--r--usr/src/lib/libc/sparc/Makefile.com4
-rw-r--r--usr/src/lib/libc/sparc/crt/_rtld.c9
-rw-r--r--usr/src/lib/libc/sparcv9/Makefile.com3
-rw-r--r--usr/src/lib/libc_db/common/thread_db.c30
-rw-r--r--usr/src/lib/libctf/Makefile.com41
-rw-r--r--usr/src/lib/libctf/Makefile.shared.com67
-rw-r--r--usr/src/lib/libctf/Makefile.shared.targ22
-rw-r--r--usr/src/lib/libctf/common/ctf_diff.c783
-rw-r--r--usr/src/lib/libctf/common/libctf.h20
-rw-r--r--usr/src/lib/libctf/common/mapfile-vers17
-rw-r--r--usr/src/lib/libcurses/screen/setupterm.c7
-rw-r--r--usr/src/lib/libdladm/common/libdladm.h6
-rw-r--r--usr/src/lib/libdladm/common/libdllink.c28
-rw-r--r--usr/src/lib/libdladm/common/libdllink.h6
-rw-r--r--usr/src/lib/libdladm/common/libdlmgmt.c19
-rw-r--r--usr/src/lib/libdladm/common/libdlvnic.c29
-rw-r--r--usr/src/lib/libdladm/common/linkprop.c69
-rw-r--r--usr/src/lib/libdladm/common/mapfile-vers2
-rw-r--r--usr/src/lib/libdlpi/common/libdlpi.c61
-rw-r--r--usr/src/lib/libdlpi/common/libdlpi.h2
-rw-r--r--usr/src/lib/libdlpi/common/libdlpi_impl.h3
-rw-r--r--usr/src/lib/libdlpi/common/mapfile-vers5
-rw-r--r--usr/src/lib/libdns_sd/java/com/apple/dnssd/DNSSDRecordRegistrar.java20
-rw-r--r--usr/src/lib/libdtrace/Makefile.com10
-rw-r--r--usr/src/lib/libdtrace/common/dt_impl.h1
-rw-r--r--usr/src/lib/libdtrace/common/dt_open.c27
-rw-r--r--usr/src/lib/libdtrace/common/dt_options.c73
-rw-r--r--usr/src/lib/libdtrace/common/dt_program.c9
-rw-r--r--usr/src/lib/libdtrace/common/dt_work.c32
-rw-r--r--usr/src/lib/libdtrace/common/dtrace.h1
-rw-r--r--usr/src/lib/libdtrace/common/mac.d.in66
-rw-r--r--usr/src/lib/libdtrace/common/mac.sed.in45
-rw-r--r--usr/src/lib/libdtrace/common/vnd.d28
-rw-r--r--usr/src/lib/libgrubmgmt/common/libgrub_fs.c2
-rw-r--r--usr/src/lib/libipadm/common/libipadm.c11
-rw-r--r--usr/src/lib/libkmf/plugins/kmf_openssl/Makefile.com4
-rw-r--r--usr/src/lib/libnisdb/db_mindex3.cc4
-rw-r--r--usr/src/lib/libnisdb/db_table.cc4
-rw-r--r--usr/src/lib/libnisdb/nis_db.cc8
-rw-r--r--usr/src/lib/libnsl/nss/getipnodeby.c26
-rw-r--r--usr/src/lib/libpkg/Makefile.com2
-rw-r--r--usr/src/lib/libproc/common/Pcontrol.c6
-rw-r--r--usr/src/lib/libproc/common/Pzone.c3
-rw-r--r--usr/src/lib/libresolv2_joy/Makefile76
-rw-r--r--usr/src/lib/libresolv2_joy/Makefile.com162
-rw-r--r--usr/src/lib/libresolv2_joy/THIRDPARTYLICENSE185
-rw-r--r--usr/src/lib/libresolv2_joy/THIRDPARTYLICENSE.descrip1
-rw-r--r--usr/src/lib/libresolv2_joy/amd64/Makefile31
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/daemon.c81
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/ftruncate.c64
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/gettimeofday.c62
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/mktemp.c156
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/putenv.c27
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/readv.c39
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/setenv.c151
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/setitimer.c29
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/strcasecmp.c124
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/strdup.c20
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/strerror.c94
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/strpbrk.c70
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/strsep.c88
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/strtoul.c119
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/utimes.c40
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/writev.c89
-rw-r--r--usr/src/lib/libresolv2_joy/common/dst/dst_api.c1048
-rw-r--r--usr/src/lib/libresolv2_joy/common/dst/dst_internal.h155
-rw-r--r--usr/src/lib/libresolv2_joy/common/dst/hmac_link.c491
-rw-r--r--usr/src/lib/libresolv2_joy/common/dst/support.c342
-rw-r--r--usr/src/lib/libresolv2_joy/common/inet/inet_cidr_ntop.c263
-rw-r--r--usr/src/lib/libresolv2_joy/common/inet/inet_cidr_pton.c277
-rw-r--r--usr/src/lib/libresolv2_joy/common/inet/inet_data.c46
-rw-r--r--usr/src/lib/libresolv2_joy/common/inet/inet_lnaof.c65
-rw-r--r--usr/src/lib/libresolv2_joy/common/inet/inet_makeaddr.c68
-rw-r--r--usr/src/lib/libresolv2_joy/common/inet/inet_net_ntop.c279
-rw-r--r--usr/src/lib/libresolv2_joy/common/inet/inet_net_pton.c407
-rw-r--r--usr/src/lib/libresolv2_joy/common/inet/inet_neta.c89
-rw-r--r--usr/src/lib/libresolv2_joy/common/inet/inet_netof.c64
-rw-r--r--usr/src/lib/libresolv2_joy/common/inet/inet_network.c106
-rw-r--r--usr/src/lib/libresolv2_joy/common/inet/nsap_addr.c111
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/dns.c154
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/dns_ho.c1143
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/dns_nw.c591
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/dns_p.h52
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/dns_pr.c268
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/dns_sv.c300
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/gai_strerror.c105
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/gen.c459
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/gen_ho.c391
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/gen_ng.c174
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/gen_nw.c264
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/gen_p.h113
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/gen_pr.c228
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/gen_sv.c229
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/getaddrinfo.c1253
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/gethostent.c1096
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/gethostent_r.c278
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/getnameinfo.c334
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/getnetent.c345
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/getnetent_r.c234
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/getnetgrent.c160
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/getnetgrent_r.c219
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/getprotoent.c176
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/getprotoent_r.c223
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/getservent.c179
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/getservent_r.c242
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/hesiod.c505
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/hesiod_p.h48
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/irp.c583
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/irp_ho.c405
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/irp_ng.c253
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/irp_nw.c348
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/irp_p.h60
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/irp_pr.c327
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/irp_sv.c346
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/irpmarshall.c2301
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/irs_data.c254
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/irs_data.h63
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/irs_p.h51
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/lcl.c142
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/lcl_ho.c578
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/lcl_ng.c446
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/lcl_nw.c373
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/lcl_p.h51
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/lcl_pr.c294
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/lcl_sv.c432
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/nis.c156
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/nis_p.h47
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/nul_ng.c127
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/pathnames.h52
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/util.c109
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/assertions.c94
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/base64.c333
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/bitncmp.c68
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/ctl_clnt.c620
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/ctl_p.c188
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/ctl_p.h28
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/ctl_srvr.c787
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/ev_connects.c369
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/ev_files.c277
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/ev_streams.c308
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/ev_timers.c499
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/ev_waits.c247
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/eventlib.c933
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/eventlib_p.h281
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/heap.c236
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/hex.c119
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/logging.c716
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/logging_p.h61
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/memcluster.c588
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/movefile.c43
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/tree.c534
-rw-r--r--usr/src/lib/libresolv2_joy/common/llib-lresolv_joy59
-rw-r--r--usr/src/lib/libresolv2_joy/common/mapfile-vers60
-rw-r--r--usr/src/lib/libresolv2_joy/common/nameser/ns_date.c129
-rw-r--r--usr/src/lib/libresolv2_joy/common/nameser/ns_name.c1153
-rw-r--r--usr/src/lib/libresolv2_joy/common/nameser/ns_netint.c61
-rw-r--r--usr/src/lib/libresolv2_joy/common/nameser/ns_newmsg.c273
-rw-r--r--usr/src/lib/libresolv2_joy/common/nameser/ns_parse.c275
-rw-r--r--usr/src/lib/libresolv2_joy/common/nameser/ns_print.c1242
-rw-r--r--usr/src/lib/libresolv2_joy/common/nameser/ns_rdata.c300
-rw-r--r--usr/src/lib/libresolv2_joy/common/nameser/ns_samedomain.c207
-rw-r--r--usr/src/lib/libresolv2_joy/common/nameser/ns_sign.c387
-rw-r--r--usr/src/lib/libresolv2_joy/common/nameser/ns_ttl.c162
-rw-r--r--usr/src/lib/libresolv2_joy/common/nameser/ns_verify.c484
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/herror.c129
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/mtctxres.c135
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/res_comp.c287
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/res_data.c322
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/res_debug.c1252
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/res_debug.h35
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/res_findzonecut.c722
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/res_init.c958
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/res_mkquery.c386
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/res_mkupdate.c1163
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/res_mkupdate.h25
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/res_private.h22
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/res_query.c440
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/res_send.c1120
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/res_sendsigned.c170
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/res_update.c213
-rw-r--r--usr/src/lib/libresolv2_joy/common/sunw/sunw_mtctxres.c126
-rw-r--r--usr/src/lib/libresolv2_joy/common/sunw/sunw_updrec.c249
-rw-r--r--usr/src/lib/libresolv2_joy/common/sunw/sunw_wrappers.c23
-rw-r--r--usr/src/lib/libresolv2_joy/i386/Makefile30
-rw-r--r--usr/src/lib/libresolv2_joy/include/Makefile60
-rw-r--r--usr/src/lib/libresolv2_joy/include/arpa/port_inet.h41
-rw-r--r--usr/src/lib/libresolv2_joy/include/arpa/port_nameser.h40
-rw-r--r--usr/src/lib/libresolv2_joy/include/conf/sunoptions.h30
-rw-r--r--usr/src/lib/libresolv2_joy/include/config.h75
-rw-r--r--usr/src/lib/libresolv2_joy/include/err.h62
-rw-r--r--usr/src/lib/libresolv2_joy/include/fd_setsize.h10
-rw-r--r--usr/src/lib/libresolv2_joy/include/hesiod.h39
-rw-r--r--usr/src/lib/libresolv2_joy/include/irp.h107
-rw-r--r--usr/src/lib/libresolv2_joy/include/irs.h348
-rw-r--r--usr/src/lib/libresolv2_joy/include/isc/assertions.h123
-rw-r--r--usr/src/lib/libresolv2_joy/include/isc/ctl.h112
-rw-r--r--usr/src/lib/libresolv2_joy/include/isc/dst.h168
-rw-r--r--usr/src/lib/libresolv2_joy/include/isc/eventlib.h206
-rw-r--r--usr/src/lib/libresolv2_joy/include/isc/heap.h49
-rw-r--r--usr/src/lib/libresolv2_joy/include/isc/irpmarshall.h112
-rw-r--r--usr/src/lib/libresolv2_joy/include/isc/list.h118
-rw-r--r--usr/src/lib/libresolv2_joy/include/isc/logging.h113
-rw-r--r--usr/src/lib/libresolv2_joy/include/isc/memcluster.h50
-rw-r--r--usr/src/lib/libresolv2_joy/include/isc/misc.h44
-rw-r--r--usr/src/lib/libresolv2_joy/include/isc/platform.h42
-rw-r--r--usr/src/lib/libresolv2_joy/include/isc/tree.h59
-rwxr-xr-xusr/src/lib/libresolv2_joy/include/make_os_version34
-rw-r--r--usr/src/lib/libresolv2_joy/include/make_os_version.sh34
-rw-r--r--usr/src/lib/libresolv2_joy/include/port_after.h539
-rw-r--r--usr/src/lib/libresolv2_joy/include/port_before.h201
-rw-r--r--usr/src/lib/libresolv2_joy/include/port_netdb.h188
-rw-r--r--usr/src/lib/libresolv2_joy/include/port_resolv.h42
-rwxr-xr-xusr/src/lib/libresolv2_joy/include/probe_ipv673
-rw-r--r--usr/src/lib/libresolv2_joy/include/probe_ipv6.sh73
-rw-r--r--usr/src/lib/libresolv2_joy/include/res_update.h88
-rw-r--r--usr/src/lib/libresolv2_joy/include/resolv_mt.h47
-rw-r--r--usr/src/lib/libresolv2_joy/include/sunw_port_after.h123
-rw-r--r--usr/src/lib/libresolv2_joy/include/sunw_port_before.h43
-rw-r--r--usr/src/lib/libresolv2_joy/include/sys/bitypes.h37
-rw-r--r--usr/src/lib/libresolv2_joy/include/sys/cdefs.h144
-rw-r--r--usr/src/lib/libresolv2_joy/sparc/Makefile30
-rw-r--r--usr/src/lib/libresolv2_joy/sparcv9/Makefile38
-rw-r--r--usr/src/lib/libscf/inc/libscf.h6
-rw-r--r--usr/src/lib/libshare/nfs/libshare_nfs.c14
-rw-r--r--usr/src/lib/libshell/Makefile1
-rw-r--r--usr/src/lib/libshell/Makefile.doc93
-rw-r--r--usr/src/lib/libshell/common/COMPATIBILITY134
-rw-r--r--usr/src/lib/libshell/common/DESIGN170
-rw-r--r--usr/src/lib/libshell/common/OBSOLETE152
-rw-r--r--usr/src/lib/libshell/common/README232
-rw-r--r--usr/src/lib/libshell/common/RELEASE2204
-rw-r--r--usr/src/lib/libshell/common/TYPES182
-rw-r--r--usr/src/lib/libshell/misc/images/callouts/1.pngbin236 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/callouts/10.pngbin284 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/callouts/2.pngbin261 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/callouts/3.pngbin265 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/callouts/4.pngbin260 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/callouts/5.pngbin261 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/callouts/6.pngbin278 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/callouts/7.pngbin253 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/callouts/8.pngbin275 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/callouts/9.pngbin284 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/tag_bourne.pngbin130 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/tag_i18n.pngbin124 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/tag_ksh.pngbin122 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/tag_ksh88.pngbin124 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/tag_ksh93.pngbin140 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/tag_l10n.pngbin118 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/tag_perf.pngbin127 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/shell_styleguide.docbook1464
-rw-r--r--usr/src/lib/libsmartsshd/Makefile48
-rw-r--r--usr/src/lib/libsmartsshd/Makefile.com47
-rw-r--r--usr/src/lib/libsmartsshd/amd64/Makefile32
-rw-r--r--usr/src/lib/libsmartsshd/common/llib-lsmartsshd32
-rw-r--r--usr/src/lib/libsmartsshd/common/mapfile-vers45
-rw-r--r--usr/src/lib/libsmartsshd/common/sshd-plugin.c123
-rw-r--r--usr/src/lib/libsmartsshd/i386/Makefile31
-rw-r--r--usr/src/lib/libsocket/inet/getaddrinfo.c13
-rw-r--r--usr/src/lib/libumem/amd64/umem_genasm.c2
-rw-r--r--usr/src/lib/libvnd/Makefile50
-rw-r--r--usr/src/lib/libvnd/Makefile.com39
-rw-r--r--usr/src/lib/libvnd/amd64/Makefile19
-rw-r--r--usr/src/lib/libvnd/common/libvnd.c550
-rw-r--r--usr/src/lib/libvnd/common/libvnd.h84
-rw-r--r--usr/src/lib/libvnd/common/llib-lvnd19
-rw-r--r--usr/src/lib/libvnd/common/mapfile-vers55
-rw-r--r--usr/src/lib/libvnd/i386/Makefile18
-rw-r--r--usr/src/lib/libwanboot/Makefile.com2
-rw-r--r--usr/src/lib/libzdoor/Makefile48
-rw-r--r--usr/src/lib/libzdoor/Makefile.com50
-rw-r--r--usr/src/lib/libzdoor/amd64/Makefile32
-rw-r--r--usr/src/lib/libzdoor/common/llib-lzdoor34
-rw-r--r--usr/src/lib/libzdoor/common/mapfile-vers48
-rw-r--r--usr/src/lib/libzdoor/common/zdoor-int.c324
-rw-r--r--usr/src/lib/libzdoor/common/zdoor-int.h64
-rw-r--r--usr/src/lib/libzdoor/common/zdoor.c437
-rw-r--r--usr/src/lib/libzdoor/common/zerror.c117
-rw-r--r--usr/src/lib/libzdoor/common/zerror.h48
-rw-r--r--usr/src/lib/libzdoor/common/ztree.c344
-rw-r--r--usr/src/lib/libzdoor/common/ztree.h88
-rw-r--r--usr/src/lib/libzdoor/i386/Makefile31
-rw-r--r--usr/src/lib/libzfs/common/libzfs.h2
-rw-r--r--usr/src/lib/libzfs/common/libzfs_dataset.c20
-rw-r--r--usr/src/lib/libzfs/common/libzfs_impl.h3
-rw-r--r--usr/src/lib/libzfs/common/libzfs_iter.c16
-rw-r--r--usr/src/lib/libzfs/common/libzfs_pool.c7
-rw-r--r--usr/src/lib/libzfs/common/libzfs_util.c41
-rw-r--r--usr/src/lib/libzfs/common/mapfile-vers3
-rw-r--r--usr/src/lib/libzonecfg/common/getzoneent.c14
-rw-r--r--usr/src/lib/libzonecfg/common/libzonecfg.c646
-rw-r--r--usr/src/lib/libzonecfg/common/mapfile-vers13
-rw-r--r--usr/src/lib/libzonecfg/dtd/zonecfg.dtd.115
-rw-r--r--usr/src/lib/libzpool/common/sys/zfs_context.h3
-rw-r--r--usr/src/lib/nsswitch/dns/Makefile.com4
-rw-r--r--usr/src/lib/nsswitch/dns/common/dns_common.c215
-rw-r--r--usr/src/lib/nsswitch/dns/common/dns_common.h4
-rw-r--r--usr/src/lib/nsswitch/dns/common/dns_mt.c155
-rw-r--r--usr/src/lib/nsswitch/dns/common/gethostent.c8
-rw-r--r--usr/src/lib/nsswitch/dns/common/gethostent6.c4
-rw-r--r--usr/src/lib/pkcs11/pkcs11_tpm/Makefile.com2
-rw-r--r--usr/src/lib/print/mod_ipp/Makefile2
-rw-r--r--usr/src/lib/pysolaris/Makefile.com1
-rw-r--r--usr/src/man/Makefile1
-rw-r--r--usr/src/man/Makefile.man7
-rw-r--r--usr/src/man/man1/Makefile1
-rw-r--r--usr/src/man/man1/column.1129
-rw-r--r--usr/src/man/man1/crontab.148
-rw-r--r--usr/src/man/man1/ld.12
-rw-r--r--usr/src/man/man1/ld.so.1.121
-rw-r--r--usr/src/man/man1/nawk.19
-rw-r--r--usr/src/man/man1/proc.111
-rw-r--r--usr/src/man/man1/ps.16
-rw-r--r--usr/src/man/man1/sed.134
-rw-r--r--usr/src/man/man1/zlogin.136
-rw-r--r--usr/src/man/man1m/Makefile6
-rw-r--r--usr/src/man/man1m/dladm.1m92
-rw-r--r--usr/src/man/man1m/flowadm.1m49
-rw-r--r--usr/src/man/man1m/ipf.1m36
-rw-r--r--usr/src/man/man1m/ipfs.1m44
-rw-r--r--usr/src/man/man1m/ipfstat.1m31
-rw-r--r--usr/src/man/man1m/ipmon.1m33
-rw-r--r--usr/src/man/man1m/ipnat.1m32
-rw-r--r--usr/src/man/man1m/ippool.1m48
-rw-r--r--usr/src/man/man1m/mount_tmpfs.1m14
-rw-r--r--usr/src/man/man1m/prstat.1m25
-rw-r--r--usr/src/man/man1m/prtconf.1m12
-rw-r--r--usr/src/man/man1m/reboot.1m3
-rw-r--r--usr/src/man/man1m/snoop.1m21
-rw-r--r--usr/src/man/man1m/svc.startd.1m9
-rw-r--r--usr/src/man/man1m/svccfg.1m9
-rw-r--r--usr/src/man/man1m/tunefs.1m8
-rw-r--r--usr/src/man/man1m/vfsstat.1m213
-rw-r--r--usr/src/man/man1m/vndadm.1m651
-rw-r--r--usr/src/man/man1m/vndstat.1m163
-rw-r--r--usr/src/man/man1m/zfs.1m41
-rw-r--r--usr/src/man/man1m/zoneadm.1m28
-rw-r--r--usr/src/man/man1m/zonecfg.1m84
-rw-r--r--usr/src/man/man1m/zpool.1m12
-rw-r--r--usr/src/man/man3c/Makefile12
-rw-r--r--usr/src/man/man3c/epoll_create.3c104
-rw-r--r--usr/src/man/man3c/epoll_ctl.3c300
-rw-r--r--usr/src/man/man3c/epoll_wait.3c113
-rw-r--r--usr/src/man/man3c/inotify_add_watch.3c120
-rw-r--r--usr/src/man/man3c/inotify_init.3c107
-rw-r--r--usr/src/man/man3c/inotify_rm_watch.3c81
-rw-r--r--usr/src/man/man3c/psignal.3c13
-rw-r--r--usr/src/man/man3dlpi/Makefile2
-rw-r--r--usr/src/man/man3dlpi/dlpi_open.3dlpi31
-rw-r--r--usr/src/man/man3lib/Makefile1
-rw-r--r--usr/src/man/man3lib/libvnd.3lib690
-rw-r--r--usr/src/man/man3vnd/Makefile70
-rw-r--r--usr/src/man/man3vnd/vnd_create.3vnd280
-rw-r--r--usr/src/man/man3vnd/vnd_errno.3vnd170
-rw-r--r--usr/src/man/man3vnd/vnd_frameio_read.3vnd705
-rw-r--r--usr/src/man/man3vnd/vnd_pollfd.3vnd155
-rw-r--r--usr/src/man/man3vnd/vnd_prop_get.3vnd242
-rw-r--r--usr/src/man/man3vnd/vnd_prop_iter.3vnd148
-rw-r--r--usr/src/man/man3vnd/vnd_prop_writeable.3vnd101
-rw-r--r--usr/src/man/man3vnd/vnd_walk.3vnd155
-rw-r--r--usr/src/man/man3xnet/Makefile4
-rw-r--r--usr/src/man/man3xnet/htonl.3xnet29
-rw-r--r--usr/src/man/man4/proc.413
-rw-r--r--usr/src/man/man5/Makefile3
-rw-r--r--usr/src/man/man5/epoll.5113
-rw-r--r--usr/src/man/man5/inotify.5305
-rw-r--r--usr/src/man/man5/privileges.512
-rw-r--r--usr/src/man/man5/resource_controls.5114
-rw-r--r--usr/src/man/man7d/Makefile1
-rw-r--r--usr/src/man/man7d/poll.7d9
-rw-r--r--usr/src/man/man7d/vnd.7d118
-rw-r--r--usr/src/man/man7fs/Makefile6
-rw-r--r--usr/src/man/man7fs/bootfs.7fs90
-rw-r--r--usr/src/man/man7fs/hyprlofs.7fs62
-rw-r--r--usr/src/man/man7fs/lxproc.7fs115
-rw-r--r--usr/src/man/man9e/chpoll.9e17
-rw-r--r--usr/src/man/man9f/kmem_cache_create.9f2
-rw-r--r--usr/src/pkg/manifests/SUNWcs.mf28
-rw-r--r--usr/src/pkg/manifests/SUNWlx.mf11
-rw-r--r--usr/src/pkg/manifests/driver-crypto-nfp.mf33
-rw-r--r--usr/src/pkg/manifests/driver-network-bge.mf13
-rw-r--r--usr/src/pkg/manifests/service-file-system-smb.mf1
-rw-r--r--usr/src/pkg/manifests/system-zones-brand-joyent.mf54
-rw-r--r--usr/src/pkg/manifests/system-zones-brand-lx.mf107
-rw-r--r--usr/src/psm/stand/bootblks/ufs/i386/mboot.S1
-rw-r--r--usr/src/stand/lib/fs/hsfs/hsfsops.c1
-rw-r--r--usr/src/test/test-runner/cmd/Makefile4
-rw-r--r--usr/src/test/test-runner/cmd/run.py2
-rw-r--r--usr/src/test/util-tests/tests/Makefile1
-rw-r--r--usr/src/tools/README.tools3
-rw-r--r--usr/src/tools/cscope-fast/Makefile1
-rw-r--r--usr/src/tools/ctf/Makefile2
-rw-r--r--usr/src/tools/ctf/Makefile.ctf1
-rw-r--r--usr/src/tools/ctf/common/ctf_headers.h14
-rw-r--r--usr/src/tools/ctf/common/utils.h3
-rw-r--r--usr/src/tools/ctf/ctfdiff/Makefile44
-rw-r--r--usr/src/tools/ctf/ctfdiff/Makefile.com44
-rw-r--r--usr/src/tools/ctf/ctfdiff/i386/Makefile27
-rw-r--r--usr/src/tools/ctf/ctfdiff/sparc/Makefile27
-rw-r--r--usr/src/tools/ctf/cvt/ctftools.h11
-rw-r--r--usr/src/tools/ctf/libctf/Makefile46
-rw-r--r--usr/src/tools/ctf/libctf/Makefile.com42
-rw-r--r--usr/src/tools/ctf/libctf/i386/Makefile8
-rw-r--r--usr/src/tools/ctf/libctf/sparc/Makefile8
-rw-r--r--usr/src/tools/env/illumos.sh6
-rw-r--r--usr/src/tools/findunref/exception_list.git39
-rw-r--r--usr/src/tools/findunref/exception_list.unknown0
-rw-r--r--usr/src/tools/onbld/Checks/Cddl.py2
-rw-r--r--usr/src/tools/onbld/Checks/CmtBlk.py2
-rw-r--r--usr/src/tools/onbld/Checks/Comments.py4
-rw-r--r--usr/src/tools/onbld/Checks/Copyright.py2
-rw-r--r--usr/src/tools/onbld/Checks/DbLookups.py31
-rw-r--r--usr/src/tools/onbld/Checks/HdrChk.py2
-rw-r--r--usr/src/tools/onbld/Checks/Keywords.py4
-rw-r--r--usr/src/tools/onbld/Checks/Mapfile.py2
-rw-r--r--usr/src/tools/onbld/Checks/__init__.py2
-rw-r--r--usr/src/tools/onbld/Scm/__init__.py4
-rw-r--r--usr/src/tools/onbld/hgext/__init__.py4
-rw-r--r--usr/src/tools/scripts/cddlchk.py2
-rw-r--r--usr/src/tools/scripts/copyrightchk.py2
-rw-r--r--usr/src/tools/scripts/git-pbchk.py2
-rw-r--r--usr/src/tools/scripts/hdrchk.py2
-rw-r--r--usr/src/tools/scripts/hg-active.py2
-rw-r--r--usr/src/tools/scripts/mapfilechk.py2
-rw-r--r--usr/src/tools/scripts/nightly.sh34
-rw-r--r--usr/src/tools/scripts/validate_pkg.py2
-rw-r--r--usr/src/tools/scripts/webrev.sh8
-rw-r--r--usr/src/tools/scripts/wsdiff.py2
-rw-r--r--usr/src/ucbhead/Makefile68
-rw-r--r--usr/src/ucbhead/sys/file.h4
-rw-r--r--usr/src/uts/Makefile.uts1
-rw-r--r--usr/src/uts/common/Makefile.files44
-rw-r--r--usr/src/uts/common/Makefile.rules90
-rw-r--r--usr/src/uts/common/brand/lx/autofs/lx_autofs.c1569
-rw-r--r--usr/src/uts/common/brand/lx/dtrace/lx_systrace.c1196
-rw-r--r--usr/src/uts/common/brand/lx/dtrace/lx_systrace.conf27
-rw-r--r--usr/src/uts/common/brand/lx/io/lx_audio.c1996
-rw-r--r--usr/src/uts/common/brand/lx/io/lx_audio.conf27
-rw-r--r--usr/src/uts/common/brand/lx/io/lx_netlink.c1170
-rw-r--r--usr/src/uts/common/brand/lx/io/lx_ptm.c1157
-rw-r--r--usr/src/uts/common/brand/lx/io/lx_ptm.conf27
-rw-r--r--usr/src/uts/common/brand/lx/os/lx_brand.c1662
-rw-r--r--usr/src/uts/common/brand/lx/os/lx_misc.c549
-rw-r--r--usr/src/uts/common/brand/lx/os/lx_pid.c378
-rw-r--r--usr/src/uts/common/brand/lx/os/lx_syscall.c178
-rw-r--r--usr/src/uts/common/brand/lx/procfs/lx_proc.h250
-rw-r--r--usr/src/uts/common/brand/lx/procfs/lx_prsubr.c538
-rw-r--r--usr/src/uts/common/brand/lx/procfs/lx_prvfsops.c374
-rw-r--r--usr/src/uts/common/brand/lx/procfs/lx_prvnops.c3646
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_audio.h130
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_autofs.h334
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_autofs_impl.h121
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_brand.h349
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_futex.h108
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_impl.h52
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_ldt.h93
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_pid.h61
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_ptm.h44
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_sched.h60
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_syscalls.h75
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_brk.c57
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_clone.c164
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_futex.c739
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_getpid.c74
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_id.c297
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_ioctl.c1158
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_kill.c399
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_modify_ldt.c121
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_pipe.c180
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_rw.c39
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_sched.c513
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_sysinfo.c140
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_thread_area.c173
-rw-r--r--usr/src/uts/common/brand/sn1/sn1_brand.c21
-rw-r--r--usr/src/uts/common/brand/sn1/sn1_brand.h7
-rw-r--r--usr/src/uts/common/brand/sngl/sngl_brand.c257
-rw-r--r--usr/src/uts/common/brand/sngl/sngl_brand.h65
-rw-r--r--usr/src/uts/common/brand/solaris10/s10_brand.c21
-rw-r--r--usr/src/uts/common/brand/solaris10/s10_brand.h6
-rw-r--r--usr/src/uts/common/conf/param.c12
-rw-r--r--usr/src/uts/common/crypto/api/kcf_random.c4
-rw-r--r--usr/src/uts/common/crypto/core/kcf_sched.c6
-rw-r--r--usr/src/uts/common/disp/cmt.c8
-rw-r--r--usr/src/uts/common/disp/cpucaps.c285
-rw-r--r--usr/src/uts/common/disp/disp.c26
-rw-r--r--usr/src/uts/common/disp/thread.c16
-rw-r--r--usr/src/uts/common/dtrace/dtrace.c46
-rw-r--r--usr/src/uts/common/dtrace/sdt_subr.c33
-rw-r--r--usr/src/uts/common/exec/elf/elf.c130
-rw-r--r--usr/src/uts/common/fs/bootfs/bootfs_construct.c367
-rw-r--r--usr/src/uts/common/fs/bootfs/bootfs_vfsops.c320
-rw-r--r--usr/src/uts/common/fs/bootfs/bootfs_vnops.c544
-rw-r--r--usr/src/uts/common/fs/dev/sdev_netops.c257
-rw-r--r--usr/src/uts/common/fs/dev/sdev_plugin.c913
-rw-r--r--usr/src/uts/common/fs/dev/sdev_subr.c211
-rw-r--r--usr/src/uts/common/fs/dev/sdev_vfsops.c23
-rw-r--r--usr/src/uts/common/fs/fifofs/fifosubr.c7
-rw-r--r--usr/src/uts/common/fs/fifofs/fifovnops.c13
-rw-r--r--usr/src/uts/common/fs/hsfs/hsfs_vnops.c6
-rw-r--r--usr/src/uts/common/fs/hyprlofs/hyprlofs_dir.c640
-rw-r--r--usr/src/uts/common/fs/hyprlofs/hyprlofs_subr.c127
-rw-r--r--usr/src/uts/common/fs/hyprlofs/hyprlofs_vfsops.c614
-rw-r--r--usr/src/uts/common/fs/hyprlofs/hyprlofs_vnops.c1426
-rw-r--r--usr/src/uts/common/fs/lxproc/lxpr_subr.c524
-rw-r--r--usr/src/uts/common/fs/lxproc/lxpr_vfsops.c367
-rw-r--r--usr/src/uts/common/fs/lxproc/lxpr_vnops.c3094
-rw-r--r--usr/src/uts/common/fs/lxproc/lxproc.h273
-rw-r--r--usr/src/uts/common/fs/nfs/nfs3_vfsops.c1
-rw-r--r--usr/src/uts/common/fs/nfs/nfs3_vnops.c5
-rw-r--r--usr/src/uts/common/fs/nfs/nfs4_vfsops.c1
-rw-r--r--usr/src/uts/common/fs/nfs/nfs4_vnops.c30
-rw-r--r--usr/src/uts/common/fs/nfs/nfs_server.c4
-rw-r--r--usr/src/uts/common/fs/nfs/nfs_vfsops.c1
-rw-r--r--usr/src/uts/common/fs/nfs/nfs_vnops.c6
-rw-r--r--usr/src/uts/common/fs/pcfs/pc_dir.c12
-rw-r--r--usr/src/uts/common/fs/portfs/port_vnops.c16
-rw-r--r--usr/src/uts/common/fs/proc/prcontrol.c12
-rw-r--r--usr/src/uts/common/fs/proc/prdata.h4
-rw-r--r--usr/src/uts/common/fs/proc/prvnops.c107
-rw-r--r--usr/src/uts/common/fs/sockfs/sockcommon_sops.c13
-rw-r--r--usr/src/uts/common/fs/sockfs/socknotify.c2
-rw-r--r--usr/src/uts/common/fs/sockfs/socksyscalls.c117
-rw-r--r--usr/src/uts/common/fs/sockfs/socktpi.c7
-rw-r--r--usr/src/uts/common/fs/swapfs/swap_subr.c6
-rw-r--r--usr/src/uts/common/fs/tmpfs/tmp_dir.c12
-rw-r--r--usr/src/uts/common/fs/tmpfs/tmp_subr.c34
-rw-r--r--usr/src/uts/common/fs/tmpfs/tmp_vfsops.c229
-rw-r--r--usr/src/uts/common/fs/tmpfs/tmp_vnops.c76
-rw-r--r--usr/src/uts/common/fs/udfs/udf_dir.c6
-rw-r--r--usr/src/uts/common/fs/udfs/udf_vnops.c9
-rw-r--r--usr/src/uts/common/fs/ufs/ufs_vnops.c46
-rw-r--r--usr/src/uts/common/fs/vfs.c27
-rw-r--r--usr/src/uts/common/fs/vnode.c117
-rw-r--r--usr/src/uts/common/fs/zfs/arc.c35
-rw-r--r--usr/src/uts/common/fs/zfs/dbuf.c15
-rw-r--r--usr/src/uts/common/fs/zfs/dmu.c1
-rw-r--r--usr/src/uts/common/fs/zfs/dmu_tx.c4
-rw-r--r--usr/src/uts/common/fs/zfs/dsl_dir.c3
-rw-r--r--usr/src/uts/common/fs/zfs/dsl_pool.c1
-rw-r--r--usr/src/uts/common/fs/zfs/metaslab.c8
-rw-r--r--usr/src/uts/common/fs/zfs/sys/vdev_impl.h1
-rw-r--r--usr/src/uts/common/fs/zfs/sys/zfs_zone.h62
-rw-r--r--usr/src/uts/common/fs/zfs/sys/zio.h6
-rw-r--r--usr/src/uts/common/fs/zfs/txg.c3
-rw-r--r--usr/src/uts/common/fs/zfs/vdev_disk.c14
-rw-r--r--usr/src/uts/common/fs/zfs/vdev_queue.c12
-rw-r--r--usr/src/uts/common/fs/zfs/zfs_ioctl.c59
-rw-r--r--usr/src/uts/common/fs/zfs/zfs_vfsops.c12
-rw-r--r--usr/src/uts/common/fs/zfs/zfs_vnops.c59
-rw-r--r--usr/src/uts/common/fs/zfs/zfs_zone.c1334
-rw-r--r--usr/src/uts/common/fs/zfs/zil.c14
-rw-r--r--usr/src/uts/common/fs/zfs/zio.c34
-rw-r--r--usr/src/uts/common/fs/zfs/zvol.c103
-rw-r--r--usr/src/uts/common/inet/ilb/ilb_conn.c8
-rw-r--r--usr/src/uts/common/inet/ip/ip_attr.c5
-rw-r--r--usr/src/uts/common/inet/ip/ip_if.c169
-rw-r--r--usr/src/uts/common/inet/ip/ip_squeue.c2
-rw-r--r--usr/src/uts/common/inet/ip/ip_tunables.c1
-rw-r--r--usr/src/uts/common/inet/ip/ipsecesp.c3
-rw-r--r--usr/src/uts/common/inet/ipf/fil.c46
-rw-r--r--usr/src/uts/common/inet/ipf/ip_fil_solaris.c436
-rw-r--r--usr/src/uts/common/inet/ipf/ip_log.c8
-rw-r--r--usr/src/uts/common/inet/ipf/ip_state.c1
-rw-r--r--usr/src/uts/common/inet/ipf/ipf.conf5
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ip_fil.h31
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ipf_stack.h14
-rw-r--r--usr/src/uts/common/inet/ipf/solaris.c171
-rw-r--r--usr/src/uts/common/inet/squeue.c100
-rw-r--r--usr/src/uts/common/inet/tcp_impl.h6
-rw-r--r--usr/src/uts/common/io/1394/targets/av1394/av1394_async.c9
-rw-r--r--usr/src/uts/common/io/aggr/aggr_port.c8
-rw-r--r--usr/src/uts/common/io/aggr/aggr_recv.c36
-rw-r--r--usr/src/uts/common/io/axf/ax88172reg.h163
-rw-r--r--usr/src/uts/common/io/axf/axf_usbgem.c1539
-rw-r--r--usr/src/uts/common/io/bge/bge_chip2.c234
-rw-r--r--usr/src/uts/common/io/bge/bge_hw.h218
-rw-r--r--usr/src/uts/common/io/bge/bge_impl.h14
-rw-r--r--usr/src/uts/common/io/bge/bge_main2.c14
-rw-r--r--usr/src/uts/common/io/bge/bge_mii.c71
-rw-r--r--usr/src/uts/common/io/devpoll.c424
-rw-r--r--usr/src/uts/common/io/dld/dld_drv.c6
-rw-r--r--usr/src/uts/common/io/dld/dld_proto.c79
-rw-r--r--usr/src/uts/common/io/dld/dld_str.c104
-rw-r--r--usr/src/uts/common/io/dls/dls.c100
-rw-r--r--usr/src/uts/common/io/dls/dls_link.c1
-rw-r--r--usr/src/uts/common/io/dls/dls_mgmt.c220
-rw-r--r--usr/src/uts/common/io/dls/dls_stat.c172
-rw-r--r--usr/src/uts/common/io/dr_sas/THIRDPARTYLICENSE32
-rw-r--r--usr/src/uts/common/io/dr_sas/THIRDPARTYLICENSE.descrip1
-rw-r--r--usr/src/uts/common/io/dr_sas/dr_sas.c5506
-rw-r--r--usr/src/uts/common/io/dr_sas/dr_sas.conf15
-rw-r--r--usr/src/uts/common/io/dr_sas/dr_sas.h1766
-rw-r--r--usr/src/uts/common/io/dr_sas/dr_sas_list.h212
-rw-r--r--usr/src/uts/common/io/fibre-channel/fca/oce/oce_rx.c3
-rw-r--r--usr/src/uts/common/io/gsqueue/gsqueue.c612
-rw-r--r--usr/src/uts/common/io/inotify.c1480
-rw-r--r--usr/src/uts/common/io/inotify.conf16
-rw-r--r--usr/src/uts/common/io/ixgbe/ixgbe_main.c4
-rw-r--r--usr/src/uts/common/io/ksocket/ksocket.c3
-rw-r--r--usr/src/uts/common/io/mac/mac.c3
-rw-r--r--usr/src/uts/common/io/mac/mac_client.c46
-rw-r--r--usr/src/uts/common/io/mac/mac_protect.c3
-rw-r--r--usr/src/uts/common/io/mac/mac_stat.c9
-rw-r--r--usr/src/uts/common/io/mr_sas/mr_sas.conf8
-rw-r--r--usr/src/uts/common/io/nfp/THIRDPARTYLICENSE19
-rw-r--r--usr/src/uts/common/io/nfp/THIRDPARTYLICENSE.descrip1
-rw-r--r--usr/src/uts/common/io/nfp/autoversion.h21
-rw-r--r--usr/src/uts/common/io/nfp/drvlist.c19
-rw-r--r--usr/src/uts/common/io/nfp/hostif.c1192
-rw-r--r--usr/src/uts/common/io/nfp/i21285.c310
-rw-r--r--usr/src/uts/common/io/nfp/i21285.h43
-rw-r--r--usr/src/uts/common/io/nfp/i21555.c423
-rw-r--r--usr/src/uts/common/io/nfp/i21555.h51
-rw-r--r--usr/src/uts/common/io/nfp/i21555d.c28
-rw-r--r--usr/src/uts/common/io/nfp/nfdev-common.h141
-rw-r--r--usr/src/uts/common/io/nfp/nfdev-solaris.h37
-rw-r--r--usr/src/uts/common/io/nfp/nfp.h113
-rw-r--r--usr/src/uts/common/io/nfp/nfp_cmd.h68
-rw-r--r--usr/src/uts/common/io/nfp/nfp_common.h68
-rw-r--r--usr/src/uts/common/io/nfp/nfp_error.h48
-rw-r--r--usr/src/uts/common/io/nfp/nfp_hostif.h54
-rw-r--r--usr/src/uts/common/io/nfp/nfp_ifvers.c51
-rw-r--r--usr/src/uts/common/io/nfp/nfp_osif.h105
-rw-r--r--usr/src/uts/common/io/nfp/nfpci.h171
-rw-r--r--usr/src/uts/common/io/nfp/osif.c184
-rw-r--r--usr/src/uts/common/io/ptm.c47
-rw-r--r--usr/src/uts/common/io/scsi/targets/sd.c79
-rw-r--r--usr/src/uts/common/io/udmf/dm9601reg.h348
-rw-r--r--usr/src/uts/common/io/udmf/udmf_usbgem.c1036
-rw-r--r--usr/src/uts/common/io/upf/adm8511reg.h205
-rw-r--r--usr/src/uts/common/io/upf/upf_usbgem.c1213
-rw-r--r--usr/src/uts/common/io/urf/rtl8150reg.h218
-rw-r--r--usr/src/uts/common/io/urf/urf_usbgem.c1039
-rw-r--r--usr/src/uts/common/io/usb/usba/usba_ugen.c24
-rw-r--r--usr/src/uts/common/io/usbgem/usbgem.c6389
-rw-r--r--usr/src/uts/common/io/usbgem/usbgem.h428
-rw-r--r--usr/src/uts/common/io/usbgem/usbgem_mii.h242
-rw-r--r--usr/src/uts/common/io/vnd/frameio.c464
-rw-r--r--usr/src/uts/common/io/vnd/vnd.c5584
-rw-r--r--usr/src/uts/common/io/vnd/vnd.conf16
-rw-r--r--usr/src/uts/common/io/vnic/vnic_dev.c19
-rw-r--r--usr/src/uts/common/krtld/bootrd.c83
-rw-r--r--usr/src/uts/common/os/bio.c10
-rw-r--r--usr/src/uts/common/os/brand.c39
-rw-r--r--usr/src/uts/common/os/core.c1
-rw-r--r--usr/src/uts/common/os/cred.c8
-rw-r--r--usr/src/uts/common/os/ddi_intr_irm.c2
-rw-r--r--usr/src/uts/common/os/exec.c87
-rw-r--r--usr/src/uts/common/os/exit.c93
-rw-r--r--usr/src/uts/common/os/grow.c35
-rw-r--r--usr/src/uts/common/os/logsubr.c4
-rw-r--r--usr/src/uts/common/os/msacct.c28
-rw-r--r--usr/src/uts/common/os/netstack.c33
-rw-r--r--usr/src/uts/common/os/pid.c12
-rw-r--r--usr/src/uts/common/os/policy.c9
-rw-r--r--usr/src/uts/common/os/priv_defs4
-rw-r--r--usr/src/uts/common/os/sig.c6
-rw-r--r--usr/src/uts/common/os/streamio.c4
-rw-r--r--usr/src/uts/common/os/sysent.c30
-rw-r--r--usr/src/uts/common/os/vmem.c2
-rw-r--r--usr/src/uts/common/os/zone.c749
-rw-r--r--usr/src/uts/common/sys/Makefile11
-rw-r--r--usr/src/uts/common/sys/aggr_impl.h3
-rw-r--r--usr/src/uts/common/sys/auxv.h17
-rw-r--r--usr/src/uts/common/sys/brand.h12
-rw-r--r--usr/src/uts/common/sys/buf.h8
-rw-r--r--usr/src/uts/common/sys/cpucaps.h5
-rw-r--r--usr/src/uts/common/sys/cpucaps_impl.h6
-rw-r--r--usr/src/uts/common/sys/cred.h1
-rw-r--r--usr/src/uts/common/sys/ctf_api.h30
-rw-r--r--usr/src/uts/common/sys/devpoll.h18
-rw-r--r--usr/src/uts/common/sys/dktp/dadk.h2
-rw-r--r--usr/src/uts/common/sys/dld.h7
-rw-r--r--usr/src/uts/common/sys/dld_impl.h5
-rw-r--r--usr/src/uts/common/sys/dlpi.h11
-rw-r--r--usr/src/uts/common/sys/dls.h10
-rw-r--r--usr/src/uts/common/sys/dls_impl.h6
-rw-r--r--usr/src/uts/common/sys/dls_mgmt.h2
-rw-r--r--usr/src/uts/common/sys/epoll.h89
-rw-r--r--usr/src/uts/common/sys/exec.h11
-rw-r--r--usr/src/uts/common/sys/frameio.h107
-rw-r--r--usr/src/uts/common/sys/fs/bootfs_impl.h81
-rw-r--r--usr/src/uts/common/sys/fs/hyprlofs.h91
-rw-r--r--usr/src/uts/common/sys/fs/hyprlofs_info.h174
-rw-r--r--usr/src/uts/common/sys/fs/sdev_impl.h61
-rw-r--r--usr/src/uts/common/sys/fs/sdev_plugin.h106
-rw-r--r--usr/src/uts/common/sys/fs/tmp.h39
-rw-r--r--usr/src/uts/common/sys/gsqueue.h65
-rw-r--r--usr/src/uts/common/sys/inotify.h153
-rw-r--r--usr/src/uts/common/sys/mac.h1
-rw-r--r--usr/src/uts/common/sys/mac_client_impl.h27
-rw-r--r--usr/src/uts/common/sys/mac_impl.h2
-rw-r--r--usr/src/uts/common/sys/mman.h2
-rw-r--r--usr/src/uts/common/sys/mntent.h2
-rw-r--r--usr/src/uts/common/sys/neti.h2
-rw-r--r--usr/src/uts/common/sys/netstack.h3
-rw-r--r--usr/src/uts/common/sys/param.h2
-rw-r--r--usr/src/uts/common/sys/policy.h1
-rw-r--r--usr/src/uts/common/sys/poll.h13
-rw-r--r--usr/src/uts/common/sys/poll_impl.h10
-rw-r--r--usr/src/uts/common/sys/proc.h11
-rw-r--r--usr/src/uts/common/sys/ptms.h19
-rw-r--r--usr/src/uts/common/sys/resource.h2
-rw-r--r--usr/src/uts/common/sys/socket.h7
-rw-r--r--usr/src/uts/common/sys/squeue.h14
-rw-r--r--usr/src/uts/common/sys/squeue_impl.h2
-rw-r--r--usr/src/uts/common/sys/sunddi.h1
-rw-r--r--usr/src/uts/common/sys/systrace.h13
-rw-r--r--usr/src/uts/common/sys/termios.h18
-rw-r--r--usr/src/uts/common/sys/thread.h2
-rw-r--r--usr/src/uts/common/sys/uadmin.h3
-rw-r--r--usr/src/uts/common/sys/vm_usage.h5
-rw-r--r--usr/src/uts/common/sys/vmsystm.h5
-rw-r--r--usr/src/uts/common/sys/vnd.h141
-rw-r--r--usr/src/uts/common/sys/vnd_errno.h72
-rw-r--r--usr/src/uts/common/sys/vnode.h8
-rw-r--r--usr/src/uts/common/sys/zone.h140
-rw-r--r--usr/src/uts/common/syscall/brandsys.c8
-rw-r--r--usr/src/uts/common/syscall/memcntl.c9
-rw-r--r--usr/src/uts/common/syscall/poll.c19
-rw-r--r--usr/src/uts/common/syscall/rusagesys.c14
-rw-r--r--usr/src/uts/common/syscall/sysconfig.c24
-rw-r--r--usr/src/uts/common/syscall/uadmin.c6
-rw-r--r--usr/src/uts/common/vm/hat.h10
-rw-r--r--usr/src/uts/common/vm/seg_vn.c12
-rw-r--r--usr/src/uts/common/vm/vm_anon.c11
-rw-r--r--usr/src/uts/common/vm/vm_as.c20
-rw-r--r--usr/src/uts/common/vm/vm_pvn.c28
-rw-r--r--usr/src/uts/common/vm/vm_usage.c387
-rw-r--r--usr/src/uts/i86pc/Makefile.rules2
-rw-r--r--usr/src/uts/i86pc/dboot/dboot_startkern.c302
-rw-r--r--usr/src/uts/i86pc/io/ppm/acpisleep.c24
-rw-r--r--usr/src/uts/i86pc/io/psm/psm_common.c3
-rw-r--r--usr/src/uts/i86pc/ml/offsets.in2
-rw-r--r--usr/src/uts/i86pc/ml/syscall_asm.s30
-rw-r--r--usr/src/uts/i86pc/ml/syscall_asm_amd64.s86
-rw-r--r--usr/src/uts/i86pc/os/cpr_impl.c14
-rw-r--r--usr/src/uts/i86pc/os/fakebop.c38
-rw-r--r--usr/src/uts/i86pc/os/ibft.c6
-rw-r--r--usr/src/uts/i86pc/os/lgrpplat.c14
-rw-r--r--usr/src/uts/i86pc/os/mlsetup.c5
-rw-r--r--usr/src/uts/i86pc/os/startup.c51
-rw-r--r--usr/src/uts/i86pc/sys/acpidev.h3
-rw-r--r--usr/src/uts/i86pc/sys/apic.h2
-rw-r--r--usr/src/uts/i86pc/vm/hat_i86.c79
-rw-r--r--usr/src/uts/i86pc/vm/vm_machdep.c7
-rw-r--r--usr/src/uts/intel/Makefile6
-rw-r--r--usr/src/uts/intel/Makefile.files47
-rw-r--r--usr/src/uts/intel/Makefile.intel27
-rw-r--r--usr/src/uts/intel/Makefile.rules13
-rw-r--r--usr/src/uts/intel/axf/Makefile73
-rw-r--r--usr/src/uts/intel/bootfs/Makefile72
-rw-r--r--usr/src/uts/intel/brand/common/brand_asm.h2
-rw-r--r--usr/src/uts/intel/brand/lx/lx_brand_asm.s359
-rw-r--r--usr/src/uts/intel/brand/sngl/sngl_brand_asm.s31
-rw-r--r--usr/src/uts/intel/core_pcbe/Makefile4
-rw-r--r--usr/src/uts/intel/dev/Makefile1
-rw-r--r--usr/src/uts/intel/dr_sas/Makefile90
-rw-r--r--usr/src/uts/intel/elfexec/Makefile3
-rw-r--r--usr/src/uts/intel/genassym/Makefile85
-rw-r--r--usr/src/uts/intel/genassym/offsets.in49
-rw-r--r--usr/src/uts/intel/gsqueue/Makefile49
-rw-r--r--usr/src/uts/intel/hyprlofs/Makefile83
-rw-r--r--usr/src/uts/intel/ia32/os/archdep.c15
-rw-r--r--usr/src/uts/intel/ia32/os/desctbls.c28
-rw-r--r--usr/src/uts/intel/inotify/Makefile70
-rw-r--r--usr/src/uts/intel/io/acpica/changes.txt585
-rw-r--r--usr/src/uts/intel/io/acpica/debugger/dbcmds.c473
-rw-r--r--usr/src/uts/intel/io/acpica/debugger/dbdisply.c34
-rw-r--r--usr/src/uts/intel/io/acpica/debugger/dbexec.c2
-rw-r--r--usr/src/uts/intel/io/acpica/debugger/dbfileio.c7
-rw-r--r--usr/src/uts/intel/io/acpica/debugger/dbhistry.c2
-rw-r--r--usr/src/uts/intel/io/acpica/debugger/dbinput.c14
-rw-r--r--usr/src/uts/intel/io/acpica/debugger/dbmethod.c2
-rw-r--r--usr/src/uts/intel/io/acpica/debugger/dbnames.c2
-rw-r--r--usr/src/uts/intel/io/acpica/debugger/dbstats.c30
-rw-r--r--usr/src/uts/intel/io/acpica/debugger/dbutils.c9
-rw-r--r--usr/src/uts/intel/io/acpica/debugger/dbxface.c2
-rw-r--r--usr/src/uts/intel/io/acpica/disassembler/dmbuffer.c10
-rw-r--r--usr/src/uts/intel/io/acpica/disassembler/dmnames.c2
-rw-r--r--usr/src/uts/intel/io/acpica/disassembler/dmobject.c2
-rw-r--r--usr/src/uts/intel/io/acpica/disassembler/dmopcode.c75
-rw-r--r--usr/src/uts/intel/io/acpica/disassembler/dmresrc.c15
-rw-r--r--usr/src/uts/intel/io/acpica/disassembler/dmresrcl.c3
-rw-r--r--usr/src/uts/intel/io/acpica/disassembler/dmresrcl2.c700
-rw-r--r--usr/src/uts/intel/io/acpica/disassembler/dmresrcs.c51
-rw-r--r--usr/src/uts/intel/io/acpica/disassembler/dmutils.c52
-rw-r--r--usr/src/uts/intel/io/acpica/disassembler/dmwalk.c5
-rw-r--r--usr/src/uts/intel/io/acpica/dispatcher/dsargs.c12
-rw-r--r--usr/src/uts/intel/io/acpica/dispatcher/dscontrol.c2
-rw-r--r--usr/src/uts/intel/io/acpica/dispatcher/dsfield.c80
-rw-r--r--usr/src/uts/intel/io/acpica/dispatcher/dsinit.c2
-rw-r--r--usr/src/uts/intel/io/acpica/dispatcher/dsmethod.c2
-rw-r--r--usr/src/uts/intel/io/acpica/dispatcher/dsmthdat.c2
-rw-r--r--usr/src/uts/intel/io/acpica/dispatcher/dsobject.c2
-rw-r--r--usr/src/uts/intel/io/acpica/dispatcher/dsopcode.c2
-rw-r--r--usr/src/uts/intel/io/acpica/dispatcher/dsutils.c2
-rw-r--r--usr/src/uts/intel/io/acpica/dispatcher/dswexec.c2
-rw-r--r--usr/src/uts/intel/io/acpica/dispatcher/dswload.c2
-rw-r--r--usr/src/uts/intel/io/acpica/dispatcher/dswload2.c2
-rw-r--r--usr/src/uts/intel/io/acpica/dispatcher/dswscope.c2
-rw-r--r--usr/src/uts/intel/io/acpica/dispatcher/dswstate.c2
-rw-r--r--usr/src/uts/intel/io/acpica/events/evevent.c20
-rw-r--r--usr/src/uts/intel/io/acpica/events/evglock.c12
-rw-r--r--usr/src/uts/intel/io/acpica/events/evgpe.c5
-rw-r--r--usr/src/uts/intel/io/acpica/events/evgpeblk.c5
-rw-r--r--usr/src/uts/intel/io/acpica/events/evgpeinit.c5
-rw-r--r--usr/src/uts/intel/io/acpica/events/evgpeutil.c5
-rw-r--r--usr/src/uts/intel/io/acpica/events/evmisc.c186
-rw-r--r--usr/src/uts/intel/io/acpica/events/evregion.c32
-rw-r--r--usr/src/uts/intel/io/acpica/events/evrgnini.c2
-rw-r--r--usr/src/uts/intel/io/acpica/events/evsci.c6
-rw-r--r--usr/src/uts/intel/io/acpica/events/evxface.c686
-rw-r--r--usr/src/uts/intel/io/acpica/events/evxfevnt.c5
-rw-r--r--usr/src/uts/intel/io/acpica/events/evxfgpe.c5
-rw-r--r--usr/src/uts/intel/io/acpica/events/evxfregn.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exconfig.c4
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exconvrt.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/excreate.c30
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exdebug.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exdump.c37
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exfield.c28
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exfldio.c28
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exmisc.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exmutex.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exnames.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exoparg1.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exoparg2.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exoparg3.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exoparg6.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exprep.c29
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exregion.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exresnte.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exresolv.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exresop.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exstore.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exstoren.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exstorob.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exsystem.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exutils.c32
-rw-r--r--usr/src/uts/intel/io/acpica/hardware/hwacpi.c5
-rw-r--r--usr/src/uts/intel/io/acpica/hardware/hwesleep.c277
-rw-r--r--usr/src/uts/intel/io/acpica/hardware/hwgpe.c5
-rw-r--r--usr/src/uts/intel/io/acpica/hardware/hwpci.c2
-rw-r--r--usr/src/uts/intel/io/acpica/hardware/hwregs.c16
-rw-r--r--usr/src/uts/intel/io/acpica/hardware/hwsleep.c395
-rw-r--r--usr/src/uts/intel/io/acpica/hardware/hwtimer.c4
-rw-r--r--usr/src/uts/intel/io/acpica/hardware/hwvalid.c4
-rw-r--r--usr/src/uts/intel/io/acpica/hardware/hwxface.c57
-rw-r--r--usr/src/uts/intel/io/acpica/hardware/hwxfsleep.c478
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsaccess.c2
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsalloc.c2
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsdump.c18
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsdumpdv.c4
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nseval.c2
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsinit.c2
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsload.c2
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsnames.c2
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsobject.c2
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsparse.c2
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nspredef.c47
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsrepair.c167
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsrepair2.c22
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nssearch.c2
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsutils.c4
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nswalk.c2
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsxfeval.c2
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsxfname.c2
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsxfobj.c2
-rw-r--r--usr/src/uts/intel/io/acpica/osl.c17
-rw-r--r--usr/src/uts/intel/io/acpica/parser/psargs.c147
-rw-r--r--usr/src/uts/intel/io/acpica/parser/psloop.c2
-rw-r--r--usr/src/uts/intel/io/acpica/parser/psopcode.c13
-rw-r--r--usr/src/uts/intel/io/acpica/parser/psparse.c2
-rw-r--r--usr/src/uts/intel/io/acpica/parser/psscope.c2
-rw-r--r--usr/src/uts/intel/io/acpica/parser/pstree.c9
-rw-r--r--usr/src/uts/intel/io/acpica/parser/psutils.c2
-rw-r--r--usr/src/uts/intel/io/acpica/parser/pswalk.c2
-rw-r--r--usr/src/uts/intel/io/acpica/parser/psxface.c2
-rw-r--r--usr/src/uts/intel/io/acpica/resources/rsaddr.c2
-rw-r--r--usr/src/uts/intel/io/acpica/resources/rscalc.c69
-rw-r--r--usr/src/uts/intel/io/acpica/resources/rscreate.c75
-rw-r--r--usr/src/uts/intel/io/acpica/resources/rsdump.c158
-rw-r--r--usr/src/uts/intel/io/acpica/resources/rsinfo.c61
-rw-r--r--usr/src/uts/intel/io/acpica/resources/rsio.c2
-rw-r--r--usr/src/uts/intel/io/acpica/resources/rsirq.c34
-rw-r--r--usr/src/uts/intel/io/acpica/resources/rslist.c77
-rw-r--r--usr/src/uts/intel/io/acpica/resources/rsmemory.c2
-rw-r--r--usr/src/uts/intel/io/acpica/resources/rsmisc.c268
-rw-r--r--usr/src/uts/intel/io/acpica/resources/rsserial.c425
-rw-r--r--usr/src/uts/intel/io/acpica/resources/rsutils.c61
-rw-r--r--usr/src/uts/intel/io/acpica/resources/rsxface.c58
-rw-r--r--usr/src/uts/intel/io/acpica/tables/tbfadt.c54
-rw-r--r--usr/src/uts/intel/io/acpica/tables/tbfind.c2
-rw-r--r--usr/src/uts/intel/io/acpica/tables/tbinstal.c121
-rw-r--r--usr/src/uts/intel/io/acpica/tables/tbutils.c103
-rw-r--r--usr/src/uts/intel/io/acpica/tables/tbxface.c2
-rw-r--r--usr/src/uts/intel/io/acpica/tables/tbxfroot.c2
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utaddress.c322
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utalloc.c2
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utcache.c2
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utclib.c2
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utcopy.c2
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utdebug.c2
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utdecode.c41
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utdelete.c33
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/uteval.c2
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utglobal.c22
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utids.c2
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utinit.c46
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utlock.c2
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utmath.c2
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utmisc.c40
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utmutex.c11
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utobject.c2
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utosi.c2
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utresrc.c295
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utstate.c2
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/uttrack.c8
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utxface.c54
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utxferror.c2
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utxfmutex.c213
-rw-r--r--usr/src/uts/intel/io/dktp/dcdev/dadk.c48
-rw-r--r--usr/src/uts/intel/io/ipmi/ipmivars.h1
-rw-r--r--usr/src/uts/intel/io/vmxnet/buildNumber.h12
-rw-r--r--usr/src/uts/intel/io/vmxnet/includeCheck.h159
-rw-r--r--usr/src/uts/intel/io/vmxnet/net.h220
-rw-r--r--usr/src/uts/intel/io/vmxnet/net_sg.h84
-rw-r--r--usr/src/uts/intel/io/vmxnet/vm_basic_types.h1037
-rw-r--r--usr/src/uts/intel/io/vmxnet/vm_device_version.h246
-rw-r--r--usr/src/uts/intel/io/vmxnet/vmnet_def.h91
-rw-r--r--usr/src/uts/intel/io/vmxnet/vmxnet.c2438
-rw-r--r--usr/src/uts/intel/io/vmxnet/vmxnet.conf24
-rw-r--r--usr/src/uts/intel/io/vmxnet/vmxnet2_def.h436
-rw-r--r--usr/src/uts/intel/io/vmxnet/vmxnet_def.h184
-rw-r--r--usr/src/uts/intel/ip/ip.global-objs.debug641
-rw-r--r--usr/src/uts/intel/ip/ip.global-objs.obj641
-rw-r--r--usr/src/uts/intel/ipf/Makefile2
-rw-r--r--usr/src/uts/intel/ipf/ipf.global-objs.debug6432
-rw-r--r--usr/src/uts/intel/kdi/kdi_idt.c3
-rw-r--r--usr/src/uts/intel/lx_afs/Makefile108
-rw-r--r--usr/src/uts/intel/lx_afs/Makefile.rules40
-rw-r--r--usr/src/uts/intel/lx_audio/Makefile100
-rw-r--r--usr/src/uts/intel/lx_brand/Makefile108
-rw-r--r--usr/src/uts/intel/lx_brand/Makefile.rules85
-rw-r--r--usr/src/uts/intel/lx_netlink/Makefile77
-rw-r--r--usr/src/uts/intel/lx_proc/Makefile115
-rw-r--r--usr/src/uts/intel/lx_proc/Makefile.rules38
-rw-r--r--usr/src/uts/intel/lx_ptm/Makefile91
-rw-r--r--usr/src/uts/intel/lx_systrace/Makefile80
-rw-r--r--usr/src/uts/intel/lxprocfs/Makefile88
-rw-r--r--usr/src/uts/intel/nfp/Makefile82
-rw-r--r--usr/src/uts/intel/opteron_pcbe/Makefile2
-rw-r--r--usr/src/uts/intel/os/driver_aliases1
-rw-r--r--usr/src/uts/intel/os/name_to_major1
-rw-r--r--usr/src/uts/intel/p123_pcbe/Makefile4
-rw-r--r--usr/src/uts/intel/p4_pcbe/Makefile2
-rw-r--r--usr/src/uts/intel/sngl_brand/Makefile98
-rw-r--r--usr/src/uts/intel/sys/acpi/acapps.h13
-rw-r--r--usr/src/uts/intel/sys/acpi/accommon.h2
-rw-r--r--usr/src/uts/intel/sys/acpi/acconfig.h30
-rw-r--r--usr/src/uts/intel/sys/acpi/acdebug.h12
-rw-r--r--usr/src/uts/intel/sys/acpi/acdisasm.h202
-rw-r--r--usr/src/uts/intel/sys/acpi/acdispat.h2
-rw-r--r--usr/src/uts/intel/sys/acpi/acevents.h21
-rw-r--r--usr/src/uts/intel/sys/acpi/acexcep.h8
-rw-r--r--usr/src/uts/intel/sys/acpi/acglobal.h34
-rw-r--r--usr/src/uts/intel/sys/acpi/achware.h63
-rw-r--r--usr/src/uts/intel/sys/acpi/acinterp.h6
-rw-r--r--usr/src/uts/intel/sys/acpi/aclocal.h56
-rw-r--r--usr/src/uts/intel/sys/acpi/acmacros.h11
-rw-r--r--usr/src/uts/intel/sys/acpi/acnames.h15
-rw-r--r--usr/src/uts/intel/sys/acpi/acnamesp.h5
-rw-r--r--usr/src/uts/intel/sys/acpi/acobject.h13
-rw-r--r--usr/src/uts/intel/sys/acpi/acopcode.h6
-rw-r--r--usr/src/uts/intel/sys/acpi/acoutput.h3
-rw-r--r--usr/src/uts/intel/sys/acpi/acparser.h2
-rw-r--r--usr/src/uts/intel/sys/acpi/acpi.h2
-rw-r--r--usr/src/uts/intel/sys/acpi/acpiosxf.h12
-rw-r--r--usr/src/uts/intel/sys/acpi/acpixf.h201
-rw-r--r--usr/src/uts/intel/sys/acpi/acpredef.h40
-rw-r--r--usr/src/uts/intel/sys/acpi/acresrc.h117
-rw-r--r--usr/src/uts/intel/sys/acpi/acrestyp.h228
-rw-r--r--usr/src/uts/intel/sys/acpi/acstruct.h2
-rw-r--r--usr/src/uts/intel/sys/acpi/actables.h7
-rw-r--r--usr/src/uts/intel/sys/acpi/actbl.h32
-rw-r--r--usr/src/uts/intel/sys/acpi/actbl1.h87
-rw-r--r--usr/src/uts/intel/sys/acpi/actbl2.h2
-rw-r--r--usr/src/uts/intel/sys/acpi/actbl3.h650
-rw-r--r--usr/src/uts/intel/sys/acpi/actypes.h48
-rw-r--r--usr/src/uts/intel/sys/acpi/acutils.h50
-rw-r--r--usr/src/uts/intel/sys/acpi/amlcode.h31
-rw-r--r--usr/src/uts/intel/sys/acpi/amlresrc.h162
-rw-r--r--usr/src/uts/intel/sys/acpi/platform/accygwin.h2
-rw-r--r--usr/src/uts/intel/sys/acpi/platform/acefi.h2
-rw-r--r--usr/src/uts/intel/sys/acpi/platform/acenv.h26
-rw-r--r--usr/src/uts/intel/sys/acpi/platform/acfreebsd.h2
-rw-r--r--usr/src/uts/intel/sys/acpi/platform/acgcc.h2
-rw-r--r--usr/src/uts/intel/sys/acpi/platform/acintel.h2
-rw-r--r--usr/src/uts/intel/sys/acpi/platform/aclinux.h5
-rw-r--r--usr/src/uts/intel/sys/acpi/platform/acmsvc.h24
-rw-r--r--usr/src/uts/intel/sys/acpi/platform/acnetbsd.h2
-rw-r--r--usr/src/uts/intel/sys/acpi/platform/acos2.h2
-rw-r--r--usr/src/uts/intel/sys/acpi/platform/acwin.h2
-rw-r--r--usr/src/uts/intel/sys/acpi/platform/acwin64.h2
-rw-r--r--usr/src/uts/intel/sys/bootinfo.h21
-rw-r--r--usr/src/uts/intel/sys/bootvfs.h17
-rw-r--r--usr/src/uts/intel/sys/machbrand.h9
-rw-r--r--usr/src/uts/intel/sys/segments.h2
-rw-r--r--usr/src/uts/intel/sys/x86_archext.h30
-rw-r--r--usr/src/uts/intel/udmf/Makefile74
-rw-r--r--usr/src/uts/intel/upf/Makefile74
-rw-r--r--usr/src/uts/intel/urf/Makefile74
-rw-r--r--usr/src/uts/intel/usbgem/Makefile84
-rw-r--r--usr/src/uts/intel/vmxnet/Makefile93
-rw-r--r--usr/src/uts/intel/vnd/Makefile56
-rw-r--r--usr/src/uts/sparc/Makefile.sparc2
-rw-r--r--usr/src/uts/sparc/inotify/Makefile70
-rw-r--r--usr/src/uts/sparc/ip/ip.global-objs.debug641
-rw-r--r--usr/src/uts/sparc/ip/ip.global-objs.obj641
-rw-r--r--usr/src/uts/sparc/ipf/ipf.global-objs.debug644
-rw-r--r--usr/src/uts/sun4/brand/common/brand_solaris.s2
-rw-r--r--usr/src/uts/sun4/ml/offsets.in2
-rw-r--r--usr/src/uts/sun4u/io/pci/pcisch.c5
-rw-r--r--usr/src/uts/sun4v/io/vcc.c5
1569 files changed, 209256 insertions, 10908 deletions
diff --git a/usr/src/Makefile b/usr/src/Makefile
index b9be247888..66cda2ade3 100644
--- a/usr/src/Makefile
+++ b/usr/src/Makefile
@@ -46,7 +46,7 @@ $(SPARC_BLD)psm: stand
SUBDIRS= $(COMMON_SUBDIRS) $($(MACH)_SUBDIRS)
-HDRSUBDIRS= uts head lib cmd
+HDRSUBDIRS= uts head lib cmd ucbhead
# UCB headers are bug-for-bug compatible and not checkable against the header
# standards.
@@ -56,7 +56,7 @@ CHKHDRSUBDIRS= head uts lib
#
# Headers that can be built in parallel
#
-PARALLEL_HEADERS = sysheaders userheaders libheaders cmdheaders
+PARALLEL_HEADERS = sysheaders userheaders libheaders ucbheaders cmdheaders
#
# Directories that can be built in parallel
@@ -111,7 +111,7 @@ all: mapfiles closedbins sgs .WAIT $(SUBDIRS) pkg
# packaging to be pulled from $(SRC) and $(CLOSED) and staged in
# $(ROOT)/licenses.
#
-install: install1 install2 _msg stage-licenses
+install: install1 install2 _msg
@cd msg; pwd; $(MAKE) _msg
@rm -rf "$(ROOT)/catalog"
@@ -162,13 +162,13 @@ closedbins: bldtools $(ROOTDIRS) FRC
.PARALLEL: $(PARALLEL_HEADERS) DUMMY
.PARALLEL: $(PARALLEL_DIRS) DUMMY
-$(SUBDIRS) head pkg: FRC
+$(SUBDIRS) head ucbhead pkg: FRC
@cd $@; pwd; $(MAKE) $(TARGET)
# librpcsvc has a dependency on headers installed by
# userheaders, hence the .WAIT before libheaders.
sgs: rootdirs .WAIT sysheaders userheaders .WAIT \
- libheaders cmdheaders
+ libheaders ucbheaders cmdheaders
#
# Top-level setup target to setup the development environment that includes
@@ -205,6 +205,9 @@ libheaders: bldtools
sysheaders: FRC
@cd uts; pwd; $(MAKE) install_h
+ucbheaders: FRC
+ @cd ucbhead; pwd; $(MAKE) install_h
+
cmdheaders: FRC
@cd cmd/fm; pwd; $(MAKE) install_h
@cd cmd/mdb; pwd; $(MAKE) install_h
diff --git a/usr/src/Makefile.lint b/usr/src/Makefile.lint
index 73744e1262..98f57fdfe0 100644
--- a/usr/src/Makefile.lint
+++ b/usr/src/Makefile.lint
@@ -341,6 +341,7 @@ COMMON_SUBDIRS = \
lib/libbe \
lib/pylibbe \
lib/brand/sn1 \
+ lib/brand/sngl \
lib/brand/solaris10 \
lib/crypt_modules \
lib/extendedFILE \
@@ -463,7 +464,6 @@ COMMON_SUBDIRS = \
lib/pam_modules \
lib/passwdutil \
lib/pkcs11 \
- lib/print \
lib/raidcfg_plugins \
lib/scsi \
lib/smbsrv \
@@ -488,6 +488,7 @@ i386_SUBDIRS= \
cmd/biosdev \
cmd/rtc \
cmd/ucodeadm \
+ lib/brand/lx \
lib/cfgadm_plugins/sata \
lib/cfgadm_plugins/sbd \
lib/libfdisk
diff --git a/usr/src/Makefile.master b/usr/src/Makefile.master
index 9e71b02407..988f34da46 100644
--- a/usr/src/Makefile.master
+++ b/usr/src/Makefile.master
@@ -155,6 +155,7 @@ JAVADOC= $(JAVA_ROOT)/bin/javadoc
RMIC= $(JAVA_ROOT)/bin/rmic
JAR= $(JAVA_ROOT)/bin/jar
CTFCONVERT= $(ONBLD_TOOLS)/bin/$(MACH)/ctfconvert
+CTFDIFF= $(ONBLD_TOOLS)/bin/$(MACH)/ctfdiff
CTFMERGE= $(ONBLD_TOOLS)/bin/$(MACH)/ctfmerge
CTFSTABS= $(ONBLD_TOOLS)/bin/$(MACH)/ctfstabs
CTFSTRIP= $(ONBLD_TOOLS)/bin/$(MACH)/ctfstrip
@@ -165,8 +166,8 @@ CTFFINDMOD= $(ONBLD_TOOLS)/bin/ctffindmod
XREF= $(ONBLD_TOOLS)/bin/xref
FIND= /usr/bin/find
PERL= /usr/bin/perl
-PERL_VERSION= 5.10.0
-PERL_PKGVERS= -510
+PERL_VERSION= 5.12
+PERL_PKGVERS= -512
PYTHON_26= /usr/bin/python2.6
PYTHON= $(PYTHON_26)
SORT= /usr/bin/sort
@@ -238,7 +239,8 @@ INS.symlink= $(RM) $@; $(SYMLINK) $(INSLINKTARGET) $@
# rebuilds if the baked-in mtime != the mtime of the source file
# (rather than only if it's less than), thus when installing python
# files we must make certain to not adjust the mtime of the source
-# (.py) file.
+# (.py) file. As a part of this we also go through and change the #!
+# line in the python script to that of the actual python we are using.
#
INS.pyfile= $(INS.file); $(TOUCH) -r $< $@
@@ -420,9 +422,17 @@ sparcv9_COPTFLAG= -xO3
i386_COPTFLAG= -O
amd64_COPTFLAG= -xO3
+# This would normally be added by cw(1) but cannot be while we want to support
+# Both GCC 3.x and GCC 4.x
+$(__GNUC4)$(MACH)_COPTFLAG += -_gcc=-fno-inline-small-functions \
+ -_gcc=-fno-inline-functions-called-once
+$(__GNUC4)$(MACH64)_COPTFLAG += -_gcc=-fno-inline-small-functions \
+ -_gcc=-fno-inline-functions-called-once
+
COPTFLAG= $($(MACH)_COPTFLAG)
COPTFLAG64= $($(MACH64)_COPTFLAG)
+
# When -g is used, the compiler globalizes static objects
# (gives them a unique prefix). Disable that.
CNOGLOBAL= -W0,-noglobal
@@ -1090,9 +1100,16 @@ PKGPUBLISHER_NONREDIST= on-extra
$(RM) $@;
$(SED) -e "s@TEXT_DOMAIN@\"$(TEXT_DOMAIN)\"@" $< > $@;
$(CHMOD) +x $@
+#
+# You might ask why we aren't using -i for sed here. That's because illumos
+# 1815 hasn't been fixed and sadly we didn't back out something that broke a lot
+# of working code in favor of GNU compatibility.
+#
+SED.py= $(SED) -e 's?^$(POUND_SIGN)!ON_PYTHON_26$$?$(POUND_SIGN)!$(PYTHON_26)?' \
+ -e 's?^$(POUND_SIGN)!ON_PYTHON$$?$(POUND_SIGN)!$(PYTHON)?'
.py:
- $(RM) $@; $(CAT) $< > $@; $(CHMOD) +x $@
+ $(RM) $@; $(SED.py) $< > $@; $(CHMOD) +x $@
.py.pyc:
$(RM) $@
diff --git a/usr/src/Targetdirs b/usr/src/Targetdirs
index 9e278ab15c..79fdd83e6f 100644
--- a/usr/src/Targetdirs
+++ b/usr/src/Targetdirs
@@ -53,6 +53,9 @@ i386_DIRS= \
/boot/grub/bin \
/platform/i86pc \
/lib/libmvec \
+ /usr/lib/brand/lx \
+ /usr/lib/brand/lx/amd64 \
+ /usr/lib/brand/lx/distros \
/usr/lib/xen \
/usr/lib/xen/bin
@@ -206,6 +209,7 @@ DIRS= \
/root \
/sbin \
/system \
+ /system/boot \
/system/contract \
/system/object \
/tmp \
@@ -248,6 +252,7 @@ DIRS= \
/usr/lib/brand/labeled \
/usr/lib/brand/shared \
/usr/lib/brand/sn1 \
+ /usr/lib/brand/sngl \
/usr/lib/brand/solaris10 \
/usr/lib/class \
/usr/lib/class/FSS \
@@ -530,6 +535,7 @@ DIRS64= \
/usr/lib/$(MACH64) \
/usr/lib/$(MACH64)/gss \
/usr/lib/brand/sn1/$(MACH64) \
+ /usr/lib/brand/sngl/$(MACH64) \
/usr/lib/brand/solaris10/$(MACH64) \
/usr/lib/elfedit/$(MACH64) \
/usr/lib/fm/$(MACH64) \
@@ -592,6 +598,9 @@ SYM.DIRS= \
/usr/ucblib/32 \
/var/ld/32
+i386_SYM.DIRS64= \
+ /usr/lib/brand/lx/64
+
sparc_SYM.DIRS64=
SYM.DIRS64= \
@@ -601,6 +610,7 @@ SYM.DIRS64= \
/lib/secure/64 \
/usr/lib/64 \
/usr/lib/brand/sn1/64 \
+ /usr/lib/brand/sngl/64 \
/usr/lib/brand/solaris10/64 \
/usr/lib/elfedit/64 \
/usr/lib/libp/64 \
@@ -708,7 +718,9 @@ $(BUILD64) $(ROOT)/lib/crypto/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/lib/secure/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/usr/lib/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/usr/lib/elfedit/64:= LINKDEST=$(MACH64)
+$(BUILD64) $(ROOT)/usr/lib/brand/lx/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/usr/lib/brand/sn1/64:= LINKDEST=$(MACH64)
+$(BUILD64) $(ROOT)/usr/lib/brand/sngl/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/usr/lib/brand/solaris10/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/usr/lib/libp/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/usr/lib/lwp/64:= LINKDEST=$(MACH64)
diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile
index c59e370d47..d16130bd8f 100644
--- a/usr/src/cmd/Makefile
+++ b/usr/src/cmd/Makefile
@@ -21,7 +21,7 @@
# Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright 2010 Nexenta Systems, Inc. All rights reserved.
-# Copyright (c) 2012 Joyent, Inc. All rights reserved.
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
# Copyright (c) 2012 by Delphix. All rights reserved.
# Copyright (c) 2013 DEY Storage Systems, Inc. All rights reserved.
# Copyright 2014 Garrett D'Amore <garrett@damore.org>
@@ -29,8 +29,8 @@
include ../Makefile.master
#
-# Note that the commands 'lp', and 'perl' are first in
-# the list, violating alphabetical order. This is because they are very
+# Note that if the 'lp' command were built, it would be first in
+# the list, violating alphabetical order. This is because it is very
# long-running and should be given the most wall-clock time for a
# parallel build.
#
@@ -51,7 +51,6 @@ FIRST_SUBDIRS= \
COMMON_SUBDIRS= \
allocate \
availdevs \
- lp \
perl \
Adm \
abi \
@@ -95,6 +94,7 @@ COMMON_SUBDIRS= \
cmd-crypto \
cmd-inet \
col \
+ column \
compress \
consadm \
coreadm \
@@ -104,6 +104,7 @@ COMMON_SUBDIRS= \
crypt \
csh \
csplit \
+ ctfdiff \
ctrun \
ctstat \
ctwatch \
@@ -187,7 +188,6 @@ COMMON_SUBDIRS= \
groups \
grpck \
gss \
- hal \
halt \
head \
hostid \
@@ -225,7 +225,6 @@ COMMON_SUBDIRS= \
kvmstat \
last \
lastcomm \
- latencytop \
ldap \
ldapcachemgr \
lgrpinfo \
@@ -246,7 +245,6 @@ COMMON_SUBDIRS= \
look \
ls \
luxadm \
- lvm \
mach \
machid \
mail \
@@ -282,6 +280,7 @@ COMMON_SUBDIRS= \
news \
newtask \
nice \
+ nicstat \
nl \
nlsadmin \
nohup \
@@ -314,7 +313,6 @@ COMMON_SUBDIRS= \
plockstat \
pr \
prctl \
- print \
printf \
priocntl \
profiles \
@@ -332,7 +330,6 @@ COMMON_SUBDIRS= \
pwck \
pwconv \
pwd \
- pyzfs \
raidctl \
ramdiskadm \
rcap \
@@ -437,6 +434,8 @@ COMMON_SUBDIRS= \
valtools \
vgrind \
vi \
+ vndadm \
+ vndstat \
volcheck \
volrmmount \
vrrpadm \
@@ -503,7 +502,6 @@ sparc_SUBDIRS= \
# (see previous comment about 'lp'.)
#
MSGSUBDIRS= \
- lp \
abi \
acctadm \
allocate \
@@ -615,7 +613,6 @@ MSGSUBDIRS= \
logins \
ls \
luxadm \
- lvm \
mailx \
man \
mesg \
@@ -645,7 +642,6 @@ MSGSUBDIRS= \
power \
pr \
praudit \
- print \
profiles \
projadd \
projects \
@@ -656,7 +652,6 @@ MSGSUBDIRS= \
ptools \
pwconv \
pwd \
- pyzfs \
raidctl \
ramdiskadm \
rcap \
diff --git a/usr/src/cmd/Makefile.check b/usr/src/cmd/Makefile.check
index 96fea6b370..f442650fca 100644
--- a/usr/src/cmd/Makefile.check
+++ b/usr/src/cmd/Makefile.check
@@ -117,7 +117,6 @@ MANIFEST_SUBDIRS= \
krb5/krb5kdc \
krb5/kwarn \
krb5/slave \
- lp/cmd/lpsched \
lvm/rpc.mdcommd \
lvm/rpc.metad \
lvm/rpc.metamedd \
@@ -126,8 +125,6 @@ MANIFEST_SUBDIRS= \
lvm/util \
picl/picld \
pools/poold \
- print/bsd-sysv-commands \
- print/ppdmgr \
rcap/rcapd \
rpcsvc/rpc.bootparamd \
sendmail/lib \
diff --git a/usr/src/cmd/cmd-inet/etc/sock2path.d/system%2Fkernel b/usr/src/cmd/cmd-inet/etc/sock2path.d/system%2Fkernel
index c62e339953..49151907eb 100644
--- a/usr/src/cmd/cmd-inet/etc/sock2path.d/system%2Fkernel
+++ b/usr/src/cmd/cmd-inet/etc/sock2path.d/system%2Fkernel
@@ -18,6 +18,7 @@
# CDDL HEADER END
#
# Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2014, Joyent, Inc. All rights reserved.
#
# socket configuration information
#
@@ -52,3 +53,6 @@
29 4 1 /dev/spdsock
31 1 0 trill
+
+ 33 1 0 lx_netlink
+ 33 4 0 lx_netlink
diff --git a/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_door.c b/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_door.c
index 8e9e153b21..9f546bcad9 100644
--- a/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_door.c
+++ b/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_door.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2014, Joyent, Inc. All rights reserved.
*/
/*
@@ -112,7 +113,9 @@ ipmgmt_handler(void *cookie, char *argp, size_t argsz, door_desc_t *dp,
goto fail;
}
- /* check for solaris.network.interface.config authorization */
+ /*
+ * if not root, check for solaris.network.interface.config authorization
+ */
if (infop->idi_set) {
uid_t uid;
struct passwd pwd;
@@ -124,24 +127,32 @@ ipmgmt_handler(void *cookie, char *argp, size_t argsz, door_desc_t *dp,
goto fail;
}
uid = ucred_getruid(cred);
+ ucred_free(cred);
if ((int)uid < 0) {
err = errno;
ipmgmt_log(LOG_ERR, "Could not get user id.");
goto fail;
}
- if (getpwuid_r(uid, &pwd, buf, sizeof (buf)) ==
- NULL) {
- err = errno;
- ipmgmt_log(LOG_ERR, "Could not get password entry.");
- goto fail;
- }
- if (chkauthattr(NETWORK_INTERFACE_CONFIG_AUTH,
- pwd.pw_name) != 1) {
- err = EPERM;
- ipmgmt_log(LOG_ERR, "Not authorized for operation.");
- goto fail;
+
+ /*
+ * Branded zones may have different auth, but root always
+ * allowed.
+ */
+ if (uid != 0) {
+ if (getpwuid_r(uid, &pwd, buf, sizeof (buf)) == NULL) {
+ err = errno;
+ ipmgmt_log(LOG_ERR,
+ "Could not get password entry.");
+ goto fail;
+ }
+ if (chkauthattr(NETWORK_INTERFACE_CONFIG_AUTH,
+ pwd.pw_name) != 1) {
+ err = EPERM;
+ ipmgmt_log(LOG_ERR,
+ "Not authorized for operation.");
+ goto fail;
+ }
}
- ucred_free(cred);
}
/* individual handlers take care of calling door_return */
diff --git a/usr/src/cmd/cmd-inet/lib/ipmgmtd/net-ipmgmt b/usr/src/cmd/cmd-inet/lib/ipmgmtd/net-ipmgmt
index 77b6be9f54..d5812793d4 100644
--- a/usr/src/cmd/cmd-inet/lib/ipmgmtd/net-ipmgmt
+++ b/usr/src/cmd/cmd-inet/lib/ipmgmtd/net-ipmgmt
@@ -21,6 +21,7 @@
#
#
# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2012, Joyent, Inc. All rights reserved.
#
# This daemon stores address object to logical interface number mappings
# (among other things) and reads/writes from/to ipmgmtd data store.
@@ -38,14 +39,16 @@ fi
# When the non-global shared-IP stack zone boots, it tries to bring up this
# service as well. If we don't start a background process and simply exit the
# service, the service will go into maintenance mode and so will all it's
-# dependents.
+# dependents. Ideally we would simply exit with SMF_EXIT_NODAEMON, but since
+# this method is also used in an S10C zone, where support for SMF_EXIT_NODAEMON
+# does not exist, we have to stick around.
#
# In S10C zone (where this script is also used) smf_isnonglobalzone
# function is unavailable in smf_include.sh
#
if [ `/sbin/zonename` != global ]; then
if [ `/sbin/zonename -t` = shared ]; then
- (while true ; do sleep 3600 ; done) &
+ (while true ; do sleep 3600 ; done) &
exit $SMF_EXIT_OK
fi
fi
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c
index 78da07aebf..9a710f9125 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011 Joyent, Inc. All rights reserved.
*
* REQUESTING state of the client state machine.
*/
@@ -38,6 +39,7 @@
#include <dhcp_hostconf.h>
#include <dhcpagent_util.h>
#include <dhcpmsg.h>
+#include <strings.h>
#include "states.h"
#include "util.h"
@@ -641,8 +643,24 @@ accept_v4_acknak(dhcp_smach_t *dsmp, PKT_LIST *plp)
stop_pkt_retransmission(dsmp);
if (*plp->opts[CD_DHCP_TYPE]->value == NAK) {
- dhcpmsg(MSG_WARNING, "accept_v4_acknak: NAK on interface %s",
- dsmp->dsm_name);
+ char saddr[18];
+
+ saddr[0] = '\0';
+ if (plp->opts[CD_SERVER_ID] != NULL &&
+ plp->opts[CD_SERVER_ID]->len == sizeof (struct in_addr)) {
+ struct in_addr t_server;
+
+ bcopy(plp->opts[CD_SERVER_ID]->value, &t_server,
+ plp->opts[CD_SERVER_ID]->len);
+ (void) strlcpy(saddr, inet_ntoa(t_server),
+ sizeof (saddr));
+ }
+
+ dhcpmsg(MSG_WARNING, "accept_v4_acknak: NAK on interface %s "
+ "from %s %s",
+ dsmp->dsm_name,
+ inet_ntoa(plp->pktfrom.v4.sin_addr), saddr);
+
dsmp->dsm_bad_offers++;
free_pkt_entry(plp);
dhcp_restart(dsmp);
diff --git a/usr/src/cmd/cmd-inet/usr.lib/mdnsd/DNSCommon.c b/usr/src/cmd/cmd-inet/usr.lib/mdnsd/DNSCommon.c
index ce771e3188..173a8f8325 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/mdnsd/DNSCommon.c
+++ b/usr/src/cmd/cmd-inet/usr.lib/mdnsd/DNSCommon.c
@@ -1485,7 +1485,7 @@ mDNSlocal const mDNSu8 *getOptRdata(const mDNSu8 *ptr, const mDNSu8 *const limit
ptr += sizeof(mDNSs32);
nread += sizeof(mDNSs32);
}
- else { LogMsg("ERROR: getOptRdata - unknown opt %d", opt->opt); return mDNSNULL; }
+ else { return mDNSNULL; }
opt++; // increment pointer into rdatabody
}
diff --git a/usr/src/cmd/cmd-inet/usr.lib/wanboot/p12split/Makefile b/usr/src/cmd/cmd-inet/usr.lib/wanboot/p12split/Makefile
index e93209c9b6..fb8c1cdad6 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/wanboot/p12split/Makefile
+++ b/usr/src/cmd/cmd-inet/usr.lib/wanboot/p12split/Makefile
@@ -25,7 +25,7 @@
include ../Makefile.com
PROG= p12split
-LDLIBS += -lwanboot -linetutil -lwanbootutil -lcrypto
+LDLIBS += -lwanboot -linetutil -lwanbootutil -lsunw_crypto
CPPFLAGS += -I$(CMNCRYPTDIR)
all: $(PROG)
diff --git a/usr/src/cmd/cmd-inet/usr.lib/wanboot/wanboot-cgi/Makefile b/usr/src/cmd/cmd-inet/usr.lib/wanboot/wanboot-cgi/Makefile
index f4a1f548b7..acad766bd3 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/wanboot/wanboot-cgi/Makefile
+++ b/usr/src/cmd/cmd-inet/usr.lib/wanboot/wanboot-cgi/Makefile
@@ -25,7 +25,7 @@
include ../Makefile.com
PROG = wanboot-cgi
-LDLIBS += -lgen -lnsl -lwanbootutil -lnvpair -lwanboot -lcrypto
+LDLIBS += -lgen -lnsl -lwanbootutil -lnvpair -lwanboot -lsunw_crypto
CPPFLAGS += -I$(CMNCRYPTDIR)
all: $(PROG)
diff --git a/usr/src/cmd/cmd-inet/usr.lib/wpad/Makefile b/usr/src/cmd/cmd-inet/usr.lib/wpad/Makefile
index 1b82adef80..a743bed065 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/wpad/Makefile
+++ b/usr/src/cmd/cmd-inet/usr.lib/wpad/Makefile
@@ -33,7 +33,7 @@ include ../../../Makefile.cmd
ROOTMANIFESTDIR = $(ROOTSVCNETWORK)
LDLIBS += -ldladm -ldlpi
-all install := LDLIBS += -lcrypto
+all install := LDLIBS += -lsunw_crypto
LINTFLAGS += -u
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/routeadm/routeadm.c b/usr/src/cmd/cmd-inet/usr.sbin/routeadm/routeadm.c
index 71a2fc9853..be826baba2 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/routeadm/routeadm.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/routeadm/routeadm.c
@@ -21,10 +21,9 @@
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -46,6 +45,7 @@
#include <libscf.h>
#include <libscf_priv.h>
#include <libuutil.h>
+#include <ifaddrs.h>
/*
* This program moves routing management under SMF. We do this by giving
@@ -2335,8 +2335,8 @@ out:
/*
*
- * Return the number of IPv6 addresses configured. This answers the
- * generic question, "is IPv6 configured?". We only start in.ndpd if IPv6
+ * Return the number of non-loopback IPv6 addresses configured. This answers
+ * the generic question, "is IPv6 configured?". We only start in.ndpd if IPv6
* is configured, and we also only enable IPv6 routing daemons if IPv6 is
* enabled.
*/
@@ -2344,28 +2344,24 @@ static int
ra_numv6intfs(void)
{
static int num = -1;
- int ipsock;
- struct lifnum lifn;
+ int cnt;
+ struct ifaddrs *ifp_head, *ifp;
if (num != -1)
return (num);
- if ((ipsock = socket(PF_INET6, SOCK_DGRAM, 0)) == -1) {
- (void) fprintf(stderr,
- gettext("%1$s: unable to open %2$s: %3$s\n"),
- myname, IP_DEV_NAME, strerror(errno));
+ if (getifaddrs(&ifp_head) < 0)
return (0);
- }
- lifn.lifn_family = AF_INET6;
- lifn.lifn_flags = 0;
- if (ioctl(ipsock, SIOCGLIFNUM, &lifn) == -1) {
- (void) close(ipsock);
- return (0);
+ cnt = 0;
+ for (ifp = ifp_head; ifp; ifp = ifp->ifa_next) {
+ if (!(ifp->ifa_flags & IFF_LOOPBACK) &&
+ (ifp->ifa_flags & IFF_IPV6))
+ cnt++;
}
- (void) close(ipsock);
- return (num = lifn.lifn_count);
+ freeifaddrs(ifp_head);
+ return (num = cnt);
}
/*
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/Makefile b/usr/src/cmd/cmd-inet/usr.sbin/snoop/Makefile
index 1d408bccba..e285aa09d3 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/Makefile
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/Makefile
@@ -45,6 +45,7 @@ SRCS= $(OBJS:.o=.c)
HDRS= snoop.h snoop_mip.h at.h snoop_ospf.h snoop_ospf6.h
include ../../../Makefile.cmd
+include ../../../Makefile.ctf
CPPFLAGS += -I. -I$(SRC)/common/net/dhcp \
-D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.c b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.c
index 097dd6ee90..6d586ab9b5 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.c
@@ -121,6 +121,7 @@ main(int argc, char **argv)
char *output_area;
int nbytes;
char *datalink = NULL;
+ char *zonename = NULL;
dlpi_handle_t dh;
names[0] = '\0';
@@ -227,7 +228,7 @@ main(int argc, char **argv)
}
(void) setvbuf(stdout, NULL, _IOLBF, BUFSIZ);
- while ((c = getopt(argc, argv, "at:CPDSi:o:Nn:s:d:I:vVp:f:c:x:U?rqz"))
+ while ((c = getopt(argc, argv, "at:CPDSi:o:Nn:s:d:I:vVp:f:c:x:U?rqz:Z"))
!= EOF) {
switch (c) {
case 'a':
@@ -348,8 +349,11 @@ main(int argc, char **argv)
case 'U':
Uflg = B_TRUE;
break;
-#ifdef DEBUG
case 'z':
+ zonename = optarg;
+ break;
+#ifdef DEBUG
+ case 'Z':
zflg = B_TRUE;
break;
#endif /* DEBUG */
@@ -371,7 +375,7 @@ main(int argc, char **argv)
* requested was chosen, but that's too hard.
*/
if (!icapfile) {
- use_kern_pf = open_datalink(&dh, datalink);
+ use_kern_pf = open_datalink(&dh, datalink, zonename);
} else {
use_kern_pf = B_FALSE;
cap_open_read(icapfile);
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.h b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.h
index e4f182572b..40cefa2c59 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.h
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.h
@@ -182,7 +182,7 @@ extern void cap_open_read(const char *);
extern void cap_open_write(const char *);
extern void cap_read(int, int, int, void (*)(), int);
extern void cap_close(void);
-extern boolean_t open_datalink(dlpi_handle_t *, const char *);
+extern boolean_t open_datalink(dlpi_handle_t *, const char *, const char *);
extern void init_datalink(dlpi_handle_t, ulong_t, ulong_t, struct timeval *,
struct Pf_ext_packetfilt *);
extern void net_read(dlpi_handle_t, size_t, int, void (*)(), int);
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c
index ab6bc292ac..54fbdc844b 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c
@@ -114,7 +114,7 @@ select_datalink(const char *linkname, void *arg)
* about the datalink useful for building the proper packet filters.
*/
boolean_t
-open_datalink(dlpi_handle_t *dhp, const char *linkname)
+open_datalink(dlpi_handle_t *dhp, const char *linkname, const char *zonename)
{
int retval;
int flags = DLPI_PASSIVE | DLPI_RAW;
@@ -122,6 +122,9 @@ open_datalink(dlpi_handle_t *dhp, const char *linkname)
dlpi_info_t dlinfo;
if (linkname == NULL) {
+ if (zonename != NULL)
+ pr_err("a datalink must be specified with a zone name");
+
/*
* Select a datalink to use by default. Prefer datalinks that
* are plumbed by IP.
@@ -145,7 +148,8 @@ open_datalink(dlpi_handle_t *dhp, const char *linkname)
flags |= DLPI_DEVIPNET;
if (Iflg || strcmp(linkname, "lo0") == 0)
flags |= DLPI_IPNETINFO;
- if ((retval = dlpi_open(linkname, dhp, flags)) != DLPI_SUCCESS) {
+ if ((retval = dlpi_open_zone(linkname, zonename, dhp,
+ flags)) != DLPI_SUCCESS) {
pr_err("cannot open \"%s\": %s", linkname,
dlpi_strerror(retval));
}
diff --git a/usr/src/cmd/column/Makefile b/usr/src/cmd/column/Makefile
new file mode 100644
index 0000000000..ab4cf3390e
--- /dev/null
+++ b/usr/src/cmd/column/Makefile
@@ -0,0 +1,43 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2013 Joyent, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+PROG=column
+
+include ../Makefile.cmd
+
+CFLAGS += $(CCVERBOSE)
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+install: all $(ROOTPROG)
+
+clean:
+
+lint: lint_PROG
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/column/THIRDPARTYLICENSE b/usr/src/cmd/column/THIRDPARTYLICENSE
new file mode 100644
index 0000000000..a80f56cb43
--- /dev/null
+++ b/usr/src/cmd/column/THIRDPARTYLICENSE
@@ -0,0 +1,26 @@
+Copyright (c) 1989, 1993, 1994
+ The Regents of the University of California. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+4. Neither the name of the University nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+ *
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
diff --git a/usr/src/cmd/column/THIRDPARTYLICENSE.descrip b/usr/src/cmd/column/THIRDPARTYLICENSE.descrip
new file mode 100644
index 0000000000..42051a2982
--- /dev/null
+++ b/usr/src/cmd/column/THIRDPARTYLICENSE.descrip
@@ -0,0 +1 @@
+PORTIONS OF COLUMN COMMAND FUNCTIONALITY
diff --git a/usr/src/cmd/column/column.c b/usr/src/cmd/column/column.c
new file mode 100644
index 0000000000..4f9a3c81a6
--- /dev/null
+++ b/usr/src/cmd/column/column.c
@@ -0,0 +1,335 @@
+/*
+ * Copyright (c) 1989, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Portions Copyright (c) 2013 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef lint
+static const char copyright[] =
+"@(#) Copyright (c) 1989, 1993, 1994\n\
+ The Regents of the University of California. All rights reserved.\n";
+#endif
+
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/param.h>
+#include <sys/termios.h>
+
+#include <err.h>
+#include <limits.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#define TAB 8
+#define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) /* to any y */
+
+static void c_columnate(void);
+static void input(FILE *);
+static void maketbl(void);
+static void print(void);
+static void r_columnate(void);
+static void usage(void);
+static int width(const wchar_t *);
+
+static int termwidth = 80; /* default terminal width */
+
+static int entries; /* number of records */
+static int eval; /* exit value */
+static int maxlength; /* longest record */
+static wchar_t **list; /* array of pointers to records */
+static const wchar_t *separator = L"\t "; /* field separator for table option */
+
+int
+main(int argc, char **argv)
+{
+ struct winsize win;
+ FILE *fp;
+ int ch, tflag, xflag;
+ char *p;
+ const char *src;
+ wchar_t *newsep;
+ size_t seplen;
+
+ (void) setlocale(LC_ALL, "");
+
+ if (ioctl(1, TIOCGWINSZ, &win) == -1 || !win.ws_col) {
+ if ((p = getenv("COLUMNS")))
+ termwidth = atoi(p);
+ } else
+ termwidth = win.ws_col;
+
+ tflag = xflag = 0;
+ while ((ch = getopt(argc, argv, "c:s:tx")) != -1)
+ switch (ch) {
+ case 'c':
+ termwidth = atoi(optarg);
+ break;
+ case 's':
+ src = optarg;
+ seplen = mbsrtowcs(NULL, &src, 0, NULL);
+ if (seplen == (size_t)-1)
+ err(1, "bad separator");
+ newsep = malloc((seplen + 1) * sizeof (wchar_t));
+ if (newsep == NULL)
+ err(1, NULL);
+ (void) mbsrtowcs(newsep, &src, seplen + 1, NULL);
+ separator = newsep;
+ break;
+ case 't':
+ tflag = 1;
+ break;
+ case 'x':
+ xflag = 1;
+ break;
+ case '?':
+ default:
+ usage();
+ }
+ argc -= optind;
+ argv += optind;
+
+ if (!*argv)
+ input(stdin);
+ else for (; *argv; ++argv)
+ if ((fp = fopen(*argv, "rF"))) {
+ input(fp);
+ (void) fclose(fp);
+ } else {
+ warn("%s", *argv);
+ eval = 1;
+ }
+
+ if (!entries)
+ exit(eval);
+
+ maxlength = roundup(maxlength + 1, TAB);
+ if (tflag)
+ maketbl();
+ else if (maxlength >= termwidth)
+ print();
+ else if (xflag)
+ c_columnate();
+ else
+ r_columnate();
+ exit(eval);
+
+ /*NOTREACHED*/
+ return (eval);
+}
+
+static void
+c_columnate(void)
+{
+ int chcnt, col, cnt, endcol, numcols;
+ wchar_t **lp;
+
+ numcols = termwidth / maxlength;
+ endcol = maxlength;
+ for (chcnt = col = 0, lp = list; ; ++lp) {
+ (void) wprintf(L"%ls", *lp);
+ chcnt += width(*lp);
+ if (!--entries)
+ break;
+ if (++col == numcols) {
+ chcnt = col = 0;
+ endcol = maxlength;
+ (void) putwchar('\n');
+ } else {
+ while ((cnt = roundup(chcnt + 1, TAB)) <= endcol) {
+ (void) putwchar('\t');
+ chcnt = cnt;
+ }
+ endcol += maxlength;
+ }
+ }
+ if (chcnt)
+ (void) putwchar('\n');
+}
+
+static void
+r_columnate(void)
+{
+ int base, chcnt, cnt, col, endcol, numcols, numrows, row;
+
+ numcols = termwidth / maxlength;
+ numrows = entries / numcols;
+ if (entries % numcols)
+ ++numrows;
+
+ for (row = 0; row < numrows; ++row) {
+ endcol = maxlength;
+ for (base = row, chcnt = col = 0; col < numcols; ++col) {
+ (void) wprintf(L"%ls", list[base]);
+ chcnt += width(list[base]);
+ if ((base += numrows) >= entries)
+ break;
+ while ((cnt = roundup(chcnt + 1, TAB)) <= endcol) {
+ (void) putwchar('\t');
+ chcnt = cnt;
+ }
+ endcol += maxlength;
+ }
+ (void) putwchar('\n');
+ }
+}
+
+static void
+print(void)
+{
+ int cnt;
+ wchar_t **lp;
+
+ for (cnt = entries, lp = list; cnt--; ++lp)
+ (void) wprintf(L"%ls\n", *lp);
+}
+
+typedef struct _tbl {
+ wchar_t **list;
+ int cols, *len;
+} TBL;
+#define DEFCOLS 25
+
+static void
+maketbl(void)
+{
+ TBL *t;
+ int coloff, cnt;
+ wchar_t *p, **lp;
+ int *lens, maxcols;
+ TBL *tbl;
+ wchar_t **cols;
+ wchar_t *last;
+
+ if ((t = tbl = calloc(entries, sizeof (TBL))) == NULL)
+ err(1, (char *)NULL);
+ if ((cols = calloc((maxcols = DEFCOLS), sizeof (*cols))) == NULL)
+ err(1, (char *)NULL);
+ if ((lens = calloc(maxcols, sizeof (int))) == NULL)
+ err(1, (char *)NULL);
+ for (cnt = 0, lp = list; cnt < entries; ++cnt, ++lp, ++t) {
+ for (coloff = 0, p = *lp;
+ (cols[coloff] = wcstok(p, separator, &last));
+ p = NULL)
+ if (++coloff == maxcols) {
+ if (!(cols = realloc(cols, ((uint_t)maxcols +
+ DEFCOLS) * sizeof (char *))) ||
+ !(lens = realloc(lens,
+ ((uint_t)maxcols + DEFCOLS) *
+ sizeof (int))))
+ err(1, NULL);
+ (void) memset((char *)lens + maxcols *
+ sizeof (int), 0, DEFCOLS * sizeof (int));
+ maxcols += DEFCOLS;
+ }
+ if ((t->list = calloc(coloff, sizeof (*t->list))) == NULL)
+ err(1, (char *)NULL);
+ if ((t->len = calloc(coloff, sizeof (int))) == NULL)
+ err(1, (char *)NULL);
+ for (t->cols = coloff; --coloff >= 0; ) {
+ t->list[coloff] = cols[coloff];
+ t->len[coloff] = width(cols[coloff]);
+ if (t->len[coloff] > lens[coloff])
+ lens[coloff] = t->len[coloff];
+ }
+ }
+ for (cnt = 0, t = tbl; cnt < entries; ++cnt, ++t) {
+ for (coloff = 0; coloff < t->cols - 1; ++coloff)
+ (void) wprintf(L"%ls%*ls", t->list[coloff],
+ lens[coloff] - t->len[coloff] + 2, L" ");
+ (void) wprintf(L"%ls\n", t->list[coloff]);
+ }
+}
+
+#define DEFNUM 1000
+#define MAXLINELEN (LINE_MAX + 1)
+
+static void
+input(FILE *fp)
+{
+ static int maxentry;
+ int len;
+ wchar_t *p, buf[MAXLINELEN];
+
+ if (!list)
+ if ((list = calloc((maxentry = DEFNUM), sizeof (*list))) ==
+ NULL)
+ err(1, (char *)NULL);
+ while (fgetws(buf, MAXLINELEN, fp)) {
+ for (p = buf; *p && iswspace(*p); ++p)
+ ;
+ if (!*p)
+ continue;
+ if (!(p = wcschr(p, L'\n'))) {
+ warnx("line too long");
+ eval = 1;
+ continue;
+ }
+ *p = L'\0';
+ len = width(buf);
+ if (maxlength < len)
+ maxlength = len;
+ if (entries == maxentry) {
+ maxentry += DEFNUM;
+ if (!(list = realloc(list,
+ (uint_t)maxentry * sizeof (*list))))
+ err(1, NULL);
+ }
+ list[entries] = malloc((wcslen(buf) + 1) * sizeof (wchar_t));
+ if (list[entries] == NULL)
+ err(1, NULL);
+ (void) wcscpy(list[entries], buf);
+ entries++;
+ }
+}
+
+/* Like wcswidth(), but ignores non-printing characters. */
+static int
+width(const wchar_t *wcs)
+{
+ int w, cw;
+
+ for (w = 0; *wcs != L'\0'; wcs++)
+ if ((cw = wcwidth(*wcs)) > 0)
+ w += cw;
+ return (w);
+}
+
+static void
+usage(void)
+{
+
+ (void) fprintf(stderr,
+ "usage: column [-tx] [-c columns] [-s sep] [file ...]\n");
+ exit(1);
+}
diff --git a/usr/src/cmd/coreadm/coreadm.xml b/usr/src/cmd/coreadm/coreadm.xml
index 46a4cda17a..28f1e27240 100644
--- a/usr/src/cmd/coreadm/coreadm.xml
+++ b/usr/src/cmd/coreadm/coreadm.xml
@@ -48,14 +48,6 @@
<service_fmri value='svc:/system/filesystem/minimal' />
</dependency>
- <dependency
- name='coreadm_manifest-import'
- type='service'
- grouping='require_all'
- restart_on='none'>
- <service_fmri value='svc:/system/manifest-import:default' />
- </dependency>
-
<instance name='default' enabled='false'>
<exec_method
type='method'
diff --git a/usr/src/cmd/cron/Makefile b/usr/src/cmd/cron/Makefile
index c9ffeadffe..fb05e292a3 100644
--- a/usr/src/cmd/cron/Makefile
+++ b/usr/src/cmd/cron/Makefile
@@ -26,6 +26,7 @@
DEFAULTFILES = cron.dfl
include ../Makefile.cmd
+include ../Makefile.ctf
MANIFEST = cron.xml
@@ -96,20 +97,20 @@ XPG4ATOBJS= $(ATOBJS:%=objs.xpg4/%) $(XPG4OBJS:%=objs.xpg4/%)
XPG6COMMONOBJS= $(COMMONOBJS:%=objs.xpg6/%)
XPG6CTOBJS= $(CRONTABOBJS:%=objs.xpg6/%)
-cron := POBJS = $(CRONOBJS) $(COMMONOBJ2)
-at := POBJS = $(ATOBJS) $(COMMONOBJS)
-at.xpg4 := POBJS = $(XPG4ATOBJS) $(XPG4COMMONOBJS)
-atrm := POBJS = $(ATRMOBJS) $(COMMONOBJS)
-atq := POBJS = $(ATQOBJS) $(COMMONOBJS)
-crontab := POBJS = $(CRONTABOBJS) $(COMMONOBJS)
-crontab.xpg4 := POBJS = $(XPG4CTOBJS) $(XPG4COMMONOBJS)
-crontab.xpg6 := POBJS = $(XPG6CTOBJS) $(XPG6COMMONOBJS)
+cron := OBJS = $(CRONOBJS) $(COMMONOBJ2)
+at := OBJS = $(ATOBJS) $(COMMONOBJS)
+at.xpg4 := OBJS = $(XPG4ATOBJS) $(XPG4COMMONOBJS)
+atrm := OBJS = $(ATRMOBJS) $(COMMONOBJS)
+atq := OBJS = $(ATQOBJS) $(COMMONOBJS)
+crontab := OBJS = $(CRONTABOBJS) $(COMMONOBJS)
+crontab.xpg4 := OBJS = $(XPG4CTOBJS) $(XPG4COMMONOBJS)
+crontab.xpg6 := OBJS = $(XPG6CTOBJS) $(XPG6COMMONOBJS)
CFLAGS += $(CCVERBOSE)
NOBJS= $(CRONOBJS) $(ATOBJS) $(ATRMOBJS1) $(ATQOBJS) $(CRONTABOBJS1) \
$(COMMONOBJS)
-OBJS = $(NOBJS) $(XPG4COMMONOBJS) $(XPG4ATOBJS) $(XPG4CTOBJS) \
+COBJS = $(NOBJS) $(XPG4COMMONOBJS) $(XPG4ATOBJS) $(XPG4CTOBJS) \
$(XPG6COMMONOBJS) $(XPG6CTOBJS) $(GETRESPOBJ)
SRCS = $(NOBJS:%.o=%.c) $(GETRESPSRC)
@@ -157,32 +158,35 @@ all : $(PROG) $(XPG4) $(XPG6) $(SCRIPT) $(XPG4SCRIPT) $(FILES)
install : all $(ROOTPROG) $(ROOTETCDEFAULTFILES) $(ROOTSYMLINK) \
$(ROOTMANIFEST) $(ROOTMETHOD)
-$(PROG) : $$(POBJS)
- $(LINK.c) $(POBJS) -o $@ $(LDLIBS)
+$(PROG) : $$(OBJS)
+ $(LINK.c) $(OBJS) -o $@ $(LDLIBS)
$(POST_PROCESS)
-$(XPG4) : objs.xpg4 $$(POBJS)
- $(LINK.c) $(POBJS) -o $@ $(LDLIBS)
+$(XPG4) : objs.xpg4 $$(OBJS)
+ $(LINK.c) $(OBJS) -o $@ $(LDLIBS)
$(POST_PROCESS)
-$(XPG6) : objs.xpg6 $$(POBJS)
- $(LINK.c) $(POBJS) -o $@ $(LDLIBS)
+$(XPG6) : objs.xpg6 $$(OBJS)
+ $(LINK.c) $(OBJS) -o $@ $(LDLIBS)
$(POST_PROCESS)
objs.xpg6/%.o: %.c
$(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
objs.xpg6:
-@mkdir -p $@
objs.xpg4/%.o: %.c
$(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
objs.xpg4:
-@mkdir -p $@
objs.xpg4/values-xpg4.o: ../../lib/common/common/values-xpg4.c
$(COMPILE.c) -o $@ ../../lib/common/common/values-xpg4.c
+ $(POST_PROCESS_O)
%.o: $(SRC)/common/util/%.c
$(COMPILE.c) $(OUTPUT_OPTION) $<
@@ -219,7 +223,7 @@ $(POFILE): $(POFILES)
$(RM) $@; cat $(POFILES) > $@
clean :
- $(RM) $(OBJS) att1.h att1.c att2.c
+ $(RM) $(COBJS) att1.h att1.c att2.c
lint : lint_SRCS
diff --git a/usr/src/cmd/cron/cron.c b/usr/src/cmd/cron/cron.c
index ab36d09037..33a7373f1e 100644
--- a/usr/src/cmd/cron/cron.c
+++ b/usr/src/cmd/cron/cron.c
@@ -23,7 +23,7 @@
* Use is subject to license terms.
*
* Copyright 2013 Joshua M. Clulow <josh@sysmgr.org>
- *
+ * Copyright 2013 Joyent, Inc. All rights reserved.
* Copyright (c) 2014 Gary Mills
*/
@@ -315,7 +315,8 @@ static int ex(struct event *e);
static void read_dirs(int);
static void mail(char *, char *, int);
static char *next_field(int, int);
-static void readcron(struct usr *, time_t);
+static void readcron(char *, struct usr *, time_t);
+static void readcronfile(FILE *, struct usr *, time_t);
static int next_ge(int, char *);
static void free_if_unused(struct usr *);
static void del_atjob(char *, char *);
@@ -420,7 +421,7 @@ extern void el_delete(void);
static int valid_entry(char *, int);
static struct usr *create_ulist(char *, int);
-static void init_cronevent(char *, int);
+static void init_cronevent(char *, char *);
static void init_atevent(char *, time_t, int, int);
static void update_atevent(struct usr *, char *, time_t, int);
@@ -759,6 +760,18 @@ read_dirs(int first)
time_t tim;
+ if (chdir(SYSCRONDIR) != -1) {
+ cwd = CRON;
+ if ((dir = opendir(".")) != NULL) {
+ while ((dp = readdir(dir)) != NULL) {
+ if (!valid_entry(dp->d_name, CRONEVENT))
+ continue;
+ init_cronevent(SYSCRONDIR, dp->d_name);
+ }
+ (void) closedir(dir);
+ }
+ }
+
if (chdir(CRONDIR) == -1)
crabort(BADCD, REMOVE_FIFO|CONSOLE_MSG);
cwd = CRON;
@@ -767,7 +780,7 @@ read_dirs(int first)
while ((dp = readdir(dir)) != NULL) {
if (!valid_entry(dp->d_name, CRONEVENT))
continue;
- init_cronevent(dp->d_name, first);
+ init_cronevent(CRONDIR, dp->d_name);
}
(void) closedir(dir);
@@ -853,23 +866,18 @@ create_ulist(char *name, int type)
}
void
-init_cronevent(char *name, int first)
+init_cronevent(char *basedir, char *name)
{
struct usr *u;
- if (first) {
+ if ((u = find_usr(name)) == NULL) {
u = create_ulist(name, CRONEVENT);
- readcron(u, 0);
+ readcron(basedir, u, 0);
} else {
- if ((u = find_usr(name)) == NULL) {
- u = create_ulist(name, CRONEVENT);
- readcron(u, 0);
- } else {
- u->ctexists = TRUE;
- rm_ctevents(u);
- el_remove(u->ctid, 0);
- readcron(u, 0);
- }
+ u->ctexists = TRUE;
+ rm_ctevents(u);
+ el_remove(u->ctid, 0);
+ readcron(basedir, u, 0);
}
}
@@ -950,7 +958,7 @@ mod_ctab(char *name, time_t reftime)
(void) strcpy(u->home, pw->pw_dir);
u->uid = pw->pw_uid;
u->gid = pw->pw_gid;
- readcron(u, reftime);
+ readcron(CRONDIR, u, reftime);
} else {
u->uid = pw->pw_uid;
u->gid = pw->pw_gid;
@@ -973,7 +981,7 @@ mod_ctab(char *name, time_t reftime)
/* user didnt have a crontab last time */
u->ctid = ecid++;
u->ctevents = NULL;
- readcron(u, reftime);
+ readcron(CRONDIR, u, reftime);
return;
}
#ifdef DEBUG
@@ -981,7 +989,7 @@ mod_ctab(char *name, time_t reftime)
#endif
rm_ctevents(u);
el_remove(u->ctid, 0);
- readcron(u, reftime);
+ readcron(CRONDIR, u, reftime);
}
}
@@ -1116,8 +1124,94 @@ update_atevent(struct usr *u, char *name, time_t tim, int jobtype)
static char line[CTLINESIZE]; /* holds a line from a crontab file */
static int cursor; /* cursor for the above line */
+static int
+copyfile(char *name, FILE *dp)
+{
+ FILE *tf;
+
+ if ((tf = fopen(name, "r")) == NULL) {
+ (void) fclose(dp);
+ return (1);
+ }
+
+ while (fgets(line, CTLINESIZE, tf) != NULL) {
+ if (fputs(line, dp) == EOF) {
+ (void) fclose(tf);
+ (void) fclose(dp);
+ return (1);
+ }
+ }
+ (void) fclose(tf);
+
+ return (0);
+}
+
+static void
+readcron(char *basedir, struct usr *u, time_t reftime)
+{
+ char *altpath;
+ struct stat sb;
+ FILE *cf; /* cf will be a user's crontab file */
+ char altnamebuf[PATH_MAX];
+ char namebuf[PATH_MAX];
+
+ if (strcmp(basedir, SYSCRONDIR) == 0)
+ altpath = CRONDIR;
+ else
+ altpath = SYSCRONDIR;
+
+ if (snprintf(altnamebuf, sizeof (altnamebuf), "%s/%s", altpath,
+ u->name) >= sizeof (altnamebuf))
+ return;
+
+ if (snprintf(namebuf, sizeof (namebuf), "%s/%s", basedir, u->name) >=
+ sizeof (namebuf))
+ return;
+
+ if (stat(altnamebuf, &sb) != -1) {
+ /*
+ * There is a secondary crontab for this user. We need to
+ * merge the two crontabs into a temporary file for loading.
+ */
+ int fd;
+ char tmpfile[PATH_MAX];
+
+ (void) strlcpy(tmpfile, "/tmp/cronXXXXXX", sizeof (tmpfile));
+ if ((fd = mkstemp(tmpfile)) == -1)
+ return;
+
+ unlink(tmpfile);
+ if ((cf = fdopen(fd, "w+")) == NULL) {
+ close(fd);
+ return;
+ }
+
+ if (copyfile(namebuf, cf) != 0)
+ return;
+
+ if (copyfile(altnamebuf, cf) != 0)
+ return;
+
+ (void) fflush(cf);
+ rewind(cf);
+
+ } else {
+ /*
+ * Only one crontab, open it directly.
+ */
+ if ((cf = fopen(namebuf, "r")) == NULL) {
+ mail(u->name, NOREAD, ERR_UNIXERR);
+ return;
+ }
+ }
+
+ readcronfile(cf, u, reftime);
+
+ (void) fclose(cf);
+}
+
static void
-readcron(struct usr *u, time_t reftime)
+readcronfile(FILE *cf, struct usr *u, time_t reftime)
{
/*
* readcron reads in a crontab file for a user (u). The list of
@@ -1125,12 +1219,9 @@ readcron(struct usr *u, time_t reftime)
* this list. Each event is also entered into the main event
* list.
*/
- FILE *cf; /* cf will be a user's crontab file */
struct event *e;
int start;
unsigned int i;
- char namebuf[PATH_MAX];
- char *pname;
struct shared *tz = NULL;
struct shared *home = NULL;
struct shared *shell = NULL;
@@ -1138,19 +1229,6 @@ readcron(struct usr *u, time_t reftime)
/* read the crontab file */
cte_init(); /* Init error handling */
- if (cwd != CRON) {
- if (snprintf(namebuf, sizeof (namebuf), "%s/%s",
- CRONDIR, u->name) >= sizeof (namebuf)) {
- return;
- }
- pname = namebuf;
- } else {
- pname = u->name;
- }
- if ((cf = fopen(pname, "r")) == NULL) {
- mail(u->name, NOREAD, ERR_UNIXERR);
- return;
- }
while (fgets(line, CTLINESIZE, cf) != NULL) {
char *tmp;
/* process a line of a crontab file */
@@ -1279,7 +1357,6 @@ again:
#endif
}
cte_sendmail(u->name); /* mail errors if any to user */
- (void) fclose(cf);
rel_shared(tz);
rel_shared(shell);
rel_shared(home);
@@ -2442,6 +2519,9 @@ ex(struct event *e)
} else {
r = audit_cron_session(e->u->name, CRONDIR,
e->u->uid, e->u->gid, NULL);
+ if (r != 0)
+ r = audit_cron_session(e->u->name, SYSCRONDIR,
+ e->u->uid, e->u->gid, NULL);
}
if (r != 0) {
msg("cron audit problem. job failed (%s) for user %s",
diff --git a/usr/src/cmd/cron/cron.h b/usr/src/cmd/cron/cron.h
index a76016299c..93e21e7b41 100644
--- a/usr/src/cmd/cron/cron.h
+++ b/usr/src/cmd/cron/cron.h
@@ -71,6 +71,9 @@ struct message {
char logname[LLEN];
};
+/* anything below here can be changed */
+
+#define SYSCRONDIR "/etc/cron.d/crontabs"
#define CRONDIR "/var/spool/cron/crontabs"
#define ATDIR "/var/spool/cron/atjobs"
#define ACCTFILE "/var/cron/log"
diff --git a/usr/src/cmd/cron/crontab.c b/usr/src/cmd/cron/crontab.c
index 327a71388b..cdb4e1e394 100644
--- a/usr/src/cmd/cron/crontab.c
+++ b/usr/src/cmd/cron/crontab.c
@@ -71,7 +71,7 @@
"usage:\n" \
"\tcrontab [file]\n" \
"\tcrontab -e [username]\n" \
- "\tcrontab -l [username]\n" \
+ "\tcrontab -l [-g] [username]\n" \
"\tcrontab -r [username]"
#define INVALIDUSER "you are not a valid user (no entry in /etc/passwd)."
#define NOTALLOWED "you are not authorized to use cron. Sorry."
@@ -120,6 +120,7 @@ main(int argc, char **argv)
int c, r;
int rflag = 0;
int lflag = 0;
+ int gflag = 0;
int eflag = 0;
int errflg = 0;
char *pp;
@@ -151,11 +152,14 @@ main(int argc, char **argv)
exit(1);
}
- while ((c = getopt(argc, argv, "elr")) != EOF)
+ while ((c = getopt(argc, argv, "eglr")) != EOF)
switch (c) {
case 'e':
eflag++;
break;
+ case 'g':
+ gflag++;
+ break;
case 'l':
lflag++;
break;
@@ -170,6 +174,9 @@ main(int argc, char **argv)
if (eflag + lflag + rflag > 1)
errflg++;
+ if (gflag && !lflag)
+ errflg++;
+
argc -= optind;
argv += optind;
if (errflg || argc > 1)
@@ -236,12 +243,27 @@ main(int argc, char **argv)
exit(0);
}
if (lflag) {
- if ((fp = fopen(cf, "r")) == NULL)
- crabort(BADOPEN);
- while (fgets(line, CTLINESIZE, fp) != NULL)
- fputs(line, stdout);
- fclose(fp);
- exit(0);
+ char sysconf[PATH_MAX];
+
+ if (gflag) {
+ if (snprintf(sysconf, sizeof (sysconf), "%s/%s",
+ SYSCRONDIR, login) < sizeof (sysconf) &&
+ (fp = fopen(sysconf, "r")) != NULL) {
+ while (fgets(line, CTLINESIZE, fp) != NULL)
+ fputs(line, stdout);
+ fclose(fp);
+ exit(0);
+ } else {
+ crabort(BADOPEN);
+ }
+ } else {
+ if ((fp = fopen(cf, "r")) == NULL)
+ crabort(BADOPEN);
+ while (fgets(line, CTLINESIZE, fp) != NULL)
+ fputs(line, stdout);
+ fclose(fp);
+ exit(0);
+ }
}
if (eflag) {
if ((fp = fopen(cf, "r")) == NULL) {
diff --git a/usr/src/cmd/ctfdiff/Makefile b/usr/src/cmd/ctfdiff/Makefile
new file mode 100644
index 0000000000..5779f280bc
--- /dev/null
+++ b/usr/src/cmd/ctfdiff/Makefile
@@ -0,0 +1,44 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+PROG= ctfdiff
+
+include ../Makefile.cmd
+
+CFLAGS += $(CCVERBOSE)
+LDLIBS += -lctf
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+install: all $(ROOTPROG)
+
+clean:
+
+lint: lint_PROG
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/ctfdiff/ctfdiff.c b/usr/src/cmd/ctfdiff/ctfdiff.c
new file mode 100644
index 0000000000..9d4c65e698
--- /dev/null
+++ b/usr/src/cmd/ctfdiff/ctfdiff.c
@@ -0,0 +1,220 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * diff two CTF containers
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <strings.h>
+#include <libctf.h>
+
+#define CTFDIFF_EXIT_SIMILAR 0
+#define CTFDIFF_EXIT_DIFFERENT 1
+#define CTFDIFF_EXIT_ERROR 2
+
+static const char *g_iname;
+static ctf_file_t *g_ifp;
+static const char *g_oname;
+static ctf_file_t *g_ofp;
+static char **g_typelist = NULL;
+static int g_nexttype = 0;
+static int g_ntypes = 0;
+static boolean_t g_onlydiff = B_FALSE;
+static boolean_t g_different = B_FALSE;
+
+static const char *
+fp_to_name(ctf_file_t *fp)
+{
+ if (fp == g_ifp)
+ return (g_iname);
+ if (fp == g_ofp)
+ return (g_oname);
+ return (NULL);
+}
+
+/* ARGSUSED */
+static void
+diff_cb(ctf_file_t *ifp, ctf_id_t iid, boolean_t similar, ctf_file_t *ofp,
+ ctf_id_t oid, void *arg)
+{
+ if (similar == B_TRUE)
+ return;
+
+ /*
+ * Check if it's the type the user cares about.
+ */
+ if (g_nexttype != 0) {
+ int i;
+ char namebuf[256];
+
+ if (ctf_type_name(ifp, iid, namebuf, sizeof (namebuf)) ==
+ NULL) {
+ (void) fprintf(stderr, "failed to obtain the name "
+ "of type %ld from %s: %s\n",
+ iid, fp_to_name(ifp),
+ ctf_errmsg(ctf_errno(ifp)));
+ exit(CTFDIFF_EXIT_ERROR);
+ }
+
+ for (i = 0; i < g_nexttype; i++) {
+ if (strcmp(g_typelist[i], namebuf) == 0)
+ break;
+ }
+
+ if (i == g_nexttype)
+ return;
+ }
+
+ g_different = B_TRUE;
+
+ if (g_onlydiff == B_TRUE)
+ return;
+
+ (void) printf("fp %s type %ld ", fp_to_name(ifp), iid);
+ if (similar == B_TRUE) {
+ (void) printf("is the same as fp %s type %ld\n",
+ fp_to_name(ofp), oid);
+ } else {
+ (void) printf("is different\n");
+ }
+}
+
+int
+main(int argc, char *argv[])
+{
+ ctf_diff_flag_t flags = 0;
+ int err, c;
+ ctf_file_t *ifp, *ofp;
+ ctf_diff_t *cdp;
+ ctf_file_t *pifp = NULL;
+ ctf_file_t *pofp = NULL;
+
+ while ((c = getopt(argc, argv, "qIp:P:T:")) != -1) {
+ switch (c) {
+ case 'q':
+ g_onlydiff = B_TRUE;
+ break;
+ case 'p':
+ pifp = ctf_open(optarg, &err);
+ if (pifp == NULL) {
+ (void) fprintf(stderr, "ctfdiff: failed to "
+ "open parent input container %s: %s\n",
+ optarg, ctf_errmsg(err));
+ return (CTFDIFF_EXIT_ERROR);
+ }
+ break;
+ case 'I':
+ flags |= CTF_DIFF_F_IGNORE_INTNAMES;
+ break;
+ case 'P':
+ pofp = ctf_open(optarg, &err);
+ if (pofp == NULL) {
+ (void) fprintf(stderr, "ctfdiff: failed to "
+ "open parent output container %s: %s\n",
+ optarg, ctf_errmsg(err));
+ return (CTFDIFF_EXIT_ERROR);
+ }
+ break;
+ case 'T':
+ if (g_nexttype == g_ntypes) {
+ if (g_ntypes == 0)
+ g_ntypes = 16;
+ else
+ g_ntypes *= 2;
+ g_typelist = realloc(g_typelist,
+ sizeof (char *) * g_ntypes);
+ if (g_typelist == NULL) {
+ (void) fprintf(stderr, "ctfdiff: "
+ "failed to allocate memory for "
+ "the %dth -t option: %s\n",
+ g_nexttype + 1, strerror(errno));
+ }
+ }
+ g_typelist[g_nexttype] = optarg;
+ g_nexttype++;
+ }
+ }
+
+ argc -= optind - 1;
+ argv += optind - 1;
+
+ if (argc != 3) {
+ (void) fprintf(stderr, "usage: ctfdiff [-qI] [-p parent] "
+ "[-P parent] [-T type]... input output");
+ return (CTFDIFF_EXIT_ERROR);
+ }
+
+ ifp = ctf_open(argv[1], &err);
+ if (ifp == NULL) {
+ (void) fprintf(stderr, "ctfdiff: failed to open %s: %s\n",
+ argv[1], ctf_errmsg(err));
+ return (CTFDIFF_EXIT_ERROR);
+ }
+ if (pifp != NULL) {
+ err = ctf_import(ifp, pifp);
+ if (err != 0) {
+ (void) fprintf(stderr, "ctfdiff: failed to set parent "
+ "container: %s\n", ctf_errmsg(ctf_errno(pifp)));
+ return (CTFDIFF_EXIT_ERROR);
+ }
+ }
+ g_iname = argv[1];
+ g_ifp = ifp;
+
+ ofp = ctf_open(argv[2], &err);
+ if (ofp == NULL) {
+ (void) fprintf(stderr, "ctfdiff: failed to open %s: %s\n",
+ argv[2], ctf_errmsg(err));
+ return (CTFDIFF_EXIT_ERROR);
+ }
+
+ if (pofp != NULL) {
+ err = ctf_import(ofp, pofp);
+ if (err != 0) {
+ (void) fprintf(stderr, "ctfdiff: failed to set parent "
+ "container: %s\n", ctf_errmsg(ctf_errno(pofp)));
+ return (CTFDIFF_EXIT_ERROR);
+ }
+ }
+ g_oname = argv[2];
+ g_ofp = ofp;
+
+ if (ctf_diff_init(ifp, ofp, &cdp) != 0) {
+ (void) fprintf(stderr,
+ "ctfdiff: failed to initialize libctf diff engine: %s\n",
+ ctf_errmsg(ctf_errno(ifp)));
+ return (CTFDIFF_EXIT_ERROR);
+ }
+
+ if (ctf_diff_setflags(cdp, flags) != 0) {
+ (void) fprintf(stderr,
+ "ctfdiff: failed to set ctfdiff flags: %s\n",
+ ctf_errmsg(ctf_errno(ifp)));
+ }
+
+ err = ctf_diff_types(cdp, diff_cb, NULL);
+ ctf_diff_fini(cdp);
+ if (err == CTF_ERR) {
+ (void) fprintf(stderr, "encountered a libctf error: %s!\n",
+ ctf_errmsg(ctf_errno(ifp)));
+ return (CTFDIFF_EXIT_ERROR);
+ }
+
+ return (g_different == B_TRUE ? CTFDIFF_EXIT_DIFFERENT :
+ CTFDIFF_EXIT_SIMILAR);
+}
diff --git a/usr/src/cmd/dd/dd.c b/usr/src/cmd/dd/dd.c
index 623850f83e..30f027f85a 100644
--- a/usr/src/cmd/dd/dd.c
+++ b/usr/src/cmd/dd/dd.c
@@ -147,6 +147,7 @@ static off_t iseekn; /* number of input records to seek past */
static off_t oseekn; /* number of output records to seek past */
static unsigned long long count; /* number of input records to copy */
/* (0 = all) */
+static boolean_t ecount; /* explicit count given */
static int trantype; /* BSD or SVr4 compatible EBCDIC */
static char *string; /* command arg pointer */
@@ -571,6 +572,7 @@ main(int argc, char **argv)
if (match("count="))
{
count = number(BIG);
+ ecount = B_TRUE;
continue;
}
if (match("files="))
@@ -1009,8 +1011,7 @@ main(int argc, char **argv)
nstats = 0;
}
- if ((count == 0) || (nifr+nipr < count))
- {
+ if ((count == 0 && ecount == B_FALSE) || (nifr+nipr < count)) {
/* If proceed on error is enabled, zero the input buffer */
if (cflag&NERR)
diff --git a/usr/src/cmd/devfsadm/Makefile.com b/usr/src/cmd/devfsadm/Makefile.com
index 4df3b00585..1585db2894 100644
--- a/usr/src/cmd/devfsadm/Makefile.com
+++ b/usr/src/cmd/devfsadm/Makefile.com
@@ -67,7 +67,6 @@ LINK_OBJS_CMN = \
fssnap_link.o \
sgen_link.o \
smp_link.o \
- md_link.o \
dtrace_link.o \
vscan_link.o \
zfs_link.o \
diff --git a/usr/src/cmd/devfsadm/devlink.tab.sh b/usr/src/cmd/devfsadm/devlink.tab.sh
index 6724fcb573..0267efeb9f 100644
--- a/usr/src/cmd/devfsadm/devlink.tab.sh
+++ b/usr/src/cmd/devfsadm/devlink.tab.sh
@@ -22,8 +22,7 @@
#
# Copyright (c) 1998, 2000 by Sun Microsystems, Inc.
# All rights reserved.
-#
-#ident "%Z%%M% %I% %E% SMI"
+# Copyright (c) 2012 Joyent, Inc. All rights reserved.
#
# This is the script that generates the devlink.tab file. It is
# architecture-aware, and dumps different stuff for x86 and sparc.
@@ -34,8 +33,6 @@
#
cat <<EOM
-#ident "%Z%%M% %I% %E% SMI"
-#
# Copyright (c) 1998 by Sun Microsystems, Inc.
#
#
diff --git a/usr/src/cmd/devfsadm/i386/Makefile b/usr/src/cmd/devfsadm/i386/Makefile
index 1f14c93dad..75f2da3436 100644
--- a/usr/src/cmd/devfsadm/i386/Makefile
+++ b/usr/src/cmd/devfsadm/i386/Makefile
@@ -24,8 +24,11 @@
LINK_OBJS_i386 = \
misc_link_i386.o \
+ lx_link_i386.o \
xen_link.o
+lx_link_i386.o lx_link_i386.po lx_link_i386.ln := CPPFLAGS += -I$(UTSBASE)/common/brand/lx
+
xen_link.o xen_link.ln xen_link.po := CPPFLAGS += -I$(UTSBASE)/i86xpv
include ../Makefile.com
diff --git a/usr/src/cmd/devfsadm/i386/lx_link_i386.c b/usr/src/cmd/devfsadm/i386/lx_link_i386.c
new file mode 100644
index 0000000000..855f4f7383
--- /dev/null
+++ b/usr/src/cmd/devfsadm/i386/lx_link_i386.c
@@ -0,0 +1,86 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <devfsadm.h>
+#include <strings.h>
+#include <stdio.h>
+#include <sys/lx_ptm.h>
+#include <sys/lx_audio.h>
+
+static int lx_ptm(di_minor_t minor, di_node_t node);
+static int lx_audio(di_minor_t minor, di_node_t node);
+static int lx_systrace(di_minor_t minor, di_node_t node);
+
+static devfsadm_create_t lx_create_cbt[] = {
+ { "pseudo", "ddi_pseudo", LX_PTM_DRV,
+ TYPE_EXACT | DRV_EXACT, ILEVEL_0, lx_ptm },
+ { "pseudo", "ddi_pseudo", LX_AUDIO_DRV,
+ TYPE_EXACT | DRV_EXACT, ILEVEL_0, lx_audio },
+ { "pseudo", "ddi_pseudo", "lx_systrace",
+ TYPE_EXACT | DRV_EXACT, ILEVEL_0, lx_systrace },
+};
+
+DEVFSADM_CREATE_INIT_V0(lx_create_cbt);
+
+static int
+lx_ptm(di_minor_t minor, di_node_t node)
+{
+ char *mname = di_minor_name(minor);
+
+ if (strcmp(LX_PTM_MINOR_NODE, mname) == 0)
+ (void) devfsadm_mklink("brand/lx/ptmx", node, minor, 0);
+
+ return (DEVFSADM_CONTINUE);
+}
+
+static int
+lx_audio(di_minor_t minor, di_node_t node)
+{
+ char *mname = di_minor_name(minor);
+
+ if (strcmp(LXA_MINORNAME_DEVCTL, mname) == 0)
+ (void) devfsadm_mklink("brand/lx/audio_devctl", node, minor, 0);
+ if (strcmp(LXA_MINORNAME_DSP, mname) == 0)
+ (void) devfsadm_mklink("brand/lx/dsp", node, minor, 0);
+ if (strcmp(LXA_MINORNAME_MIXER, mname) == 0)
+ (void) devfsadm_mklink("brand/lx/mixer", node, minor, 0);
+
+ return (DEVFSADM_CONTINUE);
+}
+
+static int
+lx_systrace(di_minor_t minor, di_node_t node)
+{
+ char *mname = di_minor_name(minor);
+ char path[MAXPATHLEN];
+
+ (void) snprintf(path, sizeof (path), "dtrace/provider/%s", mname);
+ (void) devfsadm_mklink(path, node, minor, 0);
+
+ return (DEVFSADM_CONTINUE);
+}
diff --git a/usr/src/cmd/devfsadm/misc_link.c b/usr/src/cmd/devfsadm/misc_link.c
index b7aef8b00d..70599d6039 100644
--- a/usr/src/cmd/devfsadm/misc_link.c
+++ b/usr/src/cmd/devfsadm/misc_link.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#include <regex.h>
@@ -108,6 +108,9 @@ static devfsadm_create_t misc_cbt[] = {
"(^nca$)|(^rds$)|(^sdp$)|(^ipnet$)|(^dlpistub$)|(^bpf$)",
TYPE_EXACT | DRV_RE, ILEVEL_1, minor_name
},
+ { "pseudo", "ddi_pseudo", "inotify",
+ TYPE_EXACT | DRV_EXACT, ILEVEL_0, minor_name
+ },
{ "pseudo", "ddi_pseudo", "ipd",
TYPE_EXACT | DRV_EXACT, ILEVEL_0, minor_name
},
diff --git a/usr/src/cmd/dladm/dladm.c b/usr/src/cmd/dladm/dladm.c
index 904eca536d..fb0bfdd009 100644
--- a/usr/src/cmd/dladm/dladm.c
+++ b/usr/src/cmd/dladm/dladm.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 Joyent, Inc. All rights reserved.
*/
#include <stdio.h>
@@ -154,6 +155,7 @@ typedef struct show_vnic_state {
dladm_status_t vs_status;
uint32_t vs_flags;
ofmt_handle_t vs_ofmt;
+ char *vs_zonename;
} show_vnic_state_t;
typedef struct show_part_state {
@@ -265,7 +267,7 @@ typedef struct cmd {
static cmd_t cmds[] = {
{ "rename-link", do_rename_link,
- " rename-link <oldlink> <newlink>" },
+ " rename-link [-z zonename] <oldlink> <newlink>" },
{ "show-link", do_show_link,
" show-link [-pP] [-o <field>,..] [-s [-i <interval>]] "
"[<link>]\n" },
@@ -300,12 +302,13 @@ static cmd_t cmds[] = {
{ "show-wifi", do_show_wifi,
" show-wifi [-p] [-o <field>,...] [<link>]\n" },
{ "set-linkprop", do_set_linkprop,
- " set-linkprop [-t] -p <prop>=<value>[,...] <name>" },
+ " set-linkprop [-t] [-z zonename] -p <prop>=<value>[,...] "
+ "<name>" },
{ "reset-linkprop", do_reset_linkprop,
- " reset-linkprop [-t] [-p <prop>,...] <name>" },
+ " reset-linkprop [-t] [-z zonename] [-p <prop>,...] <name>"},
{ "show-linkprop", do_show_linkprop,
- " show-linkprop [-cP] [-o <field>,...] [-p <prop>,...] "
- "<name>\n" },
+ " show-linkprop [-cP] [-o <field>,...] [-z zonename] "
+ "[-p <prop>,...] <name>\n" },
{ "show-ether", do_show_ether,
" show-ether [-px][-o <field>,...] <link>\n" },
{ "create-secobj", do_create_secobj,
@@ -347,10 +350,10 @@ static cmd_t cmds[] = {
"\t\t {vrrp -V <vrid> -A {inet | inet6}} [-v <vid> [-f]]\n"
"\t\t [-p <prop>=<value>[,...]] <vnic-link>" },
{ "delete-vnic", do_delete_vnic,
- " delete-vnic [-t] <vnic-link>" },
+ " delete-vnic [-t] [-z zonename] <vnic-link>" },
{ "show-vnic", do_show_vnic,
- " show-vnic [-pP] [-l <link>] [-s [-i <interval>]] "
- "[<link>]\n" },
+ " show-vnic [-pP] [-l <link>] [-z zonename] "
+ "[-s [-i <interval>]] [<link>]\n" },
{ "up-vnic", do_up_vnic, NULL },
{ "create-part", do_create_part,
" create-part [-t] [-f] -l <link> [-P <pkey>]\n"
@@ -959,6 +962,7 @@ typedef struct show_linkprop_state {
char ls_link[MAXLINKNAMELEN];
char *ls_line;
char **ls_propvals;
+ char *ls_zonename;
dladm_arg_list_t *ls_proplist;
boolean_t ls_parsable;
boolean_t ls_persist;
@@ -1011,21 +1015,24 @@ typedef struct vnic_fields_buf_s
char vnic_macaddr[18];
char vnic_macaddrtype[19];
char vnic_vid[6];
+ char vnic_zone[ZONENAME_MAX];
} vnic_fields_buf_t;
static const ofmt_field_t vnic_fields[] = {
{ "LINK", 13,
offsetof(vnic_fields_buf_t, vnic_link), print_default_cb},
-{ "OVER", 13,
+{ "OVER", 11,
offsetof(vnic_fields_buf_t, vnic_over), print_default_cb},
-{ "SPEED", 7,
+{ "SPEED", 6,
offsetof(vnic_fields_buf_t, vnic_speed), print_default_cb},
{ "MACADDRESS", 18,
offsetof(vnic_fields_buf_t, vnic_macaddr), print_default_cb},
-{ "MACADDRTYPE", 20,
+{ "MACADDRTYPE", 12,
offsetof(vnic_fields_buf_t, vnic_macaddrtype), print_default_cb},
-{ "VID", 7,
+{ "VID", 5,
offsetof(vnic_fields_buf_t, vnic_vid), print_default_cb},
+{ "ZONE", 20,
+ offsetof(vnic_fields_buf_t, vnic_zone), print_default_cb},
{ NULL, 0, 0, NULL}}
;
@@ -2495,13 +2502,17 @@ do_rename_link(int argc, char *argv[], const char *use)
char *link1, *link2;
char *altroot = NULL;
dladm_status_t status;
+ char *zonename = NULL;
opterr = 0;
- while ((option = getopt_long(argc, argv, ":R:", lopts, NULL)) != -1) {
+ while ((option = getopt_long(argc, argv, ":R:z:", lopts, NULL)) != -1) {
switch (option) {
case 'R':
altroot = optarg;
break;
+ case 'z':
+ zonename = optarg;
+ break;
default:
die_opterr(optopt, option, use);
break;
@@ -2517,7 +2528,7 @@ do_rename_link(int argc, char *argv[], const char *use)
link1 = argv[optind++];
link2 = argv[optind];
- if ((status = dladm_rename_link(handle, link1, link2)) !=
+ if ((status = dladm_rename_link(handle, zonename, link1, link2)) !=
DLADM_STATUS_OK)
die_dlerr(status, "rename operation failed");
}
@@ -3407,11 +3418,12 @@ do_show_link(int argc, char *argv[], const char *use)
ofmt_handle_t ofmt;
ofmt_status_t oferr;
uint_t ofmtflags = 0;
+ char *zonename = NULL;
bzero(&state, sizeof (state));
opterr = 0;
- while ((option = getopt_long(argc, argv, ":pPsSi:o:",
+ while ((option = getopt_long(argc, argv, ":pPsSi:o:z:",
show_lopts, NULL)) != -1) {
switch (option) {
case 'p':
@@ -3450,6 +3462,9 @@ do_show_link(int argc, char *argv[], const char *use)
if (!dladm_str2interval(optarg, &interval))
die("invalid interval value '%s'", optarg);
break;
+ case 'z':
+ zonename = optarg;
+ break;
default:
die_opterr(optopt, option, use);
break;
@@ -3475,8 +3490,8 @@ do_show_link(int argc, char *argv[], const char *use)
if (strlcpy(linkname, argv[optind], MAXLINKNAMELEN) >=
MAXLINKNAMELEN)
die("link name too long");
- if ((status = dladm_name2info(handle, linkname, &linkid, &f,
- NULL, NULL)) != DLADM_STATUS_OK) {
+ if ((status = dladm_zname2info(handle, zonename, linkname,
+ &linkid, &f, NULL, NULL)) != DLADM_STATUS_OK) {
die_dlerr(status, "link %s is not valid", linkname);
}
@@ -4741,6 +4756,12 @@ do_create_vnic(int argc, char *argv[], const char *use)
if ((flags & DLADM_OPT_FORCE) != 0 && vid == 0)
die("-f option can only be used with -v");
+ /*
+ * If creating a transient VNIC for a zone, mark it in the kernel.
+ */
+ if (strstr(propstr, "zone=") != NULL && !(flags & DLADM_OPT_PERSIST))
+ flags |= DLADM_OPT_TRANSIENT;
+
if (mac_prefix_len != 0 && mac_addr_type != VNIC_MAC_ADDR_TYPE_RANDOM &&
mac_addr_type != VNIC_MAC_ADDR_TYPE_FIXED)
usage();
@@ -4832,9 +4853,10 @@ do_delete_vnic_common(int argc, char *argv[], const char *use,
datalink_id_t linkid;
char *altroot = NULL;
dladm_status_t status;
+ char *zonename = NULL;
opterr = 0;
- while ((option = getopt_long(argc, argv, ":R:t", lopts,
+ while ((option = getopt_long(argc, argv, ":R:tz:", lopts,
NULL)) != -1) {
switch (option) {
case 't':
@@ -4843,6 +4865,9 @@ do_delete_vnic_common(int argc, char *argv[], const char *use,
case 'R':
altroot = optarg;
break;
+ case 'z':
+ zonename = optarg;
+ break;
default:
die_opterr(optopt, option, use);
}
@@ -4855,8 +4880,8 @@ do_delete_vnic_common(int argc, char *argv[], const char *use,
if (altroot != NULL)
altroot_cmd(altroot, argc, argv);
- status = dladm_name2info(handle, argv[optind], &linkid, NULL, NULL,
- NULL);
+ status = dladm_zname2info(handle, zonename, argv[optind], &linkid, NULL,
+ NULL, NULL);
if (status != DLADM_STATUS_OK)
die("invalid link name '%s'", argv[optind]);
@@ -4988,6 +5013,9 @@ print_vnic(show_vnic_state_t *state, datalink_id_t linkid)
char vnic_name[MAXLINKNAMELEN];
char mstr[MAXMACADDRLEN * 3];
vnic_fields_buf_t vbuf;
+ uint_t valcnt = 1;
+ char zonename[DLADM_PROP_VAL_MAX + 1];
+ char *valptr[1];
if ((status = dladm_vnic_info(handle, linkid, vnic, state->vs_flags)) !=
DLADM_STATUS_OK)
@@ -5017,6 +5045,18 @@ print_vnic(show_vnic_state_t *state, datalink_id_t linkid)
NULL, devname, sizeof (devname)) != DLADM_STATUS_OK)
(void) sprintf(devname, "?");
+
+ zonename[0] = '\0';
+ if (!is_etherstub) {
+ valptr[0] = zonename;
+ (void) dladm_get_linkprop(handle, linkid,
+ DLADM_PROP_VAL_CURRENT, "zone", (char **)valptr, &valcnt);
+ }
+
+ if (state->vs_zonename != NULL &&
+ strcmp(state->vs_zonename, zonename) != 0)
+ return (DLADM_STATUS_OK);
+
state->vs_found = B_TRUE;
if (state->vs_stats) {
/* print vnic statistics */
@@ -5092,6 +5132,13 @@ print_vnic(show_vnic_state_t *state, datalink_id_t linkid)
(void) snprintf(vbuf.vnic_vid, sizeof (vbuf.vnic_vid),
"%d", vnic->va_vid);
+
+ if (zonename[0] != '\0')
+ (void) snprintf(vbuf.vnic_zone,
+ sizeof (vbuf.vnic_zone), "%s", zonename);
+ else
+ (void) strlcpy(vbuf.vnic_zone, "--",
+ sizeof (vbuf.vnic_zone));
}
ofmt_print(state->vs_ofmt, &vbuf);
@@ -5130,10 +5177,11 @@ do_show_vnic_common(int argc, char *argv[], const char *use,
ofmt_handle_t ofmt;
ofmt_status_t oferr;
uint_t ofmtflags = 0;
+ char *zonename = NULL;
bzero(&state, sizeof (state));
opterr = 0;
- while ((option = getopt_long(argc, argv, ":pPl:si:o:", lopts,
+ while ((option = getopt_long(argc, argv, ":pPl:si:o:z:", lopts,
NULL)) != -1) {
switch (option) {
case 'p':
@@ -5172,6 +5220,9 @@ do_show_vnic_common(int argc, char *argv[], const char *use,
o_arg = B_TRUE;
fields_str = optarg;
break;
+ case 'z':
+ zonename = optarg;
+ break;
default:
die_opterr(optopt, option, use);
}
@@ -5182,8 +5233,8 @@ do_show_vnic_common(int argc, char *argv[], const char *use,
/* get vnic ID (optional last argument) */
if (optind == (argc - 1)) {
- status = dladm_name2info(handle, argv[optind], &linkid, NULL,
- NULL, NULL);
+ status = dladm_zname2info(handle, zonename, argv[optind],
+ &linkid, NULL, NULL, NULL);
if (status != DLADM_STATUS_OK) {
die_dlerr(status, "invalid vnic name '%s'",
argv[optind]);
@@ -5194,8 +5245,8 @@ do_show_vnic_common(int argc, char *argv[], const char *use,
}
if (l_arg) {
- status = dladm_name2info(handle, state.vs_link, &dev_linkid,
- NULL, NULL, NULL);
+ status = dladm_zname2info(handle, zonename, state.vs_link,
+ &dev_linkid, NULL, NULL, NULL);
if (status != DLADM_STATUS_OK) {
die_dlerr(status, "invalid link name '%s'",
state.vs_link);
@@ -5207,6 +5258,7 @@ do_show_vnic_common(int argc, char *argv[], const char *use,
state.vs_etherstub = etherstub;
state.vs_found = B_FALSE;
state.vs_flags = flags;
+ state.vs_zonename = zonename;
if (!o_arg || (o_arg && strcasecmp(fields_str, "all") == 0)) {
if (etherstub)
@@ -6695,6 +6747,7 @@ do_show_linkprop(int argc, char **argv, const char *use)
ofmt_handle_t ofmt;
ofmt_status_t oferr;
uint_t ofmtflags = 0;
+ char *zonename = NULL;
bzero(propstr, DLADM_STRSIZE);
opterr = 0;
@@ -6705,7 +6758,7 @@ do_show_linkprop(int argc, char **argv, const char *use)
state.ls_header = B_TRUE;
state.ls_retstatus = DLADM_STATUS_OK;
- while ((option = getopt_long(argc, argv, ":p:cPo:",
+ while ((option = getopt_long(argc, argv, ":p:cPo:z:",
prop_longopts, NULL)) != -1) {
switch (option) {
case 'p':
@@ -6724,6 +6777,9 @@ do_show_linkprop(int argc, char **argv, const char *use)
case 'o':
fields_str = optarg;
break;
+ case 'z':
+ zonename = optarg;
+ break;
default:
die_opterr(optopt, option, use);
break;
@@ -6731,8 +6787,8 @@ do_show_linkprop(int argc, char **argv, const char *use)
}
if (optind == (argc - 1)) {
- if ((status = dladm_name2info(handle, argv[optind], &linkid,
- NULL, NULL, NULL)) != DLADM_STATUS_OK) {
+ if ((status = dladm_zname2info(handle, zonename, argv[optind],
+ &linkid, NULL, NULL, NULL)) != DLADM_STATUS_OK) {
die_dlerr(status, "link %s is not valid", argv[optind]);
}
} else if (optind != argc) {
@@ -6743,6 +6799,7 @@ do_show_linkprop(int argc, char **argv, const char *use)
!= DLADM_STATUS_OK)
die("invalid link properties specified");
state.ls_proplist = proplist;
+ state.ls_zonename = zonename;
state.ls_status = DLADM_STATUS_OK;
if (state.ls_parsable)
@@ -6787,6 +6844,17 @@ show_linkprop_onelink(dladm_handle_t hdl, datalink_id_t linkid, void *arg)
return (DLADM_WALK_CONTINUE);
}
+ if (statep->ls_zonename != NULL) {
+ datalink_id_t tlinkid;
+
+ if (dladm_zname2info(hdl, statep->ls_zonename, statep->ls_link,
+ &tlinkid, NULL, NULL, NULL) != DLADM_STATUS_OK ||
+ linkid != tlinkid) {
+ statep->ls_status = DLADM_STATUS_NOTFOUND;
+ return (DLADM_WALK_CONTINUE);
+ }
+ }
+
if ((statep->ls_persist && !(flags & DLADM_OPT_PERSIST)) ||
(!statep->ls_persist && !(flags & DLADM_OPT_ACTIVE))) {
statep->ls_status = DLADM_STATUS_BADARG;
@@ -6869,11 +6937,12 @@ set_linkprop(int argc, char **argv, boolean_t reset, const char *use)
dladm_status_t status = DLADM_STATUS_OK;
char propstr[DLADM_STRSIZE];
dladm_arg_list_t *proplist = NULL;
+ char *zonename = NULL;
opterr = 0;
bzero(propstr, DLADM_STRSIZE);
- while ((option = getopt_long(argc, argv, ":p:R:t",
+ while ((option = getopt_long(argc, argv, ":p:R:tz:",
prop_longopts, NULL)) != -1) {
switch (option) {
case 'p':
@@ -6888,6 +6957,9 @@ set_linkprop(int argc, char **argv, boolean_t reset, const char *use)
case 'R':
altroot = optarg;
break;
+ case 'z':
+ zonename = optarg;
+ break;
default:
die_opterr(optopt, option, use);
@@ -6910,8 +6982,8 @@ set_linkprop(int argc, char **argv, boolean_t reset, const char *use)
altroot_cmd(altroot, argc, argv);
}
- status = dladm_name2info(handle, argv[optind], &linkid, NULL, NULL,
- NULL);
+ status = dladm_zname2info(handle, zonename, argv[optind], &linkid,
+ NULL, NULL, NULL);
if (status != DLADM_STATUS_OK)
die_dlerr(status, "link %s is not valid", argv[optind]);
diff --git a/usr/src/cmd/dlmgmtd/dlmgmt_db.c b/usr/src/cmd/dlmgmtd/dlmgmt_db.c
index 99307dbc03..a03ce65d6b 100644
--- a/usr/src/cmd/dlmgmtd/dlmgmt_db.c
+++ b/usr/src/cmd/dlmgmtd/dlmgmt_db.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2014, Joyent Inc. All rights reserved.
*/
#include <assert.h>
@@ -43,6 +44,8 @@
#include <libcontract.h>
#include <libcontract_priv.h>
#include <sys/contract/process.h>
+#include <sys/vnic.h>
+#include <zone.h>
#include "dlmgmt_impl.h"
typedef enum dlmgmt_db_op {
@@ -552,6 +555,10 @@ dlmgmt_db_update(dlmgmt_db_op_t op, const char *entryname, dlmgmt_link_t *linkp,
linkp->ll_zoneid, flags, &err)) == NULL)
return (err);
+ /* If transient op and onloan, use the global zone cache file. */
+ if (flags == DLMGMT_ACTIVE && linkp->ll_onloan)
+ req->ls_zoneid = GLOBAL_ZONEID;
+
/*
* If the return error is EINPROGRESS, this request is handled
* asynchronously; return success.
@@ -714,11 +721,13 @@ parse_linkprops(char *buf, dlmgmt_link_t *linkp)
char attr_name[MAXLINKATTRLEN];
size_t attr_buf_len = 0;
void *attr_buf = NULL;
+ boolean_t rename;
curr = buf;
len = strlen(buf);
attr_name[0] = '\0';
for (i = 0; i < len; i++) {
+ rename = B_FALSE;
char c = buf[i];
boolean_t match = (c == '=' ||
(c == ',' && !found_type) || c == ';');
@@ -768,6 +777,21 @@ parse_linkprops(char *buf, dlmgmt_link_t *linkp)
goto parse_fail;
linkp->ll_media =
(uint32_t)*(int64_t *)attr_buf;
+ } else if (strcmp(attr_name, "zone") == 0) {
+ if (read_str(curr, &attr_buf) == 0)
+ goto parse_fail;
+ linkp->ll_zoneid = getzoneidbyname(attr_buf);
+ if (linkp->ll_zoneid == -1) {
+ if (errno == EFAULT)
+ abort();
+ /*
+ * If we can't find the zone, assign the
+ * link to the GZ and mark it for being
+ * renamed.
+ */
+ linkp->ll_zoneid = 0;
+ rename = B_TRUE;
+ }
} else {
attr_buf_len = translators[type].read_func(curr,
&attr_buf);
@@ -811,6 +835,16 @@ parse_linkprops(char *buf, dlmgmt_link_t *linkp)
(void) snprintf(attr_name, MAXLINKATTRLEN, "%s", curr);
}
+
+ /*
+ * The zone that this link belongs to has died, we are
+ * reparenting it to the GZ and renaming it to avoid name
+ * collisions.
+ */
+ if (rename == B_TRUE) {
+ (void) snprintf(linkp->ll_link, MAXLINKNAMELEN,
+ "SUNWorphan%u", (uint16_t)(gethrtime() / 1000));
+ }
curr = buf + i + 1;
}
@@ -1222,12 +1256,19 @@ generate_link_line(dlmgmt_link_t *linkp, boolean_t persist, char *buf)
ptr += snprintf(ptr, BUFLEN(lim, ptr), "%s\t", linkp->ll_link);
if (!persist) {
+ char zname[ZONENAME_MAX];
/*
- * We store the linkid in the active database so that dlmgmtd
- * can recover in the event that it is restarted.
+ * We store the linkid and the zone name in the active database
+ * so that dlmgmtd can recover in the event that it is
+ * restarted.
*/
u64 = linkp->ll_linkid;
ptr += write_uint64(ptr, BUFLEN(lim, ptr), "linkid", &u64);
+
+ if (getzonenamebyid(linkp->ll_zoneid, zname,
+ sizeof (zname)) != -1) {
+ ptr += write_str(ptr, BUFLEN(lim, ptr), "zone", zname);
+ }
}
u64 = linkp->ll_class;
ptr += write_uint64(ptr, BUFLEN(lim, ptr), "class", &u64);
@@ -1382,13 +1423,49 @@ dlmgmt_db_walk(zoneid_t zoneid, datalink_class_t class, db_walk_func_t *func)
}
/*
+ * Attempt to mitigate one of the deadlocks in the dlmgmtd architecture.
+ *
+ * dlmgmt_db_init() calls dlmgmt_process_db_req() which eventually gets to
+ * dlmgmt_zfop() which tries to fork, enter the zone and read the file.
+ * Because of the upcall architecture of dlmgmtd this can lead to deadlock
+ * with the following scenario:
+ * a) the thread preparing to fork will have acquired the malloc locks
+ * then attempt to suspend every thread in preparation to fork.
+ * b) all of the upcalls will be blocked in door_ucred() trying to malloc()
+ * and get the credentials of their caller.
+ * c) we can't suspend the in-kernel thread making the upcall.
+ *
+ * Thus, we cannot serve door requests because we're blocked in malloc()
+ * which fork() owns, but fork() is in turn blocked on the in-kernel thread
+ * making the door upcall. This is a fundamental architectural problem with
+ * any server handling upcalls and also trying to fork().
+ *
+ * To minimize the chance of this deadlock occuring, we check ahead of time to
+ * see if the file we want to read actually exists in the zone (which it almost
+ * never does), so we don't need fork in that case (i.e. rarely to never).
+ */
+static boolean_t
+zone_file_exists(char *zoneroot, char *filename)
+{
+ struct stat sb;
+ char fname[MAXPATHLEN];
+
+ (void) snprintf(fname, sizeof (fname), "%s/%s", zoneroot, filename);
+
+ if (stat(fname, &sb) == -1)
+ return (B_FALSE);
+
+ return (B_TRUE);
+}
+
+/*
* Initialize the datalink <link name, linkid> mapping and the link's
* attributes list based on the configuration file /etc/dladm/datalink.conf
* and the active configuration cache file
* /etc/svc/volatile/dladm/datalink-management:default.cache.
*/
int
-dlmgmt_db_init(zoneid_t zoneid)
+dlmgmt_db_init(zoneid_t zoneid, char *zoneroot)
{
dlmgmt_db_req_t *req;
int err;
@@ -1398,22 +1475,36 @@ dlmgmt_db_init(zoneid_t zoneid)
DATALINK_INVALID_LINKID, zoneid, DLMGMT_ACTIVE, &err)) == NULL)
return (err);
- if ((err = dlmgmt_process_db_req(req)) != 0) {
- /*
- * If we get back ENOENT, that means that the active
- * configuration file doesn't exist yet, and is not an error.
- * We'll create it down below after we've loaded the
- * persistent configuration.
- */
- if (err != ENOENT)
- goto done;
+ /* Handle running in a non-native branded zone (i.e. has /native) */
+ if (zone_file_exists(zoneroot, "/native" DLMGMT_TMPFS_DIR)) {
+ char tdir[MAXPATHLEN];
+
+ (void) snprintf(tdir, sizeof (tdir), "/native%s", cachefile);
+ (void) strlcpy(cachefile, tdir, sizeof (cachefile));
+ }
+
+ if (zone_file_exists(zoneroot, cachefile)) {
+ if ((err = dlmgmt_process_db_req(req)) != 0) {
+ /*
+ * If we get back ENOENT, that means that the active
+ * configuration file doesn't exist yet, and is not an
+ * error. We'll create it down below after we've
+ * loaded the persistent configuration.
+ */
+ if (err != ENOENT)
+ goto done;
+ boot = B_TRUE;
+ }
+ } else {
boot = B_TRUE;
}
- req->ls_flags = DLMGMT_PERSIST;
- err = dlmgmt_process_db_req(req);
- if (err != 0 && err != ENOENT)
- goto done;
+ if (zone_file_exists(zoneroot, DLMGMT_PERSISTENT_DB_PATH)) {
+ req->ls_flags = DLMGMT_PERSIST;
+ err = dlmgmt_process_db_req(req);
+ if (err != 0 && err != ENOENT)
+ goto done;
+ }
err = 0;
if (rewrite_needed) {
/*
@@ -1442,6 +1533,11 @@ done:
/*
* Remove all links in the given zoneid.
+ *
+ * We do this work in two different passes. In the first pass, we remove any
+ * entry that hasn't been loaned and mark every entry that has been loaned as
+ * something that is going to be tombstomed. In the second pass, we drop the
+ * table lock for every entry and remove the tombstombed entry for our zone.
*/
void
dlmgmt_db_fini(zoneid_t zoneid)
@@ -1451,9 +1547,66 @@ dlmgmt_db_fini(zoneid_t zoneid)
while (linkp != NULL) {
next_linkp = AVL_NEXT(&dlmgmt_name_avl, linkp);
if (linkp->ll_zoneid == zoneid) {
- (void) dlmgmt_destroy_common(linkp,
- DLMGMT_ACTIVE | DLMGMT_PERSIST);
+ boolean_t onloan = linkp->ll_onloan;
+
+ /*
+ * Cleanup any VNICs that were loaned to the zone
+ * before the zone goes away and we can no longer
+ * refer to the VNIC by the name/zoneid.
+ */
+ if (onloan) {
+ (void) dlmgmt_delete_db_entry(linkp,
+ DLMGMT_ACTIVE);
+ linkp->ll_tomb = B_TRUE;
+ } else {
+ (void) dlmgmt_destroy_common(linkp,
+ DLMGMT_ACTIVE | DLMGMT_PERSIST);
+ }
+
}
linkp = next_linkp;
}
+
+again:
+ linkp = avl_first(&dlmgmt_name_avl);
+ while (linkp != NULL) {
+ vnic_ioc_delete_t ioc;
+
+ next_linkp = AVL_NEXT(&dlmgmt_name_avl, linkp);
+
+ if (linkp->ll_zoneid != zoneid) {
+ linkp = next_linkp;
+ continue;
+ }
+ ioc.vd_vnic_id = linkp->ll_linkid;
+ if (linkp->ll_tomb != B_TRUE)
+ abort();
+
+ /*
+ * We have to drop the table lock while going up into the
+ * kernel. If we hold the table lock while deleting a vnic, we
+ * may get blocked on the mac perimeter and the holder of it may
+ * want something from dlmgmtd.
+ */
+ dlmgmt_table_unlock();
+
+ if (ioctl(dladm_dld_fd(dld_handle),
+ VNIC_IOC_DELETE, &ioc) < 0)
+ dlmgmt_log(LOG_WARNING, "dlmgmt_db_fini "
+ "delete VNIC ioctl failed %d %d",
+ ioc.vd_vnic_id, errno);
+
+ /*
+ * Even though we've dropped the lock, we know that nothing else
+ * could have removed us. Therefore, it should be safe to go
+ * through and delete ourselves, but do nothing else. We'll have
+ * to restart iteration from the beginning. This can be painful.
+ */
+ dlmgmt_table_lock(B_TRUE);
+
+ (void) dlmgmt_destroy_common(linkp,
+ DLMGMT_ACTIVE | DLMGMT_PERSIST);
+ goto again;
+ }
+
}
diff --git a/usr/src/cmd/dlmgmtd/dlmgmt_door.c b/usr/src/cmd/dlmgmtd/dlmgmt_door.c
index 11e4329669..137c2a6fb3 100644
--- a/usr/src/cmd/dlmgmtd/dlmgmt_door.c
+++ b/usr/src/cmd/dlmgmtd/dlmgmt_door.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent Inc. All rights reserved.
*/
/*
@@ -58,6 +59,10 @@
#include <libsysevent.h>
#include <libdlmgmt.h>
#include <librcm.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
#include "dlmgmt_impl.h"
typedef void dlmgmt_door_handler_t(void *, void *, size_t *, zoneid_t,
@@ -379,6 +384,11 @@ dlmgmt_upcall_destroy(void *argp, void *retp, size_t *sz, zoneid_t zoneid,
if ((err = dlmgmt_checkprivs(linkp->ll_class, cred)) != 0)
goto done;
+ if (linkp->ll_tomb == B_TRUE) {
+ err = EINPROGRESS;
+ goto done;
+ }
+
if (((linkp->ll_flags & flags) & DLMGMT_ACTIVE) != 0) {
if ((err = dlmgmt_delete_db_entry(linkp, DLMGMT_ACTIVE)) != 0)
goto done;
@@ -439,6 +449,10 @@ dlmgmt_getlinkid(void *argp, void *retp, size_t *sz, zoneid_t zoneid,
dlmgmt_link_t *linkp;
int err = 0;
+ /* Enable the global zone to lookup links it has given away. */
+ if (zoneid == GLOBAL_ZONEID && getlinkid->ld_zoneid != -1)
+ zoneid = getlinkid->ld_zoneid;
+
/*
* Hold the reader lock to access the link
*/
@@ -648,6 +662,12 @@ dlmgmt_remapid(void *argp, void *retp, size_t *sz, zoneid_t zoneid,
if ((err = dlmgmt_checkprivs(linkp->ll_class, cred)) != 0)
goto done;
+ if (linkp->ll_tomb == B_TRUE) {
+ err = EBUSY;
+ goto done;
+ }
+
+
if (link_by_name(remapid->ld_link, linkp->ll_zoneid) != NULL) {
err = EEXIST;
goto done;
@@ -709,6 +729,11 @@ dlmgmt_upid(void *argp, void *retp, size_t *sz, zoneid_t zoneid,
if ((err = dlmgmt_checkprivs(linkp->ll_class, cred)) != 0)
goto done;
+ if (linkp->ll_tomb == B_TRUE) {
+ err = EBUSY;
+ goto done;
+ }
+
if (linkp->ll_flags & DLMGMT_ACTIVE) {
err = EINVAL;
goto done;
@@ -1216,6 +1241,11 @@ dlmgmt_setzoneid(void *argp, void *retp, size_t *sz, zoneid_t zoneid,
if ((err = dlmgmt_checkprivs(linkp->ll_class, cred)) != 0)
goto done;
+ if (linkp->ll_tomb == B_TRUE) {
+ err = EBUSY;
+ goto done;
+ }
+
/* We can only assign an active link to a zone. */
if (!(linkp->ll_flags & DLMGMT_ACTIVE)) {
err = EINVAL;
@@ -1245,7 +1275,19 @@ dlmgmt_setzoneid(void *argp, void *retp, size_t *sz, zoneid_t zoneid,
"zone %d: %s", linkid, oldzoneid, strerror(err));
goto done;
}
- avl_remove(&dlmgmt_loan_avl, linkp);
+
+ if (newzoneid == GLOBAL_ZONEID && linkp->ll_onloan) {
+ /*
+ * We can only reassign a loaned VNIC back to the
+ * global zone when the zone is shutting down, since
+ * otherwise the VNIC is in use by the zone and will be
+ * busy. Leave the VNIC assigned to the zone so we can
+ * still see it and delete it when dlmgmt_zonehalt()
+ * runs.
+ */
+ goto done;
+ }
+
linkp->ll_onloan = B_FALSE;
}
if (newzoneid != GLOBAL_ZONEID) {
@@ -1256,7 +1298,6 @@ dlmgmt_setzoneid(void *argp, void *retp, size_t *sz, zoneid_t zoneid,
(void) zone_add_datalink(oldzoneid, linkid);
goto done;
}
- avl_add(&dlmgmt_loan_avl, linkp);
linkp->ll_onloan = B_TRUE;
}
@@ -1309,6 +1350,10 @@ dlmgmt_zonehalt(void *argp, void *retp, size_t *sz, zoneid_t zoneid,
int err = 0;
dlmgmt_door_zonehalt_t *zonehalt = argp;
dlmgmt_zonehalt_retval_t *retvalp = retp;
+ static char my_pid[10];
+
+ if (my_pid[0] == NULL)
+ (void) snprintf(my_pid, sizeof (my_pid), "%d\n", getpid());
if ((err = dlmgmt_checkprivs(0, cred)) == 0) {
if (zoneid != GLOBAL_ZONEID) {
@@ -1316,9 +1361,26 @@ dlmgmt_zonehalt(void *argp, void *retp, size_t *sz, zoneid_t zoneid,
} else if (zonehalt->ld_zoneid == GLOBAL_ZONEID) {
err = EINVAL;
} else {
+ /*
+ * dls and mac don't honor the locking rules defined in
+ * mac. In order to try and make that case less likely
+ * to happen, we try to serialize some of the zone
+ * activity here between dlmgmtd and the brands on
+ * /etc/dladm/zone.lck
+ */
+ int fd;
+
+ while ((fd = open(ZONE_LOCK, O_WRONLY |
+ O_CREAT | O_EXCL, S_IRUSR | S_IWUSR)) < 0)
+ (void) sleep(1);
+ (void) write(fd, my_pid, sizeof (my_pid));
+ (void) close(fd);
+
dlmgmt_table_lock(B_TRUE);
dlmgmt_db_fini(zonehalt->ld_zoneid);
dlmgmt_table_unlock();
+
+ (void) unlink(ZONE_LOCK);
}
}
retvalp->lr_err = err;
diff --git a/usr/src/cmd/dlmgmtd/dlmgmt_impl.h b/usr/src/cmd/dlmgmtd/dlmgmt_impl.h
index cdfd0d8a4d..dde27ef66e 100644
--- a/usr/src/cmd/dlmgmtd/dlmgmt_impl.h
+++ b/usr/src/cmd/dlmgmtd/dlmgmt_impl.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent Inc. All rights reserved.
*/
/*
@@ -67,6 +68,7 @@ typedef struct dlmgmt_link_s {
avl_node_t ll_loan_node;
uint32_t ll_flags;
uint32_t ll_gen; /* generation number */
+ boolean_t ll_tomb; /* tombstombed */
} dlmgmt_link_t;
/*
@@ -84,6 +86,8 @@ typedef struct dlmgmt_dlconf_s {
avl_node_t ld_node;
} dlmgmt_dlconf_t;
+#define ZONE_LOCK "/etc/dladm/zone.lck"
+
extern boolean_t debug;
extern const char *progname;
extern char cachefile[];
@@ -138,7 +142,7 @@ void dlmgmt_handler(void *, char *, size_t, door_desc_t *, uint_t);
void dlmgmt_log(int, const char *, ...);
int dlmgmt_write_db_entry(const char *, dlmgmt_link_t *, uint32_t);
int dlmgmt_delete_db_entry(dlmgmt_link_t *, uint32_t);
-int dlmgmt_db_init(zoneid_t);
+int dlmgmt_db_init(zoneid_t, char *);
void dlmgmt_db_fini(zoneid_t);
#ifdef __cplusplus
diff --git a/usr/src/cmd/dlmgmtd/dlmgmt_main.c b/usr/src/cmd/dlmgmtd/dlmgmt_main.c
index c02610bb5f..d8397cc0e6 100644
--- a/usr/src/cmd/dlmgmtd/dlmgmt_main.c
+++ b/usr/src/cmd/dlmgmtd/dlmgmt_main.c
@@ -22,6 +22,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
*/
/*
@@ -125,15 +126,24 @@ dlmgmt_door_fini(void)
dlmgmt_door_fd = -1;
}
-int
+static int
dlmgmt_door_attach(zoneid_t zoneid, char *rootdir)
{
int fd;
int err = 0;
char doorpath[MAXPATHLEN];
+ struct stat statbuf;
- (void) snprintf(doorpath, sizeof (doorpath), "%s%s", rootdir,
- DLMGMT_DOOR);
+ /* Handle running in a non-native branded zone (i.e. has /native) */
+ (void) snprintf(doorpath, sizeof (doorpath), "%s/native%s",
+ rootdir, DLMGMT_TMPFS_DIR);
+ if (stat(doorpath, &statbuf) == 0) {
+ (void) snprintf(doorpath, sizeof (doorpath), "%s/native%s",
+ rootdir, DLMGMT_DOOR);
+ } else {
+ (void) snprintf(doorpath, sizeof (doorpath), "%s%s",
+ rootdir, DLMGMT_DOOR);
+ }
/*
* Create the door file for dlmgmtd.
@@ -192,8 +202,16 @@ dlmgmt_zone_init(zoneid_t zoneid)
(void) snprintf(tmpfsdir, sizeof (tmpfsdir), "%s%s", rootdir,
DLMGMT_TMPFS_DIR);
if (stat(tmpfsdir, &statbuf) < 0) {
- if (mkdir(tmpfsdir, (mode_t)0755) < 0)
- return (errno);
+ if (mkdir(tmpfsdir, (mode_t)0755) < 0) {
+ /*
+ * Handle running in a non-native branded zone
+ * (i.e. has /native)
+ */
+ (void) snprintf(tmpfsdir, sizeof (tmpfsdir),
+ "%s/native%s", rootdir, DLMGMT_TMPFS_DIR);
+ if (mkdir(tmpfsdir, (mode_t)0755) < 0)
+ return (errno);
+ }
} else if ((statbuf.st_mode & S_IFMT) != S_IFDIR) {
return (ENOTDIR);
}
@@ -203,7 +221,7 @@ dlmgmt_zone_init(zoneid_t zoneid)
return (EPERM);
}
- if ((err = dlmgmt_db_init(zoneid)) != 0)
+ if ((err = dlmgmt_db_init(zoneid, rootdir)) != 0)
return (err);
return (dlmgmt_door_attach(zoneid, rootdir));
}
@@ -214,7 +232,7 @@ dlmgmt_zone_init(zoneid_t zoneid)
static int
dlmgmt_allzones_init(void)
{
- int err, i;
+ int i;
zoneid_t *zids = NULL;
uint_t nzids, nzids_saved;
@@ -235,11 +253,37 @@ again:
}
for (i = 0; i < nzids; i++) {
- if ((err = dlmgmt_zone_init(zids[i])) != 0)
- break;
+ int res;
+ zone_status_t status;
+
+ /*
+ * Skip over zones that have gone away or are going down
+ * since we got the list. Process all zones in the list,
+ * logging errors for any that failed.
+ */
+ if (zone_getattr(zids[i], ZONE_ATTR_STATUS, &status,
+ sizeof (status)) < 0)
+ continue;
+ switch (status) {
+ case ZONE_IS_SHUTTING_DOWN:
+ case ZONE_IS_EMPTY:
+ case ZONE_IS_DOWN:
+ case ZONE_IS_DYING:
+ case ZONE_IS_DEAD:
+ /* FALLTHRU */
+ continue;
+ default:
+ break;
+ }
+ if ((res = dlmgmt_zone_init(zids[i])) != 0) {
+ (void) fprintf(stderr, "zone (%ld) init error %s",
+ zids[i], strerror(res));
+ dlmgmt_log(LOG_ERR, "zone (%d) init error %s",
+ zids[i], strerror(res));
+ }
}
free(zids);
- return (err);
+ return (0);
}
static int
@@ -262,6 +306,8 @@ dlmgmt_init(void)
return (err);
}
+ (void) unlink(ZONE_LOCK);
+
/*
* First derive the name of the cache file from the FMRI name. This
* cache name is used to keep active datalink configuration.
diff --git a/usr/src/cmd/dlmgmtd/dlmgmt_util.c b/usr/src/cmd/dlmgmtd/dlmgmt_util.c
index afcfbed37b..7493ee3577 100644
--- a/usr/src/cmd/dlmgmtd/dlmgmt_util.c
+++ b/usr/src/cmd/dlmgmtd/dlmgmt_util.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent Inc. All rights reserved.
*/
/*
@@ -45,13 +46,10 @@
/*
* There are three datalink AVL tables. The dlmgmt_name_avl tree contains all
* datalinks and is keyed by zoneid and link name. The dlmgmt_id_avl also
- * contains all datalinks, and it is keyed by link ID. The dlmgmt_loan_avl is
- * keyed by link name, and contains the set of global-zone links that are
- * currently on loan to non-global zones.
+ * contains all datalinks, and it is keyed by link ID.
*/
avl_tree_t dlmgmt_name_avl;
avl_tree_t dlmgmt_id_avl;
-avl_tree_t dlmgmt_loan_avl;
avl_tree_t dlmgmt_dlconf_avl;
@@ -162,8 +160,6 @@ dlmgmt_linktable_init(void)
offsetof(dlmgmt_link_t, ll_name_node));
avl_create(&dlmgmt_id_avl, cmp_link_by_id, sizeof (dlmgmt_link_t),
offsetof(dlmgmt_link_t, ll_id_node));
- avl_create(&dlmgmt_loan_avl, cmp_link_by_name, sizeof (dlmgmt_link_t),
- offsetof(dlmgmt_link_t, ll_loan_node));
avl_create(&dlmgmt_dlconf_avl, cmp_dlconf_by_id,
sizeof (dlmgmt_dlconf_t), offsetof(dlmgmt_dlconf_t, ld_node));
dlmgmt_nextlinkid = 1;
@@ -181,7 +177,6 @@ dlmgmt_linktable_fini(void)
avl_destroy(&dlmgmt_dlconf_avl);
avl_destroy(&dlmgmt_name_avl);
- avl_destroy(&dlmgmt_loan_avl);
avl_destroy(&dlmgmt_id_avl);
}
@@ -385,7 +380,6 @@ link_activate(dlmgmt_link_t *linkp)
linkp->ll_zoneid = zoneid;
avl_add(&dlmgmt_name_avl, linkp);
- avl_add(&dlmgmt_loan_avl, linkp);
linkp->ll_onloan = B_TRUE;
}
} else if (linkp->ll_zoneid != GLOBAL_ZONEID) {
@@ -430,10 +424,6 @@ link_by_name(const char *name, zoneid_t zoneid)
(void) strlcpy(link.ll_link, name, MAXLINKNAMELEN);
link.ll_zoneid = zoneid;
linkp = avl_find(&dlmgmt_name_avl, &link, NULL);
- if (linkp == NULL && zoneid == GLOBAL_ZONEID) {
- /* The link could be on loan to a non-global zone? */
- linkp = avl_find(&dlmgmt_loan_avl, &link, NULL);
- }
return (linkp);
}
@@ -461,6 +451,7 @@ dlmgmt_create_common(const char *name, datalink_class_t class, uint32_t media,
linkp->ll_linkid = dlmgmt_nextlinkid;
linkp->ll_zoneid = zoneid;
linkp->ll_gen = 0;
+ linkp->ll_tomb = B_FALSE;
if (avl_find(&dlmgmt_name_avl, linkp, &name_where) != NULL ||
avl_find(&dlmgmt_id_avl, linkp, &id_where) != NULL) {
@@ -511,8 +502,6 @@ dlmgmt_destroy_common(dlmgmt_link_t *linkp, uint32_t flags)
if ((flags & DLMGMT_ACTIVE) && linkp->ll_zoneid != GLOBAL_ZONEID) {
(void) zone_remove_datalink(linkp->ll_zoneid, linkp->ll_linkid);
- if (linkp->ll_onloan)
- avl_remove(&dlmgmt_loan_avl, linkp);
}
if (linkp->ll_flags == 0) {
diff --git a/usr/src/cmd/dlmgmtd/svc-dlmgmtd b/usr/src/cmd/dlmgmtd/svc-dlmgmtd
index 7559207535..a75e71f9b3 100644
--- a/usr/src/cmd/dlmgmtd/svc-dlmgmtd
+++ b/usr/src/cmd/dlmgmtd/svc-dlmgmtd
@@ -23,17 +23,16 @@
#
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
+# Copyright 2012 Joyent, Inc. All rights reserved.
#
-# ident "%Z%%M% %I% %E% SMI"
. /lib/svc/share/smf_include.sh
-# The real daemon is not started in a non-global zone. But we need to
-# create a dummy background process to preserve contract lifetime.
+# The real daemon is not started in a non-global zone. Exit to leave
+# an empty contract.
if smf_is_nonglobalzone; then
- (while true ; do sleep 3600 ; done) &
- exit $SMF_EXIT_OK
+ exit $SMF_EXIT_NODAEMON
fi
# Start the dlmgmtd daemon.
diff --git a/usr/src/cmd/dlstat/dlstat.c b/usr/src/cmd/dlstat/dlstat.c
index a931ba82ff..2615fdbb12 100644
--- a/usr/src/cmd/dlstat/dlstat.c
+++ b/usr/src/cmd/dlstat/dlstat.c
@@ -62,7 +62,7 @@ typedef struct link_chain_s {
struct link_chain_s *lc_next;
} link_chain_t;
-typedef void * (*stats2str_t)(const char *, void *,
+typedef void * (*stats2str_t)(const char *, const char *, void *,
char, boolean_t);
typedef struct show_state {
@@ -141,6 +141,7 @@ typedef struct total_fields_buf_s {
char t_rbytes[MAXSTATLEN];
char t_opackets[MAXSTATLEN];
char t_obytes[MAXSTATLEN];
+ char t_zone[ZONENAME_MAX];
} total_fields_buf_t;
static ofmt_field_t total_s_fields[] = {
@@ -154,6 +155,8 @@ static ofmt_field_t total_s_fields[] = {
offsetof(total_fields_buf_t, t_opackets), print_default_cb},
{ "OBYTES", 8,
offsetof(total_fields_buf_t, t_obytes), print_default_cb},
+{ "ZONE", 20,
+ offsetof(total_fields_buf_t, t_zone), print_default_cb},
{ NULL, 0, 0, NULL}};
/*
@@ -957,8 +960,8 @@ cleanup_removed_links(show_state_t *state)
}
void *
-print_total_stats(const char *linkname, void *statentry, char unit,
- boolean_t parsable)
+print_total_stats(const char *linkname, const char *zonename, void *statentry,
+ char unit, boolean_t parsable)
{
total_stat_entry_t *sentry = statentry;
total_stat_t *link_stats = &sentry->tse_stats;
@@ -970,6 +973,7 @@ print_total_stats(const char *linkname, void *statentry, char unit,
(void) snprintf(buf->t_linkname, sizeof (buf->t_linkname), "%s",
linkname);
+ (void) snprintf(buf->t_zone, sizeof (buf->t_zone), "%s", zonename);
map_to_units(buf->t_ipackets, sizeof (buf->t_ipackets),
link_stats->ts_ipackets, unit, parsable);
@@ -988,8 +992,8 @@ done:
}
void *
-print_rx_generic_ring_stats(const char *linkname, void *statentry, char unit,
- boolean_t parsable)
+print_rx_generic_ring_stats(const char *linkname, const char *zonename,
+ void *statentry, char unit, boolean_t parsable)
{
ring_stat_entry_t *sentry = statentry;
ring_stat_t *link_stats = &sentry->re_stats;
@@ -1022,8 +1026,8 @@ done:
}
void *
-print_tx_generic_ring_stats(const char *linkname, void *statentry, char unit,
- boolean_t parsable)
+print_tx_generic_ring_stats(const char *linkname, const char *zonename,
+ void *statentry, char unit, boolean_t parsable)
{
ring_stat_entry_t *sentry = statentry;
ring_stat_t *link_stats = &sentry->re_stats;
@@ -1056,8 +1060,8 @@ done:
}
void *
-print_rx_ring_stats(const char *linkname, void *statentry, char unit,
- boolean_t parsable)
+print_rx_ring_stats(const char *linkname, const char *zonename, void *statentry,
+ char unit, boolean_t parsable)
{
ring_stat_entry_t *sentry = statentry;
ring_stat_t *link_stats = &sentry->re_stats;
@@ -1090,8 +1094,8 @@ done:
}
void *
-print_tx_ring_stats(const char *linkname, void *statentry, char unit,
- boolean_t parsable)
+print_tx_ring_stats(const char *linkname, const char *zonename, void *statentry,
+ char unit, boolean_t parsable)
{
ring_stat_entry_t *sentry = statentry;
ring_stat_t *link_stats = &sentry->re_stats;
@@ -1124,8 +1128,8 @@ done:
}
void *
-print_rx_generic_lane_stats(const char *linkname, void *statentry, char unit,
- boolean_t parsable)
+print_rx_generic_lane_stats(const char *linkname, const char *zonename,
+ void *statentry, char unit, boolean_t parsable)
{
rx_lane_stat_entry_t *sentry = statentry;
rx_lane_stat_t *link_stats = &sentry->rle_stats;
@@ -1172,8 +1176,8 @@ done:
}
void *
-print_tx_generic_lane_stats(const char *linkname, void *statentry, char unit,
- boolean_t parsable)
+print_tx_generic_lane_stats(const char *linkname, const char *zonename,
+ void *statentry, char unit, boolean_t parsable)
{
tx_lane_stat_entry_t *sentry = statentry;
tx_lane_stat_t *link_stats = &sentry->tle_stats;
@@ -1217,8 +1221,8 @@ done:
}
void *
-print_rx_lane_stats(const char *linkname, void *statentry, char unit,
- boolean_t parsable)
+print_rx_lane_stats(const char *linkname, const char *zonename, void *statentry,
+ char unit, boolean_t parsable)
{
rx_lane_stat_entry_t *sentry = statentry;
rx_lane_stat_t *link_stats = &sentry->rle_stats;
@@ -1283,9 +1287,8 @@ done:
}
void *
-print_tx_lane_stats(const char *linkname, void *statentry, char unit,
- boolean_t parsable)
-{
+print_tx_lane_stats(const char *linkname, const char *zonename, void *statentry,
+ char unit, boolean_t parsable) {
tx_lane_stat_entry_t *sentry = statentry;
tx_lane_stat_t *link_stats = &sentry->tle_stats;
tx_lane_fields_buf_t *buf = NULL;
@@ -1338,8 +1341,8 @@ done:
}
void *
-print_fanout_stats(const char *linkname, void *statentry, char unit,
- boolean_t parsable)
+print_fanout_stats(const char *linkname, const char *zonename, void *statentry,
+ char unit, boolean_t parsable)
{
fanout_stat_entry_t *sentry = statentry;
fanout_stat_t *link_stats = &sentry->fe_stats;
@@ -1392,8 +1395,8 @@ done:
}
void *
-print_aggr_port_stats(const char *linkname, void *statentry, char unit,
- boolean_t parsable)
+print_aggr_port_stats(const char *linkname, const char *zonename,
+ void *statentry, char unit, boolean_t parsable)
{
aggr_port_stat_entry_t *sentry = statentry;
aggr_port_stat_t *link_stats = &sentry->ape_stats;
@@ -1470,7 +1473,8 @@ done:
void
walk_dlstat_stats(show_state_t *state, const char *linkname,
- dladm_stat_type_t stattype, dladm_stat_chain_t *diff_stat)
+ const char *zonename, dladm_stat_type_t stattype,
+ dladm_stat_chain_t *diff_stat)
{
dladm_stat_chain_t *curr;
@@ -1480,7 +1484,8 @@ walk_dlstat_stats(show_state_t *state, const char *linkname,
/* Format the raw numbers for printing */
fields_buf = state->ls_stats2str[stattype](linkname,
- curr->dc_statentry, state->ls_unit, state->ls_parsable);
+ zonename, curr->dc_statentry, state->ls_unit,
+ state->ls_parsable);
/* Print the stats */
if (fields_buf != NULL)
ofmt_print(state->ls_ofmt, fields_buf);
@@ -1495,12 +1500,20 @@ show_queried_stats(dladm_handle_t dh, datalink_id_t linkid, void *arg)
int i;
dladm_stat_chain_t *diff_stat;
char linkname[DLPI_LINKNAME_MAX];
+ char zonename[DLADM_PROP_VAL_MAX + 1];
+ char *valptr[1];
+ uint_t valcnt = 1;
if (dladm_datalink_id2info(dh, linkid, NULL, NULL, NULL, linkname,
DLPI_LINKNAME_MAX) != DLADM_STATUS_OK) {
goto done;
}
+ valptr[0] = zonename;
+ if (dladm_get_linkprop(handle, linkid, DLADM_PROP_VAL_CURRENT, "zone",
+ (char **)valptr, &valcnt) != 0)
+ zonename[0] = '\0';
+
for (i = 0; i < DLADM_STAT_NUM_STATS; i++) {
if (state->ls_stattype[i]) {
/*
@@ -1508,7 +1521,8 @@ show_queried_stats(dladm_handle_t dh, datalink_id_t linkid, void *arg)
* Stats are returned as chain of raw numbers
*/
diff_stat = query_link_stats(handle, linkid, arg, i);
- walk_dlstat_stats(state, linkname, i, diff_stat);
+ walk_dlstat_stats(state, linkname, zonename, i,
+ diff_stat);
dladm_link_stat_free(diff_stat);
}
}
@@ -1628,7 +1642,7 @@ do_show(int argc, char *argv[], const char *use)
char *o_fields_str = NULL;
char *total_stat_fields =
- "link,ipkts,rbytes,opkts,obytes";
+ "link,ipkts,rbytes,opkts,obytes,zone";
char *rx_total_stat_fields =
"link,ipkts,rbytes,intrs,polls,ch<10,ch10-50,ch>50";
char *tx_total_stat_fields =
diff --git a/usr/src/cmd/dtrace/test/cmd/jdtrace/Makefile b/usr/src/cmd/dtrace/test/cmd/jdtrace/Makefile
index 99a571bee8..7aa028b4c1 100644
--- a/usr/src/cmd/dtrace/test/cmd/jdtrace/Makefile
+++ b/usr/src/cmd/dtrace/test/cmd/jdtrace/Makefile
@@ -23,7 +23,6 @@
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
PROG = jdtrace
SRCS = jdtrace.c
@@ -77,7 +76,7 @@ $(PROG): $(SRCS)
JFLAGS= -g -cp $(CLASSPATH) -d $(CLASSDIR) -deprecation
JFLAGS += -target 1.5
-JFLAGS += -Xlint
+JFLAGS += -Xlint -Xlint:-path
COMPILE.java=$(JAVAC) $(JFLAGS)
JAVASRC= JDTrace.java Getopt.java
diff --git a/usr/src/cmd/dtrace/test/cmd/scripts/dtest.pl b/usr/src/cmd/dtrace/test/cmd/scripts/dtest.pl
index d6f1c8c277..e7f9189822 100644
--- a/usr/src/cmd/dtrace/test/cmd/scripts/dtest.pl
+++ b/usr/src/cmd/dtrace/test/cmd/scripts/dtest.pl
@@ -566,7 +566,7 @@ $defdir = -d $dt_tst ? $dt_tst : '.';
$bindir = -d $dt_bin ? $dt_bin : '.';
if (!$opt_F) {
- my @dependencies = ("gcc", "make", "java", "perl");
+ my @dependencies = ("gcc", "cc", "make", "java", "perl", "printenv");
for my $dep (@dependencies) {
if (!inpath($dep)) {
diff --git a/usr/src/cmd/dtrace/test/tst/common/Makefile b/usr/src/cmd/dtrace/test/tst/common/Makefile
index 49d810a720..efa32d9179 100644
--- a/usr/src/cmd/dtrace/test/tst/common/Makefile
+++ b/usr/src/cmd/dtrace/test/tst/common/Makefile
@@ -150,6 +150,14 @@ usdt/tst.forker.o: usdt/forker.h
usdt/forker.h: usdt/forker.d
$(DTRACE) -h -s usdt/forker.d -o usdt/forker.h
+ustack/tst.unpriv.exe: ustack/tst.unpriv.o ustack/unpriv_helper.o
+ $(LINK.c) -o ustack/tst.unpriv.exe \
+ ustack/tst.unpriv.o ustack/unpriv_helper.o $(LDLIBS)
+ $(POST_PROCESS) ; $(STRIP_STABS)
+
+ustack/unpriv_helper.o: ustack/unpriv_helper.d
+ $(COMPILE.d) -o ustack/unpriv_helper.o -s ustack/unpriv_helper.d
+
usdt/tst.lazyprobe.exe: usdt/tst.lazyprobe.o usdt/lazyprobe.o
$(LINK.c) -o usdt/tst.lazyprobe.exe \
usdt/tst.lazyprobe.o usdt/lazyprobe.o $(LDLIBS)
diff --git a/usr/src/cmd/dtrace/test/tst/common/java_api/Makefile b/usr/src/cmd/dtrace/test/tst/common/java_api/Makefile
index fe213dd22a..26bf656edc 100644
--- a/usr/src/cmd/dtrace/test/tst/common/java_api/Makefile
+++ b/usr/src/cmd/dtrace/test/tst/common/java_api/Makefile
@@ -23,7 +23,6 @@
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-#ident "%Z%%M% %I% %E% SMI"
include $(SRC)/Makefile.master
@@ -62,7 +61,7 @@ install: all $(PROTO_TEST_JAR)
JFLAGS= -g -cp $(CLASSPATH) -d $(CLASSDIR) -deprecation
JFLAGS += -target 1.5
-JFLAGS += -Xlint
+JFLAGS += -Xlint -Xlint:-path
COMPILE.java=$(JAVAC) $(JFLAGS)
$(TEST_JAR): $(SRCDIR)/*.java
diff --git a/usr/src/cmd/dtrace/test/tst/common/llquantize/tst.range.d b/usr/src/cmd/dtrace/test/tst/common/llquantize/tst.range.d
index e2882b3f8e..a9138d2f54 100644
--- a/usr/src/cmd/dtrace/test/tst/common/llquantize/tst.range.d
+++ b/usr/src/cmd/dtrace/test/tst/common/llquantize/tst.range.d
@@ -19,10 +19,6 @@
* CDDL HEADER END
*/
-/*
- * Copyright (c) 2011, Joyent, Inc. All rights reserved.
- */
-
#pragma D option quiet
BEGIN
diff --git a/usr/src/cmd/dtrace/test/tst/common/mdb/tst.postmort.ksh b/usr/src/cmd/dtrace/test/tst/common/mdb/tst.postmort.ksh
new file mode 100644
index 0000000000..c5921b8d28
--- /dev/null
+++ b/usr/src/cmd/dtrace/test/tst/common/mdb/tst.postmort.ksh
@@ -0,0 +1,69 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2011, Joyent, Inc. All rights reserved.
+#
+
+if [ $# != 1 ]; then
+ echo expected one argument: '<'dtrace-path'>'
+ exit 2
+fi
+
+dtrace=$1
+DIR=/var/tmp/dtest.$$
+
+mkdir $DIR
+cd $DIR
+
+expected=`od -t u8 -N 8 /dev/urandom | head -1 | cut -d ' ' -f2`
+
+$dtrace -x bufpolicy=ring -x bufsize=10k -qs /dev/stdin > /dev/null 2>&1 <<EOF &
+ tick-1ms
+ /i < 10000/
+ {
+ printf("%d: expected is $expected!\n", i++);
+ }
+
+ tick-1ms
+ /i >= 10000/
+ {
+ exit(0);
+ }
+EOF
+
+background=$!
+
+#
+# Give some time for the enabling to get there...
+#
+sleep 2
+
+echo "::walk dtrace_state | ::dtrace" | mdb -k | tee test.out
+grep "expected is $expected" test.out 2> /dev/null 1>&2
+status=$?
+
+kill $background
+
+cd /
+/usr/bin/rm -rf $DIR
+
+exit $status
diff --git a/usr/src/cmd/dtrace/test/tst/common/ustack/tst.unpriv.c b/usr/src/cmd/dtrace/test/tst/common/ustack/tst.unpriv.c
new file mode 100644
index 0000000000..43ba244444
--- /dev/null
+++ b/usr/src/cmd/dtrace/test/tst/common/ustack/tst.unpriv.c
@@ -0,0 +1,7 @@
+int
+main(int argc, char *argv[])
+{
+ for (;;)
+ ;
+ return (0);
+}
diff --git a/usr/src/cmd/dtrace/test/tst/common/ustack/tst.unpriv.ksh b/usr/src/cmd/dtrace/test/tst/common/ustack/tst.unpriv.ksh
new file mode 100644
index 0000000000..26c430bff7
--- /dev/null
+++ b/usr/src/cmd/dtrace/test/tst/common/ustack/tst.unpriv.ksh
@@ -0,0 +1,68 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2011, Joyent, Inc. All rights reserved.
+#
+
+ppriv -s A=basic,dtrace_user,dtrace_proc $$
+
+if [ $# != 1 ]; then
+ echo expected one argument: '<'dtrace-path'>'
+ exit 2
+fi
+
+file=out.$$
+dtrace=$1
+
+rm -f $file
+
+dir=`/bin/dirname $tst`
+
+$dtrace -o $file -c $dir/tst.unpriv.exe -ws /dev/stdin <<EOF
+ profile-1234hz
+ /pid == \$target/
+ {
+ @[ustack(20, 8192)] = count();
+ }
+
+ tick-1s
+ {
+ secs++;
+ }
+
+ tick-1s
+ /secs > 10/
+ {
+ trace("test timed out");
+ exit(1);
+ }
+
+ profile-1234hz
+ /pid == \$target && secs > 5/
+ {
+ raise(SIGINT);
+ exit(0);
+ }
+EOF
+
+status=$?
+exit $status
diff --git a/usr/src/cmd/dtrace/test/tst/common/ustack/unpriv_helper.d b/usr/src/cmd/dtrace/test/tst/common/ustack/unpriv_helper.d
new file mode 100644
index 0000000000..eb7b0e9e9d
--- /dev/null
+++ b/usr/src/cmd/dtrace/test/tst/common/ustack/unpriv_helper.d
@@ -0,0 +1,4 @@
+dtrace:helper:ustack:
+{
+ this->otherstr = "doogle";
+}
diff --git a/usr/src/cmd/flowadm/flowadm.c b/usr/src/cmd/flowadm/flowadm.c
index 374fa1675c..34e597dc78 100644
--- a/usr/src/cmd/flowadm/flowadm.c
+++ b/usr/src/cmd/flowadm/flowadm.c
@@ -21,6 +21,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011 Joyent, Inc. All rights reserved.
*/
#include <stdio.h>
@@ -233,9 +234,9 @@ usage(void)
(void) fprintf(stderr, gettext("usage: flowadm <subcommand>"
" <args>...\n"
" add-flow [-t] -l <link> -a <attr>=<value>[,...]\n"
- "\t\t [-p <prop>=<value>,...] <flow>\n"
- " remove-flow [-t] {-l <link> | <flow>}\n"
- " show-flow [-p] [-l <link>] "
+ "\t\t [-p <prop>=<value>,...] [-z zonename] <flow>\n"
+ " remove-flow [-t] [-z zonename] {-l <link> | <flow>}\n"
+ " show-flow [-p] [-l <link>] [-z zonename] "
"[<flow>]\n\n"
" set-flowprop [-t] -p <prop>=<value>[,...] <flow>\n"
" reset-flowprop [-t] [-p <prop>,...] <flow>\n"
@@ -333,11 +334,12 @@ do_add_flow(int argc, char *argv[])
dladm_arg_list_t *proplist = NULL;
dladm_arg_list_t *attrlist = NULL;
dladm_status_t status;
+ char *zonename = NULL;
bzero(propstr, DLADM_STRSIZE);
bzero(attrstr, DLADM_STRSIZE);
- while ((option = getopt_long(argc, argv, "tR:l:a:p:",
+ while ((option = getopt_long(argc, argv, "tR:l:a:p:z:",
prop_longopts, NULL)) != -1) {
switch (option) {
case 't':
@@ -351,9 +353,6 @@ do_add_flow(int argc, char *argv[])
MAXLINKNAMELEN) >= MAXLINKNAMELEN) {
die("link name too long");
}
- if (dladm_name2info(handle, devname, &linkid, NULL,
- NULL, NULL) != DLADM_STATUS_OK)
- die("invalid link '%s'", devname);
l_arg = B_TRUE;
break;
case 'a':
@@ -368,6 +367,9 @@ do_add_flow(int argc, char *argv[])
DLADM_STRSIZE)
die("property list too long '%s'", propstr);
break;
+ case 'z':
+ zonename = optarg;
+ break;
default:
die_opterr(optopt, option);
}
@@ -376,6 +378,10 @@ do_add_flow(int argc, char *argv[])
die("link is required");
}
+ if (dladm_zname2info(handle, zonename, devname, &linkid, NULL,
+ NULL, NULL) != DLADM_STATUS_OK)
+ die("invalid link '%s'", devname);
+
opterr = 0;
index = optind;
@@ -414,11 +420,12 @@ do_remove_flow(int argc, char *argv[])
boolean_t l_arg = B_FALSE;
remove_flow_state_t state;
dladm_status_t status;
+ char *zonename = NULL;
bzero(&state, sizeof (state));
opterr = 0;
- while ((option = getopt_long(argc, argv, ":tR:l:",
+ while ((option = getopt_long(argc, argv, ":tR:l:z:",
longopts, NULL)) != -1) {
switch (option) {
case 't':
@@ -432,12 +439,11 @@ do_remove_flow(int argc, char *argv[])
MAXLINKNAMELEN) >= MAXLINKNAMELEN) {
die("link name too long");
}
- if (dladm_name2info(handle, linkname, &linkid, NULL,
- NULL, NULL) != DLADM_STATUS_OK) {
- die("invalid link '%s'", linkname);
- }
l_arg = B_TRUE;
break;
+ case 'z':
+ zonename = optarg;
+ break;
default:
die_opterr(optopt, option);
break;
@@ -458,6 +464,12 @@ do_remove_flow(int argc, char *argv[])
/* if link is specified then flow name should not be there */
if (optind == argc-1)
usage();
+
+ if (dladm_zname2info(handle, zonename, linkname, &linkid, NULL,
+ NULL, NULL) != DLADM_STATUS_OK) {
+ die("invalid link '%s'", linkname);
+ }
+
/* walk the link to find flows and remove them */
state.fs_tempop = t_arg;
state.fs_altroot = altroot;
@@ -597,11 +609,12 @@ do_show_flow(int argc, char *argv[])
ofmt_handle_t ofmt;
ofmt_status_t oferr;
uint_t ofmtflags = 0;
+ char *zonename = NULL;
bzero(&state, sizeof (state));
opterr = 0;
- while ((option = getopt_long(argc, argv, ":pPl:o:",
+ while ((option = getopt_long(argc, argv, ":pPl:o:z:",
longopts, NULL)) != -1) {
switch (option) {
case 'p':
@@ -622,17 +635,23 @@ do_show_flow(int argc, char *argv[])
if (strlcpy(linkname, optarg, MAXLINKNAMELEN)
>= MAXLINKNAMELEN)
die("link name too long\n");
- if (dladm_name2info(handle, linkname, &linkid, NULL,
- NULL, NULL) != DLADM_STATUS_OK)
- die("invalid link '%s'", linkname);
l_arg = B_TRUE;
break;
+ case 'z':
+ zonename = optarg;
+ break;
default:
die_opterr(optopt, option);
break;
}
}
+ if (l_arg) {
+ if (dladm_zname2info(handle, zonename, linkname, &linkid, NULL,
+ NULL, NULL) != DLADM_STATUS_OK)
+ die("invalid link '%s'", linkname);
+ }
+
/* get flow name (optional last argument */
if (optind == (argc-1)) {
if (strlcpy(flowname, argv[optind], MAXFLOWNAMELEN)
diff --git a/usr/src/cmd/flowstat/flowstat.c b/usr/src/cmd/flowstat/flowstat.c
index 3ddff9e34f..781ce6b0f0 100644
--- a/usr/src/cmd/flowstat/flowstat.c
+++ b/usr/src/cmd/flowstat/flowstat.c
@@ -21,6 +21,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011 Joyent, Inc. All rights reserved.
*/
#include <stdio.h>
@@ -190,9 +191,9 @@ static char *progname;
static dladm_handle_t handle = NULL;
const char *usage_ermsg = "flowstat [-r | -t] [-i interval] "
- "[-l link] [flow]\n"
+ "[-l link] [-z zonename] [flow]\n"
" flowstat [-S] [-A] [-i interval] [-p] [ -o field[,...]]\n"
- " [-u R|K|M|G|T|P] [-l link] [flow]\n"
+ " [-u R|K|M|G|T|P] [-l link] [-z zonename] [flow]\n"
" flowstat -h [-a] [-d] [-F format]"
" [-s <DD/MM/YYYY,HH:MM:SS>]\n"
" [-e <DD/MM/YYYY,HH:MM:SS>] -f <logfile> "
@@ -556,6 +557,7 @@ main(int argc, char *argv[])
show_flow_state_t state;
char *fields_str = NULL;
char *o_fields_str = NULL;
+ char *zonename = NULL;
char *total_stat_fields =
"flow,ipkts,rbytes,ierrs,opkts,obytes,oerrs";
@@ -582,10 +584,11 @@ main(int argc, char *argv[])
if ((status = dladm_open(&handle)) != DLADM_STATUS_OK)
die_dlerr(status, "could not open /dev/dld");
+ linkname[0] = '\0';
bzero(&state, sizeof (state));
opterr = 0;
- while ((option = getopt_long(argc, argv, ":rtApSi:o:u:l:h",
+ while ((option = getopt_long(argc, argv, ":rtApSi:o:u:l:hz:",
NULL, NULL)) != -1) {
switch (option) {
case 'r':
@@ -642,9 +645,6 @@ main(int argc, char *argv[])
if (strlcpy(linkname, optarg, MAXLINKNAMELEN)
>= MAXLINKNAMELEN)
die("link name too long\n");
- if (dladm_name2info(handle, linkname, &linkid, NULL,
- NULL, NULL) != DLADM_STATUS_OK)
- die("invalid link '%s'", linkname);
break;
case 'h':
if (r_arg || t_arg || p_arg || o_arg || u_arg ||
@@ -655,6 +655,9 @@ main(int argc, char *argv[])
do_show_history(argc, argv);
return (0);
break;
+ case 'z':
+ zonename = optarg;
+ break;
default:
die_opterr(optopt, option, usage_ermsg);
break;
@@ -683,6 +686,12 @@ main(int argc, char *argv[])
die("the option -A is not compatible with "
"-r, -t, -p, -o, -u, -i");
+ if (linkname[0] != '\0') {
+ if (dladm_zname2info(handle, zonename, linkname, &linkid, NULL,
+ NULL, NULL) != DLADM_STATUS_OK)
+ die("invalid link '%s'", linkname);
+ }
+
/* get flow name (optional last argument) */
if (optind == (argc-1)) {
if (strlcpy(flowname, argv[optind], MAXFLOWNAMELEN)
diff --git a/usr/src/cmd/fs.d/Makefile b/usr/src/cmd/fs.d/Makefile
index 8e8faaa643..6cf7642d17 100644
--- a/usr/src/cmd/fs.d/Makefile
+++ b/usr/src/cmd/fs.d/Makefile
@@ -19,6 +19,7 @@
# CDDL HEADER END
#
# Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2012, Joyent, Inc. All rights reserved.
#
# The filesystem independent utilities clri, fsdb, dcopy, labelit, and mkfs
@@ -38,8 +39,8 @@ DEFAULTFILES= fs.dfl
include ../Makefile.cmd
-SUBDIR1= lofs zfs
-SUBDIR2= dev fd pcfs nfs hsfs proc ctfs udfs ufs tmpfs cachefs \
+SUBDIR1= bootfs hyprlofs lofs zfs
+SUBDIR2= dev fd pcfs nfs hsfs lxproc proc ctfs udfs ufs tmpfs cachefs \
autofs mntfs objfs sharefs smbclnt reparsed
SUBDIRS= $(SUBDIR1) $(SUBDIR2)
I18NDIRS= $(SUBDIR2)
diff --git a/usr/src/cmd/fs.d/bootfs/Makefile b/usr/src/cmd/fs.d/bootfs/Makefile
new file mode 100644
index 0000000000..d0ac4311f4
--- /dev/null
+++ b/usr/src/cmd/fs.d/bootfs/Makefile
@@ -0,0 +1,21 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+FSTYPE= bootfs
+LIBPROG= mount
+
+include ../Makefile.fstype
+include ../Makefile.mount
+include ../Makefile.mount.targ
diff --git a/usr/src/cmd/fs.d/bootfs/mount.c b/usr/src/cmd/fs.d/bootfs/mount.c
new file mode 100644
index 0000000000..5363a4f872
--- /dev/null
+++ b/usr/src/cmd/fs.d/bootfs/mount.c
@@ -0,0 +1,139 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <libintl.h>
+#include <errno.h>
+#include <sys/fstyp.h>
+#include <sys/fsid.h>
+#include <sys/mntent.h>
+#include <sys/mnttab.h>
+#include <sys/mount.h>
+#include <sys/signal.h>
+#include <sys/stat.h>
+#include <fslib.h>
+
+#define MNTTYPE_BOOTFS "bootfs"
+
+static char optbuf[MAX_MNTOPT_STR] = { '\0', };
+static int optsize = 0;
+
+static void
+usage(void)
+{
+ (void) fprintf(stderr,
+ "Usage: mount [-Ormq] [-o options] special mountpoint\n");
+ exit(2);
+}
+
+/*
+ * usage: mount [-Ormq] [-o options] special mountp
+ *
+ * This mount program is exec'ed by /usr/sbin/mount if '-F bootfs' is
+ * specified.
+ */
+int
+main(int argc, char *argv[])
+{
+ int c;
+ char *special; /* Entity being mounted */
+ char *mountp; /* Entity being mounted on */
+ char *savedoptbuf;
+ char *myname;
+ char typename[64];
+ int flags = 0;
+ int errflag = 0;
+ int qflg = 0;
+
+ myname = strrchr(argv[0], '/');
+ myname = myname ? myname+1 : argv[0];
+ (void) snprintf(typename, sizeof (typename), "%s %s", MNTTYPE_BOOTFS,
+ myname);
+ argv[0] = typename;
+
+ while ((c = getopt(argc, argv, "o:rmOq")) != EOF) {
+ switch (c) {
+ case '?':
+ errflag++;
+ break;
+
+ case 'o':
+ if (strlcpy(optbuf, optarg, sizeof (optbuf)) >=
+ sizeof (optbuf)) {
+ (void) fprintf(stderr,
+ gettext("%s: Invalid argument: %s\n"),
+ myname, optarg);
+ return (2);
+ }
+ optsize = strlen(optbuf);
+ break;
+ case 'O':
+ flags |= MS_OVERLAY;
+ break;
+ case 'r':
+ flags |= MS_RDONLY;
+ break;
+
+ case 'm':
+ flags |= MS_NOMNTTAB;
+ break;
+
+ case 'q':
+ qflg = 1;
+ break;
+
+ default:
+ usage();
+ }
+ }
+ if ((argc - optind != 2) || errflag) {
+ usage();
+ }
+ special = argv[argc - 2];
+ mountp = argv[argc - 1];
+
+ if ((savedoptbuf = strdup(optbuf)) == NULL) {
+ (void) fprintf(stderr, gettext("%s: out of memory\n"),
+ myname);
+ exit(2);
+ }
+
+ if (mount(special, mountp, flags | MS_OPTIONSTR, MNTTYPE_BOOTFS, NULL,
+ 0, optbuf, MAX_MNTOPT_STR)) {
+ (void) fprintf(stderr, "mount: ");
+ perror(special);
+ exit(2);
+ }
+ if (optsize && !qflg) {
+ cmp_requested_to_actual_options(savedoptbuf, optbuf,
+ special, mountp);
+ }
+
+ return (0);
+}
diff --git a/usr/src/cmd/fs.d/hyprlofs/Makefile b/usr/src/cmd/fs.d/hyprlofs/Makefile
new file mode 100644
index 0000000000..1a3aaf18d3
--- /dev/null
+++ b/usr/src/cmd/fs.d/hyprlofs/Makefile
@@ -0,0 +1,42 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2012, Joyent, Inc. All rights reserved
+#
+
+SUBDIRS= hlcfg mount
+
+all:= TARGET= all
+install:= TARGET= install
+clean:= TARGET= clean
+clobber:= TARGET= clobber
+lint:= TARGET= lint
+
+.KEEP_STATE:
+
+.PARALLEL: $(SUBDIRS)
+
+all install clean clobber lint: $(SUBDIRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/cmd/fs.d/hyprlofs/hlcfg/Makefile b/usr/src/cmd/fs.d/hyprlofs/hlcfg/Makefile
new file mode 100644
index 0000000000..d2ae22e9fd
--- /dev/null
+++ b/usr/src/cmd/fs.d/hyprlofs/hlcfg/Makefile
@@ -0,0 +1,30 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2012, Joyent, Inc. All rights reserved.
+#
+
+FSTYPE= hyprlofs
+LIBPROG= hlcfg
+
+include ../../Makefile.fstype
+include ../../Makefile.mount
+include ../../Makefile.mount.targ
diff --git a/usr/src/cmd/fs.d/hyprlofs/hlcfg/hlcfg.c b/usr/src/cmd/fs.d/hyprlofs/hlcfg/hlcfg.c
new file mode 100644
index 0000000000..16e8e32b1c
--- /dev/null
+++ b/usr/src/cmd/fs.d/hyprlofs/hlcfg/hlcfg.c
@@ -0,0 +1,244 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2012, Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * This is a simple test program to exercise the hyprlofs ioctls. This is
+ * not designed as a full featured CLI and only does minimal error checking
+ * and reporting.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <strings.h>
+#include <sys/errno.h>
+#include <sys/fs/hyprlofs.h>
+
+extern int errno;
+
+char *usage = "usage: <fs path> add [<file name> <alias>]+\n"
+ " <fs path> addl [<file name>]+\n"
+ " <fs path> rm [<alias>]+\n"
+ " <fs path> clear"
+ " <fs path> get";
+
+typedef enum {
+ CMD_ADD,
+ CMD_RM,
+ CMD_CLR,
+ CMD_ADDL,
+ CMD_GET
+} cmd_t;
+
+static int
+get_entries(int fd)
+{
+ int err;
+ int i;
+ hyprlofs_curr_entries_t e;
+ hyprlofs_curr_entry_t *ep;
+
+ e.hce_cnt = 0;
+ e.hce_entries = NULL;
+
+ err = ioctl(fd, HYPRLOFS_GET_ENTRIES, &e);
+ if (err != 0 && errno != E2BIG) {
+ perror("ioctl");
+ return (1);
+ }
+
+ if (err == 0) {
+ (void) printf("success, but no entries\n");
+ return (0);
+ }
+
+ /*
+ * E2BIG is what we expect when there are existing mappings
+ * since the current cnt is still returned in that case.
+ */
+ (void) printf("cnt: %d\n", e.hce_cnt);
+
+ /* alloc array and call again, then print array */
+ if ((ep = (hyprlofs_curr_entry_t *)
+ malloc(sizeof (hyprlofs_curr_entry_t) * e.hce_cnt)) == NULL) {
+ (void) fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+
+ e.hce_entries = ep;
+ errno = 0;
+ if (ioctl(fd, HYPRLOFS_GET_ENTRIES, &e) != 0) {
+ /*
+ * Not handling an increase here. We would need to free and
+ * start over to do that, but ok for a test program.
+ */
+ perror("ioctl");
+ free(ep);
+ return (1);
+ }
+ for (i = 0; i < e.hce_cnt; i++)
+ (void) printf("%s %s\n", ep[i].hce_path, ep[i].hce_name);
+
+ free(ep);
+ return (0);
+}
+
+int
+main(int argc, char **argv)
+{
+ int i, ap;
+ cmd_t cmd;
+ int cnt = 0;
+ int fd;
+ int rv = 0;
+ hyprlofs_entry_t *e = NULL;
+ hyprlofs_entries_t ents;
+
+ if (argc < 3) {
+ (void) fprintf(stderr, "%s\n", usage);
+ exit(1);
+ }
+
+ if ((fd = open(argv[1], O_RDONLY)) < 0) {
+ perror("can't open hyprlofs mount");
+ exit(1);
+ }
+
+ if (strcmp(argv[2], "add") == 0) {
+ cmd = CMD_ADD;
+ } else if (strcmp(argv[2], "rm") == 0) {
+ cmd = CMD_RM;
+ } else if (strcmp(argv[2], "clear") == 0) {
+ cmd = CMD_CLR;
+ } else if (strcmp(argv[2], "addl") == 0) {
+ cmd = CMD_ADDL;
+ } else if (strcmp(argv[2], "get") == 0) {
+ cmd = CMD_GET;
+ } else {
+ (void) fprintf(stderr, "%s\n", usage);
+ exit(1);
+ }
+
+ /* Count up the number of parameters. The arg format varies w/ cmd */
+ switch (cmd) {
+ case CMD_ADD:
+ for (i = 3; i < argc; i++) {
+ /* argv[i] is the file path */
+
+ /* The next arg is the alias */
+ if (++i >= argc) {
+ (void) fprintf(stderr, "missing alias for %s\n",
+ argv[i - 1]);
+ exit(1);
+ }
+
+ cnt++;
+ }
+ break;
+ case CMD_ADDL:
+ cnt = argc - 3;
+ break;
+ case CMD_RM:
+ cnt = argc - 3;
+ break;
+ case CMD_CLR: /*FALLTHRU*/
+ case CMD_GET:
+ if (argc > 3) {
+ (void) fprintf(stderr, "%s\n", usage);
+ exit(1);
+ }
+ break;
+ }
+
+ if (cnt > 0) {
+ if ((e = (hyprlofs_entry_t *)malloc(sizeof (hyprlofs_entry_t) *
+ cnt)) == NULL) {
+ (void) fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+ }
+
+ /*
+ * Format up the args.
+ * We only setup the path member for the add cmd.
+ * We won't run this loop for the clear cmd.
+ * The addl command is special since we use basename to get the alias.
+ */
+ for (i = 0, ap = 3; i < cnt; i++, ap++) {
+ if (cmd == CMD_ADDL) {
+ e[i].hle_path = argv[ap];
+ e[i].hle_plen = strlen(e[i].hle_path);
+
+ e[i].hle_name = basename(argv[ap]);
+ e[i].hle_nlen = strlen(e[i].hle_name);
+
+ continue;
+ }
+
+ if (cmd == CMD_ADD) {
+ e[i].hle_path = argv[ap++];
+ e[i].hle_plen = strlen(e[i].hle_path);
+ }
+
+ e[i].hle_name = argv[ap];
+ e[i].hle_nlen = strlen(e[i].hle_name);
+ }
+
+ ents.hle_entries = e;
+ ents.hle_len = cnt;
+
+ switch (cmd) {
+ case CMD_ADD: /*FALLTHRU*/
+ case CMD_ADDL:
+ if (ioctl(fd, HYPRLOFS_ADD_ENTRIES, &ents) < 0) {
+ perror("ioctl");
+ rv = 1;
+ }
+ break;
+ case CMD_RM:
+ if (ioctl(fd, HYPRLOFS_RM_ENTRIES, &ents) < 0) {
+ perror("ioctl");
+ rv = 1;
+ }
+ break;
+ case CMD_CLR:
+ if (ioctl(fd, HYPRLOFS_RM_ALL) < 0) {
+ perror("ioctl");
+ rv = 1;
+ }
+ break;
+ case CMD_GET:
+ rv = get_entries(fd);
+ break;
+ }
+
+ (void) close(fd);
+ if (cnt > 0)
+ free(e);
+ return (rv);
+}
diff --git a/usr/src/cmd/fs.d/hyprlofs/mount/Makefile b/usr/src/cmd/fs.d/hyprlofs/mount/Makefile
new file mode 100644
index 0000000000..a0b63d211c
--- /dev/null
+++ b/usr/src/cmd/fs.d/hyprlofs/mount/Makefile
@@ -0,0 +1,31 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2012 Joyent, Inc. All rights reserved.
+#
+
+FSTYPE= hyprlofs
+LIBPROG= mount
+
+include ../../Makefile.fstype
+include ../../Makefile.mount
+include ../../Makefile.mount.targ
diff --git a/usr/src/cmd/fs.d/hyprlofs/mount/mount.c b/usr/src/cmd/fs.d/hyprlofs/mount/mount.c
new file mode 100644
index 0000000000..a95c9ca3c2
--- /dev/null
+++ b/usr/src/cmd/fs.d/hyprlofs/mount/mount.c
@@ -0,0 +1,148 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
+ */
+
+#define HLFS
+#define MNTTYPE_HYFS "hyprlofs"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <libintl.h>
+#include <errno.h>
+#include <sys/fstyp.h>
+#include <sys/fsid.h>
+#include <sys/mntent.h>
+#include <sys/mnttab.h>
+#include <sys/mount.h>
+#include <sys/signal.h>
+#include <sys/stat.h>
+#include <fslib.h>
+
+#define RET_OK 0
+/*
+ * /sbin/mount and the fs-local method understand this exit code to
+ * mean that all the mount failures were related to hyprlofs mounts. Since
+ * this program only attempts to mount hyfs file systems, when it fails
+ * it returns this exit status.
+ */
+#define RET_ERR 111
+
+static void usage(void);
+
+static char optbuf[MAX_MNTOPT_STR] = { '\0', };
+static int optsize = 0;
+
+static char fstype[] = MNTTYPE_HYFS;
+
+/*
+ * usage: mount [-Ormq] [-o options] special mountp
+ *
+ * This mount program is exec'ed by /usr/sbin/mount if '-F hyprlofs' is
+ * specified.
+ */
+int
+main(int argc, char *argv[])
+{
+ int c;
+ char *special; /* Entity being mounted */
+ char *mountp; /* Entity being mounted on */
+ char *savedoptbuf;
+ char *myname;
+ char typename[64];
+ int flags = 0;
+ int errflag = 0;
+ int qflg = 0;
+
+ myname = strrchr(argv[0], '/');
+ myname = myname ? myname+1 : argv[0];
+ (void) snprintf(typename, sizeof (typename), "%s %s", fstype, myname);
+ argv[0] = typename;
+
+ while ((c = getopt(argc, argv, "o:rmOq")) != EOF) {
+ switch (c) {
+ case '?':
+ errflag++;
+ break;
+
+ case 'o':
+ if (strlcpy(optbuf, optarg, sizeof (optbuf)) >=
+ sizeof (optbuf)) {
+ (void) fprintf(stderr,
+ gettext("%s: Invalid argument: %s\n"),
+ myname, optarg);
+ return (2);
+ }
+ optsize = strlen(optbuf);
+ break;
+ case 'O':
+ flags |= MS_OVERLAY;
+ break;
+ case 'r':
+ flags |= MS_RDONLY;
+ break;
+
+ case 'm':
+ flags |= MS_NOMNTTAB;
+ break;
+
+ case 'q':
+ qflg = 1;
+ break;
+
+ default:
+ usage();
+ }
+ }
+ if ((argc - optind != 2) || errflag) {
+ usage();
+ }
+ special = argv[argc - 2];
+ mountp = argv[argc - 1];
+
+ if ((savedoptbuf = strdup(optbuf)) == NULL) {
+ (void) fprintf(stderr, gettext("%s: out of memory\n"),
+ myname);
+ exit(2);
+ }
+ if (mount(special, mountp, flags | MS_OPTIONSTR, fstype, NULL, 0,
+ optbuf, MAX_MNTOPT_STR)) {
+ (void) fprintf(stderr, "mount: ");
+ perror(special);
+ exit(RET_ERR);
+ }
+ if (optsize && !qflg)
+ cmp_requested_to_actual_options(savedoptbuf, optbuf,
+ special, mountp);
+ return (0);
+}
+
+void
+usage(void)
+{
+ (void) fprintf(stderr,
+ "Usage: mount [-Ormq] [-o options] special mountpoint\n");
+ exit(RET_ERR);
+}
diff --git a/usr/src/cmd/fs.d/lxproc/Makefile b/usr/src/cmd/fs.d/lxproc/Makefile
new file mode 100644
index 0000000000..77075ef1dd
--- /dev/null
+++ b/usr/src/cmd/fs.d/lxproc/Makefile
@@ -0,0 +1,32 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+FSTYPE= lxproc
+LIBPROG= mount
+
+include ../Makefile.fstype
+include ../Makefile.mount
+include ../Makefile.mount.targ
diff --git a/usr/src/cmd/fs.d/lxproc/mount.c b/usr/src/cmd/fs.d/lxproc/mount.c
new file mode 100644
index 0000000000..5a000997bd
--- /dev/null
+++ b/usr/src/cmd/fs.d/lxproc/mount.c
@@ -0,0 +1,140 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <libintl.h>
+#include <errno.h>
+#include <sys/fstyp.h>
+#include <sys/fsid.h>
+#include <sys/mntent.h>
+#include <sys/mnttab.h>
+#include <sys/mount.h>
+#include <sys/signal.h>
+#include <sys/stat.h>
+#include <fslib.h>
+
+#define RET_OK 0
+#define RET_ERR 33
+
+static void usage(void);
+
+static char optbuf[MAX_MNTOPT_STR] = { '\0', };
+static int optsize = 0;
+
+static char fstype[] = "lxproc";
+
+/*
+ * usage: mount [-Ormq] [-o options] special mountp
+ *
+ * This mount program is exec'ed by /usr/sbin/mount if '-F lxproc' is
+ * specified.
+ */
+int
+main(int argc, char *argv[])
+{
+ int c;
+ char *special; /* Entity being mounted */
+ char *mountp; /* Entity being mounted on */
+ char *savedoptbuf;
+ char *myname;
+ char typename[64];
+ int flags = 0;
+ int errflag = 0;
+ int qflg = 0;
+
+ myname = strrchr(argv[0], '/');
+ myname = myname ? myname+1 : argv[0];
+ (void) snprintf(typename, sizeof (typename), "%s %s", fstype, myname);
+ argv[0] = typename;
+
+ while ((c = getopt(argc, argv, "o:rmOq")) != EOF) {
+ switch (c) {
+ case '?':
+ errflag++;
+ break;
+
+ case 'o':
+ if (strlcpy(optbuf, optarg, sizeof (optbuf)) >=
+ sizeof (optbuf)) {
+ (void) fprintf(stderr,
+ gettext("%s: Invalid argument: %s\n"),
+ myname, optarg);
+ return (2);
+ }
+ optsize = strlen(optbuf);
+ break;
+ case 'O':
+ flags |= MS_OVERLAY;
+ break;
+ case 'r':
+ flags |= MS_RDONLY;
+ break;
+
+ case 'm':
+ flags |= MS_NOMNTTAB;
+ break;
+
+ case 'q':
+ qflg = 1;
+ break;
+
+ default:
+ usage();
+ }
+ }
+ if ((argc - optind != 2) || errflag) {
+ usage();
+ }
+ special = argv[argc - 2];
+ mountp = argv[argc - 1];
+
+ if ((savedoptbuf = strdup(optbuf)) == NULL) {
+ (void) fprintf(stderr, gettext("%s: out of memory\n"),
+ myname);
+ exit(2);
+ }
+ if (mount(special, mountp, flags | MS_OPTIONSTR, fstype, NULL, 0,
+ optbuf, MAX_MNTOPT_STR)) {
+ (void) fprintf(stderr, "mount: ");
+ perror(special);
+ exit(RET_ERR);
+ }
+ if (optsize && !qflg)
+ cmp_requested_to_actual_options(savedoptbuf, optbuf,
+ special, mountp);
+ return (0);
+}
+
+void
+usage(void)
+{
+ (void) fprintf(stderr,
+ "Usage: mount [-Ormq] [-o options] special mountpoint\n");
+ exit(RET_ERR);
+}
diff --git a/usr/src/cmd/fs.d/nfs/lib/smfcfg.c b/usr/src/cmd/fs.d/nfs/lib/smfcfg.c
index ba2420362a..b79fff4125 100644
--- a/usr/src/cmd/fs.d/nfs/lib/smfcfg.c
+++ b/usr/src/cmd/fs.d/nfs/lib/smfcfg.c
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#include <stdio.h>
#include <stdlib.h>
@@ -344,8 +345,23 @@ fs_smf_get_prop(smf_fstype_t fstype, char *prop_name, char *cbuf,
} else {
ret = scf_error();
}
- if ((ret != 0) && scf_error() != SCF_ERROR_NONE)
- fprintf(stdout, gettext("%s\n"), scf_strerror(ret));
+ if ((ret != 0) && scf_error() != SCF_ERROR_NONE) {
+ /*
+ * This is a workaround for the NFS service manifests not
+ * containing the proper properties in local zones.
+ *
+ * When in a local zone and the property doesn't exist on an NFS
+ * service (most likely nfs/server or nfs/client), don't print
+ * the error. The caller will still see the correct error code,
+ * but a user creating a delegated dataset or mounting an NFS
+ * share won't see this spurious error.
+ */
+ if (getzoneid() == GLOBAL_ZONEID ||
+ scf_error() != SCF_ERROR_NOT_FOUND) {
+ fprintf(stdout, gettext("%s\n"), scf_strerror(ret));
+ }
+ }
+
out:
fs_smf_fini(phandle);
return (ret);
diff --git a/usr/src/cmd/fs.d/nfs/lib/smfcfg.h b/usr/src/cmd/fs.d/nfs/lib/smfcfg.h
index c06327d801..f0b70907aa 100644
--- a/usr/src/cmd/fs.d/nfs/lib/smfcfg.h
+++ b/usr/src/cmd/fs.d/nfs/lib/smfcfg.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#ifndef _SMFCFG_H
@@ -42,6 +43,7 @@
#include <locale.h>
#include <errno.h>
#include <sys/types.h>
+#include <zone.h>
#ifdef __cplusplus
extern "C" {
diff --git a/usr/src/cmd/fs.d/nfs/mount/mount.c b/usr/src/cmd/fs.d/nfs/mount/mount.c
index c228a91d05..b3b255d098 100644
--- a/usr/src/cmd/fs.d/nfs/mount/mount.c
+++ b/usr/src/cmd/fs.d/nfs/mount/mount.c
@@ -2104,7 +2104,7 @@ get_fh(struct nfs_args *args, char *fshost, char *fspath, int *versp,
}
while ((cl = clnt_create_vers(fshost, MOUNTPROG, &outvers,
- vers_min, vers_to_try, "datagram_v")) == NULL) {
+ vers_min, vers_to_try, NULL)) == NULL) {
if (rpc_createerr.cf_stat == RPC_UNKNOWNHOST) {
pr_err(gettext("%s: %s\n"), fshost,
clnt_spcreateerror(""));
diff --git a/usr/src/cmd/fs.d/nfs/mountd/mountd.c b/usr/src/cmd/fs.d/nfs/mountd/mountd.c
index 0d9ac76d30..666ad427d5 100644
--- a/usr/src/cmd/fs.d/nfs/mountd/mountd.c
+++ b/usr/src/cmd/fs.d/nfs/mountd/mountd.c
@@ -22,6 +22,7 @@
/*
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
@@ -419,6 +420,13 @@ main(int argc, char *argv[])
exit(1);
}
+ /* Mountd cannot run in a non-global zone. */
+ if (getzoneid() != GLOBAL_ZONEID) {
+ (void) fprintf(stderr, "%s: can only run in the global zone\n",
+ argv[0]);
+ exit(1);
+ }
+
if (getrlimit(RLIMIT_NOFILE, &rl) != 0) {
syslog(LOG_ERR, "getrlimit failed");
} else {
diff --git a/usr/src/cmd/fs.d/nfs/nfsd/nfsd.c b/usr/src/cmd/fs.d/nfs/nfsd/nfsd.c
index 6c0e0bda5e..c34c39a13e 100644
--- a/usr/src/cmd/fs.d/nfs/nfsd/nfsd.c
+++ b/usr/src/cmd/fs.d/nfs/nfsd/nfsd.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -176,6 +177,13 @@ main(int ac, char *av[])
exit(1);
}
+ /* Nfsd cannot run in a non-global zone. */
+ if (getzoneid() != GLOBAL_ZONEID) {
+ (void) fprintf(stderr, "%s: can only run in the global zone\n",
+ av[0]);
+ exit(1);
+ }
+
(void) enable_extended_FILE_stdio(-1, -1);
/*
diff --git a/usr/src/cmd/fs.d/nfs/svc/nfs-server b/usr/src/cmd/fs.d/nfs/svc/nfs-server
index b0c6d155a5..8542ce9b9f 100644
--- a/usr/src/cmd/fs.d/nfs/svc/nfs-server
+++ b/usr/src/cmd/fs.d/nfs/svc/nfs-server
@@ -50,13 +50,13 @@ configure_ipfilter()
#
# Nothing to do if:
+ # - service's policy is 'use_global'
# - ipfilter isn't online
# - global policy is 'custom'
- # - service's policy is 'use_global'
#
+ [ "`get_policy $SMF_FMRI`" = "use_global" ] && return 0
service_check_state $IPF_FMRI $SMF_ONLINE || return 0
[ "`get_global_def_policy`" = "custom" ] && return 0
- [ "`get_policy $SMF_FMRI`" = "use_global" ] && return 0
svcadm restart $IPF_FMRI
}
diff --git a/usr/src/cmd/fs.d/nfs/umount/umount.c b/usr/src/cmd/fs.d/nfs/umount/umount.c
index aabdc8a592..66d280bcdb 100644
--- a/usr/src/cmd/fs.d/nfs/umount/umount.c
+++ b/usr/src/cmd/fs.d/nfs/umount/umount.c
@@ -297,7 +297,7 @@ retry:
*/
timep = (quick ? &create_timeout : NULL);
cl = clnt_create_timed(list[i].host, MOUNTPROG, vers,
- "datagram_n", timep);
+ NULL, timep);
/*
* Do not print any error messages in case of forced
* unmount.
diff --git a/usr/src/cmd/halt/halt.c b/usr/src/cmd/halt/halt.c
index 36867c88dc..3b57e02c64 100644
--- a/usr/src/cmd/halt/halt.c
+++ b/usr/src/cmd/halt/halt.c
@@ -21,6 +21,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011 Joyent, Inc. All rights reserved.
*/
/*
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
@@ -1236,6 +1237,17 @@ do_archives_update(int do_fast_reboot)
pid_t pid;
char *cmd_argv[MAXARGS];
+#if defined(__i386)
+ {
+ /*
+ * bootadm will complain and exit if not a grub root, so
+ * just skip running it.
+ */
+ struct stat sb;
+ if (stat("/boot/grub/stage2", &sb) == -1)
+ return;
+ }
+#endif /* __i386 */
cmd_argv[i++] = "/sbin/bootadm";
cmd_argv[i++] = "-ea";
@@ -1300,7 +1312,7 @@ main(int argc, char *argv[])
optstring = "dlnqfp";
usage = gettext("usage: %s [ -dlnq(p|f) ] [ boot args ]\n");
#endif
- cmd = A_SHUTDOWN;
+ cmd = A_REBOOT;
fcn = AD_BOOT;
} else {
(void) fprintf(stderr,
@@ -1498,7 +1510,8 @@ main(int argc, char *argv[])
* check_zone_haltedness later on.
*/
if (zoneid == GLOBAL_ZONEID && cmd != A_DUMP) {
- need_check_zones = halt_zones();
+ if (!qflag)
+ need_check_zones = halt_zones();
}
#if defined(__i386)
@@ -1598,7 +1611,7 @@ main(int argc, char *argv[])
(void) signal(SIGINT, SIG_IGN);
- if (!qflag && !nosync) {
+ if (!nosync) {
struct utmpx wtmpx;
bzero(&wtmpx, sizeof (struct utmpx));
diff --git a/usr/src/cmd/ibd_upgrade/ibd_delete_link.c b/usr/src/cmd/ibd_upgrade/ibd_delete_link.c
index b9d10a56cd..f63630207d 100644
--- a/usr/src/cmd/ibd_upgrade/ibd_delete_link.c
+++ b/usr/src/cmd/ibd_upgrade/ibd_delete_link.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent Inc. All rights reserved.
*/
#include <stdio.h>
@@ -86,6 +87,7 @@ ibd_delete_link(dladm_handle_t dlh, char *link)
getlinkid.ld_cmd = DLMGMT_CMD_GETLINKID;
(void) strlcpy(getlinkid.ld_link, link, MAXLINKNAMELEN);
+ getlinkid.ld_zoneid = -1;
if ((status = ibd_dladm_door_call(dlh, &getlinkid, sizeof (getlinkid),
&retval, sizeof (retval))) != DLADM_STATUS_OK) {
diff --git a/usr/src/cmd/init/Makefile b/usr/src/cmd/init/Makefile
index a867a0e780..b4b857882b 100644
--- a/usr/src/cmd/init/Makefile
+++ b/usr/src/cmd/init/Makefile
@@ -25,11 +25,13 @@
#
PROG= init
+OBJS= init.o
ROOTFS_PROG= $(PROG)
DEFAULTFILES= init.dfl
include ../Makefile.cmd
+include ../Makefile.ctf
LDLIBS += -lpam -lbsm -lcontract -lscf
CERRWARN += -_gcc=-Wno-parentheses
@@ -41,6 +43,10 @@ CLOBBERFILES= $(STATIC)
all: $(ROOTFS_PROG)
+$(ROOTFS_PROG): $(OBJS)
+ $(LINK.c) -o $@ $(OBJS) $(LDLIBS)
+ $(POST_PROCESS)
+
install: all $(ROOTETCDEFAULTFILES) $(ROOTSBINPROG)
$(RM) $(ROOTETCPROG)
$(RM) $(ROOTUSRSBINPROG)
@@ -58,4 +64,8 @@ clean:
lint: lint_PROG
+%.o: %.c
+ $(COMPILE.c) $<
+ $(POST_PROCESS_O)
+
include ../Makefile.targ
diff --git a/usr/src/cmd/init/init.c b/usr/src/cmd/init/init.c
index 031a053b65..f6f8bccdbb 100644
--- a/usr/src/cmd/init/init.c
+++ b/usr/src/cmd/init/init.c
@@ -23,6 +23,7 @@
* Copyright (c) 2013 Gary Mills
*
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -144,6 +145,8 @@
#define UT_USER_SZ 32 /* Size of a utmpx ut_user field */
#define UT_LINE_SZ 32 /* Size of a utmpx ut_line field */
+#define CHECK_SVC SCF_INSTANCE_FS_MINIMAL
+
/*
* SLEEPTIME The number of seconds "init" sleeps between wakeups if
* nothing else requires this "init" wakeup.
@@ -696,9 +699,8 @@ main(int argc, char *argv[])
console(B_FALSE,
"\n\n%s Release %s Version %s %d-bit\r\n",
un.sysname, un.release, un.version, bits);
- console(B_FALSE,
- "Copyright (c) 1983, 2010, Oracle and/or its affiliates."
- " All rights reserved.\r\n");
+ console(B_FALSE, "Copyright (c) 2010-2012, "
+ "Joyent Inc. All rights reserved.\r\n");
}
/*
@@ -3509,6 +3511,28 @@ bail:
}
/*
+ * Attempt to confirm that svc.startd is ready to accept a user-initiated
+ * run-level change. startd is not ready until it has started its
+ * _scf_notify_wait thread to watch for events from svc.configd. This is
+ * inherently racy. To workaround this, we check the status of a file that
+ * startd will create once it has started the _scf_notify_wait thread.
+ * If we don't see this file after one minute, then charge ahead.
+ */
+static void
+verify_startd_ready()
+{
+ struct stat64 buf;
+ int i;
+
+ for (i = 0; i < 60; i++) {
+ if (stat64("/etc/svc/volatile/startd.ready", &buf) == 0)
+ return;
+ sleep(1);
+ }
+ console(B_TRUE, "verify startd timeout\n");
+}
+
+/*
* Function to handle requests from users to main init running as process 1.
*/
static void
@@ -3596,6 +3620,12 @@ userinit(int argc, char **argv)
(void) audit_put_record(ADT_SUCCESS, ADT_SUCCESS, argv[1]);
/*
+ * Before we tell init to start a run-level change, we need to be
+ * sure svc.startd is ready to accept that.
+ */
+ verify_startd_ready();
+
+ /*
* Signal init; init will take care of telling svc.startd.
*/
if (kill(init_pid, init_signal) == FAILURE) {
@@ -4305,9 +4335,7 @@ contract_event(struct pollfd *poll)
if (ret == 0) {
if (cookie == STARTD_COOKIE &&
do_restart_startd) {
- if (smf_debug)
- console(B_TRUE, "Restarting "
- "svc.startd.\n");
+ console(B_TRUE, "Restarting svc.startd.\n");
/*
* Account for the failure. If the failure rate
diff --git a/usr/src/cmd/initpkg/mountall.sh b/usr/src/cmd/initpkg/mountall.sh
index 56f2798e18..38693dd8da 100644
--- a/usr/src/cmd/initpkg/mountall.sh
+++ b/usr/src/cmd/initpkg/mountall.sh
@@ -28,6 +28,8 @@
# Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
# All Rights Reserved
#
+# Copyright (c) 2012, Joyent, Inc. All rights reserved.
+#
usage () {
if [ -n "$1" ]; then
@@ -148,6 +150,9 @@ isremote() {
# Get list of remote FS types (just once)
RemoteFSTypes=`while read t junk; do echo $t; done < /etc/dfs/fstypes`
+# Ensure nfs/smbfs are remote FS types even if not delivered from fstypes
+isremote "nfs" || set -A RemoteFSTypes "nfs"
+isremote "smbfs" || set -A RemoteFSTypes "smbfs"
#
# Process command line args
diff --git a/usr/src/cmd/initpkg/shutdown.sh b/usr/src/cmd/initpkg/shutdown.sh
index e65224c632..820d50310c 100644
--- a/usr/src/cmd/initpkg/shutdown.sh
+++ b/usr/src/cmd/initpkg/shutdown.sh
@@ -41,7 +41,7 @@ usage() {
}
notify() {
- /usr/sbin/wall -a <<-!
+ /usr/sbin/wall -Za <<-!
$*
!
if [ -x /usr/sbin/showmount -a -x /usr/sbin/rwall ]
diff --git a/usr/src/cmd/initpkg/umountall.sh b/usr/src/cmd/initpkg/umountall.sh
index c9a94fd8f1..4a45e19e18 100644
--- a/usr/src/cmd/initpkg/umountall.sh
+++ b/usr/src/cmd/initpkg/umountall.sh
@@ -25,6 +25,7 @@
# Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
# All Rights Reserved
#
+# Copyright (c) 2012, Joyent, Inc. All rights reserved.
#
usage () {
@@ -98,6 +99,9 @@ isremote() {
# Get list of remote FS types (just once)
RemoteFSTypes=`while read t junk; do echo $t; done < /etc/dfs/fstypes`
+# Ensure nfs/smbfs are remote FS types even if not delivered from fstypes
+isremote "nfs" || set -A RemoteFSTypes "nfs"
+isremote "smbfs" || set -A RemoteFSTypes "smbfs"
#
# Process command line args
diff --git a/usr/src/cmd/ipf/lib/common/load_hash.c b/usr/src/cmd/ipf/lib/common/load_hash.c
index e43ddf54a3..d91a831f44 100644
--- a/usr/src/cmd/ipf/lib/common/load_hash.c
+++ b/usr/src/cmd/ipf/lib/common/load_hash.c
@@ -7,6 +7,8 @@
*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
@@ -17,6 +19,10 @@
#include "netinet/ip_lookup.h"
#include "netinet/ip_htable.h"
+#if SOLARIS
+#include "ipfzone.h"
+#endif
+
static int hashfd = -1;
@@ -35,6 +41,12 @@ ioctlfunc_t iocfunc;
hashfd = open(IPLOOKUP_NAME, O_RDWR);
if ((hashfd == -1) && ((opts & OPT_DONOTHING) == 0))
return -1;
+#if SOLARIS
+ if (setzone(hashfd) != 0) {
+ close(hashfd);
+ return -1;
+ }
+#endif
for (n = 0, a = list; a != NULL; a = a->ipe_next)
n++;
diff --git a/usr/src/cmd/ipf/lib/common/load_hashnode.c b/usr/src/cmd/ipf/lib/common/load_hashnode.c
index 8cf1bb2c67..dd343f3ab1 100644
--- a/usr/src/cmd/ipf/lib/common/load_hashnode.c
+++ b/usr/src/cmd/ipf/lib/common/load_hashnode.c
@@ -7,6 +7,8 @@
*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
@@ -17,6 +19,10 @@
#include "netinet/ip_lookup.h"
#include "netinet/ip_htable.h"
+#if SOLARIS
+#include "ipfzone.h"
+#endif
+
static int hashfd = -1;
@@ -34,6 +40,12 @@ ioctlfunc_t iocfunc;
hashfd = open(IPLOOKUP_NAME, O_RDWR);
if ((hashfd == -1) && ((opts & OPT_DONOTHING) == 0))
return -1;
+#if SOLARIS
+ if (setzone(hashfd) != 0) {
+ close(hashfd);
+ return -1;
+ }
+#endif
op.iplo_type = IPLT_HASH;
op.iplo_unit = unit;
diff --git a/usr/src/cmd/ipf/lib/common/load_pool.c b/usr/src/cmd/ipf/lib/common/load_pool.c
index b8146c060c..e09f77f6b4 100644
--- a/usr/src/cmd/ipf/lib/common/load_pool.c
+++ b/usr/src/cmd/ipf/lib/common/load_pool.c
@@ -7,6 +7,8 @@
*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
@@ -17,6 +19,10 @@
#include "netinet/ip_lookup.h"
#include "netinet/ip_pool.h"
+#if SOLARIS
+#include "ipfzone.h"
+#endif
+
static int poolfd = -1;
@@ -32,6 +38,12 @@ ioctlfunc_t iocfunc;
poolfd = open(IPLOOKUP_NAME, O_RDWR);
if ((poolfd == -1) && ((opts & OPT_DONOTHING) == 0))
return -1;
+#if SOLARIS
+ if (setzone(poolfd) != 0) {
+ close(poolfd);
+ return -1;
+ }
+#endif
op.iplo_unit = plp->ipo_unit;
op.iplo_type = IPLT_POOL;
diff --git a/usr/src/cmd/ipf/lib/common/load_poolnode.c b/usr/src/cmd/ipf/lib/common/load_poolnode.c
index e992a80281..37c7ef861e 100644
--- a/usr/src/cmd/ipf/lib/common/load_poolnode.c
+++ b/usr/src/cmd/ipf/lib/common/load_poolnode.c
@@ -4,6 +4,8 @@
* See the IPFILTER.LICENCE file for details on licencing.
*
* $Id: load_poolnode.c,v 1.3.2.1 2004/03/06 14:33:29 darrenr Exp $
+ *
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#include <fcntl.h>
@@ -12,6 +14,10 @@
#include "netinet/ip_lookup.h"
#include "netinet/ip_pool.h"
+#if SOLARIS
+#include "ipfzone.h"
+#endif
+
static int poolfd = -1;
@@ -29,6 +35,12 @@ ioctlfunc_t iocfunc;
poolfd = open(IPLOOKUP_NAME, O_RDWR);
if ((poolfd == -1) && ((opts & OPT_DONOTHING) == 0))
return -1;
+#if SOLARIS
+ if (setzone(poolfd) != 0) {
+ close(poolfd);
+ return -1;
+ }
+#endif
op.iplo_unit = role;
op.iplo_type = IPLT_POOL;
diff --git a/usr/src/cmd/ipf/tools/Makefile.tools b/usr/src/cmd/ipf/tools/Makefile.tools
index 5d8bee1d04..ce0db79970 100644
--- a/usr/src/cmd/ipf/tools/Makefile.tools
+++ b/usr/src/cmd/ipf/tools/Makefile.tools
@@ -23,18 +23,19 @@
# Use is subject to license terms.
#
# Copyright 2013 Nexenta Systems, Inc. All rights reserved.
+# Copyright (c) 2012, Joyent Inc. All rights reserved.
#
PROG= ipf ipfs ipmon ipnat ippool ipfstat
IPFPROG= ipftest
-IPF_OBJS= ipf.o ipfcomp.o ipf_y.o ipf_l.o
-IPFS_OBJS= ipfs.o
-IPFSTAT_OBJS= ipfstat.o
-IPMON_OBJS= ipmon.o ipmon_y.o ipmon_l.o
-IPNAT_OBJS= ipnat.o ipnat_y.o ipnat_l.o
-IPPOOL_OBJS= ippool.o ippool_y.o ippool_l.o
-IPFTEST_OBJS= ipftest.o \
+IPF_OBJS= ipf.o ipfcomp.o ipfzone.o ipf_y.o ipf_l.o
+IPFS_OBJS= ipfs.o ipfzone.o
+IPFSTAT_OBJS= ipfstat.o ipfzone.o
+IPMON_OBJS= ipmon.o ipfzone.o ipmon_y.o ipmon_l.o
+IPNAT_OBJS= ipnat.o ipfzone.o ipnat_y.o ipnat_l.o
+IPPOOL_OBJS= ippool.o ipfzone.o ippool_y.o ippool_l.o
+IPFTEST_OBJS= ipftest.o ipfzone.o \
ip_fil.o ip_state.o ip_compat.o \
ip_frag.o ip_nat.o ip_nat6.o fil.o \
ip_htable.o ip_lookup.o \
diff --git a/usr/src/cmd/ipf/tools/ipf.c b/usr/src/cmd/ipf/tools/ipf.c
index e81389b342..97bee67b26 100644
--- a/usr/src/cmd/ipf/tools/ipf.c
+++ b/usr/src/cmd/ipf/tools/ipf.c
@@ -5,6 +5,8 @@
*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
#ifdef __FreeBSD__
@@ -21,6 +23,10 @@
#include <sys/ioctl.h>
#include "netinet/ipl.h"
+#ifdef SOLARIS
+#include "ipfzone.h"
+#endif
+
#if !defined(lint)
static const char sccsid[] = "@(#)ipf.c 1.23 6/5/96 (C) 1993-2000 Darren Reed";
static const char rcsid[] = "@(#)$Id: ipf.c,v 1.35.2.3 2004/12/15 18:27:17 darrenr Exp $";
@@ -62,9 +68,14 @@ static ioctlfunc_t iocfunctions[IPL_LOGSIZE] = { ioctl, ioctl, ioctl,
static void usage()
{
- fprintf(stderr, "usage: ipf [-6AdDEInoPrRsvVyzZ] %s %s %s\n",
+ fprintf(stderr, "usage: ipf [-6AdDEGInoPrRsvVyzZ] %s %s %s",
"[-l block|pass|nomatch|state|nat]", "[-cc] [-F i|o|a|s|S|u]",
"[-f filename] [-T <tuneopts>]");
+#if SOLARIS
+ fprintf(stderr, " [zonename]\n");
+#else
+ fprintf(stderr, "\n");
+#endif
exit(1);
}
@@ -74,11 +85,20 @@ int argc;
char *argv[];
{
int c;
+ const char *optstr = "6Ac:dDEf:F:GIl:noPrRsT:vVyzZ";
if (argc < 2)
usage();
- while ((c = getopt(argc, argv, "6Ac:dDEf:F:Il:noPrRsT:vVyzZ")) != -1) {
+#if SOLARIS
+ /*
+ * We need to set the zone name before calling the functions
+ * in the switch statement below
+ */
+ getzonearg(argc, argv, optstr);
+#endif
+
+ while ((c = getopt(argc, argv, optstr)) != -1) {
switch (c)
{
case '?' :
@@ -187,6 +207,14 @@ int check;
if ((fd = open(ipfdev, O_RDWR)) == -1)
if ((fd = open(ipfdev, O_RDONLY)) == -1)
perror("open device");
+
+#if SOLARIS
+ if (setzone(fd) != 0) {
+ close(fd);
+ return -1;
+ }
+#endif
+
return fd;
}
@@ -307,6 +335,13 @@ char *opt;
if (opts & OPT_VERBOSE)
printf("set state log flag\n");
xfd = open(IPSTATE_NAME, O_RDWR);
+#if SOLARIS
+ if (xfd >= 0 && setzone(xfd) != 0) {
+ close(xfd);
+ xfd = -1;
+ }
+#endif
+
if (xfd >= 0) {
logopt = 0;
if (ioctl(xfd, SIOCGETLG, &logopt))
@@ -324,6 +359,13 @@ char *opt;
if (opts & OPT_VERBOSE)
printf("set nat log flag\n");
xfd = open(IPNAT_NAME, O_RDWR);
+#if SOLARIS
+ if (xfd >= 0 && setzone(xfd) != 0) {
+ close(xfd);
+ xfd = -1;
+ }
+#endif
+
if (xfd >= 0) {
logopt = 0;
if (ioctl(xfd, SIOCGETLG, &logopt))
@@ -516,6 +558,14 @@ static int showversion()
return 1;
}
+#if SOLARIS
+ if (setzone(vfd) != 0) {
+ close(vfd);
+ return 1;
+ }
+#endif
+
+
if (ioctl(vfd, SIOCGETFS, &ipfo)) {
perror("ioctl(SIOCGETFS)");
close(vfd);
diff --git a/usr/src/cmd/ipf/tools/ipfs.c b/usr/src/cmd/ipf/tools/ipfs.c
index da8387324a..72296a09d0 100644
--- a/usr/src/cmd/ipf/tools/ipfs.c
+++ b/usr/src/cmd/ipf/tools/ipfs.c
@@ -5,6 +5,8 @@
*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
#ifdef __FreeBSD__
@@ -44,6 +46,9 @@
#include <resolv.h>
#include "ipf.h"
#include "netinet/ipl.h"
+#if SOLARIS
+#include "ipfzone.h"
+#endif
#if !defined(lint)
static const char rcsid[] = "@(#)Id: ipfs.c,v 1.12 2003/12/01 01:56:53 darrenr Exp";
@@ -86,14 +91,21 @@ char *progname;
void usage()
{
- fprintf(stderr, "usage: %s [-nv] -l\n", progname);
- fprintf(stderr, "usage: %s [-nv] -u\n", progname);
- fprintf(stderr, "usage: %s [-nv] [-d <dir>] -R\n", progname);
- fprintf(stderr, "usage: %s [-nv] [-d <dir>] -W\n", progname);
- fprintf(stderr, "usage: %s [-nv] [-N|-S] [-f <file>] -r\n", progname);
- fprintf(stderr, "usage: %s [-nv] [-N|-S] [-f <file>] -w\n", progname);
- fprintf(stderr, "usage: %s [-nv] [-N|-S] -f <file> -i <if1>,<if2>\n",
- progname);
+#if SOLARIS
+ const char *zoneopt = "[-G|-z zonename] ";
+#else
+ const char *zoneopt = "";
+#endif
+ fprintf(stderr, "usage: %s %s[-nv] -l\n", progname, zoneopt);
+ fprintf(stderr, "usage: %s %s[-nv] -u\n", progname, zoneopt);
+ fprintf(stderr, "usage: %s %s[-nv] [-d <dir>] -R\n", progname, zoneopt);
+ fprintf(stderr, "usage: %s %s[-nv] [-d <dir>] -W\n", progname, zoneopt);
+ fprintf(stderr, "usage: %s %s[-nv] [-N|-S] [-f <file>] -r\n", progname,
+ zoneopt);
+ fprintf(stderr, "usage: %s %s[-nv] [-N|-S] [-f <file>] -w\n", progname,
+ zoneopt);
+ fprintf(stderr, "usage: %s %s[-nv] [-N|-S] -f <file> -i <if1>,<if2>\n",
+ progname, zoneopt);
exit(1);
}
@@ -218,7 +230,7 @@ char *argv[];
char *dirname = NULL, *filename = NULL, *ifs = NULL;
progname = argv[0];
- while ((c = getopt(argc, argv, "d:f:lNnSRruvWw")) != -1)
+ while ((c = getopt(argc, argv, "d:f:G:lNnSRruvWwz:")) != -1)
switch (c)
{
case 'd' :
@@ -233,6 +245,11 @@ char *argv[];
else
usage();
break;
+#if SOLARIS
+ case 'G' :
+ setzonename_global(optarg);
+ break;
+#endif
case 'i' :
ifs = optarg;
set = 1;
@@ -287,6 +304,11 @@ char *argv[];
rw = 3;
set = 1;
break;
+#if SOLARIS
+ case 'z' :
+ setzonename(optarg);
+ break;
+#endif
case '?' :
default :
usage();
@@ -355,6 +377,14 @@ char *ipfdev;
if ((fd = open(ipfdev, O_RDWR)) == -1)
if ((fd = open(ipfdev, O_RDONLY)) == -1)
perror("open device");
+
+#if SOLARIS
+ if (setzone(fd) != 0) {
+ close(fd);
+ fd = -1;
+ }
+#endif
+
return fd;
}
diff --git a/usr/src/cmd/ipf/tools/ipfstat.c b/usr/src/cmd/ipf/tools/ipfstat.c
index 27b018e273..49ee7cd23d 100644
--- a/usr/src/cmd/ipf/tools/ipfstat.c
+++ b/usr/src/cmd/ipf/tools/ipfstat.c
@@ -5,6 +5,8 @@
*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
#ifdef __FreeBSD__
@@ -162,6 +164,10 @@ static int sort_dstip __P((const void *, const void *));
static int sort_dstpt __P((const void *, const void *));
#endif
+#if SOLARIS
+#include "ipfzone.h"
+#endif
+
static void usage(name)
char *name;
@@ -177,6 +183,9 @@ char *name;
#else
fprintf(stderr, " %s -t [-C] ", name);
#endif
+#ifdef SOLARIS
+ fprintf(stderr, "[-G|-z zonename] ");
+#endif
fprintf(stderr, "[-D destination address] [-P protocol] [-S source address] [-T refresh time]\n");
exit(1);
}
@@ -207,9 +216,9 @@ char *argv[];
u_32_t frf;
#ifdef USE_INET6
- options = "6aACdfghIilnostvD:M:N:P:RS:T:";
+ options = "6aACdfgG:hIilnostvD:M:N:P:RS:T:z:";
#else
- options = "aACdfghIilnostvD:M:N:P:RS:T:";
+ options = "aACdfgG:hIilnostvD:M:N:P:RS:T:z:";
#endif
saddr.in4.s_addr = INADDR_ANY; /* default any v4 source addr */
@@ -223,13 +232,20 @@ char *argv[];
opterr = 0;
/*
- * Parse these two arguments now lest there be any buffer overflows
+ * Parse these four arguments now lest there be any buffer overflows
* in the parsing of the rest.
*/
myoptind = optind;
while ((c = getopt(argc, argv, options)) != -1) {
switch (c)
{
+ case 'G' :
+#if SOLARIS
+ setzonename_global(optarg);
+#else
+ usage(argv[0]);
+#endif
+ break;
case 'M' :
memf = optarg;
live_kernel = 0;
@@ -238,6 +254,13 @@ char *argv[];
kern = optarg;
live_kernel = 0;
break;
+ case 'z' :
+#if SOLARIS
+ setzonename(optarg);
+#else
+ usage(argv[0]);
+#endif
+ break;
}
}
optind = myoptind;
@@ -247,11 +270,23 @@ char *argv[];
perror("open(IPSTATE_NAME)");
exit(-1);
}
+#if SOLARIS
+ if (setzone(state_fd) != 0) {
+ close(state_fd);
+ exit(-1);
+ }
+#endif
if ((ipf_fd = open(device, O_RDONLY)) == -1) {
fprintf(stderr, "open(%s)", device);
perror("");
exit(-1);
}
+#if SOLARIS
+ if (setzone(ipf_fd) != 0) {
+ close(ipf_fd);
+ exit(-1);
+ }
+#endif
}
if (kern != NULL || memf != NULL) {
@@ -298,6 +333,8 @@ char *argv[];
case 'g' :
opts |= OPT_GROUPS;
break;
+ case 'G' :
+ break;
case 'h' :
opts |= OPT_HITS;
break;
@@ -360,6 +397,8 @@ char *argv[];
opts |= OPT_VERBOSE;
opts |= OPT_UNDEF;
break;
+ case 'z' :
+ break;
default :
usage(argv[0]);
break;
@@ -482,6 +521,13 @@ u_32_t *frfp;
exit(-1);
}
+#if SOLARIS
+ if (setzone(ipf_fd) != 0) {
+ close(ipf_fd);
+ exit(-1);
+ }
+#endif
+
bzero((caddr_t)&ipfo, sizeof(ipfo));
ipfo.ipfo_rev = IPFILTER_VERSION;
ipfo.ipfo_size = sizeof(fr_authstat_t);
diff --git a/usr/src/cmd/ipf/tools/ipfzone.c b/usr/src/cmd/ipf/tools/ipfzone.c
new file mode 100644
index 0000000000..c52b3e879d
--- /dev/null
+++ b/usr/src/cmd/ipf/tools/ipfzone.c
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2013 Joyent, Inc. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * See the IPFILTER.LICENCE file for details on licensing.
+ */
+
+
+#include <errno.h>
+#include <net/if.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <zone.h>
+
+#include "netinet/ip_fil.h"
+#include "ipfzone.h"
+
+static ipfzoneobj_t ipzo;
+static boolean_t do_setzone = 0;
+static int num_setzones = 0;
+
+extern int errno;
+extern int optind;
+extern char *optarg;
+
+/*
+ * Get the zonename if it's the last argument and set the zonename
+ * in ipfzo to it
+ */
+void
+getzonearg(int argc, char *argv[], const char *optstr)
+{
+ int c;
+
+ /*
+ * getopt is also used here to set optind so that we can
+ * determine if the last argument belongs to a flag or is
+ * actually a zonename.
+ */
+ while ((c = getopt(argc, argv, optstr)) != -1) {
+ if (c == 'G')
+ ipzo.ipfz_gz = 1;
+ }
+
+ if (optind < argc)
+ setzonename(argv[optind]);
+
+ /*
+ * Reset optind so the next getopt call will go through all of argv
+ * again.
+ */
+ optind = 1;
+}
+
+/*
+ * Get a -z option from argv and set the zonename in ipfzo accordingly
+ */
+void
+getzoneopt(int argc, char *argv[], const char *optstr)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, optstr)) != -1) {
+ if (c == 'G')
+ setzonename_global(optarg);
+
+ if (c == 'z')
+ setzonename(optarg);
+ }
+
+ /*
+ * Reset optind so the next getopt call will go through all of argv
+ * again.
+ */
+ optind = 1;
+}
+
+/*
+ * Set the zonename in ipfzo to the given string
+ */
+void
+setzonename(const char *zonename)
+{
+ memcpy(ipzo.ipfz_zonename, zonename, sizeof (ipzo.ipfz_zonename));
+ do_setzone = B_TRUE;
+ num_setzones++;
+}
+
+/*
+ * Set the zonename in ipfo, and the gz flag to indicate that we want to
+ * act on the GZ-controlled stack
+ */
+void
+setzonename_global(const char *zonename)
+{
+ setzonename(zonename);
+ ipzo.ipfz_gz = 1;
+}
+
+/*
+ * Set the zone that all further ioctls will operate on
+ */
+int
+setzone(int fd)
+{
+ if (!do_setzone)
+ return (0);
+
+ if (num_setzones > 1) {
+ (void) fprintf(stderr,
+ "Only one of -G and -z may be set\n");
+ return (-1);
+ }
+
+ if (ioctl(fd, SIOCIPFZONESET, &ipzo) == -1) {
+ switch (errno) {
+ case ENODEV:
+ (void) fprintf(stderr,
+ "Could not find running zone: %s\n",
+ ipzo.ipfz_zonename);
+ break;
+ case EACCES:
+ (void) fprintf(stderr,
+ "Permission denied setting zone: %s\n",
+ ipzo.ipfz_zonename);
+ break;
+ default:
+ perror("Error setting zone");
+ }
+ return (-1);
+ }
+
+ return (0);
+}
diff --git a/usr/src/cmd/ipf/tools/ipfzone.h b/usr/src/cmd/ipf/tools/ipfzone.h
new file mode 100644
index 0000000000..ca6f42ec6a
--- /dev/null
+++ b/usr/src/cmd/ipf/tools/ipfzone.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2013 Joyent, Inc. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * See the IPFILTER.LICENCE file for details on licensing.
+ */
+
+#ifndef __IPFZONE_H__
+#define __IPFZONE_H__
+
+void getzonearg(int, char *[], const char *);
+void getzoneopt(int, char *[], const char *);
+int setzone(int);
+void setzonename(const char *);
+void setzonename_global(const char *);
+
+#endif /* __IPFZONE_H__ */
diff --git a/usr/src/cmd/ipf/tools/ipmon.c b/usr/src/cmd/ipf/tools/ipmon.c
index d4a351b015..07d3d40be2 100644
--- a/usr/src/cmd/ipf/tools/ipmon.c
+++ b/usr/src/cmd/ipf/tools/ipmon.c
@@ -5,6 +5,8 @@
*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
@@ -78,6 +80,9 @@
#include "netinet/ip_state.h"
#include "netinet/ip_proxy.h"
#include "ipmon.h"
+#if SOLARIS
+#include "ipfzone.h"
+#endif
#if !defined(lint)
static const char sccsid[] = "@(#)ipmon.c 1.21 6/5/96 (C)1993-2000 Darren Reed";
@@ -1340,8 +1345,13 @@ printipflog:
static void usage(prog)
char *prog;
{
- fprintf(stderr, "%s: [-abDFhnpstvxX] %s %s %s %s %s %s\n",
- prog, "[-N device]", "[ [-o [NSI]] [-O [NSI]]",
+#if SOLARIS
+ const char *zoneopt = " [-G|-z zonename]";
+#else
+ const char *zoneopt = "";
+#endif
+ fprintf(stderr, "%s: [-abDFhnpstvxX]%s %s %s %s %s %s %s\n",
+ prog, zoneopt, "[-N device]", "[ [-o [NSI]] [-O [NSI]]",
"[-P pidfile]", "[-S device]", "[-f device]",
"filename");
exit(1);
@@ -1380,6 +1390,13 @@ FILE *log;
exit(1);
}
+#if SOLARIS
+ if (setzone(fd) != 0) {
+ close(fd);
+ exit(1);
+ }
+#endif
+
if (ioctl(fd, SIOCIPFFB, &flushed) == 0) {
printf("%d bytes flushed from log buffer\n",
flushed);
@@ -1444,6 +1461,7 @@ char *argv[];
char buf[DEFAULT_IPFLOGSIZE], *iplfile[3], *s;
extern int optind;
extern char *optarg;
+ const char *optstr = "?abB:C:Df:G:FhnN:o:O:pP:sS:tvxXz:";
fd[0] = fd[1] = fd[2] = -1;
fdt[0] = fdt[1] = fdt[2] = -1;
@@ -1451,7 +1469,15 @@ char *argv[];
iplfile[1] = IPNAT_NAME;
iplfile[2] = IPSTATE_NAME;
- while ((c = getopt(argc, argv, "?abB:C:Df:FhnN:o:O:pP:sS:tvxX")) != -1)
+#if SOLARIS
+ /*
+ * We need to set the zone name before calling openlog in
+ * the switch statement below
+ */
+ getzoneopt(argc, argv, optstr);
+#endif
+
+ while ((c = getopt(argc, argv, optstr)) != -1)
switch (c)
{
case 'a' :
@@ -1483,6 +1509,11 @@ char *argv[];
flushlogs(iplfile[1], log);
flushlogs(iplfile[2], log);
break;
+#if SOLARIS
+ case 'G' :
+ /* Already handled by getzoneopt() above */
+ break;
+#endif
case 'n' :
opts |= OPT_RESOLVE;
break;
@@ -1535,6 +1566,11 @@ char *argv[];
case 'X' :
opts |= OPT_HEXHDR;
break;
+#if SOLARIS
+ case 'z' :
+ /* Already handled by getzoneopt() above */
+ break;
+#endif
default :
case 'h' :
case '?' :
@@ -1571,6 +1607,13 @@ char *argv[];
exit(1);
/* NOTREACHED */
}
+
+#if SOLARIS
+ if (setzone(fd[i]) != 0) {
+ close(fd[i]);
+ exit(1);
+ }
+#endif
if (!(regular[i] = !S_ISCHR(sb.st_mode)))
devices++;
}
diff --git a/usr/src/cmd/ipf/tools/ipnat.c b/usr/src/cmd/ipf/tools/ipnat.c
index 4a9a37a4a9..a7e37b0295 100644
--- a/usr/src/cmd/ipf/tools/ipnat.c
+++ b/usr/src/cmd/ipf/tools/ipnat.c
@@ -7,6 +7,8 @@
*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
#include <stdio.h>
@@ -56,6 +58,10 @@
#include "netinet/ipl.h"
#include "kmem.h"
+#if SOLARIS
+#include "ipfzone.h"
+#endif
+
#ifdef __hpux
# define nlist nlist64
#endif
@@ -95,7 +101,12 @@ int opts;
void usage(name)
char *name;
{
- fprintf(stderr, "Usage: %s [-CdFhlnrRsv] [-f filename]\n", name);
+ fprintf(stderr, "Usage: %s [-CdFhlnrRsv] [-f filename]", name);
+#if SOLARIS
+ fprintf(stderr, " [-G|-z zonename]\n");
+#else
+ fprintf(stderr, "\n");
+#endif
exit(1);
}
@@ -117,7 +128,7 @@ char *argv[];
kernel = NULL;
mode = O_RDWR;
- while ((c = getopt(argc, argv, "CdFf:hlM:N:nrRsv")) != -1)
+ while ((c = getopt(argc, argv, "CdFf:G:hlM:N:nrRsvz:")) != -1)
switch (c)
{
case 'C' :
@@ -132,6 +143,11 @@ char *argv[];
case 'F' :
opts |= OPT_FLUSH;
break;
+#if SOLARIS
+ case 'G' :
+ setzonename_global(optarg);
+ break;
+#endif
case 'h' :
opts |=OPT_HITS;
break;
@@ -162,6 +178,11 @@ char *argv[];
case 'v' :
opts |= OPT_VERBOSE;
break;
+#if SOLARIS
+ case 'z' :
+ setzonename(optarg);
+ break;
+#endif
default :
usage(argv[0]);
}
@@ -194,6 +215,12 @@ char *argv[];
STRERROR(errno));
exit(1);
}
+#if SOLARIS
+ if (setzone(fd) != 0) {
+ close(fd);
+ exit(1);
+ }
+#endif
bzero((char *)&obj, sizeof(obj));
obj.ipfo_rev = IPFILTER_VERSION;
diff --git a/usr/src/cmd/ipf/tools/ippool.c b/usr/src/cmd/ipf/tools/ippool.c
index 5f18379420..3eca3e1776 100644
--- a/usr/src/cmd/ipf/tools/ippool.c
+++ b/usr/src/cmd/ipf/tools/ippool.c
@@ -5,6 +5,8 @@
*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
@@ -42,6 +44,10 @@
#include "netinet/ip_htable.h"
#include "kmem.h"
+#if SOLARIS
+#include "ipfzone.h"
+#endif
+
extern int ippool_yyparse __P((void));
extern int ippool_yydebug;
extern FILE *ippool_yyin;
@@ -71,15 +77,25 @@ int use_inet6 = 0;
void usage(prog)
char *prog;
{
+#if SOLARIS
+ const char *zoneopt = "[-G|-z zonename] ";
+#else
+ const char *zoneopt = "";
+#endif
fprintf(stderr, "Usage:\t%s\n", prog);
- fprintf(stderr, "\t\t\t-a [-dnv] [-m <name>] [-o <role>] -i <ipaddr>[/netmask]\n");
- fprintf(stderr, "\t\t\t-A [-dnv] [-m <name>] [-o <role>] [-S <seed>] [-t <type>]\n");
- fprintf(stderr, "\t\t\t-f <file> [-dnuv]\n");
- fprintf(stderr, "\t\t\t-F [-dv] [-o <role>] [-t <type>]\n");
- fprintf(stderr, "\t\t\t-l [-dv] [-m <name>] [-t <type>]\n");
- fprintf(stderr, "\t\t\t-r [-dnv] [-m <name>] [-o <role>] -i <ipaddr>[/netmask]\n");
- fprintf(stderr, "\t\t\t-R [-dnv] [-m <name>] [-o <role>] [-t <type>]\n");
- fprintf(stderr, "\t\t\t-s [-dtv] [-M <core>] [-N <namelist>]\n");
+ fprintf(stderr, "\t\t\t-a [-dnv] %s[-m <name>] [-o <role>] -i <ipaddr>[/netmask]\n",
+ zoneopt);
+ fprintf(stderr, "\t\t\t-A [-dnv] %s[-m <name>] [-o <role>] [-S <seed>] [-t <type>]\n",
+ zoneopt);
+ fprintf(stderr, "\t\t\t-f <file> %s[-dnuv]\n", zoneopt);
+ fprintf(stderr, "\t\t\t-F [-dv] %s[-o <role>] [-t <type>]\n", zoneopt);
+ fprintf(stderr, "\t\t\t-l [-dv] %s[-m <name>] [-t <type>]\n", zoneopt);
+ fprintf(stderr, "\t\t\t-r [-dnv] %s[-m <name>] [-o <role>] -i <ipaddr>[/netmask]\n",
+ zoneopt);
+ fprintf(stderr, "\t\t\t-R [-dnv] %s[-m <name>] [-o <role>] [-t <type>]\n",
+ zoneopt);
+ fprintf(stderr, "\t\t\t-s [-dtv] %s[-M <core>] [-N <namelist>]\n",
+ zoneopt);
exit(1);
}
@@ -140,13 +156,20 @@ char *argv[];
role = IPL_LOGIPF;
bzero((char *)&node, sizeof(node));
- while ((c = getopt(argc, argv, "di:m:no:Rv")) != -1)
+ while ((c = getopt(argc, argv, "di:G:m:no:Rvz:")) != -1)
switch (c)
{
case 'd' :
opts |= OPT_DEBUG;
ippool_yydebug++;
break;
+ case 'G' :
+#if SOLARIS
+ setzonename_global(optarg);
+#else
+ usage(argv[0]);
+#endif
+ break;
case 'i' :
s = strchr(optarg, '/');
if (s == NULL)
@@ -182,6 +205,13 @@ char *argv[];
case 'v' :
opts |= OPT_VERBOSE;
break;
+ case 'z' :
+#if SOLARIS
+ setzonename(optarg);
+#else
+ usage(argv[0]);
+#endif
+ break;
}
if (opts & OPT_DEBUG)
@@ -219,13 +249,19 @@ char *argv[];
bzero((char *)&iph, sizeof(iph));
bzero((char *)&pool, sizeof(pool));
- while ((c = getopt(argc, argv, "dm:no:RS:t:v")) != -1)
+ while ((c = getopt(argc, argv, "dG:m:no:RS:t:vz:")) != -1)
switch (c)
{
case 'd' :
opts |= OPT_DEBUG;
ippool_yydebug++;
break;
+ case 'G' :
+#if SOLARIS
+ setzonename_global(optarg);
+#else
+ usage(argv[0]);
+#endif
case 'm' :
poolname = optarg;
break;
@@ -255,6 +291,13 @@ char *argv[];
case 'v' :
opts |= OPT_VERBOSE;
break;
+ case 'z' :
+#if SOLARIS
+ setzonename(optarg);
+#else
+ usage(argv[0]);
+#endif
+ break;
}
if (opts & OPT_DEBUG)
@@ -308,13 +351,20 @@ char *argv[], *infile;
infile = optarg;
- while ((c = getopt(argc, argv, "dnRuv")) != -1)
+ while ((c = getopt(argc, argv, "dG:nRuvz:")) != -1)
switch (c)
{
case 'd' :
opts |= OPT_DEBUG;
ippool_yydebug++;
break;
+ case 'G' :
+#if SOLARIS
+ setzonename_global(optarg);
+#else
+ usage(argv[0]);
+#endif
+ break;
case 'n' :
opts |= OPT_DONOTHING;
break;
@@ -327,6 +377,13 @@ char *argv[], *infile;
case 'v' :
opts |= OPT_VERBOSE;
break;
+ case 'z' :
+#if SOLARIS
+ setzonename(optarg);
+#else
+ usage(argv[0]);
+#endif
+ break;
}
if (opts & OPT_DEBUG)
@@ -338,6 +395,12 @@ char *argv[], *infile;
perror("open(IPLOOKUP_NAME)");
exit(1);
}
+#if SOLARIS
+ if (setzone(fd) != 0) {
+ close(fd);
+ exit(1);
+ }
+#endif
}
if (ippool_parsefile(fd, infile, ioctl) != 0)
@@ -365,12 +428,19 @@ char *argv[];
poolname = NULL;
role = IPL_LOGALL;
- while ((c = getopt(argc, argv, "dm:M:N:o:Rt:v")) != -1)
+ while ((c = getopt(argc, argv, "dG:m:M:N:o:Rt:vz:")) != -1)
switch (c)
{
case 'd' :
opts |= OPT_DEBUG;
break;
+ case 'G' :
+#if SOLARIS
+ setzonename_global(optarg);
+#else
+ usage(argv[0]);
+#endif
+ break;
case 'm' :
poolname = optarg;
break;
@@ -402,6 +472,13 @@ char *argv[];
case 'v' :
opts |= OPT_VERBOSE;
break;
+ case 'z' :
+#if SOLARIS
+ setzonename(optarg);
+#else
+ usage(argv[0]);
+#endif
+ break;
}
if (opts & OPT_DEBUG)
@@ -413,6 +490,12 @@ char *argv[];
perror("open(IPLOOKUP_NAME)");
exit(1);
}
+#if SOLARIS
+ if (setzone(fd) != 0) {
+ close(fd);
+ exit(1);
+ }
+#endif
}
bzero((char *)&op, sizeof(op));
@@ -615,12 +698,19 @@ char *argv[];
bzero((char *)&op, sizeof(op));
- while ((c = getopt(argc, argv, "dM:N:o:t:v")) != -1)
+ while ((c = getopt(argc, argv, "dG:M:N:o:t:vz:")) != -1)
switch (c)
{
case 'd' :
opts |= OPT_DEBUG;
break;
+ case 'G' :
+#if SOLARIS
+ setzonename_global(optarg);
+#else
+ usage(argv[0]);
+#endif
+ break;
case 'M' :
live_kernel = 0;
core = optarg;
@@ -647,6 +737,13 @@ char *argv[];
case 'v' :
opts |= OPT_VERBOSE;
break;
+ case 'z' :
+#if SOLARIS
+ setzonename(optarg);
+#else
+ usage(argv[0]);
+#endif
+ break;
}
if (opts & OPT_DEBUG)
@@ -658,6 +755,12 @@ char *argv[];
perror("open(IPLOOKUP_NAME)");
exit(1);
}
+#if SOLARIS
+ if (setzone(fd) != 0) {
+ close(fd);
+ exit(1);
+ }
+#endif
}
if (type == IPLT_ALL || type == IPLT_POOL) {
@@ -705,7 +808,7 @@ char *argv[];
type = IPLT_ALL;
role = IPL_LOGALL;
- while ((c = getopt(argc, argv, "do:t:v")) != -1)
+ while ((c = getopt(argc, argv, "do:t:vz:")) != -1)
switch (c)
{
case 'd' :
@@ -728,6 +831,13 @@ char *argv[];
case 'v' :
opts |= OPT_VERBOSE;
break;
+ case 'z' :
+#if SOLARIS
+ setzonename(optarg);
+#else
+ usage(argv[0]);
+#endif
+ break;
}
if (opts & OPT_DEBUG)
@@ -739,6 +849,12 @@ char *argv[];
perror("open(IPLOOKUP_NAME)");
exit(1);
}
+#if SOLARIS
+ if (setzone(fd) != 0) {
+ close(fd);
+ exit(1);
+ }
+#endif
}
bzero((char *)&flush, sizeof(flush));
diff --git a/usr/src/cmd/krb5/kadmin/Makefile b/usr/src/cmd/krb5/kadmin/Makefile
index de893f05a4..8d0dbdc93f 100644
--- a/usr/src/cmd/krb5/kadmin/Makefile
+++ b/usr/src/cmd/krb5/kadmin/Makefile
@@ -25,7 +25,7 @@
include ../../Makefile.cmd
-SUBDIRS= cli dbutil ktutil kpasswd server kclient kdcmgr gui
+SUBDIRS= cli dbutil ktutil kpasswd server kclient kdcmgr
all := TARGET= all
clean := TARGET= clean
diff --git a/usr/src/cmd/ksh/Makefile.com b/usr/src/cmd/ksh/Makefile.com
index 8716f4eb9d..e5615cb407 100644
--- a/usr/src/cmd/ksh/Makefile.com
+++ b/usr/src/cmd/ksh/Makefile.com
@@ -36,11 +36,13 @@ LIBSHELLBASE=../../../lib/libshell
LIBSHELLSRC=$(LIBSHELLBASE)/common/sh
SRCS= $(OBJECTS:%.o=$(LIBSHELLSRC)/%.c)
+OBJS= $(OBJECTS)
LDLIBS += -lshell
# Set common AST build flags (e.g., needed to support the math stuff).
include ../../../Makefile.ast
+include ../../Makefile.ctf
# 1. Make sure that the -D/-U defines in CFLAGS below are in sync
# with usr/src/lib/libshell/Makefile.com
diff --git a/usr/src/cmd/localedef/Makefile b/usr/src/cmd/localedef/Makefile
index 4b012d0c45..ad0a46f5ed 100644
--- a/usr/src/cmd/localedef/Makefile
+++ b/usr/src/cmd/localedef/Makefile
@@ -13,6 +13,7 @@
# Copyright 2011 Nexenta Systems, Inc. All rights reserved.
# Copyright 2011 EveryCity Ltd. All rights reserved.
# Copyright 2013 DEY Storage Systems, Inc.
+# Copyright 2012 Joyent, Inc. All rights reserved.
#
PROG=localedef
@@ -51,13 +52,13 @@ POFILE = localedef_cmd.po
ISO8859_1_LOCALES = \
da_DK \
- de_CH \
+ de_CH de_DE \
en_AU en_CA en_GB en_NZ en_US \
- es_AR es_BO es_CL es_CO es_EC es_GT es_MX es_NI es_PA \
+ es_AR es_BO es_CL es_CO es_EC es_ES es_GT es_MX es_NI es_PA \
es_PE es_SV es_UY es_VE \
- fr_CA fr_CH \
+ fr_CA fr_CH fr_FR \
is_IS \
- it_CH \
+ it_CH it_IT \
sv_SE
ISO8859_2_LOCALES = \
@@ -275,8 +276,11 @@ locale/%.UTF-8/stamp: data/%.UTF-8.src UTF-8.cm \
$(SED) '/^LC_CTYPE/,/^END LC_CTYPE/d;$$r UTF-8.ct' $< | \
./$(PROG) -U -w data/widths.txt -f UTF-8.cm $(@D)
$(TOUCH) $@
+# Convert EURO_SIGN to CURRENCY_SIGN for the ISO8859-1 locales
locale/%.ISO8859-1/stamp: data/%.UTF-8.src 8859-1.cm locale $(PROG)
- ./$(PROG) -U -w data/widths.txt -i $< -f 8859-1.cm $(@D)
+ sed 's/EURO_SIGN/CURRENCY_SIGN/' $< > $<.tmp
+ ./$(PROG) -U -w data/widths.txt -i $<.tmp -f 8859-1.cm $(@D)
+ rm -f $<.tmp
$(TOUCH) $@
locale/%.ISO8859-2/stamp: data/%.UTF-8.src 8859-2.cm locale $(PROG)
./$(PROG) -U -w data/widths.txt -i $< -f 8859-2.cm $(@D)
diff --git a/usr/src/cmd/lofiadm/main.c b/usr/src/cmd/lofiadm/main.c
index 0740bce1b7..0c32b69323 100644
--- a/usr/src/cmd/lofiadm/main.c
+++ b/usr/src/cmd/lofiadm/main.c
@@ -349,7 +349,8 @@ out:
* DO NOT use this function if the filename is actually the device name.
*/
static int
-lofi_map_file(int lfd, struct lofi_ioctl li, const char *filename)
+lofi_map_file(int lfd, struct lofi_ioctl li, const char *filename,
+ boolean_t no_devlink_flag)
{
int minor;
@@ -362,7 +363,8 @@ lofi_map_file(int lfd, struct lofi_ioctl li, const char *filename)
"unsupported"));
die(gettext("could not map file %s"), filename);
}
- wait_until_dev_complete(minor);
+ if (!no_devlink_flag)
+ wait_until_dev_complete(minor);
return (minor);
}
@@ -372,7 +374,8 @@ lofi_map_file(int lfd, struct lofi_ioctl li, const char *filename)
*/
static void
add_mapping(int lfd, const char *devicename, const char *filename,
- mech_alias_t *cipher, const char *rkey, size_t rksz, boolean_t rdonly)
+ mech_alias_t *cipher, const char *rkey, size_t rksz,
+ boolean_t rdonly, boolean_t no_devlink_flag)
{
struct lofi_ioctl li;
@@ -408,7 +411,7 @@ add_mapping(int lfd, const char *devicename, const char *filename,
int minor;
/* pick one via the driver */
- minor = lofi_map_file(lfd, li, filename);
+ minor = lofi_map_file(lfd, li, filename, no_devlink_flag);
/* if mapping succeeds, print the one picked */
(void) printf("/dev/%s/%d\n", LOFI_BLOCK_NAME, minor);
return;
@@ -429,7 +432,8 @@ add_mapping(int lfd, const char *devicename, const char *filename,
die(gettext("could not map file %s to %s"), filename,
devicename);
}
- wait_until_dev_complete(li.li_minor);
+ if (!no_devlink_flag)
+ wait_until_dev_complete(li.li_minor);
}
/*
@@ -1311,7 +1315,7 @@ lofi_uncompress(int lfd, const char *filename)
if (statbuf.st_size == 0)
return;
- minor = lofi_map_file(lfd, li, filename);
+ minor = lofi_map_file(lfd, li, filename, B_FALSE);
(void) snprintf(devicename, sizeof (devicename), "/dev/%s/%d",
LOFI_BLOCK_NAME, minor);
@@ -1817,6 +1821,7 @@ main(int argc, char *argv[])
boolean_t ephflag = B_FALSE;
boolean_t compressflag = B_FALSE;
boolean_t uncompressflag = B_FALSE;
+ boolean_t no_devlink_flag = B_FALSE;
/* the next two work together for -c, -k, -T, -e options only */
boolean_t need_crypto = B_FALSE; /* if any -c, -k, -T, -e */
boolean_t cipher_only = B_TRUE; /* if -c only */
@@ -1832,7 +1837,7 @@ main(int argc, char *argv[])
(void) setlocale(LC_ALL, "");
(void) textdomain(TEXT_DOMAIN);
- while ((c = getopt(argc, argv, "a:c:Cd:efk:o:rs:T:U")) != EOF) {
+ while ((c = getopt(argc, argv, "a:c:Cd:efk:o:rs:T:UX")) != EOF) {
switch (c) {
case 'a':
addflag = B_TRUE;
@@ -1910,6 +1915,13 @@ main(int argc, char *argv[])
case 'U':
uncompressflag = B_TRUE;
break;
+ case 'X':
+ /*
+ * Private flag to skip the wait for the /dev links to
+ * be created.
+ */
+ no_devlink_flag = B_TRUE;
+ break;
case '?':
default:
errflag = B_TRUE;
@@ -2038,7 +2050,7 @@ main(int argc, char *argv[])
*/
if (addflag)
add_mapping(lfd, devicename, filename, cipher, rkey, rksz,
- rdflag);
+ rdflag, no_devlink_flag);
else if (compressflag)
lofi_compress(&lfd, filename, compress_index, segsize);
else if (uncompressflag)
diff --git a/usr/src/cmd/lp/terminfo/40.ti b/usr/src/cmd/lp/terminfo/40.ti
new file mode 100644
index 0000000000..2e08ece1ca
--- /dev/null
+++ b/usr/src/cmd/lp/terminfo/40.ti
@@ -0,0 +1,68 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
+# All Rights Reserved
+
+
+#ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.4 */
+
+######################################################################
+#
+# Entries for the AT&T Model 40 line printers
+#
+
+
+
+40-80-6|att40-80-6|AT&T Model 40 line printer 80 cloumn 6 line per inch,
+
+ daisy,
+ bufsz#160,
+ cols#80,
+ lines#66,
+ orc#1,
+ orhi#10,
+ orl#1,
+ orvi#6,
+ cps#400,
+
+ cr=^M,
+ cuf1=\s,
+ ff=^L,
+
+40-80-8|att40-80-8|AT&T Model 40 line printer 80 cloumn 8 line per inch,
+
+ lines#88,
+ orvi#8,
+ use=40-80-6,
+
+40-132-6|att40-132-6|AT&T Model 40 line printer 132 cloumn 6 line per inch,
+
+ bufsz#264,
+ cols#132,
+ use=40-80-6,
+
+40-132-8|att40-132-8|AT&T Model 40 line printer 132 cloumn 8 line per inch,
+
+ lines#88,
+ orvi#8,
+ use=40-132-6,
+
diff --git a/usr/src/cmd/lp/terminfo/44x.ti b/usr/src/cmd/lp/terminfo/44x.ti
new file mode 100644
index 0000000000..d932bf441e
--- /dev/null
+++ b/usr/src/cmd/lp/terminfo/44x.ti
@@ -0,0 +1,57 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
+# All Rights Reserved
+
+
+#ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.1 */
+
+######################################################################
+#
+# Entries for the AT&T 440 seris line printers
+#
+
+
+442|att442|AT&T 442 line printer,
+
+ bufsz#2048,
+ cols#132,
+ lines#66,
+ orc#1,
+ orhi#10,
+ orl#1,
+ orvi#6,
+ cps#440,
+
+ cr=^M,
+ cuf1=\s,
+ ff=^L,
+
+444|att444|AT&T 444 line printer,
+
+ use=442,
+
+446|att446|AT&T 446 line printer,
+
+ cps#1100,
+ use=442,
+
diff --git a/usr/src/cmd/lp/terminfo/45x.ti b/usr/src/cmd/lp/terminfo/45x.ti
new file mode 100644
index 0000000000..e4a6c5ab92
--- /dev/null
+++ b/usr/src/cmd/lp/terminfo/45x.ti
@@ -0,0 +1,42 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
+# All Rights Reserved
+
+
+#ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.1 */
+
+######################################################################
+#
+# Entries for the AT&T 455, 457, 459 printers
+#
+
+455|att455|AT&T 455 Daisywheel printer,
+
+ use=Gdaisy+basic,
+
+457|att457|458|att458|AT&T 457 Daisy printer,
+
+ bufsz#1024,
+ use=455,
+
+
diff --git a/usr/src/cmd/lp/terminfo/477.ti b/usr/src/cmd/lp/terminfo/477.ti
new file mode 100644
index 0000000000..51b808b2ce
--- /dev/null
+++ b/usr/src/cmd/lp/terminfo/477.ti
@@ -0,0 +1,181 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
+# All Rights Reserved
+
+
+#ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.7 */
+
+######################################################################
+#
+# Entries for the AT&T 477 printer
+#
+
+#
+# Basic capabilities (all emulations):
+#
+att477+basic1,
+
+ bufsz#8192,
+ cps#80,
+
+######################################################################
+#
+# Particular printers:
+#
+
+477-470|att477-470|AT&T 477; 470 emulation,
+
+ cols#136,
+ cpix,
+
+ orc#10,
+ orhi#100,
+ orl#30,
+ orvi#180,
+
+ csnm=%?%p1%{0}%=%tamerican%e%p1%{1}%=%tbritish%e%p1%{2}%=%tswedish%e%p1%{3}%=%tgerman%e%p1%{4}%=%tfrench%e%p1%{5}%=%titalian%e%p1%{6}%=%tspanish%;,
+ scs=%?%p1%{0}%=%t\EZ^O@\ED^B@%e%p1%{1}%=%t\EZ^O@\ED^C@%e%p1%{2}%=%t\EZ^O@\ED^E@%e%p1%{3}%=%t\EZ^O@\ED^D@%e%p1%{4}%=%t\EZ^O@\ED^A@%e%p1%{5}%=%t\EZ^O@\ED^F@%e%p1%{6}%=%t\EZ^O@\ED^G@%;,
+
+ use=470,
+
+477ibmc|att477ibmc|AT&T 477 IBM Color printer emulation,
+
+ cols#135,
+
+ orc#15,
+ orhi#150,
+
+ orl#10,
+ orvi#60,
+
+ cvr=%?%p1%{0}%>%p1%{256}%<%&%t\EA%p1%c\E2%;
+
+ cpi=%?%p1%{10}%=%t^R%e%p1%{12}%=%t\E:%e%p1%{17}%=%t^O%;,
+
+
+ smglp=\EX%p1%{1}%+%c%p2%{2}%+%c,
+ smgt=\E4,
+
+
+ use=att477+basic1, use=Gibmc+basic, use=Gibmc+low+5x6,
+ use=Gibmc+color,
+
+477ibmg|att477ibmg|AT&T 477 IBM Graphics printer emulation,
+
+ cols#136,
+
+ orc#10,
+ orhi#100,
+ orl#10,
+ orvi#60,
+
+ is2=\E@,
+
+ smglp=%?%p1%{256}%<%t\El%p1%{1}%+%c,
+ smgrp=%?%p1%{256}%<%t\EQ%p1%{2}%+%c,
+
+
+ use=att477+basic1, use=Gibmg+basic, use=Gibmg+low,
+
+477qume|att477qume|477-455|att477-455|AT&T 477 qume emulation,
+
+
+ cols#136,
+
+
+ is2=\E\rP\EW\E.\EL08\EY,
+
+ use=att477+basic1, use=Gdaisy+basic, use=Gdaisy+lowres,
+
+######################################################################
+#
+# In Fujitsu DPL24C mode. This seems to be more like the Epson LQ-2500
+# than the IBM Proprinter XL.
+#
+
+#
+# Basic capabilities (Fujitsu emulation only):
+#
+att477+basic2,
+
+ orc#18,
+ orhi#180,
+ orl#30,
+ orvi#180,
+
+
+#
+# The following is not redundant with the cpi capability, because
+# the cpi changes the character size as well, so that printing
+# looks balanced, while this leaves the character size alone.
+ chr=%?%p1%{256}%<%t\Eh%p1%c%;,
+
+
+ is2=\E@,
+
+ csnm=%?%p1%{0}%=%tcharacter_set_1%e%p1%{1}%=%tcharacter_set_2%e%p1%{2}%=%tusa%e%p1%{3}%=%tfrench%e%p1%{4}%=%tgerman%e%p1%{5}%=%tuk%e%p1%{6}%=%tdanish%e%p1%{7}%=%tswedish%e%p1%{8}%=%titalian%e%p1%{9}%=%tspanish%;,
+ scs=%?%p1%{0}%=%t\E7%e%p1%{1}%=%t\E6%e%p1%{3}%=%t\ER0%e%p1%{3}%=%t\ER1%e%p1%{4}%=%t\ER2%e%p1%{5}%=%t\ER3%e%p1%{6}%=%t\ER4%e%p1%{7}%=%t\ER5%e%p1%{8}%=%t\ER6%e%p1%{9}%=%t\ER7%;,
+
+
+477-5x6|att477-5x6|AT&T 477 as Fujitsu DPL24C; 5:6 aspect ratio,
+
+ spinh#50,
+
+# defbi=
+# Like the defbi for the epson2500, except:
+# Set the character spacing to pica (1/10 inch or 10 characters
+# per inch); at 50 dots per inch horizontally this means 5
+# dots per character.
+# THIS ASSUMES WE START AT THE TOP OF THE PAGE! (although
+# maybe not in the first column.)
+ u6=%?%p5%{1}%=%t\E3\030^R\EP\EB%p2%{8}%/%c$<>\ED%p1%{6}%/%c$<>\013\r\t%;,
+
+#
+# Note that the epson2500 drives a real Epson LQ-2500 at
+# 60 dots per inch horizontally; the same control sequences
+# drive the Fujitsu DPL24C at 50 dots per inch horixontally.
+ use=att477+basic1, use=att477+basic2, use=Gep2500+basic,
+ use=Gep2500+low, use=Gep2500+color,
+
+477|att477|AT&T 477 as Fujitsu DPL24C; 1:1; low res,
+
+ is1@,
+
+#
+# This mode differs from the 5x6 mode only in sbim and defbi
+# (and spinh, of course). However, it is even closer to the
+# epson2500, so we use that.
+
+ sdrfq=\EH,
+ snlq=\EG,
+ snrmq@,
+
+ sbim=\E*\005%p1%{256}%m%c%p1%{256}%/%c,
+
+ use=att477+basic1, use=att477+basic2, use=Gep2500+basic,
+ use=Gep2500+low, use=Gep2500+color,
+
+477-hi|att477-hi|AT&T 477 as Fujitsu DPL24C; 1:1; high res,
+
+ use=att477+basic1, use=att477+basic2, use=Gep2500+basic,
+ use=Gep2500+high, use=Gep2500+color,
diff --git a/usr/src/cmd/lp/terminfo/47x.ti b/usr/src/cmd/lp/terminfo/47x.ti
new file mode 100644
index 0000000000..a5db8850de
--- /dev/null
+++ b/usr/src/cmd/lp/terminfo/47x.ti
@@ -0,0 +1,134 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
+# All Rights Reserved
+
+
+#ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.2 */
+
+######################################################################
+#
+# Entries for the AT&T 470, 471, 473, 474, 475, 476, 478, 479 printers
+#
+
+#
+# C.Itoh derived printers:
+#
+
+470|att470|AT&T 470; C.Itoh 8510; 8"; parallel matrix printer,
+
+ bufsz#4096,
+
+ cols#80,
+ cpix,
+
+ sdrfq@,
+ snlq@,
+ snrmq@,
+ ssubm@,
+ ssupm@,
+ rsubm@,
+ rsupm@,
+
+ sgr0=\E"\EY,
+
+# Reset command does not reset form length
+
+ is2=\Ec1\Ev66,
+
+ csnm=%?%p1%{0}%=%tusa%e%p1%{1}%=%tbritish%e%p1%{2}%=%tdanish%e%p1%{3}%=%tjapanese%e%p1%{4}%=%tnorwegian%e%p1%{5}%=%tswedish%e%p1%{6}%=%tgerman%e%p1%{7}%=%tfrench%e%p1%{8}%=%tfrench2%e%p1%{9}%=%titalian%e%p1%{10}%=%tspanish%e%p1%{11}%=%tnetherland%e%p1%{12}%=%tafrikaans%e%p1%{13}%=%tbritish2%;,
+ scs=%?%p1%{0}%=%t\EZ^O@\ED^B@%e%p1%{1}%=%t\EZ^O@\ED^C@%e%p1%{2}%=%t\EZ^O@\ED\b@%e%p1%{3}%=%t\EZ^O@%e%p1%{4}%=%t\EZ^O@\ED\t@%e%p1%{5}%=%t\EZ^O@\ED^E@%e%p1%{6}%=%t\EZ^O@\ED^D@%e%p1%{7}%=%t\EZ^O@\ED^A@%e%p1%{8}%=%t\EZ^O@\ED^N@%e%p1%{9}%=%t\EZ^O@\ED^F@%e%p1%{10}%=%t\EZ^O@\ED^G@%e%p1%{11}%=%t\EZ^O@\ED\n@%e%p1%{12}%=%t\EZ^O@\ED^K@%e%p1%{13}%=%t\ED^O@%;,
+
+
+ use=Gcitoh+basic, use=Gcitoh+low,
+
+471|att471|AT&T 471; C.Itoh 1550; 14"; parallel matrix printer,
+
+ cols#136,
+
+ use=470,
+
+475|att475|AT&T 475; C.Itoh 8510; 8"; serial matrix printer,
+
+ use=470,
+
+476|att476|AT&T 475; C.Itoh 1550; 14"; serial matrix printer,
+
+ use=471,
+
+#
+# IBM derived printers:
+#
+
+473|att473|AT&T 473; 8"; C.Itoh 8510EP; IBM Graphics,
+
+ bufsz#4096,
+ cps#120,
+
+#
+# FIX: The AT&T 473 doesn't seem to have fine-scale horizontal
+# motion--the only motion is by columns.
+ orc#10,
+ orhi#100,
+
+
+
+
+ use=Gibmg+basic, use=Gibmg+low,
+
+474|att474|AT&T 474; 14"; C.Itoh 1550EP; IBM Graphics,
+
+ cols#132,
+
+ use=473,
+
+478|att478|AT&T 478; 8"; parallel matrix printer,
+
+ bufsz#16384,
+ cps#120,
+
+#
+# FIX: The AT&T 478 doesn't seem to have fine-scale horizontal
+# motion--the only motion is by columns.
+ orc#10,
+ orhi#100,
+
+ cpi=%?%p1%{10}%=%t^R%e%p1%{12}%=%t\E:%e%p1%{13}%=%p1%{14}%=%O%t\Eh%e%p1%{16}%=%p1%{17}%=%O%t\Em%e%p1%{18}%=%t^O%;,
+
+ is2=^R\EW0\E-0\E_0\EF\EH\EI\ET\EA\014\E2\ER\El\001\Er\120\Et\001\EC\102\E7\EU0\EO,
+
+ sdrfq=\EI^D,
+ snlq=\EI^B,
+
+ smglp=\El%{1}%p1%+%c,
+ smgrp=\Er%{1}%p1%+%c,
+ smgtp=\Et%{1}%p1%+%c,
+
+ use=Gibmg+basic, use=Gibmg+low,
+
+479|att479|AT&T 479; 14"; IBM parallel; matrix printer,
+
+ cols#132,
+ is2=^R\EW0\E-0\E_0\EF\EH\EI\004\ET\EA\014\E2\ER\El\001\Er\204\Et\001\EC\102\E7\EU0\EO,
+
+
+ use=478,
diff --git a/usr/src/cmd/lp/terminfo/495.ti b/usr/src/cmd/lp/terminfo/495.ti
new file mode 100644
index 0000000000..562c62b6be
--- /dev/null
+++ b/usr/src/cmd/lp/terminfo/495.ti
@@ -0,0 +1,78 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
+# All Rights Reserved
+
+
+#ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.1 */
+
+######################################################################
+#
+# Entries for the AT&T 495 printer
+#
+
+495ibm|att495ibm|AT&T 495 IBM Graphics emulation,
+
+ bufsz#1024,
+ cps#800,
+
+
+ orc#12,
+ orhi#120,
+
+ lines#63,
+
+ cpi=%?%p1%{10}%=%t^R%e%p1%{17}%=%t^O%;,
+
+# Reset defaults, enter IBM Graphics emulation mode
+ is2=\E[0~\E[12~,
+
+ use=Gibmg+basic, use=Gibmg+low,
+
+495qume|att495qume|AT&T 495 Qume emulation,
+
+ daisy@,
+
+ bufsz#1024,
+ cps#800,
+
+ cols#80,
+ lines#63,
+
+
+ chr=%?%p1%{0}%>%p1%{127}%<%&%t\E^_%p1%{1}%+%c%;,
+ cvr=%?%p1%{0}%>%p1%{127}%<%&%t\E^^%p1%{1}%+%c%;,
+
+ is2=\E[0~\E[11~\E^_\r,
+
+ u9=%?%p1%{128}%<%t\EF%p1%02d%;,
+
+ use=Gdaisy+basic, use=Gdaisy+lowres,
+
+495hp|att495hp|AT&T 495 HP Laserjet I emulation,
+
+ bufsz#1024,
+ cps#800,
+
+ is2=\E[0~\E[10~\E&k0S,
+
+ use=Ghplaser+basic, use=Ghplaser+high,
diff --git a/usr/src/cmd/lp/terminfo/53x0.ti b/usr/src/cmd/lp/terminfo/53x0.ti
new file mode 100644
index 0000000000..fc6fb2cef9
--- /dev/null
+++ b/usr/src/cmd/lp/terminfo/53x0.ti
@@ -0,0 +1,81 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
+# All Rights Reserved
+
+
+#ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.2 */
+
+######################################################################
+#
+# Entries for the AT&T 5310, 5320 printers
+#
+
+53x0+high,
+
+ spinv#72,
+ spinh#150,
+
+ bitwin#2,
+ u1=2,
+
+# defbi=
+# X is in 1/150 increments; set char spacing to 1/16.7
+# increments to allow us to get close; column is X*16.7/150.
+# Y is in 1/144 increments; set line spacing to 1/12
+# increments to allow us to get close; line is Y/12.
+# Note: The 5310/5320 won't move upward with the absolute
+# addressing control sequence, so we use the relative motion.
+# THIS ASSUMES WE START AT THE TOP OF THE PAGE! (although
+# maybe not in the first column.)
+ u6=%?%p5%{1}%=%t\E[4w\E[%p1%{167}%*%{1500}%/%d`\E[w\E[3z\E[%p2%{12}%/%de\E[z\EP\035q%;,
+ defbi=%?%p5%{1}%=%t\E[4w\E[%p1%{167}%*%{1500}%/%d`\E[w\E[3z\E[%p2%{12}%/%de\E[z\EP\035q%;,
+
+ use=Gdec+low,
+
+5320|att5320|AT&T Model 5320 printer (EMUL set to ANSI),
+
+ bufsz#8192,
+ cps#120,
+
+
+#
+# FIX: The AT&T 5320 doesn't seem to have fine-scale horizontal
+# motion--the only motion is by columns.
+ orc#10,
+ orhi#100,
+
+#
+# FIX: The AT&T 5320 seems to only have half-line vertical motion
+# at best.
+ orl#12,
+ orvi#72,
+
+
+
+ use=53x0+high, use=Gdec+basic, use=Gdec+low,
+
+5310|att5310|AT&T 5310 matrix printer (EMUL set to ANSI),
+
+ cols#80,
+
+ use=5320,
diff --git a/usr/src/cmd/lp/terminfo/57x.ti b/usr/src/cmd/lp/terminfo/57x.ti
new file mode 100644
index 0000000000..677090111e
--- /dev/null
+++ b/usr/src/cmd/lp/terminfo/57x.ti
@@ -0,0 +1,105 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
+# All Rights Reserved
+
+
+
+#ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.2 */
+
+######################################################################
+#
+# Entries for the AT&T 570 series printers
+#
+
+570eps|att570eps|AT&T 570 Epson emulation,
+
+ bufsz#10000,
+ cps#250,
+ cols#80,
+ orl#36,
+ orvi#216,
+
+ use=Gepson+basic,
+ use=Gepson+low,
+
+570ibm|att570ibm|AT&T 570 IBM ProPrinter emulation,
+
+ bufsz#10000,
+ cps#250,
+
+ use=Gibmg+basic,
+ use=Gibmg+low,
+
+571eps|att571eps|AT&T 571 Epson emulation,
+
+ cols#136,
+
+ use=570eps,
+
+571ibm|att571ibm|AT&T 571 IBM ProPrinter emulation,
+
+ cols#136,
+ use=570ibm,
+
+572|att572|AT&T 572 9-wire Matrix Printer,
+
+ bufsz#10000,
+ cps#250,
+ cols#80,
+ xhpa@,
+ xvpa@,
+
+ csnm=%?%p1%{0}%=%tusascii%e%p1%{1}%=%tbritish%e%p1%{2}%=%tfinnish%e%p1%{3}%=%tjapanese%e%p1%{4}%=%tnorwegian%e%p1%{5}%=%tswedish%e%p1%{6}%=%tgerman%e%p1%{7}%=%tfrench%e%p1%{8}%=%tfrench_canadian%e%p1%{9}%=%titalian%e%p1%{10}%=%tspanish%e%p1%{11}%=%tline_drawing%e%p1%{12}%=%tdanish%e%p1%{13}%=%tebcdic%e%p1%{14}%=%tmulti_national%;,
+ scs=%?%p1%{0}%=%t\E(B%e%p1%{1}%=%t\E(A%e%p1%{2}%=%t\E(C%e%p1%{3}%=%t\E(J%e%p1%{4}%=%t\E(E%e%p1%{5}%=%t\E(H%e%p1%{6}%=%t\E(K%e%p1%{7}%=%t\E(R%e%p1%{8}%=%t\E(Q%e%p1%{9}%=%t\E(Y%e%p1%{10}%=%t\E(Z%e%p1%{11}%=%t\E(O%e%p1%{12}%=%t\E(E%e%p1%{13}%=%t\E(3%e%p1%{14}%=%t\E(<%;,
+
+ is3=\E[0"z,
+
+ snlq=\E[2"z,
+ snrmq=\E[3"z,
+ sdrfq=\E[0"z,
+ sshm=\E[1m,
+ rshm=\E[22m,
+ smul=\E[4m,
+ rmul=\E[24m,
+
+# The following are not supported by 572/573
+
+ smgbp@,
+ smgtp@,
+ smglp@,
+ smgrp@,
+ cud@,
+ cuf@,
+ cuu1@,
+ hpa@,
+ vpa@,
+ nel@,
+
+ use=Gdec+basic,
+ use=Gdec+low,
+
+573|att573|AT&T 573 9-wire Matrix Printer,
+
+ cols#132,
+
+ use=572,
diff --git a/usr/src/cmd/lp/terminfo/58x.ti b/usr/src/cmd/lp/terminfo/58x.ti
new file mode 100644
index 0000000000..bd5b65c663
--- /dev/null
+++ b/usr/src/cmd/lp/terminfo/58x.ti
@@ -0,0 +1,103 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
+# All Rights Reserved
+
+
+#ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.2 */
+
+######################################################################
+#
+# Generic entry:
+#
+
+att583+basic,
+
+ bufsz#16384,
+ cps#200,
+
+######################################################################
+#
+# Specific printers:
+#
+
+583ibm|att583ibm|AT&T 583 as IBM Proprinter XL,
+
+ cols#136,
+ is3=\EX\210,
+
+ use=att583+basic, use=Gibmxl+basic, use=Gibmxl+low+5x6, use=Gibmc+color,
+
+
+583ibm-80|att583ibm-80|AT&T 583 as IBM Proprinter XL;80-col,
+
+ use=att583+basic, use=Gibmxl+basic, use=Gibmxl+low+5x6, use=Gibmc+color,
+
+
+583eps|att583eps|AT&T 583 as Epson LQ-2500; low resolution,
+
+ use=att583+basic, use=Gep2500+basic, use=Gep2500+low, use=Gep2500+color,
+
+583eps-hi|att583eps-hi|AT&T 583 as Epson LQ-2500; high resolution,
+
+ use=att583+basic, use=Gep2500+basic, use=Gep2500+high, use=Gep2500+color,
+
+583eps-80|att583eps-80|AT&T 583 as Epson LQ-2500; low resolution; 80-col,
+
+ cols#80,
+ use=att583+basic, use=Gep2500+basic, use=Gep2500+low, use=Gep2500+color,
+
+583eps-hi-80|att583eps-hi-80|AT&T 583 as Epson LQ-2500; high resolution; 80-col,
+
+ cols#80,
+ use=att583+basic, use=Gep2500+basic, use=Gep2500+high, use=Gep2500+color,
+
+
+580ibm|att580ibm|AT&T 580 as IBM Proprinter XL,
+
+ use=att583+basic, use=Gibmxl+basic, use=Gibmxl+low+5x6,
+
+581ibm|att581ibm|AT&T 581 as IBM Proprinter XL,
+
+ cols#136,
+ is3=\EX\210,
+
+ use=580ibm,
+
+
+581eps|att581eps|AT&T 581 as Epson LQ-2500; low resolution,
+
+ use=att583+basic, use=Gep2500+basic, use=Gep2500+low,
+
+581eps-hi|att581eps-hi|AT&T 581 as Epson LQ-2500; high resolution,
+
+ use=att583+basic, use=Gep2500+basic, use=Gep2500+high,
+
+580eps|att580eps|AT&T 580 as Epson LQ-2500; low resolution,
+
+ cols#80,
+ use=att583+basic, use=Gep2500+basic, use=Gep2500+low,
+
+580eps-hi|att580eps-hi|AT&T 580 as Epson LQ-2500; high resolution,
+
+ cols#80,
+ use=att583+basic, use=Gep2500+basic, use=Gep2500+high,
diff --git a/usr/src/cmd/lp/terminfo/593.ti b/usr/src/cmd/lp/terminfo/593.ti
new file mode 100644
index 0000000000..3d2a592c47
--- /dev/null
+++ b/usr/src/cmd/lp/terminfo/593.ti
@@ -0,0 +1,73 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
+# All Rights Reserved
+
+
+#ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.2 */
+
+######################################################################
+#
+# Entries for the AT&T 593 printer
+#
+
+593eps|att593eps|AT&T 593 Epson FX86e emulation,
+
+ bufsz#2000,
+ cps#480,
+ cols#80,
+ lines#62,
+ orl#36,
+ orvi#216,
+
+# Only letter quality for laser printer
+ snlq@,
+ snrmq@,
+ sdrfq@,
+
+ use=Gep2500+basic,
+ use=Gepson+low,
+
+593ibm|att593ibm|AT&T 593 IBM ProPrinter XL emulation,
+
+ bufsz#2000,
+ cps#480,
+
+ lines#62,
+
+# Only letter quality for laser printer
+ snlq@,
+ sdrfq@,
+
+ smglp=\EX%p1%{1}%+%c%p2%{1}%+%c,
+
+ use=Gibmxl+basic,
+ use=Gibmxl+low+1x1,
+
+
+593hp|att593hp|AT&T 593 HP Laserjet II emulation,
+
+ bufsz#2000,
+ cps#480,
+
+
+ use=Ghplaser+II,
diff --git a/usr/src/cmd/lp/terminfo/Makefile b/usr/src/cmd/lp/terminfo/Makefile
new file mode 100644
index 0000000000..b94f821b6b
--- /dev/null
+++ b/usr/src/cmd/lp/terminfo/Makefile
@@ -0,0 +1,77 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# cmd/lp/terminfo/Makefile
+#
+
+include ../Makefile.lp
+
+ROOTTERMINFO = $(ROOT)/usr/share/lib/terminfo
+
+TIC = tic
+
+SRCS = PS.ti 40.ti 477.ti 53x0.ti 593.ti daisy.ti hplaser.ti \
+ 44x.ti 47x.ti 57x.ti dec.ti ibm.ti 45x.ti 495.ti \
+ 58x.ti citoh.ti epson.ti unknown.ti
+
+
+TMPSRC = terminfo.src
+
+DIRMODE= 755
+FILEMODE= 644
+
+.KEEP_STATE:
+
+all : $(TMPSRC)
+
+$(TMPSRC) : $(SRCS)
+ $(RM) $@; cat $(SRCS) > $@
+
+#
+# Since all entries are created at once, we simply choose one of the
+# target files and assume everything will be made at one time. This
+# has holes (like if somebody removes P/PSR but not P/PS), but those
+# are the breaks.
+#
+install : all $(ROOTTERMINFO) $(ROOTTERMINFO)/P/PS
+
+$(ROOTTERMINFO)/P/PS: $(TMPSRC)
+ TERMINFO=$(ROOTTERMINFO) 2>&1 $(TIC) -v $(TMPSRC) > errs
+ @$(ECHO) "\n`2>/dev/null cat errs|wc -l` entries have been compiled\n"
+ @-( 2>/dev/null cat errs|grep -iv "^mkdir"|grep -iv "^create"|grep -iv "^link"|grep -vi $(TMPSRC)|grep -vi touch|grep -vi "working"; \
+ if [ $$? -ne 0 ] ; \
+ then \
+ $(ECHO) "\tNo errors\n"; \
+ else \
+ $(ECHO) "\n\tErrors can be found in `pwd`/errs\n"; \
+ fi \
+ )
+
+$(ROOTTERMINFO) :
+ $(INS.dir)
+
+clean clobber:
+ $(RM) $(TMPSRC)
+
+strip lint :
+
diff --git a/usr/src/cmd/lp/terminfo/PS.ti b/usr/src/cmd/lp/terminfo/PS.ti
new file mode 100644
index 0000000000..28a83e988b
--- /dev/null
+++ b/usr/src/cmd/lp/terminfo/PS.ti
@@ -0,0 +1,36 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
+# All Rights Reserved
+
+
+# ident "%Z%%M% %I% %E% SMI" /* SVr4 1.1 */
+
+PS|PSR|PS-b|PS-r|PS-br|Fake PostScript entry,
+ slines=\004,
+ u9=\004,
+ csnm=\004,
+ scs=\004,
+ cpi=null,
+ lpi=null,
+ cols#80,
+ lines#66,
diff --git a/usr/src/cmd/lp/terminfo/citoh.ti b/usr/src/cmd/lp/terminfo/citoh.ti
new file mode 100644
index 0000000000..c2b584b7c6
--- /dev/null
+++ b/usr/src/cmd/lp/terminfo/citoh.ti
@@ -0,0 +1,127 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
+# All Rights Reserved
+
+
+#ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.1 */
+
+######################################################################
+#
+# Generic entry for the C.Itoh 8510 printer and emulations:
+#
+
+Gcitoh+basic,
+
+# The 8510 I use only allows 79 columns!
+ cols#79,
+
+ lines#66,
+ it#8,
+
+ orc#1,
+ orhi#10,
+ orl#24,
+ orvi#144,
+
+ cr=\r,
+ cud1=\n,
+ cuf1=\s,
+ ff=\f,
+ ht=\t,
+
+ cpi=%?%p1%{10}%=%t\EN%e%p1%{12}%=%t\EE%e%p1%{16}%=%p1%{17}%=%O%t\EQ%;,
+ cvr=%?%p1%{0}%>%p1%{100}%<%&%t\ET%p1%02d%;,
+
+ is1=^Q^X,
+ is2=\Ec1\Ev66.,
+
+ smso=\E!,
+ rmso=\E",
+ smul=\EX,
+ rmul=\EY,
+ bold=\E!,
+ ssubm=\Es2,
+ rsubm=\Es0,
+ ssupm=\Es1,
+ rsupm=\Es0,
+ swidm=^N,
+ rwidm=^O,
+ sgr0=\E"\EY\Es0^O,
+ sgr=%?%p1%p6%|%t\E!%e\E"%;%?%p2%t\EX%e\EY%;,
+
+ rep=\ER%p2%03d%p1%c,
+
+ snlq=\Em2,
+ snrmq=\Em1,
+ sdrfq=\Em0,
+
+ smglp=\EL%p1%03d,
+ smgrp=\E/%{1}%p1%+%03d,
+
+# slines=
+ u9=\Ev%p1%02d.,
+ slines=\Ev%p1%02d.,
+
+Gcitoh+low,
+
+ npins#8,
+ spinv#68,
+ spinh#136,
+
+ porder=8\,7\,6\,5\,4\,3\,2\,1;0,
+
+ sbim=\ES%p1%04d,
+
+# birep=
+ u4=\EV%p2%04d%p3%c,
+ birep=\EV%p2%04d%p3%c,
+
+ bitwin#1,
+ u1=1,
+ bitype#1,
+ u2=1,
+
+# defbi=
+# Set the line spacing to 17/144 inch to get (almost) 68 dots
+# per inch vertically (8 * 144/17).
+# Set the character spacing to compressed (1/17 inch or 17
+# characters per inch); at 136 dots per inch horizontally this
+# means 8 dots per character.
+# Set the left margin at the left edge of the image.
+# The C.Itoh doesn't have parameterized vertical motion,
+# so we simulate it with linefeeds. Assume we never need
+# to move more than 63 lines (at 17/144 LPI).
+# Set uni-directional motion; bi-directional causes a wavy
+# image.
+# defbi=
+ u6=%?%p5%{1}%=%t\ET17\EQ\EL%p1%{8}%/%03d%p2%{8}%/%Py%?%gy%{31}%>%t\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n%gy%{32}%-%Py%;%?%gy%{15}%>%t\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n%gy%{16}%-%Py%;%?%gy%{7}%>%t\n\n\n\n\n\n\n\n%gy%{8}%-%Py%;%?%gy%{3}%>%t\n\n\n\n%gy%{4}%-%Py%;%?%gy%{1}%>%t\n\n%gy%{2}%-%Py%;%?%gy%{0}%>%t\n%;\E>%;,
+ defbi=%?%p5%{1}%=%t\ET17\EQ\EL%p1%{8}%/%03d%p2%{8}%/%Py%?%gy%{31}%>%t\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n%gy%{32}%-%Py%;%?%gy%{15}%>%t\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n%gy%{16}%-%Py%;%?%gy%{7}%>%t\n\n\n\n\n\n\n\n%gy%{8}%-%Py%;%?%gy%{3}%>%t\n\n\n\n%gy%{4}%-%Py%;%?%gy%{1}%>%t\n\n%gy%{2}%-%Py%;%?%gy%{0}%>%t\n%;\E>%;,
+
+# endbi=
+ u7=\EA\EP\EL001\E<,
+ endbi=\EA\EP\EL001\E<,
+
+# binel=
+ u5=\n\r\EL%p1%{8}%/%03d,
+ binel=\n\r\EL%p1%{8}%/%03d,
+
diff --git a/usr/src/cmd/lp/terminfo/daisy.ti b/usr/src/cmd/lp/terminfo/daisy.ti
new file mode 100644
index 0000000000..26c7b31d9f
--- /dev/null
+++ b/usr/src/cmd/lp/terminfo/daisy.ti
@@ -0,0 +1,126 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
+# All Rights Reserved
+
+
+#ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.2 */
+
+######################################################################
+#
+# Generic entry for the daisy wheel printers and emulations:
+#
+
+#
+# Basic capabilities:
+#
+Gdaisy+basic,
+
+ daisy,
+
+ cols#132,
+ lines#66,
+
+ bufsz#500,
+ cps#55,
+ orc#12,
+ orhi#120,
+ orl#8,
+ orvi#48,
+
+ cr=^M,
+ cud1=^J,
+ cuf1=\s,
+ ff=^L,
+ ht=^I,
+ hpa=%?%p1%{100}%<%t\EC%p1%02d%e%p1%{110}%<%t\ECA%p1%{100}%-%d%e%p1%{120}%<%t\ECB%p1%{110}%-%d%e%p1%{130}%<%t\ECC%p1%{120}%-%d%e%p1%{140}%<%t\ECD%p1%{130}%-%d%e%p1%{150}%<%t\ECE%p1%{140}%-%d%e%p1%{160}%<%t\ECF%p1%{150}%-%d%;,
+ vpa=%?%p1%{100}%<%t\EP%p1%02d%e%p1%{110}%<%t\EPA%p1%{100}%-%d%e%p1%{120}%<%t\EPB%p1%{110}%-%d%e%p1%{130}%<%t\EPC%p1%{120}%-%d%e%p1%{140}%<%t\EPD%p1%{130}%-%d%e%p1%{150}%<%t\EPE%p1%{140}%-%d%e%p1%{160}%<%t\EPF%p1%{150}%-%d%;,
+
+ chr=%?%p1%{100}%<%t\EE%p1%02d%e%p1%{110}%<%t\EEA%p1%{100}%-%d%e%p1%{120}%<%t\EEB%p1%{110}%-%d%e%p1%{130}%<%t\EEC%p1%{120}%-%d%e%p1%{140}%<%t\EED%p1%{130}%-%d%e%p1%{150}%<%t\EEE%p1%{140}%-%d%e%p1%{160}%<%t\EEF%p1%{150}%-%d%;,
+ cvr=%?%p1%{100}%<%t\EL%p1%02d%e%p1%{110}%<%t\ELA%p1%{100}%-%d%e%p1%{120}%<%t\ELB%p1%{110}%-%d%e%p1%{130}%<%t\ELC%p1%{120}%-%d%e%p1%{140}%<%t\ELD%p1%{130}%-%d%e%p1%{150}%<%t\ELE%p1%{140}%-%d%e%p1%{160}%<%t\ELF%p1%{150}%-%d%;,
+
+
+ is2=\E\015P\EW\E.\EL08\EE12\E%\E<,
+
+ smso=\EQ,
+ rmso=\ER,
+ smul=\EI,
+ rmul=\EJ,
+ bold=\EK3,
+ sshm=\EQ,
+ rshm=\ER,
+ sgr0=\ER\EM\EJ,
+ sgr=%?%p1%t\EQ%e\ER%;%?%p2%t\EI%e\EJ%;%?%p6%t\EK3%e\EM%;,
+
+ smgb=\E-,
+ smgl=\E9,
+ smgr=\E0,
+ smgt=\E+,
+
+# slines=,
+ u9=\EF%p1%02d,
+
+#
+# Graphics capabilities:
+#
+Gdaisy+lowres,
+
+#
+# We could use the graphics on/graphics off control sequences
+# (ESC G/ESC 4) but for these problems:
+#
+# - graphics mode gets turned off when a \r is received;
+# - printing a character doesn't cause motion, which
+# means that each ``cell'' must be followed by a space;
+# - to get the best aspect ratio, three horizontal dots
+# must be sent per ``cell'' (using the ESC 3 graphics mode
+# (1/60 instead of 1/120) alleviates this problem but
+# gives a worse aspect ratio).
+#
+# So instead we set the HMI and VMI to 1/40 and 1/48 inch,
+# respectively.
+#
+ npins#1,
+ spinv#48,
+ spinh#40,
+
+ porder=o\,o\,o\,o\,1\,1\,1\,o;32,
+
+# bitwin#
+ u1=1,
+# bitype#
+ u2=1,
+
+# birep=
+ u4=%?%p3%{32}%=%t\EH%p2%{3}%*%Px%gx%{256}%/%{64}%+%c%gx%{256}%m%{16}%/%{64}%+%c%gx%{16}%m%{64}%+%c%;,
+
+# defbi=
+# THIS ASSUMES WE START AT THE TOP OF THE PAGE! (although
+# maybe not in the first column.)
+ u6=%?%p5%{1}%=%t\EL01\EE03%p1%{3}%*%Px\r\EH%gx%{256}%/%{64}%+%c%gx%{256}%m%{16}%/%{64}%+%c%gx%{16}%m%{64}%+%c\EV%p2%{256}%/%{64}%+%c%p2%{256}%m%{16}%/%{64}%+%c%p2%{16}%m%{64}%+%c\E>%;,
+
+# endbi=
+ u7=\EL08\EE12\E<,
+
+# binel=
+ u5=\n\r%p1%{3}%*%Px\EH%gx%{256}%/%{64}%+%c%gx%{256}%m%{16}%/%{64}%+%c%gx%{16}%m%{64}%+%c,
+
diff --git a/usr/src/cmd/lp/terminfo/dec.ti b/usr/src/cmd/lp/terminfo/dec.ti
new file mode 100644
index 0000000000..5cc0ae6d55
--- /dev/null
+++ b/usr/src/cmd/lp/terminfo/dec.ti
@@ -0,0 +1,130 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
+# All Rights Reserved
+
+
+#ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.1 */
+
+######################################################################
+#
+# Generic entries for the various DEC printers and emulations.
+#
+# The Gdec+... entries are really for any ANSI X3.64 printer,
+# but "ansi" is already used for terminals. It would be more
+# work to do "use=ansi" because there are too many screen
+# oriented caps that would have to be removed.
+#
+
+#
+# Basic capabilities:
+#
+Gdec+basic,
+
+ cols#132,
+ lines#66,
+ it#8,
+
+ cpix,
+ orc#1,
+ orhi#10,
+ orl#2,
+ orvi#12,
+
+#
+# FIX? Are xhpa and xvpa really needed?
+ xhpa,
+ xvpa,
+
+ cr=^M,
+ ff=^L,
+ ht=^I,
+ cud1=^J,
+ cuf1=\s,
+ cuu1=\EM,
+ cud=\E[%p1%de,
+ cuf=\E[%p1%da,
+ hpa=\E[%p1%d`,
+ vpa=\E[%p1%dd,
+ nel=\EE,
+
+ is1=\Ec,
+ is2=\E[20l,
+
+ cpi=%?%p1%{10}%=%t\E[w%e%p1%{12}%=%t\E[2w%e%p1%{5}%=%t\E[5w%e%p1%{13}%=%p1%{14}%=%O%t\E[3w%e%p1%{16}%=%p1%{17}%=%O%t\E[4w%e%p1%{6}%=%t\E[6w%e%p1%{7}%=%t\E[7w%e%p1%{8}%=%t\E[8w%;,
+ lpi=%?%p1%{2}%=%t\E[4z%e%p1%{3}%=%t\E[5z%e%p1%{4}%=%t\E[6z%e%p1%{6}%=%t\E[z%e%p1%{8}%=%t\E[2z%e%p1%{12}%=%t\E[3z%;,
+
+ csnm=%?%p1%{0}%=%tusascii%e%p1%{1}%=%tenglish%e%p1%{2}%=%tfinnish%e%p1%{3}%=%tjapanese%e%p1%{4}%=%tnorwegian%e%p1%{5}%=%tswedish%e%p1%{6}%=%tgermanic%e%p1%{7}%=%tfrench%e%p1%{8}%=%tcanadian_french%e%p1%{9}%=%titalian%e%p1%{10}%=%tspanish%e%p1%{11}%=%tline%e%p1%{12}%=%tsecurity%e%p1%{13}%=%tebcdic%e%p1%{14}%=%tapl%e%p1%{15}%=%tmosaic%;,
+ scs=%?%p1%{0}%=%t\E(B%e%p1%{1}%=%t\E(A%e%p1%{2}%=%t\E(C%e%p1%{3}%=%t\E(D%e%p1%{4}%=%t\E(E%e%p1%{5}%=%t\E(H%e%p1%{6}%=%t\E(K%e%p1%{7}%=%t\E(R%e%p1%{8}%=%t\E(Q%e%p1%{9}%=%t\E(Y%e%p1%{10}%=%t\E(Z%e%p1%{11}%=%t\E(0%e%p1%{12}%=%t\E(1%e%p1%{13}%=%t\E(3%e%p1%{14}%=%t\E(8%e%p1%{15}%=%t\E(}%;,
+
+ sshm=\E[5m,
+ rshm=\E[m,
+
+ smgtp=\E[%p1%dr,
+ smgbp=\E[;%p1%dr,
+ smglp=\E[%{1}%p1%+%ds,
+ smgrp=\E[;%{1}%p1%+%ds,
+
+# slines=
+ u9=\E[%p1%dt,
+ slines=\E[%p1%dt,
+
+#
+# Graphics capabilities (low resolution, 6-pin):
+#
+Gdec+low,
+
+ npins#6,
+ spinv#72,
+ spinh#75,
+
+ porder=o\,o\,6\,5\,4\,3\,2\,1;63,
+
+ bitwin#1,
+ u1=1,
+ bitype#1,
+ u2=1,
+
+# birep=
+ u4=!%p2%d%p3%c,
+ birep=!%p2%d%p3%c,
+
+# defbi=
+# X is in 1/75 increments; set char spacing to 1/16.7
+# increments to allow us to get close; column is X*16.7/75.
+# Y is in 1/72 increments; set line spacing to 1/12
+# increments to allow us to get close; line is Y/6.
+ u6=%?%p5%{1}%=%t\E[4w\E[%p1%{167}%*%{750}%/%d`\E[w\E[3z\E[%p2%{6}%/%dd\E[z\EP0q%;,
+ defbi=%?%p5%{1}%=%t\E[4w\E[%p1%{167}%*%{750}%/%d`\E[w\E[3z\E[%p2%{6}%/%dd\E[z\EP0q%;,
+
+# endbi=
+ u7=^X,
+ endbi=^X,
+
+# binel=
+ u5=-,
+ binel=-,
+
+# bicr=
+ u3=$,
+ bicr=$,
+
diff --git a/usr/src/cmd/lp/terminfo/epson.ti b/usr/src/cmd/lp/terminfo/epson.ti
new file mode 100644
index 0000000000..8beceb1e74
--- /dev/null
+++ b/usr/src/cmd/lp/terminfo/epson.ti
@@ -0,0 +1,301 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
+# All Rights Reserved
+
+
+#ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.2 */
+
+######################################################################
+#
+# Epson
+#
+
+#
+# Basic capabilities:
+#
+Gepson+basic,
+
+ cols#80,
+ lines#66,
+ it#8,
+
+ cpix,
+ orc#6,
+ orhi#60,
+ orl#30,
+ orvi#180,
+
+ cr=^M,
+ cud1=^J,
+ cuf1=\s,
+ cub1=\b,
+ ff=^L,
+ ht=^I,
+
+ cpi=%?%p1%{10}%=%t^R\EP%e%p1%{12}%=%t^R\EM%e%p1%{20}%=%t^O\EM%e%p1%{17}%=%t^O\EP%;,
+ cvr=%?%p1%{0}%>%p1%{256}%<%&%t\E3%p1%c%;,
+
+ is1=^Q^X,
+ is2=\E@\E%0\EO,
+
+ csnm=%?%p1%{0}%=%tusa%e%p1%{1}%=%tfrench%e%p1%{2}%=%tgerman%e%p1%{3}%=%tbritish%e%p1%{4}%=%tdanish%e%p1%{5}%=%tswedish%e%p1%{6}%=%titalian%e%p1%{7}%=%tspanish%e%p1%{8}%=%tjapanese%e%p1%{9}%=%tnorwegian%e%p1%{10}%=%tdanish2%e%p1%{11}%=%tspanish2%e%p1%{12}%=%tlatin_american%e%p1%{13}%=%tafrikaans%e%p1%{14}%=%tdutch%e%p1%{15}%=%tfrench_canadian%e%p1%{16}%=%tfrench2%e%p1%{17}%=%tbritish2%e%p1%{18}%=%tmulti_national%e%p1%{19}%=%tibmgraphics%;,
+ scs=%?%p1%{0}%=%t\ER\200%e%p1%{1}%=%t\ER\001%e%p1%{2}%=%t\ER\002%e%p1%{3}%=%t\ER\003%e%p1%{4}%=%t\ER\004%e%p1%{5}%=%t\ER\005%e%p1%{6}%=%t\ER\006%e%p1%{7}%=%t\ER\007%e%p1%{8}%=%t\ER\010%e%p1%{9}%=%t\ER\011%e%p1%{10}%=%t\ER\012%e%p1%{11}%=%t\ER\013%e%p1%{12}%=%t\ER\014%e%p1%{13}%=%t\ER\100%e%p1%{14}%=%t\ERA%e%p1%{15}%=%t\ERB%e%p1%{16}%=%t\ERC%e%p1%{17}%=%t\ERD%e%p1%{18}%=%t\E6%e%p1%{19}%=%t\Et1%;,
+
+ smso=\EE,
+ rmso=\EF,
+ smul=\E-1,
+ rmul=\E-0,
+ bold=\EG,
+ sshm=\EE,
+ rshm=\EF,
+ ssubm=\ES1,
+ rsubm=\ET,
+ ssupm=\ES0,
+ rsupm=\ET,
+ swidm=\EW1,
+ rwidm=\EW0,
+ sitm=\E4,
+ ritm=\E5,
+ sgr0=\EF\E-0\EH\ET\EW0\E5,
+ sgr=%?%p1%t\EE%e\EF%;%?%p2%t\E-1%e\E-0%;%?%p6%t\EG%e\EH%;,
+
+#
+# For now we can't set the margin in the first (0th) column
+# due to limitations in the Curses code. This should be changed
+# in the future. For now, shift right 1. Note that the right
+# margin is the last USEABLE column in Terminfo, but is 1
+# PAST that for the Epson.
+ smglp=%?%p1%{256}%<%t\El%p1%{1}%+%c%;,
+ smgrp=%?%p1%{256}%<%t\EQ%p1%{2}%+%c%;,
+
+# slines= u9 used for svr3.2
+ u9=%?%p1%{0}%>%p1%{128}%<%&%t\EC%p1%c%;,
+ slines=%?%p1%{0}%>%p1%{128}%<%&%t\EC%p1%d%;,
+
+ sdrfq=\Ex0,
+ snlq=\Ex1,
+ snrmq=\Ek1,
+
+#
+# Graphics capabilities:
+#
+Gepson+low,
+
+ npins#8,
+ spinv#60,
+ spinh#60,
+
+ porder=1\,2\,3\,4\,5\,6\,7\,8;0,
+
+ sbim=\EK%p1%{256}%m%c%p1%{256}%/%c,
+
+# u1 - u8 used for svr3.2
+ bitwin#1,
+ u1=1,
+ bitype#1,
+ u2=1,
+
+# defbi=
+# Set the line spacing to 8/60 inch (7.5 lines per inch)
+# to get 60 dots per inch vertically (7.5 lines/" * 8 pins/line).
+# Set the character spacing to pica (1/10 inch or 10 characters
+# per inch); at 60 dots per inch horizontally this means 6
+# dots per character.
+# Set vertical and horizontal tab stops at the upper left corner
+# of the image, then tab to the upper left corner.
+# Note: $<> is a true null (only works with special Curses routine).
+# THIS ASSUMES WE START AT THE TOP OF THE PAGE! (although
+# maybe not in the first column.)
+ defbi=%?%p5%{1}%=%t\E3\030^R\EP\EB%p2%{8}%/%c$<>\ED%p1%{6}%/%c$<>\013\r\t%;,
+
+# endbi=
+ u7=\E3\036,
+ endbi=\E3\036,
+
+# binel=
+ u5=\n\r\t,
+ binel=\n\r\t,
+
+# bicr=
+ u3=\r\t,
+ bicr=\r\t,
+
+######################################################################
+#
+# Epson LQ-2500
+#
+
+#
+# Basic capabilities:
+#
+Gep2500+basic,
+
+ cols#136,
+ lines#66,
+ it#8,
+
+ cpix,
+ orc#6,
+ orhi#60,
+ orl#30,
+ orvi#180,
+
+ cr=^M,
+ cud1=^J,
+ cuf1=\s,
+ cub1=\b,
+ ff=^L,
+ ht=^I,
+
+ cpi=%?%p1%{10}%=%t^R\EP%e%p1%{12}%=%t^R\EM%e%p1%{20}%=%t^O\EM%e%p1%{17}%=%t^O\EP%;,
+ cvr=%?%p1%{0}%>%p1%{256}%<%&%t\E3%p1%c%;,
+
+ is1=^Q^X,
+ is2=\E@\E%0\EO,
+
+
+ csnm=%?%p1%{0}%=%tusa%e%p1%{1}%=%tfrench%e%p1%{2}%=%tgerman%e%p1%{3}%=%tbritish%e%p1%{4}%=%tdanish%e%p1%{5}%=%tswedish%e%p1%{6}%=%titalian%e%p1%{7}%=%tspanish%e%p1%{8}%=%tjapanese%e%p1%{9}%=%tnorwegian%e%p1%{10}%=%tdanish2%e%p1%{11}%=%tspanish2%e%p1%{12}%=%tlatin_american%e%p1%{13}%=%tibmgraphics%;,
+ scs=%?%p1%{0}%=%t\ER\200%e%p1%{1}%=%t\ER\001%e%p1%{2}%=%t\ER\002%e%p1%{3}%=%t\ER\003%e%p1%{4}%=%t\ER\004%e%p1%{5}%=%t\ER\005%e%p1%{6}%=%t\ER\006%e%p1%{7}%=%t\ER\007%e%p1%{8}%=%t\ER\010%e%p1%{9}%=%t\ER\011%e%p1%{10}%=%t\ER\012%e%p1%{11}%=%t\ER\013%e%p1%{12}%=%t\ER\014%e%p1%{13}%=%t\Et1%;,
+
+ smso=\EE,
+ rmso=\EF,
+ smul=\E-1,
+ rmul=\E-0,
+ bold=\EG,
+ sshm=\EE,
+ rshm=\EF,
+ ssubm=\ES1,
+ rsubm=\ET,
+ ssupm=\ES0,
+ rsupm=\ET,
+ swidm=\EW1,
+ rwidm=\EW0,
+ sitm=\E4,
+ ritm=\E5,
+ sgr0=\EF\E-0\EH\ET\EW0\E5,
+ sgr=%?%p1%t\EE%e\EF%;%?%p2%t\E-1%e\E-0%;%?%p6%t\EG%e\EH%;,
+
+#
+# For now we can't set the margin in the first (0th) column
+# due to limitations in the Curses code. This should be changed
+# in the future. For now, shift right 1. Note that the right
+# margin is the last USEABLE column in Terminfo, but is 1
+# PAST that for the Epson.
+ smglp=%?%p1%{256}%<%t\El%p1%{1}%+%c%;,
+ smgrp=%?%p1%{256}%<%t\EQ%p1%{2}%+%c%;,
+
+# slines=
+ u9=%?%p1%{0}%>%p1%{128}%<%&%t\EC%p1%c%;,
+ slines=%?%p1%{0}%>%p1%{128}%<%&%t\EC%p1%c%;,
+
+ sdrfq=\Ex0,
+ snlq=\Ex1,
+ snrmq=\Ek1,
+
+#
+# Graphics capabilities:
+#
+Gep2500+low,
+
+ npins#8,
+ spinv#60,
+ spinh#60,
+
+ porder=1\,2\,3\,4\,5\,6\,7\,8;0,
+
+ sbim=\EK%p1%{256}%m%c%p1%{256}%/%c,
+
+ bitwin#1,
+ u1=1,
+ bitype#1,
+ u2=1,
+
+# defbi=
+# Set the line spacing to 8/60 inch (7.5 lines per inch)
+# to get 60 dots per inch vertically (7.5 lines/" * 8 pins/line).
+# Set the character spacing to pica (1/10 inch or 10 characters
+# per inch); at 60 dots per inch horizontally this means 6
+# dots per character.
+# Set vertical and horizontal tab stops at the upper left corner
+# of the image, then tab to the upper left corner.
+# Note: $<> is a true null (only works with special Curses routine).
+# THIS ASSUMES WE START AT THE TOP OF THE PAGE! (although
+# maybe not in the first column.)
+ u6=%?%p5%{1}%=%t\E3\030^R\EP\EB%p2%{8}%/%c$<>\ED%p1%{6}%/%c$<>\013\r\t%;,
+ defbi=%?%p5%{1}%=%t\E3\030^R\EP\EB%p2%{8}%/%c$<>\ED%p1%{6}%/%c$<>\013\r\t%;,
+
+# endbi=
+ u7=\E3\036,
+ endbi=\E3\036,
+
+# binel=
+ u5=\n\r\t,
+ binel=\n\r\t,
+
+# bicr=
+ u3=\r\t,
+ bicr=\r\t,
+
+#
+# Graphics capabilities:
+#
+Gep2500+high,
+
+ npins#24,
+ spinv#180,
+ spinh#180,
+
+ porder=1\,2\,3\,4\,5\,6\,7\,8\,9\,10\,11\,12\,13\,14\,15\,16\,17\,18\,19\,20\,21\,22\,23\,24;0,
+
+ sbim=\E*\047%p1%{256}%m%c%p1%{256}%/%c,
+
+# defbi=
+# Set the line spacing to 8/60 inch (7.5 lines per inch)
+# to get 180 dots per inch vertically (7.5 lines/" * 24 pins/line).
+# Set the character spacing to pica (1/10 inch or 10 characters
+# per inch); at 180 dots per inch horizontally this means 18
+# dots per character.
+# Set vertical and horizontal tab stops at the upper left corner
+# of the image, then tab to the upper left corner.
+# Note: $<> is a true null (only works with special Curses routine).
+# THIS ASSUMES WE START AT THE TOP OF THE PAGE! (although
+# maybe not in the first column.)
+ u6=%?%p5%{1}%=%t\E3\030^R\EP\EB%p2%{24}%/%c$<>\ED%p1%{18}%/%c$<>\013\r\t%;,
+ defbi=%?%p5%{1}%=%t\E3\030^R\EP\EB%p2%{24}%/%c$<>\ED%p1%{18}%/%c$<>\013\r\t%;,
+
+ use=Gep2500+low,
+
+#
+# Color capability:
+#
+Gep2500+color,
+
+ colors#3,
+
+# setcolor=
+ initc=%?%p1%{0}%=%t\Er0%;%?%p1%{1}%=%t\Er2%;%?%p1%{2}%=%t\Er1%;%?%p1%{3}%=%t\Er4%;,
+
+# colornm=
+ u8=%?%p1%{0}%=%tblack%;%?%p1%{1}%=%tcyan%;%?%p1%{2}%=%tmagenta%;%?%p1%{3}%=%tyellow%;%?%p1%{4}%=%torange=yellow+magenta%;%?%p1%{5}%=%tgreen=yellow+cyan%;%?%p1%{6}%=%tviolet=magenta+cyan%;%?%p1%{7}%=%tbrown=magenta+black%;,
+ colornm=%?%p1%{0}%=%tblack%;%?%p1%{1}%=%tcyan%;%?%p1%{2}%=%tmagenta%;%?%p1%{3}%=%tyellow%;%?%p1%{4}%=%torange=yellow+magenta%;%?%p1%{5}%=%tgreen=yellow+cyan%;%?%p1%{6}%=%tviolet=magenta+cyan%;%?%p1%{7}%=%tbrown=magenta+black%;,
+
diff --git a/usr/src/cmd/lp/terminfo/hplaser.ti b/usr/src/cmd/lp/terminfo/hplaser.ti
new file mode 100644
index 0000000000..4d887f99ab
--- /dev/null
+++ b/usr/src/cmd/lp/terminfo/hplaser.ti
@@ -0,0 +1,122 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
+# All Rights Reserved
+
+
+#ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.1 */
+
+######################################################################
+#
+# Generic entry for the HP Laserjet printers and emulations:
+#
+
+#
+# Basic capabilities:
+#
+Ghplaser+basic,
+
+ cols#80,
+ lines#60,
+
+ orc#12,
+ orhi#120,
+ orl#8,
+ orvi#48,
+
+ cr=^M,
+ cud1=^J,
+ cuf1=\s,
+ cud=\E&a+%p1%dR,
+ cuf=\E&a+%p1%dC,
+ ff=^L,
+ hpa=\E&a%p1%dC,
+ vpa=\E&a%p1%dR,
+
+ cpi=%?%p1%{10}%=%t\E&k0S%e%p1%{17}%=%t\E&k2S%;,
+ chr=%?%p1%{0}%>%p1%{127}%<%t\E&k%p1%dH%;,
+ cvr=%?%p1%{0}%>%p1%{127}%<%t\E&l%p1%dC%;,
+ lpi=%?%p1%{1}%=%t\E&l1D%e%p1%{2}%=%t\E&l2D%e%p1%{3}%=%t\E&l3D%e%p1%{4}%=%t\E&l4D%e%p1%{6}%=%t\E&l6D%e%p1%{8}%=%t\E&l8D%e%p1%{12}%=%t\E&l12D%e%p1%{16}%=%t\E&l16D%e%p1%{24}%=%t\E&l24D%e%p1%{48}%=%t\E&l48D%;,
+
+ is2=\EE\E&k0G,
+ mgc=\E9,
+
+ rmul=\E&d\100,
+ ritm=\E(s0S,
+ smul=\E&dD,
+ sitm=\E(s1S,
+ smgtp=\E&l%p1%{1}%+%dE,
+ smgbp=\E&l%p1%{1}%+%dF,
+ smglp=\E&a%p1%dL,
+ smgrp=\E&a%p1%dM,
+
+#Set top margin at +2 offset
+ smgtp=\E&l%p1%dE,
+
+#Set page length u9 used for 3.2 slines for 4.0
+ u9=\E&l%p1P,
+ slines=\E&l%p1%dF,
+
+#
+# Graphics capabilities:
+#
+Ghplaser+high,
+
+ npins#8,
+ spinv#300,
+ spinh#300,
+
+ porder=1\,2\,3\,4\,5\,6\,7\,8;0,
+
+ sbim=\E*b%p1%dW,
+
+# u1 - u7 used for svr3.2
+# bitwin#,
+ u1=1,
+ bitwin#1,
+# bitype#
+ u2=2,
+ bitype#2,
+
+# defbi=
+# X (or Y) * scale * 12/5 == pos in decipoints (12/5 == 720/300)
+ u6=%?%p5%{0}%>%p5%{5}%<%&%t\E&a%p1%p5%*%{12}%*%{5}%/%dH\E&a%p2%p5%*%{12}%*%{5}%/%dV\E*t%{300}%p5%/%dR\E*r1A%;,
+ defbi=%?%p5%{0}%>%p5%{5}%<%&%t\E&a%p1%p5%*%{12}%*%{5}%/%dH\E&a%p2%p5%*%{12}%*%{5}%/%dV\E*t%{300}%p5%/%dR\E*r1A%;,
+
+# endbi=
+ u7=\E*rB,
+ endbi=\E*rB,
+
+Ghplaser+II,
+
+ cpi=%?%p1%{10}%=%t\E(s10H%e%p1%{16}%=%p1%{17}%=%O%t\E(s16.66H%e%;,
+
+ csnm=%?%p1%{0}%=%troman-8%e%p1%{1}%=%tibm-us%e%p1%{2}%=%tibm-dn%e%p1%{3}%=%tgerman%e%p1%{4}%=%tspanish%e%p1%{5}%=%tecma-94%e%p1%{6}%=%tiso2%e%p1%{7}%=%tiso4%e%p1%{8}%=%tiso6%e%p1%{9}%=%tiso10%e%p1%{10}%=%tiso11%e%p1%{11}%=%tiso14%e%p1%{12}%=%tiso15%e%p1%{13}%=%tiso16%e%p1%{14}%=%tiso17%e%p1%{15}%=%tiso21%e%p1%{16}%=%tiso25%e%p1%{17}%=%tiso57%e%p1%{18}%=%tiso60%e%p1%{19}%=%tiso61%e%p1%{20}%=%tiso69%e%p1%{21}%=%tiso84%e%p1%{22}%=%tiso85%;,
+
+ scs=%?%p1%{0}%=%t\E(8U%e%p1%{1}%=%t\E(10U%e%p1%{2}%=%t\E(11U%e%p1%{3}%=%t\E(0G%e%p1%{4}%=%t\E(1S%e%p1%{5}%=%t\E(0N%e%p1%{6}%=%t\E(2U%e%p1%{7}%=%t\E(1E%e%p1%{8}%=%t\E(0U%e%p1%{9}%=%t\E(3S%e%p1%{10}%=%t\E(0S%e%p1%{11}%=%t\E(0K%e%p1%{12}%=%t\E(0I%e%p1%{13}%=%t\E(4S%e%p1%{14}%=%t\E(2S%e%p1%{15}%=%t\E(1G%e%p1%{16}%=%t\E(0F%e%p1%{17}%=%t\E(2K%e%p1%{18}%=%t\E(0D%e%p1%{19}%=%t\E(1D%e%p1%{20}%=%t\E(1F%e%p1%{21}%=%t\E(5S%e%p1%{22}%=%t\E(6S%;,
+
+ use=Ghplaser+basic, use=Ghplaser+high,
+
diff --git a/usr/src/cmd/lp/terminfo/ibm.ti b/usr/src/cmd/lp/terminfo/ibm.ti
new file mode 100644
index 0000000000..e58aca21eb
--- /dev/null
+++ b/usr/src/cmd/lp/terminfo/ibm.ti
@@ -0,0 +1,353 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
+# All Rights Reserved
+
+
+#ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.2 */
+
+######################################################################
+#
+# Generic entries for the various IBM printers and emulations.
+#
+
+###################################
+#
+# IBM Graphics
+#
+
+#
+# Basic capabilities:
+#
+Gibmg+basic,
+
+ cols#80,
+ lines#66,
+ it#8,
+
+ cpix,
+ orc#1,
+ orhi#10,
+ orl#12,
+ orvi#72,
+
+ cr=^M,
+ ff=^L,
+ ht=^I,
+ cud1=^J,
+ cuf1=\s,
+
+ cpi=%?%p1%{10}%=%t^R%e%p1%{16}%=%p1%{17}%=%O%t^O%;,
+ cvr=%?%p1%{0}%>%p1%{256}%<%&%t\EA%p1%c\E2%;,
+
+ is1=^X,
+ is2=^R\EA\014\E2\EF\EH\EW0\ET\E-0\E7\EO\ECB,
+
+ csnm=%?%p1%{0}%=%tcharacter_set_1%e%p1%{1}%=%tcharacter_set_2%;,
+ scs=%?%p1%{0}%=%t\E7%e%p1%{2}%=%t\E6%;,
+
+ smso=\EE,
+ rmso=\EF,
+ smul=\E-1,
+ rmul=\E-0,
+ bold=\EG,
+ smacs=\E6,
+ rmacs=\E7,
+ sshm=\EE,
+ rshm=\EF,
+ ssubm=\ES1,
+ rsubm=\ET,
+ ssupm=\ES0,
+ rsupm=\ET,
+ swidm=\EW1,
+ rwidm=\EW0,
+ sgr0=\EF\E-0\EH\E7\ET\EW0,
+ sgr=%?%p1%t\EE%e\EF%;%?%p2%t\E-1%e\E-0%;%?%p6%t\EG%e\EH%;%?%p9%t\E6%e\E7%;,
+ sdrfq=\EH,
+ snlq=\EG,
+
+# slines= u9 used for svr3.2
+ u9=%?%p1%{0}%>%p1%{128}%<%&%t\EC%p1%c%;,
+ slines=%?%p1%{0}%>%p1%{128}%<%&%t\EC%p1%c%;,
+
+#
+# Graphics capabilities (low resolution, 9-pin):
+#
+Gibmg+low,
+
+ npins#8,
+ spinv#72,
+ spinh#60,
+
+ porder=1\,2\,3\,4\,5\,6\,7\,8;0,
+
+ sbim=\EK%p1%{256}%m%c%p1%{256}%/%c,
+
+ bitwin#1,
+ u1=1,
+ bitype#1,
+ u2=1,
+
+# defbi=
+# Set the line spacing to 8/72 inch (9 lines per inch)
+# to get 72 dots per inch vertically (9 lines/inch * 8 pins/line).
+# Set the character spacing to pica (1/10 inch or 10 characters
+# per inch); at 60 dots per inch horizontally this means 6
+# dots per character.
+# The IBM Graphics doesn't have parameterized motion,
+# so we simulate it with linefeeds and spaces.
+# Assume we never need to move across more than 63 colums
+# or down more than 31 lines.
+# THIS ASSUMES WE START AT THE TOP OF THE PAGE! (although
+# maybe not in the first column.)
+ u6=%?%p5%{1}%=%t\EA\010\E2^R%p2%{8}%/%Py%?%gy%{15}%>%t\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n%gy%{16}%-%Py%;%?%gy%{7}%>%t\n\n\n\n\n\n\n\n%gy%{8}%-%Py%;%?%gy%{3}%>%t\n\n\n\n%gy%{4}%-%Py%;%?%gy%{1}%>%t\n\n%gy%{2}%-%Py%;%?%gy%{0}%>%t\n%;\r%p1%{6}%/%Px%?%gx%{31}%>%t %gx%{32}%-%Px%;%?%gx%{15}%>%t %gx%{16}%-%Px%;%?%gx%{7}%>%t %gx%{8}%-%Px%;%?%gx%{3}%>%t %gx%{4}%-%Px%;%?%gx%{1}%>%t %gx%{2}%-%Px%;%?%gx%{0}%>%t %;%;,
+ defbi=%?%p5%{1}%=%t\EA\010\E2^R%p2%{8}%/%Py%?%gy%{15}%>%t\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n%gy%{16}%-%Py%;%?%gy%{7}%>%t\n\n\n\n\n\n\n\n%gy%{8}%-%Py%;%?%gy%{3}%>%t\n\n\n\n%gy%{4}%-%Py%;%?%gy%{1}%>%t\n\n%gy%{2}%-%Py%;%?%gy%{0}%>%t\n%;\r%p1%{6}%/%Px%?%gx%{31}%>%t %gx%{32}%-%Px%;%?%gx%{15}%>%t %gx%{16}%-%Px%;%?%gx%{7}%>%t %gx%{8}%-%Px%;%?%gx%{3}%>%t %gx%{4}%-%Px%;%?%gx%{1}%>%t %gx%{2}%-%Px%;%?%gx%{0}%>%t %;%;,
+
+# endbi=
+ u7=\EA\014\E2,
+ endbi=\EA\014\E2,
+
+# binel=
+ u5=\n\r%p1%{6}%/%Px%?%gx%{31}%>%t %gx%{32}%-%Px%;%?%gx%{15}%>%t %gx%{16}%-%Px%;%?%gx%{7}%>%t %gx%{8}%-%Px%;%?%gx%{3}%>%t %gx%{4}%-%Px%;%?%gx%{1}%>%t %gx%{2}%-%Px%;%?%gx%{0}%>%t %;,
+ binel=\n\r%p1%{6}%/%Px%?%gx%{31}%>%t %gx%{32}%-%Px%;%?%gx%{15}%>%t %gx%{16}%-%Px%;%?%gx%{7}%>%t %gx%{8}%-%Px%;%?%gx%{3}%>%t %gx%{4}%-%Px%;%?%gx%{1}%>%t %gx%{2}%-%Px%;%?%gx%{0}%>%t %;,
+
+# bicr=
+ u3=\r%p1%{6}%/%Px%?%gx%{31}%>%t %gx%{32}%-%Px%;%?%gx%{15}%>%t %gx%{16}%-%Px%;%?%gx%{7}%>%t %gx%{8}%-%Px%;%?%gx%{3}%>%t %gx%{4}%-%Px%;%?%gx%{1}%>%t %gx%{2}%-%Px%;%?%gx%{0}%>%t %;,
+ bicr=\r%p1%{6}%/%Px%?%gx%{31}%>%t %gx%{32}%-%Px%;%?%gx%{15}%>%t %gx%{16}%-%Px%;%?%gx%{7}%>%t %gx%{8}%-%Px%;%?%gx%{3}%>%t %gx%{4}%-%Px%;%?%gx%{1}%>%t %gx%{2}%-%Px%;%?%gx%{0}%>%t %;,
+
+###################################
+#
+# IBM Color
+#
+
+#
+# Basic capabilities:
+#
+Gibmc+basic,
+
+ cub1=\b,
+
+ is1=^Q^X,
+ is2=^R\EA\014\E2\EF\EH\EW0\ET\E-0\E7\EO\ER\E50\EM0\EX^A\210\Eb\ECB,
+
+ cvr=%?%p1%{0}%>%p1%{256}%<%&%t\E3%p1%c%;,
+
+ smglp=\EX%p1%{1}%+%c%p2%{1}%+%c,
+
+ use=Gibmg+basic,
+
+#
+# Graphics capabilities (low resolution, 9-pin, 5:6 aspect ratio):
+#
+Gibmc+low+5x6,
+
+ spinv#84,
+ spinh#70,
+
+# defbi=
+# Set 5:6 aspect ratio.
+# Set the line spacing to 7/72 inch (10.29 lines per inch)
+# to get approximately 84 dots per inch vertically
+# (10.29 lines/inch * 8 pins/line equals 82.28 dots per inch).
+# Set the character spacing to pica (1/10 inch or 10 characters
+# per inch); at 70 dots per inch horizontally this means 7
+# dots per character.
+# Set vertical and horizontal tab stops at the upper left corner
+# of the image, then tab to the upper left corner.
+# Note: $<> is a true null (only works with special Curses routine).
+# THIS ASSUMES WE START AT THE TOP OF THE PAGE! (although
+# maybe not in the first column.)
+ u6=%?%p5%{1}%=%t\En^B\E1^R\EB%p2%{8}%/%c$<>\ED%p1%{7}%/%c$<>\013\r\t%;,
+ defbi=%?%p5%{1}%=%t\En^B\E1^R\EB%p2%{8}%/%c$<>\ED%p1%{7}%/%c$<>\013\r\t%;,
+
+# binel=
+ u5=\n\r\t,
+ binel=\n\r\t,
+
+# bicr=
+ u3=\r\t,
+ bicr=\r\t,
+
+ use=Gibmg+low,
+
+#
+# Graphics capabilities (low resolution, 9-pin, 1:1 aspect ratio):
+#
+Gibmc+low+1x1,
+
+ spinh#84,
+
+# defbi=
+# Set 1:1 aspect ratio.
+# Set the line spacing to 7/72 inch (10.29 lines per inch)
+# to get approximately 84 dots per inch vertically
+# (10.29 lines/inch * 8 pins/line equals 82.28 dots per inch).
+# Set the character spacing to pica (1/10 inch or 10 characters
+# per inch); at 84 dots per inch horizontally this means 8.4
+# dots per character.
+# Set vertical and horizontal tab stops at the upper left corner
+# of the image, then tab to the upper left corner.
+# Note: $<> is a true null (only works with special Curses routine).
+# THIS ASSUMES WE START AT THE TOP OF THE PAGE! (although
+# maybe not in the first column.)
+ u6=%?%p5%{1}%=%t\En^A\E1^R\EB%p2%{8}%/%c$<>\ED%p1%{10}%*%{84}%/%c$<>\013\r\t%;,
+ defbi=%?%p5%{1}%=%t\En^A\E1^R\EB%p2%{8}%/%c$<>\ED%p1%{10}%*%{84}%/%c$<>\013\r\t%;,
+
+ use=Gibmc+low+5x6,
+
+#
+# Color capability:
+#
+Gibmc+color,
+
+ colors#3,
+
+# setcolor=
+ initc=%?%p1%{0}%=%t\Eb%;%?%p1%{1}%=%t\Ec%;%?%p1%{2}%=%t\Em%;%?%p1%{3}%=%t\Ey%;,
+
+# colornm=
+ u8=%?%p1%{0}%=%tblack%;%?%p1%{1}%=%tcyan%;%?%p1%{2}%=%tmagenta%;%?%p1%{3}%=%tyellow%;%?%p1%{4}%=%torange=yellow+magenta%;%?%p1%{5}%=%tgreen=yellow+cyan%;%?%p1%{6}%=%tviolet=magenta+cyan%;%?%p1%{7}%=%tbrown=magenta+black%;,
+ colornm=%?%p1%{0}%=%tblack%;%?%p1%{1}%=%tcyan%;%?%p1%{2}%=%tmagenta%;%?%p1%{3}%=%tyellow%;%?%p1%{4}%=%torange=yellow+magenta%;%?%p1%{5}%=%tgreen=yellow+cyan%;%?%p1%{6}%=%tviolet=magenta+cyan%;%?%p1%{7}%=%tbrown=magenta+black%;,
+
+###################################
+#
+# IBM Proprinter XL:
+#
+# This printer appears to be a superset of the IBM Graphics
+# and IBM Color printers, with a 24-wire printhead. The entry
+# below uses the full capabilities of the superset and printhead.
+# The printer has an Alternate Graphics Mode (AGM) that changes
+# the vertical resolution from 1/216" to 1/180", and the graphics
+# aspect ratio from 5:6 to 1:1. HOWEVER, there does not appear to
+# be a control sequence that switches into this mode--it must be
+# done by hand!
+#
+
+#
+# Basic capabilities (printer not in AGM):
+#
+Gibmxl+basic,
+
+ orc#12,
+ orhi#120,
+ orl#36,
+ orvi#216,
+
+ cub1=\b,
+
+ cpi=%?%p1%{10}%=%t^R%e%p1%{12}%=%t\E:%e%p1%{17}%=%t^O%;,
+ cvr=%?%p1%{0}%>%p1%{256}%<%&%t\E3%p1%c%;,
+
+ is1=^Q^X,
+ is2=^R\EP0\EA\014\E2\EC\102\EO\ER\Eb\E50\EF\EH\EW0\ET\E-0\E_0\E7,
+ is3=\EX\001\120,
+
+ smglp=\EX%p1%{1}%+%c%p2%{1}%+%c,
+
+ use=Gibmg+basic,
+
+#
+# Basic capabilities (printer in AGM):
+#
+Gibmxlagm+basic,
+
+ orl#30,
+ orvi#180,
+
+ is2=^R\EP0\EA\012\E2\EC\102\EO\ER\Eb\E50\EF\EH\EW0\ET\E-0\E_0\E7,
+
+ use=Gibmxl+basic,
+
+#
+# Graphics capabilities (low resolution, 8-pin, 5:6 aspect ratio):
+#
+Gibmxl+low+5x6,
+
+ spinv#72,
+ spinh#60,
+
+# defbi=
+# Set the line spacing to 8/72 inch (9 lines per inch)
+# to get 72 dots per inch vertically (9 lines/" * 8 pins/line).
+# Set the character spacing to pica (1/10 inch or 10 characters
+# per inch); at 60 dots per inch horizontally this means 6
+# dots per character.
+# Set vertical and horizontal tab stops at the upper left corner
+# of the image, then tab to the upper left corner.
+# Note: $<> is a true null (only works with special Curses routine).
+# THIS ASSUMES WE START AT THE TOP OF THE PAGE! (although
+# maybe not in the first column.)
+ u6=%?%p5%{1}%=%t\EA\010\E2^R\EB%p2%{8}%/%c$<>\ED%p1%{6}%/%c$<>\013\r\t%;,
+ defbi=%?%p5%{1}%=%t\EA\010\E2^R\EB%p2%{8}%/%c$<>\ED%p1%{6}%/%c$<>\013\r\t%;,
+
+ use=Gibmc+low+5x6,
+
+#
+# Graphics capabilities (low resolution, 8-pin, 1:1 aspect ratio):
+#
+Gibmxl+low+1x1,
+
+ spinv#60,
+
+# In AGM the "defbi" cap from Gibmxl+low+5x6 will work. The
+# line spacing will be 8/60 inch to get 60 dots per inch, using
+# the same control sequence.
+
+ use=Gibmxl+low+5x6,
+
+#
+# Graphics capabilities (high resolution, 24-pin, 5:6 aspect ratio):
+#
+# This doesn't work as the pin spacing doesn't get set to
+# 1/216 inch, but stays at 1/180 inch, even out of AGM.
+#
+
+#
+# Graphics capabilities (high resolution, 24-pin, 1:1 aspect ratio):
+#
+Gibmxl+high+1x1,
+
+ npins#24,
+ spinv#180,
+ spinh#180,
+
+ porder=1\,2\,3\,4\,5\,6\,7\,8\,9\,10\,11\,12\,13\,14\,15\,16\,17\,18\,19\,20\,21\,22\,23\,24;0,
+
+ sbim=\E*\047%p1%{256}%m%c%p1%{256}%/%c,
+
+# defbi=
+# Set the line spacing to 8/60 inch (7.5 lines per inch)
+# to get 180 dots per inch vertically (7.5 lines/" * 24 pins/line).
+# This requires the printer or emulation in Alternate Graphics Mode.
+# Set the character spacing to pica (1/10 inch or 10 characters
+# per inch); at 180 dots per inch horizontally this means 18
+# dots per character.
+# Set vertical and horizontal tab stops at the upper left corner
+# of the image, then tab to the upper left corner.
+# Note: $<> is a true null (only works with special Curses routine).
+# THIS ASSUMES WE START AT THE TOP OF THE PAGE! (although
+# maybe not in the first column.)
+ u6=%?%p5%{1}%=%t\EA\010\E2^R\EB%p2%{24}%/%c$<>\ED%p1%{18}%/%c$<>\013\r\t%;,
+ defbi=%?%p5%{1}%=%t\EA\010\E2^R\EB%p2%{24}%/%c$<>\ED%p1%{18}%/%c$<>\013\r\t%;,
+
+ use=Gibmc+low+5x6,
+
diff --git a/usr/src/cmd/lp/terminfo/unknown.ti b/usr/src/cmd/lp/terminfo/unknown.ti
new file mode 100644
index 0000000000..85f95a606e
--- /dev/null
+++ b/usr/src/cmd/lp/terminfo/unknown.ti
@@ -0,0 +1,30 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
+# All Rights Reserved
+
+
+#ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.1 */
+unknown,
+ am, gn,
+ cols#80,
+ bel=^G, cr=\r, cud1=\n, ind=\n,
diff --git a/usr/src/cmd/mdb/Makefile.common b/usr/src/cmd/mdb/Makefile.common
index 4e282bf001..ea319d65ae 100644
--- a/usr/src/cmd/mdb/Makefile.common
+++ b/usr/src/cmd/mdb/Makefile.common
@@ -28,6 +28,7 @@ COMMON_MODULES_PROC = \
dof \
libavl \
libc \
+ libctf \
libcmdutils \
libnvpair \
libproc \
@@ -38,7 +39,8 @@ COMMON_MODULES_PROC = \
libuutil \
libzpool \
mdb_ds \
- mdb_test
+ mdb_test \
+ v8
#
# MDB modules used for debugging user processes which are only 32-bit
diff --git a/usr/src/cmd/mdb/common/mdb/mdb_ctf.c b/usr/src/cmd/mdb/common/mdb/mdb_ctf.c
index 072ac9ef12..1ae0952619 100644
--- a/usr/src/cmd/mdb/common/mdb/mdb_ctf.c
+++ b/usr/src/cmd/mdb/common/mdb/mdb_ctf.c
@@ -1780,7 +1780,6 @@ mdb_ctf_synthetics_create_base(int kind)
return (0);
discard:
- err = set_errno(ctf_to_errno(ctf_errno(cp)));
(void) ctf_discard(cp);
return (err);
}
diff --git a/usr/src/cmd/mdb/common/mdb/mdb_demangle.c b/usr/src/cmd/mdb/common/mdb/mdb_demangle.c
index 7ab7d19716..27c8a1d21d 100644
--- a/usr/src/cmd/mdb/common/mdb/mdb_demangle.c
+++ b/usr/src/cmd/mdb/common/mdb/mdb_demangle.c
@@ -24,7 +24,9 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
#include <mdb/mdb_modapi.h>
#include <mdb/mdb_demangle.h>
@@ -263,7 +265,8 @@ int
cmd_demangle(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
mdb_demangler_t *dmp = mdb.m_demangler;
- const char *path = LIB_DEMANGLE;
+ const char *path;
+ char buf[MAXPATHLEN];
if (argc > 1 || (argc > 0 && argv->a_type != MDB_TYPE_STRING))
return (DCMD_USAGE);
@@ -272,6 +275,10 @@ cmd_demangle(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
if (dmp != NULL)
mdb_dem_unload(mdb.m_demangler);
path = argv->a_un.a_str;
+ } else {
+ (void) snprintf(buf, MAXPATHLEN,
+ "%s/%s", mdb.m_root, LIB_DEMANGLE);
+ path = buf;
}
if (dmp != NULL && argc == 0 && !(mdb.m_flags & MDB_FL_DEMANGLE)) {
diff --git a/usr/src/cmd/mdb/common/mdb/mdb_main.c b/usr/src/cmd/mdb/common/mdb/mdb_main.c
index 90e048bb82..a30ee45b7e 100644
--- a/usr/src/cmd/mdb/common/mdb/mdb_main.c
+++ b/usr/src/cmd/mdb/common/mdb/mdb_main.c
@@ -25,7 +25,7 @@
*/
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -51,6 +51,7 @@
#include <libctf.h>
#include <errno.h>
#include <kvm.h>
+#include <zone.h>
#include <mdb/mdb_lex.h>
#include <mdb/mdb_debug.h>
@@ -797,9 +798,15 @@ main(int argc, char *argv[], char *envp[])
if (strchr(pidarg, '/') != NULL)
(void) mdb_iob_snprintf(object, MAXPATHLEN,
"%s/object/a.out", pidarg);
- else
+ else {
+ const char *root;
+
(void) mdb_iob_snprintf(object, MAXPATHLEN,
- "/proc/%s/object/a.out", pidarg);
+ "%s/proc/%s/object/a.out",
+ (root = zone_get_nroot()) != NULL ? root : "",
+ pidarg);
+ }
+
tgt_argv[tgt_argc++] = object;
tgt_argv[tgt_argc++] = pidarg;
}
diff --git a/usr/src/cmd/mdb/common/mdb/mdb_modapi.h b/usr/src/cmd/mdb/common/mdb/mdb_modapi.h
index 9d08a18c69..c6b4956b3b 100644
--- a/usr/src/cmd/mdb/common/mdb/mdb_modapi.h
+++ b/usr/src/cmd/mdb/common/mdb/mdb_modapi.h
@@ -22,7 +22,7 @@
/*
* Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
- * Copyright (c) 2012 Joyent, Inc. All rights reserved.
+ * Copyright (c) 2013 Joyent, Inc. All rights reserved.
*/
#ifndef _MDB_MODAPI_H
@@ -71,7 +71,13 @@ extern "C" {
#define MAX(x, y) ((x) > (y) ? (x) : (y))
#endif
+#ifdef MDB_API_VERSION
+#if (MDB_API_VERSION != 3 && MDB_API_VERSION != 4)
+#error "Only modapi versions three and four are supported."
+#endif
+#else /* !MDB_API_VERISON */
#define MDB_API_VERSION 4 /* Current API version number */
+#endif /* MDB_API_VERISON */
/*
* Debugger command function flags:
@@ -85,11 +91,6 @@ extern "C" {
#define DCMD_HDRSPEC(fl) (((fl) & DCMD_LOOPFIRST) || !((fl) & DCMD_LOOP))
/*
- * Debugger tab command function flags
- */
-#define DCMD_TAB_SPACE 0x01 /* Tab cb invoked with trailing space */
-
-/*
* Debugger command function return values:
*/
#define DCMD_OK 0 /* Dcmd completed successfully */
@@ -118,10 +119,18 @@ typedef struct mdb_arg {
} a_un;
} mdb_arg_t;
+#if (MDB_API_VERSION >= 4)
+/*
+ * Debugger tab command function flags
+ */
+#define DCMD_TAB_SPACE 0x01 /* Tab cb invoked with trailing space */
+
typedef struct mdb_tab_cookie mdb_tab_cookie_t;
-typedef int mdb_dcmd_f(uintptr_t, uint_t, int, const mdb_arg_t *);
typedef int mdb_dcmd_tab_f(mdb_tab_cookie_t *, uint_t, int,
const mdb_arg_t *);
+#endif /* MDB_API_VERSION >= 4 */
+
+typedef int mdb_dcmd_f(uintptr_t, uint_t, int, const mdb_arg_t *);
typedef struct mdb_dcmd {
const char *dc_name; /* Command name */
@@ -129,7 +138,9 @@ typedef struct mdb_dcmd {
const char *dc_descr; /* Description */
mdb_dcmd_f *dc_funcp; /* Command function */
void (*dc_help)(void); /* Command help function (or NULL) */
+#if (MDB_API_VERSION >= 4)
mdb_dcmd_tab_f *dc_tabp; /* Tab completion function */
+#endif
} mdb_dcmd_t;
#define WALK_ERR -1 /* Walk fatal error (terminate walk) */
@@ -346,6 +357,7 @@ typedef void (*mdb_callback_f)(void *);
extern void *mdb_callback_add(int, mdb_callback_f, void *);
extern void mdb_callback_remove(void *);
+#if (MDB_API_VERSION >= 4)
#define MDB_TABC_ALL_TYPES 0x1 /* Include array types in type output */
#define MDB_TABC_MEMBERS 0x2 /* Tab comp. types with members */
#define MDB_TABC_NOPOINT 0x4 /* Tab comp. everything but pointers */
@@ -370,6 +382,7 @@ extern int mdb_tab_typename(int *, const mdb_arg_t **, char *buf, size_t len);
*/
extern int mdb_tab_complete_mt(mdb_tab_cookie_t *, uint_t, int,
const mdb_arg_t *);
+#endif /* MDB_API_VERSION >= 4 */
extern size_t strlcat(char *, const char *, size_t);
extern char *strcat(char *, const char *);
diff --git a/usr/src/cmd/mdb/common/mdb/mdb_print.c b/usr/src/cmd/mdb/common/mdb/mdb_print.c
index 56275fdded..3be869c2c0 100644
--- a/usr/src/cmd/mdb/common/mdb/mdb_print.c
+++ b/usr/src/cmd/mdb/common/mdb/mdb_print.c
@@ -2105,10 +2105,10 @@ cmd_print_tab_common(mdb_tab_cookie_t *mcp, uint_t flags, int argc,
/*
* This is the reason that tab completion was created. We're going to go
- * along and walk the delimiters until we find something a member that
- * we don't recognize, at which point we'll try and tab complete it.
- * Note that ::print takes multiple args, so this is going to operate on
- * whatever the last arg that we have is.
+ * along and walk the delimiters until we find something in a member
+ * that we don't recognize, at which point we'll try and tab complete
+ * it. Note that ::print takes multiple args, so this is going to
+ * operate on whatever the last arg that we have is.
*/
if (mdb_ctf_lookup_by_name(tn, &id) != 0)
return (1);
@@ -2118,11 +2118,11 @@ cmd_print_tab_common(mdb_tab_cookie_t *mcp, uint_t flags, int argc,
delim = parse_delimiter(&start);
/*
- * If we hit the case where we actually have no delimiters, than we need
+ * If we hit the case where we actually have no delimiters, then we need
* to make sure that we properly set up the fields the loops would.
*/
if (delim == MEMBER_DELIM_DONE)
- (void) mdb_snprintf(member, sizeof (member), "%s", start);
+ (void) mdb_snprintf(member, sizeof (member), start);
while (delim != MEMBER_DELIM_DONE) {
switch (delim) {
@@ -2179,7 +2179,7 @@ cmd_print_tab_common(mdb_tab_cookie_t *mcp, uint_t flags, int argc,
/*
* We are going to try to resolve this name as a member. There
- * are a few two different questions that we need to answer. The
+ * are a two different questions that we need to answer. The
* first is do we recognize this member. The second is are we at
* the end of the string. If we encounter a member that we don't
* recognize before the end, then we have to error out and can't
@@ -2209,6 +2209,7 @@ cmd_print_tab_common(mdb_tab_cookie_t *mcp, uint_t flags, int argc,
* already have in rid.
*/
return (mdb_tab_complete_member_by_id(mcp, rid, member));
+
}
int
@@ -2538,8 +2539,7 @@ print_help(void)
}
static int
-printf_signed(mdb_ctf_id_t id, uintptr_t addr, ulong_t off, char *fmt,
- boolean_t sign)
+printf_signed(mdb_ctf_id_t id, uintptr_t addr, ulong_t off, char *fmt, int sign)
{
ssize_t size;
mdb_ctf_id_t base;
@@ -2557,7 +2557,7 @@ printf_signed(mdb_ctf_id_t id, uintptr_t addr, ulong_t off, char *fmt,
} u;
if (mdb_ctf_type_resolve(id, &base) == -1) {
- mdb_warn("could not resolve type");
+ mdb_warn("could not resolve type\n");
return (DCMD_ABORT);
}
@@ -2795,7 +2795,6 @@ printf_string(mdb_ctf_id_t id, uintptr_t addr, ulong_t off, char *fmt)
if (size != 1) {
mdb_warn("string format specifier requires "
"an array of characters\n");
- return (DCMD_ABORT);
}
bzero(buf, sizeof (buf));
@@ -2929,7 +2928,7 @@ cmd_printf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
if (argc == 0 || argv[0].a_type != MDB_TYPE_STRING) {
mdb_warn("expected a format string\n");
- return (DCMD_USAGE);
+ return (DCMD_ABORT);
}
/*
@@ -2938,12 +2937,6 @@ cmd_printf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
* subset of mdb_printf() format strings that we allow.
*/
fmt = argv[0].a_un.a_str;
- /*
- * 'dest' must be large enough to hold a copy of the format string,
- * plus a NUL and up to 2 additional characters for each conversion
- * in the format string. This gives us a bloat factor of 5/2 ~= 3.
- * e.g. "%d" (strlen of 2) --> "%lld\0" (need 5 bytes)
- */
dest = mdb_zalloc(strlen(fmt) * 3, UM_SLEEP | UM_GC);
fmts = mdb_zalloc(strlen(fmt) * sizeof (char *), UM_SLEEP | UM_GC);
funcs = mdb_zalloc(strlen(fmt) * sizeof (void *), UM_SLEEP | UM_GC);
@@ -3126,22 +3119,22 @@ static char _mdb_printf_help[] =
"\n"
" %% Prints the '%' symbol.\n"
" %a Prints the member in symbolic form.\n"
-" %d Prints the member as a decimal integer. If the member is a signed\n"
+" %d Prints the member as a decimal integer. If the member is a signed\n"
" integer type, the output will be signed.\n"
" %H Prints the member as a human-readable size.\n"
" %I Prints the member as an IPv4 address (must be 32-bit integer type).\n"
" %N Prints the member as an IPv6 address (must be of type in6_addr_t).\n"
" %o Prints the member as an unsigned octal integer.\n"
" %p Prints the member as a pointer, in hexadecimal.\n"
-" %q Prints the member in signed octal. Honk if you ever use this!\n"
-" %r Prints the member as an unsigned value in the current output radix.\n"
-" %R Prints the member as a signed value in the current output radix.\n"
+" %q Prints the member in signed octal. Honk if you ever use this!\n"
+" %r Prints the member as an unsigned value in the current output radix. \n"
+" %R Prints the member as a signed value in the current output radix. \n"
" %s Prints the member as a string (requires a pointer or an array of\n"
" characters).\n"
" %u Prints the member as an unsigned decimal integer.\n"
" %x Prints the member in hexadecimal.\n"
" %X Prints the member in hexadecimal, using the characters A-F as the\n"
-" digits for the values 10-15.\n"
+" digits for the values 10-15. \n"
" %Y Prints the member as a time_t as the string "
"'year month day HH:MM:SS'.\n"
"\n"
@@ -3154,13 +3147,13 @@ static char _mdb_printf_help[] =
"\n"
"The following flag specifers are recognized by ::printf:\n"
"\n"
-" %- Left-justify the output within the specified field width. If the\n"
+" %- Left-justify the output within the specified field width. If the\n"
" width of the output is less than the specified field width, the\n"
-" output will be padded with blanks on the right-hand side. Without\n"
+" output will be padded with blanks on the right-hand side. Without\n"
" %-, values are right-justified by default.\n"
"\n"
" %0 Zero-fill the output field if the output is right-justified and the\n"
-" width of the output is less than the specified field width. Without\n"
+" width of the output is less than the specified field width. Without\n"
" %0, right-justified values are prepended with blanks in order to\n"
" fill the field.\n"
"\n"
diff --git a/usr/src/cmd/mdb/common/mdb/mdb_tab.c b/usr/src/cmd/mdb/common/mdb/mdb_tab.c
index af32623470..66bd18586e 100644
--- a/usr/src/cmd/mdb/common/mdb/mdb_tab.c
+++ b/usr/src/cmd/mdb/common/mdb/mdb_tab.c
@@ -388,11 +388,6 @@ mdb_tab_size(mdb_tab_cookie_t *mcp)
return (mdb_nv_size(&mcp->mtc_nv));
}
-/*
- * Determine whether the specified name is a valid tab completion for
- * the given command. If the name is a valid tab completion then
- * it will be saved in the mdb_tab_cookie_t.
- */
void
mdb_tab_insert(mdb_tab_cookie_t *mcp, const char *name)
{
@@ -508,31 +503,18 @@ tab_complete_type(mdb_ctf_id_t id, void *arg)
mdb_tab_cookie_t *mcp = arg;
uint_t flags = (uint_t)(uintptr_t)mcp->mtc_cba;
- /*
- * CTF data includes types that mdb commands don't understand. Before
- * we resolve the actual type prune any entry that is a type we
- * don't care about.
- */
- switch (mdb_ctf_type_kind(id)) {
- case CTF_K_CONST:
- case CTF_K_RESTRICT:
- case CTF_K_VOLATILE:
- return (0);
- }
-
if (mdb_ctf_type_resolve(id, &rid) != 0)
return (1);
rkind = mdb_ctf_type_kind(rid);
-
- if ((flags & MDB_TABC_MEMBERS) && rkind != CTF_K_STRUCT &&
+ if (flags & MDB_TABC_MEMBERS && rkind != CTF_K_STRUCT &&
rkind != CTF_K_UNION)
return (0);
- if ((flags & MDB_TABC_NOPOINT) && rkind == CTF_K_POINTER)
+ if (flags & MDB_TABC_NOPOINT && rkind == CTF_K_POINTER)
return (0);
- if ((flags & MDB_TABC_NOARRAY) && rkind == CTF_K_ARRAY)
+ if (flags & MDB_TABC_NOARRAY && rkind == CTF_K_ARRAY)
return (0);
(void) mdb_ctf_type_name(id, buf, sizeof (buf));
diff --git a/usr/src/cmd/mdb/common/modules/ctf/mdb_modctf.c b/usr/src/cmd/mdb/common/modules/ctf/mdb_modctf.c
new file mode 100644
index 0000000000..5c946c938e
--- /dev/null
+++ b/usr/src/cmd/mdb/common/modules/ctf/mdb_modctf.c
@@ -0,0 +1,85 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2013 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Routines for debugging ctf containers
+ */
+
+#include <mdb/mdb_modapi.h>
+#include <ctf_impl.h>
+
+static int
+mdb_ctf_idhash_walk_init(mdb_walk_state_t *wsp)
+{
+ ctf_idhash_t *ihp;
+
+ if (wsp->walk_addr == NULL) {
+ mdb_warn("ctf_idhash walker does not support global walks\n");
+ return (WALK_ERR);
+ }
+
+ ihp = mdb_alloc(sizeof (ctf_idhash_t), UM_NOSLEEP | UM_GC);
+ if (ihp == NULL) {
+ mdb_warn("failed to allocate memory for a ctf_idhash_t");
+ return (WALK_ERR);
+ }
+
+ if (mdb_vread(ihp, sizeof (ctf_idhash_t), wsp->walk_addr) !=
+ sizeof (ctf_idhash_t)) {
+ mdb_warn("failed to read ctf_idhash_t at %p", wsp->walk_addr);
+ return (WALK_ERR);
+ }
+
+ if (ihp->ih_free == 0)
+ return (WALK_DONE);
+ wsp->walk_data = ihp;
+ wsp->walk_arg = (void *)(uintptr_t)1;
+
+ return (WALK_NEXT);
+}
+
+static int
+mdb_ctf_idhash_walk_step(mdb_walk_state_t *wsp)
+{
+ ctf_ihelem_t ihe;
+ ctf_idhash_t *ihp = wsp->walk_data;
+ int index = (uintptr_t)wsp->walk_arg;
+
+ if (index == ihp->ih_free)
+ return (WALK_DONE);
+
+ if (mdb_vread(&ihe, sizeof (ihe),
+ (uintptr_t)(ihp->ih_chains + index)) != sizeof (ihe)) {
+ mdb_warn("failed to read index %d at %p", index,
+ ihp->ih_chains + index);
+ return (WALK_ERR);
+ }
+ wsp->walk_arg = (void *)(uintptr_t)(index + 1);
+ return (wsp->walk_callback((uintptr_t)(ihp->ih_chains + index), &ihe,
+ wsp->walk_cbdata));
+}
+
+static const mdb_walker_t walkers[] = {
+ { "ctf_idhash", "walk entries in a ctf idhash",
+ mdb_ctf_idhash_walk_init, mdb_ctf_idhash_walk_step }
+};
+
+static const mdb_modinfo_t modinfo = { MDB_API_VERSION, NULL, walkers };
+
+const mdb_modinfo_t *
+_mdb_init(void)
+{
+ return (&modinfo);
+}
diff --git a/usr/src/cmd/mdb/common/modules/dtrace/dtrace.c b/usr/src/cmd/mdb/common/modules/dtrace/dtrace.c
index 7858866a01..50ad2c3497 100644
--- a/usr/src/cmd/mdb/common/modules/dtrace/dtrace.c
+++ b/usr/src/cmd/mdb/common/modules/dtrace/dtrace.c
@@ -22,7 +22,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
/*
@@ -433,6 +433,7 @@ dtracemdb_bufsnap(dtrace_buffer_t *which, dtrace_bufdesc_t *desc)
desc->dtbd_size = bufsize;
desc->dtbd_drops = buf.dtb_drops;
desc->dtbd_errors = buf.dtb_errors;
+ desc->dtbd_timestamp = gethrtime();
return (0);
}
diff --git a/usr/src/cmd/mdb/common/modules/genunix/genunix.c b/usr/src/cmd/mdb/common/modules/genunix/genunix.c
index f6a9a02193..2504c81f6e 100644
--- a/usr/src/cmd/mdb/common/modules/genunix/genunix.c
+++ b/usr/src/cmd/mdb/common/modules/genunix/genunix.c
@@ -113,10 +113,6 @@
*/
#define NINTR 16
-#define KILOS 10
-#define MEGS 20
-#define GIGS 30
-
#ifndef STACK_BIAS
#define STACK_BIAS 0
#endif
@@ -1904,24 +1900,24 @@ typedef struct datafmt {
} datafmt_t;
static datafmt_t kmemfmt[] = {
- { "cache ", "name ",
- "-------------------------", "%-25s " },
- { " buf", " size", "------", "%6u " },
- { " buf", "in use", "------", "%6u " },
- { " buf", " total", "------", "%6u " },
- { " memory", " in use", "----------", "%10lu%c " },
- { " alloc", " succeed", "---------", "%9u " },
- { "alloc", " fail", "-----", "%5u " },
+ { "cache ", "name ",
+ "------------------------------", "%-30s " },
+ { " buf", " size", "-----", "%5H " },
+ { " buf", " in use", "---------", "%9u " },
+ { " buf", " total", "---------", "%9u " },
+ { "memory", "in use", "------", "%6lH " },
+ { " alloc", " succeed", "----------", "%10u " },
+ { "alloc", " fail", "-----", "%5u" },
{ NULL, NULL, NULL, NULL }
};
static datafmt_t vmemfmt[] = {
- { "vmem ", "name ",
- "-------------------------", "%-*s " },
- { " memory", " in use", "----------", "%9llu%c " },
- { " memory", " total", "-----------", "%10llu%c " },
- { " memory", " import", "----------", "%9llu%c " },
- { " alloc", " succeed", "---------", "%9llu " },
+ { "vmem ", "name ",
+ "------------------------------", "%-*s " },
+ { " memory", " in use", "---------", "%9llH " },
+ { " memory", " total", "----------", "%10llH " },
+ { " memory", " import", "---------", "%9llH " },
+ { " alloc", " succeed", "----------", "%10llu " },
{ "alloc", " fail", "-----", "%5llu " },
{ NULL, NULL, NULL, NULL }
};
@@ -1973,15 +1969,9 @@ typedef struct kmastat_vmem {
int kv_fail;
} kmastat_vmem_t;
-typedef struct kmastat_args {
- kmastat_vmem_t **ka_kvpp;
- uint_t ka_shift;
-} kmastat_args_t;
-
static int
-kmastat_cache(uintptr_t addr, const kmem_cache_t *cp, kmastat_args_t *kap)
+kmastat_cache(uintptr_t addr, const kmem_cache_t *cp, kmastat_vmem_t **kvpp)
{
- kmastat_vmem_t **kvpp = kap->ka_kvpp;
kmastat_vmem_t *kv;
datafmt_t *dfp = kmemfmt;
int magsize;
@@ -2022,9 +2012,7 @@ out:
mdb_printf((dfp++)->fmt, cp->cache_bufsize);
mdb_printf((dfp++)->fmt, total - avail);
mdb_printf((dfp++)->fmt, total);
- mdb_printf((dfp++)->fmt, meminuse >> kap->ka_shift,
- kap->ka_shift == GIGS ? 'G' : kap->ka_shift == MEGS ? 'M' :
- kap->ka_shift == KILOS ? 'K' : 'B');
+ mdb_printf((dfp++)->fmt, meminuse);
mdb_printf((dfp++)->fmt, alloc);
mdb_printf((dfp++)->fmt, cp->cache_alloc_fail);
mdb_printf("\n");
@@ -2033,9 +2021,8 @@ out:
}
static int
-kmastat_vmem_totals(uintptr_t addr, const vmem_t *v, kmastat_args_t *kap)
+kmastat_vmem_totals(uintptr_t addr, const vmem_t *v, kmastat_vmem_t *kv)
{
- kmastat_vmem_t *kv = *kap->ka_kvpp;
size_t len;
while (kv != NULL && kv->kv_addr != addr)
@@ -2044,20 +2031,18 @@ kmastat_vmem_totals(uintptr_t addr, const vmem_t *v, kmastat_args_t *kap)
if (kv == NULL || kv->kv_alloc == 0)
return (WALK_NEXT);
- len = MIN(17, strlen(v->vm_name));
+ len = MIN(22, strlen(v->vm_name));
- mdb_printf("Total [%s]%*s %6s %6s %6s %10lu%c %9u %5u\n", v->vm_name,
- 17 - len, "", "", "", "",
- kv->kv_meminuse >> kap->ka_shift,
- kap->ka_shift == GIGS ? 'G' : kap->ka_shift == MEGS ? 'M' :
- kap->ka_shift == KILOS ? 'K' : 'B', kv->kv_alloc, kv->kv_fail);
+ mdb_printf("Total [%s]%*s %5s %9s %9s %6lH %10u %5u\n", v->vm_name,
+ 22 - len, "", "", "", "",
+ kv->kv_meminuse, kv->kv_alloc, kv->kv_fail);
return (WALK_NEXT);
}
/*ARGSUSED*/
static int
-kmastat_vmem(uintptr_t addr, const vmem_t *v, const uint_t *shiftp)
+kmastat_vmem(uintptr_t addr, const vmem_t *v, const void *ignored)
{
datafmt_t *dfp = vmemfmt;
const vmem_kstat_t *vkp = &v->vm_kstat;
@@ -2075,16 +2060,10 @@ kmastat_vmem(uintptr_t addr, const vmem_t *v, const uint_t *shiftp)
}
mdb_printf("%*s", ident, "");
- mdb_printf((dfp++)->fmt, 25 - ident, v->vm_name);
- mdb_printf((dfp++)->fmt, vkp->vk_mem_inuse.value.ui64 >> *shiftp,
- *shiftp == GIGS ? 'G' : *shiftp == MEGS ? 'M' :
- *shiftp == KILOS ? 'K' : 'B');
- mdb_printf((dfp++)->fmt, vkp->vk_mem_total.value.ui64 >> *shiftp,
- *shiftp == GIGS ? 'G' : *shiftp == MEGS ? 'M' :
- *shiftp == KILOS ? 'K' : 'B');
- mdb_printf((dfp++)->fmt, vkp->vk_mem_import.value.ui64 >> *shiftp,
- *shiftp == GIGS ? 'G' : *shiftp == MEGS ? 'M' :
- *shiftp == KILOS ? 'K' : 'B');
+ mdb_printf((dfp++)->fmt, 30 - ident, v->vm_name);
+ mdb_printf((dfp++)->fmt, vkp->vk_mem_inuse.value.ui64);
+ mdb_printf((dfp++)->fmt, vkp->vk_mem_total.value.ui64);
+ mdb_printf((dfp++)->fmt, vkp->vk_mem_import.value.ui64);
mdb_printf((dfp++)->fmt, vkp->vk_alloc.value.ui64);
mdb_printf((dfp++)->fmt, vkp->vk_fail.value.ui64);
@@ -2099,44 +2078,35 @@ kmastat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
kmastat_vmem_t *kv = NULL;
datafmt_t *dfp;
- kmastat_args_t ka;
-
- ka.ka_shift = 0;
- if (mdb_getopts(argc, argv,
- 'k', MDB_OPT_SETBITS, KILOS, &ka.ka_shift,
- 'm', MDB_OPT_SETBITS, MEGS, &ka.ka_shift,
- 'g', MDB_OPT_SETBITS, GIGS, &ka.ka_shift, NULL) != argc)
- return (DCMD_USAGE);
for (dfp = kmemfmt; dfp->hdr1 != NULL; dfp++)
- mdb_printf("%s ", dfp->hdr1);
+ mdb_printf("%s%s", dfp == kmemfmt ? "" : " ", dfp->hdr1);
mdb_printf("\n");
for (dfp = kmemfmt; dfp->hdr1 != NULL; dfp++)
- mdb_printf("%s ", dfp->hdr2);
+ mdb_printf("%s%s", dfp == kmemfmt ? "" : " ", dfp->hdr2);
mdb_printf("\n");
for (dfp = kmemfmt; dfp->hdr1 != NULL; dfp++)
- mdb_printf("%s ", dfp->dashes);
+ mdb_printf("%s%s", dfp == kmemfmt ? "" : " ", dfp->dashes);
mdb_printf("\n");
- ka.ka_kvpp = &kv;
- if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmastat_cache, &ka) == -1) {
+ if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmastat_cache, &kv) == -1) {
mdb_warn("can't walk 'kmem_cache'");
return (DCMD_ERR);
}
for (dfp = kmemfmt; dfp->hdr1 != NULL; dfp++)
- mdb_printf("%s ", dfp->dashes);
+ mdb_printf("%s%s", dfp == kmemfmt ? "" : " ", dfp->dashes);
mdb_printf("\n");
- if (mdb_walk("vmem", (mdb_walk_cb_t)kmastat_vmem_totals, &ka) == -1) {
+ if (mdb_walk("vmem", (mdb_walk_cb_t)kmastat_vmem_totals, kv) == -1) {
mdb_warn("can't walk 'vmem'");
return (DCMD_ERR);
}
for (dfp = kmemfmt; dfp->hdr1 != NULL; dfp++)
- mdb_printf("%s ", dfp->dashes);
+ mdb_printf("%s%s", dfp == kmemfmt ? "" : " ", dfp->dashes);
mdb_printf("\n");
mdb_printf("\n");
@@ -2153,7 +2123,7 @@ kmastat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
mdb_printf("%s ", dfp->dashes);
mdb_printf("\n");
- if (mdb_walk("vmem", (mdb_walk_cb_t)kmastat_vmem, &ka.ka_shift) == -1) {
+ if (mdb_walk("vmem", (mdb_walk_cb_t)kmastat_vmem, NULL) == -1) {
mdb_warn("can't walk 'vmem'");
return (DCMD_ERR);
}
@@ -3993,8 +3963,7 @@ static const mdb_dcmd_t dcmds[] = {
{ "freedby", ":", "given a thread, print its freed buffers", freedby },
{ "kmalog", "?[ fail | slab ]",
"display kmem transaction log and stack traces", kmalog },
- { "kmastat", "[-kmg]", "kernel memory allocator stats",
- kmastat },
+ { "kmastat", NULL, "kernel memory allocator stats", kmastat },
{ "kmausers", "?[-ef] [cache ...]", "current medium and large users "
"of the kmem allocator", kmausers, kmausers_help },
{ "kmem_cache", "?[-n name]",
@@ -4096,6 +4065,9 @@ static const mdb_dcmd_t dcmds[] = {
/* from netstack.c */
{ "netstack", "", "show stack instances", netstack },
+ { "netstackid2netstack", ":",
+ "translate a netstack id to its netstack_t",
+ netstackid2netstack },
/* from nvpair.c */
{ NVPAIR_DCMD_NAME, NVPAIR_DCMD_USAGE, NVPAIR_DCMD_DESCR,
@@ -4186,6 +4158,10 @@ static const mdb_dcmd_t dcmds[] = {
pfiles_help },
/* from zone.c */
+ { "zid2zone", ":", "find the zone_t with the given zone id",
+ zid2zone },
+ { "zdid2zone", ":", "find the zone_t with the given zone debug id",
+ zdid2zone },
{ "zone", "?[-r [-v]]", "display kernel zone(s)", zoneprt },
{ "zsd", ":[-v] [zsd_key]", "display zone-specific-data entries for "
"selected zones", zsd },
diff --git a/usr/src/cmd/mdb/common/modules/genunix/netstack.c b/usr/src/cmd/mdb/common/modules/genunix/netstack.c
index 588bd6dbf3..d46bd85d1f 100644
--- a/usr/src/cmd/mdb/common/modules/genunix/netstack.c
+++ b/usr/src/cmd/mdb/common/modules/genunix/netstack.c
@@ -21,10 +21,9 @@
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <mdb/mdb_modapi.h>
#include <mdb/mdb_ks.h>
#include <mdb/mdb_ctf.h>
@@ -121,3 +120,30 @@ netstack(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
return (DCMD_OK);
}
+
+static int
+netstackid_lookup_cb(uintptr_t addr, const netstack_t *ns, void *arg)
+{
+ netstackid_t nid = *(uintptr_t *)arg;
+ if (ns->netstack_stackid == nid)
+ mdb_printf("%p\n", addr);
+
+ return (WALK_NEXT);
+}
+
+/*ARGSUSED*/
+int
+netstackid2netstack(uintptr_t addr, uint_t flags, int argc,
+ const mdb_arg_t *argv)
+{
+ if (!(flags & DCMD_ADDRSPEC) || argc != 0)
+ return (DCMD_USAGE);
+
+ if (mdb_walk("netstack", (mdb_walk_cb_t)netstackid_lookup_cb, &addr) ==
+ -1) {
+ mdb_warn("failed to walk zone");
+ return (DCMD_ERR);
+ }
+
+ return (DCMD_OK);
+}
diff --git a/usr/src/cmd/mdb/common/modules/genunix/netstack.h b/usr/src/cmd/mdb/common/modules/genunix/netstack.h
index 392565caca..f5773c36c1 100644
--- a/usr/src/cmd/mdb/common/modules/genunix/netstack.h
+++ b/usr/src/cmd/mdb/common/modules/genunix/netstack.h
@@ -26,8 +26,6 @@
#ifndef _NETSTACK_H
#define _NETSTACK_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <mdb/mdb_modapi.h>
#ifdef __cplusplus
@@ -38,6 +36,7 @@ int netstack_walk_init(mdb_walk_state_t *);
int netstack_walk_step(mdb_walk_state_t *);
int netstack(uintptr_t, uint_t, int, const mdb_arg_t *);
+int netstackid2netstack(uintptr_t, uint_t, int, const mdb_arg_t *);
#ifdef __cplusplus
}
diff --git a/usr/src/cmd/mdb/common/modules/genunix/zone.c b/usr/src/cmd/mdb/common/modules/genunix/zone.c
index 96f6b598ec..77ed2cbc48 100644
--- a/usr/src/cmd/mdb/common/modules/genunix/zone.c
+++ b/usr/src/cmd/mdb/common/modules/genunix/zone.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#include <mdb/mdb_param.h>
@@ -33,9 +34,9 @@
#define ZONE_NAMELEN 20
#ifdef _LP64
-#define ZONE_PATHLEN 32
+#define ZONE_PATHLEN 25
#else
-#define ZONE_PATHLEN 40
+#define ZONE_PATHLEN 33
#endif
/*
@@ -54,6 +55,56 @@ char *zone_status_names[] = {
"dead" /* ZONE_IS_DEAD */
};
+static int
+zid_lookup_cb(uintptr_t addr, const zone_t *zone, void *arg)
+{
+ zoneid_t zid = *(uintptr_t *)arg;
+ if (zone->zone_id == zid)
+ mdb_printf("%p\n", addr);
+
+ return (WALK_NEXT);
+}
+
+/*ARGSUSED*/
+int
+zid2zone(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ if (!(flags & DCMD_ADDRSPEC) || argc != 0)
+ return (DCMD_USAGE);
+
+ if (mdb_walk("zone", (mdb_walk_cb_t)zid_lookup_cb, &addr) == -1) {
+ mdb_warn("failed to walk zone");
+ return (DCMD_ERR);
+ }
+
+ return (DCMD_OK);
+}
+
+static int
+zdid_lookup_cb(uintptr_t addr, const zone_t *zone, void *arg)
+{
+ zoneid_t zdid = *(uintptr_t *)arg;
+ if (zone->zone_did == zdid)
+ mdb_printf("%p\n", addr);
+
+ return (WALK_NEXT);
+}
+
+/*ARGSUSED*/
+int
+zdid2zone(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ if (!(flags & DCMD_ADDRSPEC) || argc != 0)
+ return (DCMD_USAGE);
+
+ if (mdb_walk("zone", (mdb_walk_cb_t)zdid_lookup_cb, &addr) == -1) {
+ mdb_warn("failed to walk zone");
+ return (DCMD_ERR);
+ }
+
+ return (DCMD_OK);
+}
+
int
zoneprt(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
@@ -96,10 +147,10 @@ zoneprt(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
*/
if (DCMD_HDRSPEC(flags)) {
if (ropt_given == FALSE)
- mdb_printf("%<u>%?s %6s %-13s %-20s %-s%</u>\n",
+ mdb_printf("%<u>%?s %4s %-13s %-19s %-s%</u>\n",
"ADDR", "ID", "STATUS", "NAME", "PATH");
else
- mdb_printf("%<u>%?s %6s %10s %10s %-20s%</u>\n",
+ mdb_printf("%<u>%?s %6s %10s %10s %-19s%</u>\n",
"ADDR", "ID", "REFS", "CREFS", "NAME");
}
@@ -138,7 +189,7 @@ zoneprt(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
statusp = zone_status_names[zn.zone_status];
else
statusp = "???";
- mdb_printf("%0?p %6d %-13s %-20s %s\n", addr, zn.zone_id,
+ mdb_printf("%0?p %4d %-13s %-19s %s\n", addr, zn.zone_id,
statusp, name, path);
} else {
/*
@@ -146,7 +197,7 @@ zoneprt(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
* Display the zone's subsystem-specific reference counts if
* the user specified the '-v' option.
*/
- mdb_printf("%0?p %6d %10u %10u %-20s\n", addr, zn.zone_id,
+ mdb_printf("%0?p %6d %10u %10u %-19s\n", addr, zn.zone_id,
zn.zone_ref, zn.zone_cred_ref, name);
if (vopt_given == TRUE) {
GElf_Sym subsys_names_sym;
@@ -384,7 +435,7 @@ zsd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
* Prepare to output the specified zone's ZSD information.
*/
if (DCMD_HDRSPEC(flags))
- mdb_printf("%<u>%-20s %?s %?s %8s%</u>\n", "ZONE", "KEY",
+ mdb_printf("%<u>%-19s %?s %?s %8s%</u>\n", "ZONE", "KEY",
"VALUE", "FLAGS");
len = mdb_readstr(name, ZONE_NAMELEN, (uintptr_t)zone.zone_name);
if (len > 0) {
@@ -393,7 +444,7 @@ zsd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
} else {
(void) strcpy(name, "??");
}
- mdb_printf("%-20s ", name);
+ mdb_printf("%-19s ", name);
/*
* Display the requested ZSD entries.
diff --git a/usr/src/cmd/mdb/common/modules/genunix/zone.h b/usr/src/cmd/mdb/common/modules/genunix/zone.h
index e0e5038527..94a383e41c 100644
--- a/usr/src/cmd/mdb/common/modules/genunix/zone.h
+++ b/usr/src/cmd/mdb/common/modules/genunix/zone.h
@@ -27,14 +27,14 @@
#ifndef _ZONE_H
#define _ZONE_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <mdb/mdb_modapi.h>
#ifdef __cplusplus
extern "C" {
#endif
+extern int zid2zone(uintptr_t, uint_t, int argc, const mdb_arg_t *);
+extern int zdid2zone(uintptr_t, uint_t, int argc, const mdb_arg_t *);
extern int zoneprt(uintptr_t, uint_t, int argc, const mdb_arg_t *);
extern int zone_walk_init(mdb_walk_state_t *);
diff --git a/usr/src/cmd/mdb/common/modules/libc/libc.c b/usr/src/cmd/mdb/common/modules/libc/libc.c
index 44e4f49b87..9037072bcb 100644
--- a/usr/src/cmd/mdb/common/modules/libc/libc.c
+++ b/usr/src/cmd/mdb/common/modules/libc/libc.c
@@ -24,7 +24,7 @@
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
/*
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
*/
#include <sys/mdb_modapi.h>
@@ -826,14 +826,19 @@ d_uberdata(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
prt_addr(uberdata.all_lwps, 1),
prt_addr(uberdata.all_zombies, 0));
- HD("nthreads nzombies ndaemons pid sigacthandler");
- mdb_printf(OFFSTR "%-10d %-10d %-10d %-10d %s\n",
+ HD("nthreads nzombies ndaemons pid");
+ mdb_printf(OFFSTR "%-10d %-10d %-10d %-10d\n",
OFFSET(nthreads),
uberdata.nthreads,
uberdata.nzombies,
uberdata.ndaemons,
- (int)uberdata.pid,
- prt_addr((void *)uberdata.sigacthandler, 0));
+ (int)uberdata.pid);
+
+ HD("sigacthandler setctxt");
+ mdb_printf(OFFSTR "%s %s\n",
+ OFFSET(sigacthandler),
+ prt_addr((void *)uberdata.sigacthandler, 1),
+ prt_addr((void *)uberdata.setctxt, 1));
HD("lwp_stacks lwp_laststack nfreestack stk_cache");
mdb_printf(OFFSTR "%s %s %-10d %d\n",
@@ -856,12 +861,17 @@ d_uberdata(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
prt_addr(uberdata.ulwp_replace_last, 1),
prt_addr(uberdata.atforklist, 0));
- HD("robustlocks robustlist progname");
- mdb_printf(OFFSTR "%s %s %s\n",
+ HD("robustlocks robustlist");
+ mdb_printf(OFFSTR "%s %s\n",
OFFSET(robustlocks),
prt_addr(uberdata.robustlocks, 1),
- prt_addr(uberdata.robustlist, 1),
- prt_addr(uberdata.progname, 0));
+ prt_addr(uberdata.robustlist, 1));
+
+ HD("progname ub_broot");
+ mdb_printf(OFFSTR "%s %s\n",
+ OFFSET(progname),
+ prt_addr(uberdata.progname, 1),
+ prt_addr(uberdata.ub_broot, 1));
HD("tdb_bootstrap tdb_sync_addr_hash tdb_'count tdb_'fail");
mdb_printf(OFFSTR "%s %s %-10d %d\n",
diff --git a/usr/src/cmd/mdb/common/modules/v8/mdb_v8.c b/usr/src/cmd/mdb/common/modules/v8/mdb_v8.c
new file mode 100644
index 0000000000..aa597e7a01
--- /dev/null
+++ b/usr/src/cmd/mdb/common/modules/v8/mdb_v8.c
@@ -0,0 +1,4606 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * mdb(1M) module for debugging the V8 JavaScript engine. This implementation
+ * makes heavy use of metadata defined in the V8 binary for inspecting in-memory
+ * structures. Canned configurations can be manually loaded for V8 binaries
+ * that predate this metadata. See mdb_v8_cfg.c for details.
+ *
+ * NOTE: This dmod implementation (including this file and related headers and C
+ * files) exist in both the Node and illumos source trees. THESE SHOULD BE KEPT
+ * IN SYNC. The version in the Node tree is built directly into modern Node
+ * binaries as part of the build process, and the version in the illumos source
+ * tree is delivered with the OS for debugging Node binaries that predate
+ * support for including the dmod directly in the binary. Note too that these
+ * files have different licenses to match their corresponding repositories.
+ */
+
+/*
+ * We hard-code our MDB_API_VERSION to be 3 to allow this module to be
+ * compiled on systems with higher version numbers, but still allow the
+ * resulting binary object to be used on older systems. (We do not make use
+ * of functionality present in versions later than 3.) This is particularly
+ * important for mdb_v8 because (1) it's used in particular to debug
+ * application-level software and (2) it has a history of rapid evolution.
+ */
+#define MDB_API_VERSION 3
+
+#include <sys/mdb_modapi.h>
+#include <assert.h>
+#include <ctype.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <libproc.h>
+#include <sys/avl.h>
+#include <alloca.h>
+
+#include "v8dbg.h"
+#include "v8cfg.h"
+
+#define offsetof(s, m) ((size_t)(&(((s *)0)->m)))
+
+/*
+ * The "v8_class" and "v8_field" structures describe the C++ classes used to
+ * represent V8 heap objects.
+ */
+typedef struct v8_class {
+ struct v8_class *v8c_next; /* list linkage */
+ struct v8_class *v8c_parent; /* parent class (inheritance) */
+ struct v8_field *v8c_fields; /* array of class fields */
+ size_t v8c_start; /* offset of first class field */
+ size_t v8c_end; /* offset of first subclass field */
+ char v8c_name[64]; /* heap object class name */
+} v8_class_t;
+
+typedef struct v8_field {
+ struct v8_field *v8f_next; /* list linkage */
+ ssize_t v8f_offset; /* field offset */
+ char v8f_name[64]; /* field name */
+ boolean_t v8f_isbyte; /* 1-byte int field */
+ boolean_t v8f_isstr; /* NUL-terminated string */
+} v8_field_t;
+
+/*
+ * Similarly, the "v8_enum" structure describes an enum from V8.
+ */
+typedef struct {
+ char v8e_name[64];
+ uint_t v8e_value;
+} v8_enum_t;
+
+/*
+ * During configuration, the dmod updates these globals with the actual set of
+ * classes, types, and frame types based on the debug metadata.
+ */
+static v8_class_t *v8_classes;
+
+static v8_enum_t v8_types[128];
+static int v8_next_type;
+
+static v8_enum_t v8_frametypes[16];
+static int v8_next_frametype;
+
+static int v8_warnings;
+static int v8_silent;
+
+/*
+ * The following constants describe offsets from the frame pointer that are used
+ * to inspect each stack frame. They're initialized from the debug metadata.
+ */
+static ssize_t V8_OFF_FP_CONTEXT;
+static ssize_t V8_OFF_FP_MARKER;
+static ssize_t V8_OFF_FP_FUNCTION;
+static ssize_t V8_OFF_FP_ARGS;
+
+/*
+ * The following constants are used by macros defined in heap-dbg-common.h to
+ * examine the types of various V8 heap objects. In general, the macros should
+ * be preferred to using the constants directly. The values of these constants
+ * are initialized from the debug metadata.
+ */
+static intptr_t V8_FirstNonstringType;
+static intptr_t V8_IsNotStringMask;
+static intptr_t V8_StringTag;
+static intptr_t V8_NotStringTag;
+static intptr_t V8_StringEncodingMask;
+static intptr_t V8_TwoByteStringTag;
+static intptr_t V8_AsciiStringTag;
+static intptr_t V8_StringRepresentationMask;
+static intptr_t V8_SeqStringTag;
+static intptr_t V8_ConsStringTag;
+static intptr_t V8_SlicedStringTag;
+static intptr_t V8_ExternalStringTag;
+static intptr_t V8_FailureTag;
+static intptr_t V8_FailureTagMask;
+static intptr_t V8_HeapObjectTag;
+static intptr_t V8_HeapObjectTagMask;
+static intptr_t V8_SmiTag;
+static intptr_t V8_SmiTagMask;
+static intptr_t V8_SmiValueShift;
+static intptr_t V8_SmiShiftSize;
+static intptr_t V8_PointerSizeLog2;
+
+static intptr_t V8_ISSHARED_SHIFT;
+static intptr_t V8_DICT_SHIFT;
+static intptr_t V8_DICT_PREFIX_SIZE;
+static intptr_t V8_DICT_ENTRY_SIZE;
+static intptr_t V8_DICT_START_INDEX;
+static intptr_t V8_PROP_IDX_CONTENT;
+static intptr_t V8_PROP_IDX_FIRST;
+static intptr_t V8_PROP_TYPE_FIELD;
+static intptr_t V8_PROP_FIRST_PHANTOM;
+static intptr_t V8_PROP_TYPE_MASK;
+static intptr_t V8_PROP_DESC_KEY;
+static intptr_t V8_PROP_DESC_DETAILS;
+static intptr_t V8_PROP_DESC_VALUE;
+static intptr_t V8_PROP_DESC_SIZE;
+static intptr_t V8_TRANSITIONS_IDX_DESC;
+
+static intptr_t V8_TYPE_JSOBJECT = -1;
+static intptr_t V8_TYPE_JSARRAY = -1;
+static intptr_t V8_TYPE_FIXEDARRAY = -1;
+
+static intptr_t V8_ELEMENTS_KIND_SHIFT;
+static intptr_t V8_ELEMENTS_KIND_BITCOUNT;
+static intptr_t V8_ELEMENTS_FAST_ELEMENTS;
+static intptr_t V8_ELEMENTS_FAST_HOLEY_ELEMENTS;
+static intptr_t V8_ELEMENTS_DICTIONARY_ELEMENTS;
+
+/*
+ * Although we have this information in v8_classes, the following offsets are
+ * defined explicitly because they're used directly in code below.
+ */
+static ssize_t V8_OFF_CODE_INSTRUCTION_SIZE;
+static ssize_t V8_OFF_CODE_INSTRUCTION_START;
+static ssize_t V8_OFF_CONSSTRING_FIRST;
+static ssize_t V8_OFF_CONSSTRING_SECOND;
+static ssize_t V8_OFF_EXTERNALSTRING_RESOURCE;
+static ssize_t V8_OFF_FIXEDARRAY_DATA;
+static ssize_t V8_OFF_FIXEDARRAY_LENGTH;
+static ssize_t V8_OFF_HEAPNUMBER_VALUE;
+static ssize_t V8_OFF_HEAPOBJECT_MAP;
+static ssize_t V8_OFF_JSARRAY_LENGTH;
+static ssize_t V8_OFF_JSDATE_VALUE;
+static ssize_t V8_OFF_JSFUNCTION_SHARED;
+static ssize_t V8_OFF_JSOBJECT_ELEMENTS;
+static ssize_t V8_OFF_JSOBJECT_PROPERTIES;
+static ssize_t V8_OFF_MAP_CONSTRUCTOR;
+static ssize_t V8_OFF_MAP_INOBJECT_PROPERTIES;
+static ssize_t V8_OFF_MAP_INSTANCE_ATTRIBUTES;
+static ssize_t V8_OFF_MAP_INSTANCE_DESCRIPTORS;
+static ssize_t V8_OFF_MAP_INSTANCE_SIZE;
+static ssize_t V8_OFF_MAP_BIT_FIELD;
+static ssize_t V8_OFF_MAP_BIT_FIELD2;
+static ssize_t V8_OFF_MAP_BIT_FIELD3;
+static ssize_t V8_OFF_MAP_TRANSITIONS;
+static ssize_t V8_OFF_ODDBALL_TO_STRING;
+static ssize_t V8_OFF_SCRIPT_LINE_ENDS;
+static ssize_t V8_OFF_SCRIPT_NAME;
+static ssize_t V8_OFF_SCRIPT_SOURCE;
+static ssize_t V8_OFF_SEQASCIISTR_CHARS;
+static ssize_t V8_OFF_SEQONEBYTESTR_CHARS;
+static ssize_t V8_OFF_SEQTWOBYTESTR_CHARS;
+static ssize_t V8_OFF_SHAREDFUNCTIONINFO_CODE;
+static ssize_t V8_OFF_SHAREDFUNCTIONINFO_END_POSITION;
+static ssize_t V8_OFF_SHAREDFUNCTIONINFO_FUNCTION_TOKEN_POSITION;
+static ssize_t V8_OFF_SHAREDFUNCTIONINFO_INFERRED_NAME;
+static ssize_t V8_OFF_SHAREDFUNCTIONINFO_LENGTH;
+static ssize_t V8_OFF_SHAREDFUNCTIONINFO_SCRIPT;
+static ssize_t V8_OFF_SHAREDFUNCTIONINFO_NAME;
+static ssize_t V8_OFF_SLICEDSTRING_PARENT;
+static ssize_t V8_OFF_SLICEDSTRING_OFFSET;
+static ssize_t V8_OFF_STRING_LENGTH;
+
+#define NODE_OFF_EXTSTR_DATA 0x4 /* see node_string.h */
+
+#define V8_CONSTANT_OPTIONAL 1
+#define V8_CONSTANT_HASFALLBACK 2
+
+#define V8_CONSTANT_MAJORSHIFT 3
+#define V8_CONSTANT_MAJORMASK ((1 << 4) - 1)
+#define V8_CONSTANT_MAJOR(flags) \
+ (((flags) >> V8_CONSTANT_MAJORSHIFT) & V8_CONSTANT_MAJORMASK)
+
+#define V8_CONSTANT_MINORSHIFT 7
+#define V8_CONSTANT_MINORMASK ((1 << 9) - 1)
+#define V8_CONSTANT_MINOR(flags) \
+ (((flags) >> V8_CONSTANT_MINORSHIFT) & V8_CONSTANT_MINORMASK)
+
+#define V8_CONSTANT_FALLBACK(maj, min) \
+ (V8_CONSTANT_OPTIONAL | V8_CONSTANT_HASFALLBACK | \
+ ((maj) << V8_CONSTANT_MAJORSHIFT) | ((min) << V8_CONSTANT_MINORSHIFT))
+
+/*
+ * Table of constants used directly by this file.
+ */
+typedef struct v8_constant {
+ intptr_t *v8c_valp;
+ const char *v8c_symbol;
+ uint32_t v8c_flags;
+ intptr_t v8c_fallback;
+} v8_constant_t;
+
+static v8_constant_t v8_constants[] = {
+ { &V8_OFF_FP_CONTEXT, "v8dbg_off_fp_context" },
+ { &V8_OFF_FP_FUNCTION, "v8dbg_off_fp_function" },
+ { &V8_OFF_FP_MARKER, "v8dbg_off_fp_marker" },
+ { &V8_OFF_FP_ARGS, "v8dbg_off_fp_args" },
+
+ { &V8_FirstNonstringType, "v8dbg_FirstNonstringType" },
+ { &V8_IsNotStringMask, "v8dbg_IsNotStringMask" },
+ { &V8_StringTag, "v8dbg_StringTag" },
+ { &V8_NotStringTag, "v8dbg_NotStringTag" },
+ { &V8_StringEncodingMask, "v8dbg_StringEncodingMask" },
+ { &V8_TwoByteStringTag, "v8dbg_TwoByteStringTag" },
+ { &V8_AsciiStringTag, "v8dbg_AsciiStringTag" },
+ { &V8_StringRepresentationMask, "v8dbg_StringRepresentationMask" },
+ { &V8_SeqStringTag, "v8dbg_SeqStringTag" },
+ { &V8_ConsStringTag, "v8dbg_ConsStringTag" },
+ { &V8_SlicedStringTag, "v8dbg_SlicedStringTag",
+ V8_CONSTANT_FALLBACK(0, 0), 0x3 },
+ { &V8_ExternalStringTag, "v8dbg_ExternalStringTag" },
+ { &V8_FailureTag, "v8dbg_FailureTag" },
+ { &V8_FailureTagMask, "v8dbg_FailureTagMask" },
+ { &V8_HeapObjectTag, "v8dbg_HeapObjectTag" },
+ { &V8_HeapObjectTagMask, "v8dbg_HeapObjectTagMask" },
+ { &V8_SmiTag, "v8dbg_SmiTag" },
+ { &V8_SmiTagMask, "v8dbg_SmiTagMask" },
+ { &V8_SmiValueShift, "v8dbg_SmiValueShift" },
+ { &V8_SmiShiftSize, "v8dbg_SmiShiftSize",
+#ifdef _LP64
+ V8_CONSTANT_FALLBACK(0, 0), 31 },
+#else
+ V8_CONSTANT_FALLBACK(0, 0), 0 },
+#endif
+ { &V8_PointerSizeLog2, "v8dbg_PointerSizeLog2" },
+
+ { &V8_DICT_SHIFT, "v8dbg_dict_shift",
+ V8_CONSTANT_FALLBACK(3, 13), 24 },
+ { &V8_DICT_PREFIX_SIZE, "v8dbg_dict_prefix_size",
+ V8_CONSTANT_FALLBACK(3, 11), 2 },
+ { &V8_DICT_ENTRY_SIZE, "v8dbg_dict_entry_size",
+ V8_CONSTANT_FALLBACK(3, 11), 3 },
+ { &V8_DICT_START_INDEX, "v8dbg_dict_start_index",
+ V8_CONSTANT_FALLBACK(3, 11), 3 },
+ { &V8_ISSHARED_SHIFT, "v8dbg_isshared_shift",
+ V8_CONSTANT_FALLBACK(3, 11), 0 },
+ { &V8_PROP_IDX_FIRST, "v8dbg_prop_idx_first" },
+ { &V8_PROP_TYPE_FIELD, "v8dbg_prop_type_field" },
+ { &V8_PROP_FIRST_PHANTOM, "v8dbg_prop_type_first_phantom" },
+ { &V8_PROP_TYPE_MASK, "v8dbg_prop_type_mask" },
+ { &V8_PROP_IDX_CONTENT, "v8dbg_prop_idx_content",
+ V8_CONSTANT_OPTIONAL },
+ { &V8_PROP_DESC_KEY, "v8dbg_prop_desc_key",
+ V8_CONSTANT_FALLBACK(0, 0), 0 },
+ { &V8_PROP_DESC_DETAILS, "v8dbg_prop_desc_details",
+ V8_CONSTANT_FALLBACK(0, 0), 1 },
+ { &V8_PROP_DESC_VALUE, "v8dbg_prop_desc_value",
+ V8_CONSTANT_FALLBACK(0, 0), 2 },
+ { &V8_PROP_DESC_SIZE, "v8dbg_prop_desc_size",
+ V8_CONSTANT_FALLBACK(0, 0), 3 },
+ { &V8_TRANSITIONS_IDX_DESC, "v8dbg_transitions_idx_descriptors",
+ V8_CONSTANT_OPTIONAL },
+
+ { &V8_ELEMENTS_KIND_SHIFT, "v8dbg_elements_kind_shift",
+ V8_CONSTANT_FALLBACK(0, 0), 3 },
+ { &V8_ELEMENTS_KIND_BITCOUNT, "v8dbg_elements_kind_bitcount",
+ V8_CONSTANT_FALLBACK(0, 0), 5 },
+ { &V8_ELEMENTS_FAST_ELEMENTS,
+ "v8dbg_elements_fast_elements",
+ V8_CONSTANT_FALLBACK(0, 0), 2 },
+ { &V8_ELEMENTS_FAST_HOLEY_ELEMENTS,
+ "v8dbg_elements_fast_holey_elements",
+ V8_CONSTANT_FALLBACK(0, 0), 3 },
+ { &V8_ELEMENTS_DICTIONARY_ELEMENTS,
+ "v8dbg_elements_dictionary_elements",
+ V8_CONSTANT_FALLBACK(0, 0), 6 },
+};
+
+static int v8_nconstants = sizeof (v8_constants) / sizeof (v8_constants[0]);
+
+typedef struct v8_offset {
+ ssize_t *v8o_valp;
+ const char *v8o_class;
+ const char *v8o_member;
+ boolean_t v8o_optional;
+} v8_offset_t;
+
+static v8_offset_t v8_offsets[] = {
+ { &V8_OFF_CODE_INSTRUCTION_SIZE,
+ "Code", "instruction_size" },
+ { &V8_OFF_CODE_INSTRUCTION_START,
+ "Code", "instruction_start" },
+ { &V8_OFF_CONSSTRING_FIRST,
+ "ConsString", "first" },
+ { &V8_OFF_CONSSTRING_SECOND,
+ "ConsString", "second" },
+ { &V8_OFF_EXTERNALSTRING_RESOURCE,
+ "ExternalString", "resource" },
+ { &V8_OFF_FIXEDARRAY_DATA,
+ "FixedArray", "data" },
+ { &V8_OFF_FIXEDARRAY_LENGTH,
+ "FixedArray", "length" },
+ { &V8_OFF_HEAPNUMBER_VALUE,
+ "HeapNumber", "value" },
+ { &V8_OFF_HEAPOBJECT_MAP,
+ "HeapObject", "map" },
+ { &V8_OFF_JSARRAY_LENGTH,
+ "JSArray", "length" },
+ { &V8_OFF_JSDATE_VALUE,
+ "JSDate", "value", B_TRUE },
+ { &V8_OFF_JSFUNCTION_SHARED,
+ "JSFunction", "shared" },
+ { &V8_OFF_JSOBJECT_ELEMENTS,
+ "JSObject", "elements" },
+ { &V8_OFF_JSOBJECT_PROPERTIES,
+ "JSObject", "properties" },
+ { &V8_OFF_MAP_CONSTRUCTOR,
+ "Map", "constructor" },
+ { &V8_OFF_MAP_INOBJECT_PROPERTIES,
+ "Map", "inobject_properties" },
+ { &V8_OFF_MAP_INSTANCE_ATTRIBUTES,
+ "Map", "instance_attributes" },
+ { &V8_OFF_MAP_INSTANCE_DESCRIPTORS,
+ "Map", "instance_descriptors", B_TRUE },
+ { &V8_OFF_MAP_TRANSITIONS,
+ "Map", "transitions", B_TRUE },
+ { &V8_OFF_MAP_INSTANCE_SIZE,
+ "Map", "instance_size" },
+ { &V8_OFF_MAP_BIT_FIELD2,
+ "Map", "bit_field2", B_TRUE },
+ { &V8_OFF_MAP_BIT_FIELD3,
+ "Map", "bit_field3", B_TRUE },
+ { &V8_OFF_ODDBALL_TO_STRING,
+ "Oddball", "to_string" },
+ { &V8_OFF_SCRIPT_LINE_ENDS,
+ "Script", "line_ends" },
+ { &V8_OFF_SCRIPT_NAME,
+ "Script", "name" },
+ { &V8_OFF_SCRIPT_SOURCE,
+ "Script", "source" },
+ { &V8_OFF_SEQASCIISTR_CHARS,
+ "SeqAsciiString", "chars", B_TRUE },
+ { &V8_OFF_SEQONEBYTESTR_CHARS,
+ "SeqOneByteString", "chars", B_TRUE },
+ { &V8_OFF_SEQTWOBYTESTR_CHARS,
+ "SeqTwoByteString", "chars", B_TRUE },
+ { &V8_OFF_SHAREDFUNCTIONINFO_CODE,
+ "SharedFunctionInfo", "code" },
+ { &V8_OFF_SHAREDFUNCTIONINFO_END_POSITION,
+ "SharedFunctionInfo", "end_position" },
+ { &V8_OFF_SHAREDFUNCTIONINFO_FUNCTION_TOKEN_POSITION,
+ "SharedFunctionInfo", "function_token_position" },
+ { &V8_OFF_SHAREDFUNCTIONINFO_INFERRED_NAME,
+ "SharedFunctionInfo", "inferred_name" },
+ { &V8_OFF_SHAREDFUNCTIONINFO_LENGTH,
+ "SharedFunctionInfo", "length" },
+ { &V8_OFF_SHAREDFUNCTIONINFO_NAME,
+ "SharedFunctionInfo", "name" },
+ { &V8_OFF_SHAREDFUNCTIONINFO_SCRIPT,
+ "SharedFunctionInfo", "script" },
+ { &V8_OFF_SLICEDSTRING_OFFSET,
+ "SlicedString", "offset" },
+ { &V8_OFF_SLICEDSTRING_PARENT,
+ "SlicedString", "parent", B_TRUE },
+ { &V8_OFF_STRING_LENGTH,
+ "String", "length" },
+};
+
+static int v8_noffsets = sizeof (v8_offsets) / sizeof (v8_offsets[0]);
+
+static uintptr_t v8_major;
+static uintptr_t v8_minor;
+static uintptr_t v8_build;
+static uintptr_t v8_patch;
+
+static int autoconf_iter_symbol(mdb_symbol_t *, void *);
+static v8_class_t *conf_class_findcreate(const char *);
+static v8_field_t *conf_field_create(v8_class_t *, const char *, size_t);
+static char *conf_next_part(char *, char *);
+static int conf_update_parent(const char *);
+static int conf_update_field(v8_cfg_t *, const char *);
+static int conf_update_enum(v8_cfg_t *, const char *, const char *,
+ v8_enum_t *);
+static int conf_update_type(v8_cfg_t *, const char *);
+static int conf_update_frametype(v8_cfg_t *, const char *);
+static void conf_class_compute_offsets(v8_class_t *);
+
+static int read_typebyte(uint8_t *, uintptr_t);
+static int heap_offset(const char *, const char *, ssize_t *);
+
+/*
+ * Invoked when this dmod is initially loaded to load the set of classes, enums,
+ * and other constants from the metadata in the target binary.
+ */
+static int
+autoconfigure(v8_cfg_t *cfgp)
+{
+ v8_class_t *clp;
+ v8_enum_t *ep;
+ struct v8_constant *cnp;
+ int ii;
+ int failed = 0;
+
+ assert(v8_classes == NULL);
+
+ /*
+ * Iterate all global symbols looking for metadata.
+ */
+ if (cfgp->v8cfg_iter(cfgp, autoconf_iter_symbol, cfgp) != 0) {
+ mdb_warn("failed to autoconfigure V8 support\n");
+ return (-1);
+ }
+
+ /*
+ * By now we've configured all of the classes so we can update the
+ * "start" and "end" fields in each class with information from its
+ * parent class.
+ */
+ for (clp = v8_classes; clp != NULL; clp = clp->v8c_next) {
+ if (clp->v8c_end != (size_t)-1)
+ continue;
+
+ conf_class_compute_offsets(clp);
+ };
+
+ /*
+ * Load various constants used directly in the module.
+ */
+ for (ii = 0; ii < v8_nconstants; ii++) {
+ cnp = &v8_constants[ii];
+
+ if (cfgp->v8cfg_readsym(cfgp,
+ cnp->v8c_symbol, cnp->v8c_valp) != -1) {
+ continue;
+ }
+
+ if (!(cnp->v8c_flags & V8_CONSTANT_OPTIONAL)) {
+ mdb_warn("failed to read \"%s\"", cnp->v8c_symbol);
+ failed++;
+ continue;
+ }
+
+ if (!(cnp->v8c_flags & V8_CONSTANT_HASFALLBACK) ||
+ v8_major < V8_CONSTANT_MAJOR(cnp->v8c_flags) ||
+ (v8_major == V8_CONSTANT_MAJOR(cnp->v8c_flags) &&
+ v8_minor < V8_CONSTANT_MINOR(cnp->v8c_flags))) {
+ *cnp->v8c_valp = -1;
+ continue;
+ }
+
+ /*
+ * We have a fallback -- and we know that the version satisfies
+ * the fallback's version constraints; use the fallback value.
+ */
+ *cnp->v8c_valp = cnp->v8c_fallback;
+ }
+
+ /*
+ * Load type values for well-known classes that we use a lot.
+ */
+ for (ep = v8_types; ep->v8e_name[0] != '\0'; ep++) {
+ if (strcmp(ep->v8e_name, "JSObject") == 0)
+ V8_TYPE_JSOBJECT = ep->v8e_value;
+
+ if (strcmp(ep->v8e_name, "JSArray") == 0)
+ V8_TYPE_JSARRAY = ep->v8e_value;
+
+ if (strcmp(ep->v8e_name, "FixedArray") == 0)
+ V8_TYPE_FIXEDARRAY = ep->v8e_value;
+ }
+
+ if (V8_TYPE_JSOBJECT == -1) {
+ mdb_warn("couldn't find JSObject type\n");
+ failed++;
+ }
+
+ if (V8_TYPE_JSARRAY == -1) {
+ mdb_warn("couldn't find JSArray type\n");
+ failed++;
+ }
+
+ if (V8_TYPE_FIXEDARRAY == -1) {
+ mdb_warn("couldn't find FixedArray type\n");
+ failed++;
+ }
+
+ /*
+ * Finally, load various class offsets.
+ */
+ for (ii = 0; ii < v8_noffsets; ii++) {
+ struct v8_offset *offp = &v8_offsets[ii];
+ const char *klass = offp->v8o_class;
+
+again:
+ if (heap_offset(klass, offp->v8o_member, offp->v8o_valp) == 0)
+ continue;
+
+ if (strcmp(klass, "FixedArray") == 0) {
+ /*
+ * The V8 included in node v0.6 uses a FixedArrayBase
+ * class to contain the "length" field, while the one
+ * in v0.4 has no such base class and stores the field
+ * directly in FixedArray; if we failed to derive
+ * the offset from FixedArray, try FixedArrayBase.
+ */
+ klass = "FixedArrayBase";
+ goto again;
+ }
+
+ if (offp->v8o_optional) {
+ *offp->v8o_valp = -1;
+ continue;
+ }
+
+ mdb_warn("couldn't find class \"%s\", field \"%s\"\n",
+ offp->v8o_class, offp->v8o_member);
+ failed++;
+ }
+
+ if (!((V8_OFF_SEQASCIISTR_CHARS != -1) ^
+ (V8_OFF_SEQONEBYTESTR_CHARS != -1))) {
+ mdb_warn("expected exactly one of SeqAsciiString and "
+ "SeqOneByteString to be defined\n");
+ failed++;
+ }
+
+ if (V8_OFF_SEQONEBYTESTR_CHARS != -1)
+ V8_OFF_SEQASCIISTR_CHARS = V8_OFF_SEQONEBYTESTR_CHARS;
+
+ if (V8_OFF_SEQTWOBYTESTR_CHARS == -1)
+ V8_OFF_SEQTWOBYTESTR_CHARS = V8_OFF_SEQASCIISTR_CHARS;
+
+ if (V8_OFF_SLICEDSTRING_PARENT == -1)
+ V8_OFF_SLICEDSTRING_PARENT = V8_OFF_SLICEDSTRING_OFFSET -
+ sizeof (uintptr_t);
+
+ /*
+ * If we don't have bit_field/bit_field2 for Map, we know that they're
+ * the second and third byte of instance_attributes.
+ */
+ if (V8_OFF_MAP_BIT_FIELD == -1)
+ V8_OFF_MAP_BIT_FIELD = V8_OFF_MAP_INSTANCE_ATTRIBUTES + 2;
+
+ if (V8_OFF_MAP_BIT_FIELD2 == -1)
+ V8_OFF_MAP_BIT_FIELD2 = V8_OFF_MAP_INSTANCE_ATTRIBUTES + 3;
+
+ return (failed ? -1 : 0);
+}
+
+/* ARGSUSED */
+static int
+autoconf_iter_symbol(mdb_symbol_t *symp, void *arg)
+{
+ v8_cfg_t *cfgp = arg;
+
+ if (strncmp(symp->sym_name, "v8dbg_parent_",
+ sizeof ("v8dbg_parent_") - 1) == 0)
+ return (conf_update_parent(symp->sym_name));
+
+ if (strncmp(symp->sym_name, "v8dbg_class_",
+ sizeof ("v8dbg_class_") - 1) == 0)
+ return (conf_update_field(cfgp, symp->sym_name));
+
+ if (strncmp(symp->sym_name, "v8dbg_type_",
+ sizeof ("v8dbg_type_") - 1) == 0)
+ return (conf_update_type(cfgp, symp->sym_name));
+
+ if (strncmp(symp->sym_name, "v8dbg_frametype_",
+ sizeof ("v8dbg_frametype_") - 1) == 0)
+ return (conf_update_frametype(cfgp, symp->sym_name));
+
+ return (0);
+}
+
+/*
+ * Extracts the next field of a string whose fields are separated by "__" (as
+ * the V8 metadata symbols are).
+ */
+static char *
+conf_next_part(char *buf, char *start)
+{
+ char *pp;
+
+ if ((pp = strstr(start, "__")) == NULL) {
+ mdb_warn("malformed symbol name: %s\n", buf);
+ return (NULL);
+ }
+
+ *pp = '\0';
+ return (pp + sizeof ("__") - 1);
+}
+
+static v8_class_t *
+conf_class_findcreate(const char *name)
+{
+ v8_class_t *clp, *iclp, *prev = NULL;
+ int cmp;
+
+ for (iclp = v8_classes; iclp != NULL; iclp = iclp->v8c_next) {
+ if ((cmp = strcmp(iclp->v8c_name, name)) == 0)
+ return (iclp);
+
+ if (cmp > 0)
+ break;
+
+ prev = iclp;
+ }
+
+ if ((clp = mdb_zalloc(sizeof (*clp), UM_NOSLEEP)) == NULL)
+ return (NULL);
+
+ (void) strlcpy(clp->v8c_name, name, sizeof (clp->v8c_name));
+ clp->v8c_end = (size_t)-1;
+ clp->v8c_next = iclp;
+
+ if (prev != NULL) {
+ prev->v8c_next = clp;
+ } else {
+ v8_classes = clp;
+ }
+
+ return (clp);
+}
+
+static v8_field_t *
+conf_field_create(v8_class_t *clp, const char *name, size_t offset)
+{
+ v8_field_t *flp, *iflp;
+
+ if ((flp = mdb_zalloc(sizeof (*flp), UM_NOSLEEP)) == NULL)
+ return (NULL);
+
+ (void) strlcpy(flp->v8f_name, name, sizeof (flp->v8f_name));
+ flp->v8f_offset = offset;
+
+ if (clp->v8c_fields == NULL || clp->v8c_fields->v8f_offset > offset) {
+ flp->v8f_next = clp->v8c_fields;
+ clp->v8c_fields = flp;
+ return (flp);
+ }
+
+ for (iflp = clp->v8c_fields; iflp->v8f_next != NULL;
+ iflp = iflp->v8f_next) {
+ if (iflp->v8f_next->v8f_offset > offset)
+ break;
+ }
+
+ flp->v8f_next = iflp->v8f_next;
+ iflp->v8f_next = flp;
+ return (flp);
+}
+
+/*
+ * Given a "v8dbg_parent_X__Y", symbol, update the parent of class X to class Y.
+ * Note that neither class necessarily exists already.
+ */
+static int
+conf_update_parent(const char *symbol)
+{
+ char *pp, *qq;
+ char buf[128];
+ v8_class_t *clp, *pclp;
+
+ (void) strlcpy(buf, symbol, sizeof (buf));
+ pp = buf + sizeof ("v8dbg_parent_") - 1;
+ qq = conf_next_part(buf, pp);
+
+ if (qq == NULL)
+ return (-1);
+
+ clp = conf_class_findcreate(pp);
+ pclp = conf_class_findcreate(qq);
+
+ if (clp == NULL || pclp == NULL) {
+ mdb_warn("mdb_v8: out of memory\n");
+ return (-1);
+ }
+
+ clp->v8c_parent = pclp;
+ return (0);
+}
+
+/*
+ * Given a "v8dbg_class_CLASS__FIELD__TYPE", symbol, save field "FIELD" into
+ * class CLASS with the offset described by the symbol. Note that CLASS does
+ * not necessarily exist already.
+ */
+static int
+conf_update_field(v8_cfg_t *cfgp, const char *symbol)
+{
+ v8_class_t *clp;
+ v8_field_t *flp;
+ intptr_t offset;
+ char *pp, *qq, *tt;
+ char buf[128];
+
+ (void) strlcpy(buf, symbol, sizeof (buf));
+
+ pp = buf + sizeof ("v8dbg_class_") - 1;
+ qq = conf_next_part(buf, pp);
+
+ if (qq == NULL || (tt = conf_next_part(buf, qq)) == NULL)
+ return (-1);
+
+ if (cfgp->v8cfg_readsym(cfgp, symbol, &offset) == -1) {
+ mdb_warn("failed to read symbol \"%s\"", symbol);
+ return (-1);
+ }
+
+ if ((clp = conf_class_findcreate(pp)) == NULL ||
+ (flp = conf_field_create(clp, qq, (size_t)offset)) == NULL)
+ return (-1);
+
+ if (strcmp(tt, "int") == 0)
+ flp->v8f_isbyte = B_TRUE;
+
+ if (strcmp(tt, "char") == 0)
+ flp->v8f_isstr = B_TRUE;
+
+ return (0);
+}
+
+static int
+conf_update_enum(v8_cfg_t *cfgp, const char *symbol, const char *name,
+ v8_enum_t *enp)
+{
+ intptr_t value;
+
+ if (cfgp->v8cfg_readsym(cfgp, symbol, &value) == -1) {
+ mdb_warn("failed to read symbol \"%s\"", symbol);
+ return (-1);
+ }
+
+ enp->v8e_value = (int)value;
+ (void) strlcpy(enp->v8e_name, name, sizeof (enp->v8e_name));
+ return (0);
+}
+
+/*
+ * Given a "v8dbg_type_TYPENAME" constant, save the type name in v8_types. Note
+ * that this enum has multiple integer values with the same string label.
+ */
+static int
+conf_update_type(v8_cfg_t *cfgp, const char *symbol)
+{
+ char *klass;
+ v8_enum_t *enp;
+ char buf[128];
+
+ if (v8_next_type > sizeof (v8_types) / sizeof (v8_types[0])) {
+ mdb_warn("too many V8 types\n");
+ return (-1);
+ }
+
+ (void) strlcpy(buf, symbol, sizeof (buf));
+
+ klass = buf + sizeof ("v8dbg_type_") - 1;
+ if (conf_next_part(buf, klass) == NULL)
+ return (-1);
+
+ enp = &v8_types[v8_next_type++];
+ return (conf_update_enum(cfgp, symbol, klass, enp));
+}
+
+/*
+ * Given a "v8dbg_frametype_TYPENAME" constant, save the frame type in
+ * v8_frametypes.
+ */
+static int
+conf_update_frametype(v8_cfg_t *cfgp, const char *symbol)
+{
+ const char *frametype;
+ v8_enum_t *enp;
+
+ if (v8_next_frametype >
+ sizeof (v8_frametypes) / sizeof (v8_frametypes[0])) {
+ mdb_warn("too many V8 frame types\n");
+ return (-1);
+ }
+
+ enp = &v8_frametypes[v8_next_frametype++];
+ frametype = symbol + sizeof ("v8dbg_frametype_") - 1;
+ return (conf_update_enum(cfgp, symbol, frametype, enp));
+}
+
+/*
+ * Now that all classes have been loaded, update the "start" and "end" fields of
+ * each class based on the values of its parent class.
+ */
+static void
+conf_class_compute_offsets(v8_class_t *clp)
+{
+ v8_field_t *flp;
+
+ assert(clp->v8c_start == 0);
+ assert(clp->v8c_end == (size_t)-1);
+
+ if (clp->v8c_parent != NULL) {
+ if (clp->v8c_parent->v8c_end == (size_t)-1)
+ conf_class_compute_offsets(clp->v8c_parent);
+
+ clp->v8c_start = clp->v8c_parent->v8c_end;
+ }
+
+ if (clp->v8c_fields == NULL) {
+ clp->v8c_end = clp->v8c_start;
+ return;
+ }
+
+ for (flp = clp->v8c_fields; flp->v8f_next != NULL; flp = flp->v8f_next)
+ ;
+
+ if (flp == NULL)
+ clp->v8c_end = clp->v8c_start;
+ else
+ clp->v8c_end = flp->v8f_offset + sizeof (uintptr_t);
+}
+
+/*
+ * Utility functions
+ */
+#define JSSTR_NONE 0
+#define JSSTR_NUDE JSSTR_NONE
+
+#define JSSTR_FLAGSHIFT 16
+#define JSSTR_VERBOSE (0x1 << JSSTR_FLAGSHIFT)
+#define JSSTR_QUOTED (0x2 << JSSTR_FLAGSHIFT)
+#define JSSTR_ISASCII (0x4 << JSSTR_FLAGSHIFT)
+
+#define JSSTR_MAXDEPTH 512
+#define JSSTR_DEPTH(f) ((f) & ((1 << JSSTR_FLAGSHIFT) - 1))
+#define JSSTR_BUMPDEPTH(f) ((f) + 1)
+
+static int jsstr_print(uintptr_t, uint_t, char **, size_t *);
+static boolean_t jsobj_is_undefined(uintptr_t addr);
+static boolean_t jsobj_is_hole(uintptr_t addr);
+
+static const char *
+enum_lookup_str(v8_enum_t *enums, int val, const char *dflt)
+{
+ v8_enum_t *ep;
+
+ for (ep = enums; ep->v8e_name[0] != '\0'; ep++) {
+ if (ep->v8e_value == val)
+ return (ep->v8e_name);
+ }
+
+ return (dflt);
+}
+
+static void
+enum_print(v8_enum_t *enums)
+{
+ v8_enum_t *itp;
+
+ for (itp = enums; itp->v8e_name[0] != '\0'; itp++)
+ mdb_printf("%-30s = 0x%02x\n", itp->v8e_name, itp->v8e_value);
+}
+
+/*
+ * b[v]snprintf behave like [v]snprintf(3c), except that they update the buffer
+ * and length arguments based on how much buffer space is used by the operation.
+ * This makes it much easier to combine multiple calls in sequence without
+ * worrying about buffer overflow.
+ */
+static size_t
+bvsnprintf(char **bufp, size_t *buflenp, const char *format, va_list alist)
+{
+ size_t rv, len;
+
+ if (*buflenp == 0)
+ return (vsnprintf(NULL, 0, format, alist));
+
+ rv = vsnprintf(*bufp, *buflenp, format, alist);
+
+ len = MIN(rv, *buflenp);
+ *buflenp -= len;
+ *bufp += len;
+
+ return (len);
+}
+
+static size_t
+bsnprintf(char **bufp, size_t *buflenp, const char *format, ...)
+{
+ va_list alist;
+ size_t rv;
+
+ va_start(alist, format);
+ rv = bvsnprintf(bufp, buflenp, format, alist);
+ va_end(alist);
+
+ return (rv);
+}
+
+static void
+v8_warn(const char *format, ...)
+{
+ char buf[512];
+ va_list alist;
+ int len;
+
+ if (!v8_warnings || v8_silent)
+ return;
+
+ va_start(alist, format);
+ (void) vsnprintf(buf, sizeof (buf), format, alist);
+ va_end(alist);
+
+ /*
+ * This is made slightly annoying because we need to effectively
+ * preserve the original format string to allow for mdb to use the
+ * new-line at the end to indicate that strerror should be elided.
+ */
+ if ((len = strlen(format)) > 0 && format[len - 1] == '\n') {
+ buf[strlen(buf) - 1] = '\0';
+ mdb_warn("%s\n", buf);
+ } else {
+ mdb_warn("%s", buf);
+ }
+}
+
+/*
+ * Returns in "offp" the offset of field "field" in C++ class "klass".
+ */
+static int
+heap_offset(const char *klass, const char *field, ssize_t *offp)
+{
+ v8_class_t *clp;
+ v8_field_t *flp;
+
+ for (clp = v8_classes; clp != NULL; clp = clp->v8c_next) {
+ if (strcmp(klass, clp->v8c_name) == 0)
+ break;
+ }
+
+ if (clp == NULL)
+ return (-1);
+
+ for (flp = clp->v8c_fields; flp != NULL; flp = flp->v8f_next) {
+ if (strcmp(field, flp->v8f_name) == 0)
+ break;
+ }
+
+ if (flp == NULL)
+ return (-1);
+
+ *offp = V8_OFF_HEAP(flp->v8f_offset);
+ return (0);
+}
+
+/*
+ * Assuming "addr" is an instance of the C++ heap class "klass", read into *valp
+ * the pointer-sized value of field "field".
+ */
+static int
+read_heap_ptr(uintptr_t *valp, uintptr_t addr, ssize_t off)
+{
+ if (mdb_vread(valp, sizeof (*valp), addr + off) == -1) {
+ v8_warn("failed to read offset %d from %p", off, addr);
+ return (-1);
+ }
+
+ return (0);
+}
+
+/*
+ * Like read_heap_ptr, but assume the field is an SMI and store the actual value
+ * into *valp rather than the encoded representation.
+ */
+static int
+read_heap_smi(uintptr_t *valp, uintptr_t addr, ssize_t off)
+{
+ if (read_heap_ptr(valp, addr, off) != 0)
+ return (-1);
+
+ if (!V8_IS_SMI(*valp)) {
+ v8_warn("expected SMI, got %p\n", *valp);
+ return (-1);
+ }
+
+ *valp = V8_SMI_VALUE(*valp);
+
+ return (0);
+}
+
+static int
+read_heap_double(double *valp, uintptr_t addr, ssize_t off)
+{
+ if (mdb_vread(valp, sizeof (*valp), addr + off) == -1) {
+ v8_warn("failed to read heap value at %p", addr + off);
+ return (-1);
+ }
+
+ return (0);
+}
+
+/*
+ * Assuming "addr" refers to a FixedArray, return a newly-allocated array
+ * representing its contents.
+ */
+static int
+read_heap_array(uintptr_t addr, uintptr_t **retp, size_t *lenp, int flags)
+{
+ uint8_t type;
+ uintptr_t len;
+
+ if (!V8_IS_HEAPOBJECT(addr))
+ return (-1);
+
+ if (read_typebyte(&type, addr) != 0)
+ return (-1);
+
+ if (type != V8_TYPE_FIXEDARRAY)
+ return (-1);
+
+ if (read_heap_smi(&len, addr, V8_OFF_FIXEDARRAY_LENGTH) != 0)
+ return (-1);
+
+ *lenp = len;
+
+ if (len == 0) {
+ *retp = NULL;
+ return (0);
+ }
+
+ if ((*retp = mdb_zalloc(len * sizeof (uintptr_t), flags)) == NULL)
+ return (-1);
+
+ if (mdb_vread(*retp, len * sizeof (uintptr_t),
+ addr + V8_OFF_FIXEDARRAY_DATA) == -1) {
+ if (!(flags & UM_GC))
+ mdb_free(*retp, len * sizeof (uintptr_t));
+
+ *retp = NULL;
+ return (-1);
+ }
+
+ return (0);
+}
+
+static int
+read_heap_byte(uint8_t *valp, uintptr_t addr, ssize_t off)
+{
+ if (mdb_vread(valp, sizeof (*valp), addr + off) == -1) {
+ v8_warn("failed to read heap value at %p", addr + off);
+ return (-1);
+ }
+
+ return (0);
+}
+
+/*
+ * Given a heap object, returns in *valp the byte describing the type of the
+ * object. This is shorthand for first retrieving the Map at the start of the
+ * heap object and then retrieving the type byte from the Map object.
+ */
+static int
+read_typebyte(uint8_t *valp, uintptr_t addr)
+{
+ uintptr_t mapaddr;
+ ssize_t off = V8_OFF_HEAPOBJECT_MAP;
+
+ if (mdb_vread(&mapaddr, sizeof (mapaddr), addr + off) == -1) {
+ v8_warn("failed to read type of %p", addr);
+ return (-1);
+ }
+
+ if (!V8_IS_HEAPOBJECT(mapaddr)) {
+ v8_warn("object map is not a heap object\n");
+ return (-1);
+ }
+
+ if (read_heap_byte(valp, mapaddr, V8_OFF_MAP_INSTANCE_ATTRIBUTES) == -1)
+ return (-1);
+
+ return (0);
+}
+
+/*
+ * Given a heap object, returns in *valp the size of the object. For
+ * variable-size objects, returns an undefined value.
+ */
+static int
+read_size(size_t *valp, uintptr_t addr)
+{
+ uintptr_t mapaddr;
+ uint8_t size;
+
+ if (read_heap_ptr(&mapaddr, addr, V8_OFF_HEAPOBJECT_MAP) != 0)
+ return (-1);
+
+ if (!V8_IS_HEAPOBJECT(mapaddr)) {
+ v8_warn("heap object map is not itself a heap object\n");
+ return (-1);
+ }
+
+ if (read_heap_byte(&size, mapaddr, V8_OFF_MAP_INSTANCE_SIZE) != 0)
+ return (-1);
+
+ *valp = size << V8_PointerSizeLog2;
+ return (0);
+}
+
+/*
+ * Assuming "addr" refers to a FixedArray that is implementing a
+ * StringDictionary, iterate over its contents calling the specified function
+ * with key and value.
+ */
+static int
+read_heap_dict(uintptr_t addr,
+ int (*func)(const char *, uintptr_t, void *), void *arg)
+{
+ uint8_t type;
+ uintptr_t len;
+ char buf[512];
+ char *bufp;
+ int rval = -1;
+ uintptr_t *dict, ndict, i;
+
+ if (read_heap_array(addr, &dict, &ndict, UM_SLEEP) != 0)
+ return (-1);
+
+ if (V8_DICT_ENTRY_SIZE < 2) {
+ v8_warn("dictionary entry size (%d) is too small for a "
+ "key and value\n", V8_DICT_ENTRY_SIZE);
+ goto out;
+ }
+
+ for (i = V8_DICT_START_INDEX + V8_DICT_PREFIX_SIZE; i < ndict;
+ i += V8_DICT_ENTRY_SIZE) {
+ /*
+ * The layout here is key, value, details. (This is hardcoded
+ * in Dictionary<Shape, Key>::SetEntry().)
+ */
+ if (jsobj_is_undefined(dict[i]))
+ continue;
+
+ if (V8_IS_SMI(dict[i])) {
+ intptr_t val = V8_SMI_VALUE(dict[i]);
+
+ (void) snprintf(buf, sizeof (buf), "%ld", val);
+ } else {
+ if (jsobj_is_hole(dict[i])) {
+ /*
+ * In some cases, the key can (apparently) be a
+ * hole, in which case we skip over it.
+ */
+ continue;
+ }
+
+ if (read_typebyte(&type, dict[i]) != 0)
+ goto out;
+
+ if (!V8_TYPE_STRING(type))
+ goto out;
+
+ bufp = buf;
+ len = sizeof (buf);
+
+ if (jsstr_print(dict[i], JSSTR_NUDE, &bufp, &len) != 0)
+ goto out;
+ }
+
+ if (func(buf, dict[i + 1], arg) == -1)
+ goto out;
+ }
+
+ rval = 0;
+out:
+ mdb_free(dict, ndict * sizeof (uintptr_t));
+
+ return (rval);
+}
+
+/*
+ * Returns in "buf" a description of the type of "addr" suitable for printing.
+ */
+static int
+obj_jstype(uintptr_t addr, char **bufp, size_t *lenp, uint8_t *typep)
+{
+ uint8_t typebyte;
+ uintptr_t strptr;
+ const char *typename;
+
+ if (V8_IS_FAILURE(addr)) {
+ if (typep)
+ *typep = 0;
+ (void) bsnprintf(bufp, lenp, "'Failure' object");
+ return (0);
+ }
+
+ if (V8_IS_SMI(addr)) {
+ if (typep)
+ *typep = 0;
+ (void) bsnprintf(bufp, lenp, "SMI: value = %d",
+ V8_SMI_VALUE(addr));
+ return (0);
+ }
+
+ if (read_typebyte(&typebyte, addr) != 0)
+ return (-1);
+
+ if (typep)
+ *typep = typebyte;
+
+ typename = enum_lookup_str(v8_types, typebyte, "<unknown>");
+ (void) bsnprintf(bufp, lenp, typename);
+
+ if (strcmp(typename, "Oddball") == 0) {
+ if (read_heap_ptr(&strptr, addr,
+ V8_OFF_ODDBALL_TO_STRING) != -1) {
+ (void) bsnprintf(bufp, lenp, ": \"");
+ (void) jsstr_print(strptr, JSSTR_NUDE, bufp, lenp);
+ (void) bsnprintf(bufp, lenp, "\"");
+ }
+ }
+
+ return (0);
+}
+
+/*
+ * Print out the fields of the given object that come from the given class.
+ */
+static int
+obj_print_fields(uintptr_t baddr, v8_class_t *clp)
+{
+ v8_field_t *flp;
+ uintptr_t addr, value;
+ int rv;
+ char *bufp;
+ size_t len;
+ uint8_t type;
+ char buf[256];
+
+ for (flp = clp->v8c_fields; flp != NULL; flp = flp->v8f_next) {
+ bufp = buf;
+ len = sizeof (buf);
+
+ addr = baddr + V8_OFF_HEAP(flp->v8f_offset);
+
+ if (flp->v8f_isstr) {
+ if (mdb_readstr(buf, sizeof (buf), addr) == -1) {
+ mdb_printf("%p %s (unreadable)\n",
+ addr, flp->v8f_name);
+ continue;
+ }
+
+ mdb_printf("%p %s = \"%s\"\n",
+ addr, flp->v8f_name, buf);
+ continue;
+ }
+
+ if (flp->v8f_isbyte) {
+ uint8_t sv;
+ if (mdb_vread(&sv, sizeof (sv), addr) == -1) {
+ mdb_printf("%p %s (unreadable)\n",
+ addr, flp->v8f_name);
+ continue;
+ }
+
+ mdb_printf("%p %s = 0x%x\n", addr, flp->v8f_name, sv);
+ continue;
+ }
+
+ rv = mdb_vread((void *)&value, sizeof (value), addr);
+
+ if (rv != sizeof (value) ||
+ obj_jstype(value, &bufp, &len, &type) != 0) {
+ mdb_printf("%p %s (unreadable)\n", addr, flp->v8f_name);
+ continue;
+ }
+
+ if (type != 0 && V8_TYPE_STRING(type)) {
+ (void) bsnprintf(&bufp, &len, ": ");
+ (void) jsstr_print(value, JSSTR_QUOTED, &bufp, &len);
+ }
+
+ mdb_printf("%p %s = %p (%s)\n", addr, flp->v8f_name, value,
+ buf);
+ }
+
+ return (DCMD_OK);
+}
+
+/*
+ * Print out all fields of the given object, starting with the root of the class
+ * hierarchy and working down the most specific type.
+ */
+static int
+obj_print_class(uintptr_t addr, v8_class_t *clp)
+{
+ int rv = 0;
+
+ /*
+ * If we have no fields, we just print a simple inheritance hierarchy.
+ * If we have fields but our parent doesn't, our header includes the
+ * inheritance hierarchy.
+ */
+ if (clp->v8c_end == 0) {
+ mdb_printf("%s ", clp->v8c_name);
+
+ if (clp->v8c_parent != NULL) {
+ mdb_printf("< ");
+ (void) obj_print_class(addr, clp->v8c_parent);
+ }
+
+ return (0);
+ }
+
+ mdb_printf("%p %s", addr, clp->v8c_name);
+
+ if (clp->v8c_start == 0 && clp->v8c_parent != NULL) {
+ mdb_printf(" < ");
+ (void) obj_print_class(addr, clp->v8c_parent);
+ }
+
+ mdb_printf(" {\n");
+ (void) mdb_inc_indent(4);
+
+ if (clp->v8c_start > 0 && clp->v8c_parent != NULL)
+ rv = obj_print_class(addr, clp->v8c_parent);
+
+ rv |= obj_print_fields(addr, clp);
+ (void) mdb_dec_indent(4);
+ mdb_printf("}\n");
+
+ return (rv);
+}
+
+/*
+ * Print the ASCII string for the given JS string, expanding ConsStrings and
+ * ExternalStrings as needed.
+ */
+static int jsstr_print_seq(uintptr_t, uint_t, char **, size_t *, size_t,
+ ssize_t);
+static int jsstr_print_cons(uintptr_t, uint_t, char **, size_t *);
+static int jsstr_print_sliced(uintptr_t, uint_t, char **, size_t *);
+static int jsstr_print_external(uintptr_t, uint_t, char **, size_t *);
+
+static int
+jsstr_print(uintptr_t addr, uint_t flags, char **bufp, size_t *lenp)
+{
+ uint8_t typebyte;
+ int err = 0;
+ char *lbufp;
+ size_t llen;
+ char buf[64];
+ boolean_t verbose = flags & JSSTR_VERBOSE ? B_TRUE : B_FALSE;
+
+ if (read_typebyte(&typebyte, addr) != 0) {
+ (void) bsnprintf(bufp, lenp, "<could not read type>");
+ return (-1);
+ }
+
+ if (!V8_TYPE_STRING(typebyte)) {
+ (void) bsnprintf(bufp, lenp, "<not a string>");
+ return (-1);
+ }
+
+ if (verbose) {
+ lbufp = buf;
+ llen = sizeof (buf);
+ (void) obj_jstype(addr, &lbufp, &llen, NULL);
+ mdb_printf("%s\n", buf);
+ (void) mdb_inc_indent(4);
+ }
+
+ if (JSSTR_DEPTH(flags) > JSSTR_MAXDEPTH) {
+ (void) bsnprintf(bufp, lenp, "<maximum depth exceeded>");
+ return (-1);
+ }
+
+ if (V8_STRENC_ASCII(typebyte))
+ flags |= JSSTR_ISASCII;
+ else
+ flags &= ~JSSTR_ISASCII;
+
+ flags = JSSTR_BUMPDEPTH(flags);
+
+ if (V8_STRREP_SEQ(typebyte))
+ err = jsstr_print_seq(addr, flags, bufp, lenp, 0, -1);
+ else if (V8_STRREP_CONS(typebyte))
+ err = jsstr_print_cons(addr, flags, bufp, lenp);
+ else if (V8_STRREP_EXT(typebyte))
+ err = jsstr_print_external(addr, flags, bufp, lenp);
+ else if (V8_STRREP_SLICED(typebyte))
+ err = jsstr_print_sliced(addr, flags, bufp, lenp);
+ else {
+ (void) bsnprintf(bufp, lenp, "<unknown string type>");
+ err = -1;
+ }
+
+ if (verbose)
+ (void) mdb_dec_indent(4);
+
+ return (err);
+}
+
+static int
+jsstr_print_seq(uintptr_t addr, uint_t flags, char **bufp, size_t *lenp,
+ size_t sliceoffset, ssize_t slicelen)
+{
+ /*
+ * To allow the caller to allocate a very large buffer for strings,
+ * we'll allocate a buffer sized based on our input, making it at
+ * least enough space for our ellipsis and at most 256K.
+ */
+ uintptr_t i, nreadoffset, blen, nstrbytes, nstrchrs;
+ ssize_t nreadbytes;
+ boolean_t verbose = flags & JSSTR_VERBOSE ? B_TRUE : B_FALSE;
+ boolean_t quoted = flags & JSSTR_QUOTED ? B_TRUE : B_FALSE;
+ char *buf;
+ uint16_t chrval;
+
+ if ((blen = MIN(*lenp, 256 * 1024)) == 0)
+ return (0);
+
+ buf = alloca(blen);
+
+ if (read_heap_smi(&nstrchrs, addr, V8_OFF_STRING_LENGTH) != 0) {
+ (void) bsnprintf(bufp, lenp,
+ "<string (failed to read length)>");
+ return (-1);
+ }
+
+ if (slicelen != -1)
+ nstrchrs = slicelen;
+
+ if ((flags & JSSTR_ISASCII) != 0) {
+ nstrbytes = nstrchrs;
+ nreadoffset = sliceoffset;
+ } else {
+ nstrbytes = 2 * nstrchrs;
+ nreadoffset = 2 * sliceoffset;
+ }
+
+ nreadbytes = nstrbytes + sizeof ("\"\"") <= blen ? nstrbytes :
+ blen - sizeof ("\"\"[...]");
+
+ if (nreadbytes < 0) {
+ /*
+ * We don't even have the room to store the ellipsis; zero
+ * the buffer out and set the length to zero.
+ */
+ *bufp = '\0';
+ *lenp = 0;
+ return (0);
+ }
+
+ if (verbose)
+ mdb_printf("length: %d chars (%d bytes), "
+ "will read %d bytes from offset %d\n",
+ nstrchrs, nstrbytes, nreadbytes, nreadoffset);
+
+ if (nstrbytes == 0) {
+ (void) bsnprintf(bufp, lenp, "%s%s",
+ quoted ? "\"" : "", quoted ? "\"" : "");
+ return (0);
+ }
+
+ buf[0] = '\0';
+
+ if ((flags & JSSTR_ISASCII) != 0) {
+ if (mdb_readstr(buf, nreadbytes + 1,
+ addr + V8_OFF_SEQASCIISTR_CHARS + nreadoffset) == -1) {
+ v8_warn("failed to read SeqString data");
+ return (-1);
+ }
+
+ if (nreadbytes != nstrbytes)
+ (void) strlcat(buf, "[...]", blen);
+
+ (void) bsnprintf(bufp, lenp, "%s%s%s",
+ quoted ? "\"" : "", buf, quoted ? "\"" : "");
+ } else {
+ if (mdb_readstr(buf, nreadbytes,
+ addr + V8_OFF_SEQTWOBYTESTR_CHARS + nreadoffset) == -1) {
+ v8_warn("failed to read SeqTwoByteString data");
+ return (-1);
+ }
+
+ (void) bsnprintf(bufp, lenp, "%s", quoted ? "\"" : "");
+ for (i = 0; i < nreadbytes; i += 2) {
+ /*LINTED*/
+ chrval = *((uint16_t *)(buf + i));
+ (void) bsnprintf(bufp, lenp, "%c",
+ (isascii(chrval) || chrval == 0) ?
+ (char)chrval : '?');
+ }
+ if (nreadbytes != nstrbytes)
+ (void) bsnprintf(bufp, lenp, "[...]");
+ (void) bsnprintf(bufp, lenp, "%s", quoted ? "\"" : "");
+ }
+
+ return (0);
+}
+
+static int
+jsstr_print_cons(uintptr_t addr, uint_t flags, char **bufp, size_t *lenp)
+{
+ boolean_t verbose = flags & JSSTR_VERBOSE ? B_TRUE : B_FALSE;
+ boolean_t quoted = flags & JSSTR_QUOTED ? B_TRUE : B_FALSE;
+ uintptr_t ptr1, ptr2;
+
+ if (read_heap_ptr(&ptr1, addr, V8_OFF_CONSSTRING_FIRST) != 0) {
+ (void) bsnprintf(bufp, lenp,
+ "<cons string (failed to read first)>");
+ return (-1);
+ }
+
+ if (read_heap_ptr(&ptr2, addr, V8_OFF_CONSSTRING_SECOND) != 0) {
+ (void) bsnprintf(bufp, lenp,
+ "<cons string (failed to read second)>");
+ return (-1);
+ }
+
+ if (verbose) {
+ mdb_printf("ptr1: %p\n", ptr1);
+ mdb_printf("ptr2: %p\n", ptr2);
+ }
+
+ if (quoted)
+ (void) bsnprintf(bufp, lenp, "\"");
+
+ flags = JSSTR_BUMPDEPTH(flags) & ~JSSTR_QUOTED;
+
+ if (jsstr_print(ptr1, flags, bufp, lenp) != 0)
+ return (-1);
+
+ if (jsstr_print(ptr2, flags, bufp, lenp) != 0)
+ return (-1);
+
+ if (quoted)
+ (void) bsnprintf(bufp, lenp, "\"");
+
+ return (0);
+}
+
+static int
+jsstr_print_sliced(uintptr_t addr, uint_t flags, char **bufp, size_t *lenp)
+{
+ uintptr_t parent, offset, length;
+ uint8_t typebyte;
+ boolean_t verbose = flags & JSSTR_VERBOSE ? B_TRUE : B_FALSE;
+ boolean_t quoted = flags & JSSTR_QUOTED ? B_TRUE : B_FALSE;
+
+ if (read_heap_ptr(&parent, addr, V8_OFF_SLICEDSTRING_PARENT) != 0) {
+ (void) bsnprintf(bufp, lenp,
+ "<sliced string (failed to read parent)>");
+ return (-1);
+ }
+
+ if (read_heap_smi(&offset, addr, V8_OFF_SLICEDSTRING_OFFSET) != 0) {
+ (void) bsnprintf(bufp, lenp,
+ "<sliced string (failed to read offset)>");
+ return (-1);
+ }
+
+ if (read_heap_smi(&length, addr, V8_OFF_STRING_LENGTH) != 0) {
+ (void) bsnprintf(bufp, lenp,
+ "<sliced string (failed to read length)>");
+ return (-1);
+ }
+
+ if (verbose)
+ mdb_printf("parent: %p, offset = %d, length = %d\n",
+ parent, offset, length);
+
+ if (read_typebyte(&typebyte, parent) != 0) {
+ (void) bsnprintf(bufp, lenp,
+ "<sliced string (failed to read parent type)>");
+ return (0);
+ }
+
+ if (!V8_STRREP_SEQ(typebyte)) {
+ (void) bsnprintf(bufp, lenp,
+ "<sliced string (parent is not a sequential string)>");
+ return (0);
+ }
+
+ if (quoted)
+ (void) bsnprintf(bufp, lenp, "\"");
+
+ flags = JSSTR_BUMPDEPTH(flags) & ~JSSTR_QUOTED;
+
+ if (V8_STRENC_ASCII(typebyte))
+ flags |= JSSTR_ISASCII;
+
+ if (jsstr_print_seq(parent, flags, bufp, lenp, offset, length) != 0)
+ return (-1);
+
+ if (quoted)
+ (void) bsnprintf(bufp, lenp, "\"");
+
+ return (0);
+}
+
+static int
+jsstr_print_external(uintptr_t addr, uint_t flags, char **bufp, size_t *lenp)
+{
+ uintptr_t ptr1, ptr2;
+ size_t blen = *lenp + 1;
+ char *buf;
+ boolean_t quoted = flags & JSSTR_QUOTED ? B_TRUE : B_FALSE;
+ int rval = -1;
+
+ if ((flags & JSSTR_ISASCII) == 0) {
+ (void) bsnprintf(bufp, lenp, "<external two-byte string>");
+ return (0);
+ }
+
+ if (flags & JSSTR_VERBOSE)
+ mdb_printf("assuming Node.js string\n");
+
+ if (read_heap_ptr(&ptr1, addr, V8_OFF_EXTERNALSTRING_RESOURCE) != 0) {
+ (void) bsnprintf(bufp, lenp,
+ "<external string (failed to read resource)>");
+ return (-1);
+ }
+
+ if (mdb_vread(&ptr2, sizeof (ptr2),
+ ptr1 + NODE_OFF_EXTSTR_DATA) == -1) {
+ (void) bsnprintf(bufp, lenp, "<external string (failed to "
+ "read node external pointer %p)>",
+ ptr1 + NODE_OFF_EXTSTR_DATA);
+ return (-1);
+ }
+
+ buf = mdb_alloc(blen, UM_SLEEP);
+
+ if (mdb_readstr(buf, blen, ptr2) == -1) {
+ (void) bsnprintf(bufp, lenp, "<external string "
+ "(failed to read ExternalString data)>");
+ goto out;
+ }
+
+ if (buf[0] != '\0' && !isascii(buf[0])) {
+ (void) bsnprintf(bufp, lenp, "<external string "
+ "(failed to read ExternalString ascii data)>");
+ goto out;
+ }
+
+ (void) bsnprintf(bufp, lenp, "%s%s%s",
+ quoted ? "\"" : "", buf, quoted ? "\"" : "");
+
+ rval = 0;
+out:
+ mdb_free(buf, blen);
+
+ return (rval);
+}
+
+/*
+ * Returns true if the given address refers to the named oddball object (e.g.
+ * "undefined"). Returns false on failure (since we shouldn't fail on the
+ * actual "undefined" value).
+ */
+static boolean_t
+jsobj_is_oddball(uintptr_t addr, char *oddball)
+{
+ uint8_t type;
+ uintptr_t strptr;
+ const char *typename;
+ char buf[16];
+ char *bufp = buf;
+ size_t len = sizeof (buf);
+
+ v8_silent++;
+
+ if (read_typebyte(&type, addr) != 0) {
+ v8_silent--;
+ return (B_FALSE);
+ }
+
+ v8_silent--;
+ typename = enum_lookup_str(v8_types, type, "<unknown>");
+ if (strcmp(typename, "Oddball") != 0)
+ return (B_FALSE);
+
+ if (read_heap_ptr(&strptr, addr, V8_OFF_ODDBALL_TO_STRING) == -1)
+ return (B_FALSE);
+
+ if (jsstr_print(strptr, JSSTR_NUDE, &bufp, &len) != 0)
+ return (B_FALSE);
+
+ return (strcmp(buf, oddball) == 0);
+}
+
+static boolean_t
+jsobj_is_undefined(uintptr_t addr)
+{
+ return (jsobj_is_oddball(addr, "undefined"));
+}
+
+static boolean_t
+jsobj_is_hole(uintptr_t addr)
+{
+ return (jsobj_is_oddball(addr, "hole"));
+}
+
+static int
+jsobj_properties(uintptr_t addr,
+ int (*func)(const char *, uintptr_t, void *), void *arg)
+{
+ uintptr_t ptr, map, elements;
+ uintptr_t *props = NULL, *descs = NULL, *content = NULL, *trans, *elts;
+ size_t size, nprops, ndescs, ncontent, ntrans, len;
+ ssize_t ii, rndescs;
+ uint8_t type, ninprops;
+ int rval = -1;
+ size_t ps = sizeof (uintptr_t);
+ ssize_t off;
+
+ /*
+ * Objects have either "fast" properties represented with a FixedArray
+ * or slow properties represented with a Dictionary.
+ */
+ if (mdb_vread(&ptr, ps, addr + V8_OFF_JSOBJECT_PROPERTIES) == -1)
+ return (-1);
+
+ if (read_typebyte(&type, ptr) != 0)
+ return (-1);
+
+ if (type != V8_TYPE_FIXEDARRAY) {
+ /*
+ * If our properties aren't a fixed array, we'll emit a member
+ * that contains the type name, but with a NULL value.
+ */
+ char buf[256];
+
+ (void) mdb_snprintf(buf, sizeof (buf), "<%s>",
+ enum_lookup_str(v8_types, type, "unknown"));
+
+ return (func(buf, NULL, arg));
+ }
+
+ /*
+ * To iterate the properties, we need to examine the instance
+ * descriptors of the associated Map object. Depending on the version
+ * of V8, this might be found directly from the map -- or indirectly
+ * via the transitions array.
+ */
+ if (mdb_vread(&map, ps, addr + V8_OFF_HEAPOBJECT_MAP) == -1)
+ goto err;
+
+ /*
+ * Check to see if our elements member is an array and non-zero; if
+ * so, it contains numerically-named properties.
+ */
+ if (V8_ELEMENTS_KIND_SHIFT != -1 &&
+ read_heap_ptr(&elements, addr, V8_OFF_JSOBJECT_ELEMENTS) == 0 &&
+ read_heap_array(elements, &elts, &len, UM_SLEEP) == 0 && len != 0) {
+ uint8_t bit_field2, kind;
+ size_t sz = len * sizeof (uintptr_t);
+
+ if (mdb_vread(&bit_field2, sizeof (bit_field2),
+ map + V8_OFF_MAP_BIT_FIELD2) == -1) {
+ mdb_free(elts, sz);
+ goto err;
+ }
+
+ kind = bit_field2 >> V8_ELEMENTS_KIND_SHIFT;
+ kind &= (1 << V8_ELEMENTS_KIND_BITCOUNT) - 1;
+
+ if (kind == V8_ELEMENTS_FAST_ELEMENTS ||
+ kind == V8_ELEMENTS_FAST_HOLEY_ELEMENTS) {
+ for (ii = 0; ii < len; ii++) {
+ char name[10];
+
+ if (kind == V8_ELEMENTS_FAST_HOLEY_ELEMENTS &&
+ jsobj_is_hole(elts[ii]))
+ continue;
+
+ snprintf(name, sizeof (name), "%d", ii);
+
+ if (func(name, elts[ii], arg) != 0) {
+ mdb_free(elts, sz);
+ goto err;
+ }
+ }
+ } else if (kind == V8_ELEMENTS_DICTIONARY_ELEMENTS) {
+ if (read_heap_dict(elements, func, arg) != 0) {
+ mdb_free(elts, sz);
+ goto err;
+ }
+ }
+
+ mdb_free(elts, sz);
+ }
+
+ if (V8_DICT_SHIFT != -1) {
+ uintptr_t bit_field3;
+
+ if (mdb_vread(&bit_field3, sizeof (bit_field3),
+ map + V8_OFF_MAP_BIT_FIELD3) == -1)
+ goto err;
+
+ if (V8_SMI_VALUE(bit_field3) & (1 << V8_DICT_SHIFT))
+ return (read_heap_dict(ptr, func, arg));
+ } else if (V8_OFF_MAP_INSTANCE_DESCRIPTORS != -1) {
+ uintptr_t bit_field3;
+
+ if (mdb_vread(&bit_field3, sizeof (bit_field3),
+ map + V8_OFF_MAP_INSTANCE_DESCRIPTORS) == -1)
+ goto err;
+
+ if (V8_SMI_VALUE(bit_field3) == (1 << V8_ISSHARED_SHIFT)) {
+ /*
+ * On versions of V8 prior to that used in 0.10,
+ * the instance descriptors were overloaded to also
+ * be bit_field3 -- and there was no way from that
+ * field to infer a dictionary type. Because we
+ * can't determine if the map is actually the
+ * hash_table_map, we assume that if it's an object
+ * that has kIsShared set, that it is in fact a
+ * dictionary -- an assumption that is assuredly in
+ * error in some cases.
+ */
+ return (read_heap_dict(ptr, func, arg));
+ }
+ }
+
+ if (read_heap_array(ptr, &props, &nprops, UM_SLEEP) != 0)
+ goto err;
+
+ if ((off = V8_OFF_MAP_INSTANCE_DESCRIPTORS) == -1) {
+ if (V8_OFF_MAP_TRANSITIONS == -1 ||
+ V8_TRANSITIONS_IDX_DESC == -1 ||
+ V8_PROP_IDX_CONTENT != -1) {
+ mdb_warn("missing instance_descriptors, but did "
+ "not find expected transitions array metadata; "
+ "cannot read properties\n");
+ goto err;
+ }
+
+ off = V8_OFF_MAP_TRANSITIONS;
+ }
+
+ if (mdb_vread(&ptr, ps, map + off) == -1)
+ goto err;
+
+ if (V8_OFF_MAP_INSTANCE_DESCRIPTORS == -1) {
+ if (read_heap_array(ptr, &trans, &ntrans, UM_SLEEP) != 0)
+ goto err;
+
+ ptr = trans[V8_TRANSITIONS_IDX_DESC];
+ mdb_free(trans, ntrans * sizeof (uintptr_t));
+ }
+
+ if (read_heap_array(ptr, &descs, &ndescs, UM_SLEEP) != 0)
+ goto err;
+
+ if (read_size(&size, addr) != 0)
+ size = 0;
+
+ if (mdb_vread(&ninprops, 1, map + V8_OFF_MAP_INOBJECT_PROPERTIES) == -1)
+ goto err;
+
+ if (V8_PROP_IDX_CONTENT != -1 && V8_PROP_IDX_CONTENT < ndescs &&
+ read_heap_array(descs[V8_PROP_IDX_CONTENT], &content,
+ &ncontent, UM_SLEEP) != 0)
+ goto err;
+
+ if (V8_PROP_IDX_CONTENT == -1) {
+ /*
+ * On node v0.8 and later, the content is not stored in an
+ * orthogonal FixedArray, but rather with the descriptors.
+ */
+ content = descs;
+ ncontent = ndescs;
+ rndescs = ndescs > V8_PROP_IDX_FIRST ?
+ (ndescs - V8_PROP_IDX_FIRST) / V8_PROP_DESC_SIZE : 0;
+ } else {
+ rndescs = ndescs - V8_PROP_IDX_FIRST;
+ }
+
+ for (ii = 0; ii < rndescs; ii++) {
+ uintptr_t keyidx, validx, detidx, baseidx;
+ char buf[1024];
+ intptr_t val;
+ size_t len = sizeof (buf);
+ char *c = buf;
+
+ if (V8_PROP_IDX_CONTENT != -1) {
+ /*
+ * In node versions prior to v0.8, this was hardcoded
+ * in the V8 implementation, so we hardcode it here
+ * as well.
+ */
+ keyidx = ii + V8_PROP_IDX_FIRST;
+ validx = ii << 1;
+ detidx = (ii << 1) + 1;
+ } else {
+ baseidx = V8_PROP_IDX_FIRST + (ii * V8_PROP_DESC_SIZE);
+ keyidx = baseidx + V8_PROP_DESC_KEY;
+ validx = baseidx + V8_PROP_DESC_VALUE;
+ detidx = baseidx + V8_PROP_DESC_DETAILS;
+ }
+
+ if (detidx >= ncontent) {
+ v8_warn("property descriptor %d: detidx (%d) "
+ "out of bounds for content array (length %d)\n",
+ ii, detidx, ncontent);
+ continue;
+ }
+
+ if (!V8_DESC_ISFIELD(content[detidx]))
+ continue;
+
+ if (keyidx >= ndescs) {
+ v8_warn("property descriptor %d: keyidx (%d) "
+ "out of bounds for descriptor array (length %d)\n",
+ ii, keyidx, ndescs);
+ continue;
+ }
+
+ if (jsstr_print(descs[keyidx], JSSTR_NUDE, &c, &len) != 0)
+ continue;
+
+ val = (intptr_t)content[validx];
+
+ if (!V8_IS_SMI(val)) {
+ v8_warn("object %p: property descriptor %d: value "
+ "index value is not an SMI: %p\n", addr, ii, val);
+ continue;
+ }
+
+ val = V8_SMI_VALUE(val) - ninprops;
+
+ if (val < 0) {
+ /* property is stored directly in the object */
+ if (mdb_vread(&ptr, sizeof (ptr), addr + V8_OFF_HEAP(
+ size + val * sizeof (uintptr_t))) == -1) {
+ v8_warn("object %p: failed to read in-object "
+ "property at %p\n", addr, addr +
+ V8_OFF_HEAP(size + val *
+ sizeof (uintptr_t)));
+ continue;
+ }
+ } else {
+ /* property should be in "props" array */
+ if (val >= nprops) {
+ /*
+ * This can happen when properties are deleted.
+ * If this value isn't obviously corrupt, we'll
+ * just silently ignore it.
+ */
+ if (val < rndescs)
+ continue;
+
+ v8_warn("object %p: property descriptor %d: "
+ "value index value (%d) out of bounds "
+ "(%d)\n", addr, ii, val, nprops);
+ goto err;
+ }
+
+ ptr = props[val];
+ }
+
+ if (func(buf, ptr, arg) != 0)
+ goto err;
+ }
+
+ rval = 0;
+err:
+ if (props != NULL)
+ mdb_free(props, nprops * sizeof (uintptr_t));
+
+ if (descs != NULL)
+ mdb_free(descs, ndescs * sizeof (uintptr_t));
+
+ if (content != NULL && V8_PROP_IDX_CONTENT != -1)
+ mdb_free(content, ncontent * sizeof (uintptr_t));
+
+ return (rval);
+}
+
+/*
+ * Given the line endings table in "lendsp", computes the line number for the
+ * given token position and print the result into "buf". If "lendsp" is
+ * undefined, prints the token position instead.
+ */
+static int
+jsfunc_lineno(uintptr_t lendsp, uintptr_t tokpos,
+ char *buf, size_t buflen, int *lineno)
+{
+ uintptr_t size, bufsz, lower, upper, ii = 0;
+ uintptr_t *data;
+
+ if (lineno != NULL)
+ *lineno = -1;
+
+ if (jsobj_is_undefined(lendsp)) {
+ /*
+ * The token position is an SMI, but it comes in as its raw
+ * value so we can more easily compare it to values in the line
+ * endings table. If we're just printing the position directly,
+ * we must convert it here.
+ */
+ mdb_snprintf(buf, buflen, "position %d", V8_SMI_VALUE(tokpos));
+
+ if (lineno != NULL)
+ *lineno = 0;
+
+ return (0);
+ }
+
+ if (read_heap_smi(&size, lendsp, V8_OFF_FIXEDARRAY_LENGTH) != 0)
+ return (-1);
+
+ bufsz = size * sizeof (data[0]);
+
+ if ((data = mdb_alloc(bufsz, UM_NOSLEEP)) == NULL) {
+ v8_warn("failed to alloc %d bytes for FixedArray data", bufsz);
+ return (-1);
+ }
+
+ if (mdb_vread(data, bufsz, lendsp + V8_OFF_FIXEDARRAY_DATA) != bufsz) {
+ v8_warn("failed to read FixedArray data");
+ mdb_free(data, bufsz);
+ return (-1);
+ }
+
+ lower = 0;
+ upper = size - 1;
+
+ if (tokpos > data[upper]) {
+ (void) strlcpy(buf, "position out of range", buflen);
+ mdb_free(data, bufsz);
+
+ if (lineno != NULL)
+ *lineno = 0;
+
+ return (0);
+ }
+
+ if (tokpos <= data[0]) {
+ (void) strlcpy(buf, "line 1", buflen);
+ mdb_free(data, bufsz);
+
+ if (lineno != NULL)
+ *lineno = 1;
+
+ return (0);
+ }
+
+ while (upper >= 1) {
+ ii = (lower + upper) >> 1;
+ if (tokpos > data[ii])
+ lower = ii + 1;
+ else if (tokpos <= data[ii - 1])
+ upper = ii - 1;
+ else
+ break;
+ }
+
+ if (lineno != NULL)
+ *lineno = ii + 1;
+
+ (void) mdb_snprintf(buf, buflen, "line %d", ii + 1);
+ mdb_free(data, bufsz);
+ return (0);
+}
+
+/*
+ * Given a Script object, prints nlines on either side of lineno, with each
+ * line prefixed by prefix (if non-NULL).
+ */
+static void
+jsfunc_lines(uintptr_t scriptp,
+ uintptr_t start, uintptr_t end, int nlines, char *prefix)
+{
+ uintptr_t src;
+ char *buf, *bufp;
+ size_t bufsz = 1024, len;
+ int i, line, slop = 10;
+ boolean_t newline = B_TRUE;
+ int startline = -1, endline = -1;
+
+ if (read_heap_ptr(&src, scriptp, V8_OFF_SCRIPT_SOURCE) != 0)
+ return;
+
+ for (;;) {
+ if ((buf = mdb_zalloc(bufsz, UM_NOSLEEP)) == NULL) {
+ mdb_warn("failed to allocate source code "
+ "buffer of size %d", bufsz);
+ return;
+ }
+
+ bufp = buf;
+ len = bufsz;
+
+ if (jsstr_print(src, JSSTR_NUDE, &bufp, &len) != 0) {
+ mdb_free(buf, bufsz);
+ return;
+ }
+
+ if (len > slop)
+ break;
+
+ mdb_free(buf, bufsz);
+ bufsz <<= 1;
+ }
+
+ if (end >= bufsz)
+ return;
+
+ /*
+ * First, take a pass to determine where our lines actually start.
+ */
+ for (i = 0, line = 1; buf[i] != '\0'; i++) {
+ if (buf[i] == '\n')
+ line++;
+
+ if (i == start)
+ startline = line;
+
+ if (i == end) {
+ endline = line;
+ break;
+ }
+ }
+
+ if (startline == -1 || endline == -1) {
+ mdb_warn("for script %p, could not determine startline/endline"
+ " (start %ld, end %ld, nlines %d)",
+ scriptp, start, end, nlines);
+ mdb_free(buf, bufsz);
+ return;
+ }
+
+ for (i = 0, line = 1; buf[i] != '\0'; i++) {
+ if (buf[i] == '\n') {
+ line++;
+ newline = B_TRUE;
+ }
+
+ if (line < startline - nlines)
+ continue;
+
+ if (line > endline + nlines)
+ break;
+
+ mdb_printf("%c", buf[i]);
+
+ if (newline) {
+ if (line >= startline && line <= endline)
+ mdb_printf("%<b>");
+
+ if (prefix != NULL)
+ mdb_printf(prefix, line);
+
+ if (line >= startline && line <= endline)
+ mdb_printf("%</b>");
+
+ newline = B_FALSE;
+ }
+ }
+
+ mdb_printf("\n");
+
+ if (line == endline)
+ mdb_printf("%</b>");
+
+ mdb_free(buf, bufsz);
+}
+
+/*
+ * Given a SharedFunctionInfo object, prints into bufp a name of the function
+ * suitable for printing. This function attempts to infer a name for anonymous
+ * functions.
+ */
+static int
+jsfunc_name(uintptr_t funcinfop, char **bufp, size_t *lenp)
+{
+ uintptr_t ptrp;
+ char *bufs = *bufp;
+
+ if (read_heap_ptr(&ptrp, funcinfop,
+ V8_OFF_SHAREDFUNCTIONINFO_NAME) != 0) {
+ (void) bsnprintf(bufp, lenp,
+ "<function (failed to read SharedFunctionInfo)>");
+ return (-1);
+ }
+
+ if (jsstr_print(ptrp, JSSTR_NUDE, bufp, lenp) != 0)
+ return (-1);
+
+ if (*bufp != bufs)
+ return (0);
+
+ if (read_heap_ptr(&ptrp, funcinfop,
+ V8_OFF_SHAREDFUNCTIONINFO_INFERRED_NAME) != 0) {
+ (void) bsnprintf(bufp, lenp, "<anonymous>");
+ return (0);
+ }
+
+ (void) bsnprintf(bufp, lenp, "<anonymous> (as ");
+ bufs = *bufp;
+
+ if (jsstr_print(ptrp, JSSTR_NUDE, bufp, lenp) != 0)
+ return (-1);
+
+ if (*bufp == bufs)
+ (void) bsnprintf(bufp, lenp, "<anon>");
+
+ (void) bsnprintf(bufp, lenp, ")");
+
+ return (0);
+}
+
+/*
+ * JavaScript-level object printing
+ */
+typedef struct jsobj_print {
+ char **jsop_bufp;
+ size_t *jsop_lenp;
+ int jsop_indent;
+ uint64_t jsop_depth;
+ boolean_t jsop_printaddr;
+ uintptr_t jsop_baseaddr;
+ int jsop_nprops;
+ const char *jsop_member;
+ boolean_t jsop_found;
+ boolean_t jsop_descended;
+} jsobj_print_t;
+
+static int jsobj_print_number(uintptr_t, jsobj_print_t *);
+static int jsobj_print_oddball(uintptr_t, jsobj_print_t *);
+static int jsobj_print_jsobject(uintptr_t, jsobj_print_t *);
+static int jsobj_print_jsarray(uintptr_t, jsobj_print_t *);
+static int jsobj_print_jsfunction(uintptr_t, jsobj_print_t *);
+static int jsobj_print_jsdate(uintptr_t, jsobj_print_t *);
+
+static int
+jsobj_print(uintptr_t addr, jsobj_print_t *jsop)
+{
+ uint8_t type;
+ const char *klass;
+ char **bufp = jsop->jsop_bufp;
+ size_t *lenp = jsop->jsop_lenp;
+
+ const struct {
+ char *name;
+ int (*func)(uintptr_t, jsobj_print_t *);
+ } table[] = {
+ { "HeapNumber", jsobj_print_number },
+ { "Oddball", jsobj_print_oddball },
+ { "JSObject", jsobj_print_jsobject },
+ { "JSArray", jsobj_print_jsarray },
+ { "JSFunction", jsobj_print_jsfunction },
+ { "JSDate", jsobj_print_jsdate },
+ { NULL }
+ }, *ent;
+
+ if (jsop->jsop_baseaddr != NULL && jsop->jsop_member == NULL)
+ (void) bsnprintf(bufp, lenp, "%p: ", jsop->jsop_baseaddr);
+
+ if (jsop->jsop_printaddr && jsop->jsop_member == NULL)
+ (void) bsnprintf(bufp, lenp, "%p: ", addr);
+
+ if (V8_IS_SMI(addr)) {
+ (void) bsnprintf(bufp, lenp, "%d", V8_SMI_VALUE(addr));
+ return (0);
+ }
+
+ if (!V8_IS_HEAPOBJECT(addr)) {
+ (void) bsnprintf(bufp, lenp, "<not a heap object>");
+ return (-1);
+ }
+
+ if (read_typebyte(&type, addr) != 0) {
+ (void) bsnprintf(bufp, lenp, "<couldn't read type>");
+ return (-1);
+ }
+
+ if (V8_TYPE_STRING(type)) {
+ if (jsstr_print(addr, JSSTR_QUOTED, bufp, lenp) == -1)
+ return (-1);
+
+ return (0);
+ }
+
+ klass = enum_lookup_str(v8_types, type, "<unknown>");
+
+ for (ent = &table[0]; ent->name != NULL; ent++) {
+ if (strcmp(klass, ent->name) == 0) {
+ jsop->jsop_descended = B_TRUE;
+ return (ent->func(addr, jsop));
+ }
+ }
+
+ (void) bsnprintf(bufp, lenp,
+ "<unknown JavaScript object type \"%s\">", klass);
+ return (-1);
+}
+
+static int
+jsobj_print_number(uintptr_t addr, jsobj_print_t *jsop)
+{
+ char **bufp = jsop->jsop_bufp;
+ size_t *lenp = jsop->jsop_lenp;
+ double numval;
+
+ if (read_heap_double(&numval, addr, V8_OFF_HEAPNUMBER_VALUE) == -1)
+ return (-1);
+
+ if (numval == (long long)numval)
+ (void) bsnprintf(bufp, lenp, "%lld", (long long)numval);
+ else
+ (void) bsnprintf(bufp, lenp, "%e", numval);
+
+ return (0);
+}
+
+static int
+jsobj_print_oddball(uintptr_t addr, jsobj_print_t *jsop)
+{
+ char **bufp = jsop->jsop_bufp;
+ size_t *lenp = jsop->jsop_lenp;
+ uintptr_t strptr;
+
+ if (read_heap_ptr(&strptr, addr, V8_OFF_ODDBALL_TO_STRING) != 0)
+ return (-1);
+
+ return (jsstr_print(strptr, JSSTR_NUDE, bufp, lenp));
+}
+
+static int
+jsobj_print_prop(const char *desc, uintptr_t val, void *arg)
+{
+ jsobj_print_t *jsop = arg, descend;
+ char **bufp = jsop->jsop_bufp;
+ size_t *lenp = jsop->jsop_lenp;
+
+ (void) bsnprintf(bufp, lenp, "%s\n%*s%s: ", jsop->jsop_nprops == 0 ?
+ "{" : "", jsop->jsop_indent + 4, "", desc);
+
+ descend = *jsop;
+ descend.jsop_depth--;
+ descend.jsop_indent += 4;
+
+ (void) jsobj_print(val, &descend);
+ (void) bsnprintf(bufp, lenp, ",");
+
+ jsop->jsop_nprops++;
+
+ return (0);
+}
+
+static int
+jsobj_print_prop_member(const char *desc, uintptr_t val, void *arg)
+{
+ jsobj_print_t *jsop = arg, descend;
+ const char *member = jsop->jsop_member, *next = member;
+ int rv;
+
+ for (; *next != '\0' && *next != '.' && *next != '['; next++)
+ continue;
+
+ if (*member == '[') {
+ mdb_warn("cannot use array indexing on an object\n");
+ return (-1);
+ }
+
+ if (strncmp(member, desc, next - member) != 0)
+ return (0);
+
+ if (desc[next - member] != '\0')
+ return (0);
+
+ /*
+ * This property matches the desired member; descend.
+ */
+ descend = *jsop;
+
+ if (*next == '\0') {
+ descend.jsop_member = NULL;
+ descend.jsop_found = B_TRUE;
+ } else {
+ descend.jsop_member = *next == '.' ? next + 1 : next;
+ }
+
+ rv = jsobj_print(val, &descend);
+ jsop->jsop_found = descend.jsop_found;
+
+ return (rv);
+}
+
+static int
+jsobj_print_jsobject(uintptr_t addr, jsobj_print_t *jsop)
+{
+ char **bufp = jsop->jsop_bufp;
+ size_t *lenp = jsop->jsop_lenp;
+
+ if (jsop->jsop_member != NULL)
+ return (jsobj_properties(addr, jsobj_print_prop_member, jsop));
+
+ if (jsop->jsop_depth == 0) {
+ (void) bsnprintf(bufp, lenp, "[...]");
+ return (0);
+ }
+
+ jsop->jsop_nprops = 0;
+
+ if (jsobj_properties(addr, jsobj_print_prop, jsop) != 0)
+ return (-1);
+
+ if (jsop->jsop_nprops > 0) {
+ (void) bsnprintf(bufp, lenp, "\n%*s", jsop->jsop_indent, "");
+ } else if (jsop->jsop_nprops == 0) {
+ (void) bsnprintf(bufp, lenp, "{");
+ } else {
+ (void) bsnprintf(bufp, lenp, "{ /* unknown property */ ");
+ }
+
+ (void) bsnprintf(bufp, lenp, "}");
+
+ return (0);
+}
+
+static int
+jsobj_print_jsarray_member(uintptr_t addr, jsobj_print_t *jsop)
+{
+ uintptr_t *elts;
+ jsobj_print_t descend;
+ uintptr_t ptr;
+ const char *member = jsop->jsop_member, *end, *p;
+ size_t elt = 0, place = 1, len, rv;
+ char **bufp = jsop->jsop_bufp;
+ size_t *lenp = jsop->jsop_lenp;
+
+ if (read_heap_ptr(&ptr, addr, V8_OFF_JSOBJECT_ELEMENTS) != 0) {
+ (void) bsnprintf(bufp, lenp,
+ "<array member (failed to read elements)>");
+ return (-1);
+ }
+
+ if (read_heap_array(ptr, &elts, &len, UM_SLEEP | UM_GC) != 0) {
+ (void) bsnprintf(bufp, lenp,
+ "<array member (failed to read array)>");
+ return (-1);
+ }
+
+ if (*member != '[') {
+ mdb_warn("expected bracketed array index; "
+ "found '%s'\n", member);
+ return (-1);
+ }
+
+ if ((end = strchr(member, ']')) == NULL) {
+ mdb_warn("missing array index terminator\n");
+ return (-1);
+ }
+
+ /*
+ * We know where our array index ends; convert it to an integer
+ * by stepping through it from least significant digit to most.
+ */
+ for (p = end - 1; p > member; p--) {
+ if (*p < '0' || *p > '9') {
+ mdb_warn("illegal array index at '%c'\n", *p);
+ return (-1);
+ }
+
+ elt += (*p - '0') * place;
+ place *= 10;
+ }
+
+ if (place == 1) {
+ mdb_warn("missing array index\n");
+ return (-1);
+ }
+
+ if (elt >= len) {
+ mdb_warn("array index %d exceeds size of %d\n", elt, len);
+ return (-1);
+ }
+
+ descend = *jsop;
+
+ switch (*(++end)) {
+ case '\0':
+ descend.jsop_member = NULL;
+ descend.jsop_found = B_TRUE;
+ break;
+
+ case '.':
+ descend.jsop_member = end + 1;
+ break;
+
+ case '[':
+ descend.jsop_member = end;
+ break;
+
+ default:
+ mdb_warn("illegal character '%c' following "
+ "array index terminator\n", *end);
+ return (-1);
+ }
+
+ rv = jsobj_print(elts[elt], &descend);
+ jsop->jsop_found = descend.jsop_found;
+
+ return (rv);
+}
+
+static int
+jsobj_print_jsarray(uintptr_t addr, jsobj_print_t *jsop)
+{
+ char **bufp = jsop->jsop_bufp;
+ size_t *lenp = jsop->jsop_lenp;
+ int indent = jsop->jsop_indent;
+ jsobj_print_t descend;
+ uintptr_t ptr;
+ uintptr_t *elts;
+ size_t ii, len;
+
+ if (jsop->jsop_member != NULL)
+ return (jsobj_print_jsarray_member(addr, jsop));
+
+ if (jsop->jsop_depth == 0) {
+ (void) bsnprintf(bufp, lenp, "[...]");
+ return (0);
+ }
+
+ if (read_heap_ptr(&ptr, addr, V8_OFF_JSOBJECT_ELEMENTS) != 0) {
+ (void) bsnprintf(bufp, lenp,
+ "<array (failed to read elements)>");
+ return (-1);
+ }
+
+ if (read_heap_array(ptr, &elts, &len, UM_SLEEP | UM_GC) != 0) {
+ (void) bsnprintf(bufp, lenp, "<array (failed to read array)>");
+ return (-1);
+ }
+
+ if (len == 0) {
+ (void) bsnprintf(bufp, lenp, "[]");
+ return (0);
+ }
+
+ descend = *jsop;
+ descend.jsop_depth--;
+ descend.jsop_indent += 4;
+
+ if (len == 1) {
+ (void) bsnprintf(bufp, lenp, "[ ");
+ (void) jsobj_print(elts[0], &descend);
+ (void) bsnprintf(bufp, lenp, " ]");
+ return (0);
+ }
+
+ (void) bsnprintf(bufp, lenp, "[\n");
+
+ for (ii = 0; ii < len && *lenp > 0; ii++) {
+ (void) bsnprintf(bufp, lenp, "%*s", indent + 4, "");
+ (void) jsobj_print(elts[ii], &descend);
+ (void) bsnprintf(bufp, lenp, ",\n");
+ }
+
+ (void) bsnprintf(bufp, lenp, "%*s", indent, "");
+ (void) bsnprintf(bufp, lenp, "]");
+
+ return (0);
+}
+
+static int
+jsobj_print_jsfunction(uintptr_t addr, jsobj_print_t *jsop)
+{
+ char **bufp = jsop->jsop_bufp;
+ size_t *lenp = jsop->jsop_lenp;
+ uintptr_t shared;
+
+ if (read_heap_ptr(&shared, addr, V8_OFF_JSFUNCTION_SHARED) != 0)
+ return (-1);
+
+ (void) bsnprintf(bufp, lenp, "function ");
+ return (jsfunc_name(shared, bufp, lenp) != 0);
+}
+
+static int
+jsobj_print_jsdate(uintptr_t addr, jsobj_print_t *jsop)
+{
+ char **bufp = jsop->jsop_bufp;
+ size_t *lenp = jsop->jsop_lenp;
+ char buf[128];
+ uintptr_t value;
+ uint8_t type;
+ double numval;
+
+ if (V8_OFF_JSDATE_VALUE == -1) {
+ (void) bsnprintf(bufp, lenp, "<JSDate>", buf);
+ return (0);
+ }
+
+ if (read_heap_ptr(&value, addr, V8_OFF_JSDATE_VALUE) != 0) {
+ (void) bsnprintf(bufp, lenp, "<JSDate (failed to read value)>");
+ return (-1);
+ }
+
+ if (read_typebyte(&type, value) != 0) {
+ (void) bsnprintf(bufp, lenp, "<JSDate (failed to read type)>");
+ return (-1);
+ }
+
+ if (strcmp(enum_lookup_str(v8_types, type, ""), "HeapNumber") != 0)
+ return (-1);
+
+ if (read_heap_double(&numval, value, V8_OFF_HEAPNUMBER_VALUE) == -1) {
+ (void) bsnprintf(bufp, lenp, "<JSDate (failed to read num)>");
+ return (-1);
+ }
+
+ mdb_snprintf(buf, sizeof (buf), "%Y",
+ (time_t)((long long)numval / MILLISEC));
+ (void) bsnprintf(bufp, lenp, "%lld (%s)", (long long)numval, buf);
+
+ return (0);
+}
+
+/*
+ * dcmd implementations
+ */
+
+/* ARGSUSED */
+static int
+dcmd_v8classes(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ v8_class_t *clp;
+
+ for (clp = v8_classes; clp != NULL; clp = clp->v8c_next)
+ mdb_printf("%s\n", clp->v8c_name);
+
+ return (DCMD_OK);
+}
+
+static int
+do_v8code(uintptr_t addr, boolean_t opt_d)
+{
+ uintptr_t instrlen;
+ ssize_t instroff = V8_OFF_CODE_INSTRUCTION_START;
+
+ if (read_heap_ptr(&instrlen, addr, V8_OFF_CODE_INSTRUCTION_SIZE) != 0)
+ return (DCMD_ERR);
+
+ mdb_printf("code: %p\n", addr);
+ mdb_printf("instructions: [%p, %p)\n", addr + instroff,
+ addr + instroff + instrlen);
+
+ if (!opt_d)
+ return (DCMD_OK);
+
+ mdb_set_dot(addr + instroff);
+
+ do {
+ (void) mdb_inc_indent(8); /* gets reset by mdb_eval() */
+
+ /*
+ * This is absolutely awful. We want to disassemble the above
+ * range of instructions. Because we don't know how many there
+ * are, we can't use "::dis". We resort to evaluating "./i",
+ * but then we need to advance "." by the size of the
+ * instruction just printed. The only way to do that is by
+ * printing out "+", but we don't want that to show up, so we
+ * redirect it to /dev/null.
+ */
+ if (mdb_eval("/i") != 0 ||
+ mdb_eval("+=p ! cat > /dev/null") != 0) {
+ (void) mdb_dec_indent(8);
+ v8_warn("failed to disassemble at %p", mdb_get_dot());
+ return (DCMD_ERR);
+ }
+ } while (mdb_get_dot() < addr + instroff + instrlen);
+
+ (void) mdb_dec_indent(8);
+ return (DCMD_OK);
+}
+
+/* ARGSUSED */
+static int
+dcmd_v8code(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ boolean_t opt_d = B_FALSE;
+
+ if (mdb_getopts(argc, argv, 'd', MDB_OPT_SETBITS, B_TRUE, &opt_d,
+ NULL) != argc)
+ return (DCMD_USAGE);
+
+ return (do_v8code(addr, opt_d));
+}
+
+/* ARGSUSED */
+static int
+dcmd_v8function(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ uint8_t type;
+ uintptr_t funcinfop, scriptp, lendsp, tokpos, namep, codep;
+ char *bufp;
+ size_t len;
+ boolean_t opt_d = B_FALSE;
+ char buf[512];
+
+ if (mdb_getopts(argc, argv, 'd', MDB_OPT_SETBITS, B_TRUE, &opt_d,
+ NULL) != argc)
+ return (DCMD_USAGE);
+
+ v8_warnings++;
+
+ if (read_typebyte(&type, addr) != 0)
+ goto err;
+
+ if (strcmp(enum_lookup_str(v8_types, type, ""), "JSFunction") != 0) {
+ v8_warn("%p is not an instance of JSFunction\n", addr);
+ goto err;
+ }
+
+ if (read_heap_ptr(&funcinfop, addr, V8_OFF_JSFUNCTION_SHARED) != 0 ||
+ read_heap_ptr(&tokpos, funcinfop,
+ V8_OFF_SHAREDFUNCTIONINFO_FUNCTION_TOKEN_POSITION) != 0 ||
+ read_heap_ptr(&scriptp, funcinfop,
+ V8_OFF_SHAREDFUNCTIONINFO_SCRIPT) != 0 ||
+ read_heap_ptr(&namep, scriptp, V8_OFF_SCRIPT_NAME) != 0 ||
+ read_heap_ptr(&lendsp, scriptp, V8_OFF_SCRIPT_LINE_ENDS) != 0)
+ goto err;
+
+ bufp = buf;
+ len = sizeof (buf);
+ if (jsfunc_name(funcinfop, &bufp, &len) != 0)
+ goto err;
+
+ mdb_printf("%p: JSFunction: %s\n", addr, buf);
+
+ bufp = buf;
+ len = sizeof (buf);
+ mdb_printf("defined at ");
+
+ if (jsstr_print(namep, JSSTR_NUDE, &bufp, &len) == 0)
+ mdb_printf("%s ", buf);
+
+ if (jsfunc_lineno(lendsp, tokpos, buf, sizeof (buf), NULL) == 0)
+ mdb_printf("%s", buf);
+
+ mdb_printf("\n");
+
+ if (read_heap_ptr(&codep,
+ funcinfop, V8_OFF_SHAREDFUNCTIONINFO_CODE) != 0)
+ goto err;
+
+ v8_warnings--;
+
+ return (do_v8code(codep, opt_d));
+
+err:
+ v8_warnings--;
+ return (DCMD_ERR);
+}
+
+/* ARGSUSED */
+static int
+dcmd_v8frametypes(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ enum_print(v8_frametypes);
+ return (DCMD_OK);
+}
+
+static void
+dcmd_v8print_help(void)
+{
+ mdb_printf(
+ "Prints out \".\" (a V8 heap object) as an instance of its C++\n"
+ "class. With no arguments, the appropriate class is detected\n"
+ "automatically. The 'class' argument overrides this to print an\n"
+ "object as an instance of the given class. The list of known\n"
+ "classes can be viewed with ::jsclasses.");
+}
+
+/* ARGSUSED */
+static int
+dcmd_v8print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ const char *rqclass;
+ v8_class_t *clp;
+ char *bufp;
+ size_t len;
+ uint8_t type;
+ char buf[256];
+
+ if (argc < 1) {
+ /*
+ * If no type was specified, determine it automatically.
+ */
+ bufp = buf;
+ len = sizeof (buf);
+ if (obj_jstype(addr, &bufp, &len, &type) != 0)
+ return (DCMD_ERR);
+
+ if (type == 0) {
+ /* For SMI or Failure, just print out the type. */
+ mdb_printf("%s\n", buf);
+ return (DCMD_OK);
+ }
+
+ if ((rqclass = enum_lookup_str(v8_types, type, NULL)) == NULL) {
+ v8_warn("object has unknown type\n");
+ return (DCMD_ERR);
+ }
+ } else {
+ if (argv[0].a_type != MDB_TYPE_STRING)
+ return (DCMD_USAGE);
+
+ rqclass = argv[0].a_un.a_str;
+ }
+
+ for (clp = v8_classes; clp != NULL; clp = clp->v8c_next) {
+ if (strcmp(rqclass, clp->v8c_name) == 0)
+ break;
+ }
+
+ if (clp == NULL) {
+ v8_warn("unknown class '%s'\n", rqclass);
+ return (DCMD_USAGE);
+ }
+
+ return (obj_print_class(addr, clp));
+}
+
+/* ARGSUSED */
+static int
+dcmd_v8type(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ char buf[64];
+ char *bufp = buf;
+ size_t len = sizeof (buf);
+
+ if (obj_jstype(addr, &bufp, &len, NULL) != 0)
+ return (DCMD_ERR);
+
+ mdb_printf("0x%p: %s\n", addr, buf);
+ return (DCMD_OK);
+}
+
+/* ARGSUSED */
+static int
+dcmd_v8types(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ enum_print(v8_types);
+ return (DCMD_OK);
+}
+
+static int
+load_current_context(uintptr_t *fpp, uintptr_t *raddrp)
+{
+ mdb_reg_t regfp, regip;
+
+#ifdef __amd64
+ if (mdb_getareg(1, "rbp", &regfp) != 0 ||
+ mdb_getareg(1, "rip", &regip) != 0) {
+#else
+#ifdef __i386
+ if (mdb_getareg(1, "ebp", &regfp) != 0 ||
+ mdb_getareg(1, "eip", &regip) != 0) {
+#else
+#error Unrecognized microprocessor
+#endif
+#endif
+ v8_warn("failed to load current context");
+ return (-1);
+ }
+
+ if (fpp != NULL)
+ *fpp = (uintptr_t)regfp;
+
+ if (raddrp != NULL)
+ *raddrp = (uintptr_t)regip;
+
+ return (0);
+}
+
+static int
+do_jsframe_special(uintptr_t fptr, uintptr_t raddr, char *prop)
+{
+ uintptr_t ftype;
+ const char *ftypename;
+
+ /*
+ * First see if this looks like a native frame rather than a JavaScript
+ * frame. We check this by asking MDB to print the return address
+ * symbolically. If that works, we assume this was NOT a V8 frame,
+ * since those are never in the symbol table.
+ */
+ if (mdb_snprintf(NULL, 0, "%A", raddr) > 1) {
+ if (prop != NULL)
+ return (0);
+
+ mdb_printf("%p %a\n", fptr, raddr);
+ return (0);
+ }
+
+ /*
+ * Figure out what kind of frame this is using the same algorithm as
+ * V8's ComputeType function. First, look for an ArgumentsAdaptorFrame.
+ */
+ if (mdb_vread(&ftype, sizeof (ftype), fptr + V8_OFF_FP_CONTEXT) != -1 &&
+ V8_IS_SMI(ftype) &&
+ (ftypename = enum_lookup_str(v8_frametypes, V8_SMI_VALUE(ftype),
+ NULL)) != NULL && strstr(ftypename, "ArgumentsAdaptor") != NULL) {
+ if (prop != NULL)
+ return (0);
+
+ mdb_printf("%p %a <%s>\n", fptr, raddr, ftypename);
+ return (0);
+ }
+
+ /*
+ * Other special frame types are indicated by a marker.
+ */
+ if (mdb_vread(&ftype, sizeof (ftype), fptr + V8_OFF_FP_MARKER) != -1 &&
+ V8_IS_SMI(ftype)) {
+ if (prop != NULL)
+ return (0);
+
+ ftypename = enum_lookup_str(v8_frametypes, V8_SMI_VALUE(ftype),
+ NULL);
+
+ if (ftypename != NULL)
+ mdb_printf("%p %a <%s>\n", fptr, raddr, ftypename);
+ else
+ mdb_printf("%p %a\n", fptr, raddr);
+
+ return (0);
+ }
+
+ return (-1);
+}
+
+static int
+do_jsframe(uintptr_t fptr, uintptr_t raddr, boolean_t verbose,
+ char *func, char *prop, uintptr_t nlines)
+{
+ uintptr_t funcp, funcinfop, tokpos, endpos, scriptp, lendsp, ptrp;
+ uintptr_t ii, nargs;
+ const char *typename;
+ char *bufp;
+ size_t len;
+ uint8_t type;
+ char buf[256];
+ int lineno;
+
+ /*
+ * Check for non-JavaScript frames first.
+ */
+ if (func == NULL && do_jsframe_special(fptr, raddr, prop) == 0)
+ return (DCMD_OK);
+
+ /*
+ * At this point we assume we're looking at a JavaScript frame. As with
+ * native frames, fish the address out of the parent frame.
+ */
+ if (mdb_vread(&funcp, sizeof (funcp),
+ fptr + V8_OFF_FP_FUNCTION) == -1) {
+ v8_warn("failed to read stack at %p",
+ fptr + V8_OFF_FP_FUNCTION);
+ return (DCMD_ERR);
+ }
+
+ /*
+ * Check if this thing is really a JSFunction at all. For some frames,
+ * it's a Code object, presumably indicating some internal frame.
+ */
+ if (read_typebyte(&type, funcp) != 0 ||
+ (typename = enum_lookup_str(v8_types, type, NULL)) == NULL) {
+ if (func != NULL || prop != NULL)
+ return (DCMD_OK);
+
+ mdb_printf("%p %a\n", fptr, raddr);
+ return (DCMD_OK);
+ }
+
+ if (strcmp("Code", typename) == 0) {
+ if (func != NULL || prop != NULL)
+ return (DCMD_OK);
+
+ mdb_printf("%p %a internal (Code: %p)\n", fptr, raddr, funcp);
+ return (DCMD_OK);
+ }
+
+ if (strcmp("JSFunction", typename) != 0) {
+ if (func != NULL || prop != NULL)
+ return (DCMD_OK);
+
+ mdb_printf("%p %a unknown (%s: %p)", fptr, raddr, typename,
+ funcp);
+ return (DCMD_OK);
+ }
+
+ if (read_heap_ptr(&funcinfop, funcp, V8_OFF_JSFUNCTION_SHARED) != 0)
+ return (DCMD_ERR);
+
+ bufp = buf;
+ len = sizeof (buf);
+ if (jsfunc_name(funcinfop, &bufp, &len) != 0)
+ return (DCMD_ERR);
+
+ if (func != NULL && strcmp(buf, func) != 0)
+ return (DCMD_OK);
+
+ if (prop == NULL)
+ mdb_printf("%p %a %s (%p)\n", fptr, raddr, buf, funcp);
+
+ if (!verbose && prop == NULL)
+ return (DCMD_OK);
+
+ /*
+ * Although the token position is technically an SMI, we're going to
+ * byte-compare it to other SMI values so we don't want decode it here.
+ */
+ if (read_heap_ptr(&tokpos, funcinfop,
+ V8_OFF_SHAREDFUNCTIONINFO_FUNCTION_TOKEN_POSITION) != 0)
+ return (DCMD_ERR);
+
+ if (read_heap_ptr(&scriptp, funcinfop,
+ V8_OFF_SHAREDFUNCTIONINFO_SCRIPT) != 0)
+ return (DCMD_ERR);
+
+ if (read_heap_ptr(&ptrp, scriptp, V8_OFF_SCRIPT_NAME) != 0)
+ return (DCMD_ERR);
+
+ bufp = buf;
+ len = sizeof (buf);
+ (void) jsstr_print(ptrp, JSSTR_NUDE, &bufp, &len);
+
+ if (prop != NULL && strcmp(prop, "file") == 0) {
+ mdb_printf("%s\n", buf);
+ return (DCMD_OK);
+ }
+
+ if (prop == NULL) {
+ (void) mdb_inc_indent(4);
+ mdb_printf("file: %s\n", buf);
+ }
+
+ if (read_heap_ptr(&lendsp, scriptp, V8_OFF_SCRIPT_LINE_ENDS) != 0)
+ return (DCMD_ERR);
+
+ if (read_heap_smi(&nargs, funcinfop,
+ V8_OFF_SHAREDFUNCTIONINFO_LENGTH) == 0) {
+ for (ii = 0; ii < nargs; ii++) {
+ uintptr_t argptr;
+ char arg[10];
+
+ if (mdb_vread(&argptr, sizeof (argptr),
+ fptr + V8_OFF_FP_ARGS + (nargs - ii - 1) *
+ sizeof (uintptr_t)) == -1)
+ continue;
+
+ (void) snprintf(arg, sizeof (arg), "arg%d", ii + 1);
+
+ if (prop != NULL) {
+ if (strcmp(arg, prop) != 0)
+ continue;
+
+ mdb_printf("%p\n", argptr);
+ return (DCMD_OK);
+ }
+
+ bufp = buf;
+ len = sizeof (buf);
+ (void) obj_jstype(argptr, &bufp, &len, NULL);
+
+ mdb_printf("arg%d: %p (%s)\n", (ii + 1), argptr, buf);
+ }
+ }
+
+ (void) jsfunc_lineno(lendsp, tokpos, buf, sizeof (buf), &lineno);
+
+ if (prop != NULL) {
+ if (strcmp(prop, "posn") == 0) {
+ mdb_printf("%s\n", buf);
+ return (DCMD_OK);
+ }
+
+ mdb_warn("unknown frame property '%s'\n", prop);
+ return (DCMD_ERR);
+ }
+
+ mdb_printf("posn: %s", buf);
+
+ if (nlines != 0 && read_heap_smi(&endpos, funcinfop,
+ V8_OFF_SHAREDFUNCTIONINFO_END_POSITION) == 0) {
+ jsfunc_lines(scriptp,
+ V8_SMI_VALUE(tokpos), endpos, nlines, "%5d ");
+ }
+
+ mdb_printf("\n");
+ (void) mdb_dec_indent(4);
+
+ return (DCMD_OK);
+}
+
+typedef struct findjsobjects_prop {
+ struct findjsobjects_prop *fjsp_next;
+ char fjsp_desc[1];
+} findjsobjects_prop_t;
+
+typedef struct findjsobjects_instance {
+ uintptr_t fjsi_addr;
+ struct findjsobjects_instance *fjsi_next;
+} findjsobjects_instance_t;
+
+typedef struct findjsobjects_obj {
+ findjsobjects_prop_t *fjso_props;
+ findjsobjects_prop_t *fjso_last;
+ size_t fjso_nprops;
+ findjsobjects_instance_t fjso_instances;
+ int fjso_ninstances;
+ avl_node_t fjso_node;
+ struct findjsobjects_obj *fjso_next;
+ boolean_t fjso_malformed;
+ char fjso_constructor[80];
+} findjsobjects_obj_t;
+
+typedef struct findjsobjects_stats {
+ int fjss_heapobjs;
+ int fjss_cached;
+ int fjss_typereads;
+ int fjss_jsobjs;
+ int fjss_objects;
+ int fjss_arrays;
+ int fjss_uniques;
+} findjsobjects_stats_t;
+
+typedef struct findjsobjects_reference {
+ uintptr_t fjsrf_addr;
+ char *fjsrf_desc;
+ size_t fjsrf_index;
+ struct findjsobjects_reference *fjsrf_next;
+} findjsobjects_reference_t;
+
+typedef struct findjsobjects_referent {
+ avl_node_t fjsr_node;
+ uintptr_t fjsr_addr;
+ findjsobjects_reference_t *fjsr_head;
+ findjsobjects_reference_t *fjsr_tail;
+ struct findjsobjects_referent *fjsr_next;
+} findjsobjects_referent_t;
+
+typedef struct findjsobjects_state {
+ uintptr_t fjs_addr;
+ uintptr_t fjs_size;
+ boolean_t fjs_verbose;
+ boolean_t fjs_brk;
+ boolean_t fjs_allobjs;
+ boolean_t fjs_initialized;
+ boolean_t fjs_marking;
+ boolean_t fjs_referred;
+ avl_tree_t fjs_tree;
+ avl_tree_t fjs_referents;
+ findjsobjects_referent_t *fjs_head;
+ findjsobjects_referent_t *fjs_tail;
+ findjsobjects_obj_t *fjs_current;
+ findjsobjects_obj_t *fjs_objects;
+ findjsobjects_stats_t fjs_stats;
+} findjsobjects_state_t;
+
+findjsobjects_obj_t *
+findjsobjects_alloc(uintptr_t addr)
+{
+ findjsobjects_obj_t *obj;
+
+ obj = mdb_zalloc(sizeof (findjsobjects_obj_t), UM_SLEEP);
+ obj->fjso_instances.fjsi_addr = addr;
+ obj->fjso_ninstances = 1;
+
+ return (obj);
+}
+
+void
+findjsobjects_free(findjsobjects_obj_t *obj)
+{
+ findjsobjects_prop_t *prop, *next;
+
+ for (prop = obj->fjso_props; prop != NULL; prop = next) {
+ next = prop->fjsp_next;
+ mdb_free(prop, sizeof (findjsobjects_prop_t) +
+ strlen(prop->fjsp_desc));
+ }
+
+ mdb_free(obj, sizeof (findjsobjects_obj_t));
+}
+
+int
+findjsobjects_cmp(findjsobjects_obj_t *lhs, findjsobjects_obj_t *rhs)
+{
+ findjsobjects_prop_t *lprop, *rprop;
+ int rv;
+
+ lprop = lhs->fjso_props;
+ rprop = rhs->fjso_props;
+
+ while (lprop != NULL && rprop != NULL) {
+ if ((rv = strcmp(lprop->fjsp_desc, rprop->fjsp_desc)) != 0)
+ return (rv > 0 ? 1 : -1);
+
+ lprop = lprop->fjsp_next;
+ rprop = rprop->fjsp_next;
+ }
+
+ if (lprop != NULL)
+ return (1);
+
+ if (rprop != NULL)
+ return (-1);
+
+ if (lhs->fjso_nprops > rhs->fjso_nprops)
+ return (1);
+
+ if (lhs->fjso_nprops < rhs->fjso_nprops)
+ return (-1);
+
+ rv = strcmp(lhs->fjso_constructor, rhs->fjso_constructor);
+
+ return (rv < 0 ? -1 : rv > 0 ? 1 : 0);
+}
+
+int
+findjsobjects_cmp_referents(findjsobjects_referent_t *lhs,
+ findjsobjects_referent_t *rhs)
+{
+ if (lhs->fjsr_addr < rhs->fjsr_addr)
+ return (-1);
+
+ if (lhs->fjsr_addr > rhs->fjsr_addr)
+ return (1);
+
+ return (0);
+}
+
+int
+findjsobjects_cmp_ninstances(const void *l, const void *r)
+{
+ findjsobjects_obj_t *lhs = *((findjsobjects_obj_t **)l);
+ findjsobjects_obj_t *rhs = *((findjsobjects_obj_t **)r);
+ size_t lprod = lhs->fjso_ninstances * lhs->fjso_nprops;
+ size_t rprod = rhs->fjso_ninstances * rhs->fjso_nprops;
+
+ if (lprod < rprod)
+ return (-1);
+
+ if (lprod > rprod)
+ return (1);
+
+ if (lhs->fjso_ninstances < rhs->fjso_ninstances)
+ return (-1);
+
+ if (lhs->fjso_ninstances > rhs->fjso_ninstances)
+ return (1);
+
+ if (lhs->fjso_nprops < rhs->fjso_nprops)
+ return (-1);
+
+ if (lhs->fjso_nprops > rhs->fjso_nprops)
+ return (1);
+
+ return (0);
+}
+
+/*ARGSUSED*/
+int
+findjsobjects_prop(const char *desc, uintptr_t val, void *arg)
+{
+ findjsobjects_state_t *fjs = arg;
+ findjsobjects_obj_t *current = fjs->fjs_current;
+ findjsobjects_prop_t *prop;
+
+ if (desc == NULL)
+ desc = "<unknown>";
+
+ prop = mdb_zalloc(sizeof (findjsobjects_prop_t) +
+ strlen(desc), UM_SLEEP);
+
+ strcpy(prop->fjsp_desc, desc);
+
+ if (current->fjso_last != NULL) {
+ current->fjso_last->fjsp_next = prop;
+ } else {
+ current->fjso_props = prop;
+ }
+
+ current->fjso_last = prop;
+ current->fjso_nprops++;
+ current->fjso_malformed =
+ val == NULL && current->fjso_nprops == 1 && desc[0] == '<';
+
+ return (0);
+}
+
+static void
+findjsobjects_constructor(findjsobjects_obj_t *obj)
+{
+ char *bufp = obj->fjso_constructor;
+ size_t len = sizeof (obj->fjso_constructor);
+ uintptr_t map, funcinfop;
+ uintptr_t addr = obj->fjso_instances.fjsi_addr;
+ uint8_t type;
+
+ v8_silent++;
+
+ if (read_heap_ptr(&map, addr, V8_OFF_HEAPOBJECT_MAP) != 0 ||
+ read_heap_ptr(&addr, map, V8_OFF_MAP_CONSTRUCTOR) != 0)
+ goto out;
+
+ if (read_typebyte(&type, addr) != 0)
+ goto out;
+
+ if (strcmp(enum_lookup_str(v8_types, type, ""), "JSFunction") != 0)
+ goto out;
+
+ if (read_heap_ptr(&funcinfop, addr, V8_OFF_JSFUNCTION_SHARED) != 0)
+ goto out;
+
+ if (jsfunc_name(funcinfop, &bufp, &len) != 0)
+ goto out;
+out:
+ v8_silent--;
+}
+
+int
+findjsobjects_range(findjsobjects_state_t *fjs, uintptr_t addr, uintptr_t size)
+{
+ uintptr_t limit;
+ findjsobjects_stats_t *stats = &fjs->fjs_stats;
+ uint8_t type;
+ int jsobject = V8_TYPE_JSOBJECT, jsarray = V8_TYPE_JSARRAY;
+ caddr_t range = mdb_alloc(size, UM_SLEEP);
+ uintptr_t base = addr, mapaddr;
+
+ if (mdb_vread(range, size, addr) == -1)
+ return (0);
+
+ for (limit = addr + size; addr < limit; addr++) {
+ findjsobjects_instance_t *inst;
+ findjsobjects_obj_t *obj;
+ avl_index_t where;
+
+ if (V8_IS_SMI(addr))
+ continue;
+
+ if (!V8_IS_HEAPOBJECT(addr))
+ continue;
+
+ stats->fjss_heapobjs++;
+
+ mapaddr = *((uintptr_t *)((uintptr_t)range +
+ (addr - base) + V8_OFF_HEAPOBJECT_MAP));
+
+ if (!V8_IS_HEAPOBJECT(mapaddr))
+ continue;
+
+ mapaddr += V8_OFF_MAP_INSTANCE_ATTRIBUTES;
+ stats->fjss_typereads++;
+
+ if (mapaddr >= base && mapaddr < base + size) {
+ stats->fjss_cached++;
+
+ type = *((uint8_t *)((uintptr_t)range +
+ (mapaddr - base)));
+ } else {
+ if (mdb_vread(&type, sizeof (uint8_t), mapaddr) == -1)
+ continue;
+ }
+
+ if (type != jsobject && type != jsarray)
+ continue;
+
+ stats->fjss_jsobjs++;
+
+ fjs->fjs_current = findjsobjects_alloc(addr);
+
+ if (type == jsobject) {
+ if (jsobj_properties(addr,
+ findjsobjects_prop, fjs) != 0) {
+ findjsobjects_free(fjs->fjs_current);
+ fjs->fjs_current = NULL;
+ continue;
+ }
+
+ findjsobjects_constructor(fjs->fjs_current);
+ stats->fjss_objects++;
+ } else {
+ uintptr_t ptr;
+ size_t *nprops = &fjs->fjs_current->fjso_nprops;
+ ssize_t len = V8_OFF_JSARRAY_LENGTH;
+ ssize_t elems = V8_OFF_JSOBJECT_ELEMENTS;
+ ssize_t flen = V8_OFF_FIXEDARRAY_LENGTH;
+ uintptr_t nelems;
+ uint8_t t;
+
+ if (read_heap_smi(nprops, addr, len) != 0 ||
+ read_heap_ptr(&ptr, addr, elems) != 0 ||
+ !V8_IS_HEAPOBJECT(ptr) ||
+ read_typebyte(&t, ptr) != 0 ||
+ t != V8_TYPE_FIXEDARRAY ||
+ read_heap_smi(&nelems, ptr, flen) != 0 ||
+ nelems < *nprops) {
+ findjsobjects_free(fjs->fjs_current);
+ fjs->fjs_current = NULL;
+ continue;
+ }
+
+ strcpy(fjs->fjs_current->fjso_constructor, "Array");
+ stats->fjss_arrays++;
+ }
+
+ /*
+ * Now determine if we already have an object matching our
+ * properties. If we don't, we'll add our new object; if we
+ * do we'll merely enqueue our instance.
+ */
+ obj = avl_find(&fjs->fjs_tree, fjs->fjs_current, &where);
+
+ if (obj == NULL) {
+ avl_add(&fjs->fjs_tree, fjs->fjs_current);
+ fjs->fjs_current->fjso_next = fjs->fjs_objects;
+ fjs->fjs_objects = fjs->fjs_current;
+ fjs->fjs_current = NULL;
+ stats->fjss_uniques++;
+ continue;
+ }
+
+ findjsobjects_free(fjs->fjs_current);
+ fjs->fjs_current = NULL;
+
+ inst = mdb_alloc(sizeof (findjsobjects_instance_t), UM_SLEEP);
+ inst->fjsi_addr = addr;
+ inst->fjsi_next = obj->fjso_instances.fjsi_next;
+ obj->fjso_instances.fjsi_next = inst;
+ obj->fjso_ninstances++;
+ }
+
+ mdb_free(range, size);
+
+ return (0);
+}
+
+static int
+findjsobjects_mapping(findjsobjects_state_t *fjs, const prmap_t *pmp,
+ const char *name)
+{
+ if (name != NULL && !(fjs->fjs_brk && (pmp->pr_mflags & MA_BREAK)))
+ return (0);
+
+ if (fjs->fjs_addr != NULL && (fjs->fjs_addr < pmp->pr_vaddr ||
+ fjs->fjs_addr >= pmp->pr_vaddr + pmp->pr_size))
+ return (0);
+
+ return (findjsobjects_range(fjs, pmp->pr_vaddr, pmp->pr_size));
+}
+
+static void
+findjsobjects_references_add(findjsobjects_state_t *fjs, uintptr_t val,
+ const char *desc, size_t index)
+{
+ findjsobjects_referent_t search, *referent;
+ findjsobjects_reference_t *reference;
+
+ search.fjsr_addr = val;
+
+ if ((referent = avl_find(&fjs->fjs_referents, &search, NULL)) == NULL)
+ return;
+
+ reference = mdb_zalloc(sizeof (*reference), UM_SLEEP | UM_GC);
+ reference->fjsrf_addr = fjs->fjs_addr;
+
+ if (desc != NULL) {
+ reference->fjsrf_desc =
+ mdb_alloc(strlen(desc) + 1, UM_SLEEP | UM_GC);
+ (void) strcpy(reference->fjsrf_desc, desc);
+ } else {
+ reference->fjsrf_index = index;
+ }
+
+ if (referent->fjsr_head == NULL) {
+ referent->fjsr_head = reference;
+ } else {
+ referent->fjsr_tail->fjsrf_next = reference;
+ }
+
+ referent->fjsr_tail = reference;
+}
+
+static int
+findjsobjects_references_prop(const char *desc, uintptr_t val, void *arg)
+{
+ findjsobjects_references_add(arg, val, desc, -1);
+
+ return (0);
+}
+
+static void
+findjsobjects_references_array(findjsobjects_state_t *fjs,
+ findjsobjects_obj_t *obj)
+{
+ findjsobjects_instance_t *inst = &obj->fjso_instances;
+ uintptr_t *elts;
+ size_t i, len;
+
+ for (; inst != NULL; inst = inst->fjsi_next) {
+ uintptr_t addr = inst->fjsi_addr, ptr;
+
+ if (read_heap_ptr(&ptr, addr, V8_OFF_JSOBJECT_ELEMENTS) != 0 ||
+ read_heap_array(ptr, &elts, &len, UM_SLEEP) != 0)
+ continue;
+
+ fjs->fjs_addr = addr;
+
+ for (i = 0; i < len; i++)
+ findjsobjects_references_add(fjs, elts[i], NULL, i);
+
+ mdb_free(elts, len * sizeof (uintptr_t));
+ }
+}
+
+static void
+findjsobjects_referent(findjsobjects_state_t *fjs, uintptr_t addr)
+{
+ findjsobjects_referent_t search, *referent;
+
+ search.fjsr_addr = addr;
+
+ if (avl_find(&fjs->fjs_referents, &search, NULL) != NULL) {
+ assert(fjs->fjs_marking);
+ mdb_warn("%p is already marked; ignoring\n", addr);
+ return;
+ }
+
+ referent = mdb_zalloc(sizeof (findjsobjects_referent_t), UM_SLEEP);
+ referent->fjsr_addr = addr;
+
+ avl_add(&fjs->fjs_referents, referent);
+
+ if (fjs->fjs_tail != NULL) {
+ fjs->fjs_tail->fjsr_next = referent;
+ } else {
+ fjs->fjs_head = referent;
+ }
+
+ fjs->fjs_tail = referent;
+
+ if (fjs->fjs_marking)
+ mdb_printf("findjsobjects: marked %p\n", addr);
+}
+
+static void
+findjsobjects_references(findjsobjects_state_t *fjs)
+{
+ findjsobjects_reference_t *reference;
+ findjsobjects_referent_t *referent;
+ avl_tree_t *referents = &fjs->fjs_referents;
+ findjsobjects_obj_t *obj;
+ void *cookie = NULL;
+ uintptr_t addr;
+
+ fjs->fjs_referred = B_FALSE;
+
+ v8_silent++;
+
+ /*
+ * First traverse over all objects and arrays, looking for references
+ * to our designated referent(s).
+ */
+ for (obj = fjs->fjs_objects; obj != NULL; obj = obj->fjso_next) {
+ findjsobjects_instance_t *head = &obj->fjso_instances, *inst;
+
+ if (obj->fjso_nprops != 0 && obj->fjso_props == NULL) {
+ findjsobjects_references_array(fjs, obj);
+ continue;
+ }
+
+ for (inst = head; inst != NULL; inst = inst->fjsi_next) {
+ fjs->fjs_addr = inst->fjsi_addr;
+
+ (void) jsobj_properties(inst->fjsi_addr,
+ findjsobjects_references_prop, fjs);
+ }
+ }
+
+ v8_silent--;
+ fjs->fjs_addr = NULL;
+
+ /*
+ * Now go over our referent(s), reporting any references that we have
+ * accumulated.
+ */
+ for (referent = fjs->fjs_head; referent != NULL;
+ referent = referent->fjsr_next) {
+ addr = referent->fjsr_addr;
+
+ if ((reference = referent->fjsr_head) == NULL) {
+ mdb_printf("%p is not referred to by a "
+ "known object.\n", addr);
+ continue;
+ }
+
+ for (; reference != NULL; reference = reference->fjsrf_next) {
+ mdb_printf("%p referred to by %p",
+ addr, reference->fjsrf_addr);
+
+ if (reference->fjsrf_desc == NULL) {
+ mdb_printf("[%d]\n", reference->fjsrf_index);
+ } else {
+ mdb_printf(".%s\n", reference->fjsrf_desc);
+ }
+ }
+ }
+
+ /*
+ * Finally, destroy our referent nodes.
+ */
+ while ((referent = avl_destroy_nodes(referents, &cookie)) != NULL)
+ mdb_free(referent, sizeof (findjsobjects_referent_t));
+
+ fjs->fjs_head = NULL;
+ fjs->fjs_tail = NULL;
+}
+
+static findjsobjects_instance_t *
+findjsobjects_instance(findjsobjects_state_t *fjs, uintptr_t addr,
+ findjsobjects_instance_t **headp)
+{
+ findjsobjects_obj_t *obj;
+
+ for (obj = fjs->fjs_objects; obj != NULL; obj = obj->fjso_next) {
+ findjsobjects_instance_t *head = &obj->fjso_instances, *inst;
+
+ for (inst = head; inst != NULL; inst = inst->fjsi_next) {
+ if (inst->fjsi_addr == addr) {
+ *headp = head;
+ return (inst);
+ }
+ }
+ }
+
+ return (NULL);
+}
+
+/*ARGSUSED*/
+static void
+findjsobjects_match_all(findjsobjects_obj_t *obj, const char *ignored)
+{
+ mdb_printf("%p\n", obj->fjso_instances.fjsi_addr);
+}
+
+static void
+findjsobjects_match_propname(findjsobjects_obj_t *obj, const char *propname)
+{
+ findjsobjects_prop_t *prop;
+
+ for (prop = obj->fjso_props; prop != NULL; prop = prop->fjsp_next) {
+ if (strcmp(prop->fjsp_desc, propname) == 0) {
+ mdb_printf("%p\n", obj->fjso_instances.fjsi_addr);
+ return;
+ }
+ }
+}
+
+static void
+findjsobjects_match_constructor(findjsobjects_obj_t *obj,
+ const char *constructor)
+{
+ if (strcmp(constructor, obj->fjso_constructor) == 0)
+ mdb_printf("%p\n", obj->fjso_instances.fjsi_addr);
+}
+
+static int
+findjsobjects_match(findjsobjects_state_t *fjs, uintptr_t addr,
+ uint_t flags, void (*func)(findjsobjects_obj_t *, const char *),
+ const char *match)
+{
+ findjsobjects_obj_t *obj;
+
+ if (!(flags & DCMD_ADDRSPEC)) {
+ for (obj = fjs->fjs_objects; obj != NULL;
+ obj = obj->fjso_next) {
+ if (obj->fjso_malformed && !fjs->fjs_allobjs)
+ continue;
+
+ func(obj, match);
+ }
+
+ return (DCMD_OK);
+ }
+
+ /*
+ * First, look for the specified address among the representative
+ * objects.
+ */
+ for (obj = fjs->fjs_objects; obj != NULL; obj = obj->fjso_next) {
+ if (obj->fjso_instances.fjsi_addr == addr) {
+ func(obj, match);
+ return (DCMD_OK);
+ }
+ }
+
+ /*
+ * We didn't find it among the representative objects; iterate over
+ * all objects.
+ */
+ for (obj = fjs->fjs_objects; obj != NULL; obj = obj->fjso_next) {
+ findjsobjects_instance_t *head = &obj->fjso_instances, *inst;
+
+ for (inst = head; inst != NULL; inst = inst->fjsi_next) {
+ if (inst->fjsi_addr == addr) {
+ func(obj, match);
+ return (DCMD_OK);
+ }
+ }
+ }
+
+ mdb_warn("%p does not correspond to a known object\n", addr);
+ return (DCMD_ERR);
+}
+
+static void
+findjsobjects_print(findjsobjects_obj_t *obj)
+{
+ int col = 19 + (sizeof (uintptr_t) * 2) + strlen("..."), len;
+ uintptr_t addr = obj->fjso_instances.fjsi_addr;
+ findjsobjects_prop_t *prop;
+
+ mdb_printf("%?p %8d %8d ",
+ addr, obj->fjso_ninstances, obj->fjso_nprops);
+
+ if (obj->fjso_constructor[0] != '\0') {
+ mdb_printf("%s%s", obj->fjso_constructor,
+ obj->fjso_props != NULL ? ": " : "");
+ col += strlen(obj->fjso_constructor) + 2;
+ }
+
+ for (prop = obj->fjso_props; prop != NULL; prop = prop->fjsp_next) {
+ if (col + (len = strlen(prop->fjsp_desc) + 2) < 80) {
+ mdb_printf("%s%s", prop->fjsp_desc,
+ prop->fjsp_next != NULL ? ", " : "");
+ col += len;
+ } else {
+ mdb_printf("...");
+ break;
+ }
+ }
+
+ mdb_printf("\n", col);
+}
+
+static void
+dcmd_findjsobjects_help(void)
+{
+ mdb_printf("%s\n\n",
+"Finds all JavaScript objects in the V8 heap via brute force iteration over\n"
+"all mapped anonymous memory. (This can take up to several minutes on large\n"
+"dumps.) The output consists of representative objects, the number of\n"
+"instances of that object and the number of properties on the object --\n"
+"followed by the constructor and first few properties of the objects. Once\n"
+"run, subsequent calls to ::findjsobjects use cached data. If provided an\n"
+"address (and in the absence of -r, described below), ::findjsobjects treats\n"
+"the address as that of a representative object, and lists all instances of\n"
+"that object (that is, all objects that have a matching property signature).");
+
+ mdb_dec_indent(2);
+ mdb_printf("%<b>OPTIONS%</b>\n");
+ mdb_inc_indent(2);
+
+ mdb_printf("%s\n",
+" -b Include the heap denoted by the brk(2) (normally excluded)\n"
+" -c cons Display representative objects with the specified constructor\n"
+" -p prop Display representative objects that have the specified property\n"
+" -l List all objects that match the representative object\n"
+" -m Mark specified object for later reference determination via -r\n"
+" -r Find references to the specified and/or marked object(s)\n"
+" -v Provide verbose statistics\n");
+}
+
+static int
+dcmd_findjsobjects(uintptr_t addr,
+ uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ static findjsobjects_state_t fjs;
+ static findjsobjects_stats_t *stats = &fjs.fjs_stats;
+ findjsobjects_obj_t *obj;
+ struct ps_prochandle *Pr;
+ boolean_t references = B_FALSE, listlike = B_FALSE;
+ const char *propname = NULL;
+ const char *constructor = NULL;
+
+ fjs.fjs_verbose = B_FALSE;
+ fjs.fjs_brk = B_FALSE;
+ fjs.fjs_marking = B_FALSE;
+ fjs.fjs_allobjs = B_FALSE;
+
+ if (mdb_getopts(argc, argv,
+ 'a', MDB_OPT_SETBITS, B_TRUE, &fjs.fjs_allobjs,
+ 'b', MDB_OPT_SETBITS, B_TRUE, &fjs.fjs_brk,
+ 'c', MDB_OPT_STR, &constructor,
+ 'l', MDB_OPT_SETBITS, B_TRUE, &listlike,
+ 'm', MDB_OPT_SETBITS, B_TRUE, &fjs.fjs_marking,
+ 'p', MDB_OPT_STR, &propname,
+ 'r', MDB_OPT_SETBITS, B_TRUE, &references,
+ 'v', MDB_OPT_SETBITS, B_TRUE, &fjs.fjs_verbose,
+ NULL) != argc)
+ return (DCMD_USAGE);
+
+ if (!fjs.fjs_initialized) {
+ avl_create(&fjs.fjs_tree,
+ (int(*)(const void *, const void *))findjsobjects_cmp,
+ sizeof (findjsobjects_obj_t),
+ offsetof(findjsobjects_obj_t, fjso_node));
+
+ avl_create(&fjs.fjs_referents,
+ (int(*)(const void *, const void *))
+ findjsobjects_cmp_referents,
+ sizeof (findjsobjects_referent_t),
+ offsetof(findjsobjects_referent_t, fjsr_node));
+
+ fjs.fjs_initialized = B_TRUE;
+ }
+
+ if (avl_is_empty(&fjs.fjs_tree)) {
+ findjsobjects_obj_t **sorted;
+ int nobjs, i;
+ hrtime_t start = gethrtime();
+
+ if (mdb_get_xdata("pshandle", &Pr, sizeof (Pr)) == -1) {
+ mdb_warn("couldn't read pshandle xdata");
+ return (DCMD_ERR);
+ }
+
+ v8_silent++;
+
+ if (Pmapping_iter(Pr,
+ (proc_map_f *)findjsobjects_mapping, &fjs) != 0) {
+ v8_silent--;
+ return (DCMD_ERR);
+ }
+
+ if ((nobjs = avl_numnodes(&fjs.fjs_tree)) != 0) {
+ /*
+ * We have the objects -- now sort them.
+ */
+ sorted = mdb_alloc(nobjs * sizeof (void *),
+ UM_SLEEP | UM_GC);
+
+ for (obj = fjs.fjs_objects, i = 0; obj != NULL;
+ obj = obj->fjso_next, i++) {
+ sorted[i] = obj;
+ }
+
+ qsort(sorted, avl_numnodes(&fjs.fjs_tree),
+ sizeof (void *), findjsobjects_cmp_ninstances);
+
+ for (i = 1, fjs.fjs_objects = sorted[0]; i < nobjs; i++)
+ sorted[i - 1]->fjso_next = sorted[i];
+
+ sorted[nobjs - 1]->fjso_next = NULL;
+ }
+
+ v8_silent--;
+
+ if (fjs.fjs_verbose) {
+ const char *f = "findjsobjects: %30s => %d\n";
+ int elapsed = (int)((gethrtime() - start) / NANOSEC);
+
+ mdb_printf(f, "elapsed time (seconds)", elapsed);
+ mdb_printf(f, "heap objects", stats->fjss_heapobjs);
+ mdb_printf(f, "type reads", stats->fjss_typereads);
+ mdb_printf(f, "cached reads", stats->fjss_cached);
+ mdb_printf(f, "JavaScript objects", stats->fjss_jsobjs);
+ mdb_printf(f, "processed objects", stats->fjss_objects);
+ mdb_printf(f, "processed arrays", stats->fjss_arrays);
+ mdb_printf(f, "unique objects", stats->fjss_uniques);
+ }
+ }
+
+ if (listlike && !(flags & DCMD_ADDRSPEC)) {
+ if (propname != NULL || constructor != NULL) {
+ char opt = propname != NULL ? 'p' : 'c';
+
+ mdb_warn("cannot specify -l with -%c; instead, pipe "
+ "output of ::findjsobjects -%c to "
+ "::findjsobjects -l\n", opt, opt);
+ return (DCMD_ERR);
+ }
+
+ return (findjsobjects_match(&fjs, addr, flags,
+ findjsobjects_match_all, NULL));
+ }
+
+ if (propname != NULL) {
+ if (constructor != NULL) {
+ mdb_warn("cannot specify both a property name "
+ "and a constructor\n");
+ return (DCMD_ERR);
+ }
+
+ return (findjsobjects_match(&fjs, addr, flags,
+ findjsobjects_match_propname, propname));
+ }
+
+ if (constructor != NULL) {
+ return (findjsobjects_match(&fjs, addr, flags,
+ findjsobjects_match_constructor, constructor));
+ }
+
+ if (references && !(flags & DCMD_ADDRSPEC) &&
+ avl_is_empty(&fjs.fjs_referents)) {
+ mdb_warn("must specify or mark an object to find references\n");
+ return (DCMD_ERR);
+ }
+
+ if (fjs.fjs_marking && !(flags & DCMD_ADDRSPEC)) {
+ mdb_warn("must specify an object to mark\n");
+ return (DCMD_ERR);
+ }
+
+ if (references && fjs.fjs_marking) {
+ mdb_warn("can't both mark an object and find its references\n");
+ return (DCMD_ERR);
+ }
+
+ if (flags & DCMD_ADDRSPEC) {
+ findjsobjects_instance_t *inst, *head;
+
+ /*
+ * If we've been passed an address, it's to either list like
+ * objects (-l), mark an object (-m) or find references to the
+ * specified/marked objects (-r). (Note that the absence of
+ * any of these options implies -l.)
+ */
+ inst = findjsobjects_instance(&fjs, addr, &head);
+
+ if (inst == NULL) {
+ mdb_warn("%p is not a valid object\n", addr);
+ return (DCMD_ERR);
+ }
+
+ if (!references && !fjs.fjs_marking) {
+ for (inst = head; inst != NULL; inst = inst->fjsi_next)
+ mdb_printf("%p\n", inst->fjsi_addr);
+
+ return (DCMD_OK);
+ }
+
+ if (!listlike) {
+ findjsobjects_referent(&fjs, inst->fjsi_addr);
+ } else {
+ for (inst = head; inst != NULL; inst = inst->fjsi_next)
+ findjsobjects_referent(&fjs, inst->fjsi_addr);
+ }
+ }
+
+ if (references)
+ findjsobjects_references(&fjs);
+
+ if (references || fjs.fjs_marking)
+ return (DCMD_OK);
+
+ mdb_printf("%?s %8s %8s %s\n", "OBJECT",
+ "#OBJECTS", "#PROPS", "CONSTRUCTOR: PROPS");
+
+ for (obj = fjs.fjs_objects; obj != NULL; obj = obj->fjso_next) {
+ if (obj->fjso_malformed && !fjs.fjs_allobjs)
+ continue;
+
+ findjsobjects_print(obj);
+ }
+
+ return (DCMD_OK);
+}
+
+/* ARGSUSED */
+static int
+dcmd_jsframe(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ uintptr_t fptr, raddr;
+ boolean_t opt_v = B_FALSE, opt_i = B_FALSE;
+ char *opt_f = NULL, *opt_p = NULL;
+ uintptr_t opt_n = 5;
+
+ if (mdb_getopts(argc, argv,
+ 'v', MDB_OPT_SETBITS, B_TRUE, &opt_v,
+ 'i', MDB_OPT_SETBITS, B_TRUE, &opt_i,
+ 'f', MDB_OPT_STR, &opt_f,
+ 'n', MDB_OPT_UINTPTR, &opt_n,
+ 'p', MDB_OPT_STR, &opt_p, NULL) != argc)
+ return (DCMD_USAGE);
+
+ /*
+ * As with $C, we assume we are given a *pointer* to the frame pointer
+ * for a frame, rather than the actual frame pointer for the frame of
+ * interest. This is needed to show the instruction pointer, which is
+ * actually stored with the next frame. For debugging, this can be
+ * overridden with the "-i" option (for "immediate").
+ */
+ if (opt_i)
+ return (do_jsframe(addr, 0, opt_v, opt_f, opt_p, opt_n));
+
+ if (mdb_vread(&raddr, sizeof (raddr),
+ addr + sizeof (uintptr_t)) == -1) {
+ mdb_warn("failed to read return address from %p",
+ addr + sizeof (uintptr_t));
+ return (DCMD_ERR);
+ }
+
+ if (mdb_vread(&fptr, sizeof (fptr), addr) == -1) {
+ mdb_warn("failed to read frame pointer from %p", addr);
+ return (DCMD_ERR);
+ }
+
+ if (fptr == NULL)
+ return (DCMD_OK);
+
+ return (do_jsframe(fptr, raddr, opt_v, opt_f, opt_p, opt_n));
+}
+
+/* ARGSUSED */
+static int
+dcmd_jsprint(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ char *buf, *bufp;
+ size_t bufsz = 262144, len = bufsz;
+ jsobj_print_t jsop;
+ boolean_t opt_b = B_FALSE;
+ int rv, i;
+
+ bzero(&jsop, sizeof (jsop));
+ jsop.jsop_depth = 2;
+ jsop.jsop_printaddr = B_FALSE;
+
+ i = mdb_getopts(argc, argv,
+ 'a', MDB_OPT_SETBITS, B_TRUE, &jsop.jsop_printaddr,
+ 'b', MDB_OPT_SETBITS, B_TRUE, &opt_b,
+ 'd', MDB_OPT_UINT64, &jsop.jsop_depth, NULL);
+
+ if (opt_b)
+ jsop.jsop_baseaddr = addr;
+
+ do {
+ if (i != argc) {
+ const mdb_arg_t *member = &argv[i++];
+
+ if (member->a_type != MDB_TYPE_STRING)
+ return (DCMD_USAGE);
+
+ jsop.jsop_member = member->a_un.a_str;
+ }
+
+ for (;;) {
+ if ((buf = bufp =
+ mdb_zalloc(bufsz, UM_NOSLEEP)) == NULL)
+ return (DCMD_ERR);
+
+ jsop.jsop_bufp = &bufp;
+ jsop.jsop_lenp = &len;
+
+ rv = jsobj_print(addr, &jsop);
+
+ if (len > 0)
+ break;
+
+ mdb_free(buf, bufsz);
+ bufsz <<= 1;
+ len = bufsz;
+ }
+
+ if (jsop.jsop_member == NULL && rv != 0) {
+ if (!jsop.jsop_descended)
+ mdb_warn("%s\n", buf);
+
+ return (DCMD_ERR);
+ }
+
+ if (jsop.jsop_member && !jsop.jsop_found) {
+ if (jsop.jsop_baseaddr)
+ (void) mdb_printf("%p: ", jsop.jsop_baseaddr);
+
+ (void) mdb_printf("undefined%s",
+ i < argc ? " " : "");
+ } else {
+ (void) mdb_printf("%s%s", buf, i < argc &&
+ !isspace(buf[strlen(buf) - 1]) ? " " : "");
+ }
+
+ mdb_free(buf, bufsz);
+ jsop.jsop_found = B_FALSE;
+ jsop.jsop_baseaddr = NULL;
+ } while (i < argc);
+
+ mdb_printf("\n");
+
+ return (DCMD_OK);
+}
+
+/* ARGSUSED */
+static int
+dcmd_v8field(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ v8_class_t *clp;
+ v8_field_t *flp;
+ const char *klass, *field;
+ uintptr_t offset = 0;
+
+ /*
+ * We may be invoked with either two arguments (class and field name) or
+ * three (an offset to save).
+ */
+ if (argc != 2 && argc != 3)
+ return (DCMD_USAGE);
+
+ if (argv[0].a_type != MDB_TYPE_STRING ||
+ argv[1].a_type != MDB_TYPE_STRING)
+ return (DCMD_USAGE);
+
+ klass = argv[0].a_un.a_str;
+ field = argv[1].a_un.a_str;
+
+ if (argc == 3) {
+ if (argv[2].a_type != MDB_TYPE_STRING)
+ return (DCMD_USAGE);
+
+ offset = mdb_strtoull(argv[2].a_un.a_str);
+ }
+
+ for (clp = v8_classes; clp != NULL; clp = clp->v8c_next)
+ if (strcmp(clp->v8c_name, klass) == 0)
+ break;
+
+ if (clp == NULL) {
+ (void) mdb_printf("error: no such class: \"%s\"", klass);
+ return (DCMD_ERR);
+ }
+
+ for (flp = clp->v8c_fields; flp != NULL; flp = flp->v8f_next)
+ if (strcmp(field, flp->v8f_name) == 0)
+ break;
+
+ if (flp == NULL) {
+ if (argc == 2) {
+ mdb_printf("error: no such field in class \"%s\": "
+ "\"%s\"", klass, field);
+ return (DCMD_ERR);
+ }
+
+ flp = conf_field_create(clp, field, offset);
+ if (flp == NULL) {
+ mdb_warn("failed to create field");
+ return (DCMD_ERR);
+ }
+ } else if (argc == 3) {
+ flp->v8f_offset = offset;
+ }
+
+ mdb_printf("%s::%s at offset 0x%x\n", klass, field, flp->v8f_offset);
+ return (DCMD_OK);
+}
+
+/* ARGSUSED */
+static int
+dcmd_v8array(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ uint8_t type;
+ uintptr_t *array;
+ size_t ii, len;
+
+ if (read_typebyte(&type, addr) != 0)
+ return (DCMD_ERR);
+
+ if (type != V8_TYPE_FIXEDARRAY) {
+ mdb_warn("%p is not an instance of FixedArray\n", addr);
+ return (DCMD_ERR);
+ }
+
+ if (read_heap_array(addr, &array, &len, UM_SLEEP | UM_GC) != 0)
+ return (DCMD_ERR);
+
+ for (ii = 0; ii < len; ii++)
+ mdb_printf("%p\n", array[ii]);
+
+ return (DCMD_OK);
+}
+
+/* ARGSUSED */
+static int
+dcmd_jsstack(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ uintptr_t raddr, opt_n = 5;
+ boolean_t opt_v = B_FALSE;
+ char *opt_f = NULL, *opt_p = NULL;
+
+ if (mdb_getopts(argc, argv,
+ 'v', MDB_OPT_SETBITS, B_TRUE, &opt_v,
+ 'f', MDB_OPT_STR, &opt_f,
+ 'n', MDB_OPT_UINTPTR, &opt_n,
+ 'p', MDB_OPT_STR, &opt_p,
+ NULL) != argc)
+ return (DCMD_USAGE);
+
+ /*
+ * The "::jsframe" walker iterates the valid frame pointers, but the
+ * "::jsframe" dcmd looks at the frame after the one it was given, so we
+ * have to explicitly examine the top frame here.
+ */
+ if (!(flags & DCMD_ADDRSPEC)) {
+ if (load_current_context(&addr, &raddr) != 0 ||
+ do_jsframe(addr, raddr, opt_v, opt_f, opt_p, opt_n) != 0)
+ return (DCMD_ERR);
+ }
+
+ if (mdb_pwalk_dcmd("jsframe", "jsframe", argc, argv, addr) == -1)
+ return (DCMD_ERR);
+
+ return (DCMD_OK);
+}
+
+/* ARGSUSED */
+static int
+dcmd_v8str(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ boolean_t opt_v = B_FALSE;
+ char buf[512 * 1024];
+ char *bufp;
+ size_t len;
+
+ if (mdb_getopts(argc, argv, 'v', MDB_OPT_SETBITS, B_TRUE, &opt_v,
+ NULL) != argc)
+ return (DCMD_USAGE);
+
+ bufp = buf;
+ len = sizeof (buf);
+ if (jsstr_print(addr, (opt_v ? JSSTR_VERBOSE : JSSTR_NONE) |
+ JSSTR_QUOTED, &bufp, &len) != 0)
+ return (DCMD_ERR);
+
+ mdb_printf("%s\n", buf);
+ return (DCMD_OK);
+}
+
+static void
+dcmd_v8load_help(void)
+{
+ v8_cfg_t *cfp, **cfgpp;
+
+ mdb_printf(
+ "To traverse in-memory V8 structures, the V8 dmod requires\n"
+ "configuration that describes the layout of various V8 structures\n"
+ "in memory. Normally, this information is pulled from metadata\n"
+ "in the target binary. However, it's possible to use the module\n"
+ "with a binary not built with metadata by loading one of the\n"
+ "canned configurations.\n\n");
+
+ mdb_printf("Available configurations:\n");
+
+ (void) mdb_inc_indent(4);
+
+ for (cfgpp = v8_cfgs; *cfgpp != NULL; cfgpp++) {
+ cfp = *cfgpp;
+ mdb_printf("%-10s %s\n", cfp->v8cfg_name, cfp->v8cfg_label);
+ }
+
+ (void) mdb_dec_indent(4);
+}
+
+/* ARGSUSED */
+static int
+dcmd_v8load(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ v8_cfg_t *cfgp = NULL, **cfgpp;
+
+ if (v8_classes != NULL) {
+ mdb_warn("v8 module already configured\n");
+ return (DCMD_ERR);
+ }
+
+ if (argc < 1 || argv->a_type != MDB_TYPE_STRING)
+ return (DCMD_USAGE);
+
+ for (cfgpp = v8_cfgs; *cfgpp != NULL; cfgpp++) {
+ cfgp = *cfgpp;
+ if (strcmp(argv->a_un.a_str, cfgp->v8cfg_name) == 0)
+ break;
+ }
+
+ if (cfgp == NULL || cfgp->v8cfg_name == NULL) {
+ mdb_warn("unknown configuration: \"%s\"\n", argv->a_un.a_str);
+ return (DCMD_ERR);
+ }
+
+ if (autoconfigure(cfgp) == -1) {
+ mdb_warn("autoconfigure failed\n");
+ return (DCMD_ERR);
+ }
+
+ mdb_printf("V8 dmod configured based on %s\n", cfgp->v8cfg_name);
+ return (DCMD_OK);
+}
+
+/* ARGSUSED */
+static int
+dcmd_v8warnings(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ v8_warnings ^= 1;
+ mdb_printf("v8 warnings are now %s\n", v8_warnings ? "on" : "off");
+
+ return (DCMD_OK);
+}
+
+static int
+walk_jsframes_init(mdb_walk_state_t *wsp)
+{
+ if (wsp->walk_addr != NULL)
+ return (WALK_NEXT);
+
+ if (load_current_context(&wsp->walk_addr, NULL) != 0)
+ return (WALK_ERR);
+
+ return (WALK_NEXT);
+}
+
+static int
+walk_jsframes_step(mdb_walk_state_t *wsp)
+{
+ uintptr_t addr, next;
+ int rv;
+
+ addr = wsp->walk_addr;
+ rv = wsp->walk_callback(wsp->walk_addr, NULL, wsp->walk_cbdata);
+
+ if (rv != WALK_NEXT)
+ return (rv);
+
+ if (mdb_vread(&next, sizeof (next), addr) == -1)
+ return (WALK_ERR);
+
+ if (next == NULL)
+ return (WALK_DONE);
+
+ wsp->walk_addr = next;
+ return (WALK_NEXT);
+}
+
+typedef struct jsprop_walk_data {
+ int jspw_nprops;
+ int jspw_current;
+ uintptr_t *jspw_props;
+} jsprop_walk_data_t;
+
+/*ARGSUSED*/
+static int
+walk_jsprop_nprops(const char *desc, uintptr_t val, void *arg)
+{
+ jsprop_walk_data_t *jspw = arg;
+ jspw->jspw_nprops++;
+
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+walk_jsprop_props(const char *desc, uintptr_t val, void *arg)
+{
+ jsprop_walk_data_t *jspw = arg;
+ jspw->jspw_props[jspw->jspw_current++] = val;
+
+ return (0);
+}
+
+static int
+walk_jsprop_init(mdb_walk_state_t *wsp)
+{
+ jsprop_walk_data_t *jspw;
+ uintptr_t addr;
+ uint8_t type;
+
+ if ((addr = wsp->walk_addr) == NULL) {
+ mdb_warn("'jsprop' does not support global walks\n");
+ return (WALK_ERR);
+ }
+
+ if (!V8_IS_HEAPOBJECT(addr) || read_typebyte(&type, addr) != 0 ||
+ type != V8_TYPE_JSOBJECT) {
+ mdb_warn("%p is not a JSObject\n", addr);
+ return (WALK_ERR);
+ }
+
+ jspw = mdb_zalloc(sizeof (jsprop_walk_data_t), UM_SLEEP | UM_GC);
+
+ if (jsobj_properties(addr, walk_jsprop_nprops, jspw) == -1) {
+ mdb_warn("couldn't iterate over properties for %p\n", addr);
+ return (WALK_ERR);
+ }
+
+ jspw->jspw_props = mdb_zalloc(jspw->jspw_nprops *
+ sizeof (uintptr_t), UM_SLEEP | UM_GC);
+
+ if (jsobj_properties(addr, walk_jsprop_props, jspw) == -1) {
+ mdb_warn("couldn't iterate over properties for %p\n", addr);
+ return (WALK_ERR);
+ }
+
+ jspw->jspw_current = 0;
+ wsp->walk_data = jspw;
+
+ return (WALK_NEXT);
+}
+
+static int
+walk_jsprop_step(mdb_walk_state_t *wsp)
+{
+ jsprop_walk_data_t *jspw = wsp->walk_data;
+ int rv;
+
+ if (jspw->jspw_current >= jspw->jspw_nprops)
+ return (WALK_DONE);
+
+ if ((rv = wsp->walk_callback(jspw->jspw_props[jspw->jspw_current++],
+ NULL, wsp->walk_cbdata)) != WALK_NEXT)
+ return (rv);
+
+ return (WALK_NEXT);
+}
+
+/*
+ * MDB linkage
+ */
+
+static const mdb_dcmd_t v8_mdb_dcmds[] = {
+ /*
+ * Commands to inspect JavaScript-level state
+ */
+ { "jsframe", ":[-iv] [-f function] [-p property] [-n numlines]",
+ "summarize a JavaScript stack frame", dcmd_jsframe },
+ { "jsprint", ":[-ab] [-d depth] [member]", "print a JavaScript object",
+ dcmd_jsprint },
+ { "jsstack", "[-v] [-f function] [-p property] [-n numlines]",
+ "print a JavaScript stacktrace", dcmd_jsstack },
+ { "findjsobjects", "?[-vb] [-r | -c cons | -p prop]", "find JavaScript "
+ "objects", dcmd_findjsobjects, dcmd_findjsobjects_help },
+
+ /*
+ * Commands to inspect V8-level state
+ */
+ { "v8array", ":", "print elements of a V8 FixedArray",
+ dcmd_v8array },
+ { "v8classes", NULL, "list known V8 heap object C++ classes",
+ dcmd_v8classes },
+ { "v8code", ":[-d]", "print information about a V8 Code object",
+ dcmd_v8code },
+ { "v8field", "classname fieldname offset",
+ "manually add a field to a given class", dcmd_v8field },
+ { "v8function", ":[-d]", "print JSFunction object details",
+ dcmd_v8function },
+ { "v8load", "version", "load canned config for a specific V8 version",
+ dcmd_v8load, dcmd_v8load_help },
+ { "v8frametypes", NULL, "list known V8 frame types",
+ dcmd_v8frametypes },
+ { "v8print", ":[class]", "print a V8 heap object",
+ dcmd_v8print, dcmd_v8print_help },
+ { "v8str", ":[-v]", "print the contents of a V8 string",
+ dcmd_v8str },
+ { "v8type", ":", "print the type of a V8 heap object",
+ dcmd_v8type },
+ { "v8types", NULL, "list known V8 heap object types",
+ dcmd_v8types },
+ { "v8warnings", NULL, "toggle V8 warnings",
+ dcmd_v8warnings },
+
+ { NULL }
+};
+
+static const mdb_walker_t v8_mdb_walkers[] = {
+ { "jsframe", "walk V8 JavaScript stack frames",
+ walk_jsframes_init, walk_jsframes_step },
+ { "jsprop", "walk property values for an object",
+ walk_jsprop_init, walk_jsprop_step },
+ { NULL }
+};
+
+static mdb_modinfo_t v8_mdb = { MDB_API_VERSION, v8_mdb_dcmds, v8_mdb_walkers };
+
+static void
+configure(void)
+{
+ char *success;
+ v8_cfg_t *cfgp = NULL;
+ GElf_Sym sym;
+
+ if (mdb_readsym(&v8_major, sizeof (v8_major),
+ "_ZN2v88internal7Version6major_E") == -1 ||
+ mdb_readsym(&v8_minor, sizeof (v8_minor),
+ "_ZN2v88internal7Version6minor_E") == -1 ||
+ mdb_readsym(&v8_build, sizeof (v8_build),
+ "_ZN2v88internal7Version6build_E") == -1 ||
+ mdb_readsym(&v8_patch, sizeof (v8_patch),
+ "_ZN2v88internal7Version6patch_E") == -1) {
+ mdb_warn("failed to determine V8 version");
+ return;
+ }
+
+ mdb_printf("V8 version: %d.%d.%d.%d\n",
+ v8_major, v8_minor, v8_build, v8_patch);
+
+ /*
+ * First look for debug metadata embedded within the binary, which may
+ * be present in recent V8 versions built with postmortem metadata.
+ */
+ if (mdb_lookup_by_name("v8dbg_SmiTag", &sym) == 0) {
+ cfgp = &v8_cfg_target;
+ success = "Autoconfigured V8 support from target";
+ } else if (v8_major == 3 && v8_minor == 1 && v8_build == 8) {
+ cfgp = &v8_cfg_04;
+ success = "Configured V8 support based on node v0.4";
+ } else if (v8_major == 3 && v8_minor == 6 && v8_build == 6) {
+ cfgp = &v8_cfg_06;
+ success = "Configured V8 support based on node v0.6";
+ } else {
+ mdb_printf("mdb_v8: target has no debug metadata and "
+ "no existing config found\n");
+ return;
+ }
+
+ if (autoconfigure(cfgp) != 0) {
+ mdb_warn("failed to autoconfigure from target; "
+ "commands may have incorrect results!\n");
+ return;
+ }
+
+ mdb_printf("%s\n", success);
+}
+
+static void
+enable_demangling(void)
+{
+ const char *symname = "_ZN2v88internal7Version6major_E";
+ GElf_Sym sym;
+ char buf[64];
+
+ /*
+ * Try to determine whether C++ symbol demangling has been enabled. If
+ * not, enable it.
+ */
+ if (mdb_lookup_by_name("_ZN2v88internal7Version6major_E", &sym) != 0)
+ return;
+
+ (void) mdb_snprintf(buf, sizeof (buf), "%a", sym.st_value);
+ if (strstr(buf, symname) != NULL)
+ (void) mdb_eval("$G");
+}
+
+const mdb_modinfo_t *
+_mdb_init(void)
+{
+ configure();
+ enable_demangling();
+ return (&v8_mdb);
+}
diff --git a/usr/src/cmd/mdb/common/modules/v8/mdb_v8_cfg.c b/usr/src/cmd/mdb/common/modules/v8/mdb_v8_cfg.c
new file mode 100644
index 0000000000..d907242435
--- /dev/null
+++ b/usr/src/cmd/mdb/common/modules/v8/mdb_v8_cfg.c
@@ -0,0 +1,728 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * mdb_v8_cfg.c: canned configurations for previous V8 versions.
+ *
+ * The functions and data defined here enable this dmod to support debugging
+ * Node.js binaries that predated V8's built-in postmortem debugging support.
+ */
+
+#include "v8cfg.h"
+
+/*ARGSUSED*/
+static int
+v8cfg_target_iter(v8_cfg_t *cfgp, int (*func)(mdb_symbol_t *, void *),
+ void *arg)
+{
+ return (mdb_symbol_iter(MDB_OBJ_EVERY, MDB_DYNSYM,
+ MDB_BIND_GLOBAL | MDB_TYPE_OBJECT | MDB_TYPE_FUNC,
+ func, arg));
+}
+
+/*ARGSUSED*/
+static int
+v8cfg_target_readsym(v8_cfg_t *cfgp, const char *name, intptr_t *valp)
+{
+ int val, rval;
+
+ if ((rval = mdb_readsym(&val, sizeof (val), name)) != -1)
+ *valp = (intptr_t)val;
+
+ return (rval);
+}
+
+/*
+ * Analog of mdb_symbol_iter() for a canned configuration.
+ */
+static int
+v8cfg_canned_iter(v8_cfg_t *cfgp, int (*func)(mdb_symbol_t *, void *),
+ void *arg)
+{
+ v8_cfg_symbol_t *v8sym;
+ mdb_symbol_t mdbsym;
+ int rv;
+
+ for (v8sym = cfgp->v8cfg_symbols; v8sym->v8cs_name != NULL; v8sym++) {
+ mdbsym.sym_name = v8sym->v8cs_name;
+ mdbsym.sym_object = NULL;
+ mdbsym.sym_sym = NULL;
+ mdbsym.sym_table = 0;
+ mdbsym.sym_id = 0;
+
+ if ((rv = func(&mdbsym, arg)) != 0)
+ return (rv);
+ }
+
+ return (0);
+}
+
+/*
+ * Analog of mdb_readsym() for a canned configuration.
+ */
+static int
+v8cfg_canned_readsym(v8_cfg_t *cfgp, const char *name, intptr_t *valp)
+{
+ v8_cfg_symbol_t *v8sym;
+
+ for (v8sym = cfgp->v8cfg_symbols; v8sym->v8cs_name != NULL; v8sym++) {
+ if (strcmp(name, v8sym->v8cs_name) == 0)
+ break;
+ }
+
+ if (v8sym->v8cs_name == NULL)
+ return (-1);
+
+ *valp = v8sym->v8cs_value;
+ return (0);
+}
+
+/*
+ * Canned configuration for the V8 bundled with Node.js v0.4.8 and later.
+ */
+static v8_cfg_symbol_t v8_symbols_node_04[] = {
+ { "v8dbg_type_AccessCheckInfo__ACCESS_CHECK_INFO_TYPE", 0x91 },
+ { "v8dbg_type_AccessorInfo__ACCESSOR_INFO_TYPE", 0x90 },
+ { "v8dbg_type_BreakPointInfo__BREAK_POINT_INFO_TYPE", 0x9b },
+ { "v8dbg_type_ByteArray__BYTE_ARRAY_TYPE", 0x86 },
+ { "v8dbg_type_CallHandlerInfo__CALL_HANDLER_INFO_TYPE", 0x93 },
+ { "v8dbg_type_Code__CODE_TYPE", 0x81 },
+ { "v8dbg_type_CodeCache__CODE_CACHE_TYPE", 0x99 },
+ { "v8dbg_type_ConsString__CONS_ASCII_STRING_TYPE", 0x5 },
+ { "v8dbg_type_ConsString__CONS_ASCII_SYMBOL_TYPE", 0x45 },
+ { "v8dbg_type_ConsString__CONS_STRING_TYPE", 0x1 },
+ { "v8dbg_type_ConsString__CONS_SYMBOL_TYPE", 0x41 },
+ { "v8dbg_type_DebugInfo__DEBUG_INFO_TYPE", 0x9a },
+ { "v8dbg_type_ExternalAsciiString__EXTERNAL_ASCII_STRING_TYPE", 0x6 },
+ { "v8dbg_type_ExternalAsciiString__EXTERNAL_ASCII_SYMBOL_TYPE", 0x46 },
+ { "v8dbg_type_ExternalByteArray__EXTERNAL_BYTE_ARRAY_TYPE", 0x88 },
+ { "v8dbg_type_ExternalFloatArray__EXTERNAL_FLOAT_ARRAY_TYPE", 0x8e },
+ { "v8dbg_type_ExternalIntArray__EXTERNAL_INT_ARRAY_TYPE", 0x8c },
+ { "v8dbg_type_ExternalShortArray__EXTERNAL_SHORT_ARRAY_TYPE", 0x8a },
+ { "v8dbg_type_ExternalString__EXTERNAL_STRING_TYPE", 0x2 },
+ { "v8dbg_type_ExternalString__EXTERNAL_SYMBOL_TYPE", 0x42 },
+ { "v8dbg_type_ExternalUnsignedByteArray__"
+ "EXTERNAL_UNSIGNED_BYTE_ARRAY_TYPE", 0x89 },
+ { "v8dbg_type_ExternalUnsignedIntArray__"
+ "EXTERNAL_UNSIGNED_INT_ARRAY_TYPE", 0x8d },
+ { "v8dbg_type_ExternalUnsignedShortArray__"
+ "EXTERNAL_UNSIGNED_SHORT_ARRAY_TYPE", 0x8b },
+ { "v8dbg_type_FixedArray__FIXED_ARRAY_TYPE", 0x9c },
+ { "v8dbg_type_FunctionTemplateInfo__"
+ "FUNCTION_TEMPLATE_INFO_TYPE", 0x94 },
+ { "v8dbg_type_HeapNumber__HEAP_NUMBER_TYPE", 0x84 },
+ { "v8dbg_type_InterceptorInfo__INTERCEPTOR_INFO_TYPE", 0x92 },
+ { "v8dbg_type_JSArray__JS_ARRAY_TYPE", 0xa5 },
+ { "v8dbg_type_JSBuiltinsObject__JS_BUILTINS_OBJECT_TYPE", 0xa3 },
+ { "v8dbg_type_JSFunction__JS_FUNCTION_TYPE", 0xa7 },
+ { "v8dbg_type_JSGlobalObject__JS_GLOBAL_OBJECT_TYPE", 0xa2 },
+ { "v8dbg_type_JSGlobalPropertyCell__"
+ "JS_GLOBAL_PROPERTY_CELL_TYPE", 0x83 },
+ { "v8dbg_type_JSGlobalProxy__JS_GLOBAL_PROXY_TYPE", 0xa4 },
+ { "v8dbg_type_JSMessageObject__JS_MESSAGE_OBJECT_TYPE", 0x9e },
+ { "v8dbg_type_JSObject__JS_OBJECT_TYPE", 0xa0 },
+ { "v8dbg_type_JSRegExp__JS_REGEXP_TYPE", 0xa6 },
+ { "v8dbg_type_JSValue__JS_VALUE_TYPE", 0x9f },
+ { "v8dbg_type_Map__MAP_TYPE", 0x80 },
+ { "v8dbg_type_ObjectTemplateInfo__OBJECT_TEMPLATE_INFO_TYPE", 0x95 },
+ { "v8dbg_type_Oddball__ODDBALL_TYPE", 0x82 },
+ { "v8dbg_type_Script__SCRIPT_TYPE", 0x98 },
+ { "v8dbg_type_SeqAsciiString__ASCII_STRING_TYPE", 0x4 },
+ { "v8dbg_type_SeqAsciiString__ASCII_SYMBOL_TYPE", 0x44 },
+ { "v8dbg_type_SharedFunctionInfo__SHARED_FUNCTION_INFO_TYPE", 0x9d },
+ { "v8dbg_type_SignatureInfo__SIGNATURE_INFO_TYPE", 0x96 },
+ { "v8dbg_type_String__STRING_TYPE", 0x0 },
+ { "v8dbg_type_String__SYMBOL_TYPE", 0x40 },
+ { "v8dbg_type_TypeSwitchInfo__TYPE_SWITCH_INFO_TYPE", 0x97 },
+
+ { "v8dbg_class_AccessCheckInfo__data__Object", 0xc },
+ { "v8dbg_class_AccessCheckInfo__indexed_callback__Object", 0x8 },
+ { "v8dbg_class_AccessCheckInfo__named_callback__Object", 0x4 },
+ { "v8dbg_class_AccessorInfo__data__Object", 0xc },
+ { "v8dbg_class_AccessorInfo__flag__Smi", 0x14 },
+ { "v8dbg_class_AccessorInfo__getter__Object", 0x4 },
+ { "v8dbg_class_AccessorInfo__name__Object", 0x10 },
+ { "v8dbg_class_AccessorInfo__setter__Object", 0x8 },
+ { "v8dbg_class_BreakPointInfo__break_point_objects__Object", 0x10 },
+ { "v8dbg_class_BreakPointInfo__code_position__Smi", 0x4 },
+ { "v8dbg_class_BreakPointInfo__source_position__Smi", 0x8 },
+ { "v8dbg_class_BreakPointInfo__statement_position__Smi", 0xc },
+ { "v8dbg_class_ByteArray__length__SMI", 0x4 },
+ { "v8dbg_class_CallHandlerInfo__callback__Object", 0x4 },
+ { "v8dbg_class_CallHandlerInfo__data__Object", 0x8 },
+ { "v8dbg_class_Code__deoptimization_data__FixedArray", 0xc },
+ { "v8dbg_class_Code__instruction_size__int", 0x4 },
+ { "v8dbg_class_Code__instruction_start__int", 0x20 },
+ { "v8dbg_class_Code__relocation_info__ByteArray", 0x8 },
+ { "v8dbg_class_CodeCache__default_cache__FixedArray", 0x4 },
+ { "v8dbg_class_CodeCache__normal_type_cache__Object", 0x8 },
+ { "v8dbg_class_ConsString__first__String", 0xc },
+ { "v8dbg_class_ConsString__second__String", 0x10 },
+ { "v8dbg_class_DebugInfo__break_points__FixedArray", 0x14 },
+ { "v8dbg_class_DebugInfo__code__Code", 0xc },
+ { "v8dbg_class_DebugInfo__original_code__Code", 0x8 },
+ { "v8dbg_class_DebugInfo__shared__SharedFunctionInfo", 0x4 },
+ { "v8dbg_class_ExternalString__resource__Object", 0xc },
+ { "v8dbg_class_FixedArray__data__uintptr_t", 0x8 },
+ { "v8dbg_class_FixedArray__length__SMI", 0x4 },
+ { "v8dbg_class_FunctionTemplateInfo__access_check_info__Object", 0x38 },
+ { "v8dbg_class_FunctionTemplateInfo__call_code__Object", 0x10 },
+ { "v8dbg_class_FunctionTemplateInfo__class_name__Object", 0x2c },
+ { "v8dbg_class_FunctionTemplateInfo__flag__Smi", 0x3c },
+ { "v8dbg_class_FunctionTemplateInfo__"
+ "indexed_property_handler__Object", 0x24 },
+ { "v8dbg_class_FunctionTemplateInfo__"
+ "instance_call_handler__Object", 0x34 },
+ { "v8dbg_class_FunctionTemplateInfo__instance_template__Object", 0x28 },
+ { "v8dbg_class_FunctionTemplateInfo__"
+ "named_property_handler__Object", 0x20 },
+ { "v8dbg_class_FunctionTemplateInfo__parent_template__Object", 0x1c },
+ { "v8dbg_class_FunctionTemplateInfo__"
+ "property_accessors__Object", 0x14 },
+ { "v8dbg_class_FunctionTemplateInfo__"
+ "prototype_template__Object", 0x18 },
+ { "v8dbg_class_FunctionTemplateInfo__serial_number__Object", 0xc },
+ { "v8dbg_class_FunctionTemplateInfo__signature__Object", 0x30 },
+ { "v8dbg_class_GlobalObject__builtins__JSBuiltinsObject", 0xc },
+ { "v8dbg_class_GlobalObject__global_context__Context", 0x10 },
+ { "v8dbg_class_GlobalObject__global_receiver__JSObject", 0x14 },
+ { "v8dbg_class_HeapNumber__value__SMI", 0x4 },
+ { "v8dbg_class_HeapObject__map__Map", 0x0 },
+ { "v8dbg_class_InterceptorInfo__data__Object", 0x18 },
+ { "v8dbg_class_InterceptorInfo__deleter__Object", 0x10 },
+ { "v8dbg_class_InterceptorInfo__enumerator__Object", 0x14 },
+ { "v8dbg_class_InterceptorInfo__getter__Object", 0x4 },
+ { "v8dbg_class_InterceptorInfo__query__Object", 0xc },
+ { "v8dbg_class_InterceptorInfo__setter__Object", 0x8 },
+ { "v8dbg_class_JSArray__length__Object", 0xc },
+ { "v8dbg_class_JSFunction__literals__FixedArray", 0x1c },
+ { "v8dbg_class_JSFunction__next_function_link__Object", 0x20 },
+ { "v8dbg_class_JSFunction__prototype_or_initial_map__Object", 0x10 },
+ { "v8dbg_class_JSFunction__shared__SharedFunctionInfo", 0x14 },
+ { "v8dbg_class_JSGlobalProxy__context__Object", 0xc },
+ { "v8dbg_class_JSMessageObject__arguments__JSArray", 0x10 },
+ { "v8dbg_class_JSMessageObject__end_position__SMI", 0x24 },
+ { "v8dbg_class_JSMessageObject__script__Object", 0x14 },
+ { "v8dbg_class_JSMessageObject__stack_frames__Object", 0x1c },
+ { "v8dbg_class_JSMessageObject__stack_trace__Object", 0x18 },
+ { "v8dbg_class_JSMessageObject__start_position__SMI", 0x20 },
+ { "v8dbg_class_JSMessageObject__type__String", 0xc },
+ { "v8dbg_class_JSObject__elements__Object", 0x8 },
+ { "v8dbg_class_JSObject__properties__FixedArray", 0x4 },
+ { "v8dbg_class_JSRegExp__data__Object", 0xc },
+ { "v8dbg_class_JSValue__value__Object", 0xc },
+ { "v8dbg_class_Map__code_cache__Object", 0x18 },
+ { "v8dbg_class_Map__constructor__Object", 0x10 },
+ { "v8dbg_class_Map__inobject_properties__int", 0x5 },
+ { "v8dbg_class_Map__instance_size__int", 0x4 },
+ { "v8dbg_class_Map__instance_attributes__int", 0x8 },
+ { "v8dbg_class_Map__instance_descriptors__DescriptorArray", 0x14 },
+ { "v8dbg_class_ObjectTemplateInfo__constructor__Object", 0xc },
+ { "v8dbg_class_ObjectTemplateInfo__"
+ "internal_field_count__Object", 0x10 },
+ { "v8dbg_class_Oddball__to_number__Object", 0x8 },
+ { "v8dbg_class_Oddball__to_string__String", 0x4 },
+ { "v8dbg_class_Script__column_offset__Smi", 0x10 },
+ { "v8dbg_class_Script__compilation_type__Smi", 0x24 },
+ { "v8dbg_class_Script__context_data__Object", 0x18 },
+ { "v8dbg_class_Script__data__Object", 0x14 },
+ { "v8dbg_class_Script__eval_from_instructions_offset__Smi", 0x34 },
+ { "v8dbg_class_Script__eval_from_shared__Object", 0x30 },
+ { "v8dbg_class_Script__id__Object", 0x2c },
+ { "v8dbg_class_Script__line_ends__Object", 0x28 },
+ { "v8dbg_class_Script__line_offset__Smi", 0xc },
+ { "v8dbg_class_Script__name__Object", 0x8 },
+ { "v8dbg_class_Script__source__Object", 0x4 },
+ { "v8dbg_class_Script__type__Smi", 0x20 },
+ { "v8dbg_class_Script__wrapper__Proxy", 0x1c },
+ { "v8dbg_class_SeqAsciiString__chars__char", 0xc },
+ { "v8dbg_class_SharedFunctionInfo__code__Code", 0x8 },
+ { "v8dbg_class_SharedFunctionInfo__compiler_hints__SMI", 0x50 },
+ { "v8dbg_class_SharedFunctionInfo__construct_stub__Code", 0x10 },
+ { "v8dbg_class_SharedFunctionInfo__debug_info__Object", 0x20 },
+ { "v8dbg_class_SharedFunctionInfo__end_position__SMI", 0x48 },
+ { "v8dbg_class_SharedFunctionInfo__"
+ "expected_nof_properties__SMI", 0x3c },
+ { "v8dbg_class_SharedFunctionInfo__formal_parameter_count__SMI", 0x38 },
+ { "v8dbg_class_SharedFunctionInfo__function_data__Object", 0x18 },
+ { "v8dbg_class_SharedFunctionInfo__"
+ "function_token_position__SMI", 0x4c },
+ { "v8dbg_class_SharedFunctionInfo__inferred_name__String", 0x24 },
+ { "v8dbg_class_SharedFunctionInfo__initial_map__Object", 0x28 },
+ { "v8dbg_class_SharedFunctionInfo__instance_class_name__Object", 0x14 },
+ { "v8dbg_class_SharedFunctionInfo__length__SMI", 0x34 },
+ { "v8dbg_class_SharedFunctionInfo__name__Object", 0x4 },
+ { "v8dbg_class_SharedFunctionInfo__num_literals__SMI", 0x40 },
+ { "v8dbg_class_SharedFunctionInfo__opt_count__SMI", 0x58 },
+ { "v8dbg_class_SharedFunctionInfo__script__Object", 0x1c },
+ { "v8dbg_class_SharedFunctionInfo__"
+ "start_position_and_type__SMI", 0x44 },
+ { "v8dbg_class_SharedFunctionInfo__"
+ "this_property_assignments__Object", 0x2c },
+ { "v8dbg_class_SharedFunctionInfo__"
+ "this_property_assignments_count__SMI", 0x54 },
+ { "v8dbg_class_SignatureInfo__args__Object", 0x8 },
+ { "v8dbg_class_SignatureInfo__receiver__Object", 0x4 },
+ { "v8dbg_class_String__length__SMI", 0x4 },
+ { "v8dbg_class_TemplateInfo__property_list__Object", 0x8 },
+ { "v8dbg_class_TemplateInfo__tag__Object", 0x4 },
+ { "v8dbg_class_TypeSwitchInfo__types__Object", 0x4 },
+
+ { "v8dbg_parent_AccessCheckInfo__Struct", 0x0 },
+ { "v8dbg_parent_AccessorInfo__Struct", 0x0 },
+ { "v8dbg_parent_BreakPointInfo__Struct", 0x0 },
+ { "v8dbg_parent_ByteArray__HeapObject", 0x0 },
+ { "v8dbg_parent_CallHandlerInfo__Struct", 0x0 },
+ { "v8dbg_parent_Code__HeapObject", 0x0 },
+ { "v8dbg_parent_CodeCache__Struct", 0x0 },
+ { "v8dbg_parent_ConsString__String", 0x0 },
+ { "v8dbg_parent_DebugInfo__Struct", 0x0 },
+ { "v8dbg_parent_DeoptimizationInputData__FixedArray", 0x0 },
+ { "v8dbg_parent_DeoptimizationOutputData__FixedArray", 0x0 },
+ { "v8dbg_parent_DescriptorArray__FixedArray", 0x0 },
+ { "v8dbg_parent_ExternalArray__HeapObject", 0x0 },
+ { "v8dbg_parent_ExternalAsciiString__ExternalString", 0x0 },
+ { "v8dbg_parent_ExternalByteArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_ExternalFloatArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_ExternalIntArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_ExternalShortArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_ExternalString__String", 0x0 },
+ { "v8dbg_parent_ExternalTwoByteString__ExternalString", 0x0 },
+ { "v8dbg_parent_ExternalUnsignedByteArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_ExternalUnsignedIntArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_ExternalUnsignedShortArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_Failure__MaybeObject", 0x0 },
+ { "v8dbg_parent_FixedArray__HeapObject", 0x0 },
+ { "v8dbg_parent_FunctionTemplateInfo__TemplateInfo", 0x0 },
+ { "v8dbg_parent_GlobalObject__JSObject", 0x0 },
+ { "v8dbg_parent_HeapNumber__HeapObject", 0x0 },
+ { "v8dbg_parent_HeapObject__Object", 0x0 },
+ { "v8dbg_parent_InterceptorInfo__Struct", 0x0 },
+ { "v8dbg_parent_JSArray__JSObject", 0x0 },
+ { "v8dbg_parent_JSBuiltinsObject__GlobalObject", 0x0 },
+ { "v8dbg_parent_JSFunction__JSObject", 0x0 },
+ { "v8dbg_parent_JSFunctionResultCache__FixedArray", 0x0 },
+ { "v8dbg_parent_JSGlobalObject__GlobalObject", 0x0 },
+ { "v8dbg_parent_JSGlobalPropertyCell__HeapObject", 0x0 },
+ { "v8dbg_parent_JSGlobalProxy__JSObject", 0x0 },
+ { "v8dbg_parent_JSMessageObject__JSObject", 0x0 },
+ { "v8dbg_parent_JSObject__HeapObject", 0x0 },
+ { "v8dbg_parent_JSRegExp__JSObject", 0x0 },
+ { "v8dbg_parent_JSRegExpResult__JSArray", 0x0 },
+ { "v8dbg_parent_JSValue__JSObject", 0x0 },
+ { "v8dbg_parent_Map__HeapObject", 0x0 },
+ { "v8dbg_parent_NormalizedMapCache__FixedArray", 0x0 },
+ { "v8dbg_parent_Object__MaybeObject", 0x0 },
+ { "v8dbg_parent_ObjectTemplateInfo__TemplateInfo", 0x0 },
+ { "v8dbg_parent_Oddball__HeapObject", 0x0 },
+ { "v8dbg_parent_Script__Struct", 0x0 },
+ { "v8dbg_parent_SeqAsciiString__SeqString", 0x0 },
+ { "v8dbg_parent_SeqString__String", 0x0 },
+ { "v8dbg_parent_SeqTwoByteString__SeqString", 0x0 },
+ { "v8dbg_parent_SharedFunctionInfo__HeapObject", 0x0 },
+ { "v8dbg_parent_SignatureInfo__Struct", 0x0 },
+ { "v8dbg_parent_Smi__Object", 0x0 },
+ { "v8dbg_parent_String__HeapObject", 0x0 },
+ { "v8dbg_parent_Struct__HeapObject", 0x0 },
+ { "v8dbg_parent_TemplateInfo__Struct", 0x0 },
+ { "v8dbg_parent_TypeSwitchInfo__Struct", 0x0 },
+
+ { "v8dbg_frametype_ArgumentsAdaptorFrame", 0x8 },
+ { "v8dbg_frametype_ConstructFrame", 0x7 },
+ { "v8dbg_frametype_EntryConstructFrame", 0x2 },
+ { "v8dbg_frametype_EntryFrame", 0x1 },
+ { "v8dbg_frametype_ExitFrame", 0x3 },
+ { "v8dbg_frametype_InternalFrame", 0x6 },
+ { "v8dbg_frametype_JavaScriptFrame", 0x4 },
+ { "v8dbg_frametype_OptimizedFrame", 0x5 },
+
+ { "v8dbg_off_fp_context", -0x4 },
+ { "v8dbg_off_fp_function", -0x8 },
+ { "v8dbg_off_fp_marker", -0x8 },
+ { "v8dbg_off_fp_args", 0x8 },
+
+ { "v8dbg_prop_idx_content", 0x0 },
+ { "v8dbg_prop_idx_first", 0x2 },
+ { "v8dbg_prop_type_field", 0x1 },
+ { "v8dbg_prop_type_first_phantom", 0x6 },
+ { "v8dbg_prop_type_mask", 0xf },
+
+ { "v8dbg_AsciiStringTag", 0x4 },
+ { "v8dbg_ConsStringTag", 0x1 },
+ { "v8dbg_ExternalStringTag", 0x2 },
+ { "v8dbg_FailureTag", 0x3 },
+ { "v8dbg_FailureTagMask", 0x3 },
+ { "v8dbg_FirstNonstringType", 0x80 },
+ { "v8dbg_HeapObjectTag", 0x1 },
+ { "v8dbg_HeapObjectTagMask", 0x3 },
+ { "v8dbg_IsNotStringMask", 0x80 },
+ { "v8dbg_NotStringTag", 0x80 },
+ { "v8dbg_SeqStringTag", 0x0 },
+ { "v8dbg_SmiTag", 0x0 },
+ { "v8dbg_SmiTagMask", 0x1 },
+ { "v8dbg_SmiValueShift", 0x1 },
+ { "v8dbg_StringEncodingMask", 0x4 },
+ { "v8dbg_StringRepresentationMask", 0x3 },
+ { "v8dbg_StringTag", 0x0 },
+ { "v8dbg_TwoByteStringTag", 0x0 },
+ { "v8dbg_PointerSizeLog2", 0x2 },
+
+ { NULL }
+};
+
+/*
+ * Canned configuration for the V8 bundled with Node.js v0.6.5.
+ */
+static v8_cfg_symbol_t v8_symbols_node_06[] = {
+ { "v8dbg_type_AccessCheckInfo__ACCESS_CHECK_INFO_TYPE", 0x93 },
+ { "v8dbg_type_AccessorInfo__ACCESSOR_INFO_TYPE", 0x92 },
+ { "v8dbg_type_BreakPointInfo__BREAK_POINT_INFO_TYPE", 0x9e },
+ { "v8dbg_type_ByteArray__BYTE_ARRAY_TYPE", 0x86 },
+ { "v8dbg_type_CallHandlerInfo__CALL_HANDLER_INFO_TYPE", 0x95 },
+ { "v8dbg_type_Code__CODE_TYPE", 0x81 },
+ { "v8dbg_type_CodeCache__CODE_CACHE_TYPE", 0x9b },
+ { "v8dbg_type_ConsString__CONS_ASCII_STRING_TYPE", 0x5 },
+ { "v8dbg_type_ConsString__CONS_ASCII_SYMBOL_TYPE", 0x45 },
+ { "v8dbg_type_ConsString__CONS_STRING_TYPE", 0x1 },
+ { "v8dbg_type_ConsString__CONS_SYMBOL_TYPE", 0x41 },
+ { "v8dbg_type_DebugInfo__DEBUG_INFO_TYPE", 0x9d },
+ { "v8dbg_type_ExternalAsciiString__EXTERNAL_ASCII_STRING_TYPE", 0x6 },
+ { "v8dbg_type_ExternalAsciiString__EXTERNAL_ASCII_SYMBOL_TYPE", 0x46 },
+ { "v8dbg_type_ExternalByteArray__EXTERNAL_BYTE_ARRAY_TYPE", 0x87 },
+ { "v8dbg_type_ExternalDoubleArray__EXTERNAL_DOUBLE_ARRAY_TYPE", 0x8e },
+ { "v8dbg_type_ExternalFloatArray__EXTERNAL_FLOAT_ARRAY_TYPE", 0x8d },
+ { "v8dbg_type_ExternalIntArray__EXTERNAL_INT_ARRAY_TYPE", 0x8b },
+ { "v8dbg_type_ExternalPixelArray__EXTERNAL_PIXEL_ARRAY_TYPE", 0x8f },
+ { "v8dbg_type_ExternalShortArray__EXTERNAL_SHORT_ARRAY_TYPE", 0x89 },
+ { "v8dbg_type_ExternalTwoByteString__EXTERNAL_STRING_TYPE", 0x2 },
+ { "v8dbg_type_ExternalTwoByteString__EXTERNAL_SYMBOL_TYPE", 0x42 },
+ { "v8dbg_type_ExternalUnsignedByteArray__"
+ "EXTERNAL_UNSIGNED_BYTE_ARRAY_TYPE", 0x88 },
+ { "v8dbg_type_ExternalUnsignedIntArray__"
+ "EXTERNAL_UNSIGNED_INT_ARRAY_TYPE", 0x8c },
+ { "v8dbg_type_ExternalUnsignedShortArray__"
+ "EXTERNAL_UNSIGNED_SHORT_ARRAY_TYPE", 0x8a },
+ { "v8dbg_type_FixedArray__FIXED_ARRAY_TYPE", 0x9f },
+ { "v8dbg_type_FixedDoubleArray__FIXED_DOUBLE_ARRAY_TYPE", 0x90 },
+ { "v8dbg_type_Foreign__FOREIGN_TYPE", 0x85 },
+ { "v8dbg_type_FunctionTemplateInfo__FUNCTION_TEMPLATE_INFO_TYPE",
+ 0x96 },
+ { "v8dbg_type_HeapNumber__HEAP_NUMBER_TYPE", 0x84 },
+ { "v8dbg_type_InterceptorInfo__INTERCEPTOR_INFO_TYPE", 0x94 },
+ { "v8dbg_type_JSArray__JS_ARRAY_TYPE", 0xa8 },
+ { "v8dbg_type_JSBuiltinsObject__JS_BUILTINS_OBJECT_TYPE", 0xa6 },
+ { "v8dbg_type_JSFunction__JS_FUNCTION_TYPE", 0xac },
+ { "v8dbg_type_JSFunctionProxy__JS_FUNCTION_PROXY_TYPE", 0xad },
+ { "v8dbg_type_JSGlobalObject__JS_GLOBAL_OBJECT_TYPE", 0xa5 },
+ { "v8dbg_type_JSGlobalPropertyCell__JS_GLOBAL_PROPERTY_CELL_TYPE",
+ 0x83 },
+ { "v8dbg_type_JSMessageObject__JS_MESSAGE_OBJECT_TYPE", 0xa1 },
+ { "v8dbg_type_JSObject__JS_OBJECT_TYPE", 0xa3 },
+ { "v8dbg_type_JSProxy__JS_PROXY_TYPE", 0xa9 },
+ { "v8dbg_type_JSRegExp__JS_REGEXP_TYPE", 0xab },
+ { "v8dbg_type_JSValue__JS_VALUE_TYPE", 0xa2 },
+ { "v8dbg_type_JSWeakMap__JS_WEAK_MAP_TYPE", 0xaa },
+ { "v8dbg_type_Map__MAP_TYPE", 0x80 },
+ { "v8dbg_type_ObjectTemplateInfo__OBJECT_TEMPLATE_INFO_TYPE", 0x97 },
+ { "v8dbg_type_Oddball__ODDBALL_TYPE", 0x82 },
+ { "v8dbg_type_PolymorphicCodeCache__POLYMORPHIC_CODE_CACHE_TYPE",
+ 0x9c },
+ { "v8dbg_type_Script__SCRIPT_TYPE", 0x9a },
+ { "v8dbg_type_SeqAsciiString__ASCII_STRING_TYPE", 0x4 },
+ { "v8dbg_type_SeqAsciiString__ASCII_SYMBOL_TYPE", 0x44 },
+ { "v8dbg_type_SeqTwoByteString__STRING_TYPE", 0x0 },
+ { "v8dbg_type_SeqTwoByteString__SYMBOL_TYPE", 0x40 },
+ { "v8dbg_type_SharedFunctionInfo__SHARED_FUNCTION_INFO_TYPE", 0xa0 },
+ { "v8dbg_type_SignatureInfo__SIGNATURE_INFO_TYPE", 0x98 },
+ { "v8dbg_type_SlicedString__SLICED_ASCII_STRING_TYPE", 0x7 },
+ { "v8dbg_type_SlicedString__SLICED_STRING_TYPE", 0x3 },
+ { "v8dbg_type_TypeSwitchInfo__TYPE_SWITCH_INFO_TYPE", 0x99 },
+
+ { "v8dbg_class_AccessCheckInfo__data__Object", 0xc },
+ { "v8dbg_class_AccessCheckInfo__indexed_callback__Object", 0x8 },
+ { "v8dbg_class_AccessCheckInfo__named_callback__Object", 0x4 },
+ { "v8dbg_class_AccessorInfo__data__Object", 0xc },
+ { "v8dbg_class_AccessorInfo__flag__Smi", 0x14 },
+ { "v8dbg_class_AccessorInfo__getter__Object", 0x4 },
+ { "v8dbg_class_AccessorInfo__name__Object", 0x10 },
+ { "v8dbg_class_AccessorInfo__setter__Object", 0x8 },
+ { "v8dbg_class_BreakPointInfo__break_point_objects__Object", 0x10 },
+ { "v8dbg_class_BreakPointInfo__code_position__Smi", 0x4 },
+ { "v8dbg_class_BreakPointInfo__source_position__Smi", 0x8 },
+ { "v8dbg_class_BreakPointInfo__statement_position__Smi", 0xc },
+ { "v8dbg_class_CallHandlerInfo__callback__Object", 0x4 },
+ { "v8dbg_class_CallHandlerInfo__data__Object", 0x8 },
+ { "v8dbg_class_Code__deoptimization_data__FixedArray", 0xc },
+ { "v8dbg_class_Code__instruction_size__int", 0x4 },
+ { "v8dbg_class_Code__instruction_start__int", 0x20 },
+ { "v8dbg_class_Code__next_code_flushing_candidate__Object", 0x10 },
+ { "v8dbg_class_Code__relocation_info__ByteArray", 0x8 },
+ { "v8dbg_class_CodeCache__default_cache__FixedArray", 0x4 },
+ { "v8dbg_class_CodeCache__normal_type_cache__Object", 0x8 },
+ { "v8dbg_class_ConsString__first__String", 0xc },
+ { "v8dbg_class_ConsString__second__String", 0x10 },
+ { "v8dbg_class_DebugInfo__break_points__FixedArray", 0x14 },
+ { "v8dbg_class_DebugInfo__code__Code", 0xc },
+ { "v8dbg_class_DebugInfo__original_code__Code", 0x8 },
+ { "v8dbg_class_DebugInfo__shared__SharedFunctionInfo", 0x4 },
+ { "v8dbg_class_ExternalString__resource__Object", 0xc },
+ { "v8dbg_class_FixedArray__data__uintptr_t", 0x8 },
+ { "v8dbg_class_FixedArrayBase__length__SMI", 0x4 },
+ { "v8dbg_class_FunctionTemplateInfo__access_check_info__Object", 0x38 },
+ { "v8dbg_class_FunctionTemplateInfo__call_code__Object", 0x10 },
+ { "v8dbg_class_FunctionTemplateInfo__class_name__Object", 0x2c },
+ { "v8dbg_class_FunctionTemplateInfo__flag__Smi", 0x3c },
+ { "v8dbg_class_FunctionTemplateInfo__indexed_property_handler__Object",
+ 0x24 },
+ { "v8dbg_class_FunctionTemplateInfo__instance_call_handler__Object",
+ 0x34 },
+ { "v8dbg_class_FunctionTemplateInfo__instance_template__Object", 0x28 },
+ { "v8dbg_class_FunctionTemplateInfo__named_property_handler__Object",
+ 0x20 },
+ { "v8dbg_class_FunctionTemplateInfo__parent_template__Object", 0x1c },
+ { "v8dbg_class_FunctionTemplateInfo__property_accessors__Object",
+ 0x14 },
+ { "v8dbg_class_FunctionTemplateInfo__prototype_template__Object",
+ 0x18 },
+ { "v8dbg_class_FunctionTemplateInfo__serial_number__Object", 0xc },
+ { "v8dbg_class_FunctionTemplateInfo__signature__Object", 0x30 },
+ { "v8dbg_class_GlobalObject__builtins__JSBuiltinsObject", 0xc },
+ { "v8dbg_class_GlobalObject__global_context__Context", 0x10 },
+ { "v8dbg_class_GlobalObject__global_receiver__JSObject", 0x14 },
+ { "v8dbg_class_HeapNumber__value__double", 0x4 },
+ { "v8dbg_class_HeapObject__map__Map", 0x0 },
+ { "v8dbg_class_InterceptorInfo__data__Object", 0x18 },
+ { "v8dbg_class_InterceptorInfo__deleter__Object", 0x10 },
+ { "v8dbg_class_InterceptorInfo__enumerator__Object", 0x14 },
+ { "v8dbg_class_InterceptorInfo__getter__Object", 0x4 },
+ { "v8dbg_class_InterceptorInfo__query__Object", 0xc },
+ { "v8dbg_class_InterceptorInfo__setter__Object", 0x8 },
+ { "v8dbg_class_JSArray__length__Object", 0xc },
+ { "v8dbg_class_JSFunction__literals__FixedArray", 0x1c },
+ { "v8dbg_class_JSFunction__next_function_link__Object", 0x20 },
+ { "v8dbg_class_JSFunction__prototype_or_initial_map__Object", 0x10 },
+ { "v8dbg_class_JSFunction__shared__SharedFunctionInfo", 0x14 },
+ { "v8dbg_class_JSFunctionProxy__call_trap__Object", 0x8 },
+ { "v8dbg_class_JSFunctionProxy__construct_trap__Object", 0xc },
+ { "v8dbg_class_JSGlobalProxy__context__Object", 0xc },
+ { "v8dbg_class_JSMessageObject__arguments__JSArray", 0x10 },
+ { "v8dbg_class_JSMessageObject__end_position__SMI", 0x24 },
+ { "v8dbg_class_JSMessageObject__script__Object", 0x14 },
+ { "v8dbg_class_JSMessageObject__stack_frames__Object", 0x1c },
+ { "v8dbg_class_JSMessageObject__stack_trace__Object", 0x18 },
+ { "v8dbg_class_JSMessageObject__start_position__SMI", 0x20 },
+ { "v8dbg_class_JSMessageObject__type__String", 0xc },
+ { "v8dbg_class_JSObject__elements__Object", 0x8 },
+ { "v8dbg_class_JSObject__properties__FixedArray", 0x4 },
+ { "v8dbg_class_JSProxy__handler__Object", 0x4 },
+ { "v8dbg_class_JSRegExp__data__Object", 0xc },
+ { "v8dbg_class_JSValue__value__Object", 0xc },
+ { "v8dbg_class_JSWeakMap__next__Object", 0x10 },
+ { "v8dbg_class_JSWeakMap__table__ObjectHashTable", 0xc },
+ { "v8dbg_class_Map__code_cache__Object", 0x18 },
+ { "v8dbg_class_Map__constructor__Object", 0x10 },
+ { "v8dbg_class_Map__inobject_properties__int", 0x5 },
+ { "v8dbg_class_Map__instance_attributes__int", 0x8 },
+ { "v8dbg_class_Map__instance_descriptors__FixedArray", 0x14 },
+ { "v8dbg_class_Map__instance_size__int", 0x4 },
+ { "v8dbg_class_Map__prototype_transitions__FixedArray", 0x1c },
+ { "v8dbg_class_ObjectTemplateInfo__constructor__Object", 0xc },
+ { "v8dbg_class_ObjectTemplateInfo__internal_field_count__Object",
+ 0x10 },
+ { "v8dbg_class_Oddball__to_number__Object", 0x8 },
+ { "v8dbg_class_Oddball__to_string__String", 0x4 },
+ { "v8dbg_class_PolymorphicCodeCache__cache__Object", 0x4 },
+ { "v8dbg_class_Script__column_offset__Smi", 0x10 },
+ { "v8dbg_class_Script__compilation_type__Smi", 0x24 },
+ { "v8dbg_class_Script__context_data__Object", 0x18 },
+ { "v8dbg_class_Script__data__Object", 0x14 },
+ { "v8dbg_class_Script__eval_from_instructions_offset__Smi", 0x34 },
+ { "v8dbg_class_Script__eval_from_shared__Object", 0x30 },
+ { "v8dbg_class_Script__id__Object", 0x2c },
+ { "v8dbg_class_Script__line_ends__Object", 0x28 },
+ { "v8dbg_class_Script__line_offset__Smi", 0xc },
+ { "v8dbg_class_Script__name__Object", 0x8 },
+ { "v8dbg_class_Script__source__Object", 0x4 },
+ { "v8dbg_class_Script__type__Smi", 0x20 },
+ { "v8dbg_class_Script__wrapper__Foreign", 0x1c },
+ { "v8dbg_class_SeqAsciiString__chars__char", 0xc },
+ { "v8dbg_class_SharedFunctionInfo__code__Code", 0x8 },
+ { "v8dbg_class_SharedFunctionInfo__compiler_hints__SMI", 0x50 },
+ { "v8dbg_class_SharedFunctionInfo__construct_stub__Code", 0x10 },
+ { "v8dbg_class_SharedFunctionInfo__debug_info__Object", 0x20 },
+ { "v8dbg_class_SharedFunctionInfo__end_position__SMI", 0x48 },
+ { "v8dbg_class_SharedFunctionInfo__expected_nof_properties__SMI",
+ 0x3c },
+ { "v8dbg_class_SharedFunctionInfo__formal_parameter_count__SMI", 0x38 },
+ { "v8dbg_class_SharedFunctionInfo__function_data__Object", 0x18 },
+ { "v8dbg_class_SharedFunctionInfo__function_token_position__SMI",
+ 0x4c },
+ { "v8dbg_class_SharedFunctionInfo__inferred_name__String", 0x24 },
+ { "v8dbg_class_SharedFunctionInfo__initial_map__Object", 0x28 },
+ { "v8dbg_class_SharedFunctionInfo__instance_class_name__Object", 0x14 },
+ { "v8dbg_class_SharedFunctionInfo__length__SMI", 0x34 },
+ { "v8dbg_class_SharedFunctionInfo__name__Object", 0x4 },
+ { "v8dbg_class_SharedFunctionInfo__num_literals__SMI", 0x40 },
+ { "v8dbg_class_SharedFunctionInfo__opt_count__SMI", 0x58 },
+ { "v8dbg_class_SharedFunctionInfo__script__Object", 0x1c },
+ { "v8dbg_class_SharedFunctionInfo__"
+ "start_position_and_type__SMI", 0x44 },
+ { "v8dbg_class_SharedFunctionInfo__"
+ "this_property_assignments__Object", 0x2c },
+ { "v8dbg_class_SharedFunctionInfo__"
+ "this_property_assignments_count__SMI", 0x54 },
+ { "v8dbg_class_SignatureInfo__args__Object", 0x8 },
+ { "v8dbg_class_SignatureInfo__receiver__Object", 0x4 },
+ { "v8dbg_class_SlicedString__offset__SMI", 0x10 },
+ { "v8dbg_class_String__length__SMI", 0x4 },
+ { "v8dbg_class_TemplateInfo__property_list__Object", 0x8 },
+ { "v8dbg_class_TemplateInfo__tag__Object", 0x4 },
+ { "v8dbg_class_TypeSwitchInfo__types__Object", 0x4 },
+
+ { "v8dbg_parent_AccessCheckInfo__Struct", 0x0 },
+ { "v8dbg_parent_AccessorInfo__Struct", 0x0 },
+ { "v8dbg_parent_BreakPointInfo__Struct", 0x0 },
+ { "v8dbg_parent_ByteArray__FixedArrayBase", 0x0 },
+ { "v8dbg_parent_CallHandlerInfo__Struct", 0x0 },
+ { "v8dbg_parent_Code__HeapObject", 0x0 },
+ { "v8dbg_parent_CodeCache__Struct", 0x0 },
+ { "v8dbg_parent_ConsString__String", 0x0 },
+ { "v8dbg_parent_DebugInfo__Struct", 0x0 },
+ { "v8dbg_parent_DeoptimizationInputData__FixedArray", 0x0 },
+ { "v8dbg_parent_DeoptimizationOutputData__FixedArray", 0x0 },
+ { "v8dbg_parent_DescriptorArray__FixedArray", 0x0 },
+ { "v8dbg_parent_ExternalArray__FixedArrayBase", 0x0 },
+ { "v8dbg_parent_ExternalAsciiString__ExternalString", 0x0 },
+ { "v8dbg_parent_ExternalByteArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_ExternalDoubleArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_ExternalFloatArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_ExternalIntArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_ExternalPixelArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_ExternalShortArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_ExternalString__String", 0x0 },
+ { "v8dbg_parent_ExternalTwoByteString__ExternalString", 0x0 },
+ { "v8dbg_parent_ExternalUnsignedByteArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_ExternalUnsignedIntArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_ExternalUnsignedShortArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_Failure__MaybeObject", 0x0 },
+ { "v8dbg_parent_FixedArray__FixedArrayBase", 0x0 },
+ { "v8dbg_parent_FixedArrayBase__HeapObject", 0x0 },
+ { "v8dbg_parent_FixedDoubleArray__FixedArrayBase", 0x0 },
+ { "v8dbg_parent_Foreign__HeapObject", 0x0 },
+ { "v8dbg_parent_FunctionTemplateInfo__TemplateInfo", 0x0 },
+ { "v8dbg_parent_GlobalObject__JSObject", 0x0 },
+ { "v8dbg_parent_HashTable__FixedArray", 0x0 },
+ { "v8dbg_parent_HeapNumber__HeapObject", 0x0 },
+ { "v8dbg_parent_HeapObject__Object", 0x0 },
+ { "v8dbg_parent_InterceptorInfo__Struct", 0x0 },
+ { "v8dbg_parent_JSArray__JSObject", 0x0 },
+ { "v8dbg_parent_JSBuiltinsObject__GlobalObject", 0x0 },
+ { "v8dbg_parent_JSFunction__JSObject", 0x0 },
+ { "v8dbg_parent_JSFunctionProxy__JSProxy", 0x0 },
+ { "v8dbg_parent_JSFunctionResultCache__FixedArray", 0x0 },
+ { "v8dbg_parent_JSGlobalObject__GlobalObject", 0x0 },
+ { "v8dbg_parent_JSGlobalPropertyCell__HeapObject", 0x0 },
+ { "v8dbg_parent_JSMessageObject__JSObject", 0x0 },
+ { "v8dbg_parent_JSObject__JSReceiver", 0x0 },
+ { "v8dbg_parent_JSProxy__JSReceiver", 0x0 },
+ { "v8dbg_parent_JSReceiver__HeapObject", 0x0 },
+ { "v8dbg_parent_JSRegExp__JSObject", 0x0 },
+ { "v8dbg_parent_JSRegExpResult__JSArray", 0x0 },
+ { "v8dbg_parent_JSValue__JSObject", 0x0 },
+ { "v8dbg_parent_JSWeakMap__JSObject", 0x0 },
+ { "v8dbg_parent_Map__HeapObject", 0x0 },
+ { "v8dbg_parent_NormalizedMapCache__FixedArray", 0x0 },
+ { "v8dbg_parent_ObjectTemplateInfo__TemplateInfo", 0x0 },
+ { "v8dbg_parent_Oddball__HeapObject", 0x0 },
+ { "v8dbg_parent_PolymorphicCodeCache__Struct", 0x0 },
+ { "v8dbg_parent_Script__Struct", 0x0 },
+ { "v8dbg_parent_SeqAsciiString__SeqString", 0x0 },
+ { "v8dbg_parent_SeqString__String", 0x0 },
+ { "v8dbg_parent_SeqTwoByteString__SeqString", 0x0 },
+ { "v8dbg_parent_SharedFunctionInfo__HeapObject", 0x0 },
+ { "v8dbg_parent_SignatureInfo__Struct", 0x0 },
+ { "v8dbg_parent_SlicedString__String", 0x0 },
+ { "v8dbg_parent_Smi__Object", 0x0 },
+ { "v8dbg_parent_String__HeapObject", 0x0 },
+ { "v8dbg_parent_Struct__HeapObject", 0x0 },
+ { "v8dbg_parent_TemplateInfo__Struct", 0x0 },
+ { "v8dbg_parent_TypeSwitchInfo__Struct", 0x0 },
+
+ { "v8dbg_frametype_ArgumentsAdaptorFrame", 0x8 },
+ { "v8dbg_frametype_ConstructFrame", 0x7 },
+ { "v8dbg_frametype_EntryConstructFrame", 0x2 },
+ { "v8dbg_frametype_EntryFrame", 0x1 },
+ { "v8dbg_frametype_ExitFrame", 0x3 },
+ { "v8dbg_frametype_InternalFrame", 0x6 },
+ { "v8dbg_frametype_JavaScriptFrame", 0x4 },
+ { "v8dbg_frametype_OptimizedFrame", 0x5 },
+
+ { "v8dbg_off_fp_args", 0x8 },
+ { "v8dbg_off_fp_context", -0x4 },
+ { "v8dbg_off_fp_function", -0x8 },
+ { "v8dbg_off_fp_marker", -0x8 },
+
+ { "v8dbg_prop_idx_content", 0x1 },
+ { "v8dbg_prop_idx_first", 0x3 },
+ { "v8dbg_prop_type_field", 0x1 },
+ { "v8dbg_prop_type_first_phantom", 0x6 },
+ { "v8dbg_prop_type_mask", 0xf },
+
+ { "v8dbg_AsciiStringTag", 0x4 },
+ { "v8dbg_PointerSizeLog2", 0x2 },
+ { "v8dbg_SeqStringTag", 0x0 },
+ { "v8dbg_SmiTag", 0x0 },
+ { "v8dbg_SmiTagMask", 0x1 },
+ { "v8dbg_SmiValueShift", 0x1 },
+ { "v8dbg_StringEncodingMask", 0x4 },
+ { "v8dbg_StringRepresentationMask", 0x3 },
+ { "v8dbg_StringTag", 0x0 },
+ { "v8dbg_TwoByteStringTag", 0x0 },
+ { "v8dbg_ConsStringTag", 0x1 },
+ { "v8dbg_ExternalStringTag", 0x2 },
+ { "v8dbg_FailureTag", 0x3 },
+ { "v8dbg_FailureTagMask", 0x3 },
+ { "v8dbg_FirstNonstringType", 0x80 },
+ { "v8dbg_HeapObjectTag", 0x1 },
+ { "v8dbg_HeapObjectTagMask", 0x3 },
+ { "v8dbg_IsNotStringMask", 0x80 },
+ { "v8dbg_NotStringTag", 0x80 },
+
+ { NULL },
+};
+
+v8_cfg_t v8_cfg_04 = { "node-0.4", "node v0.4", v8_symbols_node_04,
+ v8cfg_canned_iter, v8cfg_canned_readsym };
+
+v8_cfg_t v8_cfg_06 = { "node-0.6", "node v0.6", v8_symbols_node_06,
+ v8cfg_canned_iter, v8cfg_canned_readsym };
+
+v8_cfg_t *v8_cfgs[] = {
+ &v8_cfg_04,
+ &v8_cfg_06,
+ NULL
+};
+
+v8_cfg_t v8_cfg_target = { NULL, NULL, NULL, v8cfg_target_iter,
+ v8cfg_target_readsym };
diff --git a/usr/src/cmd/mdb/common/modules/v8/v8cfg.h b/usr/src/cmd/mdb/common/modules/v8/v8cfg.h
new file mode 100644
index 0000000000..9e722b0e2b
--- /dev/null
+++ b/usr/src/cmd/mdb/common/modules/v8/v8cfg.h
@@ -0,0 +1,55 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * v8cfg.h: canned configurations for previous V8 versions
+ */
+
+#ifndef V8CFG_H
+#define V8CFG_H
+
+#include <sys/types.h>
+#include <sys/mdb_modapi.h>
+
+typedef struct {
+ const char *v8cs_name; /* symbol name */
+ intptr_t v8cs_value; /* symbol value */
+} v8_cfg_symbol_t;
+
+typedef struct v8_cfg {
+ const char *v8cfg_name; /* canned config name */
+ const char *v8cfg_label; /* description */
+ v8_cfg_symbol_t *v8cfg_symbols; /* actual symbol values */
+
+ int (*v8cfg_iter)(struct v8_cfg *, int (*)(mdb_symbol_t *, void *),
+ void *);
+ int (*v8cfg_readsym)(struct v8_cfg *, const char *, intptr_t *);
+} v8_cfg_t;
+
+extern v8_cfg_t v8_cfg_04;
+extern v8_cfg_t v8_cfg_06;
+extern v8_cfg_t v8_cfg_target;
+extern v8_cfg_t *v8_cfgs[];
+
+#endif /* V8CFG_H */
diff --git a/usr/src/cmd/mdb/common/modules/v8/v8dbg.h b/usr/src/cmd/mdb/common/modules/v8/v8dbg.h
new file mode 100644
index 0000000000..36a30a79ef
--- /dev/null
+++ b/usr/src/cmd/mdb/common/modules/v8/v8dbg.h
@@ -0,0 +1,83 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * v8dbg.h: macros for use by V8 heap inspection tools. The consumer must
+ * define values for various tags and shifts. The MDB module gets these
+ * constants from information encoded in the binary itself.
+ */
+
+#ifndef _V8DBG_H
+#define _V8DBG_H
+
+/*
+ * Recall that while V8 heap objects are always 4-byte aligned, heap object
+ * pointers always have the last bit set. So when looking for a field nominally
+ * at offset X, one must be sure to clear the tag bit first.
+ */
+#define V8_OFF_HEAP(x) ((x) - V8_HeapObjectTag)
+
+/*
+ * Determine whether a given pointer refers to a SMI, Failure, or HeapObject.
+ */
+#define V8_IS_SMI(ptr) (((ptr) & V8_SmiTagMask) == V8_SmiTag)
+#define V8_IS_FAILURE(ptr) (((ptr) & V8_FailureTagMask) == V8_FailureTag)
+#define V8_IS_HEAPOBJECT(ptr) \
+ (((ptr) & V8_HeapObjectTagMask) == V8_HeapObjectTag)
+
+/*
+ * Extract the value of a SMI "pointer". Recall that small integers are stored
+ * using the upper 31 bits.
+ */
+#define V8_SMI_VALUE(smi) ((smi) >> (V8_SmiValueShift + V8_SmiShiftSize))
+
+/*
+ * Determine the encoding and representation of a V8 string.
+ */
+#define V8_TYPE_STRING(type) (((type) & V8_IsNotStringMask) == V8_StringTag)
+
+#define V8_STRENC_ASCII(type) \
+ (((type) & V8_StringEncodingMask) == V8_AsciiStringTag)
+
+#define V8_STRREP_SEQ(type) \
+ (((type) & V8_StringRepresentationMask) == V8_SeqStringTag)
+#define V8_STRREP_CONS(type) \
+ (((type) & V8_StringRepresentationMask) == V8_ConsStringTag)
+#define V8_STRREP_SLICED(type) \
+ (((type) & V8_StringRepresentationMask) == V8_SlicedStringTag)
+#define V8_STRREP_EXT(type) \
+ (((type) & V8_StringRepresentationMask) == V8_ExternalStringTag)
+
+/*
+ * Several of the following constants and transformations are hardcoded in V8 as
+ * well, so there's no way to extract them programmatically from the binary.
+ */
+#define V8_DESC_KEYIDX(x) ((x) + V8_PROP_IDX_FIRST)
+#define V8_DESC_VALIDX(x) ((x) << 1)
+#define V8_DESC_DETIDX(x) (((x) << 1) + 1)
+
+#define V8_DESC_ISFIELD(x) \
+ ((V8_SMI_VALUE(x) & V8_PROP_TYPE_MASK) == V8_PROP_TYPE_FIELD)
+
+#endif /* _V8DBG_H */
diff --git a/usr/src/cmd/mdb/i86pc/modules/unix/unix.c b/usr/src/cmd/mdb/i86pc/modules/unix/unix.c
index d774cde91f..b63876f1f8 100644
--- a/usr/src/cmd/mdb/i86pc/modules/unix/unix.c
+++ b/usr/src/cmd/mdb/i86pc/modules/unix/unix.c
@@ -22,6 +22,10 @@
* Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
*/
+/*
+ * Copyright (c) 2012 Joyent, Inc. All rights reserved.
+ */
+
#include <mdb/mdb_modapi.h>
#include <mdb/mdb_ctf.h>
#include <sys/cpuvar.h>
@@ -36,6 +40,8 @@
#include <sys/mutex_impl.h>
#include "i86mmu.h"
#include <sys/apix.h>
+#include <sys/x86_archext.h>
+#include <sys/bitmap.h>
#define TT_HDLR_WIDTH 17
@@ -745,6 +751,137 @@ ptable_help(void)
"-m Interpret the PFN as an MFN (machine frame number)\n");
}
+/*
+ * NSEC_SHIFT is replicated here (it is not defined in a header file),
+ * but for amusement, the reader is directed to the comment that explains
+ * the rationale for this particular value on x86. Spoiler: the value is
+ * selected to accommodate 60 MHz Pentiums! (And a confession: if the voice
+ * in that comment sounds too familiar, it's because your author also wrote
+ * that code -- some fifteen years prior to this writing in 2011...)
+ */
+#define NSEC_SHIFT 5
+
+/*ARGSUSED*/
+static int
+scalehrtime_cmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ uint32_t nsec_scale;
+ hrtime_t tsc = addr, hrt;
+ unsigned int *tscp = (unsigned int *)&tsc;
+ uintptr_t scalehrtimef;
+ uint64_t scale;
+ GElf_Sym sym;
+
+ if (!(flags & DCMD_ADDRSPEC)) {
+ if (argc != 1)
+ return (DCMD_USAGE);
+
+ switch (argv[0].a_type) {
+ case MDB_TYPE_STRING:
+ tsc = mdb_strtoull(argv[0].a_un.a_str);
+ break;
+ case MDB_TYPE_IMMEDIATE:
+ tsc = argv[0].a_un.a_val;
+ break;
+ default:
+ return (DCMD_USAGE);
+ }
+ }
+
+ if (mdb_readsym(&scalehrtimef,
+ sizeof (scalehrtimef), "scalehrtimef") == -1) {
+ mdb_warn("couldn't read 'scalehrtimef'");
+ return (DCMD_ERR);
+ }
+
+ if (mdb_lookup_by_name("tsc_scalehrtime", &sym) == -1) {
+ mdb_warn("couldn't find 'tsc_scalehrtime'");
+ return (DCMD_ERR);
+ }
+
+ if (sym.st_value != scalehrtimef) {
+ mdb_warn("::scalehrtime requires that scalehrtimef "
+ "be set to tsc_scalehrtime\n");
+ return (DCMD_ERR);
+ }
+
+ if (mdb_readsym(&nsec_scale, sizeof (nsec_scale), "nsec_scale") == -1) {
+ mdb_warn("couldn't read 'nsec_scale'");
+ return (DCMD_ERR);
+ }
+
+ scale = (uint64_t)nsec_scale;
+
+ hrt = ((uint64_t)tscp[1] * scale) << NSEC_SHIFT;
+ hrt += ((uint64_t)tscp[0] * scale) >> (32 - NSEC_SHIFT);
+
+ mdb_printf("0x%llx\n", hrt);
+
+ return (DCMD_OK);
+}
+
+/*
+ * The x86 feature set is implemented as a bitmap array. That bitmap array is
+ * stored across a number of uchars based on the BT_SIZEOFMAP(NUM_X86_FEATURES)
+ * macro. We have the names for each of these features in unix's text segment
+ * so we do not have to duplicate them and instead just look them up.
+ */
+/*ARGSUSED*/
+static int
+x86_featureset_cmd(uintptr_t addr, uint_t flags, int argc,
+ const mdb_arg_t *argv)
+{
+ uchar_t *fset;
+ GElf_Sym sym;
+ uintptr_t nptr;
+ char name[128];
+ int ii;
+
+ size_t sz = sizeof (uchar_t) * BT_SIZEOFMAP(NUM_X86_FEATURES);
+
+ if (argc != 0)
+ return (DCMD_USAGE);
+
+ if (mdb_lookup_by_name("x86_feature_names", &sym) == -1) {
+ mdb_warn("couldn't find x86_feature_names");
+ return (DCMD_ERR);
+ }
+
+ fset = mdb_zalloc(sz, UM_NOSLEEP);
+ if (fset == NULL) {
+ mdb_warn("failed to allocate memory for x86_featureset");
+ return (DCMD_ERR);
+ }
+
+ if (mdb_readvar(fset, "x86_featureset") != sz) {
+ mdb_warn("failed to read x86_featureset");
+ mdb_free(fset, sz);
+ return (DCMD_ERR);
+ }
+
+ for (ii = 0; ii < NUM_X86_FEATURES; ii++) {
+ if (!BT_TEST((ulong_t *)fset, ii))
+ continue;
+
+ if (mdb_vread(&nptr, sizeof (char *), sym.st_value +
+ sizeof (void *) * ii) != sizeof (char *)) {
+ mdb_warn("failed to read feature array %d", ii);
+ mdb_free(fset, sz);
+ return (DCMD_ERR);
+ }
+
+ if (mdb_readstr(name, sizeof (name), nptr) == -1) {
+ mdb_warn("failed to read feature %d", ii);
+ mdb_free(fset, sz);
+ return (DCMD_ERR);
+ }
+ mdb_printf("%s\n", name);
+ }
+
+ mdb_free(fset, sz);
+ return (DCMD_OK);
+}
+
static const mdb_dcmd_t dcmds[] = {
{ "gate_desc", ":", "dump a gate descriptor", gate_desc },
{ "idt", ":[-v]", "dump an IDT", idt },
@@ -765,6 +902,10 @@ static const mdb_dcmd_t dcmds[] = {
{ "mfntopfn", ":", "convert hypervisor machine page to physical page",
mfntopfn_dcmd },
{ "memseg_list", ":", "show memseg list", memseg_list },
+ { "scalehrtime", ":",
+ "scale an unscaled high-res time", scalehrtime_cmd },
+ { "x86_featureset", NULL, "dump the x86_featureset vector",
+ x86_featureset_cmd },
{ NULL }
};
diff --git a/usr/src/cmd/mdb/intel/amd64/libctf/Makefile b/usr/src/cmd/mdb/intel/amd64/libctf/Makefile
new file mode 100644
index 0000000000..a65e7bd2e8
--- /dev/null
+++ b/usr/src/cmd/mdb/intel/amd64/libctf/Makefile
@@ -0,0 +1,26 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2013, Joyent, Inc. All rights reserved.
+#
+
+MODULE = libctf.so
+MDBTGT = proc
+MODSRCS = mdb_modctf.c
+
+include ../../../../Makefile.cmd
+include ../../../../Makefile.cmd.64
+include ../../Makefile.amd64
+include ../../../Makefile.module
+
+MODSRCS_DIR = ../../../common/modules/ctf
+CPPFLAGS += -I$(SRC)/common/ctf
diff --git a/usr/src/cmd/mdb/intel/amd64/libumem/Makefile b/usr/src/cmd/mdb/intel/amd64/libumem/Makefile
index 704ff65873..ae22217a1b 100644
--- a/usr/src/cmd/mdb/intel/amd64/libumem/Makefile
+++ b/usr/src/cmd/mdb/intel/amd64/libumem/Makefile
@@ -22,6 +22,7 @@
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
+# Copyright (c) 2012 Joyent, Inc. All rights reserved.
MODULE = libumem.so
MDBTGT = proc
diff --git a/usr/src/cmd/mdb/intel/amd64/v8/Makefile b/usr/src/cmd/mdb/intel/amd64/v8/Makefile
new file mode 100644
index 0000000000..cf74ba1d1c
--- /dev/null
+++ b/usr/src/cmd/mdb/intel/amd64/v8/Makefile
@@ -0,0 +1,45 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2011, Joyent, Inc. All rights reserved.
+#
+
+MODULE = v8.so
+MDBTGT = proc
+
+MODSRCS_DIR = ../../../common/modules/v8
+
+MODSRCS = mdb_v8.c mdb_v8_cfg.c
+
+include ../../../../Makefile.cmd
+include ../../../../Makefile.cmd.64
+include ../../Makefile.amd64
+include ../../../Makefile.module
+
+dmod/$(MODULE) := LDLIBS += -lproc -lavl
+
+%.o: $(MODSRCS_DIR)/%.c
+ $(COMPILE.c) $<
+ $(CTFCONVERT_O)
+
+%.ln: $(MODSRCS_DIR)/%.c
+ $(LINT.c) -c $<
diff --git a/usr/src/cmd/mdb/intel/ia32/libctf/Makefile b/usr/src/cmd/mdb/intel/ia32/libctf/Makefile
new file mode 100644
index 0000000000..16923da4fe
--- /dev/null
+++ b/usr/src/cmd/mdb/intel/ia32/libctf/Makefile
@@ -0,0 +1,25 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2013, Joyent, Inc. All rights reserved.
+#
+
+MODULE = libctf.so
+MDBTGT = proc
+MODSRCS = mdb_modctf.c
+
+include ../../../../Makefile.cmd
+include ../../Makefile.ia32
+include ../../../Makefile.module
+
+MODSRCS_DIR = ../../../common/modules/ctf
+CPPFLAGS += -I$(SRC)/common/ctf
diff --git a/usr/src/cmd/mdb/intel/ia32/libumem/Makefile b/usr/src/cmd/mdb/intel/ia32/libumem/Makefile
index a1ab338f40..bde1be90ac 100644
--- a/usr/src/cmd/mdb/intel/ia32/libumem/Makefile
+++ b/usr/src/cmd/mdb/intel/ia32/libumem/Makefile
@@ -22,6 +22,7 @@
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
+# Copyright (c) 2012 Joyent, Inc. All rights reserved.
MODULE = libumem.so
MDBTGT = proc
diff --git a/usr/src/cmd/mdb/intel/ia32/v8/Makefile b/usr/src/cmd/mdb/intel/ia32/v8/Makefile
new file mode 100644
index 0000000000..c84532289e
--- /dev/null
+++ b/usr/src/cmd/mdb/intel/ia32/v8/Makefile
@@ -0,0 +1,44 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2011, Joyent, Inc. All rights reserved.
+#
+
+MODULE = v8.so
+MDBTGT = proc
+
+MODSRCS_DIR = ../../../common/modules/v8
+
+MODSRCS = mdb_v8.c mdb_v8_cfg.c
+
+include ../../../../Makefile.cmd
+include ../../Makefile.ia32
+include ../../../Makefile.module
+
+dmod/$(MODULE) := LDLIBS += -lproc -lavl
+
+%.o: $(MODSRCS_DIR)/%.c
+ $(COMPILE.c) $<
+ $(CTFCONVERT_O)
+
+%.ln: $(MODSRCS_DIR)/%.c
+ $(LINT.c) -c $<
diff --git a/usr/src/cmd/mdb/sparc/v7/libctf/Makefile b/usr/src/cmd/mdb/sparc/v7/libctf/Makefile
new file mode 100644
index 0000000000..477cf40df5
--- /dev/null
+++ b/usr/src/cmd/mdb/sparc/v7/libctf/Makefile
@@ -0,0 +1,25 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2013, Joyent, Inc. All rights reserved.
+#
+
+MODULE = libctf.so
+MDBTGT = proc
+MODSRCS = mdb_modctf.c
+
+include ../../../../Makefile.cmd
+include ../../Makefile.sparcv7
+include ../../../Makefile.module
+
+MODSRCS_DIR = ../../../common/modules/ctf
+CPPFLAGS += -I$(SRC)/common/ctf
diff --git a/usr/src/cmd/mdb/sparc/v7/libumem/Makefile b/usr/src/cmd/mdb/sparc/v7/libumem/Makefile
index 906d05d5ea..0488e6739a 100644
--- a/usr/src/cmd/mdb/sparc/v7/libumem/Makefile
+++ b/usr/src/cmd/mdb/sparc/v7/libumem/Makefile
@@ -22,6 +22,7 @@
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
+# Copyright (c) 2012 Joyent, Inc. All rights reserved.
MODULE = libumem.so
MDBTGT = proc
diff --git a/usr/src/cmd/mdb/sparc/v9/libctf/Makefile b/usr/src/cmd/mdb/sparc/v9/libctf/Makefile
new file mode 100644
index 0000000000..a55126e198
--- /dev/null
+++ b/usr/src/cmd/mdb/sparc/v9/libctf/Makefile
@@ -0,0 +1,26 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2013, Joyent, Inc. All rights reserved.
+#
+
+MODULE = libctf.so
+MDBTGT = proc
+MODSRCS = mdb_modctf.c
+
+include ../../../../Makefile.cmd
+include ../../../../Makefile.cmd.64
+include ../../Makefile.sparcv9
+include ../../../Makefile.module
+
+MODSRCS_DIR = ../../../common/modules/ctf
+CPPFLAGS += -I$(SRC)/common/ctf
diff --git a/usr/src/cmd/mdb/sparc/v9/libumem/Makefile b/usr/src/cmd/mdb/sparc/v9/libumem/Makefile
index 09ea0473c6..87ce977423 100644
--- a/usr/src/cmd/mdb/sparc/v9/libumem/Makefile
+++ b/usr/src/cmd/mdb/sparc/v9/libumem/Makefile
@@ -22,6 +22,7 @@
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
+# Copyright (c) 2012 Joyent, Inc. All rights reserved.
MODULE = libumem.so
MDBTGT = proc
diff --git a/usr/src/cmd/mdb/test/mtest.sh b/usr/src/cmd/mdb/test/mtest.sh
index f21d0faa21..f21d0faa21 100644..100755
--- a/usr/src/cmd/mdb/test/mtest.sh
+++ b/usr/src/cmd/mdb/test/mtest.sh
diff --git a/usr/src/cmd/mdb/test/typedef/tst.dellist.mdb.out b/usr/src/cmd/mdb/test/typedef/tst.dellist.mdb.out
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/usr/src/cmd/mdb/test/typedef/tst.dellist.mdb.out
diff --git a/usr/src/cmd/mdb/test/typedef/tst.emptylist.mdb.out b/usr/src/cmd/mdb/test/typedef/tst.emptylist.mdb.out
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/usr/src/cmd/mdb/test/typedef/tst.emptylist.mdb.out
diff --git a/usr/src/cmd/nicstat/Makefile b/usr/src/cmd/nicstat/Makefile
new file mode 100644
index 0000000000..935011119a
--- /dev/null
+++ b/usr/src/cmd/nicstat/Makefile
@@ -0,0 +1,31 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2013 Joyent, Inc. All rights reserved.
+#
+
+PROG = nicstat
+
+include ../Makefile.cmd
+
+all: $(PROG)
+
+install: all .WAIT $(ROOTPROG)
+
+clean:
+
+$(ROOTBINPROG): $(PROG)
+ $(INS.file)
+
+lint:
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/nicstat/nicstat.pl b/usr/src/cmd/nicstat/nicstat.pl
new file mode 100644
index 0000000000..29e62f1ffd
--- /dev/null
+++ b/usr/src/cmd/nicstat/nicstat.pl
@@ -0,0 +1,422 @@
+#!/usr/perl5/bin/perl -w
+#
+# nicstat - print network traffic, Kbyte/s read and written.
+# Solaris 8+, Perl (Sun::Solaris::Kstat).
+#
+# "netstat -i" only gives a packet count, this program gives Kbytes.
+#
+# 04-Apr-2011, ver 1.00J
+#
+# USAGE: nicstat [-hsz] [-i int[,int...]] | [interval [count]]
+#
+# -h # help
+# -s # print summary output
+# -z # skip zero lines
+# -i int[,int...] # print these instances only
+# eg,
+# nicstat # print summary since boot
+# nicstat 1 # print continually, every 1 second
+# nicstat 1 5 # print 5 times, every 1 second
+# nicstat -i hme0 # only examine hme0
+#
+# This prints out the KB/s transferred for all the network cards (NICs),
+# including packet counts and average sizes. The first line is the summary
+# data since boot.
+#
+# FIELDS:
+# Int Interface
+# rKB/s read Kbytes/s
+# wKB/s write Kbytes/s
+# rPk/s read Packets/s
+# wPk/s write Packets/s
+# rAvs read Average size, bytes
+# wAvs write Average size, bytes
+# %Util %Utilisation (r or w/ifspeed)
+# Sat Saturation (defer, nocanput, norecvbuf, noxmtbuf)
+#
+# NOTES:
+#
+# - Some unusual network cards may not provide all the details to Kstat,
+# (or provide different symbols). Check for newer versions of this program,
+# and the @Network array in the code below.
+# - Utilisation is based on bytes transferred divided by speed of the interface
+# (if the speed is known). It should be impossible to reach 100% as there
+# are overheads due to bus negotiation and timing.
+# - Loopback interfaces may only provide packet counts (if anything), and so
+# bytes and %util will always be zero. Newer versions of Solaris (newer than
+# Solaris 10 6/06) may provide loopback byte stats.
+# - Saturation is determined by counting read and write errors caused by the
+# interface running at saturation. This approach is not ideal, and the value
+# reported is often lower than it should be (eg, 0.0). Reading the rKB/s and
+# wKB/s fields may be more useful.
+#
+# SEE ALSO:
+# nicstat.c # the C version, also on my website
+# kstat -n hme0 [interval [count]] # or qfe0, ...
+# netstat -iI hme0 [interval [count]]
+# se netstat.se [interval] # SE Toolkit
+# se nx.se [interval] # SE Toolkit
+#
+# COPYRIGHT: Copyright (c) 2013 Brendan Gregg.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at docs/cddl1.txt or
+# http://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at docs/cddl1.txt.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Author: Brendan Gregg [Sydney, Australia]
+#
+# 18-Jul-2004 Brendan Gregg Created this.
+# 07-Jan-2005 " " added saturation value.
+# 07-Jan-2005 " " added summary style (from Peter Tribble).
+# 23-Jan-2006 " " Tweaked style.
+# 11-Aug-2006 " " Improved output neatness.
+# 30-Sep-2006 " " Added loopback, tweaked output.
+# 04-Apr-2011 brendan@joyent.com Updated for smartmachines.
+
+use strict;
+use Getopt::Std;
+use Sun::Solaris::Kstat;
+my $Kstat = Sun::Solaris::Kstat->new();
+
+
+#
+# Process command line args
+#
+usage() if defined $ARGV[0] and $ARGV[0] eq "--help";
+getopts('hi:sz') or usage();
+usage() if defined $main::opt_h;
+my $STYLE = defined $main::opt_s ? $main::opt_s : 0;
+my $SKIPZERO = defined $main::opt_z ? $main::opt_z : 0;
+
+# process [interval [count]],
+my ($interval, $loop_max);
+if (defined $ARGV[0]) {
+ $interval = $ARGV[0];
+ $loop_max = defined $ARGV[1] ? $ARGV[1] : 2**32;
+ usage() if $interval == 0;
+}
+else {
+ $interval = 1;
+ $loop_max = 1;
+}
+
+# check for -i,
+my %NetworkOnly; # network interfaces to print
+my $NETWORKONLY = 0; # match on network interfaces
+if (defined $main::opt_i) {
+ foreach my $net (split /,/, $main::opt_i) {
+ $NetworkOnly{$net} = 1;
+ }
+ $NETWORKONLY = 1;
+}
+
+# globals,
+my $loop = 0; # current loop number
+my $PAGESIZE = 20; # max lines per header
+my $line = $PAGESIZE; # counter for lines printed
+my %NetworkNames; # Kstat network interfaces
+my %NetworkData; # network interface data
+my %NetworkDataOld; # network interface data
+$main::opt_h = 0;
+$| = 1; # autoflush
+
+# kstat "link" module includes:
+my @Network = qw(dmfe bge be bnx ce eri external ge hme igb ige internal ixgbe le ppp qfe rtls);
+my %Network;
+$Network{$_} = 1 foreach (@Network);
+my $ZONENAME = `/usr/bin/zonename`;
+chomp $ZONENAME;
+
+### Determine network interfaces
+unless (find_nets()) {
+ if ($NETWORKONLY) {
+ print STDERR "ERROR1: $main::opt_i matched no network interfaces.\n";
+ }
+ else {
+ print STDERR "ERROR1: No network interfaces found!\n";
+ }
+ exit 1;
+}
+
+
+#
+# Main
+#
+while (1) {
+
+ ### Print Header
+ if ($line >= $PAGESIZE) {
+ if ($STYLE == 0) {
+ printf "%8s %12s %7s %7s %7s %7s %7s %7s %6s %5s\n",
+ "Time", "Int", "rKB/s", "wKB/s", "rPk/s", "wPk/s", "rAvs",
+ "wAvs", "%Util", "Sat";
+ }
+ elsif ($STYLE == 1) {
+ printf "%8s %12s %14s %14s\n", "Time", "Int", "rKB/s", "wKB/s";
+ }
+
+ $line = 0;
+ }
+
+ ### Get new data
+ my (@NetworkData) = fetch_net_data();
+
+ foreach my $network_data (@NetworkData) {
+
+ ### Extract values
+ my ($int, $rbytes, $wbytes, $rpackets, $wpackets, $speed, $sat, $time)
+ = split /:/, $network_data;
+
+ ### Retrieve old values
+ my ($old_rbytes, $old_wbytes, $old_rpackets, $old_wpackets, $old_sat,
+ $old_time);
+ if (defined $NetworkDataOld{$int}) {
+ ($old_rbytes, $old_wbytes, $old_rpackets, $old_wpackets,
+ $old_sat, $old_time) = split /:/, $NetworkDataOld{$int};
+ }
+ else {
+ $old_rbytes = $old_wbytes = $old_rpackets = $old_wpackets
+ = $old_sat = $old_time = 0;
+ }
+
+ #
+ # Calculate statistics
+ #
+
+ # delta time
+ my $tdiff = $time - $old_time;
+
+ # per second values
+ my $rbps = ($rbytes - $old_rbytes) / $tdiff;
+ my $wbps = ($wbytes - $old_wbytes) / $tdiff;
+ my $rkps = $rbps / 1024;
+ my $wkps = $wbps / 1024;
+ my $rpps = ($rpackets - $old_rpackets) / $tdiff;
+ my $wpps = ($wpackets - $old_wpackets) / $tdiff;
+ my $ravs = $rpps > 0 ? $rbps / $rpps : 0;
+ my $wavs = $wpps > 0 ? $wbps / $wpps : 0;
+
+ # skip zero lines if asked
+ next if $SKIPZERO and ($rbps + $wbps) == 0;
+
+ # % utilisation
+ my $util;
+ if ($speed > 0) {
+ # the following has a mysterious "800", it is 100
+ # for the % conversion, and 8 for bytes2bits.
+ my $rutil = $rbps * 800 / $speed;
+ my $wutil = $wbps * 800 / $speed;
+ $util = $rutil > $wutil ? $rutil : $wutil;
+ $util = 100 if $util > 100;
+ }
+ else {
+ $util = 0;
+ }
+
+ # saturation per sec
+ my $sats = ($sat - $old_sat) / $tdiff;
+
+ #
+ # Print statistics
+ #
+ if ($rbps ne "") {
+ my @Time = localtime();
+
+ if ($STYLE == 0) {
+ printf "%02d:%02d:%02d %12s ",
+ $Time[2], $Time[1], $Time[0], $int;
+ print_neat($rkps);
+ print_neat($wkps);
+ print_neat($rpps);
+ print_neat($wpps);
+ print_neat($ravs);
+ print_neat($wavs);
+ printf "%6.2f %5.2f\n", $util, $sats;
+ }
+ elsif ($STYLE == 1) {
+ printf "%02d:%02d:%02d %12s %14.3f %14.3f\n",
+ $Time[2], $Time[1], $Time[0], $int, $rkps, $wkps;
+ }
+
+ $line++;
+
+ # for multiple interfaces, always print the header
+ $line += $PAGESIZE if @NetworkData > 1;
+ }
+
+ ### Store old values
+ $NetworkDataOld{$int}
+ = "$rbytes:$wbytes:$rpackets:$wpackets:$sat:$time";
+ }
+
+ ### Check for end
+ last if ++$loop == $loop_max;
+
+ ### Interval
+ sleep $interval;
+}
+
+
+# find_nets - walk Kstat to discover network interfaces.
+#
+# This walks %Kstat and populates a %NetworkNames with discovered
+# network interfaces.
+#
+sub find_nets {
+ my $found = 0;
+
+ ### Loop over all Kstat modules
+ foreach my $module (keys %$Kstat) {
+ my $Modules = $Kstat->{$module};
+
+ foreach my $instance (keys %$Modules) {
+ my $Instances = $Modules->{$instance};
+
+ foreach my $name (keys %$Instances) {
+
+ ### Skip interface if asked
+ if ($NETWORKONLY) {
+ next unless $NetworkOnly{$name};
+ }
+
+ my $Names = $Instances->{$name};
+
+ # Check this is a network device.
+ # Matching on ifspeed has been more reliable than "class"
+ # we also match loopback and "link" interfaces.
+ if (defined $$Names{ifspeed} || $module eq "lo"
+ || $module eq "link") {
+ next if $name eq "mac";
+ if ($module eq "link") {
+ my $nname = $name;
+ $nname =~ s/\d+$//;
+ next unless defined $Network{$nname}
+ or $ZONENAME eq $nname
+ or $ZONENAME eq "global";
+ }
+ ### Save network interface
+ $NetworkNames{$name} = $Names;
+ $found++;
+ }
+ }
+ }
+ }
+
+ return $found;
+}
+
+# fetch - fetch Kstat data for the network interfaces.
+#
+# This uses the interfaces in %NetworkNames and returns useful Kstat data.
+# The Kstat values used are rbytes64, obytes64, ipackets64, opackets64
+# (or the 32 bit versions if the 64 bit values are not there).
+#
+sub fetch_net_data {
+ my ($rbytes, $wbytes, $rpackets, $wpackets, $speed, $time);
+ my @NetworkData = ();
+
+ $Kstat->update();
+
+ ### Loop over previously found network interfaces
+ foreach my $name (sort keys %NetworkNames) {
+ my $Names = $NetworkNames{$name};
+
+ if (defined $$Names{opackets}) {
+
+ ### Fetch write bytes
+ if (defined $$Names{obytes64}) {
+ $rbytes = $$Names{rbytes64};
+ $wbytes = $$Names{obytes64};
+ }
+ elsif (defined $$Names{obytes}) {
+ $rbytes = $$Names{rbytes};
+ $wbytes = $$Names{obytes};
+ } else {
+ $rbytes = $wbytes = 0;
+ }
+
+ ### Fetch read bytes
+ if (defined $$Names{opackets64}) {
+ $rpackets = $$Names{ipackets64};
+ $wpackets = $$Names{opackets64};
+ }
+ else {
+ $rpackets = $$Names{ipackets};
+ $wpackets = $$Names{opackets};
+ }
+
+ ### Fetch interface speed
+ if (defined $$Names{ifspeed}) {
+ $speed = $$Names{ifspeed};
+ }
+ else {
+ # if we can't fetch the speed, print the
+ # %Util as 0.0 . To do this we,
+ $speed = 2 ** 48;
+ }
+
+ ### Determine saturation value
+ my $sat = 0;
+ if (defined $$Names{nocanput} or defined $$Names{norcvbuf}) {
+ $sat += defined $$Names{defer} ? $$Names{defer} : 0;
+ $sat += defined $$Names{nocanput} ? $$Names{nocanput} : 0;
+ $sat += defined $$Names{norcvbuf} ? $$Names{norcvbuf} : 0;
+ $sat += defined $$Names{noxmtbuf} ? $$Names{noxmtbuf} : 0;
+ }
+
+ ### use the last snaptime value,
+ $time = $$Names{snaptime};
+
+ ### store data
+ push @NetworkData, "$name:$rbytes:$wbytes:" .
+ "$rpackets:$wpackets:$speed:$sat:$time";
+ }
+ }
+
+ return @NetworkData;
+}
+
+# print_neat - print a float with decimal places if appropriate.
+#
+# This specifically keeps the width to 7 characters, if possible, plus
+# a trailing space.
+#
+sub print_neat {
+ my $num = shift;
+ if ($num >= 100000) {
+ printf "%7d ", $num;
+ } elsif ($num >= 100) {
+ printf "%7.1f ", $num;
+ } else {
+ printf "%7.2f ", $num;
+ }
+}
+
+# usage - print usage and exit.
+#
+sub usage {
+ print STDERR <<END;
+USAGE: nicstat [-hsz] [-i int[,int...]] | [interval [count]]
+ eg, nicstat # print summary since boot
+ nicstat 1 # print continually every 1 second
+ nicstat 1 5 # print 5 times, every 1 second
+ nicstat -s # summary output
+ nicstat -i hme0 # print hme0 only
+END
+ exit 1;
+}
diff --git a/usr/src/cmd/nscd/Makefile b/usr/src/cmd/nscd/Makefile
index aa478fa3b0..1e51819bb2 100644
--- a/usr/src/cmd/nscd/Makefile
+++ b/usr/src/cmd/nscd/Makefile
@@ -29,6 +29,7 @@ MANIFEST= name-service-cache.xml
SVCMETHOD= svc-nscd
include ../Makefile.cmd
+include ../Makefile.ctf
ROOTMANIFESTDIR= $(ROOTSVCSYSTEM)
diff --git a/usr/src/cmd/nscd/svc-nscd b/usr/src/cmd/nscd/svc-nscd
index 0c6aa1bc4b..78b318bf87 100644
--- a/usr/src/cmd/nscd/svc-nscd
+++ b/usr/src/cmd/nscd/svc-nscd
@@ -23,8 +23,8 @@
#
# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
+# Copyright 2012 Joyent, Inc. All rights reserved.
#
-#ident "%Z%%M% %I% %E% SMI"
. /lib/svc/share/smf_include.sh
@@ -51,7 +51,7 @@ if (smf_is_system_labeled); then
$SMF_FMRI`
fi
if [ "$duration" != "transient" ]; then
- ( while true ; do sleep 3600 ; done ) &
+ exit $SMF_EXIT_NODAEMON
fi
# The real daemon is not started in non-global zones,
diff --git a/usr/src/cmd/passwd/Makefile b/usr/src/cmd/passwd/Makefile
index 561357a16c..079e8b6050 100644
--- a/usr/src/cmd/passwd/Makefile
+++ b/usr/src/cmd/passwd/Makefile
@@ -33,6 +33,8 @@ lint := LDLIBS += -lpasswdutil
LDFLAGS += $(ZIGNORE)
LDLIBS += -lbsm -lpam -lnsl
+CPPFLAGS += -D__EXTENSIONS__
+
FILEMODE = 06555
XGETFLAGS += -a -x $(PROG).xcl
diff --git a/usr/src/cmd/pgrep/pgrep.c b/usr/src/cmd/pgrep/pgrep.c
index 4531f11267..857f6ef818 100644
--- a/usr/src/cmd/pgrep/pgrep.c
+++ b/usr/src/cmd/pgrep/pgrep.c
@@ -597,6 +597,8 @@ main(int argc, char *argv[])
const char *optstr;
optdesc_t *optd;
int nmatches, c;
+ const char *zroot;
+ char buf[PATH_MAX];
DIR *dirp;
@@ -626,6 +628,12 @@ main(int argc, char *argv[])
opterr = 0;
+ zroot = zone_get_nroot();
+ if (zroot != NULL) {
+ (void) snprintf(buf, sizeof (buf), "%s/%s", zroot, g_procdir);
+ g_procdir = buf;
+ }
+
while (optind < argc) {
while ((c = getopt(argc, argv, optstr)) != (int)EOF) {
diff --git a/usr/src/cmd/prstat/prstat.c b/usr/src/cmd/prstat/prstat.c
index fc16e435f6..982c860d0d 100644
--- a/usr/src/cmd/prstat/prstat.c
+++ b/usr/src/cmd/prstat/prstat.c
@@ -26,6 +26,7 @@
* Use is subject to license terms.
*
* Portions Copyright 2009 Chad Mynhier
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -181,6 +182,33 @@ static optdesc_t opts = {
-1 /* sort in decreasing order */
};
+
+static int
+proc_snprintf(char *_RESTRICT_KYWD s, size_t n,
+ const char *_RESTRICT_KYWD fmt, ...)
+{
+ static boolean_t ptools_zroot_valid = B_FALSE;
+ static const char *ptools_zroot = NULL;
+ va_list args;
+ int ret, nret = 0;
+
+ if (ptools_zroot_valid == B_FALSE) {
+ ptools_zroot_valid = B_TRUE;
+ ptools_zroot = zone_get_nroot();
+ }
+
+ if (ptools_zroot != NULL) {
+ nret = snprintf(s, n, "%s", ptools_zroot);
+ if (nret > n)
+ return (nret);
+ }
+ va_start(args, fmt);
+ ret = vsnprintf(s + nret, n - nret, fmt, args);
+ va_end(args);
+
+ return (ret + nret);
+}
+
/*
* Print timestamp as decimal reprentation of time_t value (-d u was specified)
* or the standard date format (-d d was specified).
@@ -236,7 +264,12 @@ list_getsize(list_t *list)
size_t i;
uint_t flags = 0;
int ret;
- size_t physmem = sysconf(_SC_PHYS_PAGES) * pagesize;
+ size_t physmem;
+
+ if (!(opts.o_outpmode & OPT_VMUSAGE))
+ return;
+
+ physmem = sysconf(_SC_PHYS_PAGES) * pagesize;
/*
* Determine what swap/rss results to calculate. getvmusage() will
@@ -848,9 +881,9 @@ lwp_update(lwp_info_t *lwp, pid_t pid, id_t lwpid, struct prusage *usage)
static int
read_procfile(fd_t **fd, char *pidstr, char *file, void *buf, size_t bufsize)
{
- char procfile[MAX_PROCFS_PATH];
+ char procfile[PATH_MAX];
- (void) snprintf(procfile, MAX_PROCFS_PATH,
+ (void) proc_snprintf(procfile, PATH_MAX,
"/proc/%s/%s", pidstr, file);
if ((*fd = fd_open(procfile, O_RDONLY, *fd)) == NULL)
return (1);
@@ -1376,6 +1409,7 @@ main(int argc, char **argv)
int timeout;
struct pollfd pollset;
char key;
+ char procpath[PATH_MAX];
(void) setlocale(LC_ALL, "");
(void) textdomain(TEXT_DOMAIN);
@@ -1386,7 +1420,7 @@ main(int argc, char **argv)
pagesize = sysconf(_SC_PAGESIZE);
while ((opt = getopt(argc, argv,
- "vcd:HmarRLtu:U:n:p:C:P:h:s:S:j:k:TJWz:Z")) != (int)EOF) {
+ "vVcd:HmarRLtu:U:n:p:C:P:h:s:S:j:k:TJWz:Z")) != (int)EOF) {
switch (opt) {
case 'r':
opts.o_outpmode |= OPT_NORESOLVE;
@@ -1466,6 +1500,9 @@ main(int argc, char **argv)
while (p = strtok(NULL, ", "))
add_uid(&ruid_tbl, p);
break;
+ case 'V':
+ opts.o_outpmode |= OPT_VMUSAGE;
+ break;
case 'p':
fill_table(&pid_tbl, optarg, 'p');
break;
@@ -1575,7 +1612,8 @@ main(int argc, char **argv)
list_setkeyfunc(NULL, &opts, &lgroups, LT_LGRPS);
if (opts.o_outpmode & OPT_TERMCAP)
curses_on();
- if ((procdir = opendir("/proc")) == NULL)
+ (void) proc_snprintf(procpath, sizeof (procpath), "/proc");
+ if ((procdir = opendir(procpath)) == NULL)
Die(gettext("cannot open /proc directory\n"));
if (opts.o_outpmode & OPT_TTY) {
(void) printf(gettext("Please wait...\r"));
diff --git a/usr/src/cmd/prstat/prstat.h b/usr/src/cmd/prstat/prstat.h
index 293123c5b9..bf38b3e2bd 100644
--- a/usr/src/cmd/prstat/prstat.h
+++ b/usr/src/cmd/prstat/prstat.h
@@ -26,6 +26,7 @@
* Use is subject to license terms.
*
* Portions Copyright 2009 Chad Mynhier
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
#ifndef _PRSTAT_H
@@ -72,6 +73,7 @@ extern "C" {
#define OPT_ZONES 0x2000 /* report about zones */
#define OPT_PSETS 0x4000 /* report for specified psets */
#define OPT_LGRP 0x8000 /* report home lgroups */
+#define OPT_VMUSAGE 0x10000 /* print accurate, but expensive RSS */
#define OPT_UDATE 0x20000 /* print unix timestamp */
#define OPT_DDATE 0x40000 /* print timestamp in date(1) format */
#define OPT_NORESOLVE 0x80000 /* no nsswitch lookups */
diff --git a/usr/src/cmd/prstat/prutil.c b/usr/src/cmd/prstat/prutil.c
index 0f9cbd6c4d..7def1bd40b 100644
--- a/usr/src/cmd/prstat/prutil.c
+++ b/usr/src/cmd/prstat/prutil.c
@@ -25,6 +25,7 @@
* Use is subject to license terms.
*
* Portions Copyright 2009 Chad Mynhier
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -108,7 +109,7 @@ void
Usage()
{
(void) fprintf(stderr, gettext(
- "Usage:\tprstat [-acHJLmrRtTvWZ] [-u euidlist] [-U uidlist]\n"
+ "Usage:\tprstat [-acHJLmrRtTvVWZ] [-u euidlist] [-U uidlist]\n"
"\t[-p pidlist] [-P cpulist] [-C psrsetlist] [-h lgrouplist]\n"
"\t[-j projidlist] [-k taskidlist] [-z zoneidlist]\n"
"\t[-s key | -S key] [-n nprocs[,nusers]] [-d d|u]\n"
diff --git a/usr/src/cmd/prtconf/prtconf.c b/usr/src/cmd/prtconf/prtconf.c
index 551144976e..0982f1d40b 100644
--- a/usr/src/cmd/prtconf/prtconf.c
+++ b/usr/src/cmd/prtconf/prtconf.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2011, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -168,7 +169,7 @@ cleanup_path(const char *input_path, char *path)
#ifdef DEBUG
static const char *optstring = "abcdDvVxpPFf:M:dLuC";
#else
-static const char *optstring = "abcdDvVxpPFf:uC";
+static const char *optstring = "abcdDvVxmpPFf:uC";
#endif /* DEBUG */
int
@@ -201,6 +202,9 @@ main(int argc, char *argv[])
case 'v':
++opts.o_verbose;
break;
+ case 'm':
+ ++opts.o_memory;
+ break;
case 'p':
++opts.o_prominfo;
break;
@@ -338,34 +342,42 @@ main(int argc, char *argv[])
return (0);
}
- ret = sysinfo(SI_HW_PROVIDER, hw_provider, sizeof (hw_provider));
- /*
- * If 0 bytes are returned (the system returns '1', for the \0),
- * we're probably on x86, default to Oracle.
- */
- if (ret <= 1) {
- (void) strncpy(hw_provider, "Oracle Corporation",
+ if (!opts.o_memory) {
+ ret = sysinfo(SI_HW_PROVIDER, hw_provider,
sizeof (hw_provider));
+ /*
+ * If 0 bytes are returned (the system returns '1', for the \0),
+ * we're probably on x86, and there has been no si-hw-provider
+ * set in /etc/bootrc, default to Joyent.
+ */
+ if (ret <= 1) {
+ (void) strncpy(hw_provider, "Joyent",
+ sizeof (hw_provider));
+ }
+ (void) printf("System Configuration: %s %s\n", hw_provider,
+ opts.o_uts.machine);
}
- (void) printf("System Configuration: %s %s\n", hw_provider,
- opts.o_uts.machine);
pagesize = sysconf(_SC_PAGESIZE);
npages = sysconf(_SC_PHYS_PAGES);
- (void) printf("Memory size: ");
- if (pagesize == -1 || npages == -1)
- (void) printf("unable to determine\n");
- else {
- const int64_t kbyte = 1024;
+ if (pagesize == -1 || npages == -1) {
+ if (opts.o_memory) {
+ (void) printf("0\n");
+ return (1);
+ } else {
+ (void) printf("Memory size: unable to determine\n");
+ }
+ } else {
const int64_t mbyte = 1024 * 1024;
int64_t ii = (int64_t)pagesize * npages;
- if (ii >= mbyte)
- (void) printf("%ld Megabytes\n",
+ if (opts.o_memory) {
+ (void) printf("%ld\n", (long)((ii+mbyte-1) / mbyte));
+ return (0);
+ } else {
+ (void) printf("Memory size: %ld Megabytes\n",
(long)((ii+mbyte-1) / mbyte));
- else
- (void) printf("%ld Kilobytes\n",
- (long)((ii+kbyte-1) / kbyte));
+ }
}
if (opts.o_prominfo) {
diff --git a/usr/src/cmd/prtconf/prtconf.h b/usr/src/cmd/prtconf/prtconf.h
index 5f9abe968c..0284cc8af1 100644
--- a/usr/src/cmd/prtconf/prtconf.h
+++ b/usr/src/cmd/prtconf/prtconf.h
@@ -54,6 +54,7 @@ struct prt_opts {
int o_drv_name;
int o_pseudodevs;
int o_fbname;
+ int o_memory;
int o_noheader;
int o_prominfo;
int o_productinfo;
diff --git a/usr/src/cmd/ptools/Makefile.bld b/usr/src/cmd/ptools/Makefile.bld
index f5b50f5ea1..e8bb27b043 100644
--- a/usr/src/cmd/ptools/Makefile.bld
+++ b/usr/src/cmd/ptools/Makefile.bld
@@ -26,6 +26,8 @@
PROG:sh = basename `cd ..; pwd`
+include ../../../Makefile.ctf
+
OBJS = $(PROG).o
SRCS = ../$(PROG).c
@@ -69,6 +71,12 @@ CERRWARN_pargs += -_gcc=-Wno-type-limits
CERRWARN += $(CERRWARN_$(PROG))
+#
+# Common code definitions
+#
+COBJS = ptools_common.o
+CINC = -I../../common
+
# pargs depends on ../../common/elfcap components
# pmadvise depends on pmap components
@@ -79,14 +87,32 @@ CPPFLAGS_pargs = -I$(ELFCAP)
OBJS_pargs = elfcap.o
SRCS_pargs = $(ELFCAP)/elfcap.c
-CPPFLAGS_pmap = -I$(PMAP)
-OBJS_pmap = pmap_common.o
+CPPFLAGS_pmap = -I$(PMAP) $(CINC)
+OBJS_pmap = pmap_common.o $(COBJS)
SRCS_pmap = $(PMAP)/pmap_common.c
-CPPFLAGS_pmadvise = -I$(PMAP)
-OBJS_pmadvise = pmap_common.o
+CPPFLAGS_pmadvise = -I$(PMAP) $(CINC)
+OBJS_pmadvise = pmap_common.o $(COBJS)
SRCS_pmadvise = $(PMAP)/pmap_common.c
+CPPFLAGS_preap = $(CINC)
+OBJS_preap = $(COBJS)
+
+CPPFLAGS_psig = $(CINC)
+OBJS_psig = $(COBJS)
+
+CPPFLAGS_ptime = $(CINC)
+OBJS_ptime = $(COBJS)
+
+CPPFLAGS_ptree = $(CINC)
+OBJS_ptree = $(COBJS)
+
+CPPFLAGS_pwait = $(CINC)
+OBJS_pwait = $(COBJS)
+
+CPPFLAGS_pwdx = $(CINC)
+OBJS_pwdx = $(COBJS)
+
CPPFLAGS += $(CPPFLAGS_$(PROG))
OBJS += $(OBJS_$(PROG))
SRCS += $(SRCS_$(PROG))
@@ -99,12 +125,19 @@ INSTALL_LEGACY=$(RM) $(ROOTPROCBINSYMLINK) ; \
elfcap.o: $(ELFCAP)/elfcap.c
$(COMPILE.c) -o $@ $(ELFCAP)/elfcap.c
+ $(POST_PROCESS_O)
pmap_common.o: $(PMAP)/pmap_common.c
$(COMPILE.c) -o $@ $(PMAP)/pmap_common.c
+ $(POST_PROCESS_O)
%.o: ../%.c
$(COMPILE.c) $<
+ $(POST_PROCESS_O)
+
+%.o: ../../common/%.c
+ $(COMPILE.c) $<
+ $(POST_PROCESS_O)
all: $(PROG)
diff --git a/usr/src/cmd/ptools/common/ptools_common.c b/usr/src/cmd/ptools/common/ptools_common.c
new file mode 100644
index 0000000000..a747ab213e
--- /dev/null
+++ b/usr/src/cmd/ptools/common/ptools_common.c
@@ -0,0 +1,50 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/feature_tests.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <zone.h>
+
+/*
+ * Common routines for ptools.
+ */
+
+int
+proc_snprintf(char *_RESTRICT_KYWD s, size_t n,
+ const char *_RESTRICT_KYWD fmt, ...)
+{
+ static boolean_t ptools_zroot_valid = B_FALSE;
+ static const char *ptools_zroot = NULL;
+ va_list args;
+ int ret, nret = 0;
+
+ if (ptools_zroot_valid == B_FALSE) {
+ ptools_zroot_valid = B_TRUE;
+ ptools_zroot = zone_get_nroot();
+ }
+
+ if (ptools_zroot != NULL) {
+ nret = snprintf(s, n, "%s", ptools_zroot);
+ if (nret > n)
+ return (nret);
+ }
+ va_start(args, fmt);
+ ret = vsnprintf(s + nret, n - nret, fmt, args);
+ va_end(args);
+
+ return (ret + nret);
+}
diff --git a/usr/src/cmd/ptools/common/ptools_common.h b/usr/src/cmd/ptools/common/ptools_common.h
new file mode 100644
index 0000000000..52bcae9e51
--- /dev/null
+++ b/usr/src/cmd/ptools/common/ptools_common.h
@@ -0,0 +1,36 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _PTOOLS_COMMON_H
+#define _PTOOLS_COMMON_H
+
+#include <sys/feature_tests.h>
+
+/*
+ * Common functions for the ptools.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int proc_snprintf(char *_RESTRICT_KYWD, size_t,
+ const char *_RESTRICT_KYWD, ...);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _PTOOLS_COMMON_H */
diff --git a/usr/src/cmd/ptools/pargs/pargs.c b/usr/src/cmd/ptools/pargs/pargs.c
index 8c3cbba962..2e00c92f67 100644
--- a/usr/src/cmd/ptools/pargs/pargs.c
+++ b/usr/src/cmd/ptools/pargs/pargs.c
@@ -23,7 +23,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
/*
@@ -791,6 +791,7 @@ static struct aux_id aux_arr[] = {
{ AT_BASE, "AT_BASE", at_null },
{ AT_FLAGS, "AT_FLAGS", at_null },
{ AT_ENTRY, "AT_ENTRY", at_null },
+ { AT_RANDOM, "AT_RANDOM", at_null },
{ AT_SUN_UID, "AT_SUN_UID", at_uid },
{ AT_SUN_RUID, "AT_SUN_RUID", at_uid },
{ AT_SUN_GID, "AT_SUN_GID", at_gid },
@@ -810,9 +811,11 @@ static struct aux_id aux_arr[] = {
{ AT_SUN_AUXFLAGS, "AT_SUN_AUXFLAGS", at_flags },
{ AT_SUN_EMULATOR, "AT_SUN_EMULATOR", at_str },
{ AT_SUN_BRANDNAME, "AT_SUN_BRANDNAME", at_str },
+ { AT_SUN_BRAND_NROOT, "AT_SUN_BRAND_NROOT", at_str },
{ AT_SUN_BRAND_AUX1, "AT_SUN_BRAND_AUX1", at_null },
{ AT_SUN_BRAND_AUX2, "AT_SUN_BRAND_AUX2", at_null },
- { AT_SUN_BRAND_AUX3, "AT_SUN_BRAND_AUX3", at_null }
+ { AT_SUN_BRAND_AUX3, "AT_SUN_BRAND_AUX3", at_null },
+ { AT_SUN_BRAND_AUX4, "AT_SUN_BRAND_AUX4", at_null }
};
#define N_AT_ENTS (sizeof (aux_arr) / sizeof (struct aux_id))
diff --git a/usr/src/cmd/ptools/pfiles/pfiles.c b/usr/src/cmd/ptools/pfiles/pfiles.c
index fbe54fc0dd..8e09ef534b 100644
--- a/usr/src/cmd/ptools/pfiles/pfiles.c
+++ b/usr/src/cmd/ptools/pfiles/pfiles.c
@@ -24,7 +24,7 @@
* Copyright 2012 DEY Storage Systems, Inc. All rights reserved.
*/
/*
- * Copyright (c) 2013 Joyent, Inc. All Rights reserved.
+ * Copyright (c) 2014 Joyent, Inc. All Rights reserved.
*/
#include <stdio.h>
@@ -271,15 +271,14 @@ show_file(void *data, prfdinfo_t *info)
(mode & S_IFMT) == S_IFDOOR);
if (Pstate(Pr) != PS_DEAD) {
- char *dev;
+ char *dev = NULL;
if ((mode & S_IFMT) == S_IFSOCK)
dosocket(Pr, info->pr_fd);
else if ((mode & S_IFMT) == S_IFIFO)
dofifo(Pr, info->pr_fd);
- if ((mode & S_IFMT) == S_IFCHR &&
- (dev = strrchr(info->pr_path, ':')) != NULL) {
+ if ((mode & S_IFMT) == S_IFCHR) {
/*
* There's no elegant way to determine
* if a character device supports TLI,
@@ -291,11 +290,20 @@ show_file(void *data, prfdinfo_t *info)
"tcp", "tcp6", "udp", "udp6", NULL
};
- dev++; /* skip past the `:' */
- for (i = 0; tlidevs[i] != NULL; i++) {
- if (strcmp(dev, tlidevs[i]) == 0) {
- dotli(Pr, info->pr_fd);
- break;
+ /* global zone: /devices paths */
+ dev = strrchr(info->pr_path, ':');
+ /* also check the /dev path for zones */
+ if (dev == NULL)
+ dev = strrchr(info->pr_path, '/');
+ if (dev != NULL) {
+ dev++; /* skip past the `:' */
+
+ for (i = 0; tlidevs[i] != NULL; i++) {
+ if (strcmp(dev, tlidevs[i]) ==
+ 0) {
+ dotli(Pr, info->pr_fd);
+ break;
+ }
}
}
}
@@ -549,6 +557,7 @@ show_sockaddr(const char *str, struct sockaddr *sa, socklen_t len)
case AF_IPX: p = "AF_IPX"; break;
case AF_ROUTE: p = "AF_ROUTE"; break;
case AF_LINK: p = "AF_LINK"; break;
+ case AF_LX_NETLINK: p = "AF_LX_NETLINK"; break;
}
(void) printf("\t%s: %s\n", str, p);
@@ -796,9 +805,11 @@ dotli(struct ps_prochandle *Pr, int fd)
strcmd.sc_cmd = TI_GETMYNAME;
if (pr_ioctl(Pr, fd, _I_CMD, &strcmd, sizeof (strcmd)) == 0)
- show_sockaddr("sockname", (void *)&strcmd.sc_buf, 0);
+ show_sockaddr("sockname", (void *)&strcmd.sc_buf,
+ (size_t)strcmd.sc_len);
strcmd.sc_cmd = TI_GETPEERNAME;
if (pr_ioctl(Pr, fd, _I_CMD, &strcmd, sizeof (strcmd)) == 0)
- show_sockaddr("peername", (void *)&strcmd.sc_buf, 0);
+ show_sockaddr("peername", (void *)&strcmd.sc_buf,
+ (size_t)strcmd.sc_len);
}
diff --git a/usr/src/cmd/ptools/pmap/pmap.c b/usr/src/cmd/ptools/pmap/pmap.c
index 78bfa6b596..03f8bde791 100644
--- a/usr/src/cmd/ptools/pmap/pmap.c
+++ b/usr/src/cmd/ptools/pmap/pmap.c
@@ -22,6 +22,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#include <stdio.h>
@@ -42,6 +43,7 @@
#include <sys/mman.h>
#include <sys/lgrp_user.h>
#include <libproc.h>
+#include "ptools_common.h"
#include "pmap_common.h"
@@ -199,7 +201,7 @@ main(int argc, char **argv)
const char *bar;
struct rlimit rlim;
struct stat64 statbuf;
- char buf[128];
+ char buf[PATH_MAX];
int mapfd;
int prg_gflags = PGRAB_RDONLY;
int prr_flags = 0;
@@ -358,7 +360,7 @@ main(int argc, char **argv)
proc_unctrl_psinfo(&psinfo);
if (Pstate(Pr) != PS_DEAD) {
- (void) snprintf(buf, sizeof (buf),
+ (void) proc_snprintf(buf, sizeof (buf),
"/proc/%d/map", (int)psinfo.pr_pid);
if ((mapfd = open(buf, O_RDONLY)) < 0) {
(void) fprintf(stderr, "%s: cannot "
@@ -590,7 +592,7 @@ rmapping_iter(struct ps_prochandle *Pr, proc_map_f *func, void *cd)
prmap_t *prmapp, *pmp;
ssize_t n;
- (void) snprintf(mapname, sizeof (mapname),
+ (void) proc_snprintf(mapname, sizeof (mapname),
"/proc/%d/rmap", (int)Pstatus(Pr)->pr_pid);
if ((mapfd = open(mapname, O_RDONLY)) < 0 || fstat(mapfd, &st) != 0) {
@@ -631,7 +633,7 @@ xmapping_iter(struct ps_prochandle *Pr, proc_xmap_f *func, void *cd, int doswap)
prxmap_t *prmapp, *pmp;
ssize_t n;
- (void) snprintf(mapname, sizeof (mapname),
+ (void) proc_snprintf(mapname, sizeof (mapname),
"/proc/%d/xmap", (int)Pstatus(Pr)->pr_pid);
if ((mapfd = open(mapname, O_RDONLY)) < 0 || fstat(mapfd, &st) != 0) {
diff --git a/usr/src/cmd/ptools/pmap/pmap_common.c b/usr/src/cmd/ptools/pmap/pmap_common.c
index fff55ffdbc..81f42d67f7 100644
--- a/usr/src/cmd/ptools/pmap/pmap_common.c
+++ b/usr/src/cmd/ptools/pmap/pmap_common.c
@@ -37,6 +37,7 @@
#include <sys/types.h>
#include "pmap_common.h"
+#include "ptools_common.h"
/*
* We compare the high memory addresses since stacks are faulted in from
@@ -88,7 +89,7 @@ make_name(struct ps_prochandle *Pr, int lflag, uintptr_t addr,
return (NULL);
/* first see if we can find a path via /proc */
- (void) snprintf(path, sizeof (path), "/proc/%d/path/%s",
+ (void) proc_snprintf(path, sizeof (path), "/proc/%d/path/%s",
(int)Psp->pr_pid, mapname);
len = readlink(path, buf, bufsz - 1);
if (len >= 0) {
@@ -97,7 +98,7 @@ make_name(struct ps_prochandle *Pr, int lflag, uintptr_t addr,
}
/* fall back to object information reported by /proc */
- (void) snprintf(path, sizeof (path),
+ (void) proc_snprintf(path, sizeof (path),
"/proc/%d/object/%s", (int)Psp->pr_pid, mapname);
if (stat(path, &statb) == 0) {
dev_t dev = statb.st_dev;
diff --git a/usr/src/cmd/ptools/preap/preap.c b/usr/src/cmd/ptools/preap/preap.c
index 8d30b8027c..6d8eb75611 100644
--- a/usr/src/cmd/ptools/preap/preap.c
+++ b/usr/src/cmd/ptools/preap/preap.c
@@ -24,8 +24,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
@@ -37,6 +35,8 @@
#include <sys/types.h>
#include <sys/wait.h>
#include <libproc.h>
+#include <limits.h>
+#include "ptools_common.h"
#define NOREAP_TIME 60 /* wait 60 seconds before allow a reap */
@@ -53,11 +53,11 @@ intr(int sig)
static int
open_usage(pid_t pid, int *perr)
{
- char path[64];
+ char path[PATH_MAX];
struct stat64 st;
int fd;
- (void) snprintf(path, sizeof (path), "/proc/%d/usage", (int)pid);
+ (void) proc_snprintf(path, sizeof (path), "/proc/%d/usage", (int)pid);
/*
* Attempt to open the usage file, and return the fd if we can
diff --git a/usr/src/cmd/ptools/psig/psig.c b/usr/src/cmd/ptools/psig/psig.c
index 70af35cb5e..848fda834c 100644
--- a/usr/src/cmd/ptools/psig/psig.c
+++ b/usr/src/cmd/ptools/psig/psig.c
@@ -21,10 +21,9 @@
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <stdio.h>
#include <stdio_ext.h>
#include <stdlib.h>
@@ -37,6 +36,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <libproc.h>
+#include "ptools_common.h"
/* evil knowledge of libc internals */
#include "../../../lib/libc/inc/thr_uberdata.h"
@@ -172,7 +172,7 @@ lwp_iter(void *cd, const lwpstatus_t *lwpstatus)
static int
look(char *arg)
{
- char pathname[100];
+ char pathname[PATH_MAX];
struct stat statb;
int fd = -1;
int sig, gcode;
@@ -199,7 +199,8 @@ look(char *arg)
(void) memcpy(&psinfo, psinfop, sizeof (psinfo_t));
proc_unctrl_psinfo(&psinfo);
- (void) sprintf(pathname, "/proc/%d/sigact", (int)psinfo.pr_pid);
+ (void) proc_snprintf(pathname, sizeof (pathname), "/proc/%d/sigact",
+ (int)psinfo.pr_pid);
if ((fd = open(pathname, O_RDONLY)) < 0) {
perr("open sigact");
goto look_error;
@@ -213,8 +214,8 @@ look(char *arg)
action = malloc(maxsig * sizeof (struct sigaction));
if (action == NULL) {
(void) fprintf(stderr,
- "%s: cannot malloc() space for %d sigaction structures\n",
- command, maxsig);
+ "%s: cannot malloc() space for %d sigaction structures\n",
+ command, maxsig);
goto look_error;
}
if (read(fd, (char *)action, maxsig * sizeof (struct sigaction)) !=
@@ -239,7 +240,7 @@ look(char *arg)
if (psinfo.pr_dmodel != PR_MODEL_NATIVE) {
caddr32_t addr;
aharraddr = uberaddr +
- offsetof(uberdata32_t, siguaction);
+ offsetof(uberdata32_t, siguaction);
aharrlen = sizeof (siguaction32_t) * NSIG;
(void) Pread(Pr, &addr, sizeof (addr),
uberaddr + offsetof(uberdata32_t, sigacthandler));
@@ -248,7 +249,7 @@ look(char *arg)
#endif
{
aharraddr = uberaddr +
- offsetof(uberdata_t, siguaction);
+ offsetof(uberdata_t, siguaction);
aharrlen = sizeof (siguaction_t) * NSIG;
(void) Pread(Pr, &intfnaddr, sizeof (intfnaddr),
uberaddr + offsetof(uberdata_t, sigacthandler));
@@ -323,7 +324,7 @@ look(char *arg)
(void) printf("\t%-8s", hname);
else
(void) printf("\t0x%-8lx",
- (ulong_t)haddr);
+ (ulong_t)haddr);
s = sigflags(sig, sp->sa_flags);
(void) printf("%s", (*s != '\0')? s : "\t0");
@@ -334,7 +335,7 @@ look(char *arg)
}
} else if (sig == SIGCLD) {
s = sigflags(sig,
- sp->sa_flags & (SA_NOCLDWAIT|SA_NOCLDSTOP));
+ sp->sa_flags & (SA_NOCLDWAIT|SA_NOCLDSTOP));
if (*s != '\0')
(void) printf("\t\t%s", s);
}
@@ -371,7 +372,7 @@ sigflags(int sig, int flags)
static char code_buf[100];
char *str = code_buf;
int flagmask =
- (SA_ONSTACK|SA_RESETHAND|SA_RESTART|SA_SIGINFO|SA_NODEFER);
+ (SA_ONSTACK|SA_RESETHAND|SA_RESTART|SA_SIGINFO|SA_NODEFER);
if (sig == SIGCLD)
flagmask |= (SA_NOCLDSTOP|SA_NOCLDWAIT);
@@ -414,19 +415,19 @@ deinterpose(int sig, void *aharr, psinfo_t *psinfo, struct sigaction *sp)
#ifdef _LP64
if (psinfo->pr_dmodel != PR_MODEL_NATIVE) {
struct sigaction32 *sa32 = (struct sigaction32 *)
- ((uintptr_t)aharr + sig * sizeof (siguaction32_t) +
- offsetof(siguaction32_t, sig_uaction));
+ ((uintptr_t)aharr + sig * sizeof (siguaction32_t) +
+ offsetof(siguaction32_t, sig_uaction));
sp->sa_flags = sa32->sa_flags;
sp->sa_handler = (void (*)())(uintptr_t)sa32->sa_handler;
(void) memcpy(&sp->sa_mask, &sa32->sa_mask,
- sizeof (sp->sa_mask));
+ sizeof (sp->sa_mask));
} else
#endif
{
struct sigaction *sa = (struct sigaction *)
- ((uintptr_t)aharr + sig * sizeof (siguaction_t) +
- offsetof(siguaction_t, sig_uaction));
+ ((uintptr_t)aharr + sig * sizeof (siguaction_t) +
+ offsetof(siguaction_t, sig_uaction));
sp->sa_flags = sa->sa_flags;
sp->sa_handler = sa->sa_handler;
diff --git a/usr/src/cmd/ptools/ptime/ptime.c b/usr/src/cmd/ptools/ptime/ptime.c
index bc862cddb8..7c9be226e1 100644
--- a/usr/src/cmd/ptools/ptime/ptime.c
+++ b/usr/src/cmd/ptools/ptime/ptime.c
@@ -24,6 +24,9 @@
*
* Portions Copyright 2008 Chad Mynhier
*/
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
#include <stdio.h>
#include <stdlib.h>
@@ -38,6 +41,8 @@
#include <sys/time.h>
#include <signal.h>
#include <libproc.h>
+#include <limits.h>
+#include "ptools_common.h"
static int look(pid_t);
static void hr_min_sec(char *, long);
@@ -55,16 +60,39 @@ static char procname[64];
static int Fflag;
static int mflag;
static int errflg;
+static int pflag;
+
+static int
+ptime_pid(const char *pidstr)
+{
+ struct ps_prochandle *Pr;
+ pid_t pid;
+ int gret;
+
+ if ((Pr = proc_arg_grab(pidstr, PR_ARG_PIDS,
+ Fflag | PGRAB_RDONLY, &gret)) == NULL) {
+ (void) fprintf(stderr, "%s: cannot examine %s: %s\n",
+ command, pidstr, Pgrab_error(gret));
+ return (1);
+ }
+
+ pid = Pstatus(Pr)->pr_pid;
+ (void) sprintf(procname, "%d", (int)pid); /* for perr() */
+ (void) look(pid);
+ Prelease(Pr, 0);
+ return (0);
+}
int
main(int argc, char **argv)
{
- int opt;
+ int opt, exit;
pid_t pid;
struct siginfo info;
int status;
int gret;
struct ps_prochandle *Pr;
+ char *pp, *np;
if ((command = strrchr(argv[0], '/')) != NULL)
command++;
@@ -80,6 +108,7 @@ main(int argc, char **argv)
mflag = 1;
break;
case 'p':
+ pflag = 1;
pidarg = optarg;
break;
default:
@@ -93,69 +122,76 @@ main(int argc, char **argv)
if (((pidarg != NULL) ^ (argc < 1)) || errflg) {
(void) fprintf(stderr,
- "usage:\t%s [-mh] [-p pid | command [ args ... ]]\n",
+ "usage:\t%s [-mh] [-p pidlist | command [ args ... ]]\n",
command);
(void) fprintf(stderr,
" (time a command using microstate accounting)\n");
return (1);
}
- if (pidarg != NULL) {
- if ((Pr = proc_arg_grab(pidarg, PR_ARG_PIDS,
- Fflag | PGRAB_RDONLY, &gret)) == NULL) {
- (void) fprintf(stderr, "%s: cannot examine %s: %s\n",
- command, pidarg, Pgrab_error(gret));
- return (1);
- }
- } else {
- if ((Pr = Pcreate(argv[0], &argv[0], &gret, NULL, 0)) == NULL) {
- (void) fprintf(stderr, "%s: failed to exec %s: %s\n",
- command, argv[0], Pcreate_error(gret));
- return (1);
- }
- if (Psetrun(Pr, 0, 0) == -1) {
- (void) fprintf(stderr, "%s: failed to set running %s: "
- "%s\n", command, argv[0], strerror(errno));
- return (1);
+ if (pflag) {
+ exit = 0;
+ (void) signal(SIGINT, SIG_IGN);
+ (void) signal(SIGQUIT, SIG_IGN);
+ pp = pidarg;
+ if ((np = strchr(pp, ' ')) != NULL ||
+ (np = strchr(pp, ',')) != NULL)
+ pflag++;
+ while (np != NULL) {
+ *np = '\0';
+ exit |= ptime_pid(pp);
+ pp = np + 1;
+ np = strchr(pp, ' ');
+ if (np == NULL)
+ np = strchr(pp, ',');
}
+ exit |= ptime_pid(pp);
+ return (exit);
+ }
+
+
+ if ((Pr = Pcreate(argv[0], &argv[0], &gret, NULL, 0)) == NULL) {
+ (void) fprintf(stderr, "%s: failed to exec %s: %s\n",
+ command, argv[0], Pcreate_error(gret));
+ return (1);
+ }
+ if (Psetrun(Pr, 0, 0) == -1) {
+ (void) fprintf(stderr, "%s: failed to set running %s: "
+ "%s\n", command, argv[0], strerror(errno));
+ return (1);
}
pid = Pstatus(Pr)->pr_pid;
+
(void) sprintf(procname, "%d", (int)pid); /* for perr() */
(void) signal(SIGINT, SIG_IGN);
(void) signal(SIGQUIT, SIG_IGN);
- if (pidarg == NULL)
- (void) waitid(P_PID, pid, &info, WEXITED | WNOWAIT);
+ (void) waitid(P_PID, pid, &info, WEXITED | WNOWAIT);
(void) look(pid);
- if (pidarg != NULL) {
- Prelease(Pr, 0);
- return (0);
- } else {
- (void) waitpid(pid, &status, 0);
+ (void) waitpid(pid, &status, 0);
- if (WIFEXITED(status))
- return (WEXITSTATUS(status));
+ if (WIFEXITED(status))
+ return (WEXITSTATUS(status));
- if (WIFSIGNALED(status)) {
- int sig = WTERMSIG(status);
- char name[SIG2STR_MAX];
+ if (WIFSIGNALED(status)) {
+ int sig = WTERMSIG(status);
+ char name[SIG2STR_MAX];
- (void) fprintf(stderr, "%s: command terminated "
- "abnormally by %s\n", command,
- proc_signame(sig, name, sizeof (name)));
- }
-
- return (status | WCOREFLG); /* see time(1) */
+ (void) fprintf(stderr, "%s: command terminated "
+ "abnormally by %s\n", command,
+ proc_signame(sig, name, sizeof (name)));
}
+
+ return (status | WCOREFLG); /* see time(1) */
}
static int
look(pid_t pid)
{
- char pathname[100];
+ char pathname[PATH_MAX];
int rval = 0;
int fd;
psinfo_t psinfo;
@@ -167,7 +203,8 @@ look(pid_t pid)
if (proc_get_psinfo(pid, &psinfo) < 0)
return (perr("read psinfo"));
- (void) sprintf(pathname, "/proc/%d/usage", (int)pid);
+ (void) proc_snprintf(pathname, sizeof (pathname), "/proc/%d/usage",
+ (int)pid);
if ((fd = open(pathname, O_RDONLY)) < 0)
return (perr("open usage"));
@@ -187,6 +224,9 @@ look(pid_t pid)
tsadd(&sys, &sys, &pup->pr_ttime);
(void) fprintf(stderr, "\n");
+ if (pflag > 1)
+ (void) fprintf(stderr, "%d:\t%.70s\n",
+ (int)psinfo.pr_pid, psinfo.pr_psargs);
prtime("real", &real);
prtime("user", &user);
prtime("sys", &sys);
diff --git a/usr/src/cmd/ptools/ptree/ptree.c b/usr/src/cmd/ptools/ptree/ptree.c
index 27fef9b7f0..92a65e2f44 100644
--- a/usr/src/cmd/ptools/ptree/ptree.c
+++ b/usr/src/cmd/ptools/ptree/ptree.c
@@ -21,14 +21,13 @@
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
/*
* ptree -- print family tree of processes
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <assert.h>
#include <stdio.h>
#include <string.h>
@@ -48,6 +47,7 @@
#include <sys/ctfs.h>
#include <libcontract_priv.h>
#include <sys/stat.h>
+#include "ptools_common.h"
#define FAKEDPID0(p) (p->pid == 0 && p->psargs[0] == '\0')
@@ -103,10 +103,11 @@ main(int argc, char **argv)
char *s;
int n;
int retc = 0;
+ char ppath[PATH_MAX];
DIR *dirp;
struct dirent *dentp;
- char pname[100];
+ char pname[PATH_MAX];
int pdlen;
ps_t *p;
@@ -169,16 +170,18 @@ main(int argc, char **argv)
psize = 0;
ps = NULL;
+ (void) proc_snprintf(ppath, sizeof (ppath), "/proc");
+
/*
* Search the /proc directory for all processes.
*/
- if ((dirp = opendir("/proc")) == NULL) {
- (void) fprintf(stderr, "%s: cannot open /proc directory\n",
- command);
+ if ((dirp = opendir(ppath)) == NULL) {
+ (void) fprintf(stderr, "%s: cannot open %s directory\n",
+ command, ppath);
return (1);
}
- (void) strcpy(pname, "/proc");
+ (void) strcpy(pname, ppath);
pdlen = strlen(pname);
pname[pdlen++] = '/';
diff --git a/usr/src/cmd/ptools/pwait/pwait.c b/usr/src/cmd/ptools/pwait/pwait.c
index 0733c355cf..ec11573477 100644
--- a/usr/src/cmd/ptools/pwait/pwait.c
+++ b/usr/src/cmd/ptools/pwait/pwait.c
@@ -23,8 +23,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <stdio.h>
#include <stdio_ext.h>
#include <ctype.h>
@@ -39,6 +37,8 @@
#include <poll.h>
#include <procfs.h>
#include <sys/resource.h>
+#include <limits.h>
+#include "ptools_common.h"
static int count_my_files();
static char *command;
@@ -49,6 +49,7 @@ static char *command;
int
main(int argc, char **argv)
{
+ char buf[PATH_MAX];
unsigned long remain = 0;
struct pollfd *pollfd;
struct pollfd *pfd;
@@ -75,10 +76,12 @@ main(int argc, char **argv)
(void) fprintf(stderr, "usage:\t%s [-v] pid ...\n", command);
(void) fprintf(stderr, " (wait for processes to terminate)\n");
(void) fprintf(stderr,
- " -v: verbose; report terminations to standard out\n");
+ " -v: verbose; report terminations to standard out\n");
return (2);
}
+ (void) proc_snprintf(buf, sizeof (buf), "/proc/");
+
/* make sure we have enough file descriptors */
if (getrlimit(RLIMIT_NOFILE, &rlim) == 0) {
int nfiles = count_my_files();
@@ -87,8 +90,8 @@ main(int argc, char **argv)
rlim.rlim_cur = argc + nfiles + SLOP;
if (setrlimit(RLIMIT_NOFILE, &rlim) != 0) {
(void) fprintf(stderr,
- "%s: insufficient file descriptors\n",
- command);
+ "%s: insufficient file descriptors\n",
+ command);
return (2);
}
}
@@ -108,11 +111,11 @@ main(int argc, char **argv)
if (strchr(arg, '/') != NULL)
(void) strncpy(psinfofile, arg, sizeof (psinfofile));
else {
- (void) strcpy(psinfofile, "/proc/");
+ (void) strcpy(psinfofile, buf);
(void) strncat(psinfofile, arg, sizeof (psinfofile)-6);
}
(void) strncat(psinfofile, "/psinfo",
- sizeof (psinfofile)-strlen(psinfofile));
+ sizeof (psinfofile)-strlen(psinfofile));
pfd = &pollfd[i];
if ((pfd->fd = open(psinfofile, O_RDONLY)) >= 0) {
@@ -126,7 +129,7 @@ main(int argc, char **argv)
pfd->revents = 0;
} else if (errno == ENOENT) {
(void) fprintf(stderr, "%s: no such process: %s\n",
- command, arg);
+ command, arg);
} else {
perror(arg);
}
@@ -160,9 +163,9 @@ main(int argc, char **argv)
if (pread(pfd->fd, &psinfo,
sizeof (psinfo), (off_t)0)
== sizeof (psinfo)) {
- (void) printf(
- "%s: terminated, wait status 0x%.4x\n",
- arg, psinfo.pr_wstat);
+ (void) printf("%s: terminated, "
+ "wait status 0x%.4x\n",
+ arg, psinfo.pr_wstat);
} else {
(void) printf(
"%s: terminated\n", arg);
@@ -170,10 +173,10 @@ main(int argc, char **argv)
}
if (pfd->revents & POLLNVAL)
(void) printf("%s: system process\n",
- arg);
+ arg);
if (pfd->revents & ~(POLLPRI|POLLHUP|POLLNVAL))
(void) printf("%s: unknown error\n",
- arg);
+ arg);
}
(void) close(pfd->fd);
diff --git a/usr/src/cmd/ptools/pwdx/pwdx.c b/usr/src/cmd/ptools/pwdx/pwdx.c
index adf42c0877..4a2c6f0c3f 100644
--- a/usr/src/cmd/ptools/pwdx/pwdx.c
+++ b/usr/src/cmd/ptools/pwdx/pwdx.c
@@ -22,10 +22,9 @@
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <stdio.h>
#include <unistd.h>
#include <string.h>
@@ -33,6 +32,8 @@
#include <libproc.h>
#include <sys/param.h>
+#include "ptools_common.h"
+
static char *command;
static int
@@ -49,7 +50,7 @@ show_cwd(const char *arg)
return (1);
}
- (void) snprintf(proc, sizeof (proc), "/proc/%d/path/cwd",
+ (void) proc_snprintf(proc, sizeof (proc), "/proc/%d/path/cwd",
(int)p.pr_pid);
if ((ret = readlink(proc, cwd, sizeof (cwd) - 1)) <= 0) {
diff --git a/usr/src/cmd/rcap/common/utils.c b/usr/src/cmd/rcap/common/utils.c
index 799fdcef23..dd511c7c50 100644
--- a/usr/src/cmd/rcap/common/utils.c
+++ b/usr/src/cmd/rcap/common/utils.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent, Inc. All rights reserved.
*/
#include <sys/param.h>
@@ -257,77 +258,3 @@ xatoi(char *p)
return (i);
}
}
-
-/*
- * get_running_zones() calls zone_list(2) to find out how many zones are
- * running. It then calls zone_list(2) again to fetch the list of running
- * zones (stored in *zents).
- */
-int
-get_running_zones(uint_t *nzents, zone_entry_t **zents)
-{
- zoneid_t *zids;
- uint_t nzents_saved;
- int i;
- zone_entry_t *zentp;
- zone_state_t zstate;
-
- *zents = NULL;
- if (zone_list(NULL, nzents) != 0) {
- warn(gettext("could not get zoneid list\n"));
- return (E_ERROR);
- }
-
-again:
- if (*nzents == 0)
- return (E_SUCCESS);
-
- if ((zids = (zoneid_t *)calloc(*nzents, sizeof (zoneid_t))) == NULL) {
- warn(gettext("out of memory: zones will not be capped\n"));
- return (E_ERROR);
- }
-
- nzents_saved = *nzents;
-
- if (zone_list(zids, nzents) != 0) {
- warn(gettext("could not get zone list\n"));
- free(zids);
- return (E_ERROR);
- }
- if (*nzents != nzents_saved) {
- /* list changed, try again */
- free(zids);
- goto again;
- }
-
- *zents = calloc(*nzents, sizeof (zone_entry_t));
- if (*zents == NULL) {
- warn(gettext("out of memory: zones will not be capped\n"));
- free(zids);
- return (E_ERROR);
- }
-
- zentp = *zents;
- for (i = 0; i < *nzents; i++) {
- char name[ZONENAME_MAX];
-
- if (getzonenamebyid(zids[i], name, sizeof (name)) < 0) {
- warn(gettext("could not get name for "
- "zoneid %d\n"), zids[i]);
- continue;
- }
-
- (void) strlcpy(zentp->zname, name, sizeof (zentp->zname));
- zentp->zid = zids[i];
- if (zone_get_state(name, &zstate) != Z_OK ||
- zstate != ZONE_STATE_RUNNING)
- continue;
-
-
- zentp++;
- }
- *nzents = zentp - *zents;
-
- free(zids);
- return (E_SUCCESS);
-}
diff --git a/usr/src/cmd/rcap/common/utils.h b/usr/src/cmd/rcap/common/utils.h
index 7196cfb4ce..cf2e17c080 100644
--- a/usr/src/cmd/rcap/common/utils.h
+++ b/usr/src/cmd/rcap/common/utils.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent, Inc. All rights reserved.
*/
#ifndef _UTILS_H
@@ -98,7 +99,6 @@ extern void vdprintfe(int, const char *, va_list);
extern void dprintfe(int, char *, ...);
extern void hrt2ts(hrtime_t, timestruc_t *);
extern int xatoi(char *);
-extern int get_running_zones(uint_t *, zone_entry_t **);
#ifdef __cplusplus
}
diff --git a/usr/src/cmd/rcap/rcapadm/rcapadm.c b/usr/src/cmd/rcap/rcapadm/rcapadm.c
index 92888b2071..b92115469a 100644
--- a/usr/src/cmd/rcap/rcapadm/rcapadm.c
+++ b/usr/src/cmd/rcap/rcapadm/rcapadm.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -145,20 +146,29 @@ out:
scf_handle_destroy(h);
}
+static int
+set_zone_cap(char *zonename, uint64_t mcap)
+{
+ char cmd[128 + ZONENAME_MAX];
+
+ (void) snprintf(cmd, sizeof (cmd), "/usr/bin/prctl -r "
+ "-n zone.max-physical-memory -v %llu -i zone %s", mcap, zonename);
+ return (system(cmd));
+}
+
/*
* Update the in-kernel memory cap for the specified zone.
*/
static int
update_zone_mcap(char *zonename, char *maxrss)
{
- zoneid_t zone_id;
uint64_t num;
if (getzoneid() != GLOBAL_ZONEID || zonecfg_in_alt_root())
return (E_SUCCESS);
/* get the running zone from the kernel */
- if ((zone_id = getzoneidbyname(zonename)) == -1) {
+ if (getzoneidbyname(zonename) == -1) {
(void) fprintf(stderr, gettext("zone '%s' must be running\n"),
zonename);
return (E_ERROR);
@@ -169,7 +179,7 @@ update_zone_mcap(char *zonename, char *maxrss)
return (E_ERROR);
}
- if (zone_setattr(zone_id, ZONE_ATTR_PHYS_MCAP, &num, 0) == -1) {
+ if (set_zone_cap(zonename, num) == -1) {
(void) fprintf(stderr, gettext("could not set memory "
"cap for zone '%s'\n"), zonename);
return (E_ERROR);
diff --git a/usr/src/cmd/rcap/rcapd/rcapd_collection_zone.c b/usr/src/cmd/rcap/rcapd/rcapd_collection_zone.c
index db86aa6276..88403dda37 100644
--- a/usr/src/cmd/rcap/rcapd/rcapd_collection_zone.c
+++ b/usr/src/cmd/rcap/rcapd/rcapd_collection_zone.c
@@ -21,16 +21,17 @@
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011 Joyent, Inc. All rights reserved.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <procfs.h>
#include <project.h>
#include <stdlib.h>
#include <strings.h>
#include <zone.h>
#include <libzonecfg.h>
+#include <dirent.h>
+#include <libproc.h>
#include "rcapd.h"
#include "utils.h"
@@ -39,61 +40,117 @@ extern boolean_t gz_capped;
/* round up to next y = 2^n */
#define ROUNDUP(x, y) (((x) + ((y) - 1)) & ~((y) - 1))
-static void
-update_zone(zone_entry_t *zent, void *walk_data)
+static struct ps_prochandle *
+grab_zone_proc(zoneid_t zid)
{
- void(*update_notification_cb)(char *, char *, int, uint64_t, int) =
- (void(*)(char *, char *, int, uint64_t, int))walk_data;
- int changes;
- int64_t max_rss;
+ DIR *dirp;
+ struct dirent *dentp;
+ int pid, pid_self, tmp;
+ psinfo_t psinfo;
+ struct ps_prochandle *pr = NULL;
+
+ pid_self = getpid();
+
+ if ((dirp = opendir("/proc")) == NULL)
+ return (NULL);
+
+ while (dentp = readdir(dirp)) {
+ pid = atoi(dentp->d_name);
+
+ /* Skip self */
+ if (pid == pid_self)
+ continue;
+
+ if (proc_get_psinfo(pid, &psinfo) != 0)
+ continue;
+
+ if (psinfo.pr_zoneid != zid)
+ continue;
+
+ /* attempt to grab process */
+ if ((pr = Pgrab(pid, 0, &tmp)) != NULL) {
+ if (Psetflags(pr, PR_RLC) != 0) {
+ Prelease(pr, 0);
+ }
+ if (Pcreate_agent(pr) == 0) {
+ if (pr_getzoneid(pr) != zid) {
+ Prelease(pr, 0);
+ continue;
+ }
+
+ (void) closedir(dirp);
+ return (pr);
+ } else {
+ Prelease(pr, 0);
+ }
+ }
+ }
+
+ (void) closedir(dirp);
+ return (NULL);
+}
+
+static uint64_t
+get_zone_cap(zoneid_t zid)
+{
+ rctlblk_t *rblk;
uint64_t mcap;
- lcollection_t *lcol;
- rcid_t colid;
+ struct ps_prochandle *pr;
- if (zone_getattr(zent->zid, ZONE_ATTR_PHYS_MCAP, &mcap,
- sizeof (mcap)) != -1 && mcap != 0)
- max_rss = ROUNDUP(mcap, 1024) / 1024;
- else
- max_rss = 0;
-
- if (zent->zid == GLOBAL_ZONEID) {
- if (max_rss > 0)
- gz_capped = B_TRUE;
- else
- gz_capped = B_FALSE;
+ if ((rblk = (rctlblk_t *)malloc(rctlblk_size())) == NULL)
+ return (UINT64_MAX);
+
+ if ((pr = grab_zone_proc(zid)) == NULL) {
+ free(rblk);
+ return (UINT64_MAX);
}
+ if (pr_getrctl(pr, "zone.max-physical-memory", NULL, rblk,
+ RCTL_FIRST)) {
+ Pdestroy_agent(pr);
+ Prelease(pr, 0);
+ free(rblk);
+ return (UINT64_MAX);
+ }
- colid.rcid_type = RCIDT_ZONE;
- colid.rcid_val = zent->zid;
+ Pdestroy_agent(pr);
+ Prelease(pr, 0);
- lcol = lcollection_insert_update(&colid, max_rss, zent->zname,
- &changes);
- if (update_notification_cb != NULL)
- update_notification_cb("zone", zent->zname, changes, max_rss,
- (lcol != NULL) ? lcol->lcol_mark : 0);
+ mcap = rctlblk_get_value(rblk);
+ free(rblk);
+ return (mcap);
}
-
+/*
+ * For zones, rcapd only caps the global zone, since each non-global zone
+ * caps itself.
+ */
/* ARGSUSED */
void
lcollection_update_zone(lcollection_update_type_t ut,
void(*update_notification_cb)(char *, char *, int, uint64_t, int))
{
- int i;
- uint_t nzents;
- zone_entry_t *zents;
-
- /*
- * Enumerate running zones.
- */
- if (get_running_zones(&nzents, &zents) != 0)
- return;
-
- for (i = 0; i < nzents; i++) {
- update_zone(&zents[i], (void *)update_notification_cb);
+ int changes;
+ int64_t max_rss;
+ uint64_t mcap;
+ lcollection_t *lcol;
+ rcid_t colid;
+ mcap = get_zone_cap(GLOBAL_ZONEID);
+ if (mcap != 0 && mcap != UINT64_MAX) {
+ max_rss = ROUNDUP(mcap, 1024) / 1024;
+ gz_capped = B_TRUE;
+ } else {
+ max_rss = UINT64_MAX / 1024;
+ gz_capped = B_FALSE;
}
- free(zents);
+ colid.rcid_type = RCIDT_ZONE;
+ colid.rcid_val = GLOBAL_ZONEID;
+
+ lcol = lcollection_insert_update(&colid, max_rss, GLOBAL_ZONENAME,
+ &changes);
+ if (update_notification_cb != NULL)
+ update_notification_cb("zone", GLOBAL_ZONENAME, changes,
+ max_rss, (lcol != NULL) ? lcol->lcol_mark : 0);
}
diff --git a/usr/src/cmd/rcap/rcapd/rcapd_scanner.c b/usr/src/cmd/rcap/rcapd/rcapd_scanner.c
index b39811b552..254bb9e922 100644
--- a/usr/src/cmd/rcap/rcapd/rcapd_scanner.c
+++ b/usr/src/cmd/rcap/rcapd/rcapd_scanner.c
@@ -21,6 +21,7 @@
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
@@ -551,7 +552,7 @@ pageout(pid_t pid, struct ps_prochandle *Pr, caddr_t start, caddr_t end)
errno = 0;
res = pr_memcntl(Pr, start, (end - start), MC_SYNC,
- (caddr_t)(MS_ASYNC | MS_INVALIDATE), 0, 0);
+ (caddr_t)(MS_ASYNC | MS_INVALCURPROC), 0, 0);
debug_high("pr_memcntl [%p-%p): %d", (void *)start, (void *)end, res);
/*
diff --git a/usr/src/cmd/rcap/rcapstat/rcapstat.c b/usr/src/cmd/rcap/rcapstat/rcapstat.c
index 0632250fed..2838c6e5d5 100644
--- a/usr/src/cmd/rcap/rcapstat/rcapstat.c
+++ b/usr/src/cmd/rcap/rcapstat/rcapstat.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -72,6 +73,8 @@ typedef struct col {
static col_t *col_head;
static int ncol;
+#define RCAPD_NA "rcapd is not active (try zonememstat)\n"
+
static col_t *
col_find(rcid_t id)
{
@@ -152,7 +155,7 @@ read_stats(rcid_type_t stat_type)
struct stat st;
if ((fd = open(STAT_FILE_DEFAULT, O_RDONLY)) < 0) {
- warn(gettext("rcapd is not active\n"));
+ warn(gettext(RCAPD_NA));
return (E_ERROR);
}
@@ -173,7 +176,7 @@ read_stats(rcid_type_t stat_type)
pid = hdr.rs_pid;
(void) snprintf(procfile, 20, "/proc/%lld/psinfo", pid);
if ((proc_fd = open(procfile, O_RDONLY)) < 0) {
- warn(gettext("rcapd is not active\n"));
+ warn(gettext(RCAPD_NA));
(void) close(fd);
return (E_ERROR);
}
diff --git a/usr/src/cmd/rcm_daemon/Makefile.com b/usr/src/cmd/rcm_daemon/Makefile.com
index 4b12d1cd27..59e6e49371 100644
--- a/usr/src/cmd/rcm_daemon/Makefile.com
+++ b/usr/src/cmd/rcm_daemon/Makefile.com
@@ -56,7 +56,6 @@ COMMON_MOD_SRC = \
$(COMMON)/pool_rcm.c \
$(COMMON)/mpxio_rcm.c \
$(COMMON)/ip_anon_rcm.c \
- $(COMMON)/svm_rcm.c \
$(COMMON)/bridge_rcm.c
sparc_MOD_SRC = $(COMMON)/ttymux_rcm.c
@@ -82,7 +81,6 @@ COMMON_MOD_OBJ = \
pool_rcm.o \
mpxio_rcm.o \
ip_anon_rcm.o \
- svm_rcm.o \
bridge_rcm.o
sparc_MOD_OBJ = ttymux_rcm.o
@@ -103,7 +101,6 @@ COMMON_RCM_MODS = \
SUNW_pool_rcm.so \
SUNW_mpxio_rcm.so \
SUNW_ip_anon_rcm.so \
- SUNW_svm_rcm.so \
SUNW_bridge_rcm.so
sparc_RCM_MODS = SUNW_ttymux_rcm.so
@@ -132,7 +129,6 @@ LINTFLAGS += -u -erroff=E_FUNC_ARG_UNUSED
LDLIBS_MODULES =
SUNW_pool_rcm.so := LDLIBS_MODULES += -L$(ROOT)/usr/lib -lpool
-SUNW_svm_rcm.so := LDLIBS_MODULES += -L$(ROOT)/usr/lib -lmeta
SUNW_network_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldladm
SUNW_vlan_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldladm
SUNW_vnic_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldladm
diff --git a/usr/src/cmd/sed/main.c b/usr/src/cmd/sed/main.c
index 204583b877..dc3ef02619 100644
--- a/usr/src/cmd/sed/main.c
+++ b/usr/src/cmd/sed/main.c
@@ -1,7 +1,7 @@
/*
* Copyright (c) 2013 Johann 'Myrkraverk' Oskarsson <johann@myrkraverk.com>
* Copyright (c) 2011 Gary Mills
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1992 Diomidis Spinellis.
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
@@ -42,9 +42,7 @@
#include <err.h>
#include <errno.h>
#include <fcntl.h>
-#include <getopt.h>
#include <libgen.h>
-#include <libintl.h>
#include <limits.h>
#include <locale.h>
#include <regex.h>
@@ -53,6 +51,7 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <libintl.h>
#include "defs.h"
#include "extern.h"
@@ -107,11 +106,6 @@ static char tmpfname[PATH_MAX]; /* Temporary file name (for in-place editing) */
static const char *inplace; /* Inplace edit file extension. */
ulong_t linenum;
-static const struct option lopts[] = {
- {"in-place", optional_argument, NULL, 'i'},
- {NULL, 0, NULL, 0}
-};
-
static void add_compunit(enum e_cut, char *);
static void add_file(char *);
static void usage(void);
@@ -133,18 +127,14 @@ main(int argc, char *argv[])
fflag = 0;
inplace = NULL;
- while ((c = getopt_long(argc, argv, "EI::ae:f:i::lnr", lopts, NULL)) !=
- -1)
+ while ((c = getopt(argc, argv, "EI:ae:f:i:lnr")) != -1)
switch (c) {
case 'r': /* Gnu sed compat */
case 'E':
rflags = REG_EXTENDED;
break;
case 'I':
- if (optarg != NULL)
- inplace = optarg;
- else
- inplace = "";
+ inplace = optarg;
ispan = 1; /* span across input files */
break;
case 'a':
@@ -161,10 +151,7 @@ main(int argc, char *argv[])
add_compunit(CU_FILE, optarg);
break;
case 'i':
- if (optarg != NULL)
- inplace = optarg;
- else
- inplace = "";
+ inplace = optarg;
ispan = 0; /* don't span across input files */
break;
case 'l':
@@ -205,8 +192,8 @@ main(int argc, char *argv[])
static void
usage(void)
{
- (void) fputs(_("usage: sed script [-Ealn] [-i[extension]] [file...]\n"
- " sed [-Ealn] [-i[extension]] [-e script]... "
+ (void) fputs(_("usage: sed script [-Ealn] [-i extension] [file...]\n"
+ " sed [-Ealn] [-i extension] [-e script]... "
"[-f script_file]... [file...]\n"),
stderr);
exit(1);
diff --git a/usr/src/cmd/sendmail/src/Makefile b/usr/src/cmd/sendmail/src/Makefile
index 14793754fd..9512a7d976 100644
--- a/usr/src/cmd/sendmail/src/Makefile
+++ b/usr/src/cmd/sendmail/src/Makefile
@@ -46,7 +46,7 @@ LDFLAGS += $(MAPFILES:%=-M%)
LDLIBS += ../libsmutil/libsmutil.a ../libsm/libsm.a -lresolv -lsocket \
-lnsl ../db/libdb.a -lldap -lsldap -lwrap -lumem \
- -lssl -lcrypto -lsasl
+ -lsunw_ssl -lsunw_crypto -lsasl
INCPATH= -I. -I../include -I../db
diff --git a/usr/src/cmd/sgs/elfdump/Makefile.targ b/usr/src/cmd/sgs/elfdump/Makefile.targ
index 5eda4d1b47..b763a3faf0 100644
--- a/usr/src/cmd/sgs/elfdump/Makefile.targ
+++ b/usr/src/cmd/sgs/elfdump/Makefile.targ
@@ -60,6 +60,8 @@ delete:
package \
install: all $(VAR_SGSBINPROG) $(VAR_SGSCCSLINK)
+ -$(RM) $(ROOTPROG)
+ -$(LN) $(ISAEXEC) $(ROOTPROG)
.PARALLEL: $(LINTOUT32) $(LINTOUT64)
diff --git a/usr/src/cmd/sgs/elfdump/amd64/Makefile b/usr/src/cmd/sgs/elfdump/amd64/Makefile
index 906b6b6754..fdcf09abed 100644
--- a/usr/src/cmd/sgs/elfdump/amd64/Makefile
+++ b/usr/src/cmd/sgs/elfdump/amd64/Makefile
@@ -39,6 +39,8 @@ LINTFLAGS64 += $(VAR_LINTFLAGS64)
VAR_SGSBINPROG= $(VAR_SGSBINPROG64)
VAR_SGSCCSLINK= $(VAR_SGSCCSLINK64)
+install: all $(ROOTPROG64)
+
include ../Makefile.targ
include ../../Makefile.sub.64
diff --git a/usr/src/cmd/sgs/elfdump/i386/Makefile b/usr/src/cmd/sgs/elfdump/i386/Makefile
index 607c8a31ce..31fd227274 100644
--- a/usr/src/cmd/sgs/elfdump/i386/Makefile
+++ b/usr/src/cmd/sgs/elfdump/i386/Makefile
@@ -30,4 +30,6 @@ include ../Makefile.com
.KEEP_STATE:
+install: all $(ROOTPROG32)
+
include ../Makefile.targ
diff --git a/usr/src/cmd/sgs/include/debug.h b/usr/src/cmd/sgs/include/debug.h
index 9db202574d..5f740afe57 100644
--- a/usr/src/cmd/sgs/include/debug.h
+++ b/usr/src/cmd/sgs/include/debug.h
@@ -502,7 +502,6 @@ extern void Dbg_help(void);
#define Dbg_util_intoolate Dbg64_util_intoolate
#define Dbg_util_lcinterface Dbg64_util_lcinterface
#define Dbg_util_nl Dbg64_util_nl
-#define Dbg_util_no_init Dbg64_util_no_init
#define Dbg_util_scc_entry Dbg64_util_scc_entry
#define Dbg_util_scc_title Dbg64_util_scc_title
#define Dbg_util_str Dbg64_util_str
@@ -736,7 +735,6 @@ extern void Dbg_help(void);
#define Dbg_util_intoolate Dbg32_util_intoolate
#define Dbg_util_lcinterface Dbg32_util_lcinterface
#define Dbg_util_nl Dbg32_util_nl
-#define Dbg_util_no_init Dbg32_util_no_init
#define Dbg_util_scc_entry Dbg32_util_scc_entry
#define Dbg_util_scc_title Dbg32_util_scc_title
#define Dbg_util_str Dbg32_util_str
@@ -1051,7 +1049,6 @@ extern void Dbg_util_edge_out(Rt_map *, Rt_map *);
extern void Dbg_util_intoolate(Rt_map *);
extern void Dbg_util_lcinterface(Rt_map *, int, char *);
extern void Dbg_util_nl(Lm_list *, int);
-extern void Dbg_util_no_init(Rt_map *);
extern void Dbg_util_scc_entry(Rt_map *, uint_t);
extern void Dbg_util_scc_title(Lm_list *, int);
extern void Dbg_util_str(Lm_list *, const char *);
diff --git a/usr/src/cmd/sgs/include/rtld.h b/usr/src/cmd/sgs/include/rtld.h
index 344671d515..65eb25c4b1 100644
--- a/usr/src/cmd/sgs/include/rtld.h
+++ b/usr/src/cmd/sgs/include/rtld.h
@@ -719,7 +719,10 @@ struct rt_map {
Capchain *rt_capchain; /* capabilities chain data */
uint_t rt_cntl; /* link-map control list we belong to */
uint_t rt_aflags; /* auditor flags, see LML_TFLG_AUD_ */
+ Rt_cond rt_cv; /* for waiting on flags changes */
+ Rt_lock rt_lock; /* for coordinating flags changes */
/* address of _init */
+ thread_t rt_init_thread; /* thread id in this lm's _init */
void (*rt_init)(void);
/* address of _fini */
void (*rt_fini)(void);
diff --git a/usr/src/cmd/sgs/lex/Makefile.targ b/usr/src/cmd/sgs/lex/Makefile.targ
index ea1bee3a62..d1e01f71c9 100644
--- a/usr/src/cmd/sgs/lex/Makefile.targ
+++ b/usr/src/cmd/sgs/lex/Makefile.targ
@@ -102,4 +102,4 @@ $(LINTPOUT): $(SRCS)
$(LINT.c) $(LIBSRCS) $(LDLIBS) 2>&1 | tee -a $(LINTPOUT)
$(LINTLIB): $(LINTSRCS)
- $(LINT.c) -o $(LIBNAME) $(LINTSRCS)
+ $(LINT.c) -o $(LIBNAME) $(LINTSRCS) > $(LINTOUT) 2>&1
diff --git a/usr/src/cmd/sgs/lex/common/main.c b/usr/src/cmd/sgs/lex/common/main.c
index 17ba4808a4..a1fd526cf9 100644
--- a/usr/src/cmd/sgs/lex/common/main.c
+++ b/usr/src/cmd/sgs/lex/common/main.c
@@ -30,11 +30,15 @@
/* Copyright 1976, Bell Telephone Laboratories, Inc. */
+/* Copyright (c) 2013, joyent, Inc. All rights reserved. */
+
#include <string.h>
#include "once.h"
#include "sgs.h"
#include <locale.h>
#include <limits.h>
+#include <unistd.h>
+#include <libgen.h>
static wchar_t L_INITIAL[] = {'I', 'N', 'I', 'T', 'I', 'A', 'L', 0};
static void get1core(void);
@@ -46,6 +50,25 @@ static void get3core(void);
static void free3core(void);
#endif
+static int
+lex_construct_path(char *buf, size_t size, const char *file, int type)
+{
+ int ret;
+ char origin[PATH_MAX];
+
+ if (type != 0) {
+ ret = readlink("/proc/self/path/a.out", origin, PATH_MAX - 1);
+ if (ret < 0)
+ error(
+ "lex: failed to read origin from /proc\n");
+ origin[ret] = '\0';
+ return (snprintf(buf, size, "%s/../%s/%s", dirname(origin),
+ NBASE, file));
+ }
+
+ return (snprintf(buf, size, "%s/%s/%s", NPREFIX, NBASE, file));
+}
+
int
main(int argc, char **argv)
{
@@ -53,6 +76,7 @@ main(int argc, char **argv)
int c;
char *apath = NULL;
char *ypath;
+ char pathbuf[PATH_MAX];
Boolean eoption = 0, woption = 0;
sargv = argv;
@@ -224,6 +248,11 @@ main(int argc, char **argv)
free3core();
#endif
+ /*
+ * Try to find the file relative to $ORIGIN. Note that we don't touch
+ * antyhing related to -Y. In fact, unfortunately it's always been
+ * ignored it seems.
+ */
if (handleeuc) {
if (ratfor)
error("Ratfor is not supported by -w or -e option.");
@@ -232,9 +261,19 @@ main(int argc, char **argv)
else
ypath = ratfor ? RATNAME : CNAME;
- if (apath != NULL)
- ypath = strcat(apath, strrchr(ypath, '/'));
- fother = fopen(ypath, "r");
+ if (apath == NULL) {
+ (void) lex_construct_path(pathbuf, sizeof (pathbuf), ypath, 1);
+ fother = fopen(pathbuf, "r");
+ if (fother == NULL) {
+ (void) lex_construct_path(pathbuf, sizeof (pathbuf),
+ ypath, 0);
+ fother = fopen(pathbuf, "r");
+ }
+ } else {
+ apath = strcat(apath, "/");
+ ypath = strcat(apath, ypath);
+ fother = fopen(ypath, "r");
+ }
if (fother == NULL)
error("Lex driver missing, file %s", ypath);
while ((i = getc(fother)) != EOF)
diff --git a/usr/src/cmd/sgs/lex/common/once.h b/usr/src/cmd/sgs/lex/common/once.h
index 014ca00b17..9e4b0e5e00 100644
--- a/usr/src/cmd/sgs/lex/common/once.h
+++ b/usr/src/cmd/sgs/lex/common/once.h
@@ -26,11 +26,11 @@
/* Copyright (c) 1988 AT&T */
/* All Rights Reserved */
+/* Copyright (c) 2013, joyent, Inc. All rights reserved. */
+
#ifndef _ONCE_H
#define _ONCE_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include "ldefs.h"
/* once.c */
@@ -73,9 +73,11 @@ int peek = '\n'; /* next input character */
CHR *pushptr = pushc;
CHR *slptr = slist;
-#define CNAME "/usr/share/lib/ccs/ncform"
-#define RATNAME "/usr/share/lib/ccs/nrform"
-#define EUCNAME "/usr/share/lib/ccs/nceucform"
+#define NPREFIX "/usr"
+#define NBASE "/share/lib/ccs/"
+#define CNAME "ncform"
+#define RATNAME "nrform"
+#define EUCNAME "nceucform"
int ccount = 1;
int casecount = 1;
diff --git a/usr/src/cmd/sgs/libconv/common/corenote.c b/usr/src/cmd/sgs/libconv/common/corenote.c
index eb998bae45..863c3bc917 100644
--- a/usr/src/cmd/sgs/libconv/common/corenote.c
+++ b/usr/src/cmd/sgs/libconv/common/corenote.c
@@ -25,7 +25,7 @@
*/
/*
* Copyright 2012 DEY Storage Systems, Inc. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
/*
@@ -76,7 +76,7 @@ const char *
conv_cnote_auxv_type(Word type, Conv_fmt_flags_t fmt_flags,
Conv_inv_buf_t *inv_buf)
{
- static const Msg types_0_22[] = {
+ static const Msg types_0_25[] = {
MSG_AUXV_AT_NULL, MSG_AUXV_AT_IGNORE,
MSG_AUXV_AT_EXECFD, MSG_AUXV_AT_PHDR,
MSG_AUXV_AT_PHENT, MSG_AUXV_AT_PHNUM,
@@ -88,10 +88,11 @@ conv_cnote_auxv_type(Word type, Conv_fmt_flags_t fmt_flags,
MSG_AUXV_AT_HWCAP, MSG_AUXV_AT_CLKTCK,
MSG_AUXV_AT_FPUCW, MSG_AUXV_AT_DCACHEBSIZE,
MSG_AUXV_AT_ICACHEBSIZE, MSG_AUXV_AT_UCACHEBSIZE,
- MSG_AUXV_AT_IGNOREPPC
+ MSG_AUXV_AT_IGNOREPPC, MSG_AUXV_AT_SECURE,
+ MSG_AUXV_AT_BASE_PLATFORM, MSG_AUXV_AT_RANDOM
};
- static const conv_ds_msg_t ds_types_0_22 = {
- CONV_DS_MSG_INIT(0, types_0_22) };
+ static const conv_ds_msg_t ds_types_0_25 = {
+ CONV_DS_MSG_INIT(0, types_0_25) };
static const Msg types_2000_2011[] = {
MSG_AUXV_AT_SUN_UID, MSG_AUXV_AT_SUN_RUID,
@@ -104,19 +105,20 @@ conv_cnote_auxv_type(Word type, Conv_fmt_flags_t fmt_flags,
static const conv_ds_msg_t ds_types_2000_2011 = {
CONV_DS_MSG_INIT(2000, types_2000_2011) };
- static const Msg types_2014_2023[] = {
+ static const Msg types_2014_2024[] = {
MSG_AUXV_AT_SUN_EXECNAME, MSG_AUXV_AT_SUN_MMU,
MSG_AUXV_AT_SUN_LDDATA, MSG_AUXV_AT_SUN_AUXFLAGS,
MSG_AUXV_AT_SUN_EMULATOR, MSG_AUXV_AT_SUN_BRANDNAME,
MSG_AUXV_AT_SUN_BRAND_AUX1, MSG_AUXV_AT_SUN_BRAND_AUX2,
- MSG_AUXV_AT_SUN_BRAND_AUX3, MSG_AUXV_AT_SUN_HWCAP2
+ MSG_AUXV_AT_SUN_BRAND_AUX3, MSG_AUXV_AT_SUN_HWCAP2,
+ MSG_AUXV_AT_SUN_BRAND_NROOT
};
- static const conv_ds_msg_t ds_types_2014_2023 = {
- CONV_DS_MSG_INIT(2014, types_2014_2023) };
+ static const conv_ds_msg_t ds_types_2014_2024 = {
+ CONV_DS_MSG_INIT(2014, types_2014_2024) };
static const conv_ds_t *ds[] = {
- CONV_DS_ADDR(ds_types_0_22), CONV_DS_ADDR(ds_types_2000_2011),
- CONV_DS_ADDR(ds_types_2014_2023), NULL };
+ CONV_DS_ADDR(ds_types_0_25), CONV_DS_ADDR(ds_types_2000_2011),
+ CONV_DS_ADDR(ds_types_2014_2024), NULL };
return (conv_map_ds(ELFOSABI_NONE, EM_NONE, type, ds, fmt_flags,
inv_buf));
diff --git a/usr/src/cmd/sgs/libconv/common/corenote.msg b/usr/src/cmd/sgs/libconv/common/corenote.msg
index f1cc32dfde..c68b826f72 100644
--- a/usr/src/cmd/sgs/libconv/common/corenote.msg
+++ b/usr/src/cmd/sgs/libconv/common/corenote.msg
@@ -24,7 +24,7 @@
# Use is subject to license terms.
#
# Copyright 2012 DEY Storage Systems, Inc. All rights reserved.
-# Copyright (c) 2013, Joyent, Inc. All rights reserved.
+# Copyright (c) 2014, Joyent, Inc. All rights reserved.
#
@ MSG_NT_PRSTATUS "[ NT_PRSTATUS ]"
@@ -78,6 +78,9 @@
@ MSG_AUXV_AT_ICACHEBSIZE "ICACHEBSIZE"
@ MSG_AUXV_AT_UCACHEBSIZE "UCACHEBSIZE"
@ MSG_AUXV_AT_IGNOREPPC "IGNOREPPC"
+@ MSG_AUXV_AT_SECURE "SECURE"
+@ MSG_AUXV_AT_BASE_PLATFORM "BASE_PLATFORM"
+@ MSG_AUXV_AT_RANDOM "RANDOM"
@ MSG_AUXV_AT_SUN_UID "SUN_UID"
@ MSG_AUXV_AT_SUN_RUID "SUN_RUID"
@ MSG_AUXV_AT_SUN_GID "SUN_GID"
@@ -100,6 +103,7 @@
@ MSG_AUXV_AT_SUN_BRAND_AUX2 "SUN_BRAND_AUX2"
@ MSG_AUXV_AT_SUN_BRAND_AUX3 "SUN_BRAND_AUX3"
@ MSG_AUXV_AT_SUN_HWCAP2 "SUN_HWCAP2"
+@ MSG_AUXV_AT_SUN_BRAND_NROOT "SUN_BRAND_NROOT"
@ MSG_CC_CONTENT_STACK "STACK"
diff --git a/usr/src/cmd/sgs/liblddbg/common/liblddbg.msg b/usr/src/cmd/sgs/liblddbg/common/liblddbg.msg
index 71f432220f..34c012acab 100644
--- a/usr/src/cmd/sgs/liblddbg/common/liblddbg.msg
+++ b/usr/src/cmd/sgs/liblddbg/common/liblddbg.msg
@@ -1102,8 +1102,6 @@
@ MSG_UTL_DYN "dynamically triggered"
@ MSG_UTL_DONE "done"
-@ MSG_UTL_NOINIT "warning: calling %s whose init has not completed"
-
@ MSG_UTL_DBNOTIFY "notify debugger: event: %s state: %s"
@ MSG_UTL_SCC_TITLE " cycle detected - sorting cyclic dependencies in %s"
diff --git a/usr/src/cmd/sgs/liblddbg/common/llib-llddbg b/usr/src/cmd/sgs/liblddbg/common/llib-llddbg
index 0b6bfc747b..e3a43c6e0c 100644
--- a/usr/src/cmd/sgs/liblddbg/common/llib-llddbg
+++ b/usr/src/cmd/sgs/liblddbg/common/llib-llddbg
@@ -502,8 +502,6 @@ void Dbg32_util_lcinterface(Rt_map *, int, char *);
void Dbg64_util_lcinterface(Rt_map *, int, char *);
void Dbg32_util_nl(Lm_list *, int);
void Dbg64_util_nl(Lm_list *, int);
-void Dbg32_util_no_init(Rt_map *);
-void Dbg64_util_no_init(Rt_map *);
void Dbg32_util_scc_entry(Rt_map *, uint_t);
void Dbg64_util_scc_entry(Rt_map *, uint_t);
void Dbg32_util_scc_title(Lm_list *, int);
diff --git a/usr/src/cmd/sgs/liblddbg/common/mapfile-vers b/usr/src/cmd/sgs/liblddbg/common/mapfile-vers
index 0e4338fce1..1fac50603d 100644
--- a/usr/src/cmd/sgs/liblddbg/common/mapfile-vers
+++ b/usr/src/cmd/sgs/liblddbg/common/mapfile-vers
@@ -490,8 +490,6 @@ SYMBOL_VERSION SUNWprivate_4.83 {
Dbg64_util_intoolate;
Dbg32_util_nl;
Dbg64_util_nl;
- Dbg32_util_no_init;
- Dbg64_util_no_init;
Dbg32_util_scc_entry;
Dbg64_util_scc_entry;
Dbg32_util_scc_title;
diff --git a/usr/src/cmd/sgs/liblddbg/common/util.c b/usr/src/cmd/sgs/liblddbg/common/util.c
index 02de483a82..575a9bd15f 100644
--- a/usr/src/cmd/sgs/liblddbg/common/util.c
+++ b/usr/src/cmd/sgs/liblddbg/common/util.c
@@ -66,19 +66,6 @@ Dbg_util_call_init(Rt_map *lmp, int flag)
}
void
-Dbg_util_no_init(Rt_map *lmp)
-{
- Lm_list *lml = LIST(lmp);
-
- if (DBG_NOTCLASS(DBG_C_INIT))
- return;
-
- Dbg_util_nl(lml, DBG_NL_STD);
- dbg_print(lml, MSG_INTL(MSG_UTL_NOINIT), NAME(lmp));
- Dbg_util_nl(lml, DBG_NL_STD);
-}
-
-void
Dbg_util_intoolate(Rt_map *lmp)
{
Lm_list *lml = LIST(lmp);
diff --git a/usr/src/cmd/sgs/librtld_db/common/librtld_db.msg b/usr/src/cmd/sgs/librtld_db/common/librtld_db.msg
index fbc595f5f4..5b0ad9533a 100644
--- a/usr/src/cmd/sgs/librtld_db/common/librtld_db.msg
+++ b/usr/src/cmd/sgs/librtld_db/common/librtld_db.msg
@@ -24,6 +24,10 @@
# Use is subject to license terms.
#
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
# Message file for cmd/sgs/librtld_db.
@ MSG_ID_LIBRTLD_DB
@@ -104,7 +108,7 @@
@ MSG_DB_RDOBJPADE "rtld_db: rd_objpad_enable(padsize=0x%llx)"
@ MSG_DB_64BIT_PREFIX "64/"
@ MSG_DB_BRAND_HELPERPATH_PREFIX "%s/%s/%s/%s%s_librtld_db.so.1"
-@ MSG_DB_BRAND_HELPERPATH "%s/%s/%s%s_librtld_db.so.1"
+@ MSG_DB_BRAND_HELPERPATH "%s/%s/%s/%s%s_librtld_db.so.1"
@ MSG_DB_HELPERNOOPS "rtld_db: helper lib loaded but ops not preset"
@ MSG_DB_HELPERLOADED "rtld_db: helper library loaded for brand \"%s\""
@ MSG_DB_HELPERLOADFAILED "rtld_db: couldn't load brand helper library %s"
diff --git a/usr/src/cmd/sgs/librtld_db/common/rd_elf.c b/usr/src/cmd/sgs/librtld_db/common/rd_elf.c
index 410b819b30..b3de685b81 100644
--- a/usr/src/cmd/sgs/librtld_db/common/rd_elf.c
+++ b/usr/src/cmd/sgs/librtld_db/common/rd_elf.c
@@ -23,6 +23,10 @@
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
#include <stdlib.h>
#include <stdio.h>
#include <proc_service.h>
@@ -38,6 +42,14 @@
#include <sys/param.h>
/*
+ * We want to include zone.h to pull in the prototype for zone_get_nroot(),
+ * but we need to avoid pulling in <sys/stream.h>, which has a definition
+ * of M_DATA that conflicts with the ELF-related definition in machdep_*.h.
+ */
+#define _SYS_STREAM_H
+#include <zone.h>
+
+/*
* 64-bit builds are going to compile this module twice, the
* second time with _ELF64 defined. These defines should make
* all the necessary adjustments to the code.
@@ -283,7 +295,9 @@ _rd_reset32(struct rd_agent *rap)
* If we are debugging a branded executable, load the appropriate
* helper library, and call its initialization routine. Being unable
* to load the helper library is not a critical error. (Hopefully
- * we'll still be able to access some objects in the target.)
+ * we'll still be able to access some objects in the target.) Note
+ * that we pull in the native root here to allow for helper libraries
+ * to be properly found from within the branded zone.
*/
ps_pbrandname = (ps_pbrandname_fp_t)dlsym(RTLD_PROBE, "ps_pbrandname");
while ((ps_pbrandname != NULL) &&
@@ -294,17 +308,23 @@ _rd_reset32(struct rd_agent *rap)
isa = MSG_ORIG(MSG_DB_64BIT_PREFIX);
#endif /* _LP64 */
- if (rtld_db_helper_path[0] != '\0')
+ if (rtld_db_helper_path[0] != '\0') {
(void) snprintf(brandlib, MAXPATHLEN,
MSG_ORIG(MSG_DB_BRAND_HELPERPATH_PREFIX),
rtld_db_helper_path,
MSG_ORIG(MSG_DB_HELPER_PREFIX), brandname, isa,
brandname);
- else
+ } else {
+ const char *nroot = zone_get_nroot();
+
+ if (nroot == NULL)
+ nroot = "";
+
(void) snprintf(brandlib, MAXPATHLEN,
- MSG_ORIG(MSG_DB_BRAND_HELPERPATH),
+ MSG_ORIG(MSG_DB_BRAND_HELPERPATH), nroot,
MSG_ORIG(MSG_DB_HELPER_PREFIX), brandname, isa,
brandname);
+ }
rap->rd_helper.rh_dlhandle = dlopen(brandlib,
RTLD_LAZY | RTLD_LOCAL);
diff --git a/usr/src/cmd/sgs/rtld/common/_rtld.h b/usr/src/cmd/sgs/rtld/common/_rtld.h
index 3cbb58fe25..83b7071471 100644
--- a/usr/src/cmd/sgs/rtld/common/_rtld.h
+++ b/usr/src/cmd/sgs/rtld/common/_rtld.h
@@ -26,7 +26,7 @@
* Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#ifndef __RTLD_H
#define __RTLD_H
@@ -589,6 +589,8 @@ extern const char *rpl_ldflags; /* replaceable LD_FLAGS string */
extern const char *rpl_libpath; /* replaceable LD_LIBRARY string */
extern Alist *rpl_libdirs; /* and its associated Pdesc list */
extern const char *rpl_preload; /* replaceable LD_PRELOAD string */
+extern const char *rpl_ldtoxic; /* replaceable LD_TOXIC_PATH string */
+extern Alist *rpl_toxdirs; /* and associated Pdesc list */
extern const char *prm_audit; /* permanent LD_AUDIT string */
extern const char *prm_debug; /* permanent LD_DEBUG string */
@@ -764,7 +766,6 @@ extern int remove_hdl(Grp_hdl *, Rt_map *, int *);
extern void remove_lmc(Lm_list *, Rt_map *, Aliste, const char *);
extern void remove_lml(Lm_list *);
extern void remove_so(Lm_list *, Rt_map *, Rt_map *);
-extern int rt_cond_wait(Rt_cond *, Rt_lock *);
extern int rt_critical(void);
extern int rt_bind_guard(int);
extern int rt_bind_clear(int);
diff --git a/usr/src/cmd/sgs/rtld/common/analyze.c b/usr/src/cmd/sgs/rtld/common/analyze.c
index e14c121f07..df05f21924 100644
--- a/usr/src/cmd/sgs/rtld/common/analyze.c
+++ b/usr/src/cmd/sgs/rtld/common/analyze.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
/*
@@ -834,6 +835,44 @@ is_so_loaded(Lm_list *lml, const char *name, int *in_nfavl)
}
/*
+ * Walk the toxic path list and determine if the object in question has violated
+ * the toxic path. When evaluating the toxic path we need to ensure that we
+ * match any path that's a subdirectory of a listed entry. In other words if
+ * /foo/bar is toxic, something in /foo/bar/baz/ is no good. However, we need to
+ * ensure that we don't mark /foo/barbaz/ as bad.
+ */
+static int
+is_load_toxic(Lm_list *lml, Rt_map *nlmp)
+{
+ const char *fpath = PATHNAME(nlmp);
+ size_t flen = strlen(fpath);
+ Pdesc *pdp;
+ Aliste idx;
+
+ for (ALIST_TRAVERSE(rpl_toxdirs, idx, pdp)) {
+ if (pdp->pd_plen == 0)
+ continue;
+
+ if (strncmp(pdp->pd_pname, fpath, pdp->pd_plen) == 0) {
+ if (pdp->pd_pname[pdp->pd_plen-1] != '/') {
+ /*
+ * Path didn't end in a /, make sure
+ * we're at a directory boundary
+ * nonetheless.
+ */
+ if (flen > pdp->pd_plen &&
+ fpath[pdp->pd_plen] == '/')
+ return (1);
+ continue;
+ }
+ return (1);
+ }
+ }
+
+ return (0);
+}
+
+/*
* Tracing is enabled by the LD_TRACE_LOADED_OPTIONS environment variable which
* is normally set from ldd(1). For each link map we load, print the load name
* and the full pathname of the associated object.
@@ -2169,6 +2208,17 @@ load_finish(Lm_list *lml, const char *name, Rt_map *clmp, int nmode,
uint_t rdflags;
/*
+ * If this dependency is associated with a toxic path, then we must
+ * honor the user's request to die.
+ */
+ if (is_load_toxic(lml, nlmp) != 0) {
+ eprintf(lml, ERR_FATAL, MSG_INTL(MSG_TOXIC_FILE),
+ PATHNAME(nlmp));
+ rtldexit(lml, 1);
+
+ }
+
+ /*
* If this dependency is associated with a required version ensure that
* the version is present in the loaded file.
*/
diff --git a/usr/src/cmd/sgs/rtld/common/globals.c b/usr/src/cmd/sgs/rtld/common/globals.c
index 5f48fafc5f..2e98768551 100644
--- a/usr/src/cmd/sgs/rtld/common/globals.c
+++ b/usr/src/cmd/sgs/rtld/common/globals.c
@@ -24,6 +24,7 @@
* All Rights Reserved
*
* Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -132,6 +133,8 @@ const char *rpl_ldflags = NULL; /* replaceable LD_FLAGS string */
const char *rpl_libpath = NULL; /* replaceable LD_LIBRARY_PATH string */
Alist *rpl_libdirs = NULL; /* and associated Pdesc list */
const char *rpl_preload = NULL; /* replaceable LD_PRELOAD string */
+const char *rpl_ldtoxic = NULL; /* replaceable LD_TOXIC string */
+Alist *rpl_toxdirs = NULL; /* and associated Pdesc list */
const char *prm_audit = NULL; /* permanent LD_AUDIT string */
const char *prm_debug = NULL; /* permanent LD_DEBUG string */
diff --git a/usr/src/cmd/sgs/rtld/common/rtld.msg b/usr/src/cmd/sgs/rtld/common/rtld.msg
index 97e2841b8f..9309d0c62e 100644
--- a/usr/src/cmd/sgs/rtld/common/rtld.msg
+++ b/usr/src/cmd/sgs/rtld/common/rtld.msg
@@ -21,6 +21,7 @@
#
# Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2014, Joyent, Inc. All rights reserved.
#
@ _START_
@@ -99,9 +100,14 @@
@ MSG_SYS_MPROT "%s: mprotect failed: %s"
@ MSG_SYS_MMAPANON "mmap anon failed: %s"
+# Secure path failures
+
@ MSG_SEC_OPEN "%s: open failed: No such file in secure directories"
@ MSG_SEC_ILLEGAL "%s: open failed: illegal insecure pathname"
+# Toxic failures
+
+@ MSG_TOXIC_FILE "%s: dependency marked as toxic"
# Configuration failures
@@ -395,6 +401,7 @@
@ MSG_LD_PROFILE_OUTPUT "PROFILE_OUTPUT"
@ MSG_LD_SFCAP "SFCAP"
@ MSG_LD_SIGNAL "SIGNAL"
+@ MSG_LD_TOXICPATH "TOXIC_PATH"
@ MSG_LD_TRACE_OBJS "TRACE_LOADED_OBJECTS"
@ MSG_LD_TRACE_OBJS_E "TRACE_LOADED_OBJECTS_E"
@ MSG_LD_TRACE_OBJS_A "TRACE_LOADED_OBJECTS_A"
diff --git a/usr/src/cmd/sgs/rtld/common/setup.c b/usr/src/cmd/sgs/rtld/common/setup.c
index 862bb7d61f..98e3ba5d33 100644
--- a/usr/src/cmd/sgs/rtld/common/setup.c
+++ b/usr/src/cmd/sgs/rtld/common/setup.c
@@ -28,7 +28,7 @@
* All Rights Reserved
*/
/*
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
/*
@@ -819,6 +819,14 @@ setup(char **envp, auxv_t *auxv, Word _flags, char *_platform, int _syspagsz,
return (0);
}
+ /*
+ * Initialize our toxic paths
+ */
+ if (rpl_ldtoxic != NULL) {
+ (void) expand_paths(mlmp, rpl_ldtoxic, &rpl_toxdirs,
+ AL_CNT_SEARCH, 0, PD_TKN_CAP);
+ }
+
#if defined(_ELF64)
/*
* If this is a 64-bit process, determine whether this process has
diff --git a/usr/src/cmd/sgs/rtld/common/util.c b/usr/src/cmd/sgs/rtld/common/util.c
index 5f9979ddf0..bd80f05a37 100644
--- a/usr/src/cmd/sgs/rtld/common/util.c
+++ b/usr/src/cmd/sgs/rtld/common/util.c
@@ -24,6 +24,7 @@
* All Rights Reserved
*
* Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
/*
@@ -627,16 +628,22 @@ is_dep_init(Rt_map *dlmp, Rt_map *clmp)
if ((dlmp == clmp) || (rtld_flags & RT_FL_INITFIRST))
return;
+ rt_mutex_lock(&dlmp->rt_lock);
+ while (dlmp->rt_init_thread != rt_thr_self() && (FLAGS(dlmp) &
+ (FLG_RT_RELOCED | FLG_RT_INITCALL | FLG_RT_INITDONE)) ==
+ (FLG_RT_RELOCED | FLG_RT_INITCALL)) {
+ leave(LIST(dlmp), 0);
+ (void) _lwp_cond_wait(&dlmp->rt_cv, (mutex_t *)&dlmp->rt_lock);
+ rt_mutex_unlock(&dlmp->rt_lock);
+ (void) enter(0);
+ rt_mutex_lock(&dlmp->rt_lock);
+ }
+ rt_mutex_unlock(&dlmp->rt_lock);
+
if ((FLAGS(dlmp) & (FLG_RT_RELOCED | FLG_RT_INITDONE)) ==
(FLG_RT_RELOCED | FLG_RT_INITDONE))
return;
- if ((FLAGS(dlmp) & (FLG_RT_RELOCED | FLG_RT_INITCALL)) ==
- (FLG_RT_RELOCED | FLG_RT_INITCALL)) {
- DBG_CALL(Dbg_util_no_init(dlmp));
- return;
- }
-
if ((tobj = calloc(2, sizeof (Rt_map *))) != NULL) {
tobj[0] = dlmp;
call_init(tobj, DBG_INIT_DYN);
@@ -717,6 +724,7 @@ call_init(Rt_map **tobj, int flag)
continue;
FLAGS(lmp) |= FLG_RT_INITCALL;
+ lmp->rt_init_thread = rt_thr_self();
/*
* Establish an initfirst state if necessary - no other inits
@@ -752,7 +760,11 @@ call_init(Rt_map **tobj, int flag)
* signifies that a .fini must be called should it exist.
* Clear the sort field for use in later .fini processing.
*/
+ rt_mutex_lock(&lmp->rt_lock);
FLAGS(lmp) |= FLG_RT_INITDONE;
+ lmp->rt_init_thread = (thread_t)0;
+ _lwp_cond_broadcast(&lmp->rt_cv);
+ rt_mutex_unlock(&lmp->rt_lock);
SORTVAL(lmp) = -1;
/*
@@ -1417,6 +1429,7 @@ static u_longlong_t cmdisa = 0; /* command line (-e) ISA */
#define ENV_FLG_CAP_FILES 0x0080000000000ULL
#define ENV_FLG_DEFERRED 0x0100000000000ULL
#define ENV_FLG_NOENVIRON 0x0200000000000ULL
+#define ENV_FLG_TOXICPATH 0x0400000000000ULL
#define SEL_REPLACE 0x0001
#define SEL_PERMANT 0x0002
@@ -1590,8 +1603,7 @@ ld_generic_env(const char *s1, size_t len, const char *s2, Word *lmflags,
if ((len == MSG_LD_FLAGS_SIZE) && (strncmp(s1,
MSG_ORIG(MSG_LD_FLAGS), MSG_LD_FLAGS_SIZE) == 0)) {
select |= SEL_ACT_SPEC_1;
- str = (select & SEL_REPLACE) ? &rpl_ldflags :
- &prm_ldflags;
+ str = &rpl_ldflags;
variable = ENV_FLG_FLAGS;
}
}
@@ -1802,6 +1814,8 @@ ld_generic_env(const char *s1, size_t len, const char *s2, Word *lmflags,
* In case an auditor is called, which in turn might exec(2) a
* subprocess, this variable is disabled, so that any subprocess
* escapes ldd(1) processing.
+ *
+ * Also, look for LD_TOXIC_PATH
*/
else if (*s1 == 'T') {
if (((len == MSG_LD_TRACE_OBJS_SIZE) &&
@@ -1839,7 +1853,13 @@ ld_generic_env(const char *s1, size_t len, const char *s2, Word *lmflags,
select |= SEL_ACT_LML;
val = LML_FLG_TRC_SEARCH;
variable = ENV_FLG_TRACE_PTHS;
+ } else if ((len == MSG_LD_TOXICPATH_SIZE) && (strncmp(s1,
+ MSG_ORIG(MSG_LD_TOXICPATH), MSG_LD_TOXICPATH_SIZE) == 0)) {
+ select |= SEL_ACT_SPEC_1;
+ str = &rpl_ldtoxic;
+ variable = ENV_FLG_TOXICPATH;
}
+
}
/*
* LD_UNREF and LD_UNUSED (internal, used by ldd(1)).
@@ -1963,7 +1983,8 @@ ld_generic_env(const char *s1, size_t len, const char *s2, Word *lmflags,
*lmtflags &= ~val;
} else if (select & SEL_ACT_SPEC_1) {
/*
- * variable is either ENV_FLG_FLAGS or ENV_FLG_LIBPATH
+ * variable is either ENV_FLG_FLAGS, ENV_FLG_LIBPATH, or
+ * ENV_FLG_TOXICPATH
*/
if (env_flags & ENV_TYP_NULL)
*str = NULL;
diff --git a/usr/src/cmd/sgs/yacc/Makefile.targ b/usr/src/cmd/sgs/yacc/Makefile.targ
index 87f8c5221e..e97d36e4ff 100644
--- a/usr/src/cmd/sgs/yacc/Makefile.targ
+++ b/usr/src/cmd/sgs/yacc/Makefile.targ
@@ -100,4 +100,4 @@ $(LINTPOUT): $(SRCS)
$(LINTLIB): $(LINTSRCS)
- $(LINT.c) -o $(LIBNAME) $(LINTSRCS)
+ $(LINT.c) -o $(LIBNAME) $(LINTSRCS) > $(LINTOUT) 2>&1
diff --git a/usr/src/cmd/sgs/yacc/common/dextern.h b/usr/src/cmd/sgs/yacc/common/dextern.h
index e90aa60468..54b441cac4 100644
--- a/usr/src/cmd/sgs/yacc/common/dextern.h
+++ b/usr/src/cmd/sgs/yacc/common/dextern.h
@@ -26,11 +26,13 @@
/* Copyright (c) 1988 AT&T */
/* All Rights Reserved */
+/*
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ */
+
#ifndef _DEXTERN_H
#define _DEXTERN_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <stdio.h>
#include <inttypes.h>
#include <ctype.h>
@@ -42,6 +44,7 @@
#include <unistd.h>
#include <stdlib.h>
#include <wctype.h>
+#include <limits.h>
#ifdef __cplusplus
extern "C" {
@@ -301,6 +304,12 @@ extern int wscmp(const wchar_t *, const wchar_t *);
extern char *parser;
+#ifndef PBUFSIZE
+#define PBUFSIZE PATH_MAX
+#endif
+
+extern char pbuf[PBUFSIZE];
+
/* default settings for a number of macros */
/* name of yacc tempfiles */
@@ -324,7 +333,11 @@ extern char *parser;
#endif
#ifndef PARSER
-#define PARSER "/usr/share/lib/ccs/yaccpar"
+#define PARSER "/share/lib/ccs/yaccpar"
+#endif
+
+#ifndef PARSERPREFIX
+#define PARSERPREFIX "/usr"
#endif
/*
diff --git a/usr/src/cmd/sgs/yacc/common/y1.c b/usr/src/cmd/sgs/yacc/common/y1.c
index 845f82d367..0e67d9047b 100644
--- a/usr/src/cmd/sgs/yacc/common/y1.c
+++ b/usr/src/cmd/sgs/yacc/common/y1.c
@@ -26,7 +26,9 @@
/* Copyright (c) 1988 AT&T */
/* All Rights Reserved */
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ */
#include "dextern.h"
#include <sys/param.h>
@@ -34,6 +36,7 @@
#include <unistd.h>
#include <locale.h>
#include <stdarg.h> /* For error() */
+#include <libgen.h>
static void mktbls(void);
static void others(void);
@@ -236,6 +239,25 @@ mktbls()
lsetsize = INIT_LSIZE + 1;
}
+static int
+yacc_assemble_path(char *buf, size_t size, const char *file, int type)
+{
+ int ret;
+ char origin[PATH_MAX];
+
+ if (type != 0) {
+ ret = readlink("/proc/self/path/a.out", origin, PATH_MAX - 1);
+ if (ret < 0)
+ error(gettext(
+ "yacc: failed to read origin from /proc\n"));
+ origin[ret] = '\0';
+ return (snprintf(buf, size, "%s/../%s", dirname(origin),
+ file));
+ }
+
+ return (snprintf(buf, size, "%s/%s", PARSERPREFIX, file));
+}
+
/* put out other arrays, copy the parsers */
static void
others()
@@ -244,7 +266,17 @@ others()
int c, i, j;
int tmpline;
- finput = fopen(parser, "r");
+ if (parser == NULL) {
+ parser = pbuf;
+ (void) yacc_assemble_path(pbuf, PBUFSIZE, PARSER, 1);
+ finput = fopen(parser, "r");
+ if (finput == NULL) {
+ (void) yacc_assemble_path(pbuf, PBUFSIZE, PARSER, 0);
+ finput = fopen(parser, "r");
+ }
+ } else {
+ finput = fopen(parser, "r");
+ }
if (finput == NULL)
/*
* TRANSLATION_NOTE -- This is a message from yacc.
diff --git a/usr/src/cmd/sgs/yacc/common/y2.c b/usr/src/cmd/sgs/yacc/common/y2.c
index 3599d40904..ca8dcc61f5 100644
--- a/usr/src/cmd/sgs/yacc/common/y2.c
+++ b/usr/src/cmd/sgs/yacc/common/y2.c
@@ -26,7 +26,9 @@
/* Copyright (c) 1988 AT&T */
/* All Rights Reserved */
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ */
#include "dextern.h"
#include "sgs.h"
@@ -60,7 +62,8 @@ char *infile; /* input file name */
static int numbval; /* value of an input number */
static int toksize = NAMESIZE;
static wchar_t *tokname; /* input token name */
-char *parser = PARSER; /* location of common parser */
+char *parser = NULL; /* location of common parser */
+char pbuf[PBUFSIZE];
static void finact(void);
static wchar_t *cstash(wchar_t *);
diff --git a/usr/src/cmd/ssh/Makefile b/usr/src/cmd/ssh/Makefile
index a36a9fb762..62f30b3f36 100644
--- a/usr/src/cmd/ssh/Makefile
+++ b/usr/src/cmd/ssh/Makefile
@@ -77,7 +77,7 @@ check: $(CHECKHDRS)
_msg:
$(RM) $(POFILE)
$(TOUCH) $(POFILE)
- $(MAKE) $(POFILE) XGETTEXT=/usr/bin/gxgettext
+ $(MAKE) $(POFILE) XGETTEXT=$(GNUXGETTEXT)
$(CP) $(POFILE) $(MSGFILE)
$(CP) $(MSGFILE) $(MSGDOMAIN)
diff --git a/usr/src/cmd/ssh/include/key.h b/usr/src/cmd/ssh/include/key.h
index 862b2d81d4..ec4993a9c1 100644
--- a/usr/src/cmd/ssh/include/key.h
+++ b/usr/src/cmd/ssh/include/key.h
@@ -39,11 +39,17 @@ extern "C" {
typedef struct Key Key;
enum types {
- KEY_RSA1,
- KEY_RSA,
- KEY_DSA,
+ KEY_RSA1,
+ KEY_RSA,
+ KEY_DSA,
+ KEY_ECDSA,
+ KEY_RSA_CERT,
+ KEY_DSA_CERT,
+ KEY_ECDSA_CERT,
+ KEY_RSA_CERT_V00,
+ KEY_DSA_CERT_V00,
KEY_NULL,
- KEY_UNSPEC
+ KEY_UNSPEC
};
enum fp_type {
SSH_FP_SHA1,
@@ -87,6 +93,7 @@ int key_names_valid2(const char *);
int key_sign(Key *, u_char **, u_int *, u_char *, u_int);
int key_verify(Key *, u_char *, u_int, u_char *, u_int);
+int key_type_plain(int type);
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/cmd/ssh/include/servconf.h b/usr/src/cmd/ssh/include/servconf.h
index a66c6415cb..9a32544c2a 100644
--- a/usr/src/cmd/ssh/include/servconf.h
+++ b/usr/src/cmd/ssh/include/servconf.h
@@ -167,6 +167,7 @@ typedef struct {
char *pre_userauth_hook;
char *pam_service_prefix;
char *pam_service_name;
+ char *pubkey_plugin;
} ServerOptions;
diff --git a/usr/src/cmd/ssh/libssh/common/canohost.c b/usr/src/cmd/ssh/libssh/common/canohost.c
index 2d427b9e8d..87aab396cf 100644
--- a/usr/src/cmd/ssh/libssh/common/canohost.c
+++ b/usr/src/cmd/ssh/libssh/common/canohost.c
@@ -73,9 +73,6 @@ get_remote_hostname(int socket, int verify_reverse_mapping)
if (getnameinfo((struct sockaddr *)&from, fromlen, name, sizeof(name),
NULL, 0, NI_NAMEREQD) != 0) {
/* Host name not found. Use ip address. */
-#if 0
- log("Could not reverse map address %.100s.", ntop);
-#endif
return xstrdup(ntop);
}
@@ -206,36 +203,6 @@ get_socket_address(int socket, int remote, int flags)
return (xstrdup(result));
}
-#if 0
-static char *
-get_socket_address(int socket, int remote, int flags)
-{
- struct sockaddr_storage addr;
- socklen_t addrlen;
- char ntop[NI_MAXHOST];
-
- /* Get IP address of client. */
- addrlen = sizeof(addr);
- memset(&addr, 0, sizeof(addr));
-
- if (remote) {
- if (getpeername(socket, (struct sockaddr *)&addr, &addrlen)
- < 0)
- return NULL;
- } else {
- if (getsockname(socket, (struct sockaddr *)&addr, &addrlen)
- < 0)
- return NULL;
- }
- /* Get the address in ascii. */
- if (getnameinfo((struct sockaddr *)&addr, addrlen, ntop, sizeof(ntop),
- NULL, 0, flags) != 0) {
- error("get_socket_ipaddr: getnameinfo %d failed", flags);
- return NULL;
- }
- return xstrdup(ntop);
-}
-#endif
char *
get_peer_ipaddr(int socket)
@@ -388,4 +355,4 @@ inet_ntop_native(int af, const void *src, char *dst, size_t size)
}
return (result);
-}
+}
diff --git a/usr/src/cmd/ssh/libssh/common/key.c b/usr/src/cmd/ssh/libssh/common/key.c
index f648d3b640..8ee2583d93 100644
--- a/usr/src/cmd/ssh/libssh/common/key.c
+++ b/usr/src/cmd/ssh/libssh/common/key.c
@@ -874,3 +874,20 @@ key_demote(Key *k)
return (pk);
}
+
+int
+key_type_plain(int type)
+{
+ switch (type) {
+ case KEY_RSA_CERT_V00:
+ case KEY_RSA_CERT:
+ return KEY_RSA;
+ case KEY_DSA_CERT_V00:
+ case KEY_DSA_CERT:
+ return KEY_DSA;
+ case KEY_ECDSA_CERT:
+ return KEY_ECDSA;
+ default:
+ return type;
+ }
+}
diff --git a/usr/src/cmd/ssh/sftp-server/Makefile b/usr/src/cmd/ssh/sftp-server/Makefile
index c2bdf26c1e..f49dde54ee 100644
--- a/usr/src/cmd/ssh/sftp-server/Makefile
+++ b/usr/src/cmd/ssh/sftp-server/Makefile
@@ -31,7 +31,7 @@ SRCS = $(OBJS:.o=.c)
include ../../Makefile.cmd
include ../Makefile.ssh-common
-LDLIBS += $(SSH_COMMON_LDLIBS) -lsocket -lcrypto
+LDLIBS += $(SSH_COMMON_LDLIBS) -lsocket -lsunw_crypto
POFILE_DIR = ..
diff --git a/usr/src/cmd/ssh/sftp/Makefile b/usr/src/cmd/ssh/sftp/Makefile
index 8bd8bb4ac3..f3c0ea7a75 100644
--- a/usr/src/cmd/ssh/sftp/Makefile
+++ b/usr/src/cmd/ssh/sftp/Makefile
@@ -35,7 +35,7 @@ SRCS = $(OBJS:.o=.c)
include ../../Makefile.cmd
include ../Makefile.ssh-common
-LDLIBS += $(SSH_COMMON_LDLIBS) -lsocket -lcrypto -ltecla
+LDLIBS += $(SSH_COMMON_LDLIBS) -lsocket -lsunw_crypto -ltecla
POFILE_DIR = ..
diff --git a/usr/src/cmd/ssh/ssh-add/Makefile b/usr/src/cmd/ssh/ssh-add/Makefile
index 1fb132f741..3235839e06 100644
--- a/usr/src/cmd/ssh/ssh-add/Makefile
+++ b/usr/src/cmd/ssh/ssh-add/Makefile
@@ -32,7 +32,7 @@ SRCS = $(OBJS:.o=.c)
include ../../Makefile.cmd
include ../Makefile.ssh-common
-LDLIBS += $(SSH_COMMON_LDLIBS) -lsocket -lcrypto
+LDLIBS += $(SSH_COMMON_LDLIBS) -lsocket -lsunw_crypto
POFILE_DIR= ..
diff --git a/usr/src/cmd/ssh/ssh-agent/Makefile b/usr/src/cmd/ssh/ssh-agent/Makefile
index 3d4a366c17..ab2e1eb49d 100644
--- a/usr/src/cmd/ssh/ssh-agent/Makefile
+++ b/usr/src/cmd/ssh/ssh-agent/Makefile
@@ -32,7 +32,7 @@ SRCS = $(OBJS:.o=.c)
include ../../Makefile.cmd
include ../Makefile.ssh-common
-LDLIBS += $(SSH_COMMON_LDLIBS) -lsocket -lcrypto
+LDLIBS += $(SSH_COMMON_LDLIBS) -lsocket -lsunw_crypto
POFILE_DIR= ..
diff --git a/usr/src/cmd/ssh/ssh-keygen/Makefile b/usr/src/cmd/ssh/ssh-keygen/Makefile
index f92c437045..0c90716768 100644
--- a/usr/src/cmd/ssh/ssh-keygen/Makefile
+++ b/usr/src/cmd/ssh/ssh-keygen/Makefile
@@ -32,7 +32,7 @@ SRCS = $(OBJS:.o=.c)
include ../../Makefile.cmd
include ../Makefile.ssh-common
-LDLIBS += $(SSH_COMMON_LDLIBS) -lcrypto -lsocket
+LDLIBS += $(SSH_COMMON_LDLIBS) -lsunw_crypto -lsocket
POFILE_DIR= ..
diff --git a/usr/src/cmd/ssh/ssh-keygen/ssh-keygen.c b/usr/src/cmd/ssh/ssh-keygen/ssh-keygen.c
index ebad79b0f8..c79e76ae36 100644
--- a/usr/src/cmd/ssh/ssh-keygen/ssh-keygen.c
+++ b/usr/src/cmd/ssh/ssh-keygen/ssh-keygen.c
@@ -11,7 +11,7 @@
* called by a name other than "ssh" or "Secure Shell".
*/
-/* $OpenBSD: ssh-keygen.c,v 1.160 2007/01/21 01:41:54 stevesk Exp $ */
+/* $OpenBSD: ssh-keygen.c,v 1.205 2011/01/11 06:13:10 djm Exp $ */
#include "includes.h"
#include <openssl/evp.h>
@@ -76,9 +76,14 @@ char *identity_new_passphrase = NULL;
/* This is set to the new comment if given on the command line. */
char *identity_comment = NULL;
-/* Dump public key file in format used by real and the original SSH 2 */
-int convert_to_ssh2 = 0;
-int convert_from_ssh2 = 0;
+/* Conversion to/from various formats */
+int convert_to = 0;
+int convert_from = 0;
+enum {
+ FMT_RFC4716,
+ FMT_PKCS8,
+ FMT_PEM
+} convert_format = FMT_RFC4716;
int print_public = 0;
char *key_type_name = NULL;
@@ -154,41 +159,105 @@ load_identity(char *filename)
#define SSH_COM_PRIVATE_KEY_MAGIC 0x3f6ff9eb
static void
-do_convert_to_ssh2(struct passwd *pw)
+do_convert_to_ssh2(struct passwd *pw, Key *k)
{
- Key *k;
u_int len;
u_char *blob;
- struct stat st;
+ char comment[61];
- if (!have_identity)
- ask_filename(pw, gettext("Enter file in which the key is"));
- if (stat(identity_file, &st) < 0) {
- perror(identity_file);
+ if (key_to_blob(k, &blob, &len) <= 0) {
+ fprintf(stderr, "key_to_blob failed\n");
exit(1);
}
+ /* Comment + surrounds must fit into 72 chars (RFC 4716 sec 3.3) */
+ snprintf(comment, sizeof(comment),
+ "%u-bit %s, converted by %s@%s from OpenSSH",
+ key_size(k), key_type(k),
+ pw->pw_name, hostname);
+
+ fprintf(stdout, "%s\n", SSH_COM_PUBLIC_BEGIN);
+ fprintf(stdout, "Comment: \"%s\"\n", comment);
+ dump_base64(stdout, blob, len);
+ fprintf(stdout, "%s\n", SSH_COM_PUBLIC_END);
+ key_free(k);
+ xfree(blob);
+ exit(0);
+}
+
+static void
+do_convert_to_pkcs8(Key *k)
+{
+ switch (key_type_plain(k->type)) {
+ case KEY_RSA:
+ if (!PEM_write_RSA_PUBKEY(stdout, k->rsa))
+ fatal("PEM_write_RSA_PUBKEY failed");
+ break;
+ case KEY_DSA:
+ if (!PEM_write_DSA_PUBKEY(stdout, k->dsa))
+ fatal("PEM_write_DSA_PUBKEY failed");
+ break;
+ default:
+ fatal("%s: unsupported key type %s", __func__, key_type(k));
+ }
+ exit(0);
+}
+
+static void
+do_convert_to_pem(Key *k)
+{
+ switch (key_type_plain(k->type)) {
+ case KEY_RSA:
+ if (!PEM_write_RSAPublicKey(stdout, k->rsa))
+ fatal("PEM_write_RSAPublicKey failed");
+ break;
+ case KEY_DSA:
+ if (!PEM_write_DSA_PUBKEY(stdout, k->dsa))
+ fatal("PEM_write_DSAPublicKey failed");
+ break;
+ default:
+ fatal("%s: unsupported key type %s", __func__, key_type(k));
+ }
+ exit(0);
+}
+
+static void
+do_convert_to(struct passwd *pw)
+{
+ Key *k;
+ struct stat st;
+
+ if (!have_identity)
+ ask_filename(pw, "Enter file in which the key is");
+ if (stat(identity_file, &st) < 0)
+ fatal("%s: %s: %s", __progname, identity_file, strerror(errno));
if ((k = key_load_public(identity_file, NULL)) == NULL) {
if ((k = load_identity(identity_file)) == NULL) {
- fprintf(stderr, gettext("load failed\n"));
+ fprintf(stderr, "load failed\n");
exit(1);
}
}
- if (key_to_blob(k, &blob, &len) <= 0) {
- fprintf(stderr, gettext("key_to_blob failed\n"));
+ if (k->type == KEY_RSA1) {
+ fprintf(stderr, "version 1 keys are not supported\n");
exit(1);
}
- fprintf(stdout, "%s\n", SSH_COM_PUBLIC_BEGIN);
- fprintf(stdout, gettext(
- "Comment: \"%u-bit %s, converted from OpenSSH by %s@%s\"\n"),
- key_size(k), key_type(k),
- pw->pw_name, hostname);
- dump_base64(stdout, blob, len);
- fprintf(stdout, "%s\n", SSH_COM_PUBLIC_END);
- key_free(k);
- xfree(blob);
+
+ switch (convert_format) {
+ case FMT_RFC4716:
+ do_convert_to_ssh2(pw, k);
+ break;
+ case FMT_PKCS8:
+ do_convert_to_pkcs8(k);
+ break;
+ case FMT_PEM:
+ do_convert_to_pem(k);
+ break;
+ default:
+ fatal("%s: unknown key format %d", __func__, convert_format);
+ }
exit(0);
}
+
static void
buffer_get_bignum_bits(Buffer *b, BIGNUM *value)
{
@@ -327,24 +396,16 @@ get_line(FILE *fp, char *line, size_t len)
}
static void
-do_convert_from_ssh2(struct passwd *pw)
+do_convert_from_ssh2(struct passwd *pw, Key **k, int *private)
{
- Key *k;
int blen;
u_int len;
char line[1024];
u_char blob[8096];
char encoded[8096];
- struct stat st;
- int escaped = 0, private = 0, ok;
+ int escaped = 0;
FILE *fp;
- if (!have_identity)
- ask_filename(pw, gettext("Enter file in which the key is"));
- if (stat(identity_file, &st) < 0) {
- perror(identity_file);
- exit(1);
- }
fp = fopen(identity_file, "r");
if (fp == NULL) {
perror(identity_file);
@@ -357,7 +418,7 @@ do_convert_from_ssh2(struct passwd *pw)
if (strncmp(line, "----", 4) == 0 ||
strstr(line, ": ") != NULL) {
if (strstr(line, SSH_COM_PRIVATE_BEGIN) != NULL)
- private = 1;
+ *private = 1;
if (strstr(line, " END ") != NULL) {
break;
}
@@ -382,26 +443,117 @@ do_convert_from_ssh2(struct passwd *pw)
fprintf(stderr, gettext("uudecode failed.\n"));
exit(1);
}
- k = private ?
+ *k = *private ?
do_convert_private_ssh2_from_blob(blob, blen) :
key_from_blob(blob, blen);
- if (k == NULL) {
+ if (*k == NULL) {
fprintf(stderr, gettext("decode blob failed.\n"));
exit(1);
}
- ok = private ?
- (k->type == KEY_DSA ?
- PEM_write_DSAPrivateKey(stdout, k->dsa, NULL, NULL, 0, NULL, NULL) :
- PEM_write_RSAPrivateKey(stdout, k->rsa, NULL, NULL, 0, NULL, NULL)) :
- key_write(k, stdout);
+ fclose(fp);
+}
+
+static void
+do_convert_from_pkcs8(Key **k, int *private)
+{
+ EVP_PKEY *pubkey;
+ FILE *fp;
+
+ if ((fp = fopen(identity_file, "r")) == NULL)
+ fatal("%s: %s: %s", __progname, identity_file, strerror(errno));
+ if ((pubkey = PEM_read_PUBKEY(fp, NULL, NULL, NULL)) == NULL) {
+ fatal("%s: %s is not a recognised public key format", __func__,
+ identity_file);
+ }
+ fclose(fp);
+ switch (EVP_PKEY_type(pubkey->type)) {
+ case EVP_PKEY_RSA:
+ *k = key_new(KEY_UNSPEC);
+ (*k)->type = KEY_RSA;
+ (*k)->rsa = EVP_PKEY_get1_RSA(pubkey);
+ break;
+ case EVP_PKEY_DSA:
+ *k = key_new(KEY_UNSPEC);
+ (*k)->type = KEY_DSA;
+ (*k)->dsa = EVP_PKEY_get1_DSA(pubkey);
+ break;
+ default:
+ fatal("%s: unsupported pubkey type %d", __func__,
+ EVP_PKEY_type(pubkey->type));
+ }
+ EVP_PKEY_free(pubkey);
+ return;
+}
+
+static void
+do_convert_from_pem(Key **k, int *private)
+{
+ FILE *fp;
+ RSA *rsa;
+
+ if ((fp = fopen(identity_file, "r")) == NULL)
+ fatal("%s: %s: %s", __progname, identity_file, strerror(errno));
+ if ((rsa = PEM_read_RSAPublicKey(fp, NULL, NULL, NULL)) != NULL) {
+ *k = key_new(KEY_UNSPEC);
+ (*k)->type = KEY_RSA;
+ (*k)->rsa = rsa;
+ fclose(fp);
+ return;
+ }
+ fatal("%s: unrecognised raw private key format", __func__);
+}
+
+static void
+do_convert_from(struct passwd *pw)
+{
+ Key *k = NULL;
+ int private = 0, ok = 0;
+ struct stat st;
+
+ if (!have_identity)
+ ask_filename(pw, "Enter file in which the key is");
+ if (stat(identity_file, &st) < 0)
+ fatal("%s: %s: %s", __progname, identity_file, strerror(errno));
+
+ switch (convert_format) {
+ case FMT_RFC4716:
+ do_convert_from_ssh2(pw, &k, &private);
+ break;
+ case FMT_PKCS8:
+ do_convert_from_pkcs8(&k, &private);
+ break;
+ case FMT_PEM:
+ do_convert_from_pem(&k, &private);
+ break;
+ default:
+ fatal("%s: unknown key format %d", __func__, convert_format);
+ }
+
+ if (!private)
+ ok = key_write(k, stdout);
+ if (ok)
+ fprintf(stdout, "\n");
+ else {
+ switch (k->type) {
+ case KEY_DSA:
+ ok = PEM_write_DSAPrivateKey(stdout, k->dsa, NULL,
+ NULL, 0, NULL, NULL);
+ break;
+ case KEY_RSA:
+ ok = PEM_write_RSAPrivateKey(stdout, k->rsa, NULL,
+ NULL, 0, NULL, NULL);
+ break;
+ default:
+ fatal("%s: unsupported key type %s", __func__,
+ key_type(k));
+ }
+ }
+
if (!ok) {
- fprintf(stderr, gettext("key write failed"));
+ fprintf(stderr, "key write failed\n");
exit(1);
}
key_free(k);
- if (!private)
- fprintf(stdout, "\n");
- fclose(fp);
exit(0);
}
@@ -917,12 +1069,13 @@ usage(void)
" -B Show bubblebabble digest of key file.\n"
" -c Change comment in private and public key files.\n"
" -C comment Provide new comment.\n"
- " -e Convert OpenSSH to IETF SECSH key file.\n"
+ " -e Convert OpenSSH to foreign format key file.\n"
" -f filename Filename of the key file.\n"
" -F hostname Find hostname in known hosts file.\n"
" -H Hash names in known_hosts file.\n"
- " -i Convert IETF SECSH to OpenSSH key file.\n"
+ " -i Convert foreign format to OpenSSH key file.\n"
" -l Show fingerprint of key file.\n"
+ " -m key_fmt Conversion format for -e/-i (PEM|PKCS8|RFC4716).\n"
" -N phrase Provide new passphrase.\n"
" -p Change passphrase of private key file.\n"
" -P phrase Provide old passphrase.\n"
@@ -974,7 +1127,7 @@ main(int argc, char **argv)
exit(1);
}
-#define GETOPT_ARGS "BcdeHilpqxXyb:C:f:F:N:P:R:t:"
+#define GETOPT_ARGS "BcdeHilpqxXyb:C:f:F:m:N:P:R:t:"
while ((opt = getopt(argc, argv, GETOPT_ARGS)) != -1) {
switch (opt) {
@@ -1002,6 +1155,22 @@ main(int argc, char **argv)
case 'B':
print_bubblebabble = 1;
break;
+ case 'm':
+ if (strcasecmp(optarg, "RFC4716") == 0 ||
+ strcasecmp(optarg, "ssh2") == 0) {
+ convert_format = FMT_RFC4716;
+ break;
+ }
+ if (strcasecmp(optarg, "PKCS8") == 0) {
+ convert_format = FMT_PKCS8;
+ break;
+ }
+ if (strcasecmp(optarg, "PEM") == 0) {
+ convert_format = FMT_PEM;
+ break;
+ }
+ fatal("Unsupported conversion format \"%s\"", optarg);
+ /*NOTREACHED*/
case 'p':
change_passphrase = 1;
break;
@@ -1027,12 +1196,12 @@ main(int argc, char **argv)
case 'e':
case 'x':
/* export key */
- convert_to_ssh2 = 1;
+ convert_to = 1;
break;
case 'i':
case 'X':
/* import key */
- convert_from_ssh2 = 1;
+ convert_from = 1;
break;
case 'y':
print_public = 1;
@@ -1064,10 +1233,10 @@ main(int argc, char **argv)
do_change_passphrase(pw);
if (change_comment)
do_change_comment(pw);
- if (convert_to_ssh2)
- do_convert_to_ssh2(pw);
- if (convert_from_ssh2)
- do_convert_from_ssh2(pw);
+ if (convert_to)
+ do_convert_to(pw);
+ if (convert_from)
+ do_convert_from(pw);
if (print_public)
do_print_public(pw);
diff --git a/usr/src/cmd/ssh/ssh-keyscan/Makefile b/usr/src/cmd/ssh/ssh-keyscan/Makefile
index 9e2fd17160..90428880a3 100644
--- a/usr/src/cmd/ssh/ssh-keyscan/Makefile
+++ b/usr/src/cmd/ssh/ssh-keyscan/Makefile
@@ -32,7 +32,7 @@ SRCS = $(OBJS:.o=.c)
include ../../Makefile.cmd
include ../Makefile.ssh-common
-LDLIBS += $(SSH_COMMON_LDLIBS) -lsocket -lnsl -lz -lcrypto
+LDLIBS += $(SSH_COMMON_LDLIBS) -lsocket -lnsl -lz -lsunw_crypto
POFILE_DIR= ..
diff --git a/usr/src/cmd/ssh/ssh-keysign/Makefile b/usr/src/cmd/ssh/ssh-keysign/Makefile
index e31ae681a1..649935a050 100644
--- a/usr/src/cmd/ssh/ssh-keysign/Makefile
+++ b/usr/src/cmd/ssh/ssh-keysign/Makefile
@@ -36,7 +36,7 @@ include ../Makefile.ssh-common
FILEMODE= 04555
-LDLIBS += $(SSH_COMMON_LDLIBS) -lsocket -lnsl -lz -lcrypto
+LDLIBS += $(SSH_COMMON_LDLIBS) -lsocket -lnsl -lz -lsunw_crypto
POFILE_DIR= ..
diff --git a/usr/src/cmd/ssh/ssh/Makefile b/usr/src/cmd/ssh/ssh/Makefile
index 7bdd4f6be5..2d77334497 100644
--- a/usr/src/cmd/ssh/ssh/Makefile
+++ b/usr/src/cmd/ssh/ssh/Makefile
@@ -40,7 +40,7 @@ include ../Makefile.ssh-common
LDLIBS += $(SSH_COMMON_LDLIBS) -lsocket \
-lnsl \
-lz \
- -lcrypto \
+ -lsunw_crypto \
-lgss
POFILE_DIR= ..
diff --git a/usr/src/cmd/ssh/sshd/Makefile b/usr/src/cmd/ssh/sshd/Makefile
index a52e9b1cc8..4c82633347 100644
--- a/usr/src/cmd/ssh/sshd/Makefile
+++ b/usr/src/cmd/ssh/sshd/Makefile
@@ -75,7 +75,7 @@ LDLIBS += $(SSH_COMMON_LDLIBS) -lsocket \
-lpam \
-lbsm \
-lwrap \
- -lcrypto \
+ -lsunw_crypto \
-lgss \
-lcontract
MAPFILES = $(MAPFILE.INT) $(MAPFILE.NGB)
diff --git a/usr/src/cmd/ssh/sshd/auth2-pubkey.c b/usr/src/cmd/ssh/sshd/auth2-pubkey.c
index 658634c195..c1c5f540e4 100644
--- a/usr/src/cmd/ssh/sshd/auth2-pubkey.c
+++ b/usr/src/cmd/ssh/sshd/auth2-pubkey.c
@@ -26,6 +26,8 @@
* Use is subject to license terms.
*/
+#include <dlfcn.h>
+
#include "includes.h"
RCSID("$OpenBSD: auth2-pubkey.c,v 1.2 2002/05/31 11:35:15 markus Exp $");
@@ -54,6 +56,13 @@ extern ServerOptions options;
extern u_char *session_id2;
extern int session_id2_len;
+/* global plugin function requirements */
+static const char *RSA_SYM_NAME = "sshd_user_rsa_key_allowed";
+static const char *DSA_SYM_NAME = "sshd_user_rsa_key_allowed";
+typedef int (*RSA_SYM)(struct passwd *, RSA *, const char *);
+typedef int (*DSA_SYM)(struct passwd *, DSA *, const char *);
+
+
static void
userauth_pubkey(Authctxt *authctxt)
{
@@ -309,7 +318,98 @@ user_key_allowed2(struct passwd *pw, Key *key, char *file)
return found_key;
}
-/* check whether given key is in .ssh/authorized_keys* */
+/**
+ * Checks whether or not access is allowed based on a plugin specified
+ * in sshd_config (PubKeyPlugin).
+ *
+ * Note that this expects a symbol in the loaded library that takes
+ * the current user (pwd entry), the current RSA key and it's fingerprint.
+ * The symbol is expected to return 1 on success and 0 on failure.
+ *
+ * While we could optimize this code to dlopen once in the process' lifetime,
+ * sshd is already a slow beast, so this is really not a concern.
+ * The overhead is basically a rounding error compared to everything else, and
+ * it keeps this code minimally invasive.
+ */
+static int
+user_key_allowed_from_plugin(struct passwd *pw, Key *key)
+{
+ RSA_SYM rsa_sym = NULL;
+ DSA_SYM dsa_sym = NULL;
+ char *fp = NULL;
+ void *handle = NULL;
+ int success = 0;
+
+ if (options.pubkey_plugin == NULL || pw == NULL || key == NULL ||
+ (key->type != KEY_RSA && key->type != KEY_RSA1 &&
+ key->type != KEY_DSA && key->type != KEY_ECDSA))
+ return success;
+
+ handle = dlopen(options.pubkey_plugin, RTLD_NOW);
+ if ((handle == NULL)) {
+ debug("Unable to open library %s: %s", options.pubkey_plugin,
+ dlerror());
+ goto out;
+ }
+
+ fp = key_fingerprint(key, SSH_FP_MD5, SSH_FP_HEX);
+ if (fp == NULL) {
+ debug("failed to generate fingerprint");
+ goto out;
+ }
+
+ switch (key->type) {
+ case KEY_RSA1:
+ case KEY_RSA:
+ rsa_sym = (RSA_SYM)dlsym(handle, RSA_SYM_NAME);
+ if (rsa_sym == NULL) {
+ debug("Unable to resolve symbol %s: %s", RSA_SYM_NAME,
+ dlerror());
+ goto out;
+ }
+ debug2("Invoking %s from %s", RSA_SYM_NAME,
+ options.pubkey_plugin);
+ success = (*rsa_sym)(pw, key->rsa, fp);
+ break;
+ case KEY_DSA:
+ case KEY_ECDSA:
+ dsa_sym = (DSA_SYM)dlsym(handle, RSA_SYM_NAME);
+ if (dsa_sym == NULL) {
+ debug("Unable to resolve symbol %s: %s", DSA_SYM_NAME,
+ dlerror());
+ goto out;
+ }
+ debug2("Invoking %s from %s", DSA_SYM_NAME,
+ options.pubkey_plugin);
+ success = (*dsa_sym)(pw, key->dsa, fp);
+ break;
+ default:
+ debug2("user_key_plugins only support RSA keys");
+ }
+
+ debug("sshd_plugin returned: %d", success);
+
+out:
+ if (handle != NULL) {
+ dlclose(handle);
+ dsa_sym = NULL;
+ rsa_sym = NULL;
+ handle = NULL;
+ }
+
+ if (success)
+ verbose("Found matching %s key: %s", key_type(key), fp);
+
+ if (fp != NULL) {
+ xfree(fp);
+ fp = NULL;
+ }
+
+ return success;
+}
+
+
+/* check whether given key is in .ssh/authorized_keys or a plugin */
int
user_key_allowed(struct passwd *pw, Key *key)
{
@@ -329,6 +429,13 @@ user_key_allowed(struct passwd *pw, Key *key)
file = authorized_keys_file2(pw);
success = user_key_allowed2(pw, key, file);
xfree(file);
+
+ if (success)
+ return success;
+
+ /* try from a plugin */
+ success = user_key_allowed_from_plugin(pw, key);
+
return success;
}
diff --git a/usr/src/cmd/ssh/sshd/servconf.c b/usr/src/cmd/ssh/sshd/servconf.c
index 516466bbc1..16f1dcecf7 100644
--- a/usr/src/cmd/ssh/sshd/servconf.c
+++ b/usr/src/cmd/ssh/sshd/servconf.c
@@ -155,6 +155,7 @@ initialize_server_options(ServerOptions *options)
options->pre_userauth_hook = NULL;
options->pam_service_name = NULL;
options->pam_service_prefix = NULL;
+ options->pubkey_plugin = NULL;
}
#ifdef HAVE_DEFOPEN
@@ -419,13 +420,14 @@ typedef enum {
sPermitUserEnvironment, sUseLogin, sAllowTcpForwarding, sCompression,
sAllowUsers, sDenyUsers, sAllowGroups, sDenyGroups,
sIgnoreUserKnownHosts, sCiphers, sMacs, sProtocol, sPidFile,
- sGatewayPorts, sPubkeyAuthentication, sXAuthLocation, sSubsystem, sMaxStartups,
+ sGatewayPorts, sPubkeyAuthentication, sXAuthLocation, sSubsystem,
sBanner, sVerifyReverseMapping, sHostbasedAuthentication,
sHostbasedUsesNameFromPacketOnly, sClientAliveInterval,
sClientAliveCountMax, sAuthorizedKeysFile, sAuthorizedKeysFile2,
sMaxAuthTries, sMaxAuthTriesLog, sUsePrivilegeSeparation,
sLookupClientHostnames, sUseOpenSSLEngine, sChrootDirectory,
sPreUserauthHook, sMatch, sPAMServicePrefix, sPAMServiceName,
+ sMaxStartups, sPubKeyPlugin,
sDeprecated
} ServerOpCodes;
@@ -532,6 +534,7 @@ static struct {
{ "match", sMatch, SSHCFG_ALL },
{ "pamserviceprefix", sPAMServicePrefix, SSHCFG_GLOBAL },
{ "pamservicename", sPAMServiceName, SSHCFG_GLOBAL },
+ { "pubkeyplugin", sPubKeyPlugin, SSHCFG_ALL },
{ NULL, sBadOption, 0 }
};
@@ -1356,6 +1359,10 @@ parse_flag:
options->pam_service_name = xstrdup(arg);
break;
+ case sPubKeyPlugin:
+ charptr = &options->pubkey_plugin;
+ goto parse_filename;
+
default:
fatal("%s line %d: Missing handler for opcode %s (%d)",
filename, linenum, arg, opcode);
diff --git a/usr/src/cmd/ssh/sshd/sshlogin.c b/usr/src/cmd/ssh/sshd/sshlogin.c
index c21877355c..c2bd3bacb7 100644
--- a/usr/src/cmd/ssh/sshd/sshlogin.c
+++ b/usr/src/cmd/ssh/sshd/sshlogin.c
@@ -101,8 +101,7 @@ record_login(pid_t pid, const char *ttyname, const char *progname,
fatal_cleanup();
}
}
- remote_name_or_ip = get_remote_name_or_ip(utmp_len,
- options.verify_reverse_mapping);
+ remote_name_or_ip = get_remote_ipaddr();
initialized = 1;
}
diff --git a/usr/src/cmd/stat/Makefile b/usr/src/cmd/stat/Makefile
index 34149b2b37..faffc6a437 100644
--- a/usr/src/cmd/stat/Makefile
+++ b/usr/src/cmd/stat/Makefile
@@ -19,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2011, 2012, Joyent, Inc. All rights reserved.
# Use is subject to license terms.
#
# cmd/stat/Makefile
@@ -27,7 +27,14 @@
include ../Makefile.cmd
-SUBDIRS= arcstat iostat mpstat vmstat fsstat kstat
+SUBDIRS= arcstat \
+ fsstat \
+ iostat \
+ kstat \
+ mpstat \
+ vfsstat \
+ vmstat \
+ ziostat
all := TARGET = all
install := TARGET = install
diff --git a/usr/src/cmd/stat/arcstat/Makefile b/usr/src/cmd/stat/arcstat/Makefile
index 6ae60a8d3d..a98e2fee7e 100644
--- a/usr/src/cmd/stat/arcstat/Makefile
+++ b/usr/src/cmd/stat/arcstat/Makefile
@@ -11,6 +11,7 @@
#
# Copyright 2014 Adam Stevko. All rights reserved.
+# Copyright (c) 2011, Joyent, Inc. All rights reserved.
#
include $(SRC)/cmd/Makefile.cmd
diff --git a/usr/src/cmd/stat/arcstat/arcstat.pl b/usr/src/cmd/stat/arcstat/arcstat.pl
index 58491140c4..58491140c4 100755..100644
--- a/usr/src/cmd/stat/arcstat/arcstat.pl
+++ b/usr/src/cmd/stat/arcstat/arcstat.pl
diff --git a/usr/src/cmd/stat/vfsstat/Makefile b/usr/src/cmd/stat/vfsstat/Makefile
new file mode 100644
index 0000000000..04b5085243
--- /dev/null
+++ b/usr/src/cmd/stat/vfsstat/Makefile
@@ -0,0 +1,41 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2011, Joyent, Inc. All rights reserved.
+#
+
+include $(SRC)/cmd/Makefile.cmd
+
+PROG= vfsstat
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+install: all .WAIT $(ROOTPROG)
+
+clean:
+
+$(ROOTBINPROG): $(PROG)
+ $(INS.file)
+
+lint:
+
+include $(SRC)/cmd/Makefile.targ
diff --git a/usr/src/cmd/stat/vfsstat/vfsstat.pl b/usr/src/cmd/stat/vfsstat/vfsstat.pl
new file mode 100644
index 0000000000..a3780b8e63
--- /dev/null
+++ b/usr/src/cmd/stat/vfsstat/vfsstat.pl
@@ -0,0 +1,227 @@
+#!/usr/perl5/bin/perl -w
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2011 Joyent, Inc.
+#
+# vfsstat - report VFS statistics per zone
+#
+# USAGE: vfsstat [-hIMrzZ] [interval [count]]
+# -h # help
+# -I # print results per interval (where applicable)
+# -M # print results in MB/s
+# -r # print data in comma-separated format
+# -z # hide zones with no VFS activity
+# -Z # print data for all zones
+#
+# eg, vfsstat # print summary since zone boot
+# vfsstat 1 # print continually every 1 second
+# vfsstat 1 5 # print 5 times, every 1 second
+#
+# NOTES:
+#
+# - The calculations and output fields emulate those from iostat(1M) as closely
+# as possible. When only one zone is actively performing disk I/O, the
+# results from iostat(1M) in the global zone and vfsstat in the local zone
+# should be almost identical. Note that many VFS read operations are handled
+# by the ARC, so vfsstat and iostat(1M) will be similar only when most
+# requests are missing in the ARC.
+#
+# - As with iostat(1M), a result of 100% for VFS read and write utilization does
+# not mean that the syscall layer is fully saturated. Instead, that
+# measurement just shows that at least one operation was pending over the last
+# quanta of time examined. Since the VFS layer can process more than one
+# operation concurrently, this measurement will frequently be 100% but the VFS
+# layer can still accept additional requests.
+#
+# - This script is based on Brendan Gregg's K9Toolkit examples:
+#
+# http://www.brendangregg.com/k9toolkit.html
+#
+
+use Getopt::Std;
+use Sun::Solaris::Kstat;
+my $Kstat = Sun::Solaris::Kstat->new();
+
+# Process command line args
+usage() if defined $ARGV[0] and $ARGV[0] eq "--help";
+getopts('hIMrzZ') or usage();
+usage() if defined $main::opt_h;
+$main::opt_h = 0;
+
+my $USE_MB = defined $main::opt_M ? $main::opt_M : 0;
+my $USE_INTERVAL = defined $main::opt_I ? $main::opt_I : 0;
+my $USE_COMMA = defined $main::opt_r ? $main::opt_r : 0;
+my $HIDE_ZEROES = defined $main::opt_z ? $main::opt_z : 0;
+my $ALL_ZONES = defined $main::opt_Z ? $main::opt_Z : 0;
+
+my ($interval, $count);
+if ( defined($ARGV[0]) ) {
+ $interval = $ARGV[0];
+ $count = defined ($ARGV[1]) ? $ARGV[1] : 2**32;
+ usage() if ($interval == 0);
+} else {
+ $interval = 1;
+ $count = 1;
+}
+
+my $HEADER_FMT = $USE_COMMA ?
+ "r/%s,w/%s,%sr/%s,%sw/%s,ractv,wactv,read_t,writ_t,%%r,%%w," .
+ "d/%s,del_t,zone\n" :
+ " r/%s w/%s %sr/%s %sw/%s ractv wactv read_t writ_t " .
+ "%%r %%w d/%s del_t zone\n";
+my $DATA_FMT = $USE_COMMA ?
+ "%.1f,%.1f,%.1f,%.1f,%.1f,%.1f,%.1f,%.1f,%d,%d,%.1f,%.1f,%s,%d\n" :
+ "%5.1f %5.1f %5.1f %5.1f %5.1f %5.1f %6.1f %6.1f %3d %3d " .
+ "%5.1f %6.1f %s (%d)\n";
+
+my $BYTES_PREFIX = $USE_MB ? "M" : "k";
+my $BYTES_DIVISOR = $USE_MB ? 1024 * 1024 : 1024;
+my $INTERVAL_SUFFIX = $USE_INTERVAL ? "i" : "s";
+my $NANOSEC = 1000000000;
+
+my @fields = ( 'reads', 'writes', 'nread', 'nwritten', 'rtime', 'wtime',
+ 'rlentime', 'wlentime', 'delay_cnt', 'delay_time', 'snaptime' );
+
+chomp(my $curzone = (`/sbin/zonename`));
+
+my %old = ();
+my $rows_printed = 0;
+
+for (my $ii = 0; $ii < $count; $ii++) {
+ # Read list of visible zones and their zone IDs
+ my @zones = ();
+ my %zoneids = ();
+ my $zoneadm = `zoneadm list -p | cut -d: -f1,2`;
+ @lines = split(/\n/, $zoneadm);
+ foreach $line (@lines) {
+ @tok = split(/:/, $line);
+ $zoneids->{$tok[1]} = $tok[0];
+ push(@zones, $tok[1]);
+ }
+
+ $Kstat->update();
+
+ # Print the column header every 20 rows
+ if ($rows_printed == 0 || $ALL_ZONES) {
+ printf($HEADER_FMT, $INTERVAL_SUFFIX, $INTERVAL_SUFFIX,
+ $BYTES_PREFIX, $INTERVAL_SUFFIX, $BYTES_PREFIX,
+ $INTERVAL_SUFFIX, $INTERVAL_SUFFIX);
+ }
+
+ $rows_printed = $rows_printed >= 20 ? 0 : $rows_printed + 1;
+
+ foreach $zone (@zones) {
+ if ((!$ALL_ZONES) && ($zone ne $curzone)) {
+ next;
+ }
+
+ if (! defined $old->{$zone}) {
+ $old->{$zone} = ();
+ foreach $field (@fields) { $old->{$zone}->{$field} = 0; }
+ }
+
+ #
+ # Kstats have a 30-character limit (KSTAT_STRLEN) on their
+ # names, so if the zone name exceeds that limit, use the first
+ # 30 characters.
+ #
+ my $trimmed_zone = substr($zone, 0, 30);
+ my $zoneid = $zoneids->{$zone};
+
+ print_stats($zone, $zoneid,
+ $Kstat->{'zone_vfs'}{$zoneid}{$trimmed_zone}, $old->{$zone});
+ }
+
+ sleep ($interval);
+}
+
+exit(0);
+
+sub print_stats {
+ my $zone = $_[0];
+ my $zoneid = $_[1];
+ my $data = $_[2];
+ my $old = $_[3];
+
+ my $etime = $data->{'snaptime'} -
+ ($old->{'snaptime'} > 0 ? $old->{'snaptime'} : $data->{'crtime'});
+
+ # Calculate basic statistics
+ my $rate_divisor = $USE_INTERVAL ? 1 : $etime;
+ my $reads = ($data->{'reads'} - $old->{'reads'}) / $rate_divisor;
+ my $writes = ($data->{'writes'} - $old->{'writes'}) / $rate_divisor;
+ my $nread = ($data->{'nread'} - $old->{'nread'}) /
+ $rate_divisor / $BYTES_DIVISOR;
+ my $nwritten = ($data->{'nwritten'} - $old->{'nwritten'}) /
+ $rate_divisor / $BYTES_DIVISOR;
+
+ # Calculate transactions per second
+ my $r_tps = ($data->{'reads'} - $old->{'reads'}) / $etime;
+ my $w_tps = ($data->{'writes'} - $old->{'writes'}) / $etime;
+
+ # Calculate average length of active queue
+ my $r_actv = (($data->{'rlentime'} - $old->{'rlentime'}) / $NANOSEC) /
+ $etime;
+ my $w_actv = (($data->{'wlentime'} - $old->{'wlentime'}) / $NANOSEC) /
+ $etime;
+
+ # Calculate average service time
+ # multiply by 1000 to convert to usecs for conssistency with del_t
+ my $read_t = ($r_tps > 0 ? $r_actv * (1000 / $r_tps) : 0.0) * 1000;
+ my $writ_t = ($w_tps > 0 ? $w_actv * (1000 / $w_tps) : 0.0) * 1000;
+
+ # Calculate I/O throttle delay metrics
+ my $delays = $data->{'delay_cnt'} - $old->{'delay_cnt'};
+ my $d_tps = $delays / $etime;
+ my $del_t = $delays > 0 ?
+ ($data->{'delay_time'} - $old->{'delay_time'}) / $delays : 0.0;
+
+ # Calculate the % time the VFS layer is active
+ my $r_b_pct = ((($data->{'rtime'} - $old->{'rtime'}) / $NANOSEC) /
+ $etime) * 100;
+ my $w_b_pct = ((($data->{'wtime'} - $old->{'wtime'}) / $NANOSEC) /
+ $etime) * 100;
+
+ if (! $HIDE_ZEROES || $reads != 0.0 || $writes != 0.0 ||
+ $nread != 0.0 || $nwritten != 0.0) {
+ printf($DATA_FMT, $reads, $writes, $nread, $nwritten, $r_actv,
+ $w_actv, $read_t, $writ_t, $r_b_pct, $w_b_pct,
+ $d_tps, $del_t, substr($zone, 0, 8), $zoneid);
+ }
+
+ # Save current calculations for next loop
+ foreach (@fields) { $old->{$_} = $data->{$_}; }
+}
+
+sub usage {
+ print STDERR <<END;
+USAGE: vfsstat [-hIMrzZ] [interval [count]]
+ eg, vfsstat # print summary since zone boot
+ vfsstat 1 # print continually every 1 second
+ vfsstat 1 5 # print 5 times, every 1 second
+ vfsstat -I # print results per interval (where applicable)
+ vfsstat -M # print results in MB/s
+ vfsstat -r # print results in comma-separated format
+ vfsstat -z # hide zones with no VFS activity
+ vfsstat -Z # print results for all zones
+END
+ exit 1;
+}
diff --git a/usr/src/cmd/stat/ziostat/Makefile b/usr/src/cmd/stat/ziostat/Makefile
new file mode 100644
index 0000000000..c338b59678
--- /dev/null
+++ b/usr/src/cmd/stat/ziostat/Makefile
@@ -0,0 +1,41 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2011, Joyent, Inc. All rights reserved.
+#
+
+include $(SRC)/cmd/Makefile.cmd
+
+PROG= ziostat
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+install: all .WAIT $(ROOTPROG)
+
+clean:
+
+$(ROOTBINPROG): $(PROG)
+ $(INS.file)
+
+lint:
+
+include $(SRC)/cmd/Makefile.targ
diff --git a/usr/src/cmd/stat/ziostat/ziostat.pl b/usr/src/cmd/stat/ziostat/ziostat.pl
new file mode 100755
index 0000000000..cf95d2f5a5
--- /dev/null
+++ b/usr/src/cmd/stat/ziostat/ziostat.pl
@@ -0,0 +1,204 @@
+#!/usr/perl5/bin/perl -w
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2011 Joyent, Inc.
+#
+# ziostat - report I/O statistics per zone
+#
+# USAGE: ziostat [-hIMrzZ] [interval [count]]
+# -h # help
+# -I # print results per interval (where applicable)
+# -M # print results in MB/s
+# -r # print data in comma-separated format
+# -z # hide zones with no ZFS I/O activity
+# -Z # print data for all zones
+#
+# eg, ziostat # print summary since zone boot
+# ziostat 1 # print continually every 1 second
+# ziostat 1 5 # print 5 times, every 1 second
+#
+# NOTES:
+#
+# - The calculations and output fields emulate those from iostat(1M) as closely
+# as possible. When only one zone is actively performing disk I/O, the
+# results from iostat(1M) in the global zone and ziostat in the local zone
+# should be almost identical.
+#
+# - As with iostat(1M), a result of 100% for disk utilization does not mean that
+# the disk is fully saturated. Instead, that measurement just shows that at
+# least one operation was pending over the last quanta of time examined.
+# Since disk devices can process more than one operation concurrently, this
+# measurement will frequently be 100% but the disk can still offer higher
+# performance.
+#
+# - This script is based on Brendan Gregg's K9Toolkit examples:
+#
+# http://www.brendangregg.com/k9toolkit.html
+#
+
+use Getopt::Std;
+use Sun::Solaris::Kstat;
+my $Kstat = Sun::Solaris::Kstat->new();
+
+# Process command line args
+usage() if defined $ARGV[0] and $ARGV[0] eq "--help";
+getopts('hIMrzZ') or usage();
+usage() if defined $main::opt_h;
+$main::opt_h = 0;
+
+my $USE_MB = defined $main::opt_M ? $main::opt_M : 0;
+my $USE_INTERVAL = defined $main::opt_I ? $main::opt_I : 0;
+my $USE_COMMA = defined $main::opt_r ? $main::opt_r : 0;
+my $HIDE_ZEROES = defined $main::opt_z ? $main::opt_z : 0;
+my $ALL_ZONES = defined $main::opt_Z ? $main::opt_Z : 0;
+
+my ($interval, $count);
+if ( defined($ARGV[0]) ) {
+ $interval = $ARGV[0];
+ $count = defined ($ARGV[1]) ? $ARGV[1] : 2**32;
+ usage() if ($interval == 0);
+} else {
+ $interval = 1;
+ $count = 1;
+}
+
+my $HEADER_FMT = $USE_COMMA ?
+ "r/%s,%sr/%s,actv,wsvc_t,asvc_t,%%b,zone\n" :
+ " r/%s %sr/%s actv wsvc_t asvc_t %%b zone\n";
+my $DATA_FMT = $USE_COMMA ?
+ "%.1f,%.1f,%.1f,%.1f,%.1f,%d,%s,%d\n" :
+ " %6.1f %6.1f %6.1f %6.1f %6.1f %3d %s (%d)\n";
+
+my $BYTES_PREFIX = $USE_MB ? "M" : "k";
+my $BYTES_DIVISOR = $USE_MB ? 1024 * 1024 : 1024;
+my $INTERVAL_SUFFIX = $USE_INTERVAL ? "i" : "s";
+my $NANOSEC = 1000000000;
+
+my @fields = ( 'reads', 'nread', 'waittime', 'rtime', 'rlentime', 'snaptime' );
+
+chomp(my $curzone = (`/sbin/zonename`));
+
+# Read list of visible zones and their zone IDs
+my @zones = ();
+my %zoneids = ();
+my $zoneadm = `zoneadm list -p | cut -d: -f1,2`;
+@lines = split(/\n/, $zoneadm);
+foreach $line (@lines) {
+ @tok = split(/:/, $line);
+ $zoneids->{$tok[1]} = $tok[0];
+ push(@zones, $tok[1]);
+}
+
+my %old = ();
+my $rows_printed = 0;
+
+$Kstat->update();
+
+for (my $ii = 0; $ii < $count; $ii++) {
+ # Print the column header every 20 rows
+ if ($rows_printed == 0 || $ALL_ZONES) {
+ printf($HEADER_FMT, $INTERVAL_SUFFIX, $BYTES_PREFIX,
+ $INTERVAL_SUFFIX, $INTERVAL_SUFFIX);
+ }
+
+ $rows_printed = $rows_printed >= 20 ? 0 : $rows_printed + 1;
+
+ foreach $zone (@zones) {
+ if ((!$ALL_ZONES) && ($zone ne $curzone)) {
+ next;
+ }
+
+ if (! defined $old->{$zone}) {
+ $old->{$zone} = ();
+ foreach $field (@fields) { $old->{$zone}->{$field} = 0; }
+ }
+
+ #
+ # Kstats have a 30-character limit (KSTAT_STRLEN) on their
+ # names, so if the zone name exceeds that limit, use the first
+ # 30 characters.
+ #
+ my $trimmed_zone = substr($zone, 0, 30);
+ my $zoneid = $zoneids->{$zone};
+
+ print_stats($zone, $zoneid,
+ $Kstat->{'zone_zfs'}{$zoneid}{$trimmed_zone}, $old->{$zone});
+ }
+
+ sleep ($interval);
+ $Kstat->update();
+}
+
+sub print_stats {
+ my $zone = $_[0];
+ my $zoneid = $_[1];
+ my $data = $_[2];
+ my $old = $_[3];
+
+ my $etime = $data->{'snaptime'} -
+ ($old->{'snaptime'} > 0 ? $old->{'snaptime'} : $data->{'crtime'});
+
+ # Calculate basic statistics
+ my $rate_divisor = $USE_INTERVAL ? 1 : $etime;
+ my $reads = ($data->{'reads'} - $old->{'reads'}) / $rate_divisor;
+ my $nread = ($data->{'nread'} - $old->{'nread'}) /
+ $rate_divisor / $BYTES_DIVISOR;
+
+ # Calculate overall transactions per second
+ my $ops = $data->{'reads'} - $old->{'reads'};
+ my $tps = $ops / $etime;
+
+ # Calculate average length of disk run queue
+ my $actv = (($data->{'rlentime'} - $old->{'rlentime'}) / $NANOSEC) /
+ $etime;
+
+ # Calculate average disk wait and service times
+ my $wsvc = $ops > 0 ? (($data->{'waittime'} - $old->{'waittime'}) /
+ 1000000) / $ops : 0.0;
+ my $asvc = $tps > 0 ? $actv * (1000 / $tps) : 0.0;
+
+ # Calculate the % time the disk run queue is active
+ my $b_pct = ((($data->{'rtime'} - $old->{'rtime'}) / $NANOSEC) /
+ $etime) * 100;
+
+ if (! $HIDE_ZEROES || $reads != 0.0 || $nread != 0.0 ) {
+ printf($DATA_FMT, $reads, $nread, $actv, $wsvc, $asvc,
+ $b_pct, substr($zone, 0, 8), $zoneid);
+ }
+
+ # Save current calculations for next loop
+ foreach (@fields) { $old->{$_} = $data->{$_}; }
+}
+
+sub usage {
+ print STDERR <<END;
+USAGE: ziostat [-hIMrzZ] [interval [count]]
+ eg, ziostat # print summary since zone boot
+ ziostat 1 # print continually every 1 second
+ ziostat 1 5 # print 5 times, every 1 second
+ ziostat -I # print results per interval (where applicable)
+ ziostat -M # print results in MB/s
+ ziostat -r # print results in comma-separated format
+ ziostat -z # hide zones with no ZFS I/O activity
+ ziostat -Z # print results for all zones
+END
+ exit 1;
+}
diff --git a/usr/src/cmd/svc/configd/rc_node.c b/usr/src/cmd/svc/configd/rc_node.c
index 3cc30e3e67..149f2a6cb5 100644
--- a/usr/src/cmd/svc/configd/rc_node.c
+++ b/usr/src/cmd/svc/configd/rc_node.c
@@ -23,6 +23,9 @@
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
+/*
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ */
/*
* rc_node.c - In-memory SCF object management
diff --git a/usr/src/cmd/svc/milestone/net-routing-setup b/usr/src/cmd/svc/milestone/net-routing-setup
index 6ab1a6c7f0..b4ee7d39ac 100644
--- a/usr/src/cmd/svc/milestone/net-routing-setup
+++ b/usr/src/cmd/svc/milestone/net-routing-setup
@@ -21,11 +21,15 @@
#
#
# Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+#
+# Copyright (c) 2012 Joyent, Inc. All rights reserved.
# This script configures IP routing.
. /lib/svc/share/smf_include.sh
+set -o xtrace
+
#
# In a shared-IP zone we need this service to be up, but all of the work
# it tries to do is irrelevant (and will actually lead to the service
@@ -156,7 +160,8 @@ fi
# however, as persistent daemon state is now controlled by SMF.
#
ipv4_routing_set=`/usr/bin/svcprop -p routeadm/ipv4-routing-set $SMF_FMRI`
-if [ -z "$defrouters" ]; then
+smartos_param=`/usr/bin/bootparams | grep "^smartos"`
+if [ -z "$defrouters" ] && [ "$smartos_param" != "" ]; then
#
# Set default value for ipv4-routing to enabled. If routeadm -e/-d
# has not yet been run by the administrator, we apply this default.
@@ -210,5 +215,21 @@ if [ -f /etc/inet/static_routes ]; then
done
fi
+#
+# Read /etc/inet/static_routes.vmadm and add each route.
+#
+if [ -f /etc/inet/static_routes.vmadm ]; then
+ echo "Adding vmadm persistent routes:"
+ /usr/bin/egrep -v "^(#|$)" /etc/inet/static_routes.vmadm | while read line; do
+ /usr/sbin/route add $line
+ done
+fi
+
+#
+# Log the result
+#
+echo "Routing setup complete:"
+/usr/bin/netstat -rn
+
# Clear exit status.
exit $SMF_EXIT_OK
diff --git a/usr/src/cmd/svc/milestone/network-location.xml b/usr/src/cmd/svc/milestone/network-location.xml
index aad337f42f..709e9df8f3 100644
--- a/usr/src/cmd/svc/milestone/network-location.xml
+++ b/usr/src/cmd/svc/milestone/network-location.xml
@@ -106,7 +106,7 @@
-->
<dependency
name='manifest-import'
- grouping='require_all'
+ grouping='optional_all'
restart_on='none'
type='service'>
<service_fmri value='svc:/system/manifest-import:default' />
diff --git a/usr/src/cmd/svc/milestone/network-routing-setup.xml b/usr/src/cmd/svc/milestone/network-routing-setup.xml
index b34d578e2a..85a74756da 100644
--- a/usr/src/cmd/svc/milestone/network-routing-setup.xml
+++ b/usr/src/cmd/svc/milestone/network-routing-setup.xml
@@ -40,11 +40,19 @@
<!-- loopback/physical network configuration is required -->
<dependency
- name='network'
- grouping='optional_all'
+ name='loopback'
+ grouping='require_all'
+ restart_on='none'
+ type='service'>
+ <service_fmri value='svc:/network/loopback' />
+ </dependency>
+
+ <dependency
+ name='physical'
+ grouping='require_all'
restart_on='none'
type='service'>
- <service_fmri value='svc:/milestone/network' />
+ <service_fmri value='svc:/network/physical' />
</dependency>
<!-- usr filesystem required to run routing-related commands -->
diff --git a/usr/src/cmd/svc/milestone/network.xml b/usr/src/cmd/svc/milestone/network.xml
index 75b5578f44..48386ebf73 100644
--- a/usr/src/cmd/svc/milestone/network.xml
+++ b/usr/src/cmd/svc/milestone/network.xml
@@ -54,6 +54,14 @@
<service_fmri value='svc:/network/physical' />
</dependency>
+ <dependency
+ name='routing-setup'
+ grouping='require_all'
+ restart_on='none'
+ type='service'>
+ <service_fmri value='svc:/network/routing-setup' />
+ </dependency>
+
<exec_method
type='method'
name='start'
diff --git a/usr/src/cmd/svc/milestone/single-user.xml b/usr/src/cmd/svc/milestone/single-user.xml
index cbb93fd3b3..a9eb0cfa7e 100644
--- a/usr/src/cmd/svc/milestone/single-user.xml
+++ b/usr/src/cmd/svc/milestone/single-user.xml
@@ -95,7 +95,7 @@
<dependency
name='manifests'
- grouping='require_all'
+ grouping='optional_all'
restart_on='none'
type='service'>
<service_fmri value='svc:/system/manifest-import' />
diff --git a/usr/src/cmd/svc/shell/smf_include.sh b/usr/src/cmd/svc/shell/smf_include.sh
index d0dc387246..02c9532763 100644
--- a/usr/src/cmd/svc/shell/smf_include.sh
+++ b/usr/src/cmd/svc/shell/smf_include.sh
@@ -22,6 +22,7 @@
#
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
+# Copyright 2012 Joyent, Inc. All rights reserved.
#
smf_present () {
@@ -234,7 +235,12 @@ smf_kill_contract() {
# SMF_EXIT_ERR_OTHER, although not defined, encompasses all non-zero
# exit status values.
#
+# The SMF_EXIT_NODAEMON exit status should be used when a method does not
+# need to run any persistent process. This indicates success, abandons the
+# contract, and allows dependencies to be met.
+#
SMF_EXIT_OK=0
+SMF_EXIT_NODAEMON=94
SMF_EXIT_ERR_FATAL=95
SMF_EXIT_ERR_CONFIG=96
SMF_EXIT_MON_DEGRADE=97
diff --git a/usr/src/cmd/svc/startd/graph.c b/usr/src/cmd/svc/startd/graph.c
index 7fbf17a6ec..c8e0872ff8 100644
--- a/usr/src/cmd/svc/startd/graph.c
+++ b/usr/src/cmd/svc/startd/graph.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2013, Joyent, Inc. All rights reserved.
*/
/*
@@ -141,6 +142,8 @@
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
#include <fm/libfmevent.h>
#include <libscf.h>
#include <libscf_priv.h>
@@ -4875,6 +4878,20 @@ vertex_subgraph_dependencies_shutdown(scf_handle_t *h, graph_vertex_t *v,
was_up = up_state(old_state);
now_up = up_state(v->gv_state);
+ if (halting != -1 && old_state == RESTARTER_STATE_DISABLED &&
+ v->gv_state != RESTARTER_STATE_DISABLED) {
+ /*
+ * We're halting and we have a svc which is transitioning to
+ * offline in parallel. This leads to a race condition where
+ * gt_enter_offline might re-enable the svc after we disabled
+ * it. Since we're halting, we want to ensure no svc ever
+ * transitions out of the disabled state. In this case, modify
+ * the flags to keep us on the halting path.
+ */
+ was_up = 0;
+ now_up = 0;
+ }
+
if (!was_up && now_up) {
++non_subgraph_svcs;
} else if (was_up && !now_up) {
@@ -6827,6 +6844,7 @@ repository_event_thread(void *unused)
char *fmri = startd_alloc(max_scf_fmri_size);
char *pg_name = startd_alloc(max_scf_value_size);
int r;
+ int fd;
h = libscf_handle_create_bound_loop();
@@ -6849,6 +6867,14 @@ retry:
goto retry;
}
+ if ((fd = open("/etc/svc/volatile/startd.ready", O_RDONLY | O_CREAT,
+ S_IRUSR)) < 0) {
+ log_error(LOG_WARNING, "Couldn't create startd.ready file\n",
+ SCF_GROUP_FRAMEWORK, scf_strerror(scf_error()));
+ } else {
+ (void) close(fd);
+ }
+
/*CONSTCOND*/
while (1) {
ssize_t res;
diff --git a/usr/src/cmd/svc/startd/method.c b/usr/src/cmd/svc/startd/method.c
index cc9ce6768c..c3cd0144c1 100644
--- a/usr/src/cmd/svc/startd/method.c
+++ b/usr/src/cmd/svc/startd/method.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2011 Joyent Inc.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
/*
@@ -100,34 +100,18 @@ static uint_t method_events[] = {
* method_record_start(restarter_inst_t *)
* Record a service start for rate limiting. Place the current time
* in the circular array of instance starts.
+ *
+ * Save the critical_failure_period and critical_failure_allowed with either
+ * the defaults or the svc properties startd/critical_failure_count and
+ * startd/critical_failure_period.
+ * ri_crit_fail_allowed is capped at RINST_START_TIMES.
*/
static void
method_record_start(restarter_inst_t *inst)
{
- int index = inst->ri_start_index++ % RINST_START_TIMES;
-
- inst->ri_start_time[index] = gethrtime();
-}
-
-/*
- * method_rate_critical(restarter_inst_t *)
- * Return true if the average start interval is less than the permitted
- * interval. The implicit interval defaults to RINST_FAILURE_RATE_NS and
- * RINST_START_TIMES but may be overridden with the svc properties
- * startd/critical_failure_count and startd/critical_failure_period
- * which represent the number of failures to consider and the amount of
- * time in seconds in which that number may occur, respectively. Note that
- * this time is measured as of the transition to 'enabled' rather than wall
- * clock time.
- * Implicit success if insufficient measurements for an average exist.
- */
-int
-method_rate_critical(restarter_inst_t *inst)
-{
+ int index;
+ uint_t critical_failure_allowed = RINST_START_TIMES;
hrtime_t critical_failure_period;
- uint_t critical_failure_count = RINST_START_TIMES;
- uint_t n = inst->ri_start_index;
- hrtime_t avg_ns = 0;
uint64_t scf_fr, scf_st;
scf_propvec_t *prop = NULL;
scf_propvec_t restart_critical[] = {
@@ -151,17 +135,48 @@ method_rate_critical(restarter_inst_t *inst)
* in seconds but tracked in ns
*/
critical_failure_period = (hrtime_t)scf_fr * NANOSEC;
- critical_failure_count = (uint_t)scf_st;
+ critical_failure_allowed = (uint_t)scf_st;
+
+ if (critical_failure_allowed > RINST_START_TIMES)
+ critical_failure_allowed = RINST_START_TIMES;
+ if (critical_failure_allowed < 1)
+ critical_failure_allowed = 1;
+
}
- if (inst->ri_start_index < critical_failure_count)
+
+ inst->ri_crit_fail_allowed = critical_failure_allowed;
+ inst->ri_crit_fail_period = critical_failure_period;
+
+ index = inst->ri_start_index++ % critical_failure_allowed;
+ inst->ri_start_time[index] = gethrtime();
+}
+
+/*
+ * method_rate_critical(restarter_inst_t *)
+ * Return true if the number of failures within the interval
+ * ri_crit_fail_period exceeds ri_crit_fail_allowed. The allowed failure
+ * count defaults to RINST_START_TIMES and the implicit interval defaults
+ * to RINST_FAILURE_RATE_NS but may be overridden with the svc properties
+ * startd/critical_failure_count and startd/critical_failure_period which
+ * represent the acceptable number of failures and the amount of time in
+ * seconds in which that number may occur, respectively. Note that this time
+ * is measured as of the transition to 'enabled' rather than wall clock
+ * time. Implicitly not critical if insufficient failures have occured.
+ */
+int
+method_rate_critical(restarter_inst_t *inst)
+{
+ uint_t n = inst->ri_start_index;
+ uint_t fail_allowed = inst->ri_crit_fail_allowed;
+ hrtime_t diff_ns;
+
+ if (n < fail_allowed)
return (0);
- avg_ns =
- (inst->ri_start_time[(n - 1) % critical_failure_count] -
- inst->ri_start_time[n % critical_failure_count]) /
- (critical_failure_count - 1);
+ diff_ns = inst->ri_start_time[(n - 1) % fail_allowed] -
+ inst->ri_start_time[n % fail_allowed];
- return (avg_ns < critical_failure_period);
+ return (diff_ns < inst->ri_crit_fail_period);
}
/*
@@ -989,7 +1004,8 @@ method_run(restarter_inst_t **instp, int type, int *exit_code)
goto contract_out;
}
- if (!WIFEXITED(ret_status)) {
+ if (!WIFEXITED(ret_status) &&
+ WEXITSTATUS(ret_status) != SMF_EXIT_NODAEMON) {
/*
* If method didn't exit itself (it was killed by an
* external entity, etc.), consider the entire
@@ -1018,7 +1034,7 @@ method_run(restarter_inst_t **instp, int type, int *exit_code)
}
*exit_code = WEXITSTATUS(ret_status);
- if (*exit_code != 0) {
+ if (*exit_code != 0 && *exit_code != SMF_EXIT_NODAEMON) {
log_error(LOG_WARNING,
"%s: Method \"%s\" failed with exit status %d.\n",
inst->ri_i.i_fmri, method, WEXITSTATUS(ret_status));
@@ -1027,6 +1043,7 @@ method_run(restarter_inst_t **instp, int type, int *exit_code)
log_instance(inst, B_TRUE, "Method \"%s\" exited with status "
"%d.", mname, *exit_code);
+ /* Note: we will take this path for SMF_EXIT_NODAEMON */
if (*exit_code != 0)
goto contract_out;
@@ -1073,7 +1090,10 @@ assured_kill:
}
contract_out:
- /* Abandon contracts for transient methods & methods that fail. */
+ /*
+ * Abandon contracts for transient methods, methods that exit with
+ * SMF_EXIT_NODAEMON & methods that fail.
+ */
transient = method_is_transient(inst, type);
if ((transient || *exit_code != 0 || result != 0) &&
(restarter_is_kill_method(method) < 0))
@@ -1169,7 +1189,7 @@ retry:
r = method_run(&inst, info->sf_method_type, &exit_code);
- if (r == 0 && exit_code == 0) {
+ if (r == 0 && (exit_code == 0 || exit_code == SMF_EXIT_NODAEMON)) {
/* Success! */
assert(inst->ri_i.i_next_state != RESTARTER_STATE_NONE);
@@ -1187,6 +1207,12 @@ retry:
else
method_remove_contract(inst, B_TRUE, B_TRUE);
}
+
+ /*
+ * For methods that exit with SMF_EXIT_NODAEMON, we already
+ * called method_remove_contract in method_run.
+ */
+
/*
* We don't care whether the handle was rebound because this is
* the last thing we do with it.
diff --git a/usr/src/cmd/svc/startd/startd.c b/usr/src/cmd/svc/startd/startd.c
index 6e3ea9876b..c3705c0c5c 100644
--- a/usr/src/cmd/svc/startd/startd.c
+++ b/usr/src/cmd/svc/startd/startd.c
@@ -42,6 +42,136 @@
* engine commands by executing methods, updating the repository, and sending
* feedback (mostly state updates) to the graph engine.
*
+ * Overview of the SMF Architecture
+ *
+ * There are a few different components that make up SMF and are responsible
+ * for different pieces of functionality that are used:
+ *
+ * svc.startd(1M): A daemon that is in charge of starting, stopping, and
+ * restarting services and instances
+ * svc.configd: A daemon that manages the repository that stores information,
+ * property groups, and state of the different services and instances
+ * libscf(3LIB): A C library that provides the glue for communicating,
+ * accessing, and updating information about services and instances
+ * svccfg(1M): A utility to add and remove services as well as change the
+ * properties associated with different services and instances.
+ * svcadm(1M): A utility to control the different instance of a service. You
+ * can use this to enable and disable them among some other useful things.
+ * svcs(1): A utility that reports on the status of various services on the
+ * system
+ *
+ * The following block diagram explains how these components communicate:
+ *
+ * The SMF Block Diagram
+ * Repository
+ * This attempts to show ___________ __________
+ * the relations between | | SQL | |
+ * the different pieces | configd |<----------->| SQLite |
+ * that make SMF work and | | Transaction | |
+ * users/administrators ----------- ----------
+ * call into. /|\ /|\
+ * | |
+ * door_call(3C)| | door_call(3C)
+ * | |
+ * \|/ \|/
+ * ____________ __________ __________ ____________
+ * | | | | | | | svccfg |
+ * | startd |<--->| libscf | | libscf |<---->| svcadm |
+ * | | | (3LIB) | | (3LIB) | | svcs |
+ * ------------ ---------- ---------- ------------
+ * /|\ /|\
+ * | | fork(2)/exec(2)
+ * | | libcontract(3LIB)
+ * \|/ \|/ Various System/User services
+ * ---------------------------------------------------------------------
+ * | system/filesystem/local:default system/coreadm:default |
+ * | network/lookpback:default system/zones:default |
+ * | network/ntp:default system/cron:default |
+ * | smartdc/agent/ca/cainstsvc:default network/ssh:default |
+ * | appliance/kit/akd:default system/svc/restarter:default |
+ * ---------------------------------------------------------------------
+ *
+ * Chatting with configd and sharing repository information
+ *
+ * As you run commands with svcs, svccfg, and svcadm, they are all creating a
+ * libscf handle to communicate with configd. As calls are made via libscf they
+ * ultimately go and talk to configd to get information. However, how we
+ * actually are talking to configd is not as straightforward as it appears.
+ *
+ * When configd starts up it creates a door located at
+ * /etc/svc/volatile/repository_door. This door runs the routine called
+ * main_switcher() from usr/src/cmd/svc/configd/maindoor.c. When you first
+ * invoke svc(cfg|s|adm), one of the first things that occurs is creating a
+ * scf_handle_t and binding it to configd by calling scf_handle_bind(). This
+ * function makes a door call to configd and gets returned a new file
+ * descriptor. This file descriptor is itself another door which calls into
+ * configd's client_switcher(). This is the door that is actually used when
+ * getting and fetching properties, and many other useful things.
+ *
+ * svc.startd needs a way to notice the changes that occur to the repository.
+ * For example, if you enabled a service that was not previously running, it's
+ * up to startd to notice that this has happened, check dependencies, and
+ * eventually start up the service. The way it gets these notifications is via
+ * a thread who's sole purpose in life is to call _scf_notify_wait(). This
+ * function acts like poll(2) but for changes that occur in the repository.
+ * Once this thread gets the event, it dispatches the event appropriately.
+ *
+ * The Events of svc.startd
+ *
+ * svc.startd has to handle a lot of complexity. Understanding how you go from
+ * getting the notification that a service was enabled to actually enabling it
+ * is not obvious from a cursory glance. The first thing to keep in mind is
+ * that startd maintains a graph of all the related services and instances so
+ * it can keep track of what is enabled, what dependencies exist, etc. all so
+ * that it can answer the question of what is affected by a change. Internally
+ * there are a lot of different queues for events, threads to process these
+ * queues, and different paths to have events enter these queues. What follows
+ * is a diagram that attempts to explain some of those paths, though it's
+ * important to note that for some of these pieces, such as the graph and
+ * vertex events, there are many additional ways and code paths these threads
+ * and functions can take. And yes, restarter_event_enqueue() is not the same
+ * thing as restarter_queue_event().
+ *
+ * Threads/Functions Queues Threads/Functions
+ *
+ * called by various
+ * ------------------ --------- ---------------
+ * --->| graph_protocol | graph_event | graph | graph_event_ | graph_event |
+ * --->| _send_event() |------------>| event |----------------->| _thread |
+ * ------------------ _enqueue() | queue | dequeue() ---------------
+ * --------- |
+ * _scf_notify_wait() vertex_send_event()|
+ * | \|/
+ * | -------------------- ----------------------
+ * |->| repository_event | vertex_send_event() | restarter_protocol |
+ * | _thread |----------------------------->| _send_event() |
+ * -------------------- ----------------------
+ * | | out to other
+ * restarter_ restarter_ | | restarters
+ * event_dequeue() ------------- event_ | | not startd
+ * |----------------| restarter |<------------| |------------->
+ * \|/ | event | enqueue()
+ * ------------------- | queue | |------------------>
+ * | restarter_event | ------------- ||----------------->
+ * | _thread | |||---------------->
+ * ------------------- ||| start/stop inst
+ * | ---------------- ----------------------
+ * | | instance | | restarter_process_ |
+ * |-------------->| event |------>| events |
+ * restarter_ | queue | | per-instance lwp |
+ * queue_event() ---------------- ----------------------
+ * ||| various funcs
+ * ||| controlling
+ * ||| instance state
+ * |||--------------->
+ * ||---------------->
+ * |----------------->
+ *
+ * What's important to take away is that there is a queue for each instance on
+ * the system that handles events related to dealing directly with that
+ * instance and that events can be added to it because of changes to properties
+ * that are made to configd and acted upon asynchronously by startd.
+ *
* Error handling
*
* In general, when svc.startd runs out of memory it reattempts a few times,
diff --git a/usr/src/cmd/svc/startd/startd.h b/usr/src/cmd/svc/startd/startd.h
index c1062e45e0..e204fb829f 100644
--- a/usr/src/cmd/svc/startd/startd.h
+++ b/usr/src/cmd/svc/startd/startd.h
@@ -405,7 +405,7 @@ typedef enum {
#define RINST_RETAKE_MASK 0x0f000000
-#define RINST_START_TIMES 5 /* failures to consider */
+#define RINST_START_TIMES 10 /* up to 10 fails to consider */
#define RINST_FAILURE_RATE_NS 600000000000LL /* 1 failure/10 minutes */
#define RINST_WT_SVC_FAILURE_RATE_NS NANOSEC /* 1 failure/second */
@@ -427,6 +427,8 @@ typedef struct restarter_inst {
hrtime_t ri_start_time[RINST_START_TIMES];
uint_t ri_start_index; /* times started */
+ uint_t ri_crit_fail_allowed;
+ hrtime_t ri_crit_fail_period;
uu_list_node_t ri_link;
pthread_mutex_t ri_lock;
diff --git a/usr/src/cmd/svc/svcadm/Makefile b/usr/src/cmd/svc/svcadm/Makefile
index cc0cc160bf..1a6a0dd35c 100644
--- a/usr/src/cmd/svc/svcadm/Makefile
+++ b/usr/src/cmd/svc/svcadm/Makefile
@@ -21,6 +21,7 @@
#
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
+# Copyright 2012, Joyent, Inc. All rights reserved.
#
PROG = svcadm
@@ -49,7 +50,11 @@ $(PROG): $(OBJS)
$(POFILE): $(POFILES)
cat $(POFILES) > $(POFILE)
-install: all $(ROOTUSRSBINPROG)
+install: all $(ROOTSBINPROG) $(ROOTUSRSBINPROG)
+
+$(ROOTUSRSBINPROG):
+ -$(RM) $@
+ -$(SYMLINK) ../../sbin/$(PROG) $@
clean:
$(RM) $(OBJS)
diff --git a/usr/src/cmd/svc/svccfg/svccfg_libscf.c b/usr/src/cmd/svc/svccfg/svccfg_libscf.c
index b43dedbd81..077a77f114 100644
--- a/usr/src/cmd/svc/svccfg/svccfg_libscf.c
+++ b/usr/src/cmd/svc/svccfg/svccfg_libscf.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright 2012 Milan Jurik. All rights reserved.
*/
@@ -44,6 +45,7 @@
#include <stdarg.h>
#include <string.h>
#include <strings.h>
+#include <time.h>
#include <unistd.h>
#include <wait.h>
#include <poll.h>
@@ -241,6 +243,9 @@ static const char *emsg_dpt_no_dep;
static int li_only = 0;
static int no_refresh = 0;
+/* how long in ns we should wait between checks for a pg */
+static uint64_t pg_timeout = 100 * (NANOSEC / MILLISEC);
+
/* import globals, to minimize allocations */
static scf_scope_t *imp_scope = NULL;
static scf_service_t *imp_svc = NULL, *imp_tsvc = NULL;
@@ -6751,6 +6756,205 @@ connaborted:
}
/*
+ * When an instance is imported we end up telling configd about it. Once we tell
+ * configd about these changes, startd eventually notices. If this is a new
+ * instance, the manifest may not specify the SCF_PG_RESTARTER (restarter)
+ * property group. However, many of the other tools expect that this property
+ * group exists and has certain values.
+ *
+ * These values are added asynchronously by startd. We should not return from
+ * this routine until we can verify that the property group we need is there.
+ *
+ * Before we go ahead and verify this, we have to ask ourselves an
+ * important question: Is the early manifest service currently running?
+ * Because if is running and invoked us, then the service will never get
+ * a restarter property because svc.startd is blocked on EMI finishing
+ * before it lets itself fully connect to svc.configd. Of course, this
+ * means that this race condition is in fact impossible to 100%
+ * eliminate.
+ *
+ * svc.startd makes sure that EMI only runs once and has succeeded by
+ * checking the state of the EMI instance. If it is online it bails out
+ * and makes sure that it doesn't run again. In this case, we're going
+ * to do something similar, only if the state is online, then we're
+ * going to actually verify. EMI always has to be present, but it
+ * can be explicitly disabled to reduce the amount of damage it can cause. If
+ * EMI has been disabled then we no longer have to worry about the implicit race
+ * condition and can go ahead and check things. If EMI is in some tate that
+ * isn't online or disabled and isn't runinng, then we assume that things are
+ * rather bad and we're not going to get in your way, even if the rest of SMF
+ * does.
+ *
+ * Returns 0 on success or returns an errno.
+ */
+#ifndef NATIVE_BUILD
+static int
+lscf_instance_verify(scf_scope_t *scope, entity_t *svc, entity_t *inst)
+{
+ int ret, err;
+ struct timespec ts;
+ char *emi_state;
+
+ /*
+ * smf_get_state does not distinguish between its different failure
+ * modes: memory allocation failures and SMF internal failures.
+ */
+ if ((emi_state = smf_get_state(SCF_INSTANCE_EMI)) == NULL)
+ return (EAGAIN);
+
+ /*
+ * As per the block comment for this function check the state of EMI
+ */
+ if (strcmp(emi_state, SCF_STATE_STRING_ONLINE) != 0 &&
+ strcmp(emi_state, SCF_STATE_STRING_DISABLED) != 0) {
+ warn(gettext("Not validating instance %s:%s because EMI's "
+ "state is %s\n"), svc->sc_name, inst->sc_name, emi_state);
+ free(emi_state);
+ return (0);
+ }
+
+ free(emi_state);
+
+ /*
+ * First we have to get the property.
+ */
+ if ((ret = scf_scope_get_service(scope, svc->sc_name, imp_svc)) != 0) {
+ ret = scf_error();
+ warn(gettext("Failed to look up service: %s\n"), svc->sc_name);
+ return (ret);
+ }
+
+ /*
+ * We should always be able to get the instance. It should already
+ * exist because we just created it or got it. There probably is a
+ * slim chance that someone may have come in and deleted it though from
+ * under us.
+ */
+ if ((ret = scf_service_get_instance(imp_svc, inst->sc_name, imp_inst))
+ != 0) {
+ ret = scf_error();
+ warn(gettext("Failed to verify instance: %s\n"), inst->sc_name);
+ switch (ret) {
+ case SCF_ERROR_DELETED:
+ err = ENODEV;
+ break;
+ case SCF_ERROR_CONNECTION_BROKEN:
+ warn(gettext("Lost repository connection\n"));
+ err = ECONNABORTED;
+ break;
+ case SCF_ERROR_NOT_FOUND:
+ warn(gettext("Instance \"%s\" disappeared out from "
+ "under us.\n"), inst->sc_name);
+ err = ENOENT;
+ break;
+ default:
+ bad_error("scf_service_get_instance", ret);
+ }
+
+ return (err);
+ }
+
+ /*
+ * An astute observer may want to use _scf_wait_pg which would notify us
+ * of a property group change, unfortunately that does not work if the
+ * property group in question does not exist. So instead we have to
+ * manually poll and ask smf the best way to get to it.
+ */
+ while ((ret = scf_instance_get_pg(imp_inst, SCF_PG_RESTARTER, imp_pg))
+ != SCF_SUCCESS) {
+ ret = scf_error();
+ if (ret != SCF_ERROR_NOT_FOUND) {
+ warn(gettext("Failed to get restarter property "
+ "group for instance: %s\n"), inst->sc_name);
+ switch (ret) {
+ case SCF_ERROR_DELETED:
+ err = ENODEV;
+ break;
+ case SCF_ERROR_CONNECTION_BROKEN:
+ warn(gettext("Lost repository connection\n"));
+ err = ECONNABORTED;
+ break;
+ default:
+ bad_error("scf_service_get_instance", ret);
+ }
+
+ return (err);
+ }
+
+ ts.tv_sec = pg_timeout / NANOSEC;
+ ts.tv_nsec = pg_timeout % NANOSEC;
+
+ (void) nanosleep(&ts, NULL);
+ }
+
+ /*
+ * svcadm also expects that the SCF_PROPERTY_STATE property is present.
+ * So in addition to the property group being present, we need to wait
+ * for the property to be there in some form.
+ *
+ * Note that a property group is a frozen snapshot in time. To properly
+ * get beyond this, you have to refresh the property group each time.
+ */
+ while ((ret = scf_pg_get_property(imp_pg, SCF_PROPERTY_STATE,
+ imp_prop)) != 0) {
+
+ ret = scf_error();
+ if (ret != SCF_ERROR_NOT_FOUND) {
+ warn(gettext("Failed to get property %s from the "
+ "restarter property group of instance %s\n"),
+ SCF_PROPERTY_STATE, inst->sc_name);
+ switch (ret) {
+ case SCF_ERROR_CONNECTION_BROKEN:
+ warn(gettext("Lost repository connection\n"));
+ err = ECONNABORTED;
+ break;
+ case SCF_ERROR_DELETED:
+ err = ENODEV;
+ break;
+ default:
+ bad_error("scf_pg_get_property", ret);
+ }
+
+ return (err);
+ }
+
+ ts.tv_sec = pg_timeout / NANOSEC;
+ ts.tv_nsec = pg_timeout % NANOSEC;
+
+ (void) nanosleep(&ts, NULL);
+
+ ret = scf_instance_get_pg(imp_inst, SCF_PG_RESTARTER, imp_pg);
+ if (ret != SCF_SUCCESS) {
+ warn(gettext("Failed to get restarter property "
+ "group for instance: %s\n"), inst->sc_name);
+ switch (ret) {
+ case SCF_ERROR_DELETED:
+ err = ENODEV;
+ break;
+ case SCF_ERROR_CONNECTION_BROKEN:
+ warn(gettext("Lost repository connection\n"));
+ err = ECONNABORTED;
+ break;
+ default:
+ bad_error("scf_service_get_instance", ret);
+ }
+
+ return (err);
+ }
+ }
+
+ /*
+ * We don't have to free the property groups or other values that we got
+ * because we stored them in global variables that are allocated and
+ * freed by the routines that call into these functions. Unless of
+ * course the rest of the code here that we are basing this on is
+ * mistaken.
+ */
+ return (0);
+}
+#endif
+
+/*
* If the service is missing, create it, import its properties, and import the
* instances. Since the service is brand new, it should be empty, and if we
* run into any existing entities (SCF_ERROR_EXISTS), abort.
@@ -6834,6 +7038,7 @@ lscf_service_import(void *v, void *pvt)
int fresh = 0;
scf_snaplevel_t *running;
int have_ge = 0;
+ boolean_t retried = B_FALSE;
const char * const ts_deleted = gettext("Temporary service svc:/%s "
"was deleted unexpectedly.\n");
@@ -6889,6 +7094,7 @@ lscf_service_import(void *v, void *pvt)
return (UU_WALK_ERROR);
}
+retry:
if (scf_scope_add_service(imp_scope, imp_tsname, imp_tsvc) != 0) {
switch (scf_error()) {
case SCF_ERROR_CONNECTION_BROKEN:
@@ -6898,6 +7104,11 @@ lscf_service_import(void *v, void *pvt)
return (stash_scferror(lcbdata));
case SCF_ERROR_EXISTS:
+ if (!retried) {
+ lscf_delete(imp_tsname, 0);
+ retried = B_TRUE;
+ goto retry;
+ }
warn(gettext(
"Temporary service \"%s\" must be deleted before "
"this manifest can be imported.\n"), imp_tsname);
@@ -8122,7 +8333,36 @@ lscf_bundle_import(bundle_t *bndl, const char *filename, uint_t flags)
goto progress;
result = 0;
+
+ /*
+ * This snippet of code assumes that we are running svccfg as we
+ * normally do -- witih svc.startd running. Of course, that is
+ * not actually the case all the time because we also use a
+ * varient of svc.configd and svcccfg which are only meant to
+ * run during the build process. During this time we have no
+ * svc.startd, so this check would hang the build process.
+ */
+#ifndef NATIVE_BUILD
+ /*
+ * Verify that the restarter group is preset
+ */
+ for (svc = uu_list_first(bndl->sc_bundle_services);
+ svc != NULL;
+ svc = uu_list_next(bndl->sc_bundle_services, svc)) {
+
+ insts = svc->sc_u.sc_service.sc_service_instances;
+
+ for (inst = uu_list_first(insts);
+ inst != NULL;
+ inst = uu_list_next(insts, inst)) {
+ if (lscf_instance_verify(imp_scope, svc,
+ inst) != 0)
+ goto progress;
+ }
+ }
+#endif
goto out;
+
}
if (uu_error() != UU_ERROR_CALLBACK_FAILED)
@@ -10630,6 +10870,10 @@ int
lscf_service_export(char *fmri, const char *filename, int flags)
{
struct export_args args;
+ char *fmridup;
+ const char *scope, *svc, *inst;
+ size_t cblen = 3 * max_scf_name_len;
+ char *canonbuf = alloca(cblen);
int ret, err;
lscf_prep_hndl();
@@ -10638,6 +10882,29 @@ lscf_service_export(char *fmri, const char *filename, int flags)
args.filename = filename;
args.flags = flags;
+ /*
+ * If some poor user has passed an exact instance FMRI, of the sort
+ * one might cut and paste from svcs(1) or an error message, warn
+ * and chop off the instance instead of failing.
+ */
+ fmridup = alloca(strlen(fmri) + 1);
+ (void) strcpy(fmridup, fmri);
+ if (strncmp(fmridup, SCF_FMRI_SVC_PREFIX,
+ sizeof (SCF_FMRI_SVC_PREFIX) -1) == 0 &&
+ scf_parse_svc_fmri(fmridup, &scope, &svc, &inst, NULL, NULL) == 0 &&
+ inst != NULL) {
+ (void) strlcpy(canonbuf, "svc:/", cblen);
+ if (strcmp(scope, SCF_FMRI_LOCAL_SCOPE) != 0) {
+ (void) strlcat(canonbuf, "/", cblen);
+ (void) strlcat(canonbuf, scope, cblen);
+ }
+ (void) strlcat(canonbuf, svc, cblen);
+ fmri = canonbuf;
+
+ warn(gettext("Only services may be exported; ignoring "
+ "instance portion of argument.\n"));
+ }
+
err = 0;
if ((ret = scf_walk_fmri(g_hndl, 1, (char **)&fmri,
SCF_WALK_SERVICE | SCF_WALK_NOINSTANCE, export_callback,
diff --git a/usr/src/cmd/svc/svcs/Makefile b/usr/src/cmd/svc/svcs/Makefile
index 0e9fc52652..2ea89818c0 100644
--- a/usr/src/cmd/svc/svcs/Makefile
+++ b/usr/src/cmd/svc/svcs/Makefile
@@ -34,7 +34,7 @@ include ../../Makefile.cmd
include ../../Makefile.ctf
POFILE = $(PROG)_all.po
-LDLIBS += -lcontract -lscf -luutil -lumem -lnvpair -lzonecfg
+LDLIBS += -lcontract -lscf -luutil -lumem -lnvpair -lzonecfg -lsasl
CPPFLAGS += -I ../common
lint := LINTFLAGS = -mux
diff --git a/usr/src/cmd/svc/svcs/explain.c b/usr/src/cmd/svc/svcs/explain.c
index 42fca80172..eed9733abc 100644
--- a/usr/src/cmd/svc/svcs/explain.c
+++ b/usr/src/cmd/svc/svcs/explain.c
@@ -200,6 +200,7 @@ static char *emsg_invalid_dep;
extern scf_handle_t *h;
extern char *g_zonename;
+extern char *g_zonealias;
/* ARGSUSED */
static int
@@ -2000,6 +2001,9 @@ print_service(inst_t *svcp, int verbose)
if (g_zonename != NULL)
(void) printf(gettext(" Zone: %s\n"), g_zonename);
+ if (g_zonealias != NULL)
+ (void) printf(gettext(" Alias: %s\n"), g_zonealias);
+
stime = svcp->stime.tv_sec;
tmp = localtime(&stime);
diff --git a/usr/src/cmd/svc/svcs/svcs.c b/usr/src/cmd/svc/svcs/svcs.c
index c54f4bd12d..b4882d1776 100644
--- a/usr/src/cmd/svc/svcs/svcs.c
+++ b/usr/src/cmd/svc/svcs/svcs.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
/*
@@ -59,6 +59,7 @@
#include <sys/ctfs.h>
#include <sys/stat.h>
+#include <sasl/saslutil.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
@@ -134,6 +135,9 @@ static int first_paragraph = 1; /* For -l mode. */
static char *common_name_buf; /* Sized for maximal length value. */
char *locale; /* Current locale. */
char *g_zonename; /* zone being operated upon */
+char *g_zonealias; /* alias for zone, if any */
+static char g_aliasdec[MAXPATHLEN / 4 * 3]; /* decoded zone alias buffer */
+static char g_aliasbuf[MAXPATHLEN]; /* base64 encoded zone alias buffer */
/*
* Pathname storage for path generated from the fmri.
@@ -240,7 +244,25 @@ ht_free(void)
static void
ht_init(void)
{
- assert(ht_buckets == NULL);
+ if (ht_buckets != NULL) {
+ /*
+ * If we already have a hash table (e.g., because we are
+ * processing multiple zones), destroy it before creating
+ * a new one.
+ */
+ struct ht_elem *elem, *next;
+ int i;
+
+ for (i = 0; i < ht_buckets_num; i++) {
+ for (elem = ht_buckets[i]; elem != NULL; elem = next) {
+ next = elem->next;
+ free((char *)elem->fmri);
+ free(elem);
+ }
+ }
+
+ free(ht_buckets);
+ }
ht_buckets_num = 8;
ht_buckets = safe_malloc(sizeof (*ht_buckets) * ht_buckets_num);
@@ -553,7 +575,7 @@ get_restarter_time_prop(scf_instance_t *inst, const char *pname,
int r;
r = inst_get_single_val(inst, SCF_PG_RESTARTER, pname, SCF_TYPE_TIME,
- tvp, NULL, ok_if_empty ? EMPTY_OK : 0, 0, 1);
+ tvp, 0, ok_if_empty ? EMPTY_OK : 0, 0, 1);
return (r == 0 ? 0 : -1);
}
@@ -1650,7 +1672,7 @@ sprint_stime(char **buf, scf_walkinfo_t *wip)
SCF_PROPERTY_STATE_TIMESTAMP, &tv, 0);
} else {
r = pg_get_single_val(wip->pg, SCF_PROPERTY_STATE_TIMESTAMP,
- SCF_TYPE_TIME, &tv, NULL, 0);
+ SCF_TYPE_TIME, &tv, 0, 0);
}
if (r != 0) {
@@ -1702,7 +1724,7 @@ sortkey_stime(char *buf, int reverse, scf_walkinfo_t *wip)
SCF_PROPERTY_STATE_TIMESTAMP, &tv, 0);
else
r = pg_get_single_val(wip->pg, SCF_PROPERTY_STATE_TIMESTAMP,
- SCF_TYPE_TIME, &tv, NULL, 0);
+ SCF_TYPE_TIME, &tv, 0, 0);
if (r == 0) {
int64_t sec;
@@ -2514,7 +2536,7 @@ print_detailed(void *unused, scf_walkinfo_t *wip)
gettext("next_state"), buf);
if (pg_get_single_val(rpg, SCF_PROPERTY_STATE_TIMESTAMP,
- SCF_TYPE_TIME, &tv, NULL, 0) == 0) {
+ SCF_TYPE_TIME, &tv, 0, 0) == 0) {
stime = tv.tv_sec;
tmp = localtime(&stime);
for (tbsz = 50; ; tbsz *= 2) {
@@ -3663,6 +3685,24 @@ again:
assert(opt_zone == NULL || zids == NULL);
if (opt_zone == NULL) {
+ zone_status_t status;
+
+ if (zone_getattr(zids[zent], ZONE_ATTR_STATUS,
+ &status, sizeof (status)) < 0 ||
+ status != ZONE_IS_RUNNING) {
+ /*
+ * If this zone is not running or we cannot
+ * get its status, we do not want to attempt
+ * to bind an SCF handle to it, lest we
+ * accidentally interfere with a zone that
+ * is not yet running by looking up a door
+ * to its svc.configd (which could potentially
+ * block a mount with an EBUSY).
+ */
+ zent++;
+ goto nextzone;
+ }
+
if (getzonenamebyid(zids[zent++],
zonename, sizeof (zonename)) < 0) {
uu_warn(gettext("could not get name for "
@@ -3685,18 +3725,46 @@ again:
uu_die(gettext("invalid zone '%s'\n"), g_zonename);
scf_value_destroy(zone);
+
+ /*
+ * On SmartOS, there may be a base64-encoded string attribute
+ * named 'alias' associated with this zone. This alias is
+ * useful, so we attempt to make it available when we are
+ * displaying -xZ output. If it's not available or not
+ * decodable, we just ignore it.
+ */
+ if (g_zonename != NULL) {
+ unsigned len;
+ struct zone_attrtab zattrs;
+ zone_dochandle_t zhdl = zonecfg_init_handle();
+
+ bzero(&zattrs, sizeof (zattrs));
+ (void) strcpy(zattrs.zone_attr_name, "alias");
+
+ if (zhdl != NULL &&
+ zonecfg_get_handle(g_zonename, zhdl) == Z_OK &&
+ zonecfg_lookup_attr(zhdl, &zattrs) == Z_OK &&
+ zonecfg_get_attr_string(&zattrs, g_aliasbuf,
+ sizeof (g_aliasbuf)) == Z_OK &&
+ sasl_decode64(g_aliasbuf, strlen(g_aliasbuf),
+ g_aliasdec, sizeof (g_aliasdec), &len) == SASL_OK) {
+ g_aliasdec[len] = '\0';
+ g_zonealias = g_aliasdec;
+ } else {
+ g_zonealias = NULL;
+ }
+ zonecfg_fini_handle(zhdl);
+ }
}
if (scf_handle_bind(h) == -1) {
if (g_zonename != NULL) {
- uu_warn(gettext("Could not bind to repository "
+ if (show_zones)
+ goto nextzone;
+
+ uu_die(gettext("Could not bind to repository "
"server for zone %s: %s\n"), g_zonename,
scf_strerror(scf_error()));
-
- if (!show_zones)
- return (UU_EXIT_FATAL);
-
- goto nextzone;
}
uu_die(gettext("Could not bind to repository server: %s. "
@@ -3755,7 +3823,7 @@ again:
if (opt_mode == 'L') {
if ((err = scf_walk_fmri(h, argc, argv, SCF_WALK_MULTIPLE,
- print_log, NULL, &exit_status, uu_warn)) != 0) {
+ print_log, NULL, errarg, errfunc)) != 0) {
uu_warn(gettext("failed to iterate over "
"instances: %s\n"), scf_strerror(err));
exit_status = UU_EXIT_FATAL;
diff --git a/usr/src/cmd/svr4pkg/pkgadd/Makefile b/usr/src/cmd/svr4pkg/pkgadd/Makefile
index 66e6abb737..b1b4168a7c 100644
--- a/usr/src/cmd/svr4pkg/pkgadd/Makefile
+++ b/usr/src/cmd/svr4pkg/pkgadd/Makefile
@@ -35,7 +35,7 @@ ROOTLINKS= $(ROOTUSRSBIN)/pkgask
include $(SRC)/cmd/svr4pkg/Makefile.svr4pkg
LDLIBS += -lpkg -linstzones -ladm
-LDLIBS += -lcrypto -lwanboot
+LDLIBS += -lsunw_crypto -lwanboot
.KEEP_STATE:
diff --git a/usr/src/cmd/svr4pkg/pkgadm/Makefile b/usr/src/cmd/svr4pkg/pkgadm/Makefile
index 620e32cf0d..c4970c3b91 100644
--- a/usr/src/cmd/svr4pkg/pkgadm/Makefile
+++ b/usr/src/cmd/svr4pkg/pkgadm/Makefile
@@ -35,7 +35,7 @@ OBJS= addcert.o \
include $(SRC)/cmd/svr4pkg/Makefile.svr4pkg
-LDLIBS += -lpkg -ladm -lcrypto -lgen
+LDLIBS += -lpkg -ladm -lsunw_crypto -lgen
.KEEP_STATE:
all: $(PROG)
diff --git a/usr/src/cmd/tail/Makefile b/usr/src/cmd/tail/Makefile
index 293920cfd1..e660cedf2d 100644
--- a/usr/src/cmd/tail/Makefile
+++ b/usr/src/cmd/tail/Makefile
@@ -21,6 +21,7 @@ OBJS= forward.o misc.o read.o reverse.o tail.o
SRCS= $(OBJS:%.o=%.c)
include ../Makefile.cmd
+include ../Makefile.ctf
CLOBBERFILES= $(PROG)
CPPFLAGS += -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
@@ -43,6 +44,10 @@ $(PROG): $(OBJS)
$(LINK.c) $(OBJS) -o $@ $(LDLIBS)
$(POST_PROCESS)
+%.o: %.c
+ $(COMPILE.c) $<
+ $(POST_PROCESS_O)
+
install: all .WAIT $(ROOTPROG) $(ROOTXPG4PROG)
$(ROOTXPG4PROG):
diff --git a/usr/src/cmd/truss/codes.c b/usr/src/cmd/truss/codes.c
index 069268dc05..4e453da0c1 100644
--- a/usr/src/cmd/truss/codes.c
+++ b/usr/src/cmd/truss/codes.c
@@ -23,7 +23,7 @@
* Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
* Copyright (c) 2014, OmniTI Computer Consulting, Inc. All rights reserved.
*/
@@ -703,6 +703,8 @@ const struct ioc {
/* /dev/poll ioctl() control codes */
{ (uint_t)DP_POLL, "DP_POLL", NULL },
{ (uint_t)DP_ISPOLLED, "DP_ISPOLLED", NULL },
+ { (uint_t)DP_PPOLL, "DP_PPOLL", NULL },
+ { (uint_t)DP_EPOLLCOMPAT, "DP_EPOLLCOMPAT", NULL },
/* the old /proc ioctl() control codes */
#define PIOC ('q'<<8)
{ (uint_t)(PIOC|1), "PIOCSTATUS", NULL },
diff --git a/usr/src/cmd/truss/print.c b/usr/src/cmd/truss/print.c
index f49197a7f6..f6e4fd0bb8 100644
--- a/usr/src/cmd/truss/print.c
+++ b/usr/src/cmd/truss/print.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -870,7 +871,9 @@ prt_mc4(private_t *pri, int raw, long val) /* print memcntl() (4th) argument */
return;
case MC_SYNC:
- if ((val & ~(MS_SYNC|MS_ASYNC|MS_INVALIDATE)) == 0) {
+ if ((val &
+ ~(MS_SYNC|MS_ASYNC|MS_INVALIDATE|MS_INVALCURPROC))
+ == 0) {
*(s = pri->code_buf) = '\0';
if (val & MS_SYNC)
(void) strlcat(s, "|MS_SYNC", CBSIZE);
@@ -879,6 +882,9 @@ prt_mc4(private_t *pri, int raw, long val) /* print memcntl() (4th) argument */
if (val & MS_INVALIDATE)
(void) strlcat(s, "|MS_INVALIDATE",
CBSIZE);
+ if (val & MS_INVALCURPROC)
+ (void) strlcat(s, "|MS_INVALCURPROC",
+ CBSIZE);
}
break;
@@ -2440,7 +2446,10 @@ prt_zga(private_t *pri, int raw, long val)
case ZONE_ATTR_BOOTARGS: s = "ZONE_ATTR_BOOTARGS"; break;
case ZONE_ATTR_BRAND: s = "ZONE_ATTR_BRAND"; break;
case ZONE_ATTR_FLAGS: s = "ZONE_ATTR_FLAGS"; break;
- case ZONE_ATTR_PHYS_MCAP: s = "ZONE_ATTR_PHYS_MCAP"; break;
+ case ZONE_ATTR_DID: s = "ZONE_ATTR_DID"; break;
+ case ZONE_ATTR_PMCAP_NOVER: s = "ZONE_ATTR_PMCAP_NOVER"; break;
+ case ZONE_ATTR_PMCAP_PAGEOUT: s = "ZONE_ATTR_PMCAP_PAGEOUT";
+ break;
}
}
diff --git a/usr/src/cmd/truss/systable.c b/usr/src/cmd/truss/systable.c
index 3cd07c698b..f2cbebf8f3 100644
--- a/usr/src/cmd/truss/systable.c
+++ b/usr/src/cmd/truss/systable.c
@@ -28,6 +28,9 @@
/* Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved. */
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
@@ -1715,9 +1718,10 @@ const char * const afcodes[] = {
"POLICY", /* 29 */
"RDS", /* 30 */
"TRILL", /* 31 */
- "PACKET" /* 32 */
+ "PACKET", /* 32 */
+ "LX_NETLINK" /* 33 */
};
-#if MAX_AFCODES != 33
+#if MAX_AFCODES != 34
#error Need to update address-family table
#endif
diff --git a/usr/src/cmd/vi/port/Makefile b/usr/src/cmd/vi/port/Makefile
index 268dd752a6..8e223d98e8 100644
--- a/usr/src/cmd/vi/port/Makefile
+++ b/usr/src/cmd/vi/port/Makefile
@@ -75,6 +75,9 @@ $(XPG6) := CFLAGS += -DXPG4 -DXPG6 -I$(SRC)/lib/libc/inc
CPPFLAGS += -DUSG -DSTDIO -DVMUNIX -DTABS=8 -DSINGLE -DTAG_STACK
+# vi intentionally uses foo[-1] as a sentinal value to q*column()
+$(__GNUC4)CERRWARN += -_gcc=-Wno-array-bounds
+
# vi maintains its own versions of various routines from libc and libcurses,
# so localize all symbols to avoid name space collisions.
LDFLAGS += $(MAPFILE.NGB:%=-M%)
diff --git a/usr/src/cmd/vi/port/ex_cmdsub.c b/usr/src/cmd/vi/port/ex_cmdsub.c
index 0260d334fe..00bdcefccb 100644
--- a/usr/src/cmd/vi/port/ex_cmdsub.c
+++ b/usr/src/cmd/vi/port/ex_cmdsub.c
@@ -1735,7 +1735,7 @@ char *prompt;
/* In ex mode, let the system hassle with setting no echo */
if (!inopen)
- return (unsigned char *)getpass(prompt);
+ return (unsigned char *)getpass((const char *)prompt);
viprintf("%s", prompt); flush();
for (p=pbuf; (c = getkey())!='\n' && c!=EOF && c!='\r';) {
if (p < &pbuf[8])
diff --git a/usr/src/cmd/vndadm/Makefile b/usr/src/cmd/vndadm/Makefile
new file mode 100644
index 0000000000..aa9c22d296
--- /dev/null
+++ b/usr/src/cmd/vndadm/Makefile
@@ -0,0 +1,65 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+PROG= vndadm
+OBJS = vndadm.o
+SRCS = $(OBJS:%.o=../%.c)
+
+
+include ../Makefile.cmd
+include ../Makefile.ctf
+
+CLEANFILES += $(OBJS)
+CFLAGS += $(CCVERBOSE)
+LDLIBS += -lvnd
+LINTFLAGS += -xerroff=E_NAME_DEF_NOT_USED2
+C99MODE= -xc99=%all
+C99LMODE= -Xc99=%all
+
+all := TARGET += all
+clean := TARGET += clean
+clobber := TARGET += clobber
+install := TARGET += install
+lint := TARGET += lint
+
+SUBDIRS = test
+
+.KEEP_STATE:
+
+$(PROG): $(OBJS)
+ $(LINK.c) -o $@ $(OBJS) $(LDLIBS)
+ $(POST_PROCESS)
+
+clean: $(SUBDIRS)
+ -$(RM) $(CLEANFILES)
+
+lint: lint_PROG $(SUBDIRS)
+
+%.o: ../%.c
+ $(COMPILE.c) $<
+ $(POST_PROCESS_O)
+
+clobber: clean $(SUBDIRS)
+ $(RM) $(PROG)
+
+install: $(PROG) $(ROOTUSRSBINPROG) $(SUBDIRS)
+
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/vndadm/test/Makefile b/usr/src/cmd/vndadm/test/Makefile
new file mode 100644
index 0000000000..12ef2c3a3c
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+SUBDIRS = scripts tst
+
+include Makefile.subdirs
+include Makefile.com
diff --git a/usr/src/cmd/vndadm/test/Makefile.com b/usr/src/cmd/vndadm/test/Makefile.com
new file mode 100644
index 0000000000..cb096952ca
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/Makefile.com
@@ -0,0 +1,43 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include $(SRC)/Makefile.master
+include $(SRC)/cmd/Makefile.cmd
+
+#
+# Force c99 for everything
+#
+C99MODE= -xc99=%all
+C99LMODE= -Xc99=%all
+
+#
+# Deal with odd lint bits.
+#
+LINTFLAGS += -xerroff=E_NAME_DEF_NOT_USED2
+
+#
+# Install related definitions
+#
+ROOTOPTPKG = $(ROOT)/opt/vndtest
+ROOTBIN = $(ROOTOPTPKG)/bin
+ROOTTST = $(ROOTOPTPKG)/tst
+ROOTTSTDIR = $(ROOTTST)/$(TSTDIR)
+ROOTTSTEXES = $(EXETESTS:%=$(ROOTTSTDIR)/%)
+ROOTTSTSH = $(SHTESTS:%=$(ROOTTSTDIR)/%)
+ROOTOUT = $(OUTFILES:%=$(ROOTTSTDIR)/%)
+ROOTTESTS = $(ROOTTSTEXES) $(ROOTTSTSH) $(ROOTOUT)
+FILEMODE = 0555
+LDLIBS = $(LDLIBS.cmd)
+LINTEXE = $(EXETESTS:%.exe=%.exe.ln)
diff --git a/usr/src/cmd/vndadm/test/Makefile.subdirs b/usr/src/cmd/vndadm/test/Makefile.subdirs
new file mode 100644
index 0000000000..957448c23b
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/Makefile.subdirs
@@ -0,0 +1,29 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+.KEEP_STATE:
+
+all := TARGET += all
+clean := TARGET += clean
+clobber := TARGET += clobber
+install := TARGET += install
+lint := TARGET += lint
+
+all clean clobber install lint: $(SUBDIRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/cmd/vndadm/test/Makefile.targ b/usr/src/cmd/vndadm/test/Makefile.targ
new file mode 100644
index 0000000000..bcbd3c8f35
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/Makefile.targ
@@ -0,0 +1,59 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+$(ROOTOPTPKG):
+ $(INS.dir)
+
+$(ROOTBIN): $(ROOTOPTPKG)
+ $(INS.dir)
+
+$(ROOTBIN)/%: %.ksh $(ROOTBIN)
+ $(INS.rename)
+
+$(ROOTTST): $(ROOTOPTPKG)
+ $(INS.dir)
+
+$(ROOTTSTDIR): $(ROOTTST)
+ $(INS.dir)
+
+$(ROOTTSTDIR)/%.ksh: %.ksh $(ROOTTSTDIR)
+ $(INS.file)
+
+$(ROOTTSTDIR)/%.out: %.out $(ROOTTSTDIR)
+ $(INS.file)
+
+%.o: %.c
+ $(COMPILE.c) $<
+ $(POST_PROCESS_O)
+
+%.exe: %.o $(SUPOBJS)
+ $(LINK.c) -o $@ $< $(SUPOBJS) $(LDLIBS)
+ $(POST_PROCESS)
+
+$(ROOTTSTDIR)/%.exe: %.exe $(ROOTTSTDIR)
+ $(INS.file)
+
+all: install
+
+%.exe.ln: %.c $(SUPOBJS)
+ $(LINT.c) $< $(LDLIBS)
+
+lint: $(LINTEXE)
+
+clean:
+ -$(RM) *.o $(CLEANFILES)
+
+clobber: clean
+ -$(RM) $(CLOBBERFILES)
diff --git a/usr/src/cmd/vndadm/test/scripts/Makefile b/usr/src/cmd/vndadm/test/scripts/Makefile
new file mode 100644
index 0000000000..d0f58918f9
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/scripts/Makefile
@@ -0,0 +1,28 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+
+SRCS = vndtest
+SCRIPTS = $(SRCS:%=$(ROOTBIN)/%)
+
+SCRIPTS := FILEMODE = 0555
+CLOBBERFILES = $(SCRIPTS)
+
+install: $(SCRIPTS)
+
+lint:
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/vndadm/test/scripts/vndtest.ksh b/usr/src/cmd/vndadm/test/scripts/vndtest.ksh
new file mode 100755
index 0000000000..1167a64802
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/scripts/vndtest.ksh
@@ -0,0 +1,300 @@
+#!/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014, Joyent, Inc.
+#
+
+#
+# vnd test suite driver
+#
+unalias -a
+
+vt_arg0=$(basename $0)
+vt_root="$(dirname $0)/.."
+vt_ksh="/usr/bin/ksh"
+vt_outdir=
+vt_keep=
+vt_all=
+vt_tests=
+vt_stub=
+vt_vnics="vndtest1 vndtest2 vndtest3 vndtest4 vndtest5"
+vt_tnum=0
+vt_tfail=0
+vt_tsuc=0
+
+function usage
+{
+ typeset msg="$*"
+ [[ -z "$msg" ]] || echo "$msg" 2>&1
+ cat <<USAGE >&2
+Usage: $vt_arg0 [ -o dir ] [ -k ] [ -a | test ... ]
+
+ -o dir Sets 'dir' as the output directory
+ -a Runs all tests, ignores tests passed in
+ -k Keep output from all tests, not just failures
+ -m mdb binary to test
+USAGE
+ exit 2
+}
+
+function fatal
+{
+ typeset msg="$*"
+ [[ -z "$msg" ]] && msg="failed"
+ echo "$vt_arg0: $msg" >&2
+ exit 1
+}
+
+function setup_outdir
+{
+ vt_outdir="$vt_outdir/$vt_arg0.$$"
+ mkdir -p $vt_outdir || fatal "failed to make output dir $vt_outdir"
+}
+
+function setup_etherstub
+{
+ vt_ether="vndstub$$"
+
+ dladm create-etherstub -t $vt_ether || \
+ fatal "failed to create etherstub"
+}
+
+function cleanup_vnd
+{
+ typeset over=$1
+ typeset vnddevs vn
+
+ vnddevs=$(vndadm list -p -d: -o datalink,name)
+ [[ $? -eq 0 ]] || fatal "failed to list vnics"
+ for v in $vnddevs; do
+ vn=$(echo $v | awk 'BEGIN{ FS=":"}
+ { if ($1 == targ) { print $2 } }' targ=$over)
+ [[ -z "$vn" ]] && continue
+ vndadm destroy $vn || fatal "failed to destroy $vn"
+ done
+}
+
+function create_vnics
+{
+ for n in $vt_vnics; do
+ dladm create-vnic -t -l $vt_ether $n || fatal \
+ "failed to create vnic $n over $vt_ether"
+ done
+}
+
+function cleanup_vnics
+{
+ typeset nics vn
+
+ nics=$(dladm show-vnic -p -o over,link)
+ [[ $? -eq 0 ]] || fatal "failed to list vnics"
+ for n in $nics; do
+ vn=$(echo $n | awk 'BEGIN{ FS=":"}
+ { if ($1 == targ) { print $2 } }' targ=$vt_ether )
+ [[ -z "$vn" ]] && continue
+ cleanup_vnd $vn
+ #
+ # There may or may not be an IP device on our nics...
+ #
+ ifconfig $vn down unplumb 2>/dev/null || /bin/true
+ dladm delete-vnic $vn || fatal "failed to delete vnic $n"
+ done
+
+}
+
+function cleanup_etherstub
+{
+ cleanup_vnics
+ dladm delete-etherstub -t $vt_ether || \
+ fatal "failed to delete etherstub"
+}
+
+function run_single
+{
+ typeset name=$1
+ typeset expect base ext exe command odir res reason
+ typeset iserr
+
+ [[ -z "$name" ]] && fail "missing test to run"
+ base=${name##*/}
+ ext=${base##*.}
+ expect=${base%%.*}
+ odir="$vt_outdir/current"
+ [[ -z "$ext" ]] && fatal "found test without ext: $name"
+ [[ -z "$expect" ]] && fatal "found test without prefix: $name"
+
+ [[ "$expect" == "create" || "$expect" == "ecreate" ]] && create_vnics
+ if [[ "$expect" == "err" || "$expect" == "ecreate" ]]; then
+ iserr="yup"
+ else
+ iserr=""
+ fi
+
+ case "$ext" in
+ "ksh")
+ command="$vt_ksh ./$base"
+ ;;
+ "exe")
+ command="./$base"
+ ;;
+ "out")
+ #
+ # This is the file format for checking output against.
+ #
+ return 0
+ ;;
+ *)
+ echo "skipping test $name (unknown extensino)"
+ return 0
+ ;;
+ esac
+
+ echo "Executing test $name ... \c"
+ mkdir -p "$odir" >/dev/null || fatal "can't make output directory"
+ cd $(dirname $name) || fatal "failed to enter test directory"
+ $command $vt_vnics > "$odir/stdout" 2>"$odir/stderr"
+ res=$?
+ cd - > /dev/null || fatal "failed to leave test directory"
+
+ if [[ -f "$name.out" ]] && \
+ ! diff "$name.out" "$odir/stdout" >/dev/null; then
+ cp $name.out $odir/$base.out
+ reason="stdout mismatch"
+ elif [[ -n "$iserr" && $res -eq 0 ]]; then
+ reason="test exited $res, not non-zero"
+ elif [[ -z "$iserr" && $res -ne 0 ]]; then
+ reason="test exited $res, not zero"
+ fi
+
+ if [[ -n "$reason" ]]; then
+ echo "$reason"
+ ((vt_tfail++))
+ mv "$odir" "$vt_outdir/failure.$vt_tfail" || fatal \
+ "failed to move test output directory"
+ cp "$name" "$vt_outdir/failure.$vt_tfail/$(basename $name)" || \
+ fatal "failed to copy test into output directory"
+ else
+ echo "passed"
+ ((vt_tsuc++))
+ mv "$odir" "$vt_outdir/success.$vt_tsuc" || fatal \
+ "failed to move test directory"
+ fi
+
+ [[ "$expect" == "create" || "$expect" == "ecreate" ]] && cleanup_vnics
+
+ ((vt_tnum++))
+}
+
+function run_all
+{
+ typeset tests t dir
+
+ cd $vt_root || fatal "failed to enter root test directory"
+ tests=$(ls -1 */*/@(ecreate|create|tst|err).*.@(ksh|exe))
+ cd - > /dev/null
+ for t in $tests; do
+ run_single $t
+ done
+}
+
+function welcome
+{
+ cat <<WELCOME
+Starting tests...
+output directory: $vt_outdir
+WELCOME
+}
+
+function cleanup
+{
+ [[ -n "$vt_keep" ]] && return
+ rm -rf "$vt_outdir"/success.* || fatal \
+ "failed to remove successful test cases"
+ if [[ $vt_tfail -eq 0 ]]; then
+ rmdir "$vt_outdir" || fatal \
+ "failed to remove test output directory"
+ fi
+}
+
+function goodbye
+{
+ cat <<EOF
+
+-------------
+Results
+-------------
+
+Tests passed: $vt_tsuc
+Tests failed: $vt_tfail
+Tests ran: $vt_tnum
+
+EOF
+ if [[ $vt_tfail -eq 0 ]]; then
+ echo "Congrats, vnd isn't completely broken, the tests pass".
+ else
+ echo "Some tests failed, you have some work to do."
+ fi
+}
+
+while getopts ":ahko:m:" c $@; do
+ case "$c" in
+ a)
+ vt_all="y"
+ ;;
+ k)
+ vt_keep="y"
+ ;;
+ o)
+ vt_outdir="$OPTARG"
+ ;;
+ h)
+ usage
+ ;;
+ :)
+ usage "option requires an argument -- $OPTARG"
+ ;;
+ *)
+ usage "invalid option -- $OPTARG"
+ ;;
+ esac
+done
+
+shift $((OPTIND-1))
+
+[[ $(zonename) != "global" ]] && fatal "vndtest only runs in the global zone"
+
+[[ -z "$vt_all" && $# == 0 ]] && usage "no tests to run"
+
+[[ -z "$vt_outdir" ]] && vt_outdir="$PWD"
+
+setup_outdir
+setup_etherstub
+welcome
+
+if [[ ! -z "$vt_all" ]]; then
+ run_all
+else
+ for t in $@; do
+ [[ -f $t ]] || fatal "cannot find test $t"
+ run_single $t
+ done
+fi
+
+cleanup_etherstub
+goodbye
+cleanup
+
+#
+# Exit 1 if we have tests that return non-zero
+#
+[[ $vt_tfai -eq 0 ]]
diff --git a/usr/src/cmd/vndadm/test/tst/Makefile b/usr/src/cmd/vndadm/test/tst/Makefile
new file mode 100644
index 0000000000..9b1ba29429
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+SUBDIRS = cmd dld ioctl lib
+
+include ../Makefile.subdirs
diff --git a/usr/src/cmd/vndadm/test/tst/cmd/Makefile b/usr/src/cmd/vndadm/test/tst/cmd/Makefile
new file mode 100644
index 0000000000..1ca20bf749
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/cmd/Makefile
@@ -0,0 +1,34 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+TSTDIR = cmd
+COMMONSH = cmd.common.ksh
+SHTESTS = $(COMMONSH) \
+ create.list.ksh \
+ create.sdev.ksh \
+ create.setbuf.ksh \
+ ecreate.destroy.ksh \
+ ecreate.setbadprop.ksh \
+ ecreate.setbadvalue.ksh \
+ ecreate.setbuftoobig.ksh \
+ ecreate.setrdonlyprop.ksh
+
+OUTFILES = create.list.ksh.out
+
+include ../../Makefile.com
+
+install: $(ROOTTESTS)
+
+include ../../Makefile.targ
diff --git a/usr/src/cmd/vndadm/test/tst/cmd/cmd.common.ksh b/usr/src/cmd/vndadm/test/tst/cmd/cmd.common.ksh
new file mode 100644
index 0000000000..31e4e8bf5c
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/cmd/cmd.common.ksh
@@ -0,0 +1,33 @@
+#
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Common ksh-based utilities
+#
+
+vt_arg0=$(basename $0)
+
+function fatal
+{
+ typeset msg="$*"
+ [[ -z "$msg" ]] && msg="failed"
+ echo "$vt_arg0: $msg" >&2
+ exit 1
+}
+
+[[ -z "$1" ]] && fatal "missing required vnic"
+[[ -z "$2" ]] && fatal "missing required vnic"
+[[ -z "$3" ]] && fatal "missing required vnic"
diff --git a/usr/src/cmd/vndadm/test/tst/cmd/create.list.ksh b/usr/src/cmd/vndadm/test/tst/cmd/create.list.ksh
new file mode 100644
index 0000000000..fdec9a85be
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/cmd/create.list.ksh
@@ -0,0 +1,30 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Basic device listing
+#
+
+. ./cmd.common.ksh
+
+#
+# Use what we hope is a relatively unique name
+#
+cl_name="triforceofcourage0"
+vndadm create -l $1 $cl_name || fatal "failed to create vnd device"
+vndadm list -p -o name,zone $cl_name
+vndadm list -p -d: -o zone,name $cl_name
+vndadm destroy $cl_name || fatal "failed to destroy vnd device"
diff --git a/usr/src/cmd/vndadm/test/tst/cmd/create.list.ksh.out b/usr/src/cmd/vndadm/test/tst/cmd/create.list.ksh.out
new file mode 100644
index 0000000000..d208b38aab
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/cmd/create.list.ksh.out
@@ -0,0 +1,2 @@
+triforceofcourage0 global
+global:triforceofcourage0
diff --git a/usr/src/cmd/vndadm/test/tst/cmd/create.sdev.ksh b/usr/src/cmd/vndadm/test/tst/cmd/create.sdev.ksh
new file mode 100644
index 0000000000..b816ade1de
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/cmd/create.sdev.ksh
@@ -0,0 +1,25 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Verify that our sdev links exist
+#
+
+. ./cmd.common.ksh
+
+vndadm create $1 || fatal "failed to bring up vnd"
+[[ -c /dev/vnd/$1 ]] || fatal "missing link"
+[[ -c /dev/vnd/zone/$(zonename)/$1 ]] || fatal "missing per-zone link"
diff --git a/usr/src/cmd/vndadm/test/tst/cmd/create.setbuf.ksh b/usr/src/cmd/vndadm/test/tst/cmd/create.setbuf.ksh
new file mode 100644
index 0000000000..d50edbead4
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/cmd/create.setbuf.ksh
@@ -0,0 +1,34 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Set and validate the buffer size properties. Valiate that we can set
+# the value using the various number analogues, eg. 1024K, etc.
+#
+set -o pipefail
+
+. ./cmd.common.ksh
+
+vndadm create $1 || fatal "failed to bring up vnd device"
+vndadm set $1 rxbuf=1M
+cur=$(vndadm get -p $1 rxbuf | nawk '{ print $4 }')
+[[ $? -eq 0 ]] || fatal "failed to get rxbuf"
+[[ $cur -eq 1048576 ]] || fatal "rxbuf is $cur, not 1M"
+
+vndadm set $1 txbuf=1024K
+cur=$(vndadm get -p $1 rxbuf | nawk '{ print $4 }')
+[[ $? -eq 0 ]] || fatal "failed to get txbuf"
+[[ $cur -eq 1048576 ]] || fatal "txbuf is $cur, not 1M"
diff --git a/usr/src/cmd/vndadm/test/tst/cmd/ecreate.destroy.ksh b/usr/src/cmd/vndadm/test/tst/cmd/ecreate.destroy.ksh
new file mode 100644
index 0000000000..e3c4931018
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/cmd/ecreate.destroy.ksh
@@ -0,0 +1,25 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Make sure that destroy on a previously destroyed link fails
+#
+
+. ./cmd.common.ksh
+
+vndadm create $1 || fatal "failed to bring up vnd device"
+vndadm destroy $1 || fatal "failed to destroy vnd device"
+vndadm destroy $1
diff --git a/usr/src/cmd/vndadm/test/tst/cmd/ecreate.setbadprop.ksh b/usr/src/cmd/vndadm/test/tst/cmd/ecreate.setbadprop.ksh
new file mode 100644
index 0000000000..30c27575b1
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/cmd/ecreate.setbadprop.ksh
@@ -0,0 +1,24 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Make sure that we can't set a non-existant proprety
+#
+
+. ./cmd.common.ksh
+
+vndadm create $1 || fatal "failed to bring up vnd device"
+vndadm set $1 ganon=ganondorf
diff --git a/usr/src/cmd/vndadm/test/tst/cmd/ecreate.setbadvalue.ksh b/usr/src/cmd/vndadm/test/tst/cmd/ecreate.setbadvalue.ksh
new file mode 100644
index 0000000000..056b24a817
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/cmd/ecreate.setbadvalue.ksh
@@ -0,0 +1,24 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Make sure that we can't set something to a garbage value
+#
+
+. ./cmd.common.ksh
+
+vndadm create $1 || fatal "failed to bring up vnd device"
+vndadm set $1 rxbuf=hello
diff --git a/usr/src/cmd/vndadm/test/tst/cmd/ecreate.setbuftoobig.ksh b/usr/src/cmd/vndadm/test/tst/cmd/ecreate.setbuftoobig.ksh
new file mode 100644
index 0000000000..551e20461c
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/cmd/ecreate.setbuftoobig.ksh
@@ -0,0 +1,24 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Make sure that we can't set a buffer value to a ridiculous size
+#
+
+. ./cmd.common.ksh
+
+vndadm create $1 || fatal "failed to bring up vnd device"
+vndadm set $1 rxsize=1T
diff --git a/usr/src/cmd/vndadm/test/tst/cmd/ecreate.setrdonlyprop.ksh b/usr/src/cmd/vndadm/test/tst/cmd/ecreate.setrdonlyprop.ksh
new file mode 100644
index 0000000000..4beb53e227
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/cmd/ecreate.setrdonlyprop.ksh
@@ -0,0 +1,24 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Make sure that we can't set a read only property.
+#
+
+. ./cmd.common.ksh
+
+vndadm create $1 || fatal "failed to bring up vnd device"
+vndadm set $1 mintu=100
diff --git a/usr/src/cmd/vndadm/test/tst/dld/Makefile b/usr/src/cmd/vndadm/test/tst/dld/Makefile
new file mode 100644
index 0000000000..3088812630
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/dld/Makefile
@@ -0,0 +1,27 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+TSTDIR = dld
+COMMONSH = dld.common.ksh
+SHTESTS = $(COMMONSH) \
+ ecreate.ipfirst.ksh \
+ ecreate.vndfirst.ksh \
+ create.reuse.ksh
+
+include ../../Makefile.com
+
+install: $(ROOTTESTS)
+
+include ../../Makefile.targ
diff --git a/usr/src/cmd/vndadm/test/tst/dld/create.reuse.ksh b/usr/src/cmd/vndadm/test/tst/dld/create.reuse.ksh
new file mode 100644
index 0000000000..bc2ffde7f6
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/dld/create.reuse.ksh
@@ -0,0 +1,31 @@
+#
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Make sure that we can reuse a data link
+#
+
+. ./dld.common.ksh
+
+dld_nic=$1
+[[ -z "$1" ]] && fatal "missing required vnic"
+
+vndadm create $dld_nic || fatal "failed to bring up vnd"
+vndadm destroy $dld_nic || fatal "failed to bring down vnd"
+ifconfig $dld_nic plumb up || fatal "failed to bring up IP"
+ifconfig $dld_nic down unplumb || fatal "failed to bring down IP"
+vndadm create $dld_nic || fatal "failed to bring up vnd"
+vndadm destroy $dld_nic || fatal "failed to bring down vnd"
diff --git a/usr/src/cmd/vndadm/test/tst/dld/dld.common.ksh b/usr/src/cmd/vndadm/test/tst/dld/dld.common.ksh
new file mode 100644
index 0000000000..7a2e0a8e2b
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/dld/dld.common.ksh
@@ -0,0 +1,29 @@
+#
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Common ksh-based utilities
+#
+
+vt_arg0=$(basename $0)
+
+function fatal
+{
+ typeset msg="$*"
+ [[ -z "$msg" ]] && msg="failed"
+ echo "$vt_arg0: $msg" >&2
+ exit 1
+}
diff --git a/usr/src/cmd/vndadm/test/tst/dld/ecreate.ipfirst.ksh b/usr/src/cmd/vndadm/test/tst/dld/ecreate.ipfirst.ksh
new file mode 100644
index 0000000000..e6409781cb
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/dld/ecreate.ipfirst.ksh
@@ -0,0 +1,27 @@
+#
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Make sure vnd fails to come up when IP is up
+#
+
+. ./dld.common.ksh
+
+dld_nic=$1
+[[ -z "$1" ]] && fatal "missing required vnic"
+
+ifconfig $dld_nic plumb up || fatal "failed to bring up IP"
+vndadm create $dld_nic
diff --git a/usr/src/cmd/vndadm/test/tst/dld/ecreate.vndfirst.ksh b/usr/src/cmd/vndadm/test/tst/dld/ecreate.vndfirst.ksh
new file mode 100644
index 0000000000..ee7a13c09c
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/dld/ecreate.vndfirst.ksh
@@ -0,0 +1,27 @@
+#
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Make sure IP fails to come up when vnd is up
+#
+
+. ./dld.common.ksh
+
+dld_nic=$1
+[[ -z "$1" ]] && fatal "missing required vnic"
+
+vndadm create $dld_nic || fatal "failed to bring up vnd"
+ifconfig $dld_nic plumb up
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/Makefile b/usr/src/cmd/vndadm/test/tst/ioctl/Makefile
new file mode 100644
index 0000000000..fe074f32b0
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/Makefile
@@ -0,0 +1,49 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+TSTDIR = ioctl
+EXETESTS = \
+ create.attach.exe \
+ create.attachnolink.exe \
+ create.badlinkname.exe \
+ create.doublelink.exe \
+ create.gioctlattach.exe \
+ create.link.exe \
+ create.linkexists.exe \
+ create.ngioctlfault.exe \
+ create.nopriv1.exe \
+ create.nopriv2.exe \
+ create.nopriv3.exe \
+ create.nopriv4.exe \
+ create.olink.exe \
+ create.olinknopriv.exe \
+ create.rmenolink.exe \
+ tst.attachrdonly.exe \
+ tst.basicopenctl.exe \
+ tst.badioctl.exe \
+ tst.gioctlfault.exe \
+ tst.gioctlnattach.exe \
+ tst.openctlbadflags.exe
+SHTESTS = \
+ tst.iocsize.ksh
+SUPBOBJS =
+
+CLOBBERFILES = $(EXETESTS)
+
+include ../../Makefile.com
+
+install: $(ROOTTESTS)
+
+include ../../Makefile.targ
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.attach.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.attach.c
new file mode 100644
index 0000000000..d7bca5cce3
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.attach.c
@@ -0,0 +1,63 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Simply attach a nic
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret;
+ vnd_ioc_attach_t via;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd > 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == 0);
+ assert(via.via_errno == 0);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.attachnolink.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.attachnolink.c
new file mode 100644
index 0000000000..43c6c99af5
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.attachnolink.c
@@ -0,0 +1,67 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Try to attach to a non-existant vnic
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret;
+ vnd_ioc_attach_t via;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd > 0);
+
+ /*
+ * All datalink names have numbers, so we can pick a datalink which
+ * doesn't exist by not using numbers...
+ */
+ (void) strlcpy(via.via_name, "enolink", VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == -1);
+ assert(via.via_errno == VND_E_NODATALINK);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.badlinkname.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.badlinkname.c
new file mode 100644
index 0000000000..e3a067d5ce
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.badlinkname.c
@@ -0,0 +1,119 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Test that we can't link a nic with invalid names
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+static const char *names[] = {
+ /* Reserved names */
+ "ctl",
+ "zone",
+ /* Invalid characters */
+ "The fight of the century",
+ "Link/Ganon",
+ "happens@7pm",
+ "#testing",
+ "asdf!!",
+ "power&courage&wisdom",
+ "over9000?",
+ "you're",
+ "100$",
+ "(function",
+ "x)",
+ "2^128",
+ "1++",
+ "No.",
+ "99%",
+ "*****",
+ "r|m",
+ "=0",
+ "`p0",
+ "goodbye~",
+ "however;",
+ "\"hesaid",
+ "shesaid\'",
+ /* emoji pile of poo */
+ "\xF0\x9F\x92\xA9",
+ NULL
+};
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret, i;
+ vnd_ioc_attach_t via;
+ vnd_ioc_link_t vil;
+ vnd_ioc_unlink_t viu;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd > 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == 0);
+ assert(via.via_errno == 0);
+
+ for (i = 0; names[i] != NULL; i++) {
+ (void) strlcpy(vil.vil_name, names[i], VND_NAMELEN);
+ (void) fprintf(stderr, "Trying to create [%s]\n", names[i]);
+ vil.vil_errno = 0;
+ ret = ioctl(fd, VND_IOC_LINK, &vil);
+ assert(ret == -1);
+ assert(vil.vil_errno == VND_E_BADNAME);
+ }
+
+ /* Finally, the missing null terminator */
+ for (i = 0; i < VND_NAMELEN; i++)
+ vil.vil_name[i] = 'a';
+ ret = ioctl(fd, VND_IOC_LINK, &vil);
+ assert(ret == -1);
+ assert(vil.vil_errno == VND_E_BADNAME);
+
+ viu.viu_errno = 0;
+ ret = ioctl(fd, VND_IOC_UNLINK, &viu);
+ assert(ret == -1);
+ assert(viu.viu_errno == VND_E_NOTLINKED);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.doublelink.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.doublelink.c
new file mode 100644
index 0000000000..dcf4f311e9
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.doublelink.c
@@ -0,0 +1,82 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Link a nic, first should work, second will fail.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret;
+ vnd_ioc_attach_t via;
+ vnd_ioc_link_t vil;
+ vnd_ioc_unlink_t viu;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd > 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == 0);
+ assert(via.via_errno == 0);
+
+ (void) strlcpy(vil.vil_name, argv[1], VND_NAMELEN);
+ vil.vil_errno = 0;
+ ret = ioctl(fd, VND_IOC_LINK, &vil);
+ assert(ret == 0);
+ assert(vil.vil_errno == 0);
+
+ (void) strlcpy(vil.vil_name, "dup", VND_NAMELEN);
+ vil.vil_errno = 0;
+ ret = ioctl(fd, VND_IOC_LINK, &vil);
+ assert(ret == -1);
+ assert(vil.vil_errno == VND_E_LINKED);
+ viu.viu_errno = 0;
+
+ ret = ioctl(fd, VND_IOC_UNLINK, &viu);
+ assert(ret == 0);
+ assert(viu.viu_errno == 0);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.gioctlattach.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.gioctlattach.c
new file mode 100644
index 0000000000..3d6f43377b
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.gioctlattach.c
@@ -0,0 +1,69 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Ensure that we can't run global ioctls on an attached handle
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret;
+ vnd_ioc_attach_t via;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd > 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == 0);
+ assert(via.via_errno == 0);
+
+ via.via_name[0] = 'a';
+ via.via_name[1] = '\0';
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == -1);
+ assert(via.via_errno == VND_E_ATTACHED);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.link.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.link.c
new file mode 100644
index 0000000000..16569d58cd
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.link.c
@@ -0,0 +1,76 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Link a nic
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret;
+ vnd_ioc_attach_t via;
+ vnd_ioc_link_t vil;
+ vnd_ioc_unlink_t viu;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd > 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == 0);
+ assert(via.via_errno == 0);
+
+ (void) strlcpy(vil.vil_name, argv[1], VND_NAMELEN);
+ vil.vil_errno = 0;
+ ret = ioctl(fd, VND_IOC_LINK, &vil);
+ assert(ret == 0);
+ assert(vil.vil_errno == 0);
+
+ viu.viu_errno = 0;
+ ret = ioctl(fd, VND_IOC_UNLINK, &viu);
+ assert(ret == 0);
+ assert(viu.viu_errno == 0);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.linkexists.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.linkexists.c
new file mode 100644
index 0000000000..4e3be0db5d
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.linkexists.c
@@ -0,0 +1,90 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Try to create two devices with the same link name.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, fd2, ret;
+ vnd_ioc_attach_t via;
+ vnd_ioc_link_t vil;
+ vnd_ioc_unlink_t viu;
+
+ if (argc < 3) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd > 0);
+ fd2 = open(VND_PATH, O_RDWR);
+ assert(fd2 > 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == 0);
+ assert(via.via_errno == 0);
+
+ (void) strlcpy(via.via_name, argv[2], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+ ret = ioctl(fd2, VND_IOC_ATTACH, &via);
+ assert(ret == 0);
+ assert(via.via_errno == 0);
+
+ (void) strlcpy(vil.vil_name, "dup", VND_NAMELEN);
+ vil.vil_errno = 0;
+ ret = ioctl(fd, VND_IOC_LINK, &vil);
+ assert(ret == 0);
+ assert(vil.vil_errno == 0);
+
+ (void) strlcpy(vil.vil_name, "dup", VND_NAMELEN);
+ vil.vil_errno = 0;
+ ret = ioctl(fd2, VND_IOC_LINK, &vil);
+ assert(ret == -1);
+ assert(vil.vil_errno == VND_E_LINKEXISTS);
+
+ viu.viu_errno = 0;
+ ret = ioctl(fd, VND_IOC_UNLINK, &viu);
+ assert(ret == 0);
+ assert(viu.viu_errno == 0);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.ngioctlfault.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.ngioctlfault.c
new file mode 100644
index 0000000000..bf174f1a8f
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.ngioctlfault.c
@@ -0,0 +1,96 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Pass bad addresses to all of our non-global ioctls
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+static int requests[] = {
+ VND_IOC_LINK,
+ VND_IOC_UNLINK,
+ VND_IOC_GETRXBUF,
+ VND_IOC_SETRXBUF,
+ VND_IOC_GETTXBUF,
+ VND_IOC_SETTXBUF,
+ VND_IOC_GETMINTU,
+ VND_IOC_GETMAXTU,
+ VND_IOC_GETMAXBUF,
+ -1
+};
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret, i;
+ vnd_ioc_attach_t via;
+ vnd_ioc_link_t vil;
+ vnd_ioc_unlink_t viu;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd > 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == 0);
+ assert(via.via_errno == 0);
+
+ (void) strlcpy(vil.vil_name, argv[1], VND_NAMELEN);
+ vil.vil_errno = 0;
+ ret = ioctl(fd, VND_IOC_LINK, &vil);
+ assert(ret == 0);
+ assert(vil.vil_errno == 0);
+
+ for (i = 0; requests[i] != -1; i++) {
+ ret = ioctl(fd, requests[i], (void *)(uintptr_t)i);
+ assert(ret == -1);
+ assert(errno == EFAULT);
+ }
+
+
+ viu.viu_errno = 0;
+ ret = ioctl(fd, VND_IOC_UNLINK, &viu);
+ assert(ret == 0);
+ assert(viu.viu_errno == 0);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv1.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv1.c
new file mode 100644
index 0000000000..6d5ad0eec2
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv1.c
@@ -0,0 +1,69 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Fail to attach a device without PRIV_NET_CONFIG
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <assert.h>
+#include <priv.h>
+#include <string.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <stdio.h>
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret;
+ priv_set_t *ps;
+ vnd_ioc_attach_t via;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ ps = priv_allocset();
+ assert(ps != NULL);
+ assert(priv_addset(ps, PRIV_SYS_NET_CONFIG) == 0);
+ assert(setppriv(PRIV_OFF, PRIV_PERMITTED, ps) == 0);
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd >= 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == -1);
+ assert(errno == EPERM);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv2.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv2.c
new file mode 100644
index 0000000000..6b38f159a0
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv2.c
@@ -0,0 +1,69 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Fail to attach a device without PRIV_NET_RAWACCESS
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <assert.h>
+#include <priv.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret;
+ priv_set_t *ps;
+ vnd_ioc_attach_t via;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ ps = priv_allocset();
+ assert(ps != NULL);
+ assert(priv_addset(ps, PRIV_NET_RAWACCESS) == 0);
+ assert(setppriv(PRIV_OFF, PRIV_PERMITTED, ps) == 0);
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd >= 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == -1);
+ assert(errno == EPERM);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv3.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv3.c
new file mode 100644
index 0000000000..a8c43fc46d
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv3.c
@@ -0,0 +1,70 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Fail to attach a device without PRIV_NET_CONFIG and PRIV_NET_RAWACCESS
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <assert.h>
+#include <priv.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret;
+ priv_set_t *ps;
+ vnd_ioc_attach_t via;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ ps = priv_allocset();
+ assert(ps != NULL);
+ assert(priv_addset(ps, PRIV_SYS_NET_CONFIG) == 0);
+ assert(priv_addset(ps, PRIV_NET_RAWACCESS) == 0);
+ assert(setppriv(PRIV_OFF, PRIV_PERMITTED, ps) == 0);
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd >= 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == -1);
+ assert(errno == EPERM);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv4.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv4.c
new file mode 100644
index 0000000000..aed0204544
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv4.c
@@ -0,0 +1,75 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Fail to link a device without PRIV_NET_CONFIG
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <assert.h>
+#include <priv.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret;
+ priv_set_t *ps;
+ vnd_ioc_attach_t via;
+ vnd_ioc_link_t vil;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd >= 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == 0);
+
+ ps = priv_allocset();
+ assert(ps != NULL);
+ assert(priv_addset(ps, PRIV_SYS_NET_CONFIG) == 0);
+ assert(setppriv(PRIV_OFF, PRIV_PERMITTED, ps) == 0);
+
+ (void) strlcpy(vil.vil_name, argv[1], VND_NAMELEN);
+ vil.vil_errno = 0;
+ ret = ioctl(fd, VND_IOC_LINK, &vil);
+ assert(ret == -1);
+ assert(errno == EPERM);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv5.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv5.c
new file mode 100644
index 0000000000..2db8ecc95f
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv5.c
@@ -0,0 +1,77 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Fail to open a device without PRIV_NET_RAWACCESS
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <assert.h>
+#include <priv.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, fd2, ret;
+ priv_set_t *ps;
+ char *path;
+ vnd_ioc_attach_t via;
+ vnd_ioc_link_t vil;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd >= 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == 0);
+
+ ps = priv_allocset();
+ assert(ps != NULL);
+ assert(priv_addset(ps, PRIV_SYS_NET_RAWACCESS) == 0);
+ assert(setppriv(PRIV_OFF, PRIV_PERMITTED, ps) == 0);
+
+ (void) asprintf(&path, "/dev/vnd/%s", argv[1]);
+ assert(path != NULL);
+ fd2 = open(path, O_RDWR);
+ assert(fd2 == -1);
+ assert(errno == EPERM);
+
+ free(path);
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.olink.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.olink.c
new file mode 100644
index 0000000000..0f9292bbae
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.olink.c
@@ -0,0 +1,77 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Open a /dev/vnd/%s link
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret;
+ char *path;
+ vnd_ioc_attach_t via;
+ vnd_ioc_link_t vil;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd > 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == 0);
+ assert(via.via_errno == 0);
+
+ (void) strlcpy(vil.vil_name, argv[1], VND_NAMELEN);
+ vil.vil_errno = 0;
+ ret = ioctl(fd, VND_IOC_LINK, &vil);
+ assert(ret == 0);
+ assert(vil.vil_errno == 0);
+
+ ret = asprintf(&path, "/dev/vnd/%s", argv[1]);
+ assert(ret != -1);
+
+ ret = open(path, O_RDONLY);
+ assert(ret > 0);
+ assert(close(ret) == 0);
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.olinknopriv.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.olinknopriv.c
new file mode 100644
index 0000000000..338218e751
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.olinknopriv.c
@@ -0,0 +1,83 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Fail to open a /dev/vnd/%s without PRIV_NET_RAWACCESS
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <priv.h>
+
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret;
+ char *path;
+ priv_set_t *ps;
+ vnd_ioc_attach_t via;
+ vnd_ioc_link_t vil;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd > 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == 0);
+ assert(via.via_errno == 0);
+
+ (void) strlcpy(vil.vil_name, argv[1], VND_NAMELEN);
+ vil.vil_errno = 0;
+ ret = ioctl(fd, VND_IOC_LINK, &vil);
+ assert(ret == 0);
+ assert(vil.vil_errno == 0);
+
+ ret = asprintf(&path, "/dev/vnd/%s", argv[1]);
+ assert(ret != -1);
+
+ ps = priv_allocset();
+ assert(ps != NULL);
+ assert(priv_addset(ps, PRIV_NET_RAWACCESS) == 0);
+ assert(setppriv(PRIV_OFF, PRIV_PERMITTED, ps) == 0);
+
+ ret = open(path, O_RDWR);
+ assert(ret == -1);
+ assert(errno == EPERM);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.rmenolink.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.rmenolink.c
new file mode 100644
index 0000000000..d44e6512a7
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.rmenolink.c
@@ -0,0 +1,69 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Verify that unlink fails when we're not linked.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret;
+ vnd_ioc_attach_t via;
+ vnd_ioc_unlink_t viu;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd > 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == 0);
+ assert(via.via_errno == 0);
+
+ viu.viu_errno = 0;
+ ret = ioctl(fd, VND_IOC_UNLINK, &viu);
+ assert(ret == -1);
+ assert(viu.viu_errno == VND_E_NOTLINKED);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/tst.attachrdonly.c b/usr/src/cmd/vndadm/test/tst/ioctl/tst.attachrdonly.c
new file mode 100644
index 0000000000..29def6182d
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/tst.attachrdonly.c
@@ -0,0 +1,63 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Fail to attach when /dev/vnd/ctl is opened read only.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret;
+ vnd_ioc_attach_t via;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDONLY);
+ assert(fd > 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == -1);
+ assert(errno == EBADF);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/tst.badioctl.c b/usr/src/cmd/vndadm/test/tst/ioctl/tst.badioctl.c
new file mode 100644
index 0000000000..f26722f035
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/tst.badioctl.c
@@ -0,0 +1,79 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Throw a bunch of bad ioctls at us and make sure that we get ENOTTY.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <strings.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <limits.h>
+#include <assert.h>
+
+/*
+ * We're including a bunch of bad header files that have ioctl numbers that we
+ * know we shouldn't.
+ */
+#include <sys/ipd.h>
+#include <sys/dtrace.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+/*
+ * A series of bad requests
+ */
+static int requests[] = {
+ 0,
+ 1,
+ 42,
+ 169,
+ 4096,
+ INT_MAX,
+ IPDIOC_CORRUPT,
+ IPDIOC_REMOVE,
+ DTRACEIOC_CONF,
+ DTRACEIOC_REPLICATE,
+ -1
+};
+
+int
+main(void)
+{
+ int fd, i;
+
+ fd = open(VND_PATH, O_RDONLY);
+ if (fd < 0) {
+ (void) fprintf(stderr, "failed to open %s read only: %s\n",
+ VND_PATH, strerror(errno));
+ return (1);
+ }
+
+ for (i = 0; requests[i] != -1; i++) {
+ int ret;
+ ret = ioctl(fd, requests[i], NULL);
+ assert(ret == -1);
+ assert(errno == ENOTTY);
+ }
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/tst.basicopenctl.c b/usr/src/cmd/vndadm/test/tst/ioctl/tst.basicopenctl.c
new file mode 100644
index 0000000000..852ad5550f
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/tst.basicopenctl.c
@@ -0,0 +1,76 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Ensure that we can do a basic open of the device for read, write, and
+ * read/write.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <strings.h>
+#include <unistd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(void)
+{
+ int fd;
+
+ fd = open(VND_PATH, O_RDONLY);
+ if (fd < 0) {
+ (void) fprintf(stderr, "failed to open %s read only: %s\n",
+ VND_PATH, strerror(errno));
+ return (1);
+ }
+
+ if (close(fd) != 0) {
+ (void) fprintf(stderr, "failed to close vnd fd: %s\n",
+ strerror(errno));
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR);
+ if (fd < 0) {
+ (void) fprintf(stderr, "failed to open %s read/write: %s\n",
+ VND_PATH, strerror(errno));
+ return (1);
+ }
+
+ if (close(fd) != 0) {
+ (void) fprintf(stderr, "failed to close vnd fd: %s\n",
+ strerror(errno));
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_WRONLY);
+ if (fd < 0) {
+ (void) fprintf(stderr, "failed to open %s write only: %s\n",
+ VND_PATH, strerror(errno));
+ return (1);
+ }
+
+ if (close(fd) != 0) {
+ (void) fprintf(stderr, "failed to close vnd fd: %s\n",
+ strerror(errno));
+ return (1);
+ }
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/tst.gioctlfault.c b/usr/src/cmd/vndadm/test/tst/ioctl/tst.gioctlfault.c
new file mode 100644
index 0000000000..b581b5dd4c
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/tst.gioctlfault.c
@@ -0,0 +1,78 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Pass pointers to arbitrary addresses and make sure we properly get EFAULT for
+ * all the global ioctls.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <strings.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <limits.h>
+#include <assert.h>
+
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(void)
+{
+ int fd, ret;
+ vnd_ioc_attach_t *via;
+ vnd_ioc_list_t *vil;
+ vnd_ioc_buf_t *vib;
+
+ fd = open(VND_PATH, O_RDWR);
+ if (fd < 0) {
+ (void) fprintf(stderr, "failed to open %s r/w: %s\n", VND_PATH,
+ strerror(errno));
+ return (1);
+ }
+
+ via = (vnd_ioc_attach_t *)(uintptr_t)23;
+ vil = (vnd_ioc_list_t *)(uintptr_t)42;
+ vib = (vnd_ioc_buf_t *)(uintptr_t)169;
+
+ ret = ioctl(fd, VND_IOC_ATTACH, NULL);
+ assert(ret == -1);
+ assert(errno == EFAULT);
+ ret = ioctl(fd, VND_IOC_LIST, NULL);
+ assert(ret == -1);
+ assert(errno == EFAULT);
+ ret = ioctl(fd, VND_IOC_GETMAXBUF, NULL);
+ assert(ret == -1);
+ assert(errno == EFAULT);
+
+ ret = ioctl(fd, VND_IOC_ATTACH, via);
+ assert(ret == -1);
+ assert(errno == EFAULT);
+ ret = ioctl(fd, VND_IOC_LIST, vil);
+ assert(ret == -1);
+ assert(errno == EFAULT);
+ ret = ioctl(fd, VND_IOC_GETMAXBUF, vib);
+ assert(ret == -1);
+ assert(errno == EFAULT);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/tst.gioctlnattach.c b/usr/src/cmd/vndadm/test/tst/ioctl/tst.gioctlnattach.c
new file mode 100644
index 0000000000..98acffa194
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/tst.gioctlnattach.c
@@ -0,0 +1,100 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Here we test that all the ioctls which require us to be on a local device
+ * fail to work. Specifically, the errno should be VND_E_NOTATTACHED
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <strings.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <limits.h>
+#include <assert.h>
+#include <stdlib.h>
+
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+static int vib_ioc[] = {
+ VND_IOC_GETRXBUF,
+ VND_IOC_SETRXBUF,
+ VND_IOC_GETTXBUF,
+ VND_IOC_SETTXBUF,
+ VND_IOC_GETMINTU,
+ VND_IOC_GETMAXTU,
+ -1
+};
+
+int
+main(void)
+{
+ int fd, ret, i;
+ vnd_ioc_link_t vil;
+ vnd_ioc_unlink_t viu;
+ vnd_ioc_buf_t vib;
+ frameio_t *fio;
+ char buf[1];
+
+ fd = open(VND_PATH, O_RDWR);
+ if (fd < 0) {
+ (void) fprintf(stderr, "failed to open %s r/w: %s\n", VND_PATH,
+ strerror(errno));
+ return (1);
+ }
+
+ bzero(&vil, sizeof (vnd_ioc_link_t));
+ vil.vil_name[0] = 'a';
+ bzero(&viu, sizeof (vnd_ioc_unlink_t));
+ bzero(&vib, sizeof (vnd_ioc_buf_t));
+ fio = malloc(sizeof (frameio_t) + sizeof (framevec_t));
+ assert(fio != NULL);
+ fio->fio_version = FRAMEIO_CURRENT_VERSION;
+ fio->fio_nvpf = 1;
+ fio->fio_nvecs = 1;
+ fio->fio_vecs[0].fv_buf = buf;
+ fio->fio_vecs[0].fv_buflen = 1;
+
+ ret = ioctl(fd, VND_IOC_LINK, &vil);
+ assert(vil.vil_errno == VND_E_NOTATTACHED);
+ ret = ioctl(fd, VND_IOC_UNLINK, &viu);
+ assert(viu.viu_errno == VND_E_NOTLINKED);
+
+ for (i = 0; vib_ioc[i] != -1; i++) {
+ bzero(&vib, sizeof (vib));
+ ret = ioctl(fd, vib_ioc[i], &vib);
+ assert(vib.vib_errno == VND_E_NOTATTACHED);
+ }
+
+ /* The frameio ioctls only use standard errnos */
+ ret = ioctl(fd, VND_IOC_FRAMEIO_READ, fio);
+ assert(ret == -1);
+ assert(errno == ENXIO);
+ ret = ioctl(fd, VND_IOC_FRAMEIO_WRITE, fio);
+ assert(ret == -1);
+ assert(errno == ENXIO);
+
+ free(fio);
+ assert(close(fd) == 0);
+
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/tst.iocsize.ksh b/usr/src/cmd/vndadm/test/tst/ioctl/tst.iocsize.ksh
new file mode 100644
index 0000000000..9b30043d47
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/tst.iocsize.ksh
@@ -0,0 +1,54 @@
+#
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Ensure structure sizes for both ILP32 and LP64 are the same
+#
+
+vt_arg0=$(basename $0)
+vt_structs="vnd_ioc_attach_t vnd_ioc_link_t vnd_ioc_unlink_t"
+vt_structs="$vt_structs vnd_ioc_nonblock_t vnd_ioc_buf_t vnd_ioc_info_t"
+
+vt_t32="/tmp/vnd.iocsize.32.$$"
+vt_t64="/tmp/vnd.iocsize.64.$$"
+
+function fatal
+{
+ typeset msg="$*"
+ [[ -z "$msg" ]] && msg="failed"
+ echo "$vt_arg0: $msg" >&2
+ exit 1
+}
+
+function dump_types
+{
+ typeset file=$1
+ typeset lib=$2
+ typeset t
+
+ for t in $vn_structs; do
+ mdb -e \'::print -at $t\' $lib >> $file || fatal \
+ "failed to dump type $t from $lib"
+ done
+}
+
+rm -f $vt_t32 $vt_t64 || fatal "failed to cleanup old temp files"
+touch $vt_t32 $vt_t64 || fatal "failed to create temp files"
+
+dump_types $vt_t32 /usr/lib/libvnd.so.1
+dump_types $vt_t64 /usr/lib/64/libvnd.so.1
+
+diff $vt_t32 $vt_t64
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/tst.openctlbadflags.c b/usr/src/cmd/vndadm/test/tst/ioctl/tst.openctlbadflags.c
new file mode 100644
index 0000000000..65e48029b7
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/tst.openctlbadflags.c
@@ -0,0 +1,88 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Make sure that we can't open the vnd control device with invalid flags.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(void)
+{
+ int fd;
+
+ fd = open(VND_PATH, O_RDONLY | O_EXCL);
+ if (fd != -1) {
+ (void) fprintf(stderr, "somehow opened vnd O_EXCL!");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR | O_EXCL);
+ if (fd != -1) {
+ (void) fprintf(stderr, "somehow opened vnd O_EXCL!");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_WRONLY | O_EXCL);
+ if (fd != -1) {
+ (void) fprintf(stderr, "somehow opened vnd O_EXCL!");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDONLY | O_NDELAY);
+ if (fd != -1) {
+ (void) fprintf(stderr, "somehow opened vnd O_NDELAY!");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR | O_NDELAY);
+ if (fd != -1) {
+ (void) fprintf(stderr, "somehow opened vnd O_NDELAY!");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_WRONLY | O_NDELAY);
+ if (fd != -1) {
+ (void) fprintf(stderr, "somehow opened vnd O_NDELAY!");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDONLY | O_NDELAY | O_EXCL);
+ if (fd != -1) {
+ (void) fprintf(stderr, "somehow opened vnd O_NDELAY | O_EXCL!");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR | O_NDELAY | O_EXCL);
+ if (fd != -1) {
+ (void) fprintf(stderr, "somehow opened vnd O_NDELAY | O_EXCL!");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_WRONLY | O_NDELAY | O_EXCL);
+ if (fd != -1) {
+ (void) fprintf(stderr, "somehow opened vnd O_NDELAY | O_EXCL!");
+ return (1);
+ }
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/lib/Makefile b/usr/src/cmd/vndadm/test/tst/lib/Makefile
new file mode 100644
index 0000000000..d7a1ed8fa5
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/Makefile
@@ -0,0 +1,44 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+TSTDIR = lib
+EXETESTS = \
+ create.basic.exe \
+ create.badlink.exe \
+ create.badpropid.exe \
+ create.badpropsize.exe \
+ create.badzone.exe \
+ create.enomem.exe \
+ create.frameioeagain.exe \
+ create.open.exe \
+ create.propiter.exe \
+ create.proprdonly.exe \
+ err.badclose.exe \
+ tst.badopen.exe \
+ tst.strerror.exe \
+ tst.strsyserror.exe
+OUTFILES = tst.strerror.exe.out
+SHTESTS =
+SUPBOBJS =
+
+CLOBBERFILES = $(EXETESTS)
+
+include ../../Makefile.com
+
+LDLIBS += -lvnd
+
+install: $(ROOTTESTS)
+
+include ../../Makefile.targ
diff --git a/usr/src/cmd/vndadm/test/tst/lib/create.badlink.c b/usr/src/cmd/vndadm/test/tst/lib/create.badlink.c
new file mode 100644
index 0000000000..aefec3ed44
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/create.badlink.c
@@ -0,0 +1,39 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Make sure that we can't create something in the context of a datalink that
+ * doesn't exist.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <libvnd.h>
+
+int
+main(void)
+{
+ int syserr;
+ vnd_errno_t vnderr;
+ vnd_handle_t *vhp;
+
+ vhp = vnd_create(NULL, "foobar", "foobar", &vnderr, &syserr);
+ (void) printf("%d, %d\n", vnderr, syserr);
+ assert(vhp == NULL);
+ assert(vnderr == VND_E_NODATALINK);
+ assert(syserr == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/lib/create.badpropid.c b/usr/src/cmd/vndadm/test/tst/lib/create.badpropid.c
new file mode 100644
index 0000000000..15334fa31c
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/create.badpropid.c
@@ -0,0 +1,76 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Make sure that we can't get and set nonexisting properties.
+ */
+
+#include <stdio.h>
+#include <strings.h>
+#include <assert.h>
+#include <libvnd.h>
+
+int
+main(int argc, const char *argv[])
+{
+ int syserr, ret;
+ vnd_errno_t vnderr;
+ vnd_handle_t *vhp;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= LIBVND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ vhp = vnd_create(NULL, argv[1], argv[1], &vnderr, &syserr);
+ assert(vhp != NULL);
+ assert(vnderr == 0);
+ assert(syserr == 0);
+
+ ret = vnd_prop_get(vhp, VND_PROP_MAX, NULL, 0);
+ assert(ret == -1);
+ assert(vnd_errno(vhp) == VND_E_BADPROP);
+ assert(vnd_syserrno(vhp) == 0);
+
+ ret = vnd_prop_get(vhp, VND_PROP_MAX + 5, NULL, 0);
+ assert(ret == -1);
+ assert(vnd_errno(vhp) == VND_E_BADPROP);
+ assert(vnd_syserrno(vhp) == 0);
+
+ ret = vnd_prop_set(vhp, VND_PROP_MAX, NULL, 0);
+ assert(ret == -1);
+ assert(vnd_errno(vhp) == VND_E_BADPROP);
+ assert(vnd_syserrno(vhp) == 0);
+
+ ret = vnd_prop_set(vhp, VND_PROP_MAX + 5, NULL, 0);
+ assert(ret == -1);
+ assert(vnd_errno(vhp) == VND_E_BADPROP);
+ assert(vnd_syserrno(vhp) == 0);
+
+ ret = vnd_prop_writeable(VND_PROP_MAX, NULL);
+ assert(ret == -1);
+
+ ret = vnd_prop_writeable(VND_PROP_MAX + 5, NULL);
+ assert(ret == -1);
+
+ vnd_close(vhp);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/lib/create.badpropsize.c b/usr/src/cmd/vndadm/test/tst/lib/create.badpropsize.c
new file mode 100644
index 0000000000..d5fefd3764
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/create.badpropsize.c
@@ -0,0 +1,63 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Validate that we can't set properties with bogus sizes.
+ */
+
+#include <stdio.h>
+#include <strings.h>
+#include <assert.h>
+#include <limits.h>
+#include <libvnd.h>
+
+int
+main(int argc, const char *argv[])
+{
+ int syserr, ret, i;
+ vnd_errno_t vnderr;
+ vnd_handle_t *vhp;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= LIBVND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ vhp = vnd_create(NULL, argv[1], argv[1], &vnderr, &syserr);
+ assert(vhp != NULL);
+ assert(vnderr == 0);
+ assert(syserr == 0);
+
+ for (i = 0; i < VND_PROP_MAX; i++) {
+ ret = vnd_prop_get(vhp, i, NULL, INT32_MAX);
+ assert(ret == -1);
+ assert(vnd_errno(vhp) == VND_E_BADPROPSIZE);
+ assert(vnd_syserrno(vhp) == 0);
+
+ ret = vnd_prop_set(vhp, i, NULL, INT32_MAX);
+ assert(ret == -1);
+ assert(vnd_errno(vhp) == VND_E_BADPROPSIZE);
+ assert(vnd_syserrno(vhp) == 0);
+ }
+
+ vnd_close(vhp);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/lib/create.badzone.c b/usr/src/cmd/vndadm/test/tst/lib/create.badzone.c
new file mode 100644
index 0000000000..30f9612963
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/create.badzone.c
@@ -0,0 +1,43 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Make sure that we can't create something in the context of a zone that
+ * doesn't exist.
+ */
+
+#include <assert.h>
+#include <sys/zone.h>
+#include <string.h>
+#include <libvnd.h>
+
+int
+main(void)
+{
+ int syserr;
+ vnd_errno_t vnderr;
+ char zname[ZONENAME_MAX+4];
+ vnd_handle_t *vhp;
+
+ (void) memset(zname, 'a', sizeof (zname));
+ zname[ZONENAME_MAX+3] = '\0';
+
+ vhp = vnd_create(zname, "foobar", "foobar", &vnderr, &syserr);
+ assert(vhp == NULL);
+ assert(vnderr == VND_E_NOZONE);
+ assert(syserr == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/lib/create.basic.c b/usr/src/cmd/vndadm/test/tst/lib/create.basic.c
new file mode 100644
index 0000000000..5335f8cbb4
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/create.basic.c
@@ -0,0 +1,49 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Simple create and destroy.
+ */
+
+#include <stdio.h>
+#include <strings.h>
+#include <assert.h>
+#include <libvnd.h>
+
+int
+main(int argc, const char *argv[])
+{
+ int syserr;
+ vnd_errno_t vnderr;
+ vnd_handle_t *vhp;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= LIBVND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ vhp = vnd_create(NULL, argv[1], argv[1], &vnderr, &syserr);
+ assert(vhp != NULL);
+ assert(vnderr == 0);
+ assert(syserr == 0);
+ vnd_close(vhp);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/lib/create.enomem.c b/usr/src/cmd/vndadm/test/tst/lib/create.enomem.c
new file mode 100644
index 0000000000..9203e369ae
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/create.enomem.c
@@ -0,0 +1,91 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Verify that we can't allocate a handle when in an ENOMEM situation.
+ */
+
+#include <procfs.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/sysmacros.h>
+#include <assert.h>
+#include <strings.h>
+
+#include <libvnd.h>
+
+int
+main(int argc, const char *argv[])
+{
+ int fd;
+ int syserr;
+ vnd_errno_t vnderr;
+ vnd_handle_t *vhp;
+ pstatus_t status;
+ void *addr;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= LIBVND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open("/proc/self/status", O_RDONLY);
+ if (fd < 0)
+ exit(1);
+ if (read(fd, &status, sizeof (status)) != sizeof (status))
+ exit(1);
+
+ addr = mmap((caddr_t)P2ROUNDUP(status.pr_brkbase +
+ status.pr_brksize, 0x1000), 0x1000,
+ PROT_READ, MAP_ANON | MAP_FIXED | MAP_PRIVATE, -1, 0);
+ if (addr == (void *)-1) {
+ perror("mmap");
+ exit(1);
+ }
+
+ /* malloc an approximate size of the vnd_handle_t */
+ for (;;) {
+ void *buf;
+
+ buf = malloc(8);
+ if (buf == NULL)
+ break;
+ }
+
+ for (;;) {
+ void *buf;
+
+ buf = malloc(4);
+ if (buf == NULL)
+ break;
+ }
+
+ vhp = vnd_create(NULL, argv[1], argv[1], &vnderr, &syserr);
+ assert(vhp == NULL);
+ assert(vnderr == VND_E_NOMEM);
+ assert(syserr == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/lib/create.frameioeagain.c b/usr/src/cmd/vndadm/test/tst/lib/create.frameioeagain.c
new file mode 100644
index 0000000000..6cb14fb7df
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/create.frameioeagain.c
@@ -0,0 +1,80 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Create a datalink, set it to non-blocking mode and ensure that we get EAGAIN
+ * from frame I/O calls. Note that if this test is not plumbed up over an
+ * etherstub, then it is likely that other traffic will appear on the device and
+ * this will fail. Note that the test suite always creates these devices over an
+ * etherstub.
+ */
+
+#include <stdio.h>
+#include <strings.h>
+#include <assert.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <libvnd.h>
+
+int
+main(int argc, const char *argv[])
+{
+ int syserr, ret, fd;
+ vnd_errno_t vnderr;
+ vnd_handle_t *vhp;
+ frameio_t *fio;
+ char buf[1520];
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= LIBVND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ vhp = vnd_create(NULL, argv[1], argv[1], &vnderr, &syserr);
+ assert(vhp != NULL);
+ assert(vnderr == 0);
+ assert(syserr == 0);
+
+ fd = vnd_pollfd(vhp);
+ ret = fcntl(fd, F_SETFL, O_NONBLOCK);
+ assert(ret == 0);
+
+ fio = malloc(sizeof (frameio_t) +
+ sizeof (framevec_t));
+ assert(fio != NULL);
+ fio->fio_version = FRAMEIO_CURRENT_VERSION;
+ fio->fio_nvpf = 1;
+ fio->fio_nvecs = 1;
+
+ fio->fio_vecs[0].fv_buf = buf;
+ fio->fio_vecs[0].fv_buflen = sizeof (buf);
+
+ ret = vnd_frameio_read(vhp, fio);
+ (void) printf("%d, %d\n", ret, errno);
+ assert(ret == -1);
+ assert(errno == EAGAIN);
+
+ vnd_close(vhp);
+ free(fio);
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/lib/create.open.c b/usr/src/cmd/vndadm/test/tst/lib/create.open.c
new file mode 100644
index 0000000000..9cb1d7e40e
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/create.open.c
@@ -0,0 +1,56 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Make sure we can open a created datalink.
+ */
+
+#include <stdio.h>
+#include <strings.h>
+#include <assert.h>
+#include <libvnd.h>
+
+int
+main(int argc, const char *argv[])
+{
+ int syserr;
+ vnd_errno_t vnderr;
+ vnd_handle_t *vhp, *vhp2;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= LIBVND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ vhp = vnd_create(NULL, argv[1], argv[1], &vnderr, &syserr);
+ assert(vhp != NULL);
+ assert(vnderr == 0);
+ assert(syserr == 0);
+
+ vhp2 = vnd_open(NULL, argv[1], &vnderr, &syserr);
+ assert(vhp2 != NULL);
+ assert(vnderr == 0);
+ assert(syserr == 0);
+
+ vnd_close(vhp2);
+ vnd_close(vhp);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/lib/create.propiter.c b/usr/src/cmd/vndadm/test/tst/lib/create.propiter.c
new file mode 100644
index 0000000000..a0b46180f7
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/create.propiter.c
@@ -0,0 +1,79 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Ensure that vnd_prop_iter sees all props;
+ */
+
+#include <stdio.h>
+#include <strings.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <libvnd.h>
+
+static boolean_t *g_props;
+
+/* ARGSUSED */
+static int
+prop_cb(vnd_handle_t *vhp, vnd_prop_t prop, void *unused)
+{
+ assert(prop < VND_PROP_MAX);
+ g_props[prop] = B_TRUE;
+
+ return (0);
+}
+
+int
+main(int argc, const char *argv[])
+{
+ int syserr, i, ret;
+ vnd_errno_t vnderr;
+ vnd_handle_t *vhp;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= LIBVND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ g_props = malloc(sizeof (boolean_t) * VND_PROP_MAX);
+ if (g_props == NULL) {
+ (void) fprintf(stderr, "failed to alloc memory for %d "
+ "boolean_t\n", VND_PROP_MAX);
+ return (1);
+ }
+ for (i = 0; i < VND_PROP_MAX; i++)
+ g_props[i] = B_FALSE;
+
+ vhp = vnd_create(NULL, argv[1], argv[1], &vnderr, &syserr);
+ assert(vhp != NULL);
+ assert(vnderr == 0);
+ assert(syserr == 0);
+
+ ret = vnd_prop_iter(vhp, prop_cb, NULL);
+ assert(ret == 0);
+
+ for (i = 0; i < VND_PROP_MAX; i++)
+ assert(g_props[i] == B_TRUE);
+
+ free(g_props);
+ vnd_close(vhp);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/lib/create.proprdonly.c b/usr/src/cmd/vndadm/test/tst/lib/create.proprdonly.c
new file mode 100644
index 0000000000..18b1f7d58d
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/create.proprdonly.c
@@ -0,0 +1,63 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Validate that we can't set read only properties
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <strings.h>
+#include <assert.h>
+#include <limits.h>
+#include <libvnd.h>
+
+int
+main(int argc, const char *argv[])
+{
+ int syserr, ret;
+ vnd_errno_t vnderr;
+ vnd_handle_t *vhp;
+ vnd_prop_buf_t vpb;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= LIBVND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ vhp = vnd_create(NULL, argv[1], argv[1], &vnderr, &syserr);
+ assert(vhp != NULL);
+ assert(vnderr == 0);
+ assert(syserr == 0);
+
+ ret = vnd_prop_get(vhp, VND_PROP_MINTU, &vpb,
+ sizeof (vnd_prop_buf_t));
+ assert(ret == 0);
+
+ ret = vnd_prop_set(vhp, VND_PROP_MINTU, &vpb,
+ sizeof (vnd_prop_buf_t));
+ assert(ret == -1);
+ assert(vnd_errno(vhp) == VND_E_PROPRDONLY);
+ assert(vnd_syserrno(vhp) == 0);
+
+ vnd_close(vhp);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/lib/err.badclose.c b/usr/src/cmd/vndadm/test/tst/lib/err.badclose.c
new file mode 100644
index 0000000000..8c832506a0
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/err.badclose.c
@@ -0,0 +1,33 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * This program should segfault.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <strings.h>
+#include <assert.h>
+#include <libvnd.h>
+
+int
+main(void)
+{
+ vnd_handle_t *vhp = (void *)0x42;
+ vnd_close(vhp);
+ /* This should not be reached */
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/lib/tst.badopen.c b/usr/src/cmd/vndadm/test/tst/lib/tst.badopen.c
new file mode 100644
index 0000000000..4f67ce79ed
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/tst.badopen.c
@@ -0,0 +1,49 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Make sure we can't open a vnd device that doesn't exist
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <strings.h>
+#include <assert.h>
+#include <libvnd.h>
+
+int
+main(int argc, const char *argv[])
+{
+ int syserr;
+ vnd_errno_t vnderr;
+ vnd_handle_t *vhp;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= LIBVND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ vhp = vnd_open(NULL, argv[1], &vnderr, &syserr);
+ assert(vhp == NULL);
+ assert(vnderr == VND_E_SYS);
+ assert(syserr == ENOENT);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/lib/tst.strerror.c b/usr/src/cmd/vndadm/test/tst/lib/tst.strerror.c
new file mode 100644
index 0000000000..a99a9ecbf6
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/tst.strerror.c
@@ -0,0 +1,30 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Verify that all the error strings we care about match what we expect.
+ */
+
+#include <stdio.h>
+#include <libvnd.h>
+
+int
+main(void)
+{
+ int i;
+ for (i = 0; i <= VND_E_UNKNOWN + 1; i++)
+ (void) printf("[%s]\n", vnd_strerror(i));
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/lib/tst.strerror.exe.out b/usr/src/cmd/vndadm/test/tst/lib/tst.strerror.exe.out
new file mode 100644
index 0000000000..83dbcdfdb4
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/tst.strerror.exe.out
@@ -0,0 +1,37 @@
+[no error]
+[not enough memory available]
+[no such datalink]
+[datalink not of type DL_ETHER]
+[unknown dlpi failure]
+[DL_ATTACH_REQ failed]
+[DL_BIND_REQ failed]
+[DL_PROMISCON_REQ failed]
+[DLD_CAPAB_DIRECT enable failed]
+[bad datalink capability]
+[bad datalink subcapability]
+[bad dld version]
+[failed to create kstats]
+[no such vnd link]
+[netstack doesn't exist]
+[device already associated]
+[device already attached]
+[device already linked]
+[invalid name]
+[permission denied]
+[no such zone]
+[failed to initialize vnd stream module]
+[device not attached]
+[device not linked]
+[another device has the same link name]
+[failed to create minor node]
+[requested buffer size is too large]
+[requested buffer size is too small]
+[unable to obtain exclusive access to dlpi link, link busy]
+[DLD direct capability not supported over data link]
+[invalid property size]
+[invalid property]
+[property is read only]
+[unexpected system error]
+[capabilities invalid, pass-through module detected]
+[unknown error]
+[unknown error]
diff --git a/usr/src/cmd/vndadm/test/tst/lib/tst.strsyserror.c b/usr/src/cmd/vndadm/test/tst/lib/tst.strsyserror.c
new file mode 100644
index 0000000000..b95e6372e4
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/tst.strsyserror.c
@@ -0,0 +1,50 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Verify that the error message from libvnd's strsyserrno is the same as the
+ * underlying strerror function's. It should be. We'll just check an assortment
+ * of errnos.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <libvnd.h>
+
+int
+main(void)
+{
+ int i;
+ const char *vnd, *libc;
+ for (i = 0; i < 42; i++) {
+ vnd = vnd_strsyserror(i);
+ libc = strerror(i);
+ if ((vnd != NULL && libc == NULL) ||
+ (vnd == NULL && libc != NULL)) {
+ (void) fprintf(stderr, "errno %d, vnd: %p, libc: %p",
+ i, (void *)vnd, (void *)libc);
+ return (1);
+ }
+ if (vnd != NULL && strcmp(vnd, libc) != 0) {
+ (void) fprintf(stderr,
+ "errno %d: libc and vnd disagree.\n", i);
+ (void) fprintf(stderr, "vnd: %s\n", vnd);
+ (void) fprintf(stderr, "libc: %s\n", libc);
+ return (1);
+ }
+ }
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/vndadm.c b/usr/src/cmd/vndadm/vndadm.c
new file mode 100644
index 0000000000..6811663696
--- /dev/null
+++ b/usr/src/cmd/vndadm/vndadm.c
@@ -0,0 +1,872 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <errno.h>
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <assert.h>
+#include <libgen.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <zone.h>
+
+#include <libvnd.h>
+
+typedef int (*vndadm_print_t)(vnd_handle_t *, vnd_prop_t);
+typedef int (*vndadm_parse_t)(char *, void **, size_t *);
+
+typedef struct vndadm_proptbl {
+ const char *vp_name;
+ vndadm_print_t vp_print;
+ vndadm_parse_t vp_parse;
+} vndadm_proptbl_t;
+
+/*
+ * Forwards
+ */
+static int usage(const char *, ...);
+static int vndadm_print_size(vnd_handle_t *, vnd_prop_t);
+static int vndadm_print_number(vnd_handle_t *, vnd_prop_t);
+static int vndadm_parse_size(char *, void **, size_t *);
+
+/*
+ * Globals
+ */
+static char *vnd_pname;
+
+static void
+vnd_vwarn(vnd_errno_t verr, int syserr, const char *format, va_list alist)
+{
+ (void) fprintf(stderr, "%s: ", vnd_pname);
+ (void) vfprintf(stderr, format, alist);
+ if (strchr(format, '\n') == NULL) {
+ (void) fprintf(stderr, ": %s\n", verr != VND_E_SYS ?
+ vnd_strerror(verr) : vnd_strsyserror(syserr));
+ }
+}
+
+static void
+vnd_libwarn(vnd_errno_t verr, int syserr, const char *format, ...)
+{
+ va_list alist;
+
+ va_start(alist, format);
+ vnd_vwarn(verr, syserr, format, alist);
+ va_end(alist);
+}
+
+static void
+vnd_warn(const char *format, ...)
+{
+ va_list alist;
+
+ va_start(alist, format);
+ vnd_vwarn(0, 0, format, alist);
+ va_end(alist);
+}
+
+static vndadm_proptbl_t vndadm_propname_tbl[] = {
+ { "rxbuf", vndadm_print_size,
+ vndadm_parse_size }, /* VND_PROP_RXBUF */
+ { "txbuf", vndadm_print_size,
+ vndadm_parse_size }, /* VND_PROP_TXBUF */
+ { "maxsize", vndadm_print_size, NULL }, /* VND_PROP_MAXBUF */
+ { "mintu", vndadm_print_number, NULL }, /* VND_PROP_MINTU */
+ { "maxtu", vndadm_print_number, NULL }, /* VND_PROP_MAXTU */
+ NULL /* VND_PROP_MAX */
+};
+
+static const char *
+vndadm_prop_to_name(vnd_prop_t prop)
+{
+ if (prop > VND_PROP_MAX)
+ return (NULL);
+
+ return (vndadm_propname_tbl[prop].vp_name);
+}
+
+static vnd_prop_t
+vndadm_name_to_prop(const char *name)
+{
+ int i;
+
+ for (i = 0; i < VND_PROP_MAX; i++) {
+ if (strcmp(name, vndadm_propname_tbl[i].vp_name) == 0)
+ return (i);
+ }
+
+ return (VND_PROP_MAX);
+}
+
+static int
+vndadm_print_size(vnd_handle_t *vhp, vnd_prop_t prop)
+{
+ vnd_prop_buf_t buf;
+
+ if (vnd_prop_get(vhp, prop, &buf, sizeof (buf)) != 0) {
+ vnd_libwarn(vnd_errno(vhp), vnd_syserrno(vhp),
+ "failed to get property %s", vndadm_prop_to_name(prop));
+ return (1);
+ }
+
+ (void) printf("%lld", buf.vpb_size);
+ return (0);
+}
+
+static int
+vndadm_print_number(vnd_handle_t *vhp, vnd_prop_t prop)
+{
+ vnd_prop_buf_t buf;
+
+ if (vnd_prop_get(vhp, prop, &buf, sizeof (buf)) != 0) {
+ vnd_libwarn(vnd_errno(vhp), vnd_syserrno(vhp),
+ "failed to get property %s", vndadm_prop_to_name(prop));
+ return (1);
+ }
+
+ (void) printf("%lld", buf.vpb_size);
+ return (0);
+}
+
+static int
+vndadm_parse_size(char *str, void **bufp, size_t *sizep)
+{
+ char *end;
+ unsigned long long val, orig;
+ vnd_prop_buf_t *buf;
+
+ errno = 0;
+ val = strtoull(str, &end, 10);
+ if (errno != 0) {
+ vnd_warn("%s: not a number\n", str);
+ return (1);
+ }
+
+ orig = val;
+ switch (*end) {
+ case 'g':
+ case 'G':
+ val *= 1024;
+ if (val < orig)
+ goto overflow;
+ /*FALLTHRU*/
+ case 'm':
+ case 'M':
+ val *= 1024;
+ if (val < orig)
+ goto overflow;
+ /*FALLTHRU*/
+ case 'k':
+ case 'K':
+ val *= 1024;
+ if (val < orig)
+ goto overflow;
+ end++;
+ break;
+ default:
+ break;
+ }
+
+ if (*end == 'b' || *end == 'B')
+ end++;
+ if (*end != '\0') {
+ vnd_warn("%s: not a number", str);
+ return (1);
+ }
+
+ buf = malloc(sizeof (vnd_prop_buf_t));
+ if (buf == NULL) {
+ vnd_warn("failed to allocate memory for setting a property");
+ return (1);
+ }
+
+ buf->vpb_size = val;
+ *bufp = buf;
+ *sizep = sizeof (vnd_prop_buf_t);
+
+ return (0);
+
+overflow:
+ vnd_warn("value overflowed: %s\n", str);
+ return (1);
+}
+
+static void
+vndadm_create_usage(FILE *out)
+{
+ (void) fprintf(out, "\tcreate:\t\t[-z zonename] -l datalink name\n");
+}
+
+static int
+vndadm_create(int argc, char *argv[])
+{
+ int c, syserr;
+ vnd_errno_t vnderr;
+ const char *datalink = NULL;
+ const char *linkname = NULL;
+ const char *zonename = NULL;
+ vnd_handle_t *vhp;
+
+ optind = 0;
+ while ((c = getopt(argc, argv, ":z:l:")) != -1) {
+ switch (c) {
+ case 'l':
+ datalink = optarg;
+ break;
+ case 'z':
+ zonename = optarg;
+ break;
+ case ':':
+ return (usage("-%c requires an operand\n", optopt));
+ case '?':
+ return (usage("unknown option: -%c\n", optopt));
+ default:
+ abort();
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if (argc < 1) {
+ return (usage("missing required link name\n"));
+ } else if (argc > 1) {
+ return (usage("create: too many arguments for link name, "
+ "pick one\n"));
+ }
+ linkname = argv[0];
+ if (datalink == NULL)
+ datalink = linkname;
+
+ vhp = vnd_create(zonename, datalink, linkname, &vnderr, &syserr);
+ if (vhp == NULL) {
+ vnd_libwarn(vnderr, syserr,
+ "failed to create datapath link %s", linkname);
+ return (1);
+ }
+
+ vnd_close(vhp);
+ return (0);
+}
+
+static void
+vndadm_destroy_usage(FILE *out)
+{
+ (void) fprintf(out, "\tdestroy:\t[-z zonename] [link]...\n");
+}
+
+static int
+vndadm_destroy(int argc, char *argv[])
+{
+ vnd_handle_t *vhp;
+ int c, syserr;
+ vnd_errno_t vnderr;
+ const char *zonename = NULL;
+
+ optind = 0;
+ while ((c = getopt(argc, argv, ":z:")) != -1) {
+ switch (c) {
+ case 'z':
+ zonename = optarg;
+ break;
+ case ':':
+ return (usage("-%c requires an operand\n", optopt));
+ case '?':
+ return (usage("unknown option: -%c\n", optopt));
+ default:
+ abort();
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if (argc != 1) {
+ return (usage("extraneous arguments\n"));
+ }
+
+ vhp = vnd_open(zonename, argv[0], &vnderr, &syserr);
+ if (vhp == NULL) {
+ vnd_libwarn(vnderr, syserr, "failed to open link: %s", argv[0]);
+ return (1);
+ }
+
+ if (vnd_unlink(vhp) != 0) {
+ vnd_libwarn(vnd_errno(vhp), vnd_syserrno(vhp),
+ "failed to destroy link %s", argv[0]);
+ return (1);
+ }
+
+ vnd_close(vhp);
+ return (0);
+}
+
+static void
+vndadm_list_usage(FILE *out)
+{
+ (void) fprintf(out, "\tlist:\t\t[-p] [-d delim] [-o field,...] "
+ "[-z zonename] [link]...\n");
+}
+
+#define VNDADM_LIST_NFIELDS 3
+
+typedef struct vndadm_list_cb {
+ int vsc_argc;
+ char **vsc_argv;
+ int vsc_found;
+ boolean_t vsc_parse;
+ const char *vsc_delim;
+ int vsc_order[VNDADM_LIST_NFIELDS];
+ int vsc_last;
+ zoneid_t vsc_zid;
+} vndadm_list_cb_t;
+
+typedef struct vndadm_list_field {
+ const char *vlf_name;
+ const char *vlf_header;
+ int vlf_size;
+ void (*vlf_print)(struct vndadm_list_field *, vnd_info_t *, boolean_t);
+ void (*vlf_parse)(struct vndadm_list_field *, vnd_info_t *, boolean_t);
+} vndadm_list_field_t;
+
+static void
+vlf_print_link(vndadm_list_field_t *vlfp, vnd_info_t *viip,
+ boolean_t last)
+{
+ if (last == B_TRUE) {
+ (void) printf("%s", viip->vi_name);
+ } else {
+ (void) printf("%-*s", vlfp->vlf_size, viip->vi_name);
+ }
+}
+
+/* ARGSUSED */
+static void
+vlf_parse_link(vndadm_list_field_t *vlfp, vnd_info_t *viip,
+ boolean_t last)
+{
+ (void) printf("%s", viip->vi_name);
+}
+
+static void
+vlf_print_datalink(vndadm_list_field_t *vlfp, vnd_info_t *viip,
+ boolean_t last)
+{
+ if (last == B_TRUE) {
+ (void) printf("%s", viip->vi_datalink);
+ } else {
+ (void) printf("%-*s", vlfp->vlf_size, viip->vi_datalink);
+ }
+}
+
+/* ARGSUSED */
+static void
+vlf_parse_datalink(vndadm_list_field_t *vlfp, vnd_info_t *viip,
+ boolean_t last)
+{
+ (void) printf("%s", viip->vi_datalink);
+}
+
+static void
+vlf_print_zone(vndadm_list_field_t *vlfp, vnd_info_t *viip,
+ boolean_t last)
+{
+ char buf[ZONENAME_MAX];
+
+ if (getzonenamebyid(viip->vi_zone, buf, sizeof (buf)) <= 0)
+ (void) strlcpy(buf, "<unknown>", sizeof (buf));
+
+ if (last == B_TRUE) {
+ (void) printf("%s", buf);
+ } else {
+ (void) printf("%-*s", vlfp->vlf_size, buf);
+ }
+}
+
+/* ARGSUSED */
+static void
+vlf_parse_zone(vndadm_list_field_t *vlfp, vnd_info_t *viip,
+ boolean_t last)
+{
+ char buf[ZONENAME_MAX];
+
+ if (getzonenamebyid(viip->vi_zone, buf, sizeof (buf)) <= 0)
+ (void) strlcpy(buf, "<unknown>", sizeof (buf));
+
+ (void) printf("%s", buf);
+}
+
+static vndadm_list_field_t vlf_tbl[] = {
+ { "name", "NAME", 16, vlf_print_link, vlf_parse_link },
+ { "datalink", "DATALINK", 16, vlf_print_datalink, vlf_parse_datalink },
+ { "zone", "ZONENAME", 32, vlf_print_zone, vlf_parse_zone },
+ { NULL }
+};
+
+
+static int
+vndadm_list_f(vnd_info_t *viip, void *arg)
+{
+ int i;
+ boolean_t found;
+ vndadm_list_cb_t *vscp = arg;
+
+ if (vscp->vsc_zid != ALL_ZONES && vscp->vsc_zid != viip->vi_zone)
+ return (0);
+
+ if (vscp->vsc_argc != 0) {
+ found = B_FALSE;
+ for (i = 0; i < vscp->vsc_argc; i++) {
+ if (strcmp(viip->vi_name, vscp->vsc_argv[i]) == 0) {
+ found = B_TRUE;
+ break;
+ }
+ }
+ if (found == B_FALSE)
+ return (0);
+ vscp->vsc_found++;
+ }
+
+ for (i = 0; i < VNDADM_LIST_NFIELDS && vscp->vsc_order[i] != -1; i++) {
+ boolean_t last = i == vscp->vsc_last;
+ if (vscp->vsc_parse == B_TRUE)
+ vlf_tbl[vscp->vsc_order[i]].vlf_parse(
+ &vlf_tbl[vscp->vsc_order[i]], viip, last);
+ else
+ vlf_tbl[vscp->vsc_order[i]].vlf_print(
+ &vlf_tbl[vscp->vsc_order[i]], viip, last);
+
+ if (last == B_FALSE)
+ (void) printf("%s", vscp->vsc_delim);
+ }
+ (void) printf("\n");
+
+ return (0);
+}
+
+static int
+vndadm_list(int argc, char *argv[])
+{
+ int c, i, syserr;
+ vnd_errno_t vnderr;
+ boolean_t parse = B_FALSE;
+ const char *zonename = NULL, *delim = NULL;
+ char *fields = NULL;
+ vndadm_list_cb_t vsc;
+
+ optind = 0;
+ while ((c = getopt(argc, argv, ":pd:o:z:")) != -1) {
+ switch (c) {
+ case 'p':
+ parse = B_TRUE;
+ break;
+ case 'd':
+ delim = optarg;
+ break;
+ case 'o':
+ fields = optarg;
+ break;
+ case 'z':
+ zonename = optarg;
+ break;
+ case ':':
+ return (usage("-%c requires an operand\n", optopt));
+ case '?':
+ return (usage("unknown option: -%c\n", optopt));
+ default:
+ abort();
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ vsc.vsc_argc = argc;
+ vsc.vsc_argv = argv;
+ vsc.vsc_found = 0;
+ if (zonename != NULL) {
+ vsc.vsc_zid = getzoneidbyname(zonename);
+ if (vsc.vsc_zid == -1) {
+ vnd_warn("no such zone: %s\n", zonename);
+ return (1);
+ }
+ } else {
+ vsc.vsc_zid = ALL_ZONES;
+ }
+
+ /* Sanity check parseable related stuff */
+ if (delim != NULL && parse == B_FALSE) {
+ return (usage("-d cannot be used without -p\n"));
+ }
+
+ if (parse == B_TRUE && fields == NULL) {
+ return (usage("-p cannot be used without -o\n"));
+ }
+
+ /* validate our fields, if any */
+ if (fields != NULL) {
+ char *c, *n;
+ int floc = 0;
+
+ c = fields;
+ for (;;) {
+ if (floc >= VNDADM_LIST_NFIELDS) {
+ return (usage("too many fields specified "
+ "for -o\n"));
+ }
+
+ n = strchr(c, ',');
+ if (n != NULL)
+ *n = '\0';
+
+ for (i = 0; i < VNDADM_LIST_NFIELDS; i++) {
+ if (strcasecmp(c, vlf_tbl[i].vlf_name) == 0)
+ break;
+ }
+ if (i == VNDADM_LIST_NFIELDS) {
+ vnd_warn("invalid field for -o: %s\nvalid "
+ "fields are:", c);
+ for (i = 0; i < VNDADM_LIST_NFIELDS; i++)
+ vnd_warn(" %s", vlf_tbl[i].vlf_name);
+ vnd_warn("\n");
+ return (usage(NULL));
+ }
+ vsc.vsc_order[floc] = i;
+ floc++;
+
+ if (n == NULL)
+ break;
+ c = n + 1;
+ }
+
+ vsc.vsc_last = floc - 1;
+ while (floc < VNDADM_LIST_NFIELDS)
+ vsc.vsc_order[floc++] = -1;
+ } else {
+ vsc.vsc_order[0] = 0;
+ vsc.vsc_order[1] = 1;
+ vsc.vsc_order[2] = 2;
+ }
+
+ vsc.vsc_parse = parse;
+ vsc.vsc_delim = delim;
+ if (vsc.vsc_delim == NULL)
+ vsc.vsc_delim = " ";
+
+ if (vsc.vsc_parse != B_TRUE) {
+ for (i = 0; i < VNDADM_LIST_NFIELDS && vsc.vsc_order[i] != -1;
+ i++) {
+ if (i + 1 == VNDADM_LIST_NFIELDS) {
+ (void) printf("%s\n",
+ vlf_tbl[vsc.vsc_order[i]].vlf_header);
+ continue;
+ }
+ (void) printf("%-*s ",
+ vlf_tbl[vsc.vsc_order[i]].vlf_size,
+ vlf_tbl[vsc.vsc_order[i]].vlf_header);
+ }
+ }
+
+ if (vnd_walk(vndadm_list_f, &vsc, &vnderr, &syserr) != 0) {
+ vnd_libwarn(vnderr, syserr, "failed to walk vnd links");
+ return (1);
+ }
+
+ if (argc > 0 && vsc.vsc_found == 0) {
+ vnd_warn("no links matched requested names\n");
+ return (1);
+ }
+
+ return (0);
+}
+
+typedef struct vndadm_get {
+ boolean_t vg_parse;
+ const char *vg_delim;
+ const char *vg_link;
+ int vg_argc;
+ char **vg_argv;
+} vndadm_get_t;
+
+static int
+vndadm_get_cb(vnd_handle_t *vhp, vnd_prop_t prop, void *arg)
+{
+ boolean_t writeable;
+ const char *perm;
+ vndadm_get_t *vgp = arg;
+ const char *name = vndadm_prop_to_name(prop);
+
+ /* Verify if this is a prop we're supposed to print */
+ if (vgp->vg_argc > 0) {
+ int i;
+ boolean_t found = B_FALSE;
+ for (i = 0; i < vgp->vg_argc; i++) {
+ if (strcmp(name, vgp->vg_argv[i]) == 0) {
+ found = B_TRUE;
+ break;
+ }
+ }
+ if (found == B_FALSE)
+ return (0);
+ }
+
+ if (vnd_prop_writeable(prop, &writeable) != 0)
+ abort();
+
+ perm = writeable ? "rw" : "r-";
+
+ if (vgp->vg_parse == B_TRUE) {
+ (void) printf("%s%s%s%s%s%s", vgp->vg_link, vgp->vg_delim,
+ name, vgp->vg_delim, perm, vgp->vg_delim);
+ } else {
+ (void) printf("%-13s %-16s %-5s ", vgp->vg_link, name, perm);
+ }
+
+ if (vndadm_propname_tbl[prop].vp_print != NULL) {
+ if (vndadm_propname_tbl[prop].vp_print(vhp, prop) != 0)
+ return (1);
+ } else {
+ (void) printf("-");
+ }
+ (void) printf("\n");
+ return (0);
+}
+
+static int
+vndadm_get(int argc, char *argv[])
+{
+ vnd_handle_t *vhp;
+ boolean_t parse = B_FALSE;
+ vndadm_get_t vg;
+ int c, syserr;
+ vnd_errno_t vnderr;
+ const char *zonename = NULL, *delim = NULL;
+
+ if (argc <= 0) {
+ return (usage("get requires a link name\n"));
+ }
+
+ optind = 0;
+ while ((c = getopt(argc, argv, ":pd:z:")) != -1) {
+ switch (c) {
+ case 'p':
+ parse = B_TRUE;
+ break;
+ case 'd':
+ delim = optarg;
+ break;
+ case 'z':
+ zonename = optarg;
+ break;
+ case ':':
+ return (usage("-%c requires an operand\n", optopt));
+ case '?':
+ return (usage("unknown option: -%c\n", optopt));
+ default:
+ abort();
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if (argc < 1) {
+ return (usage("missing required link\n"));
+ }
+
+ vhp = vnd_open(zonename, argv[0], &vnderr, &syserr);
+ if (vhp == NULL) {
+ vnd_libwarn(vnderr, syserr, "failed to open link: %s", argv[0]);
+ return (1);
+ }
+
+ vg.vg_argc = argc - 1;
+ vg.vg_argv = argv + 1;
+ vg.vg_link = argv[0];
+ vg.vg_parse = parse;
+ vg.vg_delim = delim != NULL ? delim : " ";
+ if (vg.vg_parse == B_FALSE)
+ (void) printf("%-13s %-16s %-5s %s\n", "LINK", "PROPERTY",
+ "PERM", "VALUE");
+
+ if (vnd_prop_iter(vhp, vndadm_get_cb, &vg) != 0)
+ return (1);
+
+ return (0);
+}
+
+static void
+vndadm_get_usage(FILE *out)
+{
+ (void) fprintf(out,
+ "\tget:\t\t[-p] [-d delim] [-z zonename] link [prop]...\n");
+}
+
+static int
+vndadm_set(int argc, char *argv[])
+{
+ vnd_handle_t *vhp;
+ int c, i, syserr;
+ vnd_errno_t vnderr;
+ const char *zonename = NULL;
+
+ optind = 0;
+ while ((c = getopt(argc, argv, ":z:")) != -1) {
+ switch (c) {
+ case 'z':
+ zonename = optarg;
+ break;
+ case ':':
+ return (usage("-%c requires an operand\n", optopt));
+ case '?':
+ return (usage("unknown option: -%c\n", optopt));
+ default:
+ abort();
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if (argc < 2) {
+ return (usage("missing arguments to set\n"));
+ }
+
+ vhp = vnd_open(zonename, argv[0], &vnderr, &syserr);
+ if (vhp == NULL) {
+ vnd_libwarn(vnderr, syserr, "failed to open link: %s", argv[0]);
+ return (1);
+ }
+
+ for (i = 1; i < argc; i++) {
+ char *eq, *key, *value;
+ boolean_t writeable;
+ vnd_prop_t prop;
+ void *buf;
+ size_t psize;
+ int ret;
+
+ key = argv[i];
+ eq = strchr(key, '=');
+ if (eq == NULL) {
+ vnd_warn("invalid property name=value: %s\n", key);
+ return (1);
+ }
+ *eq = '\0';
+ value = eq + 1;
+ if (*value == '\0') {
+ vnd_warn("property value required for %s\n", key);
+ return (1);
+ }
+ prop = vndadm_name_to_prop(key);
+ if (prop == VND_PROP_MAX) {
+ vnd_warn("unknown property: %s\n", key);
+ return (1);
+ }
+
+ if (vnd_prop_writeable(prop, &writeable) != 0)
+ abort();
+ if (writeable != B_TRUE) {
+ vnd_warn("property %s is read-only\n", key);
+ return (1);
+ }
+ assert(vndadm_propname_tbl[prop].vp_parse != NULL);
+
+ /*
+ * vp_parse functions should say what explicitly is invalid. We
+ * should indicate that the property failed.
+ */
+ ret = vndadm_propname_tbl[prop].vp_parse(value, &buf, &psize);
+ if (ret != 0) {
+ vnd_warn("failed to set property %s\n", key);
+ return (1);
+ }
+
+ ret = vnd_prop_set(vhp, prop, buf, psize);
+ free(buf);
+ if (ret != 0) {
+ vnd_libwarn(vnd_errno(vhp), vnd_syserrno(vhp),
+ "failed to set property %s", key);
+ return (1);
+ }
+ }
+
+ return (0);
+}
+
+static void
+vndadm_set_usage(FILE *out)
+{
+ (void) fprintf(out, "\tset:\t\t[-z zonename] link prop=val...\n");
+}
+
+typedef struct vnd_cmdtab {
+ const char *vc_name;
+ int (*vc_op)(int, char *[]);
+ void (*vc_usage)(FILE *);
+} vnd_cmdtab_t;
+
+static vnd_cmdtab_t vnd_tab[] = {
+ { "create", vndadm_create, vndadm_create_usage },
+ { "destroy", vndadm_destroy, vndadm_destroy_usage },
+ { "list", vndadm_list, vndadm_list_usage },
+ { "get", vndadm_get, vndadm_get_usage },
+ { "set", vndadm_set, vndadm_set_usage },
+ { NULL, NULL }
+};
+
+static int
+usage(const char *format, ...)
+{
+ vnd_cmdtab_t *tab;
+ const char *help = "usage: %s <subcommand> <args> ...\n";
+
+ if (format != NULL) {
+ va_list alist;
+
+ va_start(alist, format);
+ (void) fprintf(stderr, "%s: ", vnd_pname);
+ (void) vfprintf(stderr, format, alist);
+ va_end(alist);
+ }
+ (void) fprintf(stderr, help, vnd_pname);
+ for (tab = vnd_tab; tab->vc_name != NULL; tab++)
+ tab->vc_usage(stderr);
+
+ return (2);
+}
+
+int
+main(int argc, char *argv[])
+{
+ vnd_cmdtab_t *tab;
+
+ vnd_pname = basename(argv[0]);
+ if (argc < 2) {
+ return (usage(NULL));
+ }
+
+ for (tab = vnd_tab; tab->vc_name != NULL; tab++) {
+ if (strcmp(argv[1], tab->vc_name) == 0) {
+ argc -= 2; argv += 2;
+ assert(argc >= 0);
+ return (tab->vc_op(argc, argv));
+ }
+ }
+
+ return (usage("unknown sub-command '%s'\n", argv[1]));
+}
diff --git a/usr/src/cmd/vndstat/Makefile b/usr/src/cmd/vndstat/Makefile
new file mode 100644
index 0000000000..c77eef3887
--- /dev/null
+++ b/usr/src/cmd/vndstat/Makefile
@@ -0,0 +1,33 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+PROG= vndstat
+
+include ../Makefile.cmd
+
+LDLIBS += -lkstat
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+install: all $(ROOTPROG)
+
+clean:
+ $(RM) $(PROG)
+
+lint: lint_PROG
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/vndstat/vndstat.c b/usr/src/cmd/vndstat/vndstat.c
new file mode 100644
index 0000000000..6f6c76fc12
--- /dev/null
+++ b/usr/src/cmd/vndstat/vndstat.c
@@ -0,0 +1,542 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/kstat.h>
+#include <kstat.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <strings.h>
+#include <alloca.h>
+#include <signal.h>
+#include <sys/varargs.h>
+#include <sys/int_limits.h>
+#include <sys/sysmacros.h>
+
+#define KSTAT_FIELD_USEINSTANCE 0x01
+#define KSTAT_FIELD_NODELTA 0x02
+#define KSTAT_FIELD_FILLER 0x04
+#define KSTAT_FIELD_STRING 0x08
+#define KSTAT_FIELD_UNIT 0x10
+#define KSTAT_FIELD_LJUST 0x20
+
+typedef struct kstat_field {
+ char *ksf_header; /* header for field */
+ char *ksf_name; /* name of stat, if any */
+ int ksf_width; /* width for field in output line */
+ uint32_t ksf_flags; /* flags for this field, if any */
+ char *ksf_suffix; /* optional suffix for units */
+ int ksf_hint; /* index hint for field in kstat */
+} kstat_field_t;
+
+typedef struct kstat_instance {
+ char ksi_name[KSTAT_STRLEN]; /* name of the underlying kstat */
+ int ksi_instance; /* instance identifer of this kstat */
+ kstat_t *ksi_ksp; /* pointer to the kstat */
+ uint64_t *ksi_data[2]; /* pointer to two generations of data */
+ hrtime_t ksi_snaptime[2]; /* hrtime for data generations */
+ int ksi_gen; /* current generation */
+ struct kstat_instance *ksi_next; /* next in instance list */
+} kstat_instance_t;
+
+const char *g_cmd = "vndstat";
+
+static void
+kstat_nicenum(uint64_t num, char *buf, size_t buflen)
+{
+ uint64_t n = num;
+ int index = 0;
+ char u;
+
+ while (n >= 1024) {
+ n /= 1024;
+ index++;
+ }
+
+ u = " KMGTPE"[index];
+
+ if (index == 0) {
+ (void) snprintf(buf, buflen, "%llu", n);
+ } else if ((num & ((1ULL << 10 * index) - 1)) == 0) {
+ /*
+ * If this is an even multiple of the base, always display
+ * without any decimal precision.
+ */
+ (void) snprintf(buf, buflen, "%llu%c", n, u);
+ } else {
+ /*
+ * We want to choose a precision that reflects the best choice
+ * for fitting in 5 characters. This can get rather tricky when
+ * we have numbers that are very close to an order of magnitude.
+ * For example, when displaying 10239 (which is really 9.999K),
+ * we want only a single place of precision for 10.0K. We could
+ * develop some complex heuristics for this, but it's much
+ * easier just to try each combination in turn.
+ */
+ int i;
+ for (i = 2; i >= 0; i--) {
+ if (snprintf(buf, buflen, "%.*f%c", i,
+ (double)num / (1ULL << 10 * index), u) <= 5)
+ break;
+ }
+ }
+}
+
+static void
+fatal(char *fmt, ...)
+{
+ va_list ap;
+ int error = errno;
+
+ va_start(ap, fmt);
+
+ (void) fprintf(stderr, "%s: ", g_cmd);
+ /*LINTED*/
+ (void) vfprintf(stderr, fmt, ap);
+
+ if (fmt[strlen(fmt) - 1] != '\n')
+ (void) fprintf(stderr, ": %s\n", strerror(error));
+
+ exit(EXIT_FAILURE);
+}
+
+int
+kstat_field_hint(kstat_t *ksp, kstat_field_t *field)
+{
+ kstat_named_t *nm = KSTAT_NAMED_PTR(ksp);
+ int i;
+
+ assert(ksp->ks_type == KSTAT_TYPE_NAMED);
+
+ for (i = 0; i < ksp->ks_ndata; i++) {
+ if (strcmp(field->ksf_name, nm[i].name) == 0)
+ return (field->ksf_hint = i);
+ }
+
+ fatal("could not find field '%s' in %s:%d\n",
+ field->ksf_name, ksp->ks_name, ksp->ks_instance);
+
+ return (0);
+}
+
+int
+kstat_instances_compare(const void *lhs, const void *rhs)
+{
+ kstat_instance_t *l = *((kstat_instance_t **)lhs);
+ kstat_instance_t *r = *((kstat_instance_t **)rhs);
+ int rval;
+
+ if ((rval = strcmp(l->ksi_name, r->ksi_name)) != 0)
+ return (rval);
+
+ if (l->ksi_instance < r->ksi_instance)
+ return (-1);
+
+ if (l->ksi_instance > r->ksi_instance)
+ return (1);
+
+ return (0);
+}
+
+void
+kstat_instances_update(kstat_ctl_t *kcp, kstat_instance_t **head,
+ boolean_t (*interested)(kstat_t *))
+{
+ int ninstances = 0, i;
+ kstat_instance_t **sorted, *ksi, *next;
+ kstat_t *ksp;
+ kid_t kid;
+
+ if ((kid = kstat_chain_update(kcp)) == 0 && *head != NULL)
+ return;
+
+ if (kid == -1)
+ fatal("failed to update kstat chain");
+
+ for (ksi = *head; ksi != NULL; ksi = ksi->ksi_next)
+ ksi->ksi_ksp = NULL;
+
+ for (ksp = kcp->kc_chain; ksp != NULL; ksp = ksp->ks_next) {
+ kstat_instance_t *last = NULL;
+
+ if (!interested(ksp))
+ continue;
+
+ /*
+ * Now look to see if we have this instance and name. (Yes,
+ * this is a linear search; we're assuming that this list is
+ * modest in size.)
+ */
+ for (ksi = *head; ksi != NULL; ksi = ksi->ksi_next) {
+ last = ksi;
+
+ if (ksi->ksi_instance != ksp->ks_instance)
+ continue;
+
+ if (strcmp(ksi->ksi_name, ksp->ks_name) != 0)
+ continue;
+
+ ksi->ksi_ksp = ksp;
+ ninstances++;
+ break;
+ }
+
+ if (ksi != NULL)
+ continue;
+
+ if ((ksi = malloc(sizeof (kstat_instance_t))) == NULL)
+ fatal("could not allocate memory for stat instance");
+
+ bzero(ksi, sizeof (kstat_instance_t));
+ (void) strlcpy(ksi->ksi_name, ksp->ks_name, KSTAT_STRLEN);
+ ksi->ksi_instance = ksp->ks_instance;
+ ksi->ksi_ksp = ksp;
+ ksi->ksi_next = NULL;
+
+ if (last == NULL) {
+ assert(*head == NULL);
+ *head = ksi;
+ } else {
+ last->ksi_next = ksi;
+ }
+
+ ninstances++;
+ }
+
+ /*
+ * Now we know how many instances we have; iterate back over them,
+ * pruning the stale ones and adding the active ones to a holding
+ * array in which to sort them.
+ */
+ sorted = (void *)alloca(ninstances * sizeof (kstat_instance_t *));
+ ninstances = 0;
+
+ for (ksi = *head; ksi != NULL; ksi = next) {
+ next = ksi->ksi_next;
+
+ if (ksi->ksi_ksp == NULL) {
+ free(ksi);
+ } else {
+ sorted[ninstances++] = ksi;
+ }
+ }
+
+ if (ninstances == 0) {
+ *head = NULL;
+ return;
+ }
+
+ qsort(sorted, ninstances, sizeof (kstat_instance_t *),
+ kstat_instances_compare);
+
+ *head = sorted[0];
+
+ for (i = 0; i < ninstances; i++) {
+ ksi = sorted[i];
+ ksi->ksi_next = i < ninstances - 1 ? sorted[i + 1] : NULL;
+ }
+}
+
+void
+kstat_instances_read(kstat_ctl_t *kcp, kstat_instance_t *instances,
+ kstat_field_t *fields)
+{
+ kstat_instance_t *ksi;
+ int i, nfields;
+
+ for (nfields = 0; fields[nfields].ksf_header != NULL; nfields++)
+ continue;
+
+ for (ksi = instances; ksi != NULL; ksi = ksi->ksi_next) {
+ kstat_t *ksp = ksi->ksi_ksp;
+
+ if (ksp == NULL)
+ continue;
+
+ if (kstat_read(kcp, ksp, NULL) == -1) {
+ if (errno == ENXIO) {
+ /*
+ * Our kstat has been removed since the update;
+ * NULL it out to prevent us from trying to read
+ * it again (and to indicate that it should not
+ * be displayed) and drive on.
+ */
+ ksi->ksi_ksp = NULL;
+ continue;
+ }
+
+ fatal("failed to read kstat %s:%d",
+ ksi->ksi_name, ksi->ksi_instance);
+ }
+
+ if (ksp->ks_type != KSTAT_TYPE_NAMED) {
+ fatal("%s:%d is not a named kstat", ksi->ksi_name,
+ ksi->ksi_instance);
+ }
+
+ if (ksi->ksi_data[0] == NULL) {
+ size_t size = nfields * sizeof (uint64_t) * 2;
+ uint64_t *data;
+
+ if ((data = malloc(size)) == NULL)
+ fatal("could not allocate memory");
+
+ bzero(data, size);
+ ksi->ksi_data[0] = data;
+ ksi->ksi_data[1] = &data[nfields];
+ }
+
+ for (i = 0; i < nfields; i++) {
+ kstat_named_t *nm = KSTAT_NAMED_PTR(ksp);
+ kstat_field_t *field = &fields[i];
+ int hint = field->ksf_hint;
+
+ if (field->ksf_name == NULL)
+ continue;
+
+ if (hint < 0 || hint >= ksp->ks_ndata ||
+ strcmp(field->ksf_name, nm[hint].name) != 0) {
+ hint = kstat_field_hint(ksp, field);
+ }
+
+ if (field->ksf_flags & KSTAT_FIELD_STRING)
+ ksi->ksi_data[ksi->ksi_gen][i] =
+ (uint64_t)(uintptr_t)
+ nm[hint].value.str.addr.ptr;
+ else
+ ksi->ksi_data[ksi->ksi_gen][i] =
+ nm[hint].value.ui64;
+ }
+
+ ksi->ksi_snaptime[ksi->ksi_gen] = ksp->ks_snaptime;
+ ksi->ksi_gen ^= 1;
+ }
+}
+
+uint64_t
+kstat_instances_delta(kstat_instance_t *ksi, int i)
+{
+ int gen = ksi->ksi_gen;
+ uint64_t delta = ksi->ksi_data[gen ^ 1][i] - ksi->ksi_data[gen][i];
+ uint64_t tdelta = ksi->ksi_snaptime[gen ^ 1] - ksi->ksi_snaptime[gen];
+
+ return (((delta * (uint64_t)NANOSEC) + (tdelta / 2)) / tdelta);
+}
+
+void
+kstat_instances_print(kstat_instance_t *instances, kstat_field_t *fields,
+ boolean_t header)
+{
+ kstat_instance_t *ksi = instances;
+ int i, nfields;
+
+ for (nfields = 0; fields[nfields].ksf_header != NULL; nfields++)
+ continue;
+
+ if (header) {
+ for (i = 0; i < nfields; i++) {
+ if (fields[i].ksf_flags & KSTAT_FIELD_LJUST) {
+ (void) printf("%s%c", fields[i].ksf_header,
+ i < nfields - 1 ? ' ' : '\n');
+ continue;
+ }
+ (void) printf("%*s%c", fields[i].ksf_width,
+ fields[i].ksf_header, i < nfields - 1 ? ' ' : '\n');
+ }
+ }
+
+ for (ksi = instances; ksi != NULL; ksi = ksi->ksi_next) {
+ if (ksi->ksi_snaptime[1] == 0 || ksi->ksi_ksp == NULL)
+ continue;
+
+ for (i = 0; i < nfields; i++) {
+ char trailer = i < nfields - 1 ? ' ' : '\n';
+
+ if (fields[i].ksf_flags & KSTAT_FIELD_FILLER) {
+ (void) printf("%*s%c", fields[i].ksf_width,
+ fields[i].ksf_header, trailer);
+ continue;
+ }
+
+ if (fields[i].ksf_flags & KSTAT_FIELD_STRING) {
+ (void) printf("%*s%c", fields[i].ksf_width,
+ (char *)(uintptr_t)ksi->ksi_data[
+ ksi->ksi_gen ^ 1][i],
+ trailer);
+ continue;
+ }
+
+ if (fields[i].ksf_flags & KSTAT_FIELD_UNIT) {
+ char buf[128];
+ size_t flen = fields[i].ksf_width + 1;
+ const char *suffix = "";
+
+ if (fields[i].ksf_suffix != NULL) {
+ suffix = fields[i].ksf_suffix;
+ flen -= strlen(fields[i].ksf_suffix);
+ }
+
+ kstat_nicenum(fields[i].ksf_flags &
+ KSTAT_FIELD_NODELTA ?
+ ksi->ksi_data[ksi->ksi_gen ^ 1][i] :
+ kstat_instances_delta(ksi, i), buf,
+ MIN(sizeof (buf), flen));
+ (void) printf("%*s%s%c", flen - 1, buf,
+ suffix, trailer);
+ continue;
+ }
+
+ (void) printf("%*lld%c", fields[i].ksf_width,
+ fields[i].ksf_flags & KSTAT_FIELD_USEINSTANCE ?
+ ksi->ksi_instance :
+ fields[i].ksf_flags & KSTAT_FIELD_NODELTA ?
+ ksi->ksi_data[ksi->ksi_gen ^ 1][i] :
+ kstat_instances_delta(ksi, i), trailer);
+ }
+ }
+}
+
+static boolean_t
+interested(kstat_t *ksp)
+{
+ const char *module = "vnd";
+ const char *class = "net";
+
+ if (strcmp(ksp->ks_module, module) != 0)
+ return (B_FALSE);
+
+ if (strcmp(ksp->ks_class, class) != 0)
+ return (B_FALSE);
+
+ return (B_TRUE);
+}
+
+/* BEGIN CSTYLED */
+char *g_usage = "Usage: vndstat [interval [count]]\n"
+ "\n"
+ " Displays statistics for active vnd devices, with one line per device.\n"
+ " All statistics are reported as per-second rates.\n"
+ "\n"
+ " The columns are as follows:\n"
+ "\n"
+ " zone => name of the zone with the device\n"
+ " name => name of the vnd device\n"
+ " rx => bytes received\n"
+ " tx => bytes transmitted\n"
+ " drops => number of dropped packets\n"
+ " txfc => number of transmit flow control events\n"
+ "\n";
+/* END CSTYLED */
+
+void
+usage()
+{
+ (void) fprintf(stderr, "%s", g_usage);
+ exit(EXIT_FAILURE);
+}
+
+/*ARGSUSED*/
+void
+intr(int sig)
+{}
+
+/*ARGSUSED*/
+int
+main(int argc, char **argv)
+{
+ kstat_ctl_t *kcp;
+ kstat_instance_t *instances = NULL;
+ int i = 0;
+ int interval = 1;
+ int count = INT32_MAX;
+ struct itimerval itimer;
+ struct sigaction act;
+ sigset_t set;
+ char *endp;
+
+ kstat_field_t fields[] = {
+ { "name", "linkname", 6, KSTAT_FIELD_STRING },
+ { "|", NULL, 1, KSTAT_FIELD_FILLER },
+ { "rx B/s", "rbytes", 8, KSTAT_FIELD_UNIT, "B/s" },
+ { "|", NULL, 1, KSTAT_FIELD_FILLER },
+ { "tx B/s", "obytes", 8, KSTAT_FIELD_UNIT, "B/s" },
+ { "|", NULL, 1, KSTAT_FIELD_FILLER },
+ { "drops", "total_drops", 5 },
+ { "txfc", "flowcontrol_events", 4 },
+ { "|", NULL, 1, KSTAT_FIELD_FILLER },
+ { "zone", "zonename", 36,
+ KSTAT_FIELD_STRING | KSTAT_FIELD_LJUST },
+ { NULL }
+ };
+
+ if (argc > 1) {
+ interval = strtol(argv[1], &endp, 10);
+
+ if (*endp != '\0' || interval <= 0)
+ usage();
+ }
+
+ if (argc > 2) {
+ count = strtol(argv[2], &endp, 10);
+
+ if (*endp != '\0' || count <= 0)
+ usage();
+ }
+
+ if ((kcp = kstat_open()) == NULL)
+ fatal("could not open /dev/kstat");
+
+ (void) sigemptyset(&act.sa_mask);
+ act.sa_flags = 0;
+ act.sa_handler = intr;
+ (void) sigaction(SIGALRM, &act, NULL);
+
+ (void) sigemptyset(&set);
+ (void) sigaddset(&set, SIGALRM);
+ (void) sigprocmask(SIG_BLOCK, &set, NULL);
+
+ bzero(&itimer, sizeof (itimer));
+ itimer.it_value.tv_sec = interval;
+ itimer.it_interval.tv_sec = interval;
+
+ if (setitimer(ITIMER_REAL, &itimer, NULL) != 0) {
+ fatal("could not set timer to %d second%s", interval,
+ interval == 1 ? "" : "s");
+ }
+
+ (void) sigemptyset(&set);
+
+ for (;;) {
+ kstat_instances_update(kcp, &instances, interested);
+ kstat_instances_read(kcp, instances, fields);
+
+ if (i++ > 0) {
+ kstat_instances_print(instances, fields,
+ instances != NULL && instances->ksi_next == NULL ?
+ (((i - 2) % 20) == 0) : B_TRUE);
+ }
+
+ if (i > count)
+ break;
+
+ (void) sigsuspend(&set);
+ }
+
+ /*NOTREACHED*/
+ return (0);
+}
diff --git a/usr/src/cmd/zfs/zfs_main.c b/usr/src/cmd/zfs/zfs_main.c
index 109a2cf109..a2828db3e9 100644
--- a/usr/src/cmd/zfs/zfs_main.c
+++ b/usr/src/cmd/zfs/zfs_main.c
@@ -225,7 +225,7 @@ get_usage(zfs_help_t idx)
"<filesystem|volume>@<snap>[%<snap>][,...]\n"
"\tdestroy <filesystem|volume>#<bookmark>\n"));
case HELP_GET:
- return (gettext("\tget [-rHp] [-d max] "
+ return (gettext("\tget [-crHp] [-d max] "
"[-o \"all\" | field[,...]]\n"
"\t [-t type[,...]] [-s source[,...]]\n"
"\t <\"all\" | property[,...]> "
@@ -576,7 +576,7 @@ finish_progress(char *done)
}
/*
- * zfs clone [-p] [-o prop=value] ... <snap> <fs | vol>
+ * zfs clone [-Fp] [-o prop=value] ... <snap> <fs | vol>
*
* Given an existing dataset, create a writable copy whose initial contents
* are the same as the source. The newly created dataset maintains a
@@ -584,12 +584,18 @@ finish_progress(char *done)
* the clone exists.
*
* The '-p' flag creates all the non-existing ancestors of the target first.
+ *
+ * The '-F' flag retries the zfs_mount() operation as long as zfs_mount() is
+ * still returning EBUSY. Any callers which specify -F should be careful to
+ * ensure that no other process has a persistent hold on the mountpoint's
+ * directory.
*/
static int
zfs_do_clone(int argc, char **argv)
{
zfs_handle_t *zhp = NULL;
boolean_t parents = B_FALSE;
+ boolean_t keeptrying = B_FALSE;
nvlist_t *props;
int ret = 0;
int c;
@@ -598,8 +604,11 @@ zfs_do_clone(int argc, char **argv)
nomem();
/* check options */
- while ((c = getopt(argc, argv, "o:p")) != -1) {
+ while ((c = getopt(argc, argv, "Fo:p")) != -1) {
switch (c) {
+ case 'F':
+ keeptrying = B_TRUE;
+ break;
case 'o':
if (parseprop(props))
return (1);
@@ -660,9 +669,14 @@ zfs_do_clone(int argc, char **argv)
clone = zfs_open(g_zfs, argv[1], ZFS_TYPE_DATASET);
if (clone != NULL) {
- if (zfs_get_type(clone) != ZFS_TYPE_VOLUME)
- if ((ret = zfs_mount(clone, NULL, 0)) == 0)
+ if (zfs_get_type(clone) != ZFS_TYPE_VOLUME) {
+ while ((ret = zfs_mount(clone, NULL, 0)) != 0) {
+ if (!keeptrying || errno != EBUSY)
+ break;
+ }
+ if (ret == 0)
ret = zfs_share(clone);
+ }
zfs_close(clone);
}
}
@@ -886,12 +900,13 @@ badusage:
}
/*
- * zfs destroy [-rRf] <fs, vol>
+ * zfs destroy [-rRfF] <fs, vol>
* zfs destroy [-rRd] <snap>
*
* -r Recursively destroy all children
* -R Recursively destroy all dependents, including clones
* -f Force unmounting of any dependents
+ * -F Continue retrying on seeing EBUSY
* -d If we can't destroy now, mark for deferred destruction
*
* Destroys the given dataset. By default, it will unmount any filesystems,
@@ -901,6 +916,7 @@ badusage:
typedef struct destroy_cbdata {
boolean_t cb_first;
boolean_t cb_force;
+ boolean_t cb_wait;
boolean_t cb_recurse;
boolean_t cb_error;
boolean_t cb_doclones;
@@ -984,13 +1000,18 @@ out:
static int
destroy_callback(zfs_handle_t *zhp, void *data)
{
- destroy_cbdata_t *cb = data;
+ destroy_cbdata_t *cbp = data;
+ struct timespec ts;
+ int err = 0;
+
+ ts.tv_sec = 0;
+ ts.tv_nsec = 500 * (NANOSEC / MILLISEC);
const char *name = zfs_get_name(zhp);
- if (cb->cb_verbose) {
- if (cb->cb_parsable) {
+ if (cbp->cb_verbose) {
+ if (cbp->cb_parsable) {
(void) printf("destroy\t%s\n", name);
- } else if (cb->cb_dryrun) {
+ } else if (cbp->cb_dryrun) {
(void) printf(gettext("would destroy %s\n"),
name);
} else {
@@ -1005,13 +1026,10 @@ destroy_callback(zfs_handle_t *zhp, void *data)
*/
if (strchr(zfs_get_name(zhp), '/') == NULL &&
zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) {
- zfs_close(zhp);
- return (0);
- }
- if (cb->cb_dryrun) {
- zfs_close(zhp);
- return (0);
+ goto out;
}
+ if (cbp->cb_dryrun)
+ goto out;
/*
* We batch up all contiguous snapshots (even of different
@@ -1020,23 +1038,66 @@ destroy_callback(zfs_handle_t *zhp, void *data)
* because we must delete a clone before its origin.
*/
if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT) {
- fnvlist_add_boolean(cb->cb_batchedsnaps, name);
- } else {
- int error = zfs_destroy_snaps_nvl(g_zfs,
- cb->cb_batchedsnaps, B_FALSE);
- fnvlist_free(cb->cb_batchedsnaps);
- cb->cb_batchedsnaps = fnvlist_alloc();
-
- if (error != 0 ||
- zfs_unmount(zhp, NULL, cb->cb_force ? MS_FORCE : 0) != 0 ||
- zfs_destroy(zhp, cb->cb_defer_destroy) != 0) {
- zfs_close(zhp);
- return (-1);
+ fnvlist_add_boolean(cbp->cb_batchedsnaps, name);
+ goto out;
+ }
+
+ if (cbp->cb_wait)
+ libzfs_print_on_error(g_zfs, B_FALSE);
+
+ /*
+ * Unless instructed to retry on EBUSY, bail out on the first error.
+ * When retrying, try every 500ms until either succeeding or seeing a
+ * non-EBUSY error code.
+ */
+ while ((err = zfs_destroy_snaps_nvl(g_zfs,
+ cbp->cb_batchedsnaps, B_FALSE)) != 0) {
+ if (cbp->cb_wait && libzfs_errno(g_zfs) == EZFS_BUSY) {
+ nanosleep(&ts, NULL);
+ continue;
+ }
+ (void) fprintf(stderr, "%s: %s\n",
+ libzfs_error_action(g_zfs),
+ libzfs_error_description(g_zfs));
+ break;
+ }
+
+ fnvlist_free(cbp->cb_batchedsnaps);
+ cbp->cb_batchedsnaps = fnvlist_alloc();
+
+ if (err != 0)
+ goto out;
+
+ while ((err = zfs_unmount(zhp, NULL,
+ cbp->cb_force ? MS_FORCE : 0)) != 0) {
+ if (cbp->cb_wait && libzfs_errno(g_zfs) == EZFS_BUSY) {
+ (void) nanosleep(&ts, NULL);
+ continue;
}
+ (void) fprintf(stderr, "%s: %s\n",
+ libzfs_error_action(g_zfs),
+ libzfs_error_description(g_zfs));
+ break;
}
+ if (err != 0)
+ goto out;
+
+ while ((err = zfs_destroy(zhp, cbp->cb_defer_destroy)) != 0) {
+ if (cbp->cb_wait && libzfs_errno(g_zfs) == EZFS_BUSY) {
+ (void) nanosleep(&ts, NULL);
+ continue;
+ }
+ (void) fprintf(stderr, "%s: %s\n",
+ libzfs_error_action(g_zfs),
+ libzfs_error_description(g_zfs));
+ break;
+ }
+
+out:
+ libzfs_print_on_error(g_zfs, B_TRUE);
zfs_close(zhp);
- return (0);
+ return (err);
}
static int
@@ -1192,7 +1253,7 @@ zfs_do_destroy(int argc, char **argv)
zfs_type_t type = ZFS_TYPE_DATASET;
/* check options */
- while ((c = getopt(argc, argv, "vpndfrR")) != -1) {
+ while ((c = getopt(argc, argv, "vpndfFrR")) != -1) {
switch (c) {
case 'v':
cb.cb_verbose = B_TRUE;
@@ -1211,6 +1272,9 @@ zfs_do_destroy(int argc, char **argv)
case 'f':
cb.cb_force = B_TRUE;
break;
+ case 'F':
+ cb.cb_wait = B_TRUE;
+ break;
case 'r':
cb.cb_recurse = B_TRUE;
break;
@@ -1581,8 +1645,11 @@ zfs_do_get(int argc, char **argv)
cb.cb_type = ZFS_TYPE_DATASET;
/* check options */
- while ((c = getopt(argc, argv, ":d:o:s:rt:Hp")) != -1) {
+ while ((c = getopt(argc, argv, ":d:o:s:rt:Hcp")) != -1) {
switch (c) {
+ case 'c':
+ libzfs_set_cachedprops(g_zfs, B_TRUE);
+ break;
case 'p':
cb.cb_literal = B_TRUE;
break;
@@ -3008,6 +3075,7 @@ zfs_do_list(int argc, char **argv)
int types = ZFS_TYPE_DATASET;
boolean_t types_specified = B_FALSE;
char *fields = NULL;
+ zprop_list_t *pl;
list_cbdata_t cb = { 0 };
char *value;
int limit = 0;
@@ -3119,6 +3187,18 @@ zfs_do_list(int argc, char **argv)
!= 0)
usage(B_FALSE);
+ /*
+ * The default set of properties contains only properties which can be
+ * retrieved from the set of cached properties. If any user-specfied
+ * properties cannot be retrieved from that set, unset the cachedprops
+ * flags on the ZFS handle.
+ */
+ libzfs_set_cachedprops(g_zfs, B_TRUE);
+ for (pl = cb.cb_proplist; pl != NULL; pl = pl->pl_next) {
+ if (zfs_prop_cacheable(pl->pl_prop))
+ libzfs_set_cachedprops(g_zfs, B_FALSE);
+ }
+
cb.cb_first = B_TRUE;
ret = zfs_for_each(argc, argv, flags, types, sortcol, &cb.cb_proplist,
diff --git a/usr/src/cmd/zlogin/zlogin.c b/usr/src/cmd/zlogin/zlogin.c
index 227e5dffcb..a5bd206b11 100644
--- a/usr/src/cmd/zlogin/zlogin.c
+++ b/usr/src/cmd/zlogin/zlogin.c
@@ -23,6 +23,7 @@
* Copyright 2013 DEY Storage Systems, Inc.
* Copyright (c) 2014 Gary Mills
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
*/
/*
@@ -35,7 +36,9 @@
* loop between parent and child processes takes care of the interactive
* session. In this mode, login(1) (and its -c option, which means
* "already authenticated") is employed to take care of the initialization
- * of the user's session.
+ * of the user's session. Interactive login can also be forced when running
+ * a specific command by specifying the -i option; for example, the user
+ * could issue 'zlogin -i my-zone /bin/sh'.
*
* - "non-interactive login" is similar to su(1M); the user could issue
* 'zlogin my-zone ls -l' and the command would be run as specified.
@@ -122,7 +125,8 @@ static boolean_t forced_login = B_FALSE;
#define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
#endif
-#define SUPATH "/usr/bin/su"
+#define SUPATH1 "/usr/bin/su"
+#define SUPATH2 "/bin/su"
#define FAILSAFESHELL "/sbin/sh"
#define DEFAULTSHELL "/sbin/sh"
#define DEF_PATH "/usr/sbin:/usr/bin"
@@ -152,7 +156,7 @@ static boolean_t forced_login = B_FALSE;
static void
usage(void)
{
- (void) fprintf(stderr, gettext("usage: %s [ -nQCES ] [ -e cmdchar ] "
+ (void) fprintf(stderr, gettext("usage: %s [ -inQCES ] [ -e cmdchar ] "
"[-l user] zonename [command [args ...] ]\n"), pname);
exit(2);
}
@@ -1098,7 +1102,7 @@ zone_login_cmd(brand_handle_t bh, const char *login)
* checks).
*/
static char **
-prep_args(brand_handle_t bh, const char *login, char **argv)
+prep_args(brand_handle_t bh, char *zonename, const char *login, char **argv)
{
int argc = 0, a = 0, i, n = -1;
char **new_argv;
@@ -1128,11 +1132,37 @@ prep_args(brand_handle_t bh, const char *login, char **argv)
new_argv[a++] = FAILSAFESHELL;
} else {
+ struct stat sb;
+ char zonepath[MAXPATHLEN];
+ char supath[MAXPATHLEN];
+
n = 5;
if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
return (NULL);
- new_argv[a++] = SUPATH;
+ if (zone_get_zonepath(zonename, zonepath,
+ sizeof (zonepath)) != Z_OK) {
+ zerror(gettext("unable to determine zone "
+ "path"));
+ return (NULL);
+ }
+
+ (void) snprintf(supath, sizeof (supath), "%s/root/%s",
+ zonepath, SUPATH1);
+ if (stat(supath, &sb) == 0) {
+ new_argv[a++] = SUPATH1;
+ } else {
+ (void) snprintf(supath, sizeof (supath),
+ "%s/root/%s", zonepath, SUPATH2);
+ if (stat(supath, &sb) == 0) {
+ new_argv[a++] = SUPATH2;
+ } else {
+ zerror(gettext("unable to find 'su' "
+ "command"));
+ return (NULL);
+ }
+ }
+
if (strcmp(login, "root") != 0) {
new_argv[a++] = "-";
n++;
@@ -1729,6 +1759,7 @@ main(int argc, char **argv)
zoneid_t zoneid;
zone_state_t st;
char *login = "root";
+ int iflag = 0;
int lflag = 0;
int nflag = 0;
char *zonename = NULL;
@@ -1753,7 +1784,7 @@ main(int argc, char **argv)
(void) getpname(argv[0]);
username = get_username();
- while ((arg = getopt(argc, argv, "nECR:Se:l:Q")) != EOF) {
+ while ((arg = getopt(argc, argv, "inECR:Se:l:Q")) != EOF) {
switch (arg) {
case 'C':
console = 1;
@@ -1782,6 +1813,9 @@ main(int argc, char **argv)
case 'e':
set_cmdchar(optarg);
break;
+ case 'i':
+ iflag = 1;
+ break;
case 'l':
login = optarg;
lflag = 1;
@@ -1822,6 +1856,11 @@ main(int argc, char **argv)
}
+ if (iflag !=0 && nflag != 0) {
+ zerror(gettext("-i and -n flags are incompatible"));
+ usage();
+ }
+
if (failsafe != 0 && lflag != 0) {
zerror(gettext("-l may not be specified for failsafe login"));
usage();
@@ -1849,7 +1888,8 @@ main(int argc, char **argv)
/* zone name and process name, and possibly some args */
zonename = argv[optind];
proc_args = &argv[optind + 1];
- interactive = 0;
+ if (iflag && isatty(STDIN_FILENO))
+ interactive = 1;
} else {
usage();
}
@@ -2041,7 +2081,7 @@ main(int argc, char **argv)
return (1);
}
- if ((new_args = prep_args(bh, login, proc_args)) == NULL) {
+ if ((new_args = prep_args(bh, zonename, login, proc_args)) == NULL) {
zperror(gettext("could not assemble new arguments"));
brand_close(bh);
return (1);
@@ -2232,8 +2272,18 @@ main(int argc, char **argv)
/*
* In failsafe mode, we don't use login(1), so don't try
* setting up a utmpx entry.
+ *
+ * A branded zone may have very different utmpx semantics.
+ * At the moment, we only have two brand types:
+ * Solaris-like (native, sn1) and Linux. In the Solaris
+ * case, we know exactly how to do the necessary utmpx
+ * setup. Fortunately for us, the Linux /bin/login is
+ * prepared to deal with a non-initialized utmpx entry, so
+ * we can simply skip it. If future brands don't fall into
+ * either category, we'll have to add a per-brand utmpx
+ * setup hook.
*/
- if (!failsafe)
+ if (!failsafe && (strcmp(zonebrand, "lx") != 0))
if (setup_utmpx(slaveshortname) == -1)
return (1);
diff --git a/usr/src/cmd/zoneadm/Makefile b/usr/src/cmd/zoneadm/Makefile
index 2b01078aec..fba7809c71 100644
--- a/usr/src/cmd/zoneadm/Makefile
+++ b/usr/src/cmd/zoneadm/Makefile
@@ -21,14 +21,18 @@
#
# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, Joyent, Inc. All rights reserved.
#
PROG= zoneadm
+SCRIPTS=
MANIFEST= zones.xml resource-mgmt.xml
SVCMETHOD= svc-zones svc-resource-mgmt
include ../Makefile.cmd
+include ../Makefile.ctf
+ROOTUSRSBINSCRIPTS= $(SCRIPTS:%=$(ROOTUSRSBIN)/%)
ROOTMANIFESTDIR= $(ROOTSVCSYSTEM)
OBJS= zoneadm.o zfs.o
@@ -42,13 +46,14 @@ CERRWARN += -_gcc=-Wno-uninitialized
.KEEP_STATE:
-all: $(PROG)
+all: $(PROG) $(SCRIPTS)
$(PROG): $(OBJS)
$(LINK.c) -o $@ $(OBJS) $(LDLIBS)
$(POST_PROCESS)
-install: all $(ROOTUSRSBINPROG) $(ROOTMANIFEST) $(ROOTSVCMETHOD)
+install: all $(ROOTUSRSBINPROG) $(ROOTUSRSBINSCRIPTS) $(ROOTMANIFEST) \
+ $(ROOTSVCMETHOD)
check: $(PROG).c $(CHKMANIFEST)
$(CSTYLE) -pP $(SRCS:%=%)
@@ -58,7 +63,7 @@ $(POFILE): $(POFILES)
$(CAT) $(POFILES) > $@
clean:
- $(RM) $(OBJS) $(POFILES)
+ $(RM) $(OBJS) $(POFILES) $(SCRIPTS)
lint: lint_SRCS
diff --git a/usr/src/cmd/zoneadm/svc-zones b/usr/src/cmd/zoneadm/svc-zones
index 9d307835bd..30d54f5272 100644
--- a/usr/src/cmd/zoneadm/svc-zones
+++ b/usr/src/cmd/zoneadm/svc-zones
@@ -32,7 +32,7 @@
shutdown_zones()
{
zoneadm list -p | nawk -F: '{
- if ($2 != "global") {
+ if (($5 != "lx") && ($2 != "global")) {
print $2
}
}'
diff --git a/usr/src/cmd/zoneadm/zfs.c b/usr/src/cmd/zoneadm/zfs.c
index d27b9c4678..78c165ffd1 100644
--- a/usr/src/cmd/zoneadm/zfs.c
+++ b/usr/src/cmd/zoneadm/zfs.c
@@ -22,7 +22,7 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
/*
@@ -968,6 +968,7 @@ create_zfs_zonepath(char *zonepath)
zfs_handle_t *zhp;
char zfs_name[MAXPATHLEN];
nvlist_t *props = NULL;
+ int i;
if (path2name(zonepath, zfs_name, sizeof (zfs_name)) != Z_OK)
return;
@@ -1004,9 +1005,20 @@ create_zfs_zonepath(char *zonepath)
nvlist_free(props);
- if (zfs_mount(zhp, NULL, 0) != 0) {
+ /*
+ * A monitoring tool might race with us and touch the mountpoint just
+ * as we're trying to mount, blocking the mount. We wait and retry a
+ * few times to workaround this race.
+ */
+ for (i = 0; i < 5; i++) {
+ if (zfs_mount(zhp, NULL, 0) == 0)
+ break;
(void) fprintf(stderr, gettext("cannot mount ZFS dataset %s: "
"%s\n"), zfs_name, libzfs_error_description(g_zfs));
+ (void) sleep(1);
+ }
+
+ if (i >= 5) {
(void) zfs_destroy(zhp, B_FALSE);
} else {
if (chmod(zonepath, S_IRWXU) != 0) {
diff --git a/usr/src/cmd/zoneadm/zoneadm.c b/usr/src/cmd/zoneadm/zoneadm.c
index 2c25e18a53..6fdd00e39c 100644
--- a/usr/src/cmd/zoneadm/zoneadm.c
+++ b/usr/src/cmd/zoneadm/zoneadm.c
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2014, Joyent Inc. All rights reserved.
*/
/*
@@ -100,6 +101,7 @@ typedef struct zone_entry {
char zroot[MAXPATHLEN];
char zuuid[UUID_PRINTABLE_STRING_LENGTH];
zone_iptype_t ziptype;
+ zoneid_t zdid;
} zone_entry_t;
#define CLUSTER_BRAND_NAME "cluster"
@@ -442,6 +444,7 @@ zone_print(zone_entry_t *zent, boolean_t verbose, boolean_t parsable)
}
if (!verbose) {
char *cp, *clim;
+ char zdid[80];
if (!parsable) {
(void) printf("%s\n", zent->zname);
@@ -457,8 +460,12 @@ zone_print(zone_entry_t *zent, boolean_t verbose, boolean_t parsable)
(void) printf("%.*s\\:", clim - cp, cp);
cp = clim + 1;
}
- (void) printf("%s:%s:%s:%s\n", cp, zent->zuuid, zent->zbrand,
- ip_type_str);
+ if (zent->zdid == -1)
+ zdid[0] = '\0';
+ else
+ (void) snprintf(zdid, sizeof (zdid), "%d", zent->zdid);
+ (void) printf("%s:%s:%s:%s:%s\n", cp, zent->zuuid, zent->zbrand,
+ ip_type_str, zdid);
return;
}
if (zent->zstate_str != NULL) {
@@ -553,6 +560,22 @@ lookup_zone_info(const char *zone_name, zoneid_t zid, zone_entry_t *zent)
return (Z_OK);
}
+ if ((handle = zonecfg_init_handle()) == NULL) {
+ zperror2(zent->zname, gettext("could not init handle"));
+ return (Z_ERR);
+ }
+ if ((err = zonecfg_get_handle(zent->zname, handle)) != Z_OK) {
+ zperror2(zent->zname, gettext("could not get handle"));
+ zonecfg_fini_handle(handle);
+ return (Z_ERR);
+ }
+
+ if ((err = zonecfg_get_iptype(handle, &zent->ziptype)) != Z_OK) {
+ zperror2(zent->zname, gettext("could not get ip-type"));
+ zonecfg_fini_handle(handle);
+ return (Z_ERR);
+ }
+
/*
* There is a race condition where the zone could boot while
* we're walking the index file. In this case the zone state
@@ -573,25 +596,11 @@ lookup_zone_info(const char *zone_name, zoneid_t zid, zone_entry_t *zent)
zent->ziptype = ZS_EXCLUSIVE;
else
zent->ziptype = ZS_SHARED;
- return (Z_OK);
}
}
- if ((handle = zonecfg_init_handle()) == NULL) {
- zperror2(zent->zname, gettext("could not init handle"));
- return (Z_ERR);
- }
- if ((err = zonecfg_get_handle(zent->zname, handle)) != Z_OK) {
- zperror2(zent->zname, gettext("could not get handle"));
- zonecfg_fini_handle(handle);
- return (Z_ERR);
- }
+ zent->zdid = zonecfg_get_did(handle);
- if ((err = zonecfg_get_iptype(handle, &zent->ziptype)) != Z_OK) {
- zperror2(zent->zname, gettext("could not get ip-type"));
- zonecfg_fini_handle(handle);
- return (Z_ERR);
- }
zonecfg_fini_handle(handle);
return (Z_OK);
@@ -1012,8 +1021,12 @@ validate_zonepath(char *path, int cmd_num)
(void) printf(gettext("WARNING: %s is on a temporary "
"file system.\n"), rpath);
}
- if (crosscheck_zonepaths(rpath) != Z_OK)
- return (Z_ERR);
+ if (cmd_num != CMD_BOOT && cmd_num != CMD_REBOOT &&
+ cmd_num != CMD_READY) {
+ /* we checked when we installed, no need to check each boot */
+ if (crosscheck_zonepaths(rpath) != Z_OK)
+ return (Z_ERR);
+ }
/*
* Try to collect and report as many minor errors as possible
* before returning, so the user can learn everything that needs
@@ -1177,6 +1190,7 @@ static int
ready_func(int argc, char *argv[])
{
zone_cmd_arg_t zarg;
+ boolean_t debug = B_FALSE;
int arg;
if (zonecfg_in_alt_root()) {
@@ -1185,11 +1199,14 @@ ready_func(int argc, char *argv[])
}
optind = 0;
- if ((arg = getopt(argc, argv, "?")) != EOF) {
+ if ((arg = getopt(argc, argv, "?X")) != EOF) {
switch (arg) {
case '?':
sub_usage(SHELP_READY, CMD_READY);
return (optopt == '?' ? Z_OK : Z_USAGE);
+ case 'X':
+ debug = B_TRUE;
+ break;
default:
sub_usage(SHELP_READY, CMD_READY);
return (Z_USAGE);
@@ -1206,6 +1223,7 @@ ready_func(int argc, char *argv[])
return (Z_ERR);
zarg.cmd = Z_READY;
+ zarg.debug = debug;
if (zonecfg_call_zoneadmd(target_zone, &zarg, locale, B_TRUE) != 0) {
zerror(gettext("call to %s failed"), "zoneadmd");
return (Z_ERR);
@@ -1218,6 +1236,7 @@ boot_func(int argc, char *argv[])
{
zone_cmd_arg_t zarg;
boolean_t force = B_FALSE;
+ boolean_t debug = B_FALSE;
int arg;
if (zonecfg_in_alt_root()) {
@@ -1244,7 +1263,7 @@ boot_func(int argc, char *argv[])
* zoneadm -z myzone boot -- -s -v -m verbose.
*/
optind = 0;
- while ((arg = getopt(argc, argv, "?fs")) != EOF) {
+ while ((arg = getopt(argc, argv, "?fsX")) != EOF) {
switch (arg) {
case '?':
sub_usage(SHELP_BOOT, CMD_BOOT);
@@ -1256,6 +1275,9 @@ boot_func(int argc, char *argv[])
case 'f':
force = B_TRUE;
break;
+ case 'X':
+ debug = B_TRUE;
+ break;
default:
sub_usage(SHELP_BOOT, CMD_BOOT);
return (Z_USAGE);
@@ -1281,6 +1303,7 @@ boot_func(int argc, char *argv[])
if (verify_details(CMD_BOOT, argv) != Z_OK)
return (Z_ERR);
zarg.cmd = force ? Z_FORCEBOOT : Z_BOOT;
+ zarg.debug = debug;
if (zonecfg_call_zoneadmd(target_zone, &zarg, locale, B_TRUE) != 0) {
zerror(gettext("call to %s failed"), "zoneadmd");
return (Z_ERR);
@@ -1796,6 +1819,7 @@ static int
halt_func(int argc, char *argv[])
{
zone_cmd_arg_t zarg;
+ boolean_t debug = B_FALSE;
int arg;
if (zonecfg_in_alt_root()) {
@@ -1804,11 +1828,14 @@ halt_func(int argc, char *argv[])
}
optind = 0;
- if ((arg = getopt(argc, argv, "?")) != EOF) {
+ if ((arg = getopt(argc, argv, "?X")) != EOF) {
switch (arg) {
case '?':
sub_usage(SHELP_HALT, CMD_HALT);
return (optopt == '?' ? Z_OK : Z_USAGE);
+ case 'X':
+ debug = B_TRUE;
+ break;
default:
sub_usage(SHELP_HALT, CMD_HALT);
return (Z_USAGE);
@@ -1834,6 +1861,7 @@ halt_func(int argc, char *argv[])
return (Z_ERR);
zarg.cmd = Z_HALT;
+ zarg.debug = debug;
return ((zonecfg_call_zoneadmd(target_zone, &zarg, locale,
B_TRUE) == 0) ? Z_OK : Z_ERR);
}
@@ -1911,6 +1939,7 @@ static int
reboot_func(int argc, char *argv[])
{
zone_cmd_arg_t zarg;
+ boolean_t debug = B_FALSE;
int arg;
if (zonecfg_in_alt_root()) {
@@ -1919,11 +1948,14 @@ reboot_func(int argc, char *argv[])
}
optind = 0;
- if ((arg = getopt(argc, argv, "?")) != EOF) {
+ if ((arg = getopt(argc, argv, "?X")) != EOF) {
switch (arg) {
case '?':
sub_usage(SHELP_REBOOT, CMD_REBOOT);
return (optopt == '?' ? Z_OK : Z_USAGE);
+ case 'X':
+ debug = B_TRUE;
+ break;
default:
sub_usage(SHELP_REBOOT, CMD_REBOOT);
return (Z_USAGE);
@@ -1958,6 +1990,7 @@ reboot_func(int argc, char *argv[])
return (Z_ERR);
zarg.cmd = Z_REBOOT;
+ zarg.debug = debug;
return ((zonecfg_call_zoneadmd(target_zone, &zarg, locale, B_TRUE) == 0)
? Z_OK : Z_ERR);
}
@@ -2185,6 +2218,10 @@ verify_fs_special(struct zone_fstab *fstab)
if (strcmp(fstab->zone_fs_type, MNTTYPE_ZFS) == 0)
return (verify_fs_zfs(fstab));
+ if (strcmp(fstab->zone_fs_type, MNTTYPE_HYPRLOFS) == 0 &&
+ strcmp(fstab->zone_fs_special, "swap") == 0)
+ return (Z_OK);
+
if (stat64(fstab->zone_fs_special, &st) != 0) {
(void) fprintf(stderr, gettext("could not verify fs "
"%s: could not access %s: %s\n"), fstab->zone_fs_dir,
@@ -2588,7 +2625,6 @@ verify_handle(int cmd_num, zone_dochandle_t handle, char *argv[])
dladm_handle_t dh;
dladm_status_t status;
datalink_id_t linkid;
- char errmsg[DLADM_STRSIZE];
in_alt_root = zonecfg_in_alt_root();
if (in_alt_root)
@@ -2671,11 +2707,6 @@ verify_handle(int cmd_num, zone_dochandle_t handle, char *argv[])
dladm_close(dh);
}
if (status != DLADM_STATUS_OK) {
- (void) fprintf(stderr,
- gettext("WARNING: skipping network "
- "interface '%s': %s\n"),
- nwiftab.zone_nwif_physical,
- dladm_status2str(status, errmsg));
break;
}
dl_owner_zid = ALL_ZONES;
@@ -2759,6 +2790,61 @@ no_net:
return (return_code);
}
+/*
+ * Called when readying or booting a zone. We double check that the zone's
+ * debug ID is set and is unique. This covers the case of pre-existing zones
+ * with no ID. Also, its possible that a zone was migrated to this host
+ * and as a result it has a duplicate ID. In this case we preserve the ID
+ * of the first zone we match on in the index file (since it was there before
+ * the current zone) and we assign a new unique ID to the current zone.
+ * Return true if we assigned a new ID, indicating that the zone configuration
+ * needs to be saved.
+ */
+static boolean_t
+verify_fix_did(zone_dochandle_t handle)
+{
+ zoneid_t mydid;
+ zone_entry_t zent;
+ FILE *cookie;
+ char *name;
+ boolean_t fix = B_FALSE;
+
+ mydid = zonecfg_get_did(handle);
+ if (mydid == -1) {
+ zonecfg_set_did(handle);
+ return (B_TRUE);
+ }
+
+ /* Get the full list of zones from the configuration. */
+ cookie = setzoneent();
+ while ((name = getzoneent(cookie)) != NULL) {
+ if (strcmp(target_zone, name) == 0) {
+ free(name);
+ break; /* Once we find our entry, stop. */
+ }
+
+ if (strcmp(name, "global") == 0 ||
+ lookup_zone_info(name, ZONE_ID_UNDEFINED, &zent) != Z_OK) {
+ free(name);
+ continue;
+ }
+
+ free(name);
+ if (zent.zdid == mydid) {
+ fix = B_TRUE;
+ break;
+ }
+ }
+ endzoneent(cookie);
+
+ if (fix) {
+ zonecfg_set_did(handle);
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
static int
verify_details(int cmd_num, char *argv[])
{
@@ -2818,6 +2904,18 @@ verify_details(int cmd_num, char *argv[])
if (verify_handle(cmd_num, handle, argv) != Z_OK)
return_code = Z_ERR;
+ if (cmd_num == CMD_READY || cmd_num == CMD_BOOT) {
+ int vcommit = 0, obscommit = 0;
+
+ vcommit = verify_fix_did(handle);
+ obscommit = zonecfg_fix_obsolete(handle);
+
+ if (vcommit || obscommit)
+ if (zonecfg_save(handle) != Z_OK)
+ (void) fprintf(stderr, gettext("Could not save "
+ "updated configuration.\n"));
+ }
+
zonecfg_fini_handle(handle);
if (return_code == Z_ERR)
(void) fprintf(stderr,
@@ -2903,6 +3001,7 @@ install_func(int argc, char *argv[])
int status;
boolean_t do_postinstall = B_FALSE;
boolean_t brand_help = B_FALSE;
+ boolean_t do_dataset = B_TRUE;
char opts[128];
if (target_zone == NULL) {
@@ -2978,6 +3077,12 @@ install_func(int argc, char *argv[])
}
/* Ignore unknown options - may be brand specific. */
break;
+ case 'x':
+ if (strcmp(optarg, "nodataset") == 0) {
+ do_dataset = B_FALSE;
+ continue; /* internal arg, don't pass thru */
+ }
+ break;
default:
/* Ignore unknown options - may be brand specific. */
break;
@@ -3030,7 +3135,8 @@ install_func(int argc, char *argv[])
goto done;
}
- create_zfs_zonepath(zonepath);
+ if (do_dataset)
+ create_zfs_zonepath(zonepath);
}
status = do_subproc(cmdbuf);
@@ -3840,10 +3946,10 @@ cleanup_zonepath(char *zonepath, boolean_t all)
* exist if the zone was force-attached after a
* migration.
*/
- char *std_entries[] = {"dev", "lu", "root",
+ char *std_entries[] = {"dev", "lastexited", "lu", "root",
"SUNWattached.xml", NULL};
- /* (MAXPATHLEN * 3) is for the 3 std_entries dirs */
- char cmdbuf[sizeof (RMCOMMAND) + (MAXPATHLEN * 3) + 64];
+ /* (MAXPATHLEN * 5) is for the 5 std_entries dirs */
+ char cmdbuf[sizeof (RMCOMMAND) + (MAXPATHLEN * 5) + 64];
/*
* We shouldn't need these checks but lets be paranoid since we
@@ -4993,6 +5099,7 @@ uninstall_func(int argc, char *argv[])
if (zonecfg_ping_zoneadmd(target_zone) == Z_OK) {
zone_cmd_arg_t zarg;
zarg.cmd = Z_NOTE_UNINSTALLING;
+ zarg.debug = B_FALSE;
/* we don't care too much if this fails, just plow on */
(void) zonecfg_call_zoneadmd(target_zone, &zarg, locale,
B_TRUE);
@@ -5108,6 +5215,7 @@ mount_func(int argc, char *argv[])
return (Z_ERR);
zarg.cmd = force ? Z_FORCEMOUNT : Z_MOUNT;
+ zarg.debug = B_FALSE;
zarg.bootbuf[0] = '\0';
if (zonecfg_call_zoneadmd(target_zone, &zarg, locale, B_TRUE) != 0) {
zerror(gettext("call to %s failed"), "zoneadmd");
@@ -5129,6 +5237,7 @@ unmount_func(int argc, char *argv[])
return (Z_ERR);
zarg.cmd = Z_UNMOUNT;
+ zarg.debug = B_FALSE;
if (zonecfg_call_zoneadmd(target_zone, &zarg, locale, B_TRUE) != 0) {
zerror(gettext("call to %s failed"), "zoneadmd");
return (Z_ERR);
@@ -5350,7 +5459,7 @@ apply_func(int argc, char *argv[])
priv_set_t *privset;
zoneid_t zoneid;
zone_dochandle_t handle;
- struct zone_mcaptab mcap;
+ uint64_t mcap;
char pool_err[128];
zoneid = getzoneid();
@@ -5441,19 +5550,12 @@ apply_func(int argc, char *argv[])
}
/*
- * If a memory cap is configured, set the cap in the kernel using
- * zone_setattr() and make sure the rcapd SMF service is enabled.
+ * If a memory cap is configured, make sure the rcapd SMF service is
+ * enabled.
*/
- if (zonecfg_getmcapent(handle, &mcap) == Z_OK) {
- uint64_t num;
+ if (zonecfg_get_aliased_rctl(handle, ALIAS_MAXPHYSMEM, &mcap) == Z_OK) {
char smf_err[128];
- num = (uint64_t)strtoll(mcap.zone_physmem_cap, NULL, 10);
- if (zone_setattr(zoneid, ZONE_ATTR_PHYS_MCAP, &num, 0) == -1) {
- zerror(gettext("could not set zone memory cap"));
- res = Z_ERR;
- }
-
if (zonecfg_enable_rcapd(smf_err, sizeof (smf_err)) != Z_OK) {
zerror(gettext("enabling system/rcap service failed: "
"%s"), smf_err);
diff --git a/usr/src/cmd/zoneadm/zones.xml b/usr/src/cmd/zoneadm/zones.xml
index 9c8e305f89..1b6f8cd49b 100644
--- a/usr/src/cmd/zoneadm/zones.xml
+++ b/usr/src/cmd/zoneadm/zones.xml
@@ -54,11 +54,19 @@
<service_fmri value='svc:/milestone/multi-user-server' />
</dependency>
+ <dependency
+ name='metadata'
+ type='service'
+ grouping='require_all'
+ restart_on='none'>
+ <service_fmri value='svc:/system/smartdc/metadata' />
+ </dependency>
+
<exec_method
type='method'
name='start'
exec='/lib/svc/method/svc-zones %m'
- timeout_seconds='60'>
+ timeout_seconds='0'>
</exec_method>
<!--
diff --git a/usr/src/cmd/zoneadmd/Makefile b/usr/src/cmd/zoneadmd/Makefile
index 8324f7fefa..e81e4631aa 100644
--- a/usr/src/cmd/zoneadmd/Makefile
+++ b/usr/src/cmd/zoneadmd/Makefile
@@ -18,57 +18,54 @@
#
# CDDL HEADER END
-
-#
-
#
# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+# Copyright (c) 2011, Joyent, Inc. All rights reserved.
#
PROG= zoneadmd
include ../Makefile.cmd
+include ../Makefile.ctf
-ROOTCMDDIR= $(ROOTLIB)/zones
-
-OBJS= zoneadmd.o zcons.o vplat.o
-SRCS = $(OBJS:.o=.c)
-POFILE=zoneadmd_all.po
-POFILES= $(OBJS:%.o=%.po)
+$(64ONLY)SUBDIRS= $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
-CFLAGS += $(CCVERBOSE)
-CERRWARN += -_gcc=-Wno-switch
-CERRWARN += -_gcc=-Wno-parentheses
-CERRWARN += -_gcc=-Wno-uninitialized
+all := TARGET = all
+install := TARGET = install
+clean := TARGET = clean
+clobber := TARGET = clobber
+lint := TARGET = lint
-LDLIBS += -lsocket -lzonecfg -lnsl -ldevinfo -ldevice -lnvpair \
- -lgen -lbsm -lcontract -lzfs -luuid -lbrand -ldladm -ltsnet -ltsol \
- -linetutil -lscf
XGETFLAGS += -a -x zoneadmd.xcl
+ROOTUSRLIBZONES = $(ROOT)/usr/lib/zones
+
.KEEP_STATE:
.PARALLEL:
-all: $(PROG)
+all: $(SUBDIRS)
$(PROG): $(OBJS)
$(LINK.c) -o $@ $(OBJS) $(LDLIBS)
$(POST_PROCESS)
-install: all $(ROOTCMD)
-
-$(POFILE): $(POFILES)
- $(RM) $@
- $(CAT) $(POFILES) > $@
+install: $(SUBDIRS)
+ -$(RM) $(ROOTUSRLIBZONES)/$(PROG)
+ -$(LN) $(ISAEXEC) $(ROOTUSRLIBZONES)/$(PROG)
-clean:
- $(RM) $(OBJS)
+$(POFILE):
-lint: lint_SRCS
+clean clobebr lint: $(SUBDIRS)
check:
- $(CSTYLE) -p -P $(SRCS:%=%)
+ $(CSTYLE) -p -P *.c
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
include ../Makefile.targ
diff --git a/usr/src/cmd/zoneadmd/Makefile.com b/usr/src/cmd/zoneadmd/Makefile.com
new file mode 100644
index 0000000000..162d1f0219
--- /dev/null
+++ b/usr/src/cmd/zoneadmd/Makefile.com
@@ -0,0 +1,70 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+
+#
+# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, Joyent, Inc. All rights reserved.
+#
+
+PROG= zoneadmd
+
+include ../../Makefile.cmd
+include ../../Makefile.ctf
+
+ROOTCMDDIR= $(ROOTLIB)/zones
+
+OBJS= zoneadmd.o zcons.o vplat.o mcap.o
+
+CFLAGS += $(CCVERBOSE)
+LDLIBS += -lsocket -lzonecfg -lnsl -ldevinfo -ldevice -lnvpair \
+ -lgen -lbsm -lcontract -lzfs -luuid -lbrand -ldladm -ltsnet -ltsol \
+ -linetutil -lproc -lscf
+
+.KEEP_STATE:
+
+%.o: ../%.c
+ $(COMPILE.c) $<
+ $(POST_PROCESS_O)
+
+ROOTUSRLIBZONES = $(ROOT)/usr/lib/zones
+ROOTUSRLIBZONES32 = $(ROOTUSRLIBZONES)/$(MACH32)
+ROOTUSRLIBZONES64 = $(ROOTUSRLIBZONES)/$(MACH64)
+ROOTUSRLIBZONESPROG32 = $(ROOTUSRLIBZONES32)/$(PROG)
+ROOTUSRLIBZONESPROG64 = $(ROOTUSRLIBZONES64)/$(PROG)
+$(ROOTUSRLIBZONES32)/%: $(ROOTUSRLIBZONES32) %
+ $(INS.file)
+$(ROOTUSRLIBZONES64)/%: $(ROOTUSRLIBZONES64) %
+ $(INS.file)
+$(ROOTUSRLIBZONES32):
+ $(INS.dir)
+
+all: $(PROG)
+
+$(PROG): $(OBJS)
+ $(LINK.c) -o $@ $(OBJS) $(LDLIBS)
+ $(POST_PROCESS)
+
+clean:
+ $(RM) $(OBJS)
+
+lint:
+ $(LINT.c) ../*.c $(LDLIBS)
+
+include ../../Makefile.targ
diff --git a/usr/src/cmd/zoneadmd/amd64/Makefile b/usr/src/cmd/zoneadmd/amd64/Makefile
new file mode 100644
index 0000000000..75ac51db32
--- /dev/null
+++ b/usr/src/cmd/zoneadmd/amd64/Makefile
@@ -0,0 +1,31 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2011, Joyent, Inc. All rights reserved.
+#
+
+.KEEP_STATE:
+
+include ../Makefile.com
+include ../../Makefile.cmd.64
+
+install: all $(ROOTUSRLIBZONES64) $(ROOTUSRLIBZONESPROG64)
diff --git a/usr/src/cmd/zoneadmd/i386/Makefile b/usr/src/cmd/zoneadmd/i386/Makefile
new file mode 100644
index 0000000000..a8764e0638
--- /dev/null
+++ b/usr/src/cmd/zoneadmd/i386/Makefile
@@ -0,0 +1,30 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2011, Joyent, Inc. All rights reserved.
+#
+
+.KEEP_STATE:
+
+include ../Makefile.com
+
+install: all $(ROOTUSRLIBZONES32) $(ROOTUSRLIBZONESPROG32)
diff --git a/usr/src/cmd/zoneadmd/mcap.c b/usr/src/cmd/zoneadmd/mcap.c
new file mode 100644
index 0000000000..44917b0024
--- /dev/null
+++ b/usr/src/cmd/zoneadmd/mcap.c
@@ -0,0 +1,1193 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2014, Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * This file implements the code which runs a thread inside zoneadmd to cap
+ * the associated zone's physical memory. A thread to do this is started
+ * when the zone boots and is halted when the zone shuts down.
+ *
+ * Because of the way that the VM system is currently implemented, there is no
+ * way to go from the bottom up (page to process to zone). Thus, there is no
+ * obvious way to hook an rctl into the kernel's paging code to enforce a hard
+ * memory cap. Instead, we implement a soft physical memory cap which looks
+ * at the zone's overall rss and once it is over the cap, works from the top
+ * down (zone to process to page), looking at zone processes, to determine
+ * what to try to pageout to get the zone under its memory cap.
+ *
+ * The code uses the fast, cheap, but potentially very inaccurate sum of the
+ * rss values from psinfo_t to first approximate the zone's rss and will
+ * fallback to the vm_getusage syscall to determine the zone's rss if needed.
+ * It then checks the rss against the zone's zone.max-physical-memory rctl.
+ * Once the zone goes over its cap, then this thread will work through the
+ * zone's /proc process list, Pgrab-bing each process and stepping through the
+ * address space segments attempting to use pr_memcntl(...MS_INVALCURPROC...)
+ * to pageout pages, until the zone is again under its cap.
+ *
+ * Although zone memory capping is implemented as a soft cap by this user-level
+ * thread, the interfaces around memory caps that are exposed to the user are
+ * the standard ones; an rctl and kstats. This thread uses the rctl value
+ * to obtain the cap and works with the zone kernel code to update the kstats.
+ * If the implementation ever moves into the kernel, these exposed interfaces
+ * do not need to change.
+ *
+ * The thread adaptively sleeps, periodically checking the state of the
+ * zone. As the zone's rss gets closer to the cap, the thread will wake up
+ * more often to check the zone's status. Once the zone is over the cap,
+ * the thread will work to pageout until the zone is under the cap, as shown
+ * by updated vm_usage data.
+ *
+ * NOTE: The pagedata page maps (at least on x86) are not useful. Those flags
+ * are set by hrm_setbits() and on x86 that code path is only executed by
+ * segvn_pagelock -> hat_setstat -> hrm_setbits
+ * segvn_softunlock -^
+ * On SPARC there is an additional code path which may make this data
+ * useful (sfmmu_ttesync), but since it is not generic, we ignore the page
+ * maps. If we ever fix this issue, then we could generalize this mcap code to
+ * do more with the data on active pages.
+ *
+ * For debugging, touch the file {zonepath}/mcap_debug.log. This will
+ * cause the thread to start logging its actions into that file (it may take
+ * a minute or two if the thread is currently sleeping). Removing that
+ * file will cause logging to stop.
+ */
+
+#include <sys/mman.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libproc.h>
+#include <limits.h>
+#include <procfs.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/priocntl.h>
+#include <dirent.h>
+#include <zone.h>
+#include <libzonecfg.h>
+#include <thread.h>
+#include <values.h>
+#include <sys/vm_usage.h>
+#include <sys/resource.h>
+#include <sys/debug.h>
+#include <synch.h>
+#include <wait.h>
+#include <libcontract.h>
+#include <libcontract_priv.h>
+#include <sys/contract/process.h>
+#include "zoneadmd.h"
+
+ /* round up to next y = 2^n */
+#define ROUNDUP(x, y) (((x) + ((y) - 1)) & ~((y) - 1))
+
+#define CAP_REFRESH ((uint64_t)300 * NANOSEC) /* every 5 minutes */
+
+/*
+ * zonecfg attribute tunables for memory capping.
+ * phys-mcap-cmd
+ * type: string
+ * specifies a command that can be run when over the cap
+ * phys-mcap-no-vmusage
+ * type: boolean
+ * true disables vm_getusage and just uses zone's proc. rss sum
+ * phys-mcap-no-pageout
+ * type: boolean
+ * true disables pageout when over
+ * phys-mcap-no-pf-throttle
+ * type: boolean
+ * true disables page fault throttling when over
+ */
+#define TUNE_CMD "phys-mcap-cmd"
+#define TUNE_NVMU "phys-mcap-no-vmusage"
+#define TUNE_NPAGE "phys-mcap-no-pageout"
+#define TUNE_NPFTHROT "phys-mcap-no-pf-throttle"
+
+/*
+ * These are only used in get_mem_info but global. We always need scale_rss and
+ * prev_fast_rss to be persistent but we also have the other two global so we
+ * can easily see these with mdb.
+ */
+uint64_t scale_rss = 0;
+uint64_t prev_fast_rss = 0;
+uint64_t fast_rss = 0;
+uint64_t accurate_rss = 0;
+
+static char zonename[ZONENAME_MAX];
+static char zonepath[MAXPATHLEN];
+static char zoneproc[MAXPATHLEN];
+static char debug_log[MAXPATHLEN];
+static zoneid_t zid;
+static mutex_t shutdown_mx;
+static cond_t shutdown_cv;
+static int shutting_down = 0;
+static thread_t mcap_tid;
+static FILE *debug_log_fp = NULL;
+static uint64_t zone_rss_cap; /* RSS cap(KB) */
+static char over_cmd[2 * BUFSIZ]; /* same size as zone_attr_value */
+static boolean_t skip_vmusage = B_FALSE;
+static boolean_t skip_pageout = B_FALSE;
+static boolean_t skip_pf_throttle = B_FALSE;
+
+static zlog_t *logp;
+
+static int64_t check_suspend();
+static void get_mcap_tunables();
+
+/*
+ * Structure to hold current state about a process address space that we're
+ * working on.
+ */
+typedef struct {
+ int pr_curr; /* the # of the mapping we're working on */
+ int pr_nmap; /* number of mappings in address space */
+ prmap_t *pr_mapp; /* process's map array */
+} proc_map_t;
+
+typedef struct zsd_vmusage64 {
+ id_t vmu_zoneid;
+ uint_t vmu_type;
+ id_t vmu_id;
+ /*
+ * An amd64 kernel will align the following uint64_t members, but a
+ * 32bit i386 process will not without help.
+ */
+ int vmu_align_next_members_on_8_bytes;
+ uint64_t vmu_rss_all;
+ uint64_t vmu_rss_private;
+ uint64_t vmu_rss_shared;
+ uint64_t vmu_swap_all;
+ uint64_t vmu_swap_private;
+ uint64_t vmu_swap_shared;
+} zsd_vmusage64_t;
+
+/*
+ * Output a debug log message.
+ */
+/*PRINTFLIKE1*/
+static void
+debug(char *fmt, ...)
+{
+ va_list ap;
+
+ if (debug_log_fp == NULL)
+ return;
+
+ va_start(ap, fmt);
+ (void) vfprintf(debug_log_fp, fmt, ap);
+ va_end(ap);
+ (void) fflush(debug_log_fp);
+}
+
+/*
+ * Like sleep(3C) but can be interupted by cond_signal which is posted when
+ * we're shutting down the mcap thread.
+ */
+static void
+sleep_shutdown(int secs)
+{
+ timestruc_t to;
+
+ to.tv_sec = secs;
+ to.tv_nsec = 0;
+
+ (void) mutex_lock(&shutdown_mx);
+ if (!shutting_down)
+ (void) cond_reltimedwait(&shutdown_cv, &shutdown_mx, &to);
+ (void) mutex_unlock(&shutdown_mx);
+}
+
+static boolean_t
+proc_issystem(pid_t pid)
+{
+ char pc_clname[PC_CLNMSZ];
+
+ if (priocntl(P_PID, pid, PC_GETXPARMS, NULL, PC_KY_CLNAME, pc_clname,
+ PC_KY_NULL) != -1)
+ return (strcmp(pc_clname, "SYS") == 0);
+
+ return (B_TRUE);
+}
+
+/*
+ * Fork a child that enters the zone and runs the "phys-mcap-cmd" command.
+ */
+static void
+run_over_cmd()
+{
+ int ctfd;
+ int err;
+ pid_t childpid;
+ siginfo_t info;
+ ctid_t ct;
+
+ /*
+ * Before we enter the zone, we need to create a new process contract
+ * for the child, as required by zone_enter().
+ */
+ if ((ctfd = open64("/system/contract/process/template", O_RDWR)) == -1)
+ return;
+ if (ct_tmpl_set_critical(ctfd, 0) != 0 ||
+ ct_tmpl_set_informative(ctfd, 0) != 0 ||
+ ct_pr_tmpl_set_fatal(ctfd, CT_PR_EV_HWERR) != 0 ||
+ ct_pr_tmpl_set_param(ctfd, CT_PR_PGRPONLY) != 0 ||
+ ct_tmpl_activate(ctfd) != 0) {
+ (void) close(ctfd);
+ return;
+ }
+
+ childpid = fork();
+ switch (childpid) {
+ case -1:
+ (void) ct_tmpl_clear(ctfd);
+ (void) close(ctfd);
+ break;
+ case 0: /* Child */
+ (void) ct_tmpl_clear(ctfd);
+ (void) close(ctfd);
+ if (zone_enter(zid) == -1)
+ _exit(errno);
+ err = system(over_cmd);
+ _exit(err);
+ break;
+ default: /* Parent */
+ if (contract_latest(&ct) == -1)
+ ct = -1;
+ (void) ct_tmpl_clear(ctfd);
+ (void) close(ctfd);
+ err = waitid(P_PID, childpid, &info, WEXITED);
+ (void) contract_abandon_id(ct);
+ if (err == -1 || info.si_status != 0)
+ debug("over_cmd failed");
+ break;
+ }
+}
+
+/*
+ * Get the next mapping.
+ */
+static prmap_t *
+nextmapping(proc_map_t *pmp)
+{
+ if (pmp->pr_mapp == NULL || pmp->pr_curr >= pmp->pr_nmap)
+ return (NULL);
+
+ return (&pmp->pr_mapp[pmp->pr_curr++]);
+}
+
+/*
+ * Initialize the proc_map_t to access the first mapping of an address space.
+ */
+static prmap_t *
+init_map(proc_map_t *pmp, pid_t pid)
+{
+ int fd;
+ int res;
+ struct stat st;
+ char pathbuf[MAXPATHLEN];
+
+ bzero(pmp, sizeof (proc_map_t));
+ pmp->pr_nmap = -1;
+
+ (void) snprintf(pathbuf, sizeof (pathbuf), "%s/%d/map", zoneproc, pid);
+ if ((fd = open(pathbuf, O_RDONLY, 0)) < 0)
+ return (NULL);
+
+redo:
+ errno = 0;
+ if (fstat(fd, &st) != 0)
+ goto done;
+
+ if ((pmp->pr_mapp = malloc(st.st_size)) == NULL) {
+ debug("cannot malloc() %ld bytes for xmap", st.st_size);
+ goto done;
+ }
+ (void) bzero(pmp->pr_mapp, st.st_size);
+
+ errno = 0;
+ if ((res = pread(fd, pmp->pr_mapp, st.st_size, 0)) != st.st_size) {
+ free(pmp->pr_mapp);
+ pmp->pr_mapp = NULL;
+ if (res > 0 || errno == E2BIG) {
+ goto redo;
+ } else {
+ debug("pid %ld cannot read xmap\n", pid);
+ goto done;
+ }
+ }
+
+ pmp->pr_nmap = st.st_size / sizeof (prmap_t);
+
+done:
+ (void) close(fd);
+ return (nextmapping(pmp));
+}
+
+/*
+ * Attempt to invalidate the entire mapping from within the given process's
+ * address space. May return nonzero with errno as:
+ * ESRCH - process not found
+ * ENOMEM - segment not found
+ * EINVAL - mapping exceeds a single segment
+ */
+static int
+pageout_mapping(pid_t pid, prmap_t *pmp)
+{
+ int res;
+
+ if (pmp->pr_mflags & MA_ISM || pmp->pr_mflags & MA_SHM)
+ return (0);
+
+ errno = 0;
+ res = syscall(SYS_rusagesys, _RUSAGESYS_INVALMAP, pid, pmp->pr_vaddr,
+ pmp->pr_size);
+
+ return (res);
+}
+
+/*
+ * Work through a process paging out mappings until the whole address space was
+ * examined or the excess is < 0. Return our estimate of the updated excess.
+ */
+static int64_t
+pageout_process(pid_t pid, int64_t excess)
+{
+ int psfd;
+ prmap_t *pmap;
+ proc_map_t cur;
+ int res;
+ int64_t sum_d_rss, d_rss;
+ int64_t old_rss;
+ int map_cnt;
+ psinfo_t psinfo;
+ char pathbuf[MAXPATHLEN];
+
+ (void) snprintf(pathbuf, sizeof (pathbuf), "%s/%d/psinfo", zoneproc,
+ pid);
+ if ((psfd = open(pathbuf, O_RDONLY, 0000)) < 0)
+ return (excess);
+
+ cur.pr_mapp = NULL;
+
+ if (pread(psfd, &psinfo, sizeof (psinfo), 0) != sizeof (psinfo))
+ goto done;
+
+ old_rss = (int64_t)psinfo.pr_rssize;
+ map_cnt = 0;
+
+ /* If unscannable, skip it. */
+ if (psinfo.pr_nlwp == 0 || proc_issystem(pid)) {
+ debug("pid %ld: system process, skipping %s\n",
+ pid, psinfo.pr_psargs);
+ goto done;
+ }
+
+ /* If tiny RSS (16KB), skip it. */
+ if (old_rss <= 16) {
+ debug("pid %ld: skipping, RSS %lldKB %s\n",
+ pid, old_rss, psinfo.pr_psargs);
+ goto done;
+ }
+
+ /* Get segment residency information. */
+ pmap = init_map(&cur, pid);
+
+ /* Skip process if it has no mappings. */
+ if (pmap == NULL) {
+ debug("pid %ld: map unreadable; ignoring\n", pid);
+ goto done;
+ }
+
+ debug("pid %ld: nmap %d sz %dKB rss %lldKB %s\n",
+ pid, cur.pr_nmap, psinfo.pr_size, old_rss, psinfo.pr_psargs);
+
+ /*
+ * Within the process's address space, attempt to page out mappings.
+ */
+ sum_d_rss = 0;
+ while (excess > 0 && pmap != NULL && !shutting_down) {
+ /* invalidate the entire mapping */
+ if ((res = pageout_mapping(pid, pmap)) < 0)
+ debug("pid %ld: mapping 0x%p %ldkb unpageable (%d)\n",
+ pid, pmap->pr_vaddr, pmap->pr_size / 1024, errno);
+
+ map_cnt++;
+
+ /*
+ * Re-check the process rss and get the delta.
+ */
+ if (pread(psfd, &psinfo, sizeof (psinfo), 0)
+ != sizeof (psinfo)) {
+ excess -= old_rss;
+ goto done;
+ }
+
+ d_rss = (int64_t)psinfo.pr_rssize - old_rss;
+ old_rss = (int64_t)psinfo.pr_rssize;
+ sum_d_rss += d_rss;
+
+ /*
+ * d_rss hopefully should be negative (or 0 if nothing
+ * invalidated) but can be positive if more got paged in.
+ */
+ excess += d_rss;
+
+ if (excess <= 0) {
+ debug("pid %ld: (part.) nmap %d delta_rss %lldKB "
+ "excess %lldKB\n", pid, map_cnt,
+ (unsigned long long)sum_d_rss, (long long)excess);
+ map_cnt = 0;
+
+ /*
+ * If we're actually under, this will suspend checking
+ * in the middle of this process's address space.
+ */
+ excess = check_suspend();
+ if (shutting_down)
+ goto done;
+
+ /*
+ * since we might have suspended, re-read process's rss
+ */
+ if (pread(psfd, &psinfo, sizeof (psinfo), 0)
+ != sizeof (psinfo)) {
+ excess -= old_rss;
+ goto done;
+ }
+
+ old_rss = (int64_t)psinfo.pr_rssize;
+
+ debug("pid %ld: resume pageout; excess %lld\n", pid,
+ (long long)excess);
+ sum_d_rss = 0;
+ }
+
+ pmap = nextmapping(&cur);
+ }
+
+ debug("pid %ld: nmap %d delta_rss %lldKB excess %lldKB\n",
+ pid, map_cnt, (unsigned long long)sum_d_rss, (long long)excess);
+
+done:
+ if (cur.pr_mapp != NULL)
+ free(cur.pr_mapp);
+
+ (void) close(psfd);
+
+ if (shutting_down)
+ return (0);
+
+ return (excess);
+}
+
+/*
+ * Get the zone's RSS data.
+ */
+static uint64_t
+get_mem_info()
+{
+ uint64_t n = 1;
+ zsd_vmusage64_t buf;
+ uint64_t tmp_rss;
+ DIR *pdir = NULL;
+ struct dirent *dent;
+
+ /*
+ * Start by doing the fast, cheap RSS calculation using the rss value
+ * in psinfo_t. Because that's per-process, it can lead to double
+ * counting some memory and overestimating how much is being used, but
+ * as long as that's not over the cap, then we don't need do the
+ * expensive calculation.
+ *
+ * If we have to do the expensive calculation, we remember the scaling
+ * factor so that we can try to use that on subsequent iterations for
+ * the fast rss.
+ */
+ if (shutting_down)
+ return (0);
+
+ if ((pdir = opendir(zoneproc)) == NULL)
+ return (0);
+
+ accurate_rss = 0;
+ fast_rss = 0;
+ while (!shutting_down && (dent = readdir(pdir)) != NULL) {
+ pid_t pid;
+ int psfd;
+ int64_t rss;
+ char pathbuf[MAXPATHLEN];
+ psinfo_t psinfo;
+
+ if (strcmp(".", dent->d_name) == 0 ||
+ strcmp("..", dent->d_name) == 0)
+ continue;
+
+ pid = atoi(dent->d_name);
+ if (pid == 0 || pid == 1)
+ continue;
+
+ (void) snprintf(pathbuf, sizeof (pathbuf), "%s/%d/psinfo",
+ zoneproc, pid);
+
+ rss = 0;
+ if ((psfd = open(pathbuf, O_RDONLY, 0000)) != -1) {
+ if (pread(psfd, &psinfo, sizeof (psinfo), 0) ==
+ sizeof (psinfo))
+ rss = (int64_t)psinfo.pr_rssize;
+
+ (void) close(psfd);
+ }
+
+ fast_rss += rss;
+ }
+
+ (void) closedir(pdir);
+
+ if (shutting_down)
+ return (0);
+
+ debug("fast rss: %lluKB, scale: %llu, prev: %lluKB\n", fast_rss,
+ scale_rss, prev_fast_rss);
+
+ /* see if we can get by with a scaled fast rss */
+ tmp_rss = fast_rss;
+ if (scale_rss > 1 && prev_fast_rss > 0) {
+ /*
+ * Only scale the fast value if it hasn't ballooned too much
+ * to trust.
+ */
+ if (fast_rss / prev_fast_rss < 2) {
+ fast_rss /= scale_rss;
+ debug("scaled fast rss: %lluKB\n", fast_rss);
+ }
+ }
+
+ if (fast_rss <= zone_rss_cap || skip_vmusage) {
+ uint64_t zone_rss_bytes;
+
+ zone_rss_bytes = fast_rss * 1024;
+ /* Use the zone's approx. RSS in the kernel */
+ (void) zone_setattr(zid, ZONE_ATTR_RSS, &zone_rss_bytes, 0);
+ return (fast_rss);
+ }
+
+ buf.vmu_id = zid;
+
+ /* get accurate usage (cached data may be up to 5 seconds old) */
+ if (syscall(SYS_rusagesys, _RUSAGESYS_GETVMUSAGE, VMUSAGE_A_ZONE, 5,
+ (uintptr_t)&buf, (uintptr_t)&n) != 0) {
+ debug("vmusage failed\n");
+ (void) sleep_shutdown(1);
+ return (0);
+ }
+
+ if (n > 1) {
+ /* This should never happen */
+ debug("vmusage returned more than one result\n");
+ (void) sleep_shutdown(1);
+ return (0);
+ }
+
+ if (buf.vmu_id != zid) {
+ /* This should never happen */
+ debug("vmusage returned the incorrect zone\n");
+ (void) sleep_shutdown(1);
+ return (0);
+ }
+
+ accurate_rss = buf.vmu_rss_all / 1024;
+
+ /* calculate scaling factor to use for fast_rss from now on */
+ if (accurate_rss > 0) {
+ scale_rss = fast_rss / accurate_rss;
+ debug("new scaling factor: %llu\n", scale_rss);
+ /* remember the fast rss when we had to get the accurate rss */
+ prev_fast_rss = tmp_rss;
+ }
+
+ debug("accurate rss: %lluKB, scale: %llu, prev: %lluKB\n", accurate_rss,
+ scale_rss, prev_fast_rss);
+ return (accurate_rss);
+}
+
+/*
+ * Needed to read the zones physical-memory-cap rctl.
+ */
+static struct ps_prochandle *
+grab_zone_proc()
+{
+ DIR *dirp;
+ struct dirent *dentp;
+ struct ps_prochandle *ph = NULL;
+ int tmp;
+
+ if ((dirp = opendir(zoneproc)) == NULL)
+ return (NULL);
+
+ while (!shutting_down && (dentp = readdir(dirp))) {
+ int pid;
+
+ if (strcmp(".", dentp->d_name) == 0 ||
+ strcmp("..", dentp->d_name) == 0)
+ continue;
+
+ pid = atoi(dentp->d_name);
+ /* attempt to grab process */
+ if ((ph = Pgrab(pid, 0, &tmp)) != NULL) {
+ if (Psetflags(ph, PR_RLC) == 0) {
+ if (Pcreate_agent(ph) == 0) {
+ (void) closedir(dirp);
+ return (ph);
+ }
+ }
+ Prelease(ph, 0);
+ }
+ }
+
+ (void) closedir(dirp);
+ return (NULL);
+}
+
+static uint64_t
+get_zone_cap()
+{
+ rctlblk_t *rblk;
+ uint64_t mcap;
+ struct ps_prochandle *ph;
+
+ if ((rblk = (rctlblk_t *)malloc(rctlblk_size())) == NULL)
+ return (UINT64_MAX);
+
+ if ((ph = grab_zone_proc()) == NULL) {
+ free(rblk);
+ return (UINT64_MAX);
+ }
+
+ if (pr_getrctl(ph, "zone.max-physical-memory", NULL, rblk,
+ RCTL_FIRST)) {
+ Pdestroy_agent(ph);
+ Prelease(ph, 0);
+ free(rblk);
+ return (UINT64_MAX);
+ }
+
+ Pdestroy_agent(ph);
+ Prelease(ph, 0);
+
+ mcap = rctlblk_get_value(rblk);
+ free(rblk);
+ return (mcap);
+}
+
+/*
+ * check_suspend is invoked at the beginning of every pass through the process
+ * list or after we've paged out enough so that we think the excess is under
+ * the cap. The purpose is to periodically check the zone's rss and return
+ * the excess when the zone is over the cap. The rest of the time this
+ * function will sleep, periodically waking up to check the current rss.
+ *
+ * Depending on the percentage of penetration of the zone's rss into the
+ * cap we sleep for longer or shorter amounts. This reduces the impact of this
+ * work on the system, which is important considering that each zone will be
+ * monitoring its rss.
+ */
+static int64_t
+check_suspend()
+{
+ static hrtime_t last_cap_read = 0;
+ static uint64_t addon;
+ static uint64_t lo_thresh; /* Thresholds for how long to sleep */
+ static uint64_t hi_thresh; /* when under the cap (80% & 90%). */
+ static uint64_t prev_zone_rss = 0;
+ static uint32_t pfdelay = 0; /* usec page fault delay when over */
+
+ /* Wait a second to give the async pageout a chance to catch up. */
+ (void) sleep_shutdown(1);
+
+ while (!shutting_down) {
+ int64_t new_excess;
+ int sleep_time;
+ hrtime_t now;
+ struct stat st;
+ uint64_t zone_rss; /* total RSS(KB) */
+
+ /*
+ * Check if the debug log files exists and enable or disable
+ * debug.
+ */
+ if (debug_log_fp == NULL) {
+ if (stat(debug_log, &st) == 0)
+ debug_log_fp = fopen(debug_log, "w");
+ } else {
+ if (stat(debug_log, &st) == -1) {
+ (void) fclose(debug_log_fp);
+ debug_log_fp = NULL;
+ }
+ }
+
+ /*
+ * If the CAP_REFRESH interval has passed, re-get the current
+ * cap in case it has been dynamically updated.
+ */
+ now = gethrtime();
+ if (now - last_cap_read > CAP_REFRESH) {
+ uint64_t mcap;
+
+ last_cap_read = now;
+
+ mcap = get_zone_cap();
+ if (mcap != 0 && mcap != UINT64_MAX)
+ zone_rss_cap = ROUNDUP(mcap, 1024) / 1024;
+ else
+ zone_rss_cap = UINT64_MAX;
+
+ lo_thresh = (uint64_t)(zone_rss_cap * .8);
+ hi_thresh = (uint64_t)(zone_rss_cap * .9);
+ addon = (uint64_t)(zone_rss_cap * 0.05);
+
+ /*
+ * We allow the memory cap tunables to be changed on
+ * the fly.
+ */
+ get_mcap_tunables();
+
+ debug("%s: %s\n", TUNE_CMD, over_cmd);
+ debug("%s: %d\n", TUNE_NVMU, skip_vmusage);
+ debug("%s: %d\n", TUNE_NPAGE, skip_pageout);
+ debug("%s: %d\n", TUNE_NPFTHROT, skip_pf_throttle);
+ debug("current cap %lluKB lo %lluKB hi %lluKB\n",
+ zone_rss_cap, lo_thresh, hi_thresh);
+ }
+
+ /* No cap, nothing to do. */
+ if (zone_rss_cap == 0 || zone_rss_cap == UINT64_MAX) {
+ debug("no cap, sleep 120 seconds\n");
+ (void) sleep_shutdown(120);
+ continue;
+ }
+
+ zone_rss = get_mem_info();
+
+ /* calculate excess */
+ new_excess = zone_rss - zone_rss_cap;
+
+ debug("rss %lluKB, cap %lluKB, excess %lldKB\n",
+ zone_rss, zone_rss_cap, new_excess);
+
+ /*
+ * If necessary, updates stats.
+ */
+
+ /*
+ * If it looks like we did some paging out since last over the
+ * cap then update the kstat so we can approximate how much was
+ * paged out.
+ */
+ if (prev_zone_rss > zone_rss_cap && zone_rss < prev_zone_rss) {
+ uint64_t diff;
+
+ /* assume diff is num bytes we paged out */
+ diff = (prev_zone_rss - zone_rss) * 1024;
+
+ (void) zone_setattr(zid, ZONE_ATTR_PMCAP_PAGEOUT,
+ &diff, 0);
+ }
+ prev_zone_rss = zone_rss;
+
+ if (new_excess > 0) {
+ uint64_t n = 1;
+
+ /* Increment "nover" kstat. */
+ (void) zone_setattr(zid, ZONE_ATTR_PMCAP_NOVER, &n, 0);
+
+ if (!skip_pf_throttle) {
+ /*
+ * Tell the kernel to start throttling page
+ * faults by some number of usecs to help us
+ * catch up. If we are persistently over the
+ * cap the delay ramps up to a max of 2000usecs.
+ * Note that for delays less than 1 tick
+ * (i.e. all of these) we busy-wait in as_fault.
+ * delay faults/sec
+ * 125 8000
+ * 250 4000
+ * 500 2000
+ * 1000 1000
+ * 2000 500
+ */
+ if (pfdelay == 0)
+ pfdelay = 125;
+ else if (pfdelay < 2000)
+ pfdelay *= 2;
+
+ (void) zone_setattr(zid, ZONE_ATTR_PG_FLT_DELAY,
+ &pfdelay, 0);
+ }
+
+ /*
+ * Once we go over the cap, then we want to
+ * page out a little extra instead of stopping
+ * right at the cap. To do this we add 5% to
+ * the excess so that pageout_proces will work
+ * a little longer before stopping.
+ */
+ return ((int64_t)(new_excess + addon));
+ }
+
+ /*
+ * At this point we are under the cap.
+ *
+ * Tell the kernel to stop throttling page faults.
+ *
+ * Scale the amount of time we sleep before rechecking the
+ * zone's memory usage. Also, scale the accpetable age of
+ * cached results from vm_getusage. We do this based on the
+ * penetration into the capped limit.
+ */
+ if (pfdelay > 0) {
+ pfdelay = 0;
+ (void) zone_setattr(zid, ZONE_ATTR_PG_FLT_DELAY,
+ &pfdelay, 0);
+ }
+
+ if (zone_rss <= lo_thresh) {
+ sleep_time = 120;
+ } else if (zone_rss <= hi_thresh) {
+ sleep_time = 60;
+ } else {
+ sleep_time = 30;
+ }
+
+ debug("sleep %d seconds\n", sleep_time);
+ (void) sleep_shutdown(sleep_time);
+ }
+
+ /* Shutting down, tell the kernel so it doesn't throttle */
+ if (pfdelay > 0) {
+ pfdelay = 0;
+ (void) zone_setattr(zid, ZONE_ATTR_PG_FLT_DELAY, &pfdelay, 0);
+ }
+
+ return (0);
+}
+
+static void
+get_mcap_tunables()
+{
+ zone_dochandle_t handle;
+ struct zone_attrtab attr;
+
+ over_cmd[0] = '\0';
+ if ((handle = zonecfg_init_handle()) == NULL)
+ return;
+
+ if (zonecfg_get_handle(zonename, handle) != Z_OK)
+ goto done;
+
+ /* Reset to defaults in case rebooting and settings have changed */
+ over_cmd[0] = '\0';
+ skip_vmusage = B_FALSE;
+ skip_pageout = B_FALSE;
+ skip_pf_throttle = B_FALSE;
+
+ if (zonecfg_setattrent(handle) != Z_OK)
+ goto done;
+ while (zonecfg_getattrent(handle, &attr) == Z_OK) {
+ if (strcmp(TUNE_CMD, attr.zone_attr_name) == 0) {
+ (void) strlcpy(over_cmd, attr.zone_attr_value,
+ sizeof (over_cmd));
+ } else if (strcmp(TUNE_NVMU, attr.zone_attr_name) == 0) {
+ if (strcmp("true", attr.zone_attr_value) == 0)
+ skip_vmusage = B_TRUE;
+ } else if (strcmp(TUNE_NPAGE, attr.zone_attr_name) == 0) {
+ if (strcmp("true", attr.zone_attr_value) == 0)
+ skip_pageout = B_TRUE;
+ } else if (strcmp(TUNE_NPFTHROT, attr.zone_attr_name) == 0) {
+ if (strcmp("true", attr.zone_attr_value) == 0)
+ skip_pf_throttle = B_TRUE;
+ }
+ }
+ (void) zonecfg_endattrent(handle);
+
+done:
+ zonecfg_fini_handle(handle);
+}
+
+/* ARGSUSED */
+static int
+chk_proc_fs(void *data, const char *spec, const char *dir,
+ const char *fstype, const char *opt)
+{
+ if (fstype != NULL && strcmp(fstype, "proc") == 0)
+ *((boolean_t *)data) = B_TRUE;
+
+ return (0);
+}
+
+static boolean_t
+has_proc()
+{
+ brand_handle_t bh;
+ boolean_t fnd = B_FALSE;
+
+ if ((bh = brand_open(brand_name)) != NULL) {
+ (void) brand_platform_iter_mounts(bh, chk_proc_fs, &fnd);
+ }
+
+ brand_close(bh);
+ return (fnd);
+}
+
+/*
+ * We run this loop for brands with no /proc to simply update the RSS, using
+ * the cheap GZ /proc data, every 5 minutes.
+ */
+static void
+no_procfs()
+{
+ DIR *pdir = NULL;
+ struct dirent *dent;
+ uint64_t zone_rss_bytes;
+
+ (void) sleep_shutdown(30);
+ while (!shutting_down) {
+ /*
+ * Just do the fast, cheap RSS calculation using the rss value
+ * in psinfo_t. Because that's per-process, it can lead to
+ * double counting some memory and overestimating how much is
+ * being used. Since there is no /proc in the zone, we use the
+ * GZ /proc and check for the correct zone.
+ */
+ if ((pdir = opendir("/proc")) == NULL)
+ return;
+
+ fast_rss = 0;
+ while (!shutting_down && (dent = readdir(pdir)) != NULL) {
+ pid_t pid;
+ int psfd;
+ int64_t rss;
+ char pathbuf[MAXPATHLEN];
+ psinfo_t psinfo;
+
+ if (strcmp(".", dent->d_name) == 0 ||
+ strcmp("..", dent->d_name) == 0)
+ continue;
+
+ pid = atoi(dent->d_name);
+ if (pid == 0 || pid == 1)
+ continue;
+
+ (void) snprintf(pathbuf, sizeof (pathbuf),
+ "/proc/%d/psinfo", pid);
+
+ rss = 0;
+ if ((psfd = open(pathbuf, O_RDONLY, 0000)) != -1) {
+ if (pread(psfd, &psinfo, sizeof (psinfo), 0) ==
+ sizeof (psinfo)) {
+ if (psinfo.pr_zoneid == zid)
+ rss = (int64_t)psinfo.pr_rssize;
+ }
+
+ (void) close(psfd);
+ }
+
+ fast_rss += rss;
+ }
+
+ (void) closedir(pdir);
+
+ if (shutting_down)
+ return;
+
+ zone_rss_bytes = fast_rss * 1024;
+ /* Use the zone's approx. RSS in the kernel */
+ (void) zone_setattr(zid, ZONE_ATTR_RSS, &zone_rss_bytes, 0);
+
+ (void) sleep_shutdown(300);
+ }
+}
+
+/*
+ * Thread that checks zone's memory usage and when over the cap, goes through
+ * the zone's process list trying to pageout processes to get under the cap.
+ */
+static void
+mcap_zone()
+{
+ DIR *pdir = NULL;
+ int64_t excess;
+
+ debug("thread startup\n");
+
+ get_mcap_tunables();
+
+ /*
+ * If the zone has no /proc filesystem, we can't use the fast algorithm
+ * to check RSS or pageout any processes. All we can do is periodically
+ * update it's RSS kstat using the expensive sycall.
+ */
+ if (!has_proc()) {
+ no_procfs();
+ debug("thread shutdown\n");
+ return;
+ }
+
+ /*
+ * When first starting it is likely lots of other zones are starting
+ * too because the system is booting. Since we just started the zone
+ * we're not worried about being over the cap right away, so we let
+ * things settle a bit and tolerate some older data here to minimize
+ * the load on the system.
+ */
+ (void) sleep_shutdown(15); /* wait 15 secs. so the zone can get going */
+
+ /* Wait until zone's /proc is mounted */
+ while (!shutting_down) {
+ struct stat st;
+
+ if (stat(zoneproc, &st) == 0 &&
+ strcmp(st.st_fstype, "proc") == 0)
+ break;
+ sleep_shutdown(5);
+ }
+
+ /* Open zone's /proc and walk entries. */
+ while (!shutting_down) {
+ if ((pdir = opendir(zoneproc)) != NULL)
+ break;
+ sleep_shutdown(5);
+ }
+
+ while (!shutting_down) {
+ struct dirent *dirent;
+
+ /* Wait until we've gone over the cap. */
+ excess = check_suspend();
+
+ debug("starting to scan, excess %lldk\n", (long long)excess);
+
+ if (over_cmd[0] != '\0') {
+ uint64_t zone_rss; /* total RSS(KB) */
+
+ debug("run phys_mcap_cmd: %s\n", over_cmd);
+ run_over_cmd();
+
+ zone_rss = get_mem_info();
+ excess = zone_rss - zone_rss_cap;
+ debug("rss %lluKB, cap %lluKB, excess %lldKB\n",
+ zone_rss, zone_rss_cap, excess);
+ if (excess <= 0)
+ continue;
+ }
+
+ while (!shutting_down && (dirent = readdir(pdir)) != NULL) {
+ pid_t pid;
+
+ if (strcmp(".", dirent->d_name) == 0 ||
+ strcmp("..", dirent->d_name) == 0)
+ continue;
+
+ pid = atoi(dirent->d_name);
+ if (pid == 0 || pid == 1)
+ continue;
+
+ if (skip_pageout)
+ (void) sleep_shutdown(2);
+ else
+ excess = pageout_process(pid, excess);
+
+ if (excess <= 0) {
+ debug("apparently under; excess %lld\n",
+ (long long)excess);
+ /* Double check the current excess */
+ excess = check_suspend();
+ }
+ }
+
+ debug("process pass done; excess %lld\n", (long long)excess);
+ rewinddir(pdir);
+
+ if (skip_pageout)
+ (void) sleep_shutdown(120);
+ }
+
+ if (pdir != NULL)
+ (void) closedir(pdir);
+ debug("thread shutdown\n");
+}
+
+void
+create_mcap_thread(zlog_t *zlogp, zoneid_t id)
+{
+ int res;
+ char brandname[MAXNAMELEN];
+
+ shutting_down = 0;
+ zid = id;
+ logp = zlogp;
+ (void) getzonenamebyid(zid, zonename, sizeof (zonename));
+
+ if (zone_get_zonepath(zonename, zonepath, sizeof (zonepath)) != 0)
+ zerror(zlogp, B_FALSE, "zone %s missing zonepath", zonename);
+
+ brandname[0] = '\0';
+ if (zone_get_brand(zonename, brandname, sizeof (brandname)) != 0)
+ zerror(zlogp, B_FALSE, "zone %s missing brand", zonename);
+
+ /* all but the lx brand currently use /proc */
+ if (strcmp(brandname, "lx") == 0) {
+ (void) snprintf(zoneproc, sizeof (zoneproc),
+ "%s/root/native/proc", zonepath);
+ } else {
+ (void) snprintf(zoneproc, sizeof (zoneproc), "%s/root/proc",
+ zonepath);
+ }
+
+ (void) snprintf(debug_log, sizeof (debug_log), "%s/mcap_debug.log",
+ zonepath);
+
+ res = thr_create(NULL, NULL, (void *(*)(void *))mcap_zone, NULL, NULL,
+ &mcap_tid);
+ if (res != 0) {
+ zerror(zlogp, B_FALSE, "error %d creating memory cap thread",
+ res);
+ mcap_tid = 0;
+ }
+}
+
+void
+destroy_mcap_thread()
+{
+ if (mcap_tid != 0) {
+ shutting_down = 1;
+ (void) cond_signal(&shutdown_cv);
+ (void) thr_join(mcap_tid, NULL, NULL);
+ mcap_tid = 0;
+ }
+}
diff --git a/usr/src/cmd/zoneadmd/vplat.c b/usr/src/cmd/zoneadmd/vplat.c
index b9954b81b3..e63f87d2a0 100644
--- a/usr/src/cmd/zoneadmd/vplat.c
+++ b/usr/src/cmd/zoneadmd/vplat.c
@@ -137,6 +137,9 @@
#define ALT_MOUNT(mount_cmd) ((mount_cmd) != Z_MNT_BOOT)
+/* Number of times to retry unmounting if it fails */
+#define UMOUNT_RETRIES 30
+
/* a reasonable estimate for the number of lwps per process */
#define LWPS_PER_PROCESS 10
@@ -165,6 +168,7 @@ extern int getnetmaskbyaddr(struct in_addr, struct in_addr *);
/* from zoneadmd */
extern char query_hook[];
+extern char post_statechg_hook[];
/*
* For each "net" resource configured in zonecfg, we track a zone_addr_list_t
@@ -201,7 +205,7 @@ autofs_cleanup(zoneid_t zoneid)
/*
* Ask autofs to unmount all trigger nodes in the given zone.
*/
- return (_autofssys(AUTOFS_UNMOUNTALL, (void *)zoneid));
+ return (_autofssys(AUTOFS_UNMOUNTALL, (void *)((uintptr_t)zoneid)));
}
static void
@@ -592,6 +596,24 @@ root_to_lu(zlog_t *zlogp, char *zroot, size_t zrootlen, boolean_t isresolved)
}
/*
+ * Perform brand-specific cleanup if we are unable to unmount a FS.
+ */
+static void
+brand_umount_cleanup(zlog_t *zlogp, char *path)
+{
+ char cmdbuf[2 * MAXPATHLEN];
+
+ if (post_statechg_hook[0] == '\0')
+ return;
+
+ if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook,
+ ZONE_STATE_DOWN, Z_UNMOUNT, path) > sizeof (cmdbuf))
+ return;
+
+ (void) do_subproc(zlogp, cmdbuf, NULL, B_FALSE);
+}
+
+/*
* The general strategy for unmounting filesystems is as follows:
*
* - Remote filesystems may be dead, and attempting to contact them as
@@ -624,6 +646,7 @@ static int
unmount_filesystems(zlog_t *zlogp, zoneid_t zoneid, boolean_t unmount_cmd)
{
int error = 0;
+ int fail = 0;
FILE *mnttab;
struct mnttab *mnts;
uint_t nmnt;
@@ -711,18 +734,39 @@ unmount_filesystems(zlog_t *zlogp, zoneid_t zoneid, boolean_t unmount_cmd)
if (umount2(path, MS_FORCE) == 0) {
unmounted = B_TRUE;
stuck = B_FALSE;
+ fail = 0;
} else {
/*
- * The first failure indicates a
- * mount we won't be able to get
- * rid of automatically, so we
- * bail.
+ * We may hit a failure here if there
+ * is an app in the GZ with an open
+ * pipe into the zone (commonly into
+ * the zone's /var/run). This type
+ * of app will notice the closed
+ * connection and cleanup, but it may
+ * take a while and we have no easy
+ * way to notice that. To deal with
+ * this case, we will wait and retry
+ * a few times before we give up.
*/
- error++;
- zerror(zlogp, B_FALSE,
- "unable to unmount '%s'", path);
- free_mnttable(mnts, nmnt);
- goto out;
+ fail++;
+ if (fail < (UMOUNT_RETRIES - 1)) {
+ zerror(zlogp, B_FALSE,
+ "unable to unmount '%s', "
+ "retrying in 2 seconds",
+ path);
+ (void) sleep(2);
+ } else if (fail > UMOUNT_RETRIES) {
+ error++;
+ zerror(zlogp, B_FALSE,
+ "unmount of '%s' failed",
+ path);
+ free_mnttable(mnts, nmnt);
+ goto out;
+ } else {
+ /* Try the hook 2 times */
+ brand_umount_cleanup(zlogp,
+ path);
+ }
}
}
/*
@@ -1060,23 +1104,10 @@ mount_one_dev_symlink_cb(void *arg, const char *source, const char *target)
int
vplat_get_iptype(zlog_t *zlogp, zone_iptype_t *iptypep)
{
- zone_dochandle_t handle;
-
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- return (-1);
- }
- if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
- zerror(zlogp, B_FALSE, "invalid configuration");
- zonecfg_fini_handle(handle);
- return (-1);
- }
- if (zonecfg_get_iptype(handle, iptypep) != Z_OK) {
+ if (zonecfg_get_iptype(snap_hndl, iptypep) != Z_OK) {
zerror(zlogp, B_FALSE, "invalid ip-type configuration");
- zonecfg_fini_handle(handle);
return (-1);
}
- zonecfg_fini_handle(handle);
return (0);
}
@@ -1089,14 +1120,13 @@ static int
mount_one_dev(zlog_t *zlogp, char *devpath, zone_mnt_t mount_cmd)
{
char brand[MAXNAMELEN];
- zone_dochandle_t handle = NULL;
brand_handle_t bh = NULL;
struct zone_devtab ztab;
di_prof_t prof = NULL;
int err;
int retval = -1;
zone_iptype_t iptype;
- const char *curr_iptype;
+ const char *curr_iptype = NULL;
if (di_prof_init(devpath, &prof)) {
zerror(zlogp, B_TRUE, "failed to initialize profile");
@@ -1131,6 +1161,8 @@ mount_one_dev(zlog_t *zlogp, char *devpath, zone_mnt_t mount_cmd)
curr_iptype = "exclusive";
break;
}
+ if (curr_iptype == NULL)
+ abort();
if (brand_platform_iter_devices(bh, zone_name,
mount_one_dev_device_cb, prof, curr_iptype) != 0) {
@@ -1145,28 +1177,19 @@ mount_one_dev(zlog_t *zlogp, char *devpath, zone_mnt_t mount_cmd)
}
/* Add user-specified devices and directories */
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_FALSE, "can't initialize zone handle");
- goto cleanup;
- }
- if (err = zonecfg_get_handle(zone_name, handle)) {
- zerror(zlogp, B_FALSE, "can't get handle for zone "
- "%s: %s", zone_name, zonecfg_strerror(err));
- goto cleanup;
- }
- if (err = zonecfg_setdevent(handle)) {
+ if ((err = zonecfg_setdevent(snap_hndl)) != 0) {
zerror(zlogp, B_FALSE, "%s: %s", zone_name,
zonecfg_strerror(err));
goto cleanup;
}
- while (zonecfg_getdevent(handle, &ztab) == Z_OK) {
+ while (zonecfg_getdevent(snap_hndl, &ztab) == Z_OK) {
if (di_prof_add_dev(prof, ztab.zone_dev_match)) {
zerror(zlogp, B_TRUE, "failed to add "
"user-specified device");
goto cleanup;
}
}
- (void) zonecfg_enddevent(handle);
+ (void) zonecfg_enddevent(snap_hndl);
/* Send profile to kernel */
if (di_prof_commit(prof)) {
@@ -1179,8 +1202,6 @@ mount_one_dev(zlog_t *zlogp, char *devpath, zone_mnt_t mount_cmd)
cleanup:
if (bh != NULL)
brand_close(bh);
- if (handle != NULL)
- zonecfg_fini_handle(handle);
if (prof)
di_prof_fini(prof);
return (retval);
@@ -1675,7 +1696,6 @@ mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd)
char luroot[MAXPATHLEN];
int i, num_fs = 0;
struct zone_fstab *fs_ptr = NULL;
- zone_dochandle_t handle = NULL;
zone_state_t zstate;
brand_handle_t bh;
plat_gmount_cb_data_t cb;
@@ -1699,12 +1719,7 @@ mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd)
goto bad;
}
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- goto bad;
- }
- if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK ||
- zonecfg_setfsent(handle) != Z_OK) {
+ if (zonecfg_setfsent(snap_hndl) != Z_OK) {
zerror(zlogp, B_FALSE, "invalid configuration");
goto bad;
}
@@ -1722,7 +1737,6 @@ mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd)
/* Get a handle to the brand info for this zone */
if ((bh = brand_open(brand)) == NULL) {
zerror(zlogp, B_FALSE, "unable to determine zone brand");
- zonecfg_fini_handle(handle);
return (-1);
}
@@ -1737,7 +1751,6 @@ mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd)
plat_gmount_cb, &cb) != 0) {
zerror(zlogp, B_FALSE, "unable to mount filesystems");
brand_close(bh);
- zonecfg_fini_handle(handle);
return (-1);
}
brand_close(bh);
@@ -1748,13 +1761,10 @@ mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd)
* higher level directories (e.g., /usr) get mounted before
* any beneath them (e.g., /usr/local).
*/
- if (mount_filesystems_fsent(handle, zlogp, &fs_ptr, &num_fs,
+ if (mount_filesystems_fsent(snap_hndl, zlogp, &fs_ptr, &num_fs,
mount_cmd) != 0)
goto bad;
- zonecfg_fini_handle(handle);
- handle = NULL;
-
/*
* Normally when we mount a zone all the zone filesystems
* get mounted relative to rootpath, which is usually
@@ -1833,8 +1843,6 @@ mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd)
return (0);
bad:
- if (handle != NULL)
- zonecfg_fini_handle(handle);
free_fs_data(fs_ptr, num_fs);
return (-1);
}
@@ -2190,13 +2198,7 @@ configure_one_interface(zlog_t *zlogp, zoneid_t zone_id,
if (ioctl(s, SIOCLIFADDIF, (caddr_t)&lifr) < 0) {
/*
* Here, we know that the interface can't be brought up.
- * A similar warning message was already printed out to
- * the console by zoneadm(1M) so instead we log the
- * message to syslog and continue.
*/
- zerror(&logsys, B_TRUE, "WARNING: skipping network interface "
- "'%s' which may not be present/plumbed in the "
- "global zone.", lifr.lifr_name);
(void) close(s);
return (Z_OK);
}
@@ -2409,7 +2411,6 @@ bad:
static int
configure_shared_network_interfaces(zlog_t *zlogp)
{
- zone_dochandle_t handle;
struct zone_nwiftab nwiftab, loopback_iftab;
zoneid_t zoneid;
@@ -2418,29 +2419,19 @@ configure_shared_network_interfaces(zlog_t *zlogp)
return (-1);
}
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- return (-1);
- }
- if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
- zerror(zlogp, B_FALSE, "invalid configuration");
- zonecfg_fini_handle(handle);
- return (-1);
- }
- if (zonecfg_setnwifent(handle) == Z_OK) {
+ if (zonecfg_setnwifent(snap_hndl) == Z_OK) {
for (;;) {
- if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK)
+ if (zonecfg_getnwifent(snap_hndl, &nwiftab) != Z_OK)
break;
+ nwifent_free_attrs(&nwiftab);
if (configure_one_interface(zlogp, zoneid, &nwiftab) !=
Z_OK) {
- (void) zonecfg_endnwifent(handle);
- zonecfg_fini_handle(handle);
+ (void) zonecfg_endnwifent(snap_hndl);
return (-1);
}
}
- (void) zonecfg_endnwifent(handle);
+ (void) zonecfg_endnwifent(snap_hndl);
}
- zonecfg_fini_handle(handle);
if (is_system_labeled()) {
/*
* Labeled zones share the loopback interface
@@ -2894,7 +2885,6 @@ free_ip_interface(zone_addr_list_t *zalist)
static int
configure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid)
{
- zone_dochandle_t handle;
struct zone_nwiftab nwiftab;
char rootpath[MAXPATHLEN];
char path[MAXPATHLEN];
@@ -2903,30 +2893,18 @@ configure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid)
boolean_t added = B_FALSE;
zone_addr_list_t *zalist = NULL, *new;
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- return (-1);
- }
- if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
- zerror(zlogp, B_FALSE, "invalid configuration");
- zonecfg_fini_handle(handle);
- return (-1);
- }
-
- if (zonecfg_setnwifent(handle) != Z_OK) {
- zonecfg_fini_handle(handle);
+ if (zonecfg_setnwifent(snap_hndl) != Z_OK)
return (0);
- }
for (;;) {
- if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK)
+ if (zonecfg_getnwifent(snap_hndl, &nwiftab) != Z_OK)
break;
+ nwifent_free_attrs(&nwiftab);
if (prof == NULL) {
if (zone_get_devroot(zone_name, rootpath,
sizeof (rootpath)) != Z_OK) {
- (void) zonecfg_endnwifent(handle);
- zonecfg_fini_handle(handle);
+ (void) zonecfg_endnwifent(snap_hndl);
zerror(zlogp, B_TRUE,
"unable to determine dev root");
return (-1);
@@ -2934,8 +2912,7 @@ configure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid)
(void) snprintf(path, sizeof (path), "%s%s", rootpath,
"/dev");
if (di_prof_init(path, &prof) != 0) {
- (void) zonecfg_endnwifent(handle);
- zonecfg_fini_handle(handle);
+ (void) zonecfg_endnwifent(snap_hndl);
zerror(zlogp, B_TRUE,
"failed to initialize profile");
return (-1);
@@ -2959,17 +2936,17 @@ configure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid)
nwiftab.zone_nwif_physical) == 0) {
added = B_TRUE;
} else {
- (void) zonecfg_endnwifent(handle);
- zonecfg_fini_handle(handle);
- zerror(zlogp, B_TRUE, "failed to add network device");
- return (-1);
+ /*
+ * Failed to add network device, but the brand hook
+ * might be doing this for us, so keep silent.
+ */
+ continue;
}
/* set up the new IP interface, and add them all later */
new = malloc(sizeof (*new));
if (new == NULL) {
zerror(zlogp, B_TRUE, "no memory for %s",
nwiftab.zone_nwif_physical);
- zonecfg_fini_handle(handle);
free_ip_interface(zalist);
}
bzero(new, sizeof (*new));
@@ -2979,16 +2956,14 @@ configure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid)
}
if (zalist != NULL) {
if ((errno = add_net(zlogp, zoneid, zalist)) != 0) {
- (void) zonecfg_endnwifent(handle);
- zonecfg_fini_handle(handle);
+ (void) zonecfg_endnwifent(snap_hndl);
zerror(zlogp, B_TRUE, "failed to add address");
free_ip_interface(zalist);
return (-1);
}
free_ip_interface(zalist);
}
- (void) zonecfg_endnwifent(handle);
- zonecfg_fini_handle(handle);
+ (void) zonecfg_endnwifent(snap_hndl);
if (prof != NULL && added) {
if (di_prof_commit(prof) != 0) {
@@ -3124,48 +3099,23 @@ remove_datalink_protect(zlog_t *zlogp, zoneid_t zoneid)
/* datalink does not belong to the GZ */
continue;
}
- if (dlstatus != DLADM_STATUS_OK) {
+ if (dlstatus != DLADM_STATUS_OK)
zerror(zlogp, B_FALSE,
+ "clear 'protection' link property: %s",
dladm_status2str(dlstatus, dlerr));
- free(dllinks);
- return (-1);
- }
+
dlstatus = dladm_set_linkprop(dld_handle, *dllink,
"allowed-ips", NULL, 0, DLADM_OPT_ACTIVE);
- if (dlstatus != DLADM_STATUS_OK) {
+ if (dlstatus != DLADM_STATUS_OK)
zerror(zlogp, B_FALSE,
+ "clear 'allowed-ips' link property: %s",
dladm_status2str(dlstatus, dlerr));
- free(dllinks);
- return (-1);
- }
}
free(dllinks);
return (0);
}
static int
-unconfigure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid)
-{
- int dlnum = 0;
-
- /*
- * The kernel shutdown callback for the dls module should have removed
- * all datalinks from this zone. If any remain, then there's a
- * problem.
- */
- if (zone_list_datalink(zoneid, &dlnum, NULL) != 0) {
- zerror(zlogp, B_TRUE, "unable to list network interfaces");
- return (-1);
- }
- if (dlnum != 0) {
- zerror(zlogp, B_FALSE,
- "datalinks remain in zone after shutdown");
- return (-1);
- }
- return (0);
-}
-
-static int
tcp_abort_conn(zlog_t *zlogp, zoneid_t zoneid,
const struct sockaddr_storage *local, const struct sockaddr_storage *remote)
{
@@ -3247,26 +3197,14 @@ static int
get_privset(zlog_t *zlogp, priv_set_t *privs, zone_mnt_t mount_cmd)
{
int error = -1;
- zone_dochandle_t handle;
char *privname = NULL;
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- return (-1);
- }
- if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
- zerror(zlogp, B_FALSE, "invalid configuration");
- zonecfg_fini_handle(handle);
- return (-1);
- }
-
if (ALT_MOUNT(mount_cmd)) {
zone_iptype_t iptype;
- const char *curr_iptype;
+ const char *curr_iptype = NULL;
- if (zonecfg_get_iptype(handle, &iptype) != Z_OK) {
+ if (zonecfg_get_iptype(snap_hndl, &iptype) != Z_OK) {
zerror(zlogp, B_TRUE, "unable to determine ip-type");
- zonecfg_fini_handle(handle);
return (-1);
}
@@ -3279,17 +3217,15 @@ get_privset(zlog_t *zlogp, priv_set_t *privs, zone_mnt_t mount_cmd)
break;
}
- if (zonecfg_default_privset(privs, curr_iptype) == Z_OK) {
- zonecfg_fini_handle(handle);
+ if (zonecfg_default_privset(privs, curr_iptype) == Z_OK)
return (0);
- }
+
zerror(zlogp, B_FALSE,
"failed to determine the zone's default privilege set");
- zonecfg_fini_handle(handle);
return (-1);
}
- switch (zonecfg_get_privset(handle, privs, &privname)) {
+ switch (zonecfg_get_privset(snap_hndl, privs, &privname)) {
case Z_OK:
error = 0;
break;
@@ -3312,7 +3248,6 @@ get_privset(zlog_t *zlogp, priv_set_t *privs, zone_mnt_t mount_cmd)
}
free(privname);
- zonecfg_fini_handle(handle);
return (error);
}
@@ -3325,7 +3260,6 @@ get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
nvlist_t **nvlv = NULL;
int rctlcount = 0;
int error = -1;
- zone_dochandle_t handle;
struct zone_rctltab rctltab;
rctlblk_t *rctlblk = NULL;
uint64_t maxlwps;
@@ -3334,16 +3268,6 @@ get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
*bufp = NULL;
*bufsizep = 0;
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- return (-1);
- }
- if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
- zerror(zlogp, B_FALSE, "invalid configuration");
- zonecfg_fini_handle(handle);
- return (-1);
- }
-
rctltab.zone_rctl_valptr = NULL;
if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) {
zerror(zlogp, B_TRUE, "%s failed", "nvlist_alloc");
@@ -3356,18 +3280,18 @@ get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
* max-processes property. If only the max-processes property is set,
* we add a max-lwps property with a limit derived from max-processes.
*/
- if (zonecfg_get_aliased_rctl(handle, ALIAS_MAXPROCS, &maxprocs)
+ if (zonecfg_get_aliased_rctl(snap_hndl, ALIAS_MAXPROCS, &maxprocs)
== Z_OK &&
- zonecfg_get_aliased_rctl(handle, ALIAS_MAXLWPS, &maxlwps)
+ zonecfg_get_aliased_rctl(snap_hndl, ALIAS_MAXLWPS, &maxlwps)
== Z_NO_ENTRY) {
- if (zonecfg_set_aliased_rctl(handle, ALIAS_MAXLWPS,
+ if (zonecfg_set_aliased_rctl(snap_hndl, ALIAS_MAXLWPS,
maxprocs * LWPS_PER_PROCESS) != Z_OK) {
zerror(zlogp, B_FALSE, "unable to set max-lwps alias");
goto out;
}
}
- if (zonecfg_setrctlent(handle) != Z_OK) {
+ if (zonecfg_setrctlent(snap_hndl) != Z_OK) {
zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setrctlent");
goto out;
}
@@ -3376,7 +3300,7 @@ get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
zerror(zlogp, B_TRUE, "memory allocation failed");
goto out;
}
- while (zonecfg_getrctlent(handle, &rctltab) == Z_OK) {
+ while (zonecfg_getrctlent(snap_hndl, &rctltab) == Z_OK) {
struct zone_rctlvaltab *rctlval;
uint_t i, count;
const char *name = rctltab.zone_rctl_name;
@@ -3458,7 +3382,7 @@ get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
nvlv = NULL;
rctlcount++;
}
- (void) zonecfg_endrctlent(handle);
+ (void) zonecfg_endrctlent(snap_hndl);
if (rctlcount == 0) {
error = 0;
@@ -3483,8 +3407,6 @@ out:
nvlist_free(nvl);
if (nvlv != NULL)
free(nvlv);
- if (handle != NULL)
- zonecfg_fini_handle(handle);
return (error);
}
@@ -3500,7 +3422,7 @@ get_implicit_datasets(zlog_t *zlogp, char **retstr)
> sizeof (cmdbuf))
return (-1);
- if (do_subproc(zlogp, cmdbuf, retstr) != 0)
+ if (do_subproc(zlogp, cmdbuf, retstr, B_FALSE) != 0)
return (-1);
return (0);
@@ -3509,7 +3431,6 @@ get_implicit_datasets(zlog_t *zlogp, char **retstr)
static int
get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep)
{
- zone_dochandle_t handle;
struct zone_dstab dstab;
size_t total, offset, len;
int error = -1;
@@ -3520,30 +3441,20 @@ get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep)
*bufp = NULL;
*bufsizep = 0;
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- return (-1);
- }
- if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
- zerror(zlogp, B_FALSE, "invalid configuration");
- zonecfg_fini_handle(handle);
- return (-1);
- }
-
if (get_implicit_datasets(zlogp, &implicit_datasets) != 0) {
zerror(zlogp, B_FALSE, "getting implicit datasets failed");
goto out;
}
- if (zonecfg_setdsent(handle) != Z_OK) {
+ if (zonecfg_setdsent(snap_hndl) != Z_OK) {
zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent");
goto out;
}
total = 0;
- while (zonecfg_getdsent(handle, &dstab) == Z_OK)
+ while (zonecfg_getdsent(snap_hndl, &dstab) == Z_OK)
total += strlen(dstab.zone_dataset_name) + 1;
- (void) zonecfg_enddsent(handle);
+ (void) zonecfg_enddsent(snap_hndl);
if (implicit_datasets != NULL)
implicit_len = strlen(implicit_datasets);
@@ -3560,12 +3471,12 @@ get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep)
goto out;
}
- if (zonecfg_setdsent(handle) != Z_OK) {
+ if (zonecfg_setdsent(snap_hndl) != Z_OK) {
zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent");
goto out;
}
offset = 0;
- while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
+ while (zonecfg_getdsent(snap_hndl, &dstab) == Z_OK) {
len = strlen(dstab.zone_dataset_name);
(void) strlcpy(str + offset, dstab.zone_dataset_name,
total - offset);
@@ -3573,7 +3484,7 @@ get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep)
if (offset < total - 1)
str[offset++] = ',';
}
- (void) zonecfg_enddsent(handle);
+ (void) zonecfg_enddsent(snap_hndl);
if (implicit_len > 0)
(void) strlcpy(str + offset, implicit_datasets, total - offset);
@@ -3585,8 +3496,6 @@ get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep)
out:
if (error != 0 && str != NULL)
free(str);
- if (handle != NULL)
- zonecfg_fini_handle(handle);
if (implicit_datasets != NULL)
free(implicit_datasets);
@@ -3596,40 +3505,26 @@ out:
static int
validate_datasets(zlog_t *zlogp)
{
- zone_dochandle_t handle;
struct zone_dstab dstab;
zfs_handle_t *zhp;
libzfs_handle_t *hdl;
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- return (-1);
- }
- if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
+ if (zonecfg_setdsent(snap_hndl) != Z_OK) {
zerror(zlogp, B_FALSE, "invalid configuration");
- zonecfg_fini_handle(handle);
- return (-1);
- }
-
- if (zonecfg_setdsent(handle) != Z_OK) {
- zerror(zlogp, B_FALSE, "invalid configuration");
- zonecfg_fini_handle(handle);
return (-1);
}
if ((hdl = libzfs_init()) == NULL) {
zerror(zlogp, B_FALSE, "opening ZFS library");
- zonecfg_fini_handle(handle);
return (-1);
}
- while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
+ while (zonecfg_getdsent(snap_hndl, &dstab) == Z_OK) {
if ((zhp = zfs_open(hdl, dstab.zone_dataset_name,
ZFS_TYPE_FILESYSTEM)) == NULL) {
zerror(zlogp, B_FALSE, "cannot open ZFS dataset '%s'",
dstab.zone_dataset_name);
- zonecfg_fini_handle(handle);
libzfs_fini(hdl);
return (-1);
}
@@ -3644,7 +3539,6 @@ validate_datasets(zlog_t *zlogp)
zerror(zlogp, B_FALSE, "cannot set 'zoned' "
"property for ZFS dataset '%s'\n",
dstab.zone_dataset_name);
- zonecfg_fini_handle(handle);
zfs_close(zhp);
libzfs_fini(hdl);
return (-1);
@@ -3652,9 +3546,8 @@ validate_datasets(zlog_t *zlogp)
zfs_close(zhp);
}
- (void) zonecfg_enddsent(handle);
+ (void) zonecfg_enddsent(snap_hndl);
- zonecfg_fini_handle(handle);
libzfs_fini(hdl);
return (0);
@@ -4388,62 +4281,25 @@ duplicate_reachable_path(zlog_t *zlogp, const char *rootpath)
}
/*
- * Set memory cap and pool info for the zone's resource management
- * configuration.
+ * Set pool info for the zone's resource management configuration.
*/
static int
setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid)
{
int res;
uint64_t tmp;
- struct zone_mcaptab mcap;
char sched[MAXNAMELEN];
- zone_dochandle_t handle = NULL;
char pool_err[128];
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- return (Z_BAD_HANDLE);
- }
-
- if ((res = zonecfg_get_snapshot_handle(zone_name, handle)) != Z_OK) {
- zerror(zlogp, B_FALSE, "invalid configuration");
- zonecfg_fini_handle(handle);
- return (res);
- }
-
- /*
- * If a memory cap is configured, set the cap in the kernel using
- * zone_setattr() and make sure the rcapd SMF service is enabled.
- */
- if (zonecfg_getmcapent(handle, &mcap) == Z_OK) {
- uint64_t num;
- char smf_err[128];
-
- num = (uint64_t)strtoull(mcap.zone_physmem_cap, NULL, 10);
- if (zone_setattr(zoneid, ZONE_ATTR_PHYS_MCAP, &num, 0) == -1) {
- zerror(zlogp, B_TRUE, "could not set zone memory cap");
- zonecfg_fini_handle(handle);
- return (Z_INVAL);
- }
-
- if (zonecfg_enable_rcapd(smf_err, sizeof (smf_err)) != Z_OK) {
- zerror(zlogp, B_FALSE, "enabling system/rcap service "
- "failed: %s", smf_err);
- zonecfg_fini_handle(handle);
- return (Z_INVAL);
- }
- }
-
/* Get the scheduling class set in the zone configuration. */
- if (zonecfg_get_sched_class(handle, sched, sizeof (sched)) == Z_OK &&
+ if (zonecfg_get_sched_class(snap_hndl, sched, sizeof (sched)) == Z_OK &&
strlen(sched) > 0) {
if (zone_setattr(zoneid, ZONE_ATTR_SCHED_CLASS, sched,
strlen(sched)) == -1)
zerror(zlogp, B_TRUE, "WARNING: unable to set the "
"default scheduling class");
- } else if (zonecfg_get_aliased_rctl(handle, ALIAS_SHARES, &tmp)
+ } else if (zonecfg_get_aliased_rctl(snap_hndl, ALIAS_SHARES, &tmp)
== Z_OK) {
/*
* If the zone has the zone.cpu-shares rctl set then we want to
@@ -4454,7 +4310,7 @@ setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid)
*/
char class_name[PC_CLNMSZ];
- if (zonecfg_get_dflt_sched_class(handle, class_name,
+ if (zonecfg_get_dflt_sched_class(snap_hndl, class_name,
sizeof (class_name)) != Z_OK) {
zerror(zlogp, B_FALSE, "WARNING: unable to determine "
"the zone's scheduling class");
@@ -4487,7 +4343,7 @@ setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid)
* right thing in all cases (reuse or create) based on the current
* zonecfg.
*/
- if ((res = zonecfg_bind_tmp_pool(handle, zoneid, pool_err,
+ if ((res = zonecfg_bind_tmp_pool(snap_hndl, zoneid, pool_err,
sizeof (pool_err))) != Z_OK) {
if (res == Z_POOL || res == Z_POOL_CREATE || res == Z_POOL_BIND)
zerror(zlogp, B_FALSE, "%s: %s\ndedicated-cpu setting "
@@ -4496,14 +4352,13 @@ setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid)
else
zerror(zlogp, B_FALSE, "could not bind zone to "
"temporary pool: %s", zonecfg_strerror(res));
- zonecfg_fini_handle(handle);
return (Z_POOL_BIND);
}
/*
* Check if we need to warn about poold not being enabled.
*/
- if (zonecfg_warn_poold(handle)) {
+ if (zonecfg_warn_poold(snap_hndl)) {
zerror(zlogp, B_FALSE, "WARNING: A range of dedicated-cpus has "
"been specified\nbut the dynamic pool service is not "
"enabled.\nThe system will not dynamically adjust the\n"
@@ -4513,7 +4368,7 @@ setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid)
}
/* The following is a warning, not an error. */
- if ((res = zonecfg_bind_pool(handle, zoneid, pool_err,
+ if ((res = zonecfg_bind_pool(snap_hndl, zoneid, pool_err,
sizeof (pool_err))) != Z_OK) {
if (res == Z_POOL_BIND)
zerror(zlogp, B_FALSE, "WARNING: unable to bind to "
@@ -4527,10 +4382,9 @@ setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid)
}
/* Update saved pool name in case it has changed */
- (void) zonecfg_get_poolname(handle, zone_name, pool_name,
+ (void) zonecfg_get_poolname(snap_hndl, zone_name, pool_name,
sizeof (pool_name));
- zonecfg_fini_handle(handle);
return (Z_OK);
}
@@ -4631,33 +4485,28 @@ setup_zone_fs_allowed(zone_dochandle_t handle, zlog_t *zlogp, zoneid_t zoneid)
}
static int
-setup_zone_attrs(zlog_t *zlogp, char *zone_namep, zoneid_t zoneid)
+setup_zone_attrs(zlog_t *zlogp, zoneid_t zoneid)
{
- zone_dochandle_t handle;
int res = Z_OK;
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- return (Z_BAD_HANDLE);
- }
- if ((res = zonecfg_get_snapshot_handle(zone_namep, handle)) != Z_OK) {
- zerror(zlogp, B_FALSE, "invalid configuration");
+ if ((res = setup_zone_hostid(snap_hndl, zlogp, zoneid)) != Z_OK)
goto out;
- }
- if ((res = setup_zone_hostid(handle, zlogp, zoneid)) != Z_OK)
- goto out;
-
- if ((res = setup_zone_fs_allowed(handle, zlogp, zoneid)) != Z_OK)
+ if ((res = setup_zone_fs_allowed(snap_hndl, zlogp, zoneid)) != Z_OK)
goto out;
out:
- zonecfg_fini_handle(handle);
return (res);
}
+/*
+ * The zone_did is a persistent debug ID. Each zone should have a unique ID
+ * in the kernel. This is used for things like DTrace which want to monitor
+ * zones across reboots. They can't use the zoneid since that changes on
+ * each boot.
+ */
zoneid_t
-vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd)
+vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd, zoneid_t zone_did)
{
zoneid_t rval = -1;
priv_set_t *privs;
@@ -4673,7 +4522,7 @@ vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd)
tsol_zcent_t *zcent = NULL;
int match = 0;
int doi = 0;
- int flags;
+ int flags = -1;
zone_iptype_t iptype;
if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) {
@@ -4695,6 +4544,8 @@ vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd)
flags = ZCF_NET_EXCL;
break;
}
+ if (flags == -1)
+ abort();
if ((privs = priv_allocset()) == NULL) {
zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
@@ -4798,7 +4649,7 @@ vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd)
xerr = 0;
if ((zoneid = zone_create(kzone, rootpath, privs, rctlbuf,
rctlbufsz, zfsbuf, zfsbufsz, &xerr, match, doi, zlabel,
- flags)) == -1) {
+ flags, zone_did)) == -1) {
if (xerr == ZE_AREMOUNTS) {
if (zonecfg_find_mounts(rootpath, NULL, NULL) < 1) {
zerror(zlogp, B_FALSE,
@@ -4844,7 +4695,7 @@ vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd)
struct brand_attr attr;
char modname[MAXPATHLEN];
- if (setup_zone_attrs(zlogp, zone_name, zoneid) != Z_OK)
+ if (setup_zone_attrs(zlogp, zoneid) != Z_OK)
goto error;
if ((bh = brand_open(brand_name)) == NULL) {
@@ -4902,6 +4753,8 @@ error:
}
if (rctlbuf != NULL)
free(rctlbuf);
+ if (zfsbuf != NULL)
+ free(zfsbuf);
priv_freeset(privs);
if (fp != NULL)
zonecfg_close_scratch(fp);
@@ -5044,6 +4897,8 @@ vplat_bringup(zlog_t *zlogp, zone_mnt_t mount_cmd, zoneid_t zoneid)
return (-1);
}
break;
+ default:
+ abort();
}
}
@@ -5119,7 +4974,8 @@ unmounted:
}
int
-vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting)
+vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting,
+ boolean_t debug)
{
char *kzone;
zoneid_t zoneid;
@@ -5158,16 +5014,12 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting)
goto error;
}
- if (remove_datalink_pool(zlogp, zoneid) != 0) {
+ if (remove_datalink_pool(zlogp, zoneid) != 0)
zerror(zlogp, B_FALSE, "unable clear datalink pool property");
- goto error;
- }
- if (remove_datalink_protect(zlogp, zoneid) != 0) {
+ if (remove_datalink_protect(zlogp, zoneid) != 0)
zerror(zlogp, B_FALSE,
"unable clear datalink protect property");
- goto error;
- }
/*
* The datalinks assigned to the zone will be removed from the NGZ as
@@ -5207,7 +5059,7 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting)
brand_close(bh);
if ((strlen(cmdbuf) > EXEC_LEN) &&
- (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) {
+ (do_subproc(zlogp, cmdbuf, NULL, debug) != Z_OK)) {
zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
goto error;
}
@@ -5239,12 +5091,6 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting)
}
break;
case ZS_EXCLUSIVE:
- if (unconfigure_exclusive_network_interfaces(zlogp,
- zoneid) != 0) {
- zerror(zlogp, B_FALSE, "unable to unconfigure "
- "network interfaces in zone");
- goto error;
- }
status = dladm_zone_halt(dld_handle, zoneid);
if (status != DLADM_STATUS_OK) {
zerror(zlogp, B_FALSE, "unable to notify "
@@ -5281,14 +5127,9 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting)
if (rebooting) {
struct zone_psettab pset_tab;
- zone_dochandle_t handle;
- if ((handle = zonecfg_init_handle()) != NULL &&
- zonecfg_get_handle(zone_name, handle) == Z_OK &&
- zonecfg_lookup_pset(handle, &pset_tab) == Z_OK)
+ if (zonecfg_lookup_pset(snap_hndl, &pset_tab) == Z_OK)
destroy_tmp_pool = B_FALSE;
-
- zonecfg_fini_handle(handle);
}
if (destroy_tmp_pool) {
diff --git a/usr/src/cmd/zoneadmd/zcons.c b/usr/src/cmd/zoneadmd/zcons.c
index 963bfd3100..d5a33133b7 100644
--- a/usr/src/cmd/zoneadmd/zcons.c
+++ b/usr/src/cmd/zoneadmd/zcons.c
@@ -22,7 +22,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2012 Joyent, Inc. All rights reserved.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
*/
/*
@@ -117,9 +117,10 @@
#define CONSOLE_SOCKPATH ZONES_TMPDIR "/%s.console_sock"
+#define ZCONS_RETRY 10
+
static int serverfd = -1; /* console server unix domain socket fd */
char boot_args[BOOTARGS_MAX];
-char bad_boot_arg[BOOTARGS_MAX];
/*
* The eventstream is a simple one-directional flow of messages from the
@@ -129,7 +130,10 @@ char bad_boot_arg[BOOTARGS_MAX];
*/
static int eventstream[2];
-
+/* flag used to cope with race creating master zcons devlink */
+static boolean_t master_zcons_failed = B_FALSE;
+/* flag to track if we've seen a state change when there is no master zcons */
+static boolean_t state_changed = B_FALSE;
int
eventstream_init()
@@ -321,7 +325,7 @@ destroy_console_devs(zlog_t *zlogp)
* interfaces to instantiate a new zone console node. We do a lot of
* sanity checking, and are careful to reuse a console if one exists.
*
- * Once the device is in the device tree, we kick devfsadm via di_init_devs()
+ * Once the device is in the device tree, we kick devfsadm via di_devlink_init()
* to ensure that the appropriate symlinks (to the master and slave console
* devices) are placed in /dev in the global zone.
*/
@@ -407,43 +411,63 @@ devlinks:
* Open the master side of the console and issue the ZC_HOLDSLAVE ioctl,
* which will cause the master to retain a reference to the slave.
* This prevents ttymon from blowing through the slave's STREAMS anchor.
+ *
+ * In very rare cases the open returns ENOENT if devfs doesn't have
+ * everything setup yet due to heavy zone startup load. Wait for
+ * 1 sec. and retry a few times. Even if we can't setup the zone's
+ * console, we still go ahead and boot the zone.
*/
(void) snprintf(conspath, sizeof (conspath), "/dev/zcons/%s/%s",
zone_name, ZCONS_MASTER_NAME);
- if ((masterfd = open(conspath, O_RDWR | O_NOCTTY)) == -1) {
+ for (i = 0; i < ZCONS_RETRY; i++) {
+ masterfd = open(conspath, O_RDWR | O_NOCTTY);
+ if (masterfd >= 0 || errno != ENOENT)
+ break;
+ (void) sleep(1);
+ }
+ if (masterfd == -1) {
zerror(zlogp, B_TRUE, "ERROR: could not open master side of "
"zone console for %s to acquire slave handle", zone_name);
- goto error;
+ master_zcons_failed = B_TRUE;
}
+
(void) snprintf(conspath, sizeof (conspath), "/dev/zcons/%s/%s",
zone_name, ZCONS_SLAVE_NAME);
- if ((slavefd = open(conspath, O_RDWR | O_NOCTTY)) == -1) {
+ for (i = 0; i < ZCONS_RETRY; i++) {
+ slavefd = open(conspath, O_RDWR | O_NOCTTY);
+ if (slavefd >= 0 || errno != ENOENT)
+ break;
+ (void) sleep(1);
+ }
+ if (slavefd == -1)
zerror(zlogp, B_TRUE, "ERROR: could not open slave side of zone"
" console for %s to acquire slave handle", zone_name);
- (void) close(masterfd);
- goto error;
- }
+
/*
* This ioctl can occasionally return ENXIO if devfs doesn't have
* everything plumbed up yet due to heavy zone startup load. Wait for
* 1 sec. and retry a few times before we fail to boot the zone.
*/
- for (i = 0; i < 5; i++) {
- if (ioctl(masterfd, ZC_HOLDSLAVE, (caddr_t)(intptr_t)slavefd)
- == 0) {
- rv = 0;
- break;
- } else if (errno != ENXIO) {
- break;
+ if (masterfd != -1 && slavefd != -1) {
+ for (i = 0; i < ZCONS_RETRY; i++) {
+ if (ioctl(masterfd, ZC_HOLDSLAVE,
+ (caddr_t)(intptr_t)slavefd) == 0) {
+ rv = 0;
+ break;
+ } else if (errno != ENXIO) {
+ break;
+ }
+ (void) sleep(1);
}
- (void) sleep(1);
+ if (rv != 0)
+ zerror(zlogp, B_TRUE, "ERROR: error while acquiring "
+ "slave handle of zone console for %s", zone_name);
}
- if (rv != 0)
- zerror(zlogp, B_TRUE, "ERROR: error while acquiring slave "
- "handle of zone console for %s", zone_name);
- (void) close(slavefd);
- (void) close(masterfd);
+ if (slavefd != -1)
+ (void) close(slavefd);
+ if (masterfd != -1)
+ (void) close(masterfd);
error:
if (ddef_hdl)
@@ -653,14 +677,6 @@ event_message(int clifd, char *clilocale, zone_evt_t evt)
case Z_EVT_ZONE_BOOTFAILED:
str = "NOTICE: Zone boot failed";
break;
- case Z_EVT_ZONE_BADARGS:
- /*LINTED*/
- (void) snprintf(lmsg, sizeof (lmsg),
- localize_msg(clilocale,
- "WARNING: Ignoring invalid boot arguments: %s"),
- bad_boot_arg);
- lstr = lmsg;
- break;
default:
return;
}
@@ -854,7 +870,6 @@ init_console(zlog_t *zlogp)
if (init_console_dev(zlogp) == -1) {
zerror(zlogp, B_FALSE,
"console setup: device initialization failed");
- return (-1);
}
if ((serverfd = init_console_sock(zlogp)) == -1) {
@@ -866,6 +881,17 @@ init_console(zlog_t *zlogp)
}
/*
+ * Maintain a simple flag that tracks if we have seen at least one state
+ * change. This is currently only used to handle the special case where we are
+ * running without a console device, which is what normally drives shutdown.
+ */
+void
+zcons_statechanged()
+{
+ state_changed = B_TRUE;
+}
+
+/*
* serve_console() is the master loop for driving console I/O. It is also the
* routine which is ultimately responsible for "pulling the plug" on zoneadmd
* when it realizes that the daemon should shut down.
@@ -883,6 +909,7 @@ serve_console(zlog_t *zlogp)
int masterfd;
zone_state_t zstate;
char conspath[MAXPATHLEN];
+ static boolean_t cons_warned = B_FALSE;
(void) snprintf(conspath, sizeof (conspath),
"/dev/zcons/%s/%s", zone_name, ZCONS_MASTER_NAME);
@@ -890,6 +917,46 @@ serve_console(zlog_t *zlogp)
for (;;) {
masterfd = open(conspath, O_RDWR|O_NONBLOCK|O_NOCTTY);
if (masterfd == -1) {
+ if (master_zcons_failed) {
+ /*
+ * If we don't have a console and the zone is
+ * not shutting down, there may have been a
+ * race/failure with devfs while creating the
+ * console. In this case we want to leave the
+ * zone up, even without a console, so
+ * periodically recheck.
+ */
+ int i;
+
+ /*
+ * In the normal flow of this loop, we use
+ * do_console_io to give things a chance to get
+ * going first. However, in this case we can't
+ * use that, so we have to wait for at least
+ * one state change before checking the state.
+ */
+ for (i = 0; i < 60; i++) {
+ if (state_changed)
+ break;
+ (void) sleep(1);
+ }
+
+ if (i < 60 && zone_get_state(zone_name,
+ &zstate) == Z_OK &&
+ (zstate == ZONE_STATE_READY ||
+ zstate == ZONE_STATE_RUNNING)) {
+ if (!cons_warned) {
+ zerror(zlogp, B_FALSE,
+ "WARNING: missing zone "
+ "console for %s",
+ zone_name);
+ cons_warned = B_TRUE;
+ }
+ (void) sleep(ZCONS_RETRY);
+ continue;
+ }
+ }
+
zerror(zlogp, B_TRUE, "failed to open console master");
(void) mutex_lock(&lock);
goto death;
diff --git a/usr/src/cmd/zoneadmd/zoneadmd.c b/usr/src/cmd/zoneadmd/zoneadmd.c
index cb81b77727..bb53b01d16 100644
--- a/usr/src/cmd/zoneadmd/zoneadmd.c
+++ b/usr/src/cmd/zoneadmd/zoneadmd.c
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2014, Joyent, Inc. All rights reserved.
*/
/*
@@ -68,6 +69,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
+#include <sys/time.h>
#include <bsm/adt.h>
#include <bsm/adt_event.h>
@@ -108,6 +110,7 @@
static char *progname;
char *zone_name; /* zone which we are managing */
+zone_dochandle_t snap_hndl; /* handle for snapshot created when ready */
char pool_name[MAXNAMELEN];
char default_brand[MAXNAMELEN];
char brand_name[MAXNAMELEN];
@@ -116,10 +119,11 @@ boolean_t zone_iscluster;
boolean_t zone_islabeled;
boolean_t shutdown_in_progress;
static zoneid_t zone_id;
+static zoneid_t zone_did = 0;
dladm_handle_t dld_handle = NULL;
-static char pre_statechg_hook[2 * MAXPATHLEN];
-static char post_statechg_hook[2 * MAXPATHLEN];
+char pre_statechg_hook[2 * MAXPATHLEN];
+char post_statechg_hook[2 * MAXPATHLEN];
char query_hook[2 * MAXPATHLEN];
zlog_t logsys;
@@ -141,6 +145,9 @@ boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */
#define DEFAULT_LOCALE "C"
+#define RSRC_NET "net"
+#define RSRC_DEV "device"
+
static const char *
z_cmd_name(zone_cmd_t zcmd)
{
@@ -257,34 +264,31 @@ zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...)
}
/*
- * Emit a warning for any boot arguments which are unrecognized. Since
- * Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we
+ * Since Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we
* put the arguments into an argv style array, use getopt to process them,
- * and put the resultant argument string back into outargs.
+ * and put the resultant argument string back into outargs. Non-Solaris brands
+ * may support alternate forms of boot arguments so we must handle that as well.
*
* During the filtering, we pull out any arguments which are truly "boot"
* arguments, leaving only those which are to be passed intact to the
* progenitor process. The one we support at the moment is -i, which
* indicates to the kernel which program should be launched as 'init'.
*
- * A return of Z_INVAL indicates specifically that the arguments are
- * not valid; this is a non-fatal error. Except for Z_OK, all other return
- * values are treated as fatal.
+ * Except for Z_OK, all other return values are treated as fatal.
*/
static int
filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
- char *init_file, char *badarg)
+ char *init_file)
{
int argc = 0, argc_save;
int i;
- int err;
+ int err = Z_OK;
char *arg, *lasts, **argv = NULL, **argv_save;
char zonecfg_args[BOOTARGS_MAX];
char scratchargs[BOOTARGS_MAX], *sargs;
char c;
bzero(outargs, BOOTARGS_MAX);
- bzero(badarg, BOOTARGS_MAX);
/*
* If the user didn't specify transient boot arguments, check
@@ -292,25 +296,10 @@ filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
* and use them if applicable.
*/
if (inargs == NULL || inargs[0] == '\0') {
- zone_dochandle_t handle;
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE,
- "getting zone configuration handle");
- return (Z_BAD_HANDLE);
- }
- err = zonecfg_get_snapshot_handle(zone_name, handle);
- if (err != Z_OK) {
- zerror(zlogp, B_FALSE,
- "invalid configuration snapshot");
- zonecfg_fini_handle(handle);
- return (Z_BAD_HANDLE);
- }
-
bzero(zonecfg_args, sizeof (zonecfg_args));
- (void) zonecfg_get_bootargs(handle, zonecfg_args,
+ (void) zonecfg_get_bootargs(snap_hndl, zonecfg_args,
sizeof (zonecfg_args));
inargs = zonecfg_args;
- zonecfg_fini_handle(handle);
}
if (strlen(inargs) >= BOOTARGS_MAX) {
@@ -390,36 +379,29 @@ filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
break;
case '?':
/*
- * We warn about unknown arguments but pass them
- * along anyway-- if someone wants to develop their
- * own init replacement, they can pass it whatever
- * args they want.
+ * If a brand has its own init, we need to pass along
+ * whatever the user provides. We use the entire
+ * unknown string here so that we correctly handle
+ * unknown long options (e.g. --debug).
*/
- err = Z_INVAL;
(void) snprintf(outargs, BOOTARGS_MAX,
- "%s -%c", outargs, optopt);
- (void) snprintf(badarg, BOOTARGS_MAX,
- "%s -%c", badarg, optopt);
+ "%s %s", outargs, argv[optind - 1]);
break;
}
}
/*
- * For Solaris Zones we warn about and discard non-option arguments.
- * Hence 'boot foo bar baz gub' --> 'boot'. However, to be similar
- * to the kernel, we concat up all the other remaining boot args.
- * and warn on them as a group.
+ * We need to pass along everything else since we don't know what
+ * the brand's init is expecting. For example, an argument list like:
+ * --confdir /foo --debug
+ * will cause the getopt parsing to stop at '/foo' but we need to pass
+ * that on, along with the '--debug'. This does mean that we require
+ * any of our known options (-ifms) to preceed the brand-specific ones.
*/
- if (optind < argc) {
- err = Z_INVAL;
- while (optind < argc) {
- (void) snprintf(badarg, BOOTARGS_MAX, "%s%s%s",
- badarg, strlen(badarg) > 0 ? " " : "",
- argv[optind]);
- optind++;
- }
- zerror(zlogp, B_FALSE, "WARNING: Unused or invalid boot "
- "arguments `%s'.", badarg);
+ while (optind < argc) {
+ (void) snprintf(outargs, BOOTARGS_MAX, "%s %s", outargs,
+ argv[optind]);
+ optind++;
}
done:
@@ -458,7 +440,7 @@ mkzonedir(zlog_t *zlogp)
* Run the brand's pre-state change callback, if it exists.
*/
static int
-brand_prestatechg(zlog_t *zlogp, int state, int cmd)
+brand_prestatechg(zlog_t *zlogp, int state, int cmd, boolean_t debug)
{
char cmdbuf[2 * MAXPATHLEN];
const char *altroot;
@@ -471,7 +453,7 @@ brand_prestatechg(zlog_t *zlogp, int state, int cmd)
state, cmd, altroot) > sizeof (cmdbuf))
return (-1);
- if (do_subproc(zlogp, cmdbuf, NULL) != 0)
+ if (do_subproc(zlogp, cmdbuf, NULL, debug) != 0)
return (-1);
return (0);
@@ -481,7 +463,7 @@ brand_prestatechg(zlog_t *zlogp, int state, int cmd)
* Run the brand's post-state change callback, if it exists.
*/
static int
-brand_poststatechg(zlog_t *zlogp, int state, int cmd)
+brand_poststatechg(zlog_t *zlogp, int state, int cmd, boolean_t debug)
{
char cmdbuf[2 * MAXPATHLEN];
const char *altroot;
@@ -494,7 +476,7 @@ brand_poststatechg(zlog_t *zlogp, int state, int cmd)
state, cmd, altroot) > sizeof (cmdbuf))
return (-1);
- if (do_subproc(zlogp, cmdbuf, NULL) != 0)
+ if (do_subproc(zlogp, cmdbuf, NULL, debug) != 0)
return (-1);
return (0);
@@ -533,35 +515,44 @@ notify_zonestatd(zoneid_t zoneid)
* subcommand.
*/
static int
-zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate)
+zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate, boolean_t debug)
{
int err;
+ boolean_t snapped = B_FALSE;
- if (brand_prestatechg(zlogp, zstate, Z_READY) != 0)
- return (-1);
-
+ if ((snap_hndl = zonecfg_init_handle()) == NULL) {
+ zerror(zlogp, B_TRUE, "getting zone configuration handle");
+ goto bad;
+ }
if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) {
zerror(zlogp, B_FALSE, "unable to create snapshot: %s",
zonecfg_strerror(err));
goto bad;
}
+ snapped = B_TRUE;
- if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) {
- if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
- zerror(zlogp, B_FALSE, "destroying snapshot: %s",
- zonecfg_strerror(err));
+ if (zonecfg_get_snapshot_handle(zone_name, snap_hndl) != Z_OK) {
+ zerror(zlogp, B_FALSE, "invalid configuration snapshot");
goto bad;
}
+
+ if (zone_did == 0)
+ zone_did = zone_get_did(zone_name);
+
+ if (brand_prestatechg(zlogp, zstate, Z_READY, debug) != 0)
+ goto bad;
+
+ if ((zone_id = vplat_create(zlogp, mount_cmd, zone_did)) == -1)
+ goto bad;
+
if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) {
bringup_failure_recovery = B_TRUE;
- (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE);
- if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
- zerror(zlogp, B_FALSE, "destroying snapshot: %s",
- zonecfg_strerror(err));
+ (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE,
+ debug);
goto bad;
}
- if (brand_poststatechg(zlogp, zstate, Z_READY) != 0)
+ if (brand_poststatechg(zlogp, zstate, Z_READY, debug) != 0)
goto bad;
return (0);
@@ -571,7 +562,13 @@ bad:
* If something goes wrong, we up the zones's state to the target
* state, READY, and then invoke the hook as if we're halting.
*/
- (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT);
+ (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT, debug);
+ if (snapped)
+ if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
+ zerror(zlogp, B_FALSE, "destroying snapshot: %s",
+ zonecfg_strerror(err));
+ zonecfg_fini_handle(snap_hndl);
+ snap_hndl = NULL;
return (-1);
}
@@ -686,6 +683,8 @@ mount_early_fs(void *data, const char *spec, const char *dir,
char opt_buf[MAX_MNTOPT_STR];
int optlen = 0;
int mflag = MS_DATA;
+ int i;
+ int ret;
(void) ct_tmpl_clear(tmpl_fd);
/*
@@ -713,9 +712,26 @@ mount_early_fs(void *data, const char *spec, const char *dir,
optlen = MAX_MNTOPT_STR;
mflag = MS_OPTIONSTR;
}
- if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0)
- _exit(errno);
- _exit(0);
+
+ /*
+ * There is an obscure race condition which can cause mount
+ * to return EBUSY. This happens for example on the mount
+ * of the zone's /etc/svc/volatile file system if there is
+ * a GZ process running svcs -Z, which will touch the
+ * mountpoint, just as we're trying to do the mount. To cope
+ * with this, we retry up to 3 times to let this transient
+ * process get out of the way.
+ */
+ for (i = 0; i < 3; i++) {
+ ret = 0;
+ if (mount(spec, dir, mflag, fstype, NULL, 0, opt,
+ optlen) != 0)
+ ret = errno;
+ if (ret != EBUSY)
+ break;
+ (void) sleep(1);
+ }
+ _exit(ret);
}
/* parent */
@@ -739,12 +755,150 @@ mount_early_fs(void *data, const char *spec, const char *dir,
}
/*
+ * env variable name format
+ * _ZONECFG_{resource name}_{identifying attr. name}_{property name}
+ * Any dashes (-) in the property names are replaced with underscore (_).
+ */
+static void
+set_zonecfg_env(char *rsrc, char *attr, char *name, char *val)
+{
+ char *p;
+ char nm[MAXNAMELEN];
+
+ if (attr == NULL)
+ (void) snprintf(nm, sizeof (nm), "_ZONECFG_%s_%s", rsrc,
+ name);
+ else
+ (void) snprintf(nm, sizeof (nm), "_ZONECFG_%s_%s_%s", rsrc,
+ attr, name);
+
+ p = nm;
+ while ((p = strchr(p, '-')) != NULL)
+ *p++ = '_';
+
+ (void) setenv(nm, val, 1);
+}
+
+/*
+ * Export zonecfg network and device properties into environment for the boot
+ * and state change hooks.
+ * If debug is true, export the brand hook debug env. variable as well.
+ *
+ * We could export more of the config in the future, as necessary.
+ */
+static int
+setup_subproc_env(boolean_t debug)
+{
+ int res;
+ struct zone_nwiftab ntab;
+ struct zone_devtab dtab;
+ struct zone_attrtab atab;
+ char net_resources[MAXNAMELEN * 2];
+ char dev_resources[MAXNAMELEN * 2];
+
+ /* snap_hndl is null when called through the set_brand_env code path */
+ if (snap_hndl == NULL)
+ return (Z_OK);
+
+ net_resources[0] = '\0';
+ if ((res = zonecfg_setnwifent(snap_hndl)) != Z_OK)
+ goto done;
+
+ while (zonecfg_getnwifent(snap_hndl, &ntab) == Z_OK) {
+ struct zone_res_attrtab *rap;
+ char *phys;
+
+ phys = ntab.zone_nwif_physical;
+
+ (void) strlcat(net_resources, phys, sizeof (net_resources));
+ (void) strlcat(net_resources, " ", sizeof (net_resources));
+
+ set_zonecfg_env(RSRC_NET, phys, "physical", phys);
+
+ set_zonecfg_env(RSRC_NET, phys, "address",
+ ntab.zone_nwif_address);
+ set_zonecfg_env(RSRC_NET, phys, "allowed-address",
+ ntab.zone_nwif_allowed_address);
+ set_zonecfg_env(RSRC_NET, phys, "defrouter",
+ ntab.zone_nwif_defrouter);
+ set_zonecfg_env(RSRC_NET, phys, "global-nic",
+ ntab.zone_nwif_gnic);
+ set_zonecfg_env(RSRC_NET, phys, "mac-addr", ntab.zone_nwif_mac);
+ set_zonecfg_env(RSRC_NET, phys, "vlan-id",
+ ntab.zone_nwif_vlan_id);
+
+ for (rap = ntab.zone_nwif_attrp; rap != NULL;
+ rap = rap->zone_res_attr_next)
+ set_zonecfg_env(RSRC_NET, phys, rap->zone_res_attr_name,
+ rap->zone_res_attr_value);
+ nwifent_free_attrs(&ntab);
+ }
+
+ (void) setenv("_ZONECFG_net_resources", net_resources, 1);
+
+ (void) zonecfg_endnwifent(snap_hndl);
+
+ if ((res = zonecfg_setdevent(snap_hndl)) != Z_OK)
+ goto done;
+
+ while (zonecfg_getdevent(snap_hndl, &dtab) == Z_OK) {
+ struct zone_res_attrtab *rap;
+ char *match;
+
+ match = dtab.zone_dev_match;
+
+ (void) strlcat(dev_resources, match, sizeof (dev_resources));
+ (void) strlcat(dev_resources, " ", sizeof (dev_resources));
+
+ for (rap = dtab.zone_dev_attrp; rap != NULL;
+ rap = rap->zone_res_attr_next)
+ set_zonecfg_env(RSRC_DEV, match,
+ rap->zone_res_attr_name, rap->zone_res_attr_value);
+ }
+
+ (void) zonecfg_enddevent(snap_hndl);
+
+ if ((res = zonecfg_setattrent(snap_hndl)) != Z_OK)
+ goto done;
+
+ while (zonecfg_getattrent(snap_hndl, &atab) == Z_OK) {
+ set_zonecfg_env("attr", NULL, atab.zone_attr_name,
+ atab.zone_attr_value);
+ }
+
+ (void) zonecfg_endattrent(snap_hndl);
+
+ if (debug)
+ (void) setenv("_ZONEADMD_brand_debug", "1", 1);
+ else
+ (void) setenv("_ZONEADMD_brand_debug", "", 1);
+
+ res = Z_OK;
+
+done:
+ return (res);
+}
+
+void
+nwifent_free_attrs(struct zone_nwiftab *np)
+{
+ struct zone_res_attrtab *rap;
+
+ for (rap = np->zone_nwif_attrp; rap != NULL; ) {
+ struct zone_res_attrtab *tp = rap;
+
+ rap = rap->zone_res_attr_next;
+ free(tp);
+ }
+}
+
+/*
* If retstr is not NULL, the output of the subproc is returned in the str,
* otherwise it is output using zerror(). Any memory allocated for retstr
* should be freed by the caller.
*/
int
-do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr)
+do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr, boolean_t debug)
{
char buf[1024]; /* arbitrary large amount */
char *inbuf;
@@ -763,6 +917,11 @@ do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr)
inbuf = buf;
}
+ if (setup_subproc_env(debug) != Z_OK) {
+ zerror(zlogp, B_FALSE, "failed to setup environment");
+ return (-1);
+ }
+
file = popen(cmdbuf, "r");
if (file == NULL) {
zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf);
@@ -771,8 +930,13 @@ do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr)
while (fgets(inbuf, 1024, file) != NULL) {
if (retstr == NULL) {
- if (zlogp != &logsys)
+ if (zlogp != &logsys) {
+ int last = strlen(inbuf) - 1;
+
+ if (inbuf[last] == '\n')
+ inbuf[last] = '\0';
zerror(zlogp, B_FALSE, "%s", inbuf);
+ }
} else {
char *p;
@@ -802,8 +966,51 @@ do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr)
return (WEXITSTATUS(status));
}
+/*
+ * Get the path for this zone's init(1M) (or equivalent) process. First look
+ * for a zone-specific init-name attr, then get it from the brand.
+ */
static int
-zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
+get_initname(brand_handle_t bh, char *initname, int len)
+{
+ struct zone_attrtab a;
+
+ bzero(&a, sizeof (a));
+ (void) strlcpy(a.zone_attr_name, "init-name",
+ sizeof (a.zone_attr_name));
+
+ if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK) {
+ (void) strlcpy(initname, a.zone_attr_value, len);
+ return (0);
+ }
+
+ return (brand_get_initname(bh, initname, len));
+}
+
+/*
+ * Get the restart-init flag for this zone's init(1M) (or equivalent) process.
+ * First look for a zone-specific restart-init attr, then get it from the brand.
+ */
+static boolean_t
+restartinit(brand_handle_t bh)
+{
+ struct zone_attrtab a;
+
+ bzero(&a, sizeof (a));
+ (void) strlcpy(a.zone_attr_name, "restart-init",
+ sizeof (a.zone_attr_name));
+
+ if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK) {
+ if (strcmp(a.zone_attr_value, "false") == 0)
+ return (B_FALSE);
+ return (B_TRUE);
+ }
+
+ return (brand_restartinit(bh));
+}
+
+static int
+zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate, boolean_t debug)
{
zoneid_t zoneid;
struct stat st;
@@ -813,13 +1020,12 @@ zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
fs_callback_t cb;
brand_handle_t bh;
zone_iptype_t iptype;
- boolean_t links_loaded = B_FALSE;
dladm_status_t status;
char errmsg[DLADM_STRSIZE];
int err;
boolean_t restart_init;
- if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0)
+ if (brand_prestatechg(zlogp, zstate, Z_BOOT, debug) != 0)
return (-1);
if ((zoneid = getzoneidbyname(zone_name)) == -1) {
@@ -867,23 +1073,20 @@ zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
}
/* Get the path for this zone's init(1M) (or equivalent) process. */
- if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) {
+ if (get_initname(bh, init_file, MAXPATHLEN) != 0) {
zerror(zlogp, B_FALSE,
"unable to determine zone's init(1M) location");
brand_close(bh);
goto bad;
}
- /* See if this zone's brand should restart init if it dies. */
- restart_init = brand_restartinit(bh);
+ /* See if we should restart init if it dies. */
+ restart_init = restartinit(bh);
brand_close(bh);
- err = filter_bootargs(zlogp, bootargs, nbootargs, init_file,
- bad_boot_arg);
- if (err == Z_INVAL)
- eventstream_write(Z_EVT_ZONE_BADARGS);
- else if (err != Z_OK)
+ err = filter_bootargs(zlogp, bootargs, nbootargs, init_file);
+ if (err != Z_OK)
goto bad;
assert(init_file[0] != '\0');
@@ -919,7 +1122,6 @@ zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
" %s", dladm_status2str(status, errmsg));
goto bad;
}
- links_loaded = B_TRUE;
}
/*
@@ -928,7 +1130,7 @@ zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
* is booted.
*/
if ((strlen(cmdbuf) > EXEC_LEN) &&
- (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) {
+ (do_subproc(zlogp, cmdbuf, NULL, debug) != Z_OK)) {
zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
goto bad;
}
@@ -960,9 +1162,12 @@ zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
goto bad;
}
- if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0)
+ if (brand_poststatechg(zlogp, zstate, Z_BOOT, debug) != 0)
goto bad;
+ /* Startup a thread to perform memory capping for the zone. */
+ create_mcap_thread(zlogp, zone_id);
+
return (0);
bad:
@@ -970,32 +1175,38 @@ bad:
* If something goes wrong, we up the zones's state to the target
* state, RUNNING, and then invoke the hook as if we're halting.
*/
- (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT);
- if (links_loaded)
- (void) dladm_zone_halt(dld_handle, zoneid);
+ (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT, debug);
+
return (-1);
}
static int
-zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate)
+zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate,
+ boolean_t debug)
{
int err;
- if (brand_prestatechg(zlogp, zstate, Z_HALT) != 0)
+ if (brand_prestatechg(zlogp, zstate, Z_HALT, debug) != 0)
return (-1);
- if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) {
+ /* Shutting down, stop the memcap thread */
+ destroy_mcap_thread();
+
+ if (vplat_teardown(zlogp, unmount_cmd, rebooting, debug) != 0) {
if (!bringup_failure_recovery)
zerror(zlogp, B_FALSE, "unable to destroy zone");
return (-1);
}
+ if (brand_poststatechg(zlogp, zstate, Z_HALT, debug) != 0)
+ return (-1);
+
if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
zerror(zlogp, B_FALSE, "destroying snapshot: %s",
zonecfg_strerror(err));
- if (brand_poststatechg(zlogp, zstate, Z_HALT) != 0)
- return (-1);
+ zonecfg_fini_handle(snap_hndl);
+ snap_hndl = NULL;
return (0);
}
@@ -1177,6 +1388,39 @@ audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val,
}
/*
+ * Log the exit time and status of the zone's init process into
+ * {zonepath}/lastexited. If the zone shutdown normally, the exit status will
+ * be -1, otherwise it will be the exit status as described in wait.3c.
+ * If the zone is configured to restart init, then nothing will be logged if
+ * init exits unexpectedly (the kernel will never upcall in this case).
+ */
+static void
+log_init_exit(int status)
+{
+ char zpath[MAXPATHLEN];
+ char p[MAXPATHLEN];
+ char buf[128];
+ struct timeval t;
+ int fd;
+
+ if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK)
+ return;
+ if (snprintf(p, sizeof (p), "%s/lastexited", zpath) > sizeof (p))
+ return;
+ if (gettimeofday(&t, NULL) != 0)
+ return;
+ if (snprintf(buf, sizeof (buf), "%ld.%ld %d\n", t.tv_sec, t.tv_usec,
+ status) > sizeof (buf))
+ return;
+ if ((fd = open(p, O_WRONLY | O_CREAT | O_TRUNC, 0644)) < 0)
+ return;
+
+ (void) write(fd, buf, strlen(buf));
+
+ (void) close(fd);
+}
+
+/*
* The main routine for the door server that deals with zone state transitions.
*/
/* ARGSUSED */
@@ -1189,9 +1433,11 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
zone_state_t zstate;
zone_cmd_t cmd;
+ boolean_t debug;
+ int init_status;
zone_cmd_arg_t *zargp;
- boolean_t kernelcall;
+ boolean_t kernelcall = B_TRUE;
int rval = -1;
uint64_t uniqid;
@@ -1241,6 +1487,8 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
goto out;
}
cmd = zargp->cmd;
+ debug = zargp->debug;
+ init_status = zargp->status;
if (door_ucred(&uc) != 0) {
zerror(&logsys, B_TRUE, "door_ucred");
@@ -1347,23 +1595,25 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
case ZONE_STATE_INSTALLED:
switch (cmd) {
case Z_READY:
- rval = zone_ready(zlogp, Z_MNT_BOOT, zstate);
+ rval = zone_ready(zlogp, Z_MNT_BOOT, zstate, debug);
if (rval == 0)
eventstream_write(Z_EVT_ZONE_READIED);
+ zcons_statechanged();
break;
case Z_BOOT:
case Z_FORCEBOOT:
eventstream_write(Z_EVT_ZONE_BOOTING);
- if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
- == 0) {
+ if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate,
+ debug)) == 0) {
rval = zone_bootup(zlogp, zargp->bootbuf,
- zstate);
+ zstate, debug);
}
audit_put_record(zlogp, uc, rval, "boot");
+ zcons_statechanged();
if (rval != 0) {
bringup_failure_recovery = B_TRUE;
(void) zone_halt(zlogp, B_FALSE, B_FALSE,
- zstate);
+ zstate, debug);
eventstream_write(Z_EVT_ZONE_BOOTFAILED);
}
break;
@@ -1415,7 +1665,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
rval = zone_ready(zlogp,
strcmp(zargp->bootbuf, "-U") == 0 ?
- Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate);
+ Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate, debug);
if (rval != 0)
break;
@@ -1477,15 +1727,18 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
rval = 0;
break;
case Z_BOOT:
+ case Z_FORCEBOOT:
(void) strlcpy(boot_args, zargp->bootbuf,
sizeof (boot_args));
eventstream_write(Z_EVT_ZONE_BOOTING);
- rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
+ rval = zone_bootup(zlogp, zargp->bootbuf, zstate,
+ debug);
audit_put_record(zlogp, uc, rval, "boot");
+ zcons_statechanged();
if (rval != 0) {
bringup_failure_recovery = B_TRUE;
(void) zone_halt(zlogp, B_FALSE, B_TRUE,
- zstate);
+ zstate, debug);
eventstream_write(Z_EVT_ZONE_BOOTFAILED);
}
boot_args[0] = '\0';
@@ -1493,15 +1746,17 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
case Z_HALT:
if (kernelcall) /* Invalid; can't happen */
abort();
- if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
- != 0)
+ if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate,
+ debug)) != 0)
break;
+ zcons_statechanged();
eventstream_write(Z_EVT_ZONE_HALTED);
break;
case Z_SHUTDOWN:
case Z_REBOOT:
case Z_NOTE_UNINSTALLING:
case Z_MOUNT:
+ case Z_FORCEMOUNT:
case Z_UNMOUNT:
if (kernelcall) /* Invalid; can't happen */
abort();
@@ -1518,7 +1773,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
case Z_UNMOUNT:
if (kernelcall) /* Invalid; can't happen */
abort();
- rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate);
+ rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate, debug);
if (rval == 0) {
eventstream_write(Z_EVT_ZONE_HALTED);
(void) sema_post(&scratch_sem);
@@ -1540,15 +1795,18 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
case ZONE_STATE_DOWN:
switch (cmd) {
case Z_READY:
- if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
- != 0)
+ if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate,
+ debug)) != 0)
break;
- if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) == 0)
+ zcons_statechanged();
+ if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate,
+ debug)) == 0)
eventstream_write(Z_EVT_ZONE_READIED);
else
eventstream_write(Z_EVT_ZONE_HALTED);
break;
case Z_BOOT:
+ case Z_FORCEBOOT:
/*
* We could have two clients racing to boot this
* zone; the second client loses, but his request
@@ -1559,32 +1817,40 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
rval = 0;
break;
case Z_HALT:
- if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
- != 0)
+ if (kernelcall) {
+ log_init_exit(init_status);
+ } else {
+ log_init_exit(-1);
+ }
+ if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate,
+ debug)) != 0)
break;
eventstream_write(Z_EVT_ZONE_HALTED);
+ zcons_statechanged();
break;
case Z_REBOOT:
(void) strlcpy(boot_args, zargp->bootbuf,
sizeof (boot_args));
eventstream_write(Z_EVT_ZONE_REBOOTING);
- if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
- != 0) {
+ if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate,
+ debug)) != 0) {
eventstream_write(Z_EVT_ZONE_BOOTFAILED);
boot_args[0] = '\0';
break;
}
- if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
- != 0) {
+ zcons_statechanged();
+ if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate,
+ debug)) != 0) {
eventstream_write(Z_EVT_ZONE_BOOTFAILED);
boot_args[0] = '\0';
break;
}
- rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
+ rval = zone_bootup(zlogp, zargp->bootbuf, zstate,
+ debug);
audit_put_record(zlogp, uc, rval, "reboot");
if (rval != 0) {
(void) zone_halt(zlogp, B_FALSE, B_TRUE,
- zstate);
+ zstate, debug);
eventstream_write(Z_EVT_ZONE_BOOTFAILED);
}
boot_args[0] = '\0';
@@ -1596,6 +1862,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
break;
case Z_NOTE_UNINSTALLING:
case Z_MOUNT:
+ case Z_FORCEMOUNT:
case Z_UNMOUNT:
zerror(zlogp, B_FALSE, "%s operation is invalid "
"for zones in state '%s'", z_cmd_name(cmd),
@@ -1759,11 +2026,35 @@ top:
* state.
*/
if (zstate > ZONE_STATE_INSTALLED) {
+ static zoneid_t zid;
+
zerror(zlogp, B_FALSE,
"zone '%s': WARNING: zone is in state '%s', but "
"zoneadmd does not appear to be available; "
"restarted zoneadmd to recover.",
zone_name, zone_state_str(zstate));
+
+ /*
+ * Startup a thread to perform memory capping for the
+ * zone. zlogp won't be valid for much longer so use
+ * logsys.
+ */
+ if ((zid = getzoneidbyname(zone_name)) != -1)
+ create_mcap_thread(&logsys, zid);
+
+ /* recover the global configuration snapshot */
+ if (snap_hndl == NULL) {
+ if ((snap_hndl = zonecfg_init_handle())
+ == NULL ||
+ zonecfg_create_snapshot(zone_name)
+ != Z_OK ||
+ zonecfg_get_snapshot_handle(zone_name,
+ snap_hndl) != Z_OK) {
+ zerror(zlogp, B_FALSE, "recovering "
+ "zone configuration handle");
+ goto out;
+ }
+ }
}
(void) fdetach(zone_door_path);
@@ -1777,6 +2068,52 @@ out:
}
/*
+ * Run the query hook with the 'env' parameter. It should return a
+ * string of tab-delimited key-value pairs, each of which should be set
+ * in the environment.
+ *
+ * Because the env_vars string values become part of the environment, the
+ * string is static and we don't free it.
+ *
+ * This function is always called before zoneadmd forks and makes itself
+ * exclusive, so it is possible there could more than one instance of zoneadmd
+ * running in parallel at this point. Thus, we have no zonecfg snapshot and
+ * shouldn't take one yet (i.e. snap_hndl is NULL). Thats ok, since we don't
+ * need any zonecfg info to query for a brand-specific env value.
+ */
+static int
+set_brand_env(zlog_t *zlogp)
+{
+ int ret = 0;
+ static char *env_vars = NULL;
+ char buf[2 * MAXPATHLEN];
+
+ if (query_hook[0] == '\0' || env_vars != NULL)
+ return (0);
+
+ if (snprintf(buf, sizeof (buf), "%s env", query_hook) > sizeof (buf))
+ return (-1);
+
+ if (do_subproc(zlogp, buf, &env_vars, B_FALSE) != 0)
+ return (-1);
+
+ if (env_vars != NULL) {
+ char *sp;
+
+ sp = strtok(env_vars, "\t");
+ while (sp != NULL) {
+ if (putenv(sp) != 0) {
+ ret = -1;
+ break;
+ }
+ sp = strtok(NULL, "\t");
+ }
+ }
+
+ return (ret);
+}
+
+/*
* Setup the brand's pre and post state change callbacks, as well as the
* query callback, if any of these exist.
*/
@@ -2012,6 +2349,11 @@ main(int argc, char *argv[])
}
priv_freeset(privset);
+ if (set_brand_env(zlogp) != 0) {
+ zerror(zlogp, B_FALSE, "Unable to setup brand's environment");
+ return (1);
+ }
+
if (mkzonedir(zlogp) != 0)
return (1);
@@ -2052,6 +2394,13 @@ main(int argc, char *argv[])
(void) sigaddset(&block_cld, SIGCHLD);
(void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
+ /*
+ * The parent only needs stderr after the fork, so close other fd's
+ * that we inherited from zoneadm so that the parent doesn't have those
+ * open while waiting. The child will close the rest after the fork.
+ */
+ closefrom(3);
+
if ((ctfd = init_template()) == -1) {
zerror(zlogp, B_TRUE, "failed to create contract");
return (1);
diff --git a/usr/src/cmd/zoneadmd/zoneadmd.h b/usr/src/cmd/zoneadmd/zoneadmd.h
index d784a303b3..ceab787dab 100644
--- a/usr/src/cmd/zoneadmd/zoneadmd.h
+++ b/usr/src/cmd/zoneadmd/zoneadmd.h
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2014, Joyent, Inc. All rights reserved.
*/
#ifndef _ZONEADMD_H
@@ -90,17 +91,18 @@ extern mutex_t msglock;
extern boolean_t in_death_throes;
extern boolean_t bringup_failure_recovery;
extern char *zone_name;
+extern zone_dochandle_t snap_hndl;
extern char pool_name[MAXNAMELEN];
extern char brand_name[MAXNAMELEN];
extern char default_brand[MAXNAMELEN];
extern char boot_args[BOOTARGS_MAX];
-extern char bad_boot_arg[BOOTARGS_MAX];
extern boolean_t zone_isnative;
extern boolean_t zone_iscluster;
extern dladm_handle_t dld_handle;
extern void zerror(zlog_t *, boolean_t, const char *, ...);
extern char *localize_msg(char *locale, const char *msg);
+extern void nwifent_free_attrs(struct zone_nwiftab *);
/*
* Eventstream interfaces.
@@ -112,8 +114,7 @@ typedef enum {
Z_EVT_ZONE_HALTED,
Z_EVT_ZONE_READIED,
Z_EVT_ZONE_UNINSTALLING,
- Z_EVT_ZONE_BOOTFAILED,
- Z_EVT_ZONE_BADARGS
+ Z_EVT_ZONE_BOOTFAILED
} zone_evt_t;
extern int eventstream_init();
@@ -135,9 +136,9 @@ typedef enum {
/*
* Virtual platform interfaces.
*/
-extern zoneid_t vplat_create(zlog_t *, zone_mnt_t);
+extern zoneid_t vplat_create(zlog_t *, zone_mnt_t, zoneid_t);
extern int vplat_bringup(zlog_t *, zone_mnt_t, zoneid_t);
-extern int vplat_teardown(zlog_t *, boolean_t, boolean_t);
+extern int vplat_teardown(zlog_t *, boolean_t, boolean_t, boolean_t);
extern int vplat_get_iptype(zlog_t *, zone_iptype_t *);
/*
@@ -154,6 +155,13 @@ extern void resolve_lofs(zlog_t *zlogp, char *path, size_t pathlen);
*/
extern int init_console(zlog_t *);
extern void serve_console(zlog_t *);
+extern void zcons_statechanged();
+
+/*
+ * Memory capping thread creation.
+ */
+extern void create_mcap_thread(zlog_t *, zoneid_t);
+extern void destroy_mcap_thread();
/*
* Contract handling.
@@ -163,7 +171,7 @@ extern int init_template(void);
/*
* Routine to manage child processes.
*/
-extern int do_subproc(zlog_t *, char *, char **);
+extern int do_subproc(zlog_t *, char *, char **, boolean_t);
#ifdef __cplusplus
}
diff --git a/usr/src/cmd/zonecfg/Makefile b/usr/src/cmd/zonecfg/Makefile
index ae8f5c11d1..94d725776b 100644
--- a/usr/src/cmd/zonecfg/Makefile
+++ b/usr/src/cmd/zonecfg/Makefile
@@ -27,6 +27,7 @@ PROG= zonecfg
OBJS= zonecfg.o zonecfg_lex.o zonecfg_grammar.tab.o
include ../Makefile.cmd
+include ../Makefile.ctf
# zonecfg has a name clash with main() and libl.so.1. However, zonecfg must
# still export a number of "yy*" (libl) interfaces. Reduce all other symbols
@@ -36,7 +37,7 @@ MAPOPTS = $(MAPFILES:%=-M%)
LFLAGS = -t
YFLAGS = -d -b zonecfg_grammar
-LDLIBS += -lzonecfg -ll -lnsl -ltecla -lzfs -lbrand -ldladm -linetutil
+LDLIBS += -lzonecfg -ll -lnsl -ltecla -lzfs -lbrand -ldladm -linetutil -luuid
CPPFLAGS += -I.
LDFLAGS += $(MAPOPTS)
CLEANFILES += zonecfg_lex.c zonecfg_grammar.tab.c zonecfg_grammar.tab.h
diff --git a/usr/src/cmd/zonecfg/zonecfg.c b/usr/src/cmd/zonecfg/zonecfg.c
index 3dbec383bf..235019ef46 100644
--- a/usr/src/cmd/zonecfg/zonecfg.c
+++ b/usr/src/cmd/zonecfg/zonecfg.c
@@ -23,6 +23,7 @@
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
* Copyright 2014 Gary Mills
+ * Copyright 2013, Joyent Inc. All rights reserved.
*/
/*
@@ -79,6 +80,7 @@
#include <libinetutil.h>
#include <pwd.h>
#include <inet/ip.h>
+#include <uuid/uuid.h>
#include <libzonecfg.h>
#include "zonecfg.h"
@@ -126,7 +128,7 @@ extern int lex_lineno;
#define SHELP_REMOVE "remove [-F] <resource-type> " \
"[ <property-name>=<property-value> ]*\n" \
"\t(global scope)\n" \
- "remove <property-name> <property-value>\n" \
+ "remove [-F] <property-name> <property-value>\n" \
"\t(resource scope)"
#define SHELP_REVERT "revert [-F]"
#define SHELP_SELECT "select <resource-type> { <property-name>=" \
@@ -187,6 +189,8 @@ char *res_types[] = {
"admin",
"fs-allowed",
ALIAS_MAXPROCS,
+ ALIAS_ZFSPRI,
+ "uuid",
NULL
};
@@ -234,6 +238,12 @@ char *prop_types[] = {
"fs-allowed",
ALIAS_MAXPROCS,
"allowed-address",
+ ALIAS_ZFSPRI,
+ "mac-addr",
+ "vlan-id",
+ "global-nic",
+ "property",
+ "uuid",
NULL
};
@@ -299,6 +309,7 @@ static const char *clear_cmds[] = {
"clear " ALIAS_MAXSEMIDS,
"clear " ALIAS_SHARES,
"clear " ALIAS_MAXPROCS,
+ "clear " ALIAS_ZFSPRI,
NULL
};
@@ -349,6 +360,8 @@ static const char *set_cmds[] = {
"set hostid=",
"set fs-allowed=",
"set " ALIAS_MAXPROCS "=",
+ "set " ALIAS_ZFSPRI "=",
+ "set uuid=",
NULL
};
@@ -381,6 +394,7 @@ static const char *info_cmds[] = {
"info admin",
"info fs-allowed",
"info max-processes",
+ "info uuid",
NULL
};
@@ -406,9 +420,20 @@ static const char *net_res_scope_cmds[] = {
"exit",
"help",
"info",
+ "add property ",
+ "clear allowed-address",
+ "clear defrouter",
+ "clear global-nic",
+ "clear mac-addr",
+ "clear vlan-id",
+ "remove property ",
"set address=",
- "set physical=",
+ "set allowed-address=",
"set defrouter=",
+ "set global-nic=",
+ "set mac-addr=",
+ "set physical=",
+ "set vlan-id=",
NULL
};
@@ -418,6 +443,7 @@ static const char *device_res_scope_cmds[] = {
"exit",
"help",
"info",
+ "add property ",
"set match=",
NULL
};
@@ -525,6 +551,7 @@ static zone_dochandle_t handle;
/* used all over the place */
static char zone[ZONENAME_MAX];
static char revert_zone[ZONENAME_MAX];
+static char new_uuid[UUID_PRINTABLE_STRING_LENGTH];
/* global brand operations */
static brand_handle_t brand;
@@ -579,7 +606,6 @@ static struct zone_rctltab old_rctltab, in_progress_rctltab;
static struct zone_attrtab old_attrtab, in_progress_attrtab;
static struct zone_dstab old_dstab, in_progress_dstab;
static struct zone_psettab old_psettab, in_progress_psettab;
-static struct zone_mcaptab old_mcaptab, in_progress_mcaptab;
static struct zone_admintab old_admintab, in_progress_admintab;
static GetLine *gl; /* The gl_get_line() resource object */
@@ -1078,11 +1104,20 @@ usage(boolean_t verbose, uint_t flags)
(void) fprintf(fp, gettext("Valid commands:\n"));
(void) fprintf(fp, "\t%s %s=%s\n", cmd_to_str(CMD_SET),
pt_to_str(PT_ADDRESS), gettext("<IP-address>"));
+ (void) fprintf(fp, "\t%s %s (%s=<value>,%s=<value>)\n",
+ cmd_to_str(CMD_ADD), pt_to_str(PT_NPROP),
+ pt_to_str(PT_NAME), pt_to_str(PT_VALUE));
(void) fprintf(fp, "\t%s %s=%s\n", cmd_to_str(CMD_SET),
pt_to_str(PT_ALLOWED_ADDRESS),
gettext("<IP-address>"));
(void) fprintf(fp, "\t%s %s=%s\n", cmd_to_str(CMD_SET),
pt_to_str(PT_PHYSICAL), gettext("<interface>"));
+ (void) fprintf(fp, "\t%s %s=%s\n", cmd_to_str(CMD_SET),
+ pt_to_str(PT_MAC), gettext("<mac-address>"));
+ (void) fprintf(fp, "\t%s %s=%s\n", cmd_to_str(CMD_SET),
+ pt_to_str(PT_GNIC), gettext("<global zone NIC>"));
+ (void) fprintf(fp, "\t%s %s=%s\n", cmd_to_str(CMD_SET),
+ pt_to_str(PT_VLANID), gettext("<vlan ID>"));
(void) fprintf(fp, gettext("See ifconfig(1M) for "
"details of the <interface> string.\n"));
(void) fprintf(fp, gettext("%s %s is valid "
@@ -1090,10 +1125,12 @@ usage(boolean_t verbose, uint_t flags)
"must not be set.\n"),
cmd_to_str(CMD_SET), pt_to_str(PT_ADDRESS),
pt_to_str(PT_IPTYPE), gettext("shared"));
- (void) fprintf(fp, gettext("%s %s is valid "
- "if the %s property is set to %s, otherwise it "
- "must not be set.\n"),
- cmd_to_str(CMD_SET), pt_to_str(PT_ALLOWED_ADDRESS),
+ (void) fprintf(fp, gettext("%s (%s, %s, %s, %s) are "
+ "valid if the %s property is set to %s, otherwise "
+ "they must not be set.\n"),
+ cmd_to_str(CMD_SET),
+ pt_to_str(PT_ALLOWED_ADDRESS), pt_to_str(PT_MAC),
+ pt_to_str(PT_VLANID), pt_to_str(PT_GNIC),
pt_to_str(PT_IPTYPE), gettext("exclusive"));
(void) fprintf(fp, gettext("\t%s %s=%s\n%s %s "
"is valid if the %s or %s property is set, "
@@ -1109,6 +1146,9 @@ usage(boolean_t verbose, uint_t flags)
"used to configure a device node.\n"),
rt_to_str(resource_scope));
(void) fprintf(fp, gettext("Valid commands:\n"));
+ (void) fprintf(fp, "\t%s %s (%s=<value>,%s=<value>)\n",
+ cmd_to_str(CMD_ADD), pt_to_str(PT_NPROP),
+ pt_to_str(PT_NAME), pt_to_str(PT_VALUE));
(void) fprintf(fp, "\t%s %s=%s\n", cmd_to_str(CMD_SET),
pt_to_str(PT_MATCH), gettext("<device-path>"));
break;
@@ -1240,10 +1280,12 @@ usage(boolean_t verbose, uint_t flags)
if (flags & HELP_USAGE) {
(void) fprintf(fp, "%s:\t%s %s\n", gettext("usage"),
execname, cmd_to_str(CMD_HELP));
- (void) fprintf(fp, "\t%s -z <zone>\t\t\t(%s)\n",
+ (void) fprintf(fp, "\t%s {-z <zone>|-u <uuid>}\t\t\t(%s)\n",
execname, gettext("interactive"));
- (void) fprintf(fp, "\t%s -z <zone> <command>\n", execname);
- (void) fprintf(fp, "\t%s -z <zone> -f <command-file>\n",
+ (void) fprintf(fp, "\t%s {-z <zone>|-u <uuid>} <command>\n",
+ execname);
+ (void) fprintf(fp,
+ "\t%s {-z <zone>|-u <uuid>} -f <command-file>\n",
execname);
}
if (flags & HELP_SUBCMDS) {
@@ -1332,15 +1374,22 @@ usage(boolean_t verbose, uint_t flags)
pt_to_str(PT_MAXSEMIDS));
(void) fprintf(fp, "\t%s\t%s\n", gettext("(global)"),
pt_to_str(PT_SHARES));
+ (void) fprintf(fp, "\t%s\t%s\n", gettext("(global)"),
+ pt_to_str(PT_UUID));
+ (void) fprintf(fp, "\t%s\t%s\n", gettext("(global)"),
+ pt_to_str(PT_ZFSPRI));
(void) fprintf(fp, "\t%s\t\t%s, %s, %s, %s, %s\n",
rt_to_str(RT_FS), pt_to_str(PT_DIR),
pt_to_str(PT_SPECIAL), pt_to_str(PT_RAW),
pt_to_str(PT_TYPE), pt_to_str(PT_OPTIONS));
- (void) fprintf(fp, "\t%s\t\t%s, %s, %s|%s\n", rt_to_str(RT_NET),
+ (void) fprintf(fp, "\t%s\t\t%s, %s, %s, %s, %s, %s, %s %s\n",
+ rt_to_str(RT_NET),
pt_to_str(PT_ADDRESS), pt_to_str(PT_ALLOWED_ADDRESS),
- pt_to_str(PT_PHYSICAL), pt_to_str(PT_DEFROUTER));
- (void) fprintf(fp, "\t%s\t\t%s\n", rt_to_str(RT_DEVICE),
- pt_to_str(PT_MATCH));
+ pt_to_str(PT_GNIC), pt_to_str(PT_MAC),
+ pt_to_str(PT_PHYSICAL), pt_to_str(PT_NPROP),
+ pt_to_str(PT_VLANID), pt_to_str(PT_DEFROUTER));
+ (void) fprintf(fp, "\t%s\t\t%s, %s\n", rt_to_str(RT_DEVICE),
+ pt_to_str(PT_MATCH), pt_to_str(PT_NPROP));
(void) fprintf(fp, "\t%s\t\t%s, %s\n", rt_to_str(RT_RCTL),
pt_to_str(PT_NAME), pt_to_str(PT_VALUE));
(void) fprintf(fp, "\t%s\t\t%s, %s, %s\n", rt_to_str(RT_ATTR),
@@ -1395,6 +1444,9 @@ initialize(boolean_t handle_expected)
if (zonecfg_check_handle(handle) != Z_OK) {
if ((err = zonecfg_get_handle(zone, handle)) == Z_OK) {
got_handle = B_TRUE;
+
+ (void) zonecfg_fix_obsolete(handle);
+
if (zonecfg_get_brand(handle, brandname,
sizeof (brandname)) != Z_OK) {
zerr("Zone %s is inconsistent: missing "
@@ -1662,6 +1714,7 @@ create_func(cmd_t *cmd)
boolean_t force = B_FALSE;
boolean_t attach = B_FALSE;
boolean_t arg_err = B_FALSE;
+ uuid_t uuid;
assert(cmd != NULL);
@@ -1669,7 +1722,7 @@ create_func(cmd_t *cmd)
(void) strlcpy(zone_template, "SUNWdefault", sizeof (zone_template));
optind = 0;
- while ((arg = getopt(cmd->cmd_argc, cmd->cmd_argv, "?a:bFt:"))
+ while ((arg = getopt(cmd->cmd_argc, cmd->cmd_argv, "?a:bFt:X"))
!= EOF) {
switch (arg) {
case '?':
@@ -1695,6 +1748,17 @@ create_func(cmd_t *cmd)
(void) strlcpy(zone_template, optarg,
sizeof (zone_template));
break;
+ case 'X':
+ (void) snprintf(zone_template, sizeof (zone_template),
+ "%s/%s.xml", ZONE_CONFIG_ROOT, zone);
+ err = zonecfg_get_xml_handle(zone_template, handle);
+ if (err != Z_OK) {
+ zone_perror(execname, err, B_TRUE);
+ exit(Z_ERR);
+ }
+ got_handle = B_TRUE;
+ need_to_commit = B_TRUE;
+ return;
default:
short_usage(CMD_CREATE);
arg_err = B_TRUE;
@@ -1751,6 +1815,10 @@ create_func(cmd_t *cmd)
zonecfg_fini_handle(handle);
handle = tmphandle;
got_handle = B_TRUE;
+
+ /* Allocate a new uuid for this new zone */
+ uuid_generate(uuid);
+ uuid_unparse(uuid, new_uuid);
}
/*
@@ -1797,8 +1865,8 @@ export_func(cmd_t *cmd)
struct zone_rctltab rctltab;
struct zone_dstab dstab;
struct zone_psettab psettab;
- struct zone_mcaptab mcaptab;
struct zone_rctlvaltab *valptr;
+ struct zone_res_attrtab *rap;
struct zone_admintab admintab;
int err, arg;
char zonepath[MAXPATHLEN], outfile[MAXPATHLEN], pool[MAXNAMELEN];
@@ -1811,6 +1879,7 @@ export_func(cmd_t *cmd)
FILE *of;
boolean_t autoboot;
zone_iptype_t iptype;
+ uuid_t uuid;
boolean_t need_to_close = B_FALSE;
boolean_t arg_err = B_FALSE;
@@ -1921,6 +1990,14 @@ export_func(cmd_t *cmd)
pt_to_str(PT_FS_ALLOWED), fsallowedp);
}
+ if (zonecfg_get_uuid(zone, uuid) == Z_OK && !uuid_is_null(uuid)) {
+ char suuid[UUID_PRINTABLE_STRING_LENGTH];
+
+ uuid_unparse(uuid, suuid);
+ (void) fprintf(of, "%s %s=%s\n", cmd_to_str(CMD_SET),
+ pt_to_str(PT_UUID), suuid);
+ }
+
if ((err = zonecfg_setfsent(handle)) != Z_OK) {
zone_perror(zone, err, B_FALSE);
goto done;
@@ -1968,7 +2045,17 @@ export_func(cmd_t *cmd)
export_prop(of, PT_ALLOWED_ADDRESS,
nwiftab.zone_nwif_allowed_address);
export_prop(of, PT_PHYSICAL, nwiftab.zone_nwif_physical);
+ export_prop(of, PT_MAC, nwiftab.zone_nwif_mac);
+ export_prop(of, PT_VLANID, nwiftab.zone_nwif_vlan_id);
+ export_prop(of, PT_GNIC, nwiftab.zone_nwif_gnic);
export_prop(of, PT_DEFROUTER, nwiftab.zone_nwif_defrouter);
+ for (rap = nwiftab.zone_nwif_attrp; rap != NULL;
+ rap = rap->zone_res_attr_next) {
+ fprintf(of, "%s %s (%s=%s,%s=\"%s\")\n",
+ cmd_to_str(CMD_ADD), pt_to_str(PT_NPROP),
+ pt_to_str(PT_NAME), rap->zone_res_attr_name,
+ pt_to_str(PT_VALUE), rap->zone_res_attr_value);
+ }
(void) fprintf(of, "%s\n", cmd_to_str(CMD_END));
}
(void) zonecfg_endnwifent(handle);
@@ -1981,21 +2068,17 @@ export_func(cmd_t *cmd)
(void) fprintf(of, "%s %s\n", cmd_to_str(CMD_ADD),
rt_to_str(RT_DEVICE));
export_prop(of, PT_MATCH, devtab.zone_dev_match);
+ for (rap = devtab.zone_dev_attrp; rap != NULL;
+ rap = rap->zone_res_attr_next) {
+ fprintf(of, "%s %s (%s=%s,%s=\"%s\")\n",
+ cmd_to_str(CMD_ADD), pt_to_str(PT_NPROP),
+ pt_to_str(PT_NAME), rap->zone_res_attr_name,
+ pt_to_str(PT_VALUE), rap->zone_res_attr_value);
+ }
(void) fprintf(of, "%s\n", cmd_to_str(CMD_END));
}
(void) zonecfg_enddevent(handle);
- if (zonecfg_getmcapent(handle, &mcaptab) == Z_OK) {
- char buf[128];
-
- (void) fprintf(of, "%s %s\n", cmd_to_str(CMD_ADD),
- rt_to_str(RT_MCAP));
- bytes_to_units(mcaptab.zone_physmem_cap, buf, sizeof (buf));
- (void) fprintf(of, "%s %s=%s\n", cmd_to_str(CMD_SET),
- pt_to_str(PT_PHYSICAL), buf);
- (void) fprintf(of, "%s\n", cmd_to_str(CMD_END));
- }
-
if ((err = zonecfg_setrctlent(handle)) != Z_OK) {
zone_perror(zone, err, B_FALSE);
goto done;
@@ -2149,7 +2232,6 @@ add_resource(cmd_t *cmd)
{
int type;
struct zone_psettab tmp_psettab;
- struct zone_mcaptab tmp_mcaptab;
uint64_t tmp;
uint64_t tmp_mcap;
char pool[MAXNAMELEN];
@@ -2241,9 +2323,10 @@ add_resource(cmd_t *cmd)
* Make sure there isn't already a mem-cap entry or max-swap
* or max-locked rctl.
*/
- if (zonecfg_lookup_mcap(handle, &tmp_mcaptab) == Z_OK ||
- zonecfg_get_aliased_rctl(handle, ALIAS_MAXSWAP, &tmp_mcap)
- == Z_OK ||
+ if (zonecfg_get_aliased_rctl(handle, ALIAS_MAXSWAP,
+ &tmp_mcap) == Z_OK ||
+ zonecfg_get_aliased_rctl(handle, ALIAS_MAXPHYSMEM,
+ &tmp_mcap) == Z_OK ||
zonecfg_get_aliased_rctl(handle, ALIAS_MAXLOCKEDMEM,
&tmp_mcap) == Z_OK) {
zerr(gettext("The %s resource or a related resource "
@@ -2256,7 +2339,6 @@ add_resource(cmd_t *cmd)
"to even the root user; "
"this could render the system impossible\n"
"to administer. Please use caution."));
- bzero(&in_progress_mcaptab, sizeof (in_progress_mcaptab));
return;
case RT_ADMIN:
bzero(&in_progress_admintab, sizeof (in_progress_admintab));
@@ -2359,6 +2441,68 @@ bad:
zonecfg_free_rctl_value_list(rctlvaltab);
}
+/*
+ * Resource attribute ("property" resource embedded on net or dev resource)
+ */
+static void
+do_res_attr(struct zone_res_attrtab **headp, complex_property_ptr_t cpp)
+{
+ complex_property_ptr_t cp;
+ struct zone_res_attrtab *np;
+ int err;
+ boolean_t seen_name = B_FALSE, seen_value = B_FALSE;
+
+ if ((np = calloc(1, sizeof (struct zone_res_attrtab))) == NULL) {
+ zone_perror(zone, Z_NOMEM, B_TRUE);
+ exit(Z_ERR);
+ }
+
+ for (cp = cpp; cp != NULL; cp = cp->cp_next) {
+ switch (cp->cp_type) {
+ case PT_NAME:
+ if (seen_name) {
+ zerr(gettext("%s already specified"),
+ pt_to_str(PT_NAME));
+ goto bad;
+ }
+ (void) strlcpy(np->zone_res_attr_name, cp->cp_value,
+ sizeof (np->zone_res_attr_name));
+ seen_name = B_TRUE;
+ break;
+ case PT_VALUE:
+ if (seen_value) {
+ zerr(gettext("%s already specified"),
+ pt_to_str(PT_VALUE));
+ goto bad;
+ }
+ (void) strlcpy(np->zone_res_attr_value, cp->cp_value,
+ sizeof (np->zone_res_attr_value));
+ seen_value = B_TRUE;
+ break;
+ default:
+ zone_perror(pt_to_str(PT_NPROP), Z_NO_PROPERTY_TYPE,
+ B_TRUE);
+ long_usage(CMD_ADD, B_TRUE);
+ usage(B_FALSE, HELP_PROPS);
+ zonecfg_free_res_attr_list(np);
+ return;
+ }
+ }
+
+ if (!seen_name)
+ zerr(gettext("%s not specified"), pt_to_str(PT_NAME));
+ if (!seen_value)
+ zerr(gettext("%s not specified"), pt_to_str(PT_VALUE));
+
+ err = zonecfg_add_res_attr(headp, np);
+ if (err != Z_OK)
+ zone_perror(pt_to_str(PT_NPROP), err, B_TRUE);
+ return;
+
+bad:
+ zonecfg_free_res_attr_list(np);
+}
+
static void
add_property(cmd_t *cmd)
{
@@ -2426,6 +2570,44 @@ add_property(cmd_t *cmd)
}
}
return;
+ case RT_NET:
+ if (prop_type != PT_NPROP) {
+ zone_perror(pt_to_str(prop_type), Z_NO_PROPERTY_TYPE,
+ B_TRUE);
+ long_usage(CMD_ADD, B_TRUE);
+ usage(B_FALSE, HELP_PROPS);
+ return;
+ }
+ pp = cmd->cmd_property_ptr[0];
+ if (pp->pv_type != PROP_VAL_COMPLEX) {
+ zerr(gettext("A %s value was expected here."),
+ pvt_to_str(PROP_VAL_COMPLEX));
+ saw_error = B_TRUE;
+ return;
+ }
+
+ do_res_attr(&(in_progress_nwiftab.zone_nwif_attrp),
+ pp->pv_complex);
+ return;
+ case RT_DEVICE:
+ if (prop_type != PT_NPROP) {
+ zone_perror(pt_to_str(prop_type), Z_NO_PROPERTY_TYPE,
+ B_TRUE);
+ long_usage(CMD_ADD, B_TRUE);
+ usage(B_FALSE, HELP_PROPS);
+ return;
+ }
+ pp = cmd->cmd_property_ptr[0];
+ if (pp->pv_type != PROP_VAL_COMPLEX) {
+ zerr(gettext("A %s value was expected here."),
+ pvt_to_str(PROP_VAL_COMPLEX));
+ saw_error = B_TRUE;
+ return;
+ }
+
+ do_res_attr(&(in_progress_devtab.zone_dev_attrp),
+ pp->pv_complex);
+ return;
case RT_RCTL:
if (prop_type != PT_VALUE) {
zone_perror(pt_to_str(prop_type), Z_NO_PROPERTY_TYPE,
@@ -2470,7 +2652,7 @@ static boolean_t
gz_invalid_rt_property(int type)
{
return (global_zone && (type == RT_ZONENAME || type == RT_ZONEPATH ||
- type == RT_AUTOBOOT || type == RT_LIMITPRIV ||
+ type == RT_AUTOBOOT || type == RT_LIMITPRIV || type == RT_UUID ||
type == RT_BOOTARGS || type == RT_BRAND || type == RT_SCHED ||
type == RT_IPTYPE || type == RT_HOSTID || type == RT_FS_ALLOWED));
}
@@ -2479,7 +2661,7 @@ static boolean_t
gz_invalid_property(int type)
{
return (global_zone && (type == PT_ZONENAME || type == PT_ZONEPATH ||
- type == PT_AUTOBOOT || type == PT_LIMITPRIV ||
+ type == PT_AUTOBOOT || type == PT_LIMITPRIV || type == PT_UUID ||
type == PT_BOOTARGS || type == PT_BRAND || type == PT_SCHED ||
type == PT_IPTYPE || type == PT_HOSTID || type == PT_FS_ALLOWED));
}
@@ -2530,8 +2712,9 @@ add_func(cmd_t *cmd)
resource_scope = cmd->cmd_res_type;
end_op = CMD_ADD;
add_resource(cmd);
- } else
+ } else {
add_property(cmd);
+ }
}
/*
@@ -2696,6 +2879,32 @@ fill_in_fstab(cmd_t *cmd, struct zone_fstab *fstab, boolean_t fill_in_only)
return (zonecfg_lookup_filesystem(handle, fstab));
}
+/*
+ * Turn an addr that looks like f:2:0:44:5:6C into 0f:02:00:44:05:6c
+ * We're expecting a dst of at least MAXMACADDRLEN size here.
+ */
+static void
+normalize_mac_addr(char *dst, const char *src, int len)
+{
+ char *p, *e, *sep = "";
+ long n;
+ char buf[MAXMACADDRLEN], tmp[4];
+
+ *dst = '\0';
+ (void) strlcpy(buf, src, sizeof (buf));
+ p = strtok(buf, ":");
+ while (p != NULL) {
+ n = strtol(p, &e, 16);
+ if (*e != NULL || n > 0xff)
+ return;
+ (void) snprintf(tmp, sizeof (tmp), "%s%02x", sep, n);
+ (void) strlcat(dst, tmp, len);
+
+ sep = ":";
+ p = strtok(NULL, ":");
+ }
+}
+
static int
fill_in_nwiftab(cmd_t *cmd, struct zone_nwiftab *nwiftab,
boolean_t fill_in_only)
@@ -2729,6 +2938,21 @@ fill_in_nwiftab(cmd_t *cmd, struct zone_nwiftab *nwiftab,
pp->pv_simple,
sizeof (nwiftab->zone_nwif_physical));
break;
+ case PT_MAC:
+ normalize_mac_addr(nwiftab->zone_nwif_mac,
+ pp->pv_simple,
+ sizeof (nwiftab->zone_nwif_mac));
+ break;
+ case PT_VLANID:
+ (void) strlcpy(nwiftab->zone_nwif_vlan_id,
+ pp->pv_simple,
+ sizeof (nwiftab->zone_nwif_vlan_id));
+ break;
+ case PT_GNIC:
+ (void) strlcpy(nwiftab->zone_nwif_gnic,
+ pp->pv_simple,
+ sizeof (nwiftab->zone_nwif_gnic));
+ break;
case PT_DEFROUTER:
(void) strlcpy(nwiftab->zone_nwif_defrouter,
pp->pv_simple,
@@ -2979,6 +3203,8 @@ prompt_remove_resource(cmd_t *cmd, char *rsrc)
num = zonecfg_num_resources(handle, rsrc);
if (num == 0) {
+ if (force)
+ return (B_TRUE);
z_cmd_rt_perror(CMD_REMOVE, cmd->cmd_res_type, Z_NO_ENTRY,
B_TRUE);
return (B_FALSE);
@@ -3007,7 +3233,7 @@ prompt_remove_resource(cmd_t *cmd, char *rsrc)
}
static void
-remove_fs(cmd_t *cmd)
+remove_fs(cmd_t *cmd, boolean_t force)
{
int err;
@@ -3016,13 +3242,16 @@ remove_fs(cmd_t *cmd)
struct zone_fstab fstab;
if ((err = fill_in_fstab(cmd, &fstab, B_FALSE)) != Z_OK) {
- z_cmd_rt_perror(CMD_REMOVE, RT_FS, err, B_TRUE);
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_FS, err, B_TRUE);
return;
}
- if ((err = zonecfg_delete_filesystem(handle, &fstab)) != Z_OK)
- z_cmd_rt_perror(CMD_REMOVE, RT_FS, err, B_TRUE);
- else
+ if ((err = zonecfg_delete_filesystem(handle, &fstab)) != Z_OK) {
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_FS, err, B_TRUE);
+ } else {
need_to_commit = B_TRUE;
+ }
zonecfg_free_fs_option_list(fstab.zone_fs_options);
return;
}
@@ -3041,7 +3270,7 @@ remove_fs(cmd_t *cmd)
}
static void
-remove_net(cmd_t *cmd)
+remove_net(cmd_t *cmd, boolean_t force)
{
int err;
@@ -3050,13 +3279,18 @@ remove_net(cmd_t *cmd)
struct zone_nwiftab nwiftab;
if ((err = fill_in_nwiftab(cmd, &nwiftab, B_FALSE)) != Z_OK) {
- z_cmd_rt_perror(CMD_REMOVE, RT_NET, err, B_TRUE);
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_NET, err,
+ B_TRUE);
return;
}
- if ((err = zonecfg_delete_nwif(handle, &nwiftab)) != Z_OK)
- z_cmd_rt_perror(CMD_REMOVE, RT_NET, err, B_TRUE);
- else
+ if ((err = zonecfg_delete_nwif(handle, &nwiftab)) != Z_OK) {
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_NET, err,
+ B_TRUE);
+ } else {
need_to_commit = B_TRUE;
+ }
return;
}
@@ -3074,7 +3308,7 @@ remove_net(cmd_t *cmd)
}
static void
-remove_device(cmd_t *cmd)
+remove_device(cmd_t *cmd, boolean_t force)
{
int err;
@@ -3083,13 +3317,18 @@ remove_device(cmd_t *cmd)
struct zone_devtab devtab;
if ((err = fill_in_devtab(cmd, &devtab, B_FALSE)) != Z_OK) {
- z_cmd_rt_perror(CMD_REMOVE, RT_DEVICE, err, B_TRUE);
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_DEVICE, err,
+ B_TRUE);
return;
}
- if ((err = zonecfg_delete_dev(handle, &devtab)) != Z_OK)
- z_cmd_rt_perror(CMD_REMOVE, RT_DEVICE, err, B_TRUE);
- else
+ if ((err = zonecfg_delete_dev(handle, &devtab)) != Z_OK) {
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_DEVICE, err,
+ B_TRUE);
+ } else {
need_to_commit = B_TRUE;
+ }
return;
}
@@ -3107,7 +3346,7 @@ remove_device(cmd_t *cmd)
}
static void
-remove_attr(cmd_t *cmd)
+remove_attr(cmd_t *cmd, boolean_t force)
{
int err;
@@ -3116,13 +3355,18 @@ remove_attr(cmd_t *cmd)
struct zone_attrtab attrtab;
if ((err = fill_in_attrtab(cmd, &attrtab, B_FALSE)) != Z_OK) {
- z_cmd_rt_perror(CMD_REMOVE, RT_ATTR, err, B_TRUE);
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_ATTR, err,
+ B_TRUE);
return;
}
- if ((err = zonecfg_delete_attr(handle, &attrtab)) != Z_OK)
- z_cmd_rt_perror(CMD_REMOVE, RT_ATTR, err, B_TRUE);
- else
+ if ((err = zonecfg_delete_attr(handle, &attrtab)) != Z_OK) {
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_ATTR, err,
+ B_TRUE);
+ } else {
need_to_commit = B_TRUE;
+ }
return;
}
@@ -3140,7 +3384,7 @@ remove_attr(cmd_t *cmd)
}
static void
-remove_dataset(cmd_t *cmd)
+remove_dataset(cmd_t *cmd, boolean_t force)
{
int err;
@@ -3149,13 +3393,18 @@ remove_dataset(cmd_t *cmd)
struct zone_dstab dstab;
if ((err = fill_in_dstab(cmd, &dstab, B_FALSE)) != Z_OK) {
- z_cmd_rt_perror(CMD_REMOVE, RT_DATASET, err, B_TRUE);
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_DATASET, err,
+ B_TRUE);
return;
}
- if ((err = zonecfg_delete_ds(handle, &dstab)) != Z_OK)
- z_cmd_rt_perror(CMD_REMOVE, RT_DATASET, err, B_TRUE);
- else
+ if ((err = zonecfg_delete_ds(handle, &dstab)) != Z_OK) {
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_DATASET, err,
+ B_TRUE);
+ } else {
need_to_commit = B_TRUE;
+ }
return;
}
@@ -3173,7 +3422,7 @@ remove_dataset(cmd_t *cmd)
}
static void
-remove_rctl(cmd_t *cmd)
+remove_rctl(cmd_t *cmd, boolean_t force)
{
int err;
@@ -3182,13 +3431,18 @@ remove_rctl(cmd_t *cmd)
struct zone_rctltab rctltab;
if ((err = fill_in_rctltab(cmd, &rctltab, B_FALSE)) != Z_OK) {
- z_cmd_rt_perror(CMD_REMOVE, RT_RCTL, err, B_TRUE);
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_RCTL, err,
+ B_TRUE);
return;
}
- if ((err = zonecfg_delete_rctl(handle, &rctltab)) != Z_OK)
- z_cmd_rt_perror(CMD_REMOVE, RT_RCTL, err, B_TRUE);
- else
+ if ((err = zonecfg_delete_rctl(handle, &rctltab)) != Z_OK) {
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_RCTL, err,
+ B_TRUE);
+ } else {
need_to_commit = B_TRUE;
+ }
zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
return;
}
@@ -3207,72 +3461,90 @@ remove_rctl(cmd_t *cmd)
}
static void
-remove_pset()
+remove_pset(boolean_t force)
{
int err;
struct zone_psettab psettab;
if ((err = zonecfg_lookup_pset(handle, &psettab)) != Z_OK) {
- z_cmd_rt_perror(CMD_REMOVE, RT_DCPU, err, B_TRUE);
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_DCPU, err, B_TRUE);
return;
}
- if ((err = zonecfg_delete_pset(handle)) != Z_OK)
- z_cmd_rt_perror(CMD_REMOVE, RT_DCPU, err, B_TRUE);
- else
+ if ((err = zonecfg_delete_pset(handle)) != Z_OK) {
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_DCPU, err, B_TRUE);
+ } else {
need_to_commit = B_TRUE;
+ }
}
static void
-remove_pcap()
+remove_pcap(boolean_t force)
{
int err;
uint64_t tmp;
if (zonecfg_get_aliased_rctl(handle, ALIAS_CPUCAP, &tmp) != Z_OK) {
- zerr("%s %s: %s", cmd_to_str(CMD_REMOVE), rt_to_str(RT_PCAP),
- zonecfg_strerror(Z_NO_RESOURCE_TYPE));
- saw_error = B_TRUE;
+ if (!force) {
+ zerr("%s %s: %s", cmd_to_str(CMD_REMOVE),
+ rt_to_str(RT_PCAP),
+ zonecfg_strerror(Z_NO_RESOURCE_TYPE));
+ saw_error = B_TRUE;
+ }
return;
}
- if ((err = zonecfg_rm_aliased_rctl(handle, ALIAS_CPUCAP)) != Z_OK)
- z_cmd_rt_perror(CMD_REMOVE, RT_PCAP, err, B_TRUE);
- else
+ if ((err = zonecfg_rm_aliased_rctl(handle, ALIAS_CPUCAP)) != Z_OK) {
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_PCAP, err, B_TRUE);
+ } else {
need_to_commit = B_TRUE;
+ }
}
static void
-remove_mcap()
+remove_mcap(boolean_t force)
{
int err, res1, res2, res3;
uint64_t tmp;
- struct zone_mcaptab mcaptab;
boolean_t revert = B_FALSE;
- res1 = zonecfg_lookup_mcap(handle, &mcaptab);
+ res1 = zonecfg_get_aliased_rctl(handle, ALIAS_MAXPHYSMEM, &tmp);
res2 = zonecfg_get_aliased_rctl(handle, ALIAS_MAXSWAP, &tmp);
res3 = zonecfg_get_aliased_rctl(handle, ALIAS_MAXLOCKEDMEM, &tmp);
/* if none of these exist, there is no resource to remove */
if (res1 != Z_OK && res2 != Z_OK && res3 != Z_OK) {
- zerr("%s %s: %s", cmd_to_str(CMD_REMOVE), rt_to_str(RT_MCAP),
- zonecfg_strerror(Z_NO_RESOURCE_TYPE));
- saw_error = B_TRUE;
+ if (!force) {
+ zerr("%s %s: %s", cmd_to_str(CMD_REMOVE),
+ rt_to_str(RT_MCAP),
+ zonecfg_strerror(Z_NO_RESOURCE_TYPE));
+ saw_error = B_TRUE;
+ }
return;
}
if (res1 == Z_OK) {
- if ((err = zonecfg_delete_mcap(handle)) != Z_OK) {
- z_cmd_rt_perror(CMD_REMOVE, RT_MCAP, err, B_TRUE);
- revert = B_TRUE;
+ if ((err = zonecfg_rm_aliased_rctl(handle, ALIAS_MAXPHYSMEM))
+ != Z_OK) {
+ if (!force) {
+ z_cmd_rt_perror(CMD_REMOVE, RT_MCAP, err,
+ B_TRUE);
+ revert = B_TRUE;
+ }
} else {
need_to_commit = B_TRUE;
}
}
+
if (res2 == Z_OK) {
if ((err = zonecfg_rm_aliased_rctl(handle, ALIAS_MAXSWAP))
!= Z_OK) {
- z_cmd_rt_perror(CMD_REMOVE, RT_MCAP, err, B_TRUE);
- revert = B_TRUE;
+ if (!force) {
+ z_cmd_rt_perror(CMD_REMOVE, RT_MCAP, err,
+ B_TRUE);
+ revert = B_TRUE;
+ }
} else {
need_to_commit = B_TRUE;
}
@@ -3280,8 +3552,11 @@ remove_mcap()
if (res3 == Z_OK) {
if ((err = zonecfg_rm_aliased_rctl(handle, ALIAS_MAXLOCKEDMEM))
!= Z_OK) {
- z_cmd_rt_perror(CMD_REMOVE, RT_MCAP, err, B_TRUE);
- revert = B_TRUE;
+ if (!force) {
+ z_cmd_rt_perror(CMD_REMOVE, RT_MCAP, err,
+ B_TRUE);
+ revert = B_TRUE;
+ }
} else {
need_to_commit = B_TRUE;
}
@@ -3292,7 +3567,7 @@ remove_mcap()
}
static void
-remove_admin(cmd_t *cmd)
+remove_admin(cmd_t *cmd, boolean_t force)
{
int err;
@@ -3301,34 +3576,33 @@ remove_admin(cmd_t *cmd)
struct zone_admintab admintab;
if ((err = fill_in_admintab(cmd, &admintab, B_FALSE)) != Z_OK) {
- z_cmd_rt_perror(CMD_REMOVE, RT_ADMIN,
- err, B_TRUE);
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_ADMIN, err,
+ B_TRUE);
return;
}
if ((err = zonecfg_delete_admin(handle, &admintab,
- zone))
- != Z_OK)
- z_cmd_rt_perror(CMD_REMOVE, RT_ADMIN,
- err, B_TRUE);
- else
+ zone)) != Z_OK) {
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_ADMIN, err,
+ B_TRUE);
+ } else {
need_to_commit = B_TRUE;
+ }
return;
- } else {
- /*
- * unqualified admin removal.
- * remove all admins but prompt if more
- * than one.
- */
- if (!prompt_remove_resource(cmd, "admin"))
- return;
-
- if ((err = zonecfg_delete_admins(handle, zone))
- != Z_OK)
- z_cmd_rt_perror(CMD_REMOVE, RT_ADMIN,
- err, B_TRUE);
- else
- need_to_commit = B_TRUE;
}
+
+ /*
+ * unqualified admin removal.
+ * remove all admins but prompt if more than one.
+ */
+ if (!prompt_remove_resource(cmd, "admin"))
+ return;
+
+ if ((err = zonecfg_delete_admins(handle, zone)) != Z_OK)
+ z_cmd_rt_perror(CMD_REMOVE, RT_ADMIN, err, B_TRUE);
+ else
+ need_to_commit = B_TRUE;
}
static void
@@ -3337,6 +3611,7 @@ remove_resource(cmd_t *cmd)
int type;
int arg;
boolean_t arg_err = B_FALSE;
+ boolean_t force = B_FALSE;
if ((type = cmd->cmd_res_type) == RT_UNKNOWN) {
long_usage(CMD_REMOVE, B_TRUE);
@@ -3351,6 +3626,7 @@ remove_resource(cmd_t *cmd)
arg_err = B_TRUE;
break;
case 'F':
+ force = B_TRUE;
break;
default:
short_usage(CMD_REMOVE);
@@ -3366,34 +3642,34 @@ remove_resource(cmd_t *cmd)
switch (type) {
case RT_FS:
- remove_fs(cmd);
+ remove_fs(cmd, force);
return;
case RT_NET:
- remove_net(cmd);
+ remove_net(cmd, force);
return;
case RT_DEVICE:
- remove_device(cmd);
+ remove_device(cmd, force);
return;
case RT_RCTL:
- remove_rctl(cmd);
+ remove_rctl(cmd, force);
return;
case RT_ATTR:
- remove_attr(cmd);
+ remove_attr(cmd, force);
return;
case RT_DATASET:
- remove_dataset(cmd);
+ remove_dataset(cmd, force);
return;
case RT_DCPU:
- remove_pset();
+ remove_pset(force);
return;
case RT_PCAP:
- remove_pcap();
+ remove_pcap(force);
return;
case RT_MCAP:
- remove_mcap();
+ remove_mcap(force);
return;
case RT_ADMIN:
- remove_admin(cmd);
+ remove_admin(cmd, force);
return;
default:
zone_perror(rt_to_str(type), Z_NO_RESOURCE_TYPE, B_TRUE);
@@ -3410,7 +3686,27 @@ remove_property(cmd_t *cmd)
int err, res_type, prop_type;
property_value_ptr_t pp;
struct zone_rctlvaltab *rctlvaltab;
+ struct zone_res_attrtab *np;
complex_property_ptr_t cx;
+ int arg;
+ boolean_t force = B_FALSE;
+ boolean_t arg_err = B_FALSE;
+
+ optind = 0;
+ while ((arg = getopt(cmd->cmd_argc, cmd->cmd_argv, "F")) != EOF) {
+ switch (arg) {
+ case 'F':
+ force = B_TRUE;
+ break;
+ default:
+ arg_err = B_TRUE;
+ break;
+ }
+ }
+ if (arg_err) {
+ saw_error = B_TRUE;
+ return;
+ }
res_type = resource_scope;
prop_type = cmd->cmd_prop_name[0];
@@ -3452,7 +3748,7 @@ remove_property(cmd_t *cmd)
prop_id = pp->pv_simple;
err = zonecfg_remove_fs_option(&in_progress_fstab,
prop_id);
- if (err != Z_OK)
+ if (err != Z_OK && !force)
zone_perror(pt_to_str(prop_type), err, B_TRUE);
} else {
list_property_ptr_t list;
@@ -3464,12 +3760,62 @@ remove_property(cmd_t *cmd)
break;
err = zonecfg_remove_fs_option(
&in_progress_fstab, prop_id);
- if (err != Z_OK)
+ if (err != Z_OK && !force)
zone_perror(pt_to_str(prop_type), err,
B_TRUE);
}
}
return;
+ case RT_NET: /* FALLTHRU */
+ case RT_DEVICE:
+ if (prop_type != PT_NPROP) {
+ zone_perror(pt_to_str(prop_type), Z_NO_PROPERTY_TYPE,
+ B_TRUE);
+ long_usage(CMD_REMOVE, B_TRUE);
+ usage(B_FALSE, HELP_PROPS);
+ return;
+ }
+ pp = cmd->cmd_property_ptr[0];
+ if (pp->pv_type != PROP_VAL_COMPLEX) {
+ zerr(gettext("A %s value was expected here."),
+ pvt_to_str(PROP_VAL_COMPLEX));
+ saw_error = B_TRUE;
+ return;
+ }
+
+ np = alloca(sizeof (struct zone_res_attrtab));
+ for (cx = pp->pv_complex; cx != NULL; cx = cx->cp_next) {
+ switch (cx->cp_type) {
+ case PT_NAME:
+ (void) strlcpy(np->zone_res_attr_name,
+ cx->cp_value,
+ sizeof (np->zone_res_attr_name));
+ break;
+ case PT_VALUE:
+ (void) strlcpy(np->zone_res_attr_value,
+ cx->cp_value,
+ sizeof (np->zone_res_attr_value));
+ break;
+ default:
+ zone_perror(pt_to_str(prop_type),
+ Z_NO_PROPERTY_TYPE, B_TRUE);
+ long_usage(CMD_REMOVE, B_TRUE);
+ usage(B_FALSE, HELP_PROPS);
+ return;
+ }
+ }
+ np->zone_res_attr_next = NULL;
+
+ if (res_type == RT_NET) {
+ err = zonecfg_remove_res_attr(
+ &(in_progress_nwiftab.zone_nwif_attrp), np);
+ } else { /* RT_DEVICE */
+ err = zonecfg_remove_res_attr(
+ &(in_progress_devtab.zone_dev_attrp), np);
+ }
+ if (err != Z_OK && !force)
+ zone_perror(pt_to_str(prop_type), err, B_TRUE);
+ return;
case RT_RCTL:
if (prop_type != PT_VALUE) {
zone_perror(pt_to_str(prop_type), Z_NO_PROPERTY_TYPE,
@@ -3518,22 +3864,10 @@ remove_property(cmd_t *cmd)
rctlvaltab->zone_rctlval_next = NULL;
err = zonecfg_remove_rctl_value(&in_progress_rctltab,
rctlvaltab);
- if (err != Z_OK)
+ if (err != Z_OK && !force)
zone_perror(pt_to_str(prop_type), err, B_TRUE);
zonecfg_free_rctl_value_list(rctlvaltab);
return;
- case RT_NET:
- if (prop_type != PT_DEFROUTER) {
- zone_perror(pt_to_str(prop_type), Z_NO_PROPERTY_TYPE,
- B_TRUE);
- long_usage(CMD_REMOVE, B_TRUE);
- usage(B_FALSE, HELP_PROPS);
- return;
- } else {
- bzero(&in_progress_nwiftab.zone_nwif_defrouter,
- sizeof (in_progress_nwiftab.zone_nwif_defrouter));
- return;
- }
default:
zone_perror(rt_to_str(res_type), Z_NO_RESOURCE_TYPE, B_TRUE);
long_usage(CMD_REMOVE, B_TRUE);
@@ -3596,8 +3930,7 @@ clear_property(cmd_t *cmd)
case RT_MCAP:
switch (prop_type) {
case PT_PHYSICAL:
- in_progress_mcaptab.zone_physmem_cap[0] = '\0';
- need_to_commit = B_TRUE;
+ remove_aliased_rctl(PT_PHYSICAL, ALIAS_MAXPHYSMEM);
return;
case PT_SWAP:
remove_aliased_rctl(PT_SWAP, ALIAS_MAXSWAP);
@@ -3607,6 +3940,30 @@ clear_property(cmd_t *cmd)
return;
}
break;
+ case RT_NET:
+ switch (prop_type) {
+ case PT_ALLOWED_ADDRESS:
+ in_progress_nwiftab.zone_nwif_allowed_address[0] = '\0';
+ need_to_commit = B_TRUE;
+ return;
+ case PT_DEFROUTER:
+ in_progress_nwiftab.zone_nwif_defrouter[0] = '\0';
+ need_to_commit = B_TRUE;
+ return;
+ case PT_GNIC:
+ in_progress_nwiftab.zone_nwif_gnic[0] = '\0';
+ need_to_commit = B_TRUE;
+ return;
+ case PT_MAC:
+ in_progress_nwiftab.zone_nwif_mac[0] = '\0';
+ need_to_commit = B_TRUE;
+ return;
+ case PT_VLANID:
+ in_progress_nwiftab.zone_nwif_vlan_id[0] = '\0';
+ need_to_commit = B_TRUE;
+ return;
+ }
+ break;
default:
break;
}
@@ -3632,6 +3989,8 @@ clear_global(cmd_t *cmd)
/* FALLTHRU */
case PT_ZONEPATH:
/* FALLTHRU */
+ case PT_UUID:
+ /* FALLTHRU */
case PT_BRAND:
zone_perror(pt_to_str(type), Z_CLEAR_DISALLOW, B_TRUE);
return;
@@ -3694,6 +4053,9 @@ clear_global(cmd_t *cmd)
case PT_SHARES:
remove_aliased_rctl(PT_SHARES, ALIAS_SHARES);
return;
+ case PT_ZFSPRI:
+ remove_aliased_rctl(PT_ZFSPRI, ALIAS_ZFSPRI);
+ return;
case PT_HOSTID:
if ((err = zonecfg_set_hostid(handle, NULL)) != Z_OK)
z_cmd_rt_perror(CMD_CLEAR, RT_HOSTID, err, B_TRUE);
@@ -3739,7 +4101,7 @@ clear_func(cmd_t *cmd)
void
select_func(cmd_t *cmd)
{
- int type, err, res;
+ int type, err;
uint64_t limit;
uint64_t tmp;
@@ -3834,7 +4196,8 @@ select_func(cmd_t *cmd)
return;
case RT_MCAP:
/* if none of these exist, there is no resource to select */
- if ((res = zonecfg_lookup_mcap(handle, &old_mcaptab)) != Z_OK &&
+ if (zonecfg_get_aliased_rctl(handle, ALIAS_MAXPHYSMEM, &limit)
+ != Z_OK &&
zonecfg_get_aliased_rctl(handle, ALIAS_MAXSWAP, &limit)
!= Z_OK &&
zonecfg_get_aliased_rctl(handle, ALIAS_MAXLOCKEDMEM, &limit)
@@ -3843,12 +4206,6 @@ select_func(cmd_t *cmd)
B_TRUE);
global_scope = B_TRUE;
}
- if (res == Z_OK)
- bcopy(&old_mcaptab, &in_progress_mcaptab,
- sizeof (struct zone_mcaptab));
- else
- bzero(&in_progress_mcaptab,
- sizeof (in_progress_mcaptab));
return;
case RT_ADMIN:
if ((err = fill_in_admintab(cmd, &old_admintab, B_FALSE))
@@ -4115,7 +4472,6 @@ set_func(cmd_t *cmd)
boolean_t autoboot;
zone_iptype_t iptype;
boolean_t force_set = B_FALSE;
- size_t physmem_size = sizeof (in_progress_mcaptab.zone_physmem_cap);
uint64_t mem_cap, mem_limit;
float cap;
char *unitp;
@@ -4190,6 +4546,10 @@ set_func(cmd_t *cmd)
res_type = RT_HOSTID;
} else if (prop_type == PT_FS_ALLOWED) {
res_type = RT_FS_ALLOWED;
+ } else if (prop_type == PT_ZFSPRI) {
+ res_type = RT_ZFSPRI;
+ } else if (prop_type == PT_UUID) {
+ res_type = RT_UUID;
} else {
zerr(gettext("Cannot set a resource-specific property "
"from the global scope."));
@@ -4219,10 +4579,12 @@ set_func(cmd_t *cmd)
* A nasty expression but not that complicated:
* 1. fs options are simple or list (tested below)
* 2. rctl value's are complex or list (tested below)
+ * 3. net attr's are complex (tested below)
* Anything else should be simple.
*/
if (!(res_type == RT_FS && prop_type == PT_OPTIONS) &&
!(res_type == RT_RCTL && prop_type == PT_VALUE) &&
+ !(res_type == RT_NET && prop_type == PT_NPROP) &&
(pp->pv_type != PROP_VAL_SIMPLE ||
(prop_id = pp->pv_simple) == NULL)) {
zerr(gettext("A %s value was expected here."),
@@ -4395,6 +4757,9 @@ set_func(cmd_t *cmd)
case RT_SHARES:
set_aliased_rctl(ALIAS_SHARES, prop_type, prop_id);
return;
+ case RT_ZFSPRI:
+ set_aliased_rctl(ALIAS_ZFSPRI, prop_type, prop_id);
+ return;
case RT_HOSTID:
if ((err = zonecfg_set_hostid(handle, prop_id)) != Z_OK) {
if (err == Z_TOO_BIG) {
@@ -4408,6 +4773,15 @@ set_func(cmd_t *cmd)
}
need_to_commit = B_TRUE;
return;
+ case RT_UUID:
+ /*
+ * We can't set here. We have to wait until commit since the
+ * uuid will be updating the index file and we may not have
+ * created the zone yet.
+ */
+ (void) strlcpy(new_uuid, prop_id, sizeof (new_uuid));
+ need_to_commit = B_TRUE;
+ return;
case RT_FS_ALLOWED:
if ((err = zonecfg_set_fs_allowed(handle, prop_id)) != Z_OK)
zone_perror(zone, err, B_TRUE);
@@ -4482,6 +4856,21 @@ set_func(cmd_t *cmd)
prop_id,
sizeof (in_progress_nwiftab.zone_nwif_physical));
break;
+ case PT_MAC:
+ normalize_mac_addr(in_progress_nwiftab.zone_nwif_mac,
+ prop_id,
+ sizeof (in_progress_nwiftab.zone_nwif_mac));
+ break;
+ case PT_VLANID:
+ (void) strlcpy(in_progress_nwiftab.zone_nwif_vlan_id,
+ prop_id,
+ sizeof (in_progress_nwiftab.zone_nwif_vlan_id));
+ break;
+ case PT_GNIC:
+ (void) strlcpy(in_progress_nwiftab.zone_nwif_gnic,
+ prop_id,
+ sizeof (in_progress_nwiftab.zone_nwif_gnic));
+ break;
case PT_DEFROUTER:
if (validate_net_address_syntax(prop_id, B_TRUE)
!= Z_OK) {
@@ -4492,6 +4881,20 @@ set_func(cmd_t *cmd)
prop_id,
sizeof (in_progress_nwiftab.zone_nwif_defrouter));
break;
+ case PT_NPROP:
+ if (pp->pv_type != PROP_VAL_COMPLEX) {
+ zerr(gettext("A %s value was expected here."),
+ pvt_to_str(PROP_VAL_COMPLEX));
+ saw_error = B_TRUE;
+ return;
+ }
+ zonecfg_free_res_attr_list(
+ in_progress_nwiftab.zone_nwif_attrp);
+ in_progress_nwiftab.zone_nwif_attrp = NULL;
+ if (!(pp->pv_type == PROP_VAL_LIST &&
+ pp->pv_list == NULL))
+ add_property(cmd);
+ break;
default:
zone_perror(pt_to_str(prop_type), Z_NO_PROPERTY_TYPE,
B_TRUE);
@@ -4507,6 +4910,20 @@ set_func(cmd_t *cmd)
prop_id,
sizeof (in_progress_devtab.zone_dev_match));
break;
+ case PT_NPROP:
+ if (pp->pv_type != PROP_VAL_COMPLEX) {
+ zerr(gettext("A %s value was expected here."),
+ pvt_to_str(PROP_VAL_COMPLEX));
+ saw_error = B_TRUE;
+ return;
+ }
+ zonecfg_free_res_attr_list(
+ in_progress_devtab.zone_dev_attrp);
+ in_progress_devtab.zone_dev_attrp = NULL;
+ if (!(pp->pv_type == PROP_VAL_LIST &&
+ pp->pv_list == NULL))
+ add_property(cmd);
+ break;
default:
zone_perror(pt_to_str(prop_type), Z_NO_PROPERTY_TYPE,
B_TRUE);
@@ -4667,18 +5084,30 @@ set_func(cmd_t *cmd)
case RT_MCAP:
switch (prop_type) {
case PT_PHYSICAL:
+ /*
+ * We have to check if an rctl is allowed here since
+ * there might already be a rctl defined that blocks
+ * the alias.
+ */
+ if (!zonecfg_aliased_rctl_ok(handle,
+ ALIAS_MAXPHYSMEM)) {
+ zone_perror(pt_to_str(PT_LOCKED),
+ Z_ALIAS_DISALLOW, B_FALSE);
+ saw_error = B_TRUE;
+ return;
+ }
+
if (!zonecfg_valid_memlimit(prop_id, &mem_cap)) {
- zerr(gettext("A positive number with a "
+ zerr(gettext("A non-negative number with a "
"required scale suffix (K, M, G or T) was "
- "expected here."));
- saw_error = B_TRUE;
- } else if (mem_cap < ONE_MB) {
- zerr(gettext("%s value is too small. It must "
- "be at least 1M."), pt_to_str(PT_PHYSICAL));
+ "expected\nhere."));
saw_error = B_TRUE;
} else {
- snprintf(in_progress_mcaptab.zone_physmem_cap,
- physmem_size, "%llu", mem_cap);
+ if ((err = zonecfg_set_aliased_rctl(handle,
+ ALIAS_MAXPHYSMEM, mem_cap)) != Z_OK)
+ zone_perror(zone, err, B_TRUE);
+ else
+ need_to_commit = B_TRUE;
}
break;
case PT_SWAP:
@@ -4950,6 +5379,23 @@ info_hostid(zone_dochandle_t handle, FILE *fp)
}
static void
+info_uuid(FILE *fp)
+{
+ uuid_t uuid;
+ char suuid[UUID_PRINTABLE_STRING_LENGTH];
+
+ if (new_uuid[0] != '\0') {
+ (void) fprintf(fp, "%s: %s\n", pt_to_str(PT_UUID), new_uuid);
+ } else if (zonecfg_get_uuid(zone, uuid) == Z_OK &&
+ !uuid_is_null(uuid)) {
+ uuid_unparse(uuid, suuid);
+ (void) fprintf(fp, "%s: %s\n", pt_to_str(PT_UUID), suuid);
+ } else {
+ (void) fprintf(fp, "%s:\n", pt_to_str(PT_UUID));
+ }
+}
+
+static void
info_fs_allowed(zone_dochandle_t handle, FILE *fp)
{
char fsallowedp[ZONE_FS_ALLOWED_MAX];
@@ -5031,12 +5477,25 @@ loopend:
static void
output_net(FILE *fp, struct zone_nwiftab *nwiftab)
{
+ struct zone_res_attrtab *np;
+
(void) fprintf(fp, "%s:\n", rt_to_str(RT_NET));
output_prop(fp, PT_ADDRESS, nwiftab->zone_nwif_address, B_TRUE);
output_prop(fp, PT_ALLOWED_ADDRESS,
nwiftab->zone_nwif_allowed_address, B_TRUE);
- output_prop(fp, PT_PHYSICAL, nwiftab->zone_nwif_physical, B_TRUE);
output_prop(fp, PT_DEFROUTER, nwiftab->zone_nwif_defrouter, B_TRUE);
+ output_prop(fp, PT_GNIC, nwiftab->zone_nwif_gnic, B_TRUE);
+ output_prop(fp, PT_MAC, nwiftab->zone_nwif_mac, B_TRUE);
+ output_prop(fp, PT_PHYSICAL, nwiftab->zone_nwif_physical, B_TRUE);
+ output_prop(fp, PT_VLANID, nwiftab->zone_nwif_vlan_id, B_TRUE);
+
+ for (np = nwiftab->zone_nwif_attrp; np != NULL;
+ np = np->zone_res_attr_next) {
+ fprintf(fp, "\t%s: (%s=%s,%s=\"%s\")\n",
+ pt_to_str(PT_NPROP),
+ pt_to_str(PT_NAME), np->zone_res_attr_name,
+ pt_to_str(PT_VALUE), np->zone_res_attr_value);
+ }
}
static void
@@ -5079,8 +5538,18 @@ info_net(zone_dochandle_t handle, FILE *fp, cmd_t *cmd)
static void
output_dev(FILE *fp, struct zone_devtab *devtab)
{
+ struct zone_res_attrtab *np;
+
(void) fprintf(fp, "%s:\n", rt_to_str(RT_DEVICE));
output_prop(fp, PT_MATCH, devtab->zone_dev_match, B_TRUE);
+
+ for (np = devtab->zone_dev_attrp; np != NULL;
+ np = np->zone_res_attr_next) {
+ fprintf(fp, "\t%s: (%s=%s,%s=\"%s\")\n",
+ pt_to_str(PT_NPROP),
+ pt_to_str(PT_NAME), np->zone_res_attr_name,
+ pt_to_str(PT_VALUE), np->zone_res_attr_value);
+ }
}
static void
@@ -5339,15 +5808,18 @@ bytes_to_units(char *str, char *buf, int bufsize)
}
static void
-output_mcap(FILE *fp, struct zone_mcaptab *mcaptab, int showswap,
+output_mcap(FILE *fp, int showphys, uint64_t maxphys, int showswap,
uint64_t maxswap, int showlocked, uint64_t maxlocked)
{
char buf[128];
(void) fprintf(fp, "%s:\n", rt_to_str(RT_MCAP));
- if (mcaptab->zone_physmem_cap[0] != '\0') {
- bytes_to_units(mcaptab->zone_physmem_cap, buf, sizeof (buf));
- output_prop(fp, PT_PHYSICAL, buf, B_TRUE);
+
+ if (showphys == Z_OK) {
+ (void) snprintf(buf, sizeof (buf), "%llu", maxphys);
+ bytes_to_units(buf, buf, sizeof (buf));
+ /* Print directly since "physical" also is a net property. */
+ (void) fprintf(fp, "\t[%s: %s]\n", pt_to_str(PT_PHYSICAL), buf);
}
if (showswap == Z_OK) {
@@ -5369,16 +5841,16 @@ info_mcap(zone_dochandle_t handle, FILE *fp)
int res1, res2, res3;
uint64_t swap_limit;
uint64_t locked_limit;
- struct zone_mcaptab lookup;
+ uint64_t phys_limit;
- bzero(&lookup, sizeof (lookup));
- res1 = zonecfg_getmcapent(handle, &lookup);
+ res1 = zonecfg_get_aliased_rctl(handle, ALIAS_MAXPHYSMEM, &phys_limit);
res2 = zonecfg_get_aliased_rctl(handle, ALIAS_MAXSWAP, &swap_limit);
res3 = zonecfg_get_aliased_rctl(handle, ALIAS_MAXLOCKEDMEM,
&locked_limit);
if (res1 == Z_OK || res2 == Z_OK || res3 == Z_OK)
- output_mcap(fp, &lookup, res2, swap_limit, res3, locked_limit);
+ output_mcap(fp, res1, phys_limit, res2, swap_limit,
+ res3, locked_limit);
}
static void
@@ -5429,9 +5901,11 @@ info_func(cmd_t *cmd)
FILE *fp = stdout;
boolean_t need_to_close = B_FALSE;
int type;
- int res1, res2;
+ int res1, res2, res3;
uint64_t swap_limit;
uint64_t locked_limit;
+ uint64_t phys_limit;
+ struct stat statbuf;
assert(cmd != NULL);
@@ -5479,7 +5953,9 @@ info_func(cmd_t *cmd)
&swap_limit);
res2 = zonecfg_get_aliased_rctl(handle,
ALIAS_MAXLOCKEDMEM, &locked_limit);
- output_mcap(fp, &in_progress_mcaptab, res1, swap_limit,
+ res3 = zonecfg_get_aliased_rctl(handle,
+ ALIAS_MAXPHYSMEM, &phys_limit);
+ output_mcap(fp, res3, phys_limit, res1, swap_limit,
res2, locked_limit);
break;
case RT_ADMIN:
@@ -5519,6 +5995,7 @@ info_func(cmd_t *cmd)
info_iptype(handle, fp);
info_hostid(handle, fp);
info_fs_allowed(handle, fp);
+ info_uuid(fp);
}
info_aliased_rctl(handle, fp, ALIAS_MAXLWPS);
info_aliased_rctl(handle, fp, ALIAS_MAXPROCS);
@@ -5527,6 +6004,7 @@ info_func(cmd_t *cmd)
info_aliased_rctl(handle, fp, ALIAS_MAXMSGIDS);
info_aliased_rctl(handle, fp, ALIAS_MAXSEMIDS);
info_aliased_rctl(handle, fp, ALIAS_SHARES);
+ info_aliased_rctl(handle, fp, ALIAS_ZFSPRI);
if (!global_zone) {
info_fs(handle, fp, cmd);
info_net(handle, fp, cmd);
@@ -5590,6 +6068,9 @@ info_func(cmd_t *cmd)
case RT_SHARES:
info_aliased_rctl(handle, fp, ALIAS_SHARES);
break;
+ case RT_ZFSPRI:
+ info_aliased_rctl(handle, fp, ALIAS_ZFSPRI);
+ break;
case RT_FS:
info_fs(handle, fp, cmd);
break;
@@ -5620,6 +6101,9 @@ info_func(cmd_t *cmd)
case RT_HOSTID:
info_hostid(handle, fp);
break;
+ case RT_UUID:
+ info_uuid(fp);
+ break;
case RT_ADMIN:
info_auth(handle, fp, cmd);
break;
@@ -6101,11 +6585,29 @@ verify_func(cmd_t *cmd)
if (save) {
if (ret_val == Z_OK) {
+ /*
+ * If the zone doesn't yet have a debug ID, set one now.
+ */
+ if (zonecfg_get_did(handle) == -1)
+ zonecfg_set_did(handle);
+
if ((ret_val = zonecfg_save(handle)) == Z_OK) {
need_to_commit = B_FALSE;
(void) strlcpy(revert_zone, zone,
sizeof (revert_zone));
}
+
+ /*
+ * Commit a new uuid at this point since we now know the
+ * zone index entry will exist.
+ */
+ if (new_uuid[0] != '\0') {
+ if ((err = zonecfg_set_uuid(zone, zonepath,
+ new_uuid)) != Z_OK)
+ zone_perror(zone, err, B_FALSE);
+ else
+ new_uuid[0] = '\0';
+ }
} else {
zerr(gettext("Zone %s failed to verify"), zone);
}
@@ -6275,6 +6777,7 @@ end_func(cmd_t *cmd)
int err, arg, res1, res2, res3;
uint64_t swap_limit;
uint64_t locked_limit;
+ uint64_t phys_limit;
uint64_t proc_cap;
assert(cmd != NULL);
@@ -6578,8 +7081,8 @@ end_func(cmd_t *cmd)
break;
case RT_MCAP:
/* Make sure everything was filled in. */
- res1 = strlen(in_progress_mcaptab.zone_physmem_cap) == 0 ?
- Z_ERR : Z_OK;
+ res1 = zonecfg_get_aliased_rctl(handle, ALIAS_MAXPHYSMEM,
+ &phys_limit);
res2 = zonecfg_get_aliased_rctl(handle, ALIAS_MAXSWAP,
&swap_limit);
res3 = zonecfg_get_aliased_rctl(handle, ALIAS_MAXLOCKEDMEM,
@@ -6595,11 +7098,6 @@ end_func(cmd_t *cmd)
/* if phys & locked are both set, verify locked <= phys */
if (res1 == Z_OK && res3 == Z_OK) {
- uint64_t phys_limit;
- char *endp;
-
- phys_limit = strtoull(
- in_progress_mcaptab.zone_physmem_cap, &endp, 10);
if (phys_limit < locked_limit) {
zerr(gettext("The %s cap must be less than or "
"equal to the %s cap."),
@@ -6611,23 +7109,6 @@ end_func(cmd_t *cmd)
}
err = Z_OK;
- if (res1 == Z_OK) {
- /*
- * We could be ending from either an add operation
- * or a select operation. Since all of the properties
- * within this resource are optional, we always use
- * modify on the mcap entry. zonecfg_modify_mcap()
- * will handle both adding and modifying a memory cap.
- */
- err = zonecfg_modify_mcap(handle, &in_progress_mcaptab);
- } else if (end_op == CMD_SELECT) {
- /*
- * If we're ending from a select and the physical
- * memory cap is empty then the user could have cleared
- * the physical cap value, so try to delete the entry.
- */
- (void) zonecfg_delete_mcap(handle);
- }
break;
case RT_ADMIN:
/* First make sure everything was filled in. */
@@ -7176,8 +7657,10 @@ get_execbasename(char *execfullname)
int
main(int argc, char *argv[])
{
- int err, arg;
+ int err, arg, uflag = 0, zflag = 0;
struct stat st;
+ uuid_t uuidin;
+ char zonename[ZONENAME_MAX + 1];
/* This must be before anything goes to stdout. */
setbuf(stdout, NULL);
@@ -7204,7 +7687,7 @@ main(int argc, char *argv[])
exit(Z_OK);
}
- while ((arg = getopt(argc, argv, "?f:R:z:")) != EOF) {
+ while ((arg = getopt(argc, argv, "?f:R:z:u:")) != EOF) {
switch (arg) {
case '?':
if (optopt == '?')
@@ -7231,6 +7714,21 @@ main(int argc, char *argv[])
}
zonecfg_set_root(optarg);
break;
+ case 'u':
+ if (uuid_parse((char *)optarg, uuidin) == -1)
+ return (Z_INVALID_PROPERTY);
+
+ if (zonecfg_get_name_by_uuid(uuidin, zonename,
+ ZONENAME_MAX) != Z_OK) {
+ zone_perror(optarg, Z_BOGUS_ZONE_NAME, B_TRUE);
+ usage(B_FALSE, HELP_SYNTAX);
+ exit(Z_USAGE);
+ }
+
+ (void) strlcpy(zone, zonename, sizeof (zone));
+ (void) strlcpy(revert_zone, zonename, sizeof (zone));
+ uflag = 1;
+ break;
case 'z':
if (strcmp(optarg, GLOBAL_ZONENAME) == 0) {
global_zone = B_TRUE;
@@ -7241,6 +7739,7 @@ main(int argc, char *argv[])
}
(void) strlcpy(zone, optarg, sizeof (zone));
(void) strlcpy(revert_zone, optarg, sizeof (zone));
+ zflag = 1;
break;
default:
usage(B_FALSE, HELP_USAGE);
@@ -7248,7 +7747,7 @@ main(int argc, char *argv[])
}
}
- if (optind > argc || strcmp(zone, "") == 0) {
+ if (optind > argc || strcmp(zone, "") == 0 || (uflag && zflag)) {
usage(B_FALSE, HELP_USAGE);
exit(Z_USAGE);
}
diff --git a/usr/src/cmd/zonecfg/zonecfg.h b/usr/src/cmd/zonecfg/zonecfg.h
index d8f8b14ce8..f8c78437ad 100644
--- a/usr/src/cmd/zonecfg/zonecfg.h
+++ b/usr/src/cmd/zonecfg/zonecfg.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent Inc. All rights reserved.
*/
#ifndef _ZONECFG_H
@@ -90,9 +91,11 @@ extern "C" {
#define RT_ADMIN 26
#define RT_FS_ALLOWED 27
#define RT_MAXPROCS 28 /* really a rctl alias property, but for info */
+#define RT_ZFSPRI 29 /* really a rctl alias property, but for info */
+#define RT_UUID 30 /* really a property, but for info */
#define RT_MIN RT_UNKNOWN
-#define RT_MAX RT_MAXPROCS
+#define RT_MAX RT_UUID
/* property types: increment PT_MAX when expanding this list */
#define PT_UNKNOWN 0
@@ -137,9 +140,15 @@ extern "C" {
#define PT_FS_ALLOWED 39
#define PT_MAXPROCS 40
#define PT_ALLOWED_ADDRESS 41
+#define PT_ZFSPRI 42
+#define PT_MAC 43
+#define PT_VLANID 44
+#define PT_GNIC 45
+#define PT_NPROP 46
+#define PT_UUID 47
#define PT_MIN PT_UNKNOWN
-#define PT_MAX PT_ALLOWED_ADDRESS
+#define PT_MAX PT_UUID
#define MAX_EQ_PROP_PAIRS 3
diff --git a/usr/src/cmd/zonecfg/zonecfg_grammar.y b/usr/src/cmd/zonecfg/zonecfg_grammar.y
index d7f11b6a46..13a17876b7 100644
--- a/usr/src/cmd/zonecfg/zonecfg_grammar.y
+++ b/usr/src/cmd/zonecfg/zonecfg_grammar.y
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2013, Joyent Inc. All rights reserved.
*/
/*
@@ -136,6 +137,7 @@ complex_piece_func(int cp_type, const char *str, complex_property_ptr_t cp_next)
%token OPEN_PAREN CLOSE_PAREN COMMA DATASET LIMITPRIV BOOTARGS BRAND PSET PCAP
%token MCAP NCPUS IMPORTANCE SHARES MAXLWPS MAXSHMMEM MAXSHMIDS MAXMSGIDS
%token MAXSEMIDS LOCKED SWAP SCHED CLEAR DEFROUTER ADMIN USER AUTHS MAXPROCS
+%token ZFSPRI MAC VLANID GNIC NPROP UUID
%type <strval> TOKEN EQUAL OPEN_SQ_BRACKET CLOSE_SQ_BRACKET
property_value OPEN_PAREN CLOSE_PAREN COMMA simple_prop_val
@@ -145,7 +147,7 @@ complex_piece_func(int cp_type, const char *str, complex_property_ptr_t cp_next)
%type <ival> property_name SPECIAL RAW DIR OPTIONS TYPE ADDRESS PHYSICAL NAME
MATCH ZONENAME ZONEPATH AUTOBOOT POOL LIMITPRIV BOOTARGS VALUE PRIV LIMIT
ACTION BRAND SCHED IPTYPE DEFROUTER HOSTID USER AUTHS FS_ALLOWED
- ALLOWED_ADDRESS
+ ALLOWED_ADDRESS MAC VLANID GNIC NPROP UUID
%type <cmd> command
%type <cmd> add_command ADD
%type <cmd> cancel_command CANCEL
@@ -650,6 +652,24 @@ info_command: INFO
$$->cmd_res_type = RT_FS_ALLOWED;
$$->cmd_prop_nv_pairs = 0;
}
+ | INFO UUID
+ {
+ if (($$ = alloc_cmd()) == NULL)
+ YYERROR;
+ cmd = $$;
+ $$->cmd_handler = &info_func;
+ $$->cmd_res_type = RT_UUID;
+ $$->cmd_prop_nv_pairs = 0;
+ }
+ | INFO ZFSPRI
+ {
+ if (($$ = alloc_cmd()) == NULL)
+ YYERROR;
+ cmd = $$;
+ $$->cmd_handler = &info_func;
+ $$->cmd_res_type = RT_ZFSPRI;
+ $$->cmd_prop_nv_pairs = 0;
+ }
| INFO resource_type property_name EQUAL property_value
{
if (($$ = alloc_cmd()) == NULL)
@@ -734,6 +754,19 @@ remove_command: REMOVE
$$->cmd_prop_name[0] = $2;
$$->cmd_property_ptr[0] = &property[0];
}
+ | REMOVE TOKEN property_name property_value
+ {
+ if (($$ = alloc_cmd()) == NULL)
+ YYERROR;
+ cmd = $$;
+ $$->cmd_handler = &remove_func;
+ $$->cmd_argc = 1;
+ $$->cmd_argv[0] = claim_token($2);
+ $$->cmd_argv[1] = NULL;
+ $$->cmd_prop_nv_pairs = 1;
+ $$->cmd_prop_name[0] = $3;
+ $$->cmd_property_ptr[0] = &property[0];
+ }
| REMOVE resource_type property_name EQUAL property_value
{
if (($$ = alloc_cmd()) == NULL)
@@ -745,6 +778,20 @@ remove_command: REMOVE
$$->cmd_prop_name[0] = $3;
$$->cmd_property_ptr[0] = &property[0];
}
+ | REMOVE TOKEN resource_type property_name EQUAL property_value
+ {
+ if (($$ = alloc_cmd()) == NULL)
+ YYERROR;
+ cmd = $$;
+ $$->cmd_handler = &remove_func;
+ $$->cmd_res_type = $3;
+ $$->cmd_argc = 1;
+ $$->cmd_argv[0] = claim_token($2);
+ $$->cmd_argv[1] = NULL;
+ $$->cmd_prop_nv_pairs = 1;
+ $$->cmd_prop_name[0] = $4;
+ $$->cmd_property_ptr[0] = &property[0];
+ }
| REMOVE resource_type property_name EQUAL property_value property_name EQUAL property_value
{
if (($$ = alloc_cmd()) == NULL)
@@ -758,6 +805,22 @@ remove_command: REMOVE
$$->cmd_prop_name[1] = $6;
$$->cmd_property_ptr[1] = &property[1];
}
+ | REMOVE TOKEN resource_type property_name EQUAL property_value property_name EQUAL property_value
+ {
+ if (($$ = alloc_cmd()) == NULL)
+ YYERROR;
+ cmd = $$;
+ $$->cmd_handler = &remove_func;
+ $$->cmd_res_type = $3;
+ $$->cmd_argc = 1;
+ $$->cmd_argv[0] = claim_token($2);
+ $$->cmd_argv[1] = NULL;
+ $$->cmd_prop_nv_pairs = 2;
+ $$->cmd_prop_name[0] = $4;
+ $$->cmd_property_ptr[0] = &property[0];
+ $$->cmd_prop_name[1] = $7;
+ $$->cmd_property_ptr[1] = &property[1];
+ }
| REMOVE resource_type property_name EQUAL property_value property_name EQUAL property_value property_name EQUAL property_value
{
if (($$ = alloc_cmd()) == NULL)
@@ -773,6 +836,24 @@ remove_command: REMOVE
$$->cmd_prop_name[2] = $9;
$$->cmd_property_ptr[2] = &property[2];
}
+ | REMOVE TOKEN resource_type property_name EQUAL property_value property_name EQUAL property_value property_name EQUAL property_value
+ {
+ if (($$ = alloc_cmd()) == NULL)
+ YYERROR;
+ cmd = $$;
+ $$->cmd_handler = &remove_func;
+ $$->cmd_res_type = $3;
+ $$->cmd_argc = 1;
+ $$->cmd_argv[0] = claim_token($2);
+ $$->cmd_argv[1] = NULL;
+ $$->cmd_prop_nv_pairs = 3;
+ $$->cmd_prop_name[0] = $4;
+ $$->cmd_property_ptr[0] = &property[0];
+ $$->cmd_prop_name[1] = $7;
+ $$->cmd_property_ptr[1] = &property[1];
+ $$->cmd_prop_name[2] = $10;
+ $$->cmd_property_ptr[2] = &property[2];
+ }
revert_command: REVERT
{
@@ -976,6 +1057,10 @@ property_name: SPECIAL { $$ = PT_SPECIAL; }
| ALLOWED_ADDRESS { $$ = PT_ALLOWED_ADDRESS; }
| PHYSICAL { $$ = PT_PHYSICAL; }
| DEFROUTER { $$ = PT_DEFROUTER; }
+ | MAC { $$ = PT_MAC; }
+ | VLANID { $$ = PT_VLANID; }
+ | GNIC { $$ = PT_GNIC; }
+ | NPROP { $$ = PT_NPROP; }
| NAME { $$ = PT_NAME; }
| VALUE { $$ = PT_VALUE; }
| MATCH { $$ = PT_MATCH; }
@@ -999,6 +1084,8 @@ property_name: SPECIAL { $$ = PT_SPECIAL; }
| USER { $$ = PT_USER; }
| AUTHS { $$ = PT_AUTHS; }
| FS_ALLOWED { $$ = PT_FS_ALLOWED; }
+ | UUID { $$ = PT_UUID; }
+ | ZFSPRI { $$ = PT_ZFSPRI; }
/*
* The grammar builds data structures from the bottom up. Thus various
diff --git a/usr/src/cmd/zonecfg/zonecfg_lex.l b/usr/src/cmd/zonecfg/zonecfg_lex.l
index 6a0b577b75..328a75c922 100644
--- a/usr/src/cmd/zonecfg/zonecfg_lex.l
+++ b/usr/src/cmd/zonecfg/zonecfg_lex.l
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent Inc. All rights reserved.
*/
#include <assert.h>
@@ -57,10 +58,11 @@ extern void yyerror(char *s);
static char *create_token(char *s);
%}
-%a 7000
+%a 8000
%p 5000
%e 2000
%n 1000
+%o 13000
%{
/*
@@ -236,6 +238,18 @@ static char *create_token(char *s);
<TSTATE>defrouter { return DEFROUTER; }
<CSTATE>defrouter { return DEFROUTER; }
+<TSTATE>mac-addr { return MAC; }
+<CSTATE>mac-addr { return MAC; }
+
+<TSTATE>vlan-id { return VLANID; }
+<CSTATE>vlan-id { return VLANID; }
+
+<TSTATE>global-nic { return GNIC; }
+<CSTATE>global-nic { return GNIC; }
+
+<TSTATE>property { return NPROP; }
+<CSTATE>property { return NPROP; }
+
<TSTATE>dir { return DIR; }
<CSTATE>dir { return DIR; }
@@ -308,6 +322,12 @@ static char *create_token(char *s);
<TSTATE>fs-allowed { return FS_ALLOWED; }
<CSTATE>fs-allowed { return FS_ALLOWED; }
+<TSTATE>uuid { return UUID; }
+<CSTATE>uuid { return UUID; }
+
+<TSTATE>zfs-io-priority { return ZFSPRI; }
+<CSTATE>zfs-io-priority { return ZFSPRI; }
+
<TSTATE>= { return EQUAL; }
<LSTATE>= { return EQUAL; }
<CSTATE>= { return EQUAL; }
@@ -357,6 +377,13 @@ static char *create_token(char *s);
return TOKEN;
}
+<CSTATE>\"[^\"\n]*[\"\n] {
+ yylval.strval = create_token(yytext + 1);
+ if (yylval.strval[yyleng - 2] == '"')
+ yylval.strval[yyleng - 2] = 0;
+ return TOKEN;
+ }
+
<TSTATE>\"[^\"\n]*[\"\n] {
yylval.strval = create_token(yytext + 1);
if (yylval.strval[yyleng - 2] == '"')
diff --git a/usr/src/cmd/zonename/Makefile b/usr/src/cmd/zonename/Makefile
index 566e893a67..3a51952455 100644
--- a/usr/src/cmd/zonename/Makefile
+++ b/usr/src/cmd/zonename/Makefile
@@ -28,8 +28,10 @@
#
PROG= zonename
+OBJS= zonename.o
include ../Makefile.cmd
+include ../Makefile.ctf
LDLIBS += -lzonecfg
@@ -37,6 +39,10 @@ LDLIBS += -lzonecfg
all: $(PROG)
+$(PROG): $(OBJS)
+ $(LINK.c) -o $@ $(OBJS) $(LDLIBS)
+ $(POST_PROCESS)
+
install: all $(ROOTSBINPROG)
$(RM) $(ROOTPROG)
$(SYMLINK) ../../sbin/$(PROG) $(ROOTPROG)
@@ -44,6 +50,10 @@ install: all $(ROOTSBINPROG)
check: $(PROG).c
$(CSTYLE) -pP $(PROG).c
+%.o: %.c
+ $(COMPILE.c) $<
+ $(POST_PROCESS_O)
+
clean:
lint: lint_PROG
diff --git a/usr/src/cmd/zonestat/zonestatd/zonestatd.c b/usr/src/cmd/zonestat/zonestatd/zonestatd.c
index b764551131..6c293bcc0e 100644
--- a/usr/src/cmd/zonestat/zonestatd/zonestatd.c
+++ b/usr/src/cmd/zonestat/zonestatd/zonestatd.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent, Inc. All rights reserved.
*/
#include <alloca.h>
#include <assert.h>
@@ -2190,7 +2191,7 @@ zsd_get_zone_rctl_usage(char *name)
return (rctlblk_get_value(rblk));
}
-#define ZSD_NUM_RCTL_VALS 19
+#define ZSD_NUM_RCTL_VALS 20
/*
* Fetch the limit information for a zone. This uses zone_enter() as the
@@ -2237,12 +2238,6 @@ zsd_get_zone_caps(zsd_ctl_t *ctl, zsd_zone_t *zone, uint64_t *cpu_shares,
*msgids = 0;
*lofi = 0;
- /* Get the ram cap first since it is a zone attr */
- ret = zone_getattr(zone->zsz_id, ZONE_ATTR_PHYS_MCAP,
- ram_cap, sizeof (*ram_cap));
- if (ret < 0 || *ram_cap == 0)
- *ram_cap = ZS_LIMIT_NONE;
-
/* Get the zone's default scheduling class */
ret = zone_getattr(zone->zsz_id, ZONE_ATTR_SCHED_CLASS,
class, sizeof (class));
@@ -2298,6 +2293,7 @@ zsd_get_zone_caps(zsd_ctl_t *ctl, zsd_zone_t *zone, uint64_t *cpu_shares,
vals[i++] = zsd_get_zone_rctl_usage("zone.max-msg-ids");
vals[i++] = zsd_get_zone_rctl_limit("zone.max-lofi");
vals[i++] = zsd_get_zone_rctl_usage("zone.max-lofi");
+ vals[i++] = zsd_get_zone_rctl_usage("zone.max-physical-memory");
if (write(p[1], vals, ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) !=
ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) {
@@ -2342,6 +2338,7 @@ zsd_get_zone_caps(zsd_ctl_t *ctl, zsd_zone_t *zone, uint64_t *cpu_shares,
*msgids = vals[i++];
*lofi_cap = vals[i++];
*lofi = vals[i++];
+ *ram_cap = vals[i++];
/* Interpret maximum values as no cap */
if (*cpu_cap == UINT32_MAX || *cpu_cap == 0)
diff --git a/usr/src/cmd/zpool/zpool_main.c b/usr/src/cmd/zpool/zpool_main.c
index 8b992efcb4..bcab7b063c 100644
--- a/usr/src/cmd/zpool/zpool_main.c
+++ b/usr/src/cmd/zpool/zpool_main.c
@@ -25,6 +25,7 @@
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2012 by Frederik Wessels. All rights reserved.
* Copyright (c) 2013 by Prasad Joshi (sTec). All rights reserved.
+ * Copyright (c) 2013 Joyent, Inc. All rights reserved.
*/
#include <assert.h>
diff --git a/usr/src/common/brand/lx/lx_signum.c b/usr/src/common/brand/lx/lx_signum.c
new file mode 100644
index 0000000000..08ab453885
--- /dev/null
+++ b/usr/src/common/brand/lx/lx_signum.c
@@ -0,0 +1,244 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/signal.h>
+#include <lx_signum.h>
+
+/*
+ * Delivering signals to a Linux process is complicated by differences in
+ * signal numbering, stack structure and contents, and the action taken when a
+ * signal handler exits. In addition, many signal-related structures, such as
+ * sigset_ts, vary between Solaris and Linux.
+ *
+ * The simplest transformation that must be done when sending signals is to
+ * translate between Linux and Solaris signal numbers.
+ *
+ * These are the major signal number differences between Linux and Solaris:
+ *
+ * ====================================
+ * | Number | Linux | Solaris |
+ * | ====== | ========= | ========== |
+ * | 7 | SIGBUS | SIGEMT |
+ * | 10 | SIGUSR1 | SIGBUS |
+ * | 12 | SIGUSR2 | SIGSYS |
+ * | 16 | SIGSTKFLT | SIGUSR1 |
+ * | 17 | SIGCHLD | SIGUSR2 |
+ * | 18 | SIGCONT | SIGCHLD |
+ * | 19 | SIGSTOP | SIGPWR |
+ * | 20 | SIGTSTP | SIGWINCH |
+ * | 21 | SIGTTIN | SIGURG |
+ * | 22 | SIGTTOU | SIGPOLL |
+ * | 23 | SIGURG | SIGSTOP |
+ * | 24 | SIGXCPU | SIGTSTP |
+ * | 25 | SIGXFSZ | SIGCONT |
+ * | 26 | SIGVTALARM | SIGTTIN |
+ * | 27 | SIGPROF | SIGTTOU |
+ * | 28 | SIGWINCH | SIGVTALARM |
+ * | 29 | SIGPOLL | SIGPROF |
+ * | 30 | SIGPWR | SIGXCPU |
+ * | 31 | SIGSYS | SIGXFSZ |
+ * ====================================
+ *
+ * Not every Linux signal maps to a Solaris signal, nor does every Solaris
+ * signal map to a Linux counterpart. However, when signals do map, the
+ * mapping is unique.
+ *
+ * One mapping issue is that Linux supports 33 real time signals, with SIGRTMIN
+ * typically starting at or near 32 (SIGRTMIN) and proceeding to 64 (SIGRTMAX)
+ * (SIGRTMIN is "at or near" 32 because glibc usually "steals" one ore more of
+ * these signals for its own internal use, adjusting SIGRTMIN and SIGRTMAX as
+ * needed.) Conversely, Solaris actively uses signals 32-40 for other purposes
+ * and supports exactly 32 real time signals, in the range 41 (SIGRTMIN)
+ * to 72 (SIGRTMAX).
+ *
+ * At present, attempting to translate a Linux signal equal to 63
+ * will generate an error (we allow SIGRTMAX because a program
+ * should be able to send SIGRTMAX without getting an EINVAL, though obviously
+ * anything that loops through the signals from SIGRTMIN to SIGRTMAX will
+ * fail.)
+ *
+ * Similarly, attempting to translate a native Solaris signal in the range
+ * 32-40 will also generate an error as we don't want to support the receipt of
+ * those signals from the Solaris global zone.
+ */
+
+/*
+ * Linux to Solaris signal map
+ *
+ * Usage: solaris_signal = ltos_signum[lx_signal];
+ */
+const int
+ltos_signo[LX_NSIG + 1] = {
+ 0,
+ SIGHUP,
+ SIGINT,
+ SIGQUIT,
+ SIGILL,
+ SIGTRAP,
+ SIGABRT,
+ SIGBUS,
+ SIGFPE,
+ SIGKILL,
+ SIGUSR1,
+ SIGSEGV,
+ SIGUSR2,
+ SIGPIPE,
+ SIGALRM,
+ SIGTERM,
+ SIGEMT, /* 16: Linux SIGSTKFLT; use Solaris SIGEMT */
+ SIGCHLD,
+ SIGCONT,
+ SIGSTOP,
+ SIGTSTP,
+ SIGTTIN,
+ SIGTTOU,
+ SIGURG,
+ SIGXCPU,
+ SIGXFSZ,
+ SIGVTALRM,
+ SIGPROF,
+ SIGWINCH,
+ SIGPOLL,
+ SIGPWR,
+ SIGSYS,
+ _SIGRTMIN, /* 32: Linux SIGRTMIN */
+ _SIGRTMIN + 1,
+ _SIGRTMIN + 2,
+ _SIGRTMIN + 3,
+ _SIGRTMIN + 4,
+ _SIGRTMIN + 5,
+ _SIGRTMIN + 6,
+ _SIGRTMIN + 7,
+ _SIGRTMIN + 8,
+ _SIGRTMIN + 9,
+ _SIGRTMIN + 10,
+ _SIGRTMIN + 11,
+ _SIGRTMIN + 12,
+ _SIGRTMIN + 13,
+ _SIGRTMIN + 14,
+ _SIGRTMIN + 15,
+ _SIGRTMIN + 16,
+ _SIGRTMIN + 17,
+ _SIGRTMIN + 18,
+ _SIGRTMIN + 19,
+ _SIGRTMIN + 20,
+ _SIGRTMIN + 21,
+ _SIGRTMIN + 22,
+ _SIGRTMIN + 23,
+ _SIGRTMIN + 24,
+ _SIGRTMIN + 25,
+ _SIGRTMIN + 26,
+ _SIGRTMIN + 27,
+ _SIGRTMIN + 28,
+ _SIGRTMIN + 29,
+ _SIGRTMIN + 30,
+ -1, /* 63: Linux SIGRTMIN + 31, or SIGRTMAX - 1 */
+ _SIGRTMAX, /* 64: Linux SIGRTMAX */
+};
+
+/*
+ * Solaris to Linux signal map
+ *
+ * Usage: lx_signal = stol_signo[solaris_signal];
+ */
+const int
+stol_signo[NSIG] = {
+ 0,
+ LX_SIGHUP,
+ LX_SIGINT,
+ LX_SIGQUIT,
+ LX_SIGILL,
+ LX_SIGTRAP,
+ LX_SIGABRT,
+ LX_SIGSTKFLT, /* 7: Solaris SIGEMT; use for LX_SIGSTKFLT */
+ LX_SIGFPE,
+ LX_SIGKILL,
+ LX_SIGBUS,
+ LX_SIGSEGV,
+ LX_SIGSYS,
+ LX_SIGPIPE,
+ LX_SIGALRM,
+ LX_SIGTERM,
+ LX_SIGUSR1,
+ LX_SIGUSR2,
+ LX_SIGCHLD,
+ LX_SIGPWR,
+ LX_SIGWINCH,
+ LX_SIGURG,
+ LX_SIGPOLL,
+ LX_SIGSTOP,
+ LX_SIGTSTP,
+ LX_SIGCONT,
+ LX_SIGTTIN,
+ LX_SIGTTOU,
+ LX_SIGVTALRM,
+ LX_SIGPROF,
+ LX_SIGXCPU,
+ LX_SIGXFSZ,
+ -1, /* 32: Solaris SIGWAITING */
+ -1, /* 33: Solaris SIGLWP */
+ -1, /* 34: Solaris SIGFREEZE */
+ -1, /* 35: Solaris SIGTHAW */
+ -1, /* 36: Solaris SIGCANCEL */
+ -1, /* 37: Solaris SIGLOST */
+ -1, /* 38: Solaris SIGXRES */
+ -1, /* 39: Solaris SIGJVM1 */
+ -1, /* 40: Solaris SIGJVM2 */
+ -1, /* 41: Solaris SIGINFO */
+ LX_SIGRTMIN, /* 42: Solaris _SIGRTMIN */
+ LX_SIGRTMIN + 1,
+ LX_SIGRTMIN + 2,
+ LX_SIGRTMIN + 3,
+ LX_SIGRTMIN + 4,
+ LX_SIGRTMIN + 5,
+ LX_SIGRTMIN + 6,
+ LX_SIGRTMIN + 7,
+ LX_SIGRTMIN + 8,
+ LX_SIGRTMIN + 9,
+ LX_SIGRTMIN + 10,
+ LX_SIGRTMIN + 11,
+ LX_SIGRTMIN + 12,
+ LX_SIGRTMIN + 13,
+ LX_SIGRTMIN + 14,
+ LX_SIGRTMIN + 15,
+ LX_SIGRTMIN + 16,
+ LX_SIGRTMIN + 17,
+ LX_SIGRTMIN + 18,
+ LX_SIGRTMIN + 19,
+ LX_SIGRTMIN + 20,
+ LX_SIGRTMIN + 21,
+ LX_SIGRTMIN + 22,
+ LX_SIGRTMIN + 23,
+ LX_SIGRTMIN + 24,
+ LX_SIGRTMIN + 25,
+ LX_SIGRTMIN + 26,
+ LX_SIGRTMIN + 27,
+ LX_SIGRTMIN + 28,
+ LX_SIGRTMIN + 29,
+ LX_SIGRTMIN + 30,
+ LX_SIGRTMAX, /* 73: Solaris _SIGRTMAX */
+};
diff --git a/usr/src/common/brand/lx/lx_signum.h b/usr/src/common/brand/lx/lx_signum.h
new file mode 100644
index 0000000000..f410500925
--- /dev/null
+++ b/usr/src/common/brand/lx/lx_signum.h
@@ -0,0 +1,81 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _LX_SIGNUM_H
+#define _LX_SIGNUM_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LX_SIGHUP 1
+#define LX_SIGINT 2
+#define LX_SIGQUIT 3
+#define LX_SIGILL 4
+#define LX_SIGTRAP 5
+#define LX_SIGABRT 6
+#define LX_SIGIOT 6
+#define LX_SIGBUS 7
+#define LX_SIGFPE 8
+#define LX_SIGKILL 9
+#define LX_SIGUSR1 10
+#define LX_SIGSEGV 11
+#define LX_SIGUSR2 12
+#define LX_SIGPIPE 13
+#define LX_SIGALRM 14
+#define LX_SIGTERM 15
+#define LX_SIGSTKFLT 16
+#define LX_SIGCHLD 17
+#define LX_SIGCONT 18
+#define LX_SIGSTOP 19
+#define LX_SIGTSTP 20
+#define LX_SIGTTIN 21
+#define LX_SIGTTOU 22
+#define LX_SIGURG 23
+#define LX_SIGXCPU 24
+#define LX_SIGXFSZ 25
+#define LX_SIGVTALRM 26
+#define LX_SIGPROF 27
+#define LX_SIGWINCH 28
+#define LX_SIGIO 29
+#define LX_SIGPOLL LX_SIGIO
+#define LX_SIGPWR 30
+#define LX_SIGSYS 31
+#define LX_SIGUNUSED 31
+
+#define LX_NSIG 64 /* Linux _NSIG */
+
+#define LX_SIGRTMIN 32
+#define LX_SIGRTMAX LX_NSIG
+
+extern const int ltos_signo[];
+extern const int stol_signo[];
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_SIGNUM_H */
diff --git a/usr/src/common/brand/lx/lx_syscall.h b/usr/src/common/brand/lx/lx_syscall.h
new file mode 100644
index 0000000000..7bfc0537c9
--- /dev/null
+++ b/usr/src/common/brand/lx/lx_syscall.h
@@ -0,0 +1,47 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _LX_SYSCALL_H
+#define _LX_SYSCALL_H
+
+#include <sys/lx_brand.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * The br_scall_args field of lx_lwp_data is going to be populated with
+ * pointers to structs. The types of these structs should be defined in this
+ * header file. These are Linux specific arguments to system calls that don't
+ * exist in illumos. Each section should be labelled with which system call it
+ * belongs to.
+ */
+
+/* arguments for waitpid(2) */
+/* see comments in usr/src/lib/brand/lx/lx_brand/common/wait.c */
+#define LX_WNOTHREAD 0x20000000 /* Do not wait on siblings' children */
+#define LX_WALL 0x40000000 /* Wait on all children */
+#define LX_WCLONE 0x80000000 /* Wait only on clone children */
+typedef struct lx_waitid_args {
+ int waitid_flags;
+} lx_waitid_args_t;
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_SYSCALL_H */
diff --git a/usr/src/common/ctf/ctf_create.c b/usr/src/common/ctf/ctf_create.c
index 239d166f44..f9feca81cf 100644
--- a/usr/src/common/ctf/ctf_create.c
+++ b/usr/src/common/ctf/ctf_create.c
@@ -574,7 +574,7 @@ ctf_dtd_lookup(ctf_file_t *fp, ctf_id_t type)
int
ctf_discard(ctf_file_t *fp)
{
- ctf_dtdef_t *dtd, *ntd;
+ ctf_dtdef_t *dtd, *ntd = NULL;
if (!(fp->ctf_flags & LCTF_RDWR))
return (ctf_set_errno(fp, ECTF_RDONLY));
diff --git a/usr/src/common/ctf/ctf_hash.c b/usr/src/common/ctf/ctf_hash.c
index b10a7618f6..e68fe8e516 100644
--- a/usr/src/common/ctf/ctf_hash.c
+++ b/usr/src/common/ctf/ctf_hash.c
@@ -25,9 +25,8 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <ctf_impl.h>
+#include <sys/debug.h>
static const ushort_t _CTF_EMPTY[1] = { 0 };
@@ -176,3 +175,160 @@ ctf_hash_destroy(ctf_hash_t *hp)
hp->h_chains = NULL;
}
}
+
+int
+ctf_idhash_create(ctf_idhash_t *ihp, ulong_t nelems)
+{
+ if (nelems > USHRT_MAX)
+ return (EOVERFLOW);
+
+ /*
+ * If the hash table is going to be empty, don't bother allocating any
+ * memory and make the only bucket point to a zero so lookups fail.
+ */
+ if (nelems == 0) {
+ bzero(ihp, sizeof (ctf_idhash_t));
+ ihp->ih_buckets = (ushort_t *)_CTF_EMPTY;
+ ihp->ih_nbuckets = 1;
+ return (0);
+ }
+
+ ihp->ih_nbuckets = 211; /* use a prime number of hash buckets */
+ ihp->ih_nelems = nelems + 1; /* we use index zero as a sentinel */
+ ihp->ih_free = 1; /* first free element is index 1 */
+
+ ihp->ih_buckets = ctf_alloc(sizeof (ushort_t) * ihp->ih_nbuckets);
+ ihp->ih_chains = ctf_alloc(sizeof (ctf_ihelem_t) * ihp->ih_nelems);
+
+ if (ihp->ih_buckets == NULL || ihp->ih_chains == NULL) {
+ ctf_idhash_destroy(ihp);
+ return (EAGAIN);
+ }
+
+ bzero(ihp->ih_buckets, sizeof (ushort_t) * ihp->ih_nbuckets);
+ bzero(ihp->ih_chains, sizeof (ctf_ihelem_t) * ihp->ih_nelems);
+
+ return (0);
+}
+
+void
+ctf_idhash_clear(ctf_idhash_t *ihp)
+{
+ /* Nothing to do for a sentinel hash */
+ if (ihp->ih_nbuckets == 1) {
+ ASSERT(ihp->ih_buckets == (ushort_t *)_CTF_EMPTY);
+ return;
+ }
+
+ ihp->ih_free = 1;
+ bzero(ihp->ih_buckets, sizeof (ushort_t) * ihp->ih_nbuckets);
+ bzero(ihp->ih_chains, sizeof (ctf_ihelem_t) * ihp->ih_nelems);
+}
+
+uint_t
+ctf_idhash_size(const ctf_idhash_t *hp)
+{
+ return (hp->ih_nelems ? hp->ih_nelems - 1 : 0);
+}
+
+static ulong_t
+ctf_idhash_compute(ctf_id_t id)
+{
+ return (id);
+}
+
+int
+ctf_idhash_insert(ctf_idhash_t *ihp, ushort_t type, ushort_t value)
+{
+ ctf_ihelem_t *ihep = &ihp->ih_chains[ihp->ih_free];
+ ulong_t h;
+
+ if (ihp->ih_free >= ihp->ih_nelems)
+ return (EOVERFLOW);
+
+ ihep->ih_type = type;
+ ihep->ih_value = value;
+ h = ctf_idhash_compute(type) % ihp->ih_nbuckets;
+ ihep->ih_next = ihp->ih_buckets[h];
+ ihp->ih_buckets[h] = ihp->ih_free++;
+
+ return (0);
+}
+
+int
+ctf_idhash_define(ctf_idhash_t *ihp, ushort_t type, ushort_t value)
+{
+ ctf_ihelem_t *hep = ctf_idhash_lookup(ihp, type);
+
+ if (hep == NULL)
+ return (ctf_idhash_insert(ihp, type, value));
+
+ hep->ih_value = value;
+ return (0);
+}
+
+
+ctf_ihelem_t *
+ctf_idhash_lookup(ctf_idhash_t *ihp, ushort_t key)
+{
+ ctf_ihelem_t *ihep;
+ ushort_t i;
+
+ ulong_t h = ctf_idhash_compute(key) % ihp->ih_nbuckets;
+
+ for (i = ihp->ih_buckets[h]; i != 0; i = ihep->ih_next) {
+ ihep = &ihp->ih_chains[i];
+
+ if (ihep->ih_type == key)
+ return (ihep);
+ }
+
+ return (NULL);
+}
+
+void
+ctf_idhash_destroy(ctf_idhash_t *ihp)
+{
+ if (ihp->ih_buckets != NULL && ihp->ih_nbuckets != 1) {
+ ctf_free(ihp->ih_buckets, sizeof (ushort_t) * ihp->ih_nbuckets);
+ ihp->ih_buckets = NULL;
+ }
+
+ if (ihp->ih_chains != NULL) {
+ ctf_free(ihp->ih_chains, sizeof (ctf_helem_t) * ihp->ih_nelems);
+ ihp->ih_chains = NULL;
+ }
+}
+
+/*ARGSUSED*/
+int
+ctf_idhash_iter_init(ctf_idhash_t *ihp, ctf_idhash_iter_t **iterp)
+{
+
+ *iterp = ctf_alloc(sizeof (ctf_idhash_iter_t));
+ if (*iterp == NULL)
+ return (ENOMEM);
+
+ if (ihp->ih_free == 0)
+ (*iterp)->cii_id = 0;
+ else
+ (*iterp)->cii_id = 1;
+
+ return (0);
+}
+
+const ctf_ihelem_t *
+ctf_idhash_iter(ctf_idhash_t *ihp, ctf_idhash_iter_t *iter)
+{
+ if (iter->cii_id >= ihp->ih_free)
+ return (NULL);
+
+ return (&ihp->ih_chains[iter->cii_id++]);
+}
+
+/*ARGSUSED*/
+void
+ctf_idhash_iter_fini(ctf_idhash_t *ihp, ctf_idhash_iter_t *iter)
+{
+ ctf_free(iter, sizeof (ctf_idhash_iter_t));
+}
diff --git a/usr/src/common/ctf/ctf_impl.h b/usr/src/common/ctf/ctf_impl.h
index f56fa6a005..42aec80a43 100644
--- a/usr/src/common/ctf/ctf_impl.h
+++ b/usr/src/common/ctf/ctf_impl.h
@@ -25,7 +25,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#ifndef _CTF_IMPL_H
@@ -77,6 +77,10 @@ typedef struct ctf_hash {
uint_t h_free; /* index of next free hash element */
} ctf_hash_t;
+struct ctf_idhash_iter {
+ int cii_id; /* Current iteration id */
+};
+
typedef struct ctf_strs {
const char *cts_strs; /* base address of string table */
size_t cts_len; /* size of string table in bytes */
diff --git a/usr/src/common/ctf/ctf_types.c b/usr/src/common/ctf/ctf_types.c
index ab1b9ff14b..93168f25cb 100644
--- a/usr/src/common/ctf/ctf_types.c
+++ b/usr/src/common/ctf/ctf_types.c
@@ -26,6 +26,7 @@
*/
#include <ctf_impl.h>
+#include <sys/debug.h>
ssize_t
ctf_get_ctt_size(const ctf_file_t *fp, const ctf_type_t *tp, ssize_t *sizep,
@@ -868,3 +869,66 @@ ctf_type_visit(ctf_file_t *fp, ctf_id_t type, ctf_visit_f *func, void *arg)
{
return (ctf_type_rvisit(fp, type, func, arg, "", 0, 0));
}
+
+int
+ctf_func_info_by_id(ctf_file_t *fp, ctf_id_t type, ctf_funcinfo_t *fip)
+{
+ ctf_file_t *ofp = fp;
+ const ctf_type_t *tp;
+ const ushort_t *dp;
+ int nargs;
+ ssize_t increment;
+
+ if ((tp = ctf_lookup_by_id(&fp, type)) == NULL)
+ return (CTF_ERR); /* errno is set for us */
+
+ if (LCTF_INFO_KIND(fp, tp->ctt_info) != CTF_K_FUNCTION)
+ return (ctf_set_errno(ofp, ECTF_NOTFUNC));
+
+ fip->ctc_return = tp->ctt_type;
+ nargs = LCTF_INFO_VLEN(fp, tp->ctt_info);
+ fip->ctc_argc = nargs;
+ fip->ctc_flags = 0;
+
+ /* dp should now point to the first argument */
+ if (nargs != 0) {
+ (void) ctf_get_ctt_size(fp, tp, NULL, &increment);
+ dp = (ushort_t *)((uintptr_t)fp->ctf_buf +
+ fp->ctf_txlate[CTF_TYPE_TO_INDEX(type)] + increment);
+ if (dp[nargs - 1] == 0) {
+ fip->ctc_flags |= CTF_FUNC_VARARG;
+ fip->ctc_argc--;
+ }
+ }
+
+ return (0);
+}
+
+int
+ctf_func_args_by_id(ctf_file_t *fp, ctf_id_t type, uint_t argc, ctf_id_t *argv)
+{
+ ctf_file_t *ofp = fp;
+ const ctf_type_t *tp;
+ const ushort_t *dp;
+ int nargs;
+ ssize_t increment;
+
+ if ((tp = ctf_lookup_by_id(&fp, type)) == NULL)
+ return (CTF_ERR); /* errno is set for us */
+
+ if (LCTF_INFO_KIND(fp, tp->ctt_info) != CTF_K_FUNCTION)
+ return (ctf_set_errno(ofp, ECTF_NOTFUNC));
+
+ nargs = LCTF_INFO_VLEN(fp, tp->ctt_info);
+ (void) ctf_get_ctt_size(fp, tp, NULL, &increment);
+ dp = (ushort_t *)((uintptr_t)fp->ctf_buf +
+ fp->ctf_txlate[CTF_TYPE_TO_INDEX(type)] +
+ increment);
+ if (nargs != 0 && dp[nargs - 1] == 0)
+ nargs--;
+
+ for (nargs = MIN(argc, nargs); nargs != 0; nargs--)
+ *argv++ = *dp++;
+
+ return (0);
+}
diff --git a/usr/src/common/fs/bootfsops.c b/usr/src/common/fs/bootfsops.c
new file mode 100644
index 0000000000..5a693b80e5
--- /dev/null
+++ b/usr/src/common/fs/bootfsops.c
@@ -0,0 +1,329 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/bootconf.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/vnode.h>
+#include <sys/fs/ufs_fsdir.h>
+#include <sys/fs/ufs_fs.h>
+#include <sys/fs/ufs_inode.h>
+#include <sys/sysmacros.h>
+#include <sys/bootvfs.h>
+#include <sys/bootinfo.h>
+#include <sys/filep.h>
+
+#ifdef _BOOT
+#include "../common/util.h"
+#else
+#include <sys/sunddi.h>
+#endif
+
+#define MAX_FILES MAX_BOOT_MODULES
+#define MAX_FDS 256
+
+extern void *bkmem_alloc(size_t);
+extern void bkmem_free(void *, size_t);
+
+/*
+ * TODO: Replace these declarations with inclusion of the ordinary userland
+ * bootfs headers once they're available.
+ */
+typedef struct bfile {
+ char bf_name[MAXPATHLEN];
+ caddr_t bf_addr;
+ size_t bf_size;
+ struct bfile *bf_next;
+ uint64_t bf_ino;
+} bfile_t;
+
+typedef struct bf_fd {
+ bfile_t *fd_file;
+ off_t fd_pos;
+} bf_fd_t;
+
+static bfile_t *head;
+static uint_t init_done;
+static bf_fd_t fds[MAX_FDS];
+
+static char cpath[MAXPATHLEN]; /* For canonicalising filenames */
+
+static void bbootfs_closeall(int);
+
+static void
+canonicalise(const char *fn, char *out)
+{
+ const char *p;
+ char *q, *s;
+ char *last;
+ char *oc;
+ int is_slash = 0;
+ static char scratch[MAXPATHLEN];
+
+ if (fn == NULL) {
+ *out = '\0';
+ return;
+ }
+
+ /*
+ * Remove leading slashes and condense all multiple slashes into one.
+ */
+ p = fn;
+ while (*p == '/')
+ ++p;
+
+ for (q = scratch; *p != '\0'; p++) {
+ if (*p == '/' && !is_slash) {
+ *q++ = '/';
+ is_slash = 1;
+ } else if (*p != '/') {
+ *q++ = *p;
+ is_slash = 0;
+ }
+ }
+ *q = '\0';
+
+ if (strncmp(scratch, "system/boot/", 12) == 0 ||
+ strcmp(scratch, "system/boot") == 0) {
+ s = scratch + 12;
+ } else {
+ s = scratch;
+ }
+
+ for (last = strsep(&s, "/"), q = oc = out; last != NULL;
+ last = strsep(&s, "/")) {
+ if (strcmp(last, ".") == 0)
+ continue;
+ if (strcmp(last, "..") == 0) {
+ for (oc = q; oc > out && *oc != '/'; oc--)
+ ;
+ q = oc;
+ continue;
+ }
+ if (q > out)
+ *q++ = '/';
+ q += snprintf(q, MAXPATHLEN - (q - out), "%s", last);
+ }
+
+ *q = '\0';
+}
+
+/* ARGSUSED */
+static int
+bbootfs_mountroot(char *str)
+{
+ return (-1);
+}
+
+static int
+bbootfs_unmountroot(void)
+{
+ return (-1);
+}
+
+static int
+bbootfs_init(void)
+{
+ bfile_t *fp;
+ char propname[32];
+ uint64_t propval;
+ uint_t i;
+
+ for (i = 0; i < MAX_FILES; i++) {
+ (void) snprintf(propname, sizeof (propname),
+ "module-name-%u", i);
+ if (do_bsys_getproplen(NULL, propname) < 0)
+ break;
+
+ if ((fp = bkmem_alloc(sizeof (bfile_t))) == NULL) {
+ bbootfs_closeall(1);
+ return (-1);
+ }
+
+ (void) do_bsys_getprop(NULL, propname, cpath);
+ canonicalise(cpath, fp->bf_name);
+
+ (void) snprintf(propname, sizeof (propname),
+ "module-addr-%u", i);
+ if (do_bsys_getproplen(NULL, propname) != sizeof (uint64_t)) {
+ bkmem_free(fp, sizeof (bfile_t));
+ continue;
+ }
+ (void) do_bsys_getprop(NULL, propname, &propval);
+ fp->bf_addr = (void *)(uintptr_t)propval;
+
+ (void) snprintf(propname, sizeof (propname),
+ "module-size-%u", i);
+ if (do_bsys_getproplen(NULL, propname) != sizeof (uint64_t)) {
+ bkmem_free(fp, sizeof (bfile_t));
+ continue;
+ }
+ (void) do_bsys_getprop(NULL, propname, &propval);
+ fp->bf_size = (size_t)propval;
+ fp->bf_ino = i;
+
+ fp->bf_next = head;
+ head = fp;
+ }
+
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+bbootfs_open(char *fn, int flags)
+{
+ uint_t i;
+ bfile_t *fp;
+
+ if (!init_done) {
+ if (bbootfs_init() != 0)
+ return (-1);
+
+ init_done = 1;
+ }
+
+ canonicalise(fn, cpath);
+
+ for (fp = head; fp != NULL; fp = fp->bf_next) {
+ if (strcmp(fp->bf_name, cpath) == 0)
+ break;
+ }
+
+ if (fp == NULL)
+ return (-1);
+
+ for (i = 0; i < MAX_FDS; i++) {
+ if (fds[i].fd_file == NULL) {
+ fds[i].fd_file = fp;
+ fds[i].fd_pos = 0;
+ return (i);
+ }
+ }
+
+ return (-1);
+}
+
+static int
+bbootfs_close(int fd)
+{
+ if (fds[fd].fd_file == NULL)
+ return (-1);
+
+ fds[fd].fd_file = NULL;
+ fds[fd].fd_pos = 0;
+
+ return (0);
+}
+
+static ssize_t
+bbootfs_read(int fd, caddr_t buf, size_t size)
+{
+ ssize_t len;
+ bf_fd_t *fdp = &fds[fd];
+
+ if (fdp->fd_file == NULL)
+ return (-1);
+
+ if (fdp->fd_pos >= fdp->fd_file->bf_size)
+ return (-1);
+
+ if (fdp->fd_pos + size > fdp->fd_file->bf_size)
+ len = fdp->fd_file->bf_size - fdp->fd_pos;
+ else
+ len = size;
+
+ bcopy(fdp->fd_file->bf_addr + fdp->fd_pos, buf, len);
+
+ fdp->fd_pos += len;
+
+ return (len);
+}
+
+static off_t
+bbootfs_lseek(int fd, off_t addr, int whence)
+{
+ bf_fd_t *fdp = &fds[fd];
+
+ if (fdp->fd_file == NULL)
+ return (-1);
+
+ switch (whence) {
+ case SEEK_CUR:
+ fdp->fd_pos += addr;
+ break;
+ case SEEK_SET:
+ fdp->fd_pos = addr;
+ break;
+ case SEEK_END:
+ fdp->fd_pos = fdp->fd_file->bf_size;
+ break;
+ default:
+ return (-1);
+ }
+
+ return (0);
+}
+
+static int
+bbootfs_fstat(int fd, struct bootstat *bsp)
+{
+ bf_fd_t *fdp = &fds[fd];
+
+ if (fdp->fd_file == NULL)
+ return (-1);
+
+ bsp->st_dev = 1;
+ bsp->st_ino = fdp->fd_file->bf_ino;
+ bsp->st_mode = 0444;
+ bsp->st_nlink = 1;
+ bsp->st_uid = bsp->st_gid = 0;
+ bsp->st_rdev = 0;
+ bsp->st_size = fdp->fd_file->bf_size;
+ bsp->st_blksize = 1;
+ bsp->st_blocks = fdp->fd_file->bf_size;
+ (void) strcpy(bsp->st_fstype, "bootfs");
+
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+bbootfs_closeall(int flag)
+{
+ bfile_t *fp;
+
+ while (head != NULL) {
+ fp = head;
+ head = head->bf_next;
+
+ bkmem_free(fp, sizeof (bfile_t));
+ }
+
+ init_done = 0;
+}
+
+struct boot_fs_ops bbootfs_ops = {
+ "bootfs",
+ bbootfs_mountroot,
+ bbootfs_unmountroot,
+ bbootfs_open,
+ bbootfs_close,
+ bbootfs_read,
+ bbootfs_lseek,
+ bbootfs_fstat,
+ bbootfs_closeall,
+ NULL
+};
diff --git a/usr/src/common/util/string.c b/usr/src/common/util/string.c
index d54b58d59c..80a076c436 100644
--- a/usr/src/common/util/string.c
+++ b/usr/src/common/util/string.c
@@ -21,6 +21,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
*/
/*
@@ -264,7 +265,7 @@ next_fmt:
if (sign && pad == '0')
ADDCHAR('-');
- while (width-- > sign)
+ while ((!left_align) && (width-- > sign))
ADDCHAR(pad);
if (sign && pad == ' ')
ADDCHAR('-');
@@ -280,6 +281,10 @@ next_fmt:
ADDCHAR(*sp);
}
+ /* add left-alignment padding */
+ while (width-- > sign)
+ ADDCHAR(' ');
+
if (c == 'b' && ul != 0) {
int any = 0;
c = *bs++;
diff --git a/usr/src/common/util/string.h b/usr/src/common/util/string.h
index 052eeab4a4..f7acd734bd 100644
--- a/usr/src/common/util/string.h
+++ b/usr/src/common/util/string.h
@@ -61,6 +61,7 @@ extern char *strncpy(char *, const char *, size_t);
extern char *strrchr(const char *, int c);
extern char *strstr(const char *, const char *);
extern char *strpbrk(const char *, const char *);
+extern char *strsep(char **, const char *);
extern char *strncat(char *, const char *, size_t);
extern size_t strlcat(char *, const char *, size_t);
extern size_t strlcpy(char *, const char *, size_t);
diff --git a/usr/src/common/util/strtolctype.h b/usr/src/common/util/strtolctype.h
index 5675e42be7..535c014d1f 100644
--- a/usr/src/common/util/strtolctype.h
+++ b/usr/src/common/util/strtolctype.h
@@ -44,7 +44,7 @@ extern "C" {
* safe in probe context.
*/
-#if defined(_KERNEL) && !defined(_BOOT)
+#if defined(_KERNEL) || defined(_BOOT)
#define isalnum(ch) (isalpha(ch) || isdigit(ch))
#define isalpha(ch) (isupper(ch) || islower(ch))
@@ -56,7 +56,7 @@ extern "C" {
#define isxdigit(ch) (isdigit(ch) || ((ch) >= 'a' && (ch) <= 'f') || \
((ch) >= 'A' && (ch) <= 'F'))
-#endif /* _KERNEL && !_BOOT */
+#endif /* _KERNEL || _BOOT */
#define DIGIT(x) \
(isdigit(x) ? (x) - '0' : islower(x) ? (x) + 10 - 'a' : (x) + 10 - 'A')
diff --git a/usr/src/common/zfs/zfs_prop.c b/usr/src/common/zfs/zfs_prop.c
index e145b1c866..54aaa027dd 100644
--- a/usr/src/common/zfs/zfs_prop.c
+++ b/usr/src/common/zfs/zfs_prop.c
@@ -444,6 +444,23 @@ zfs_prop_delegatable(zfs_prop_t prop)
return (pd->pd_attr != PROP_READONLY);
}
+boolean_t
+zfs_prop_cacheable(zfs_prop_t prop)
+{
+ /*
+ * It'd be nice if each prop had a flags field which could have flag
+ * like PROP_CACHEABLE, but since zprop_attr_t is an enum and this
+ * setting is orthogonal to the concepts of PROP_READONLY, etc., we have
+ * this function.
+ */
+ return (prop == ZFS_PROP_VERSION ||
+ prop == ZFS_PROP_NORMALIZE ||
+ prop == ZFS_PROP_UTF8ONLY ||
+ prop == ZFS_PROP_CASE ||
+ prop == ZFS_PROP_VOLSIZE ||
+ prop == ZFS_PROP_VOLBLOCKSIZE);
+}
+
/*
* Given a zfs dataset property name, returns the corresponding property ID.
*/
diff --git a/usr/src/common/zfs/zfs_prop.h b/usr/src/common/zfs/zfs_prop.h
index a63262311b..1796642c68 100644
--- a/usr/src/common/zfs/zfs_prop.h
+++ b/usr/src/common/zfs/zfs_prop.h
@@ -21,6 +21,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#ifndef _ZFS_PROP_H
@@ -86,6 +87,7 @@ typedef struct {
void zfs_prop_init(void);
zprop_type_t zfs_prop_get_type(zfs_prop_t);
boolean_t zfs_prop_delegatable(zfs_prop_t prop);
+boolean_t zfs_prop_cacheable(zfs_prop_t prop);
zprop_desc_t *zfs_prop_get_table(void);
/*
diff --git a/usr/src/grub/Makefile b/usr/src/grub/Makefile
index 28c2eca2ff..c31e9a802c 100644
--- a/usr/src/grub/Makefile
+++ b/usr/src/grub/Makefile
@@ -29,6 +29,7 @@ INST_TARGETS += $(ROOT_BOOT_GRUB)/$(GRUB_MENU)
INST_TARGETS += $(ROOT_BOOT_GRUB)/$(INSTALL_MENU)
INST_TARGETS += $(ROOT_BOOT_GRUB)/$(GRUB_DEFAULT)
INST_TARGETS += $(ROOT_BOOT_GRUB)/$(CAPABILITY)
+INST_TARGETS += $(ROOT_USR_SBIN)/grub
$(ROOT_BOOT_GRUB)/$(GRUB_DEFAULT) := FILEMODE = 444
$(ROOT_BOOT_GRUB)/$(CAPABILITY) := FILEMODE = 444
@@ -45,9 +46,14 @@ $(GRUB): FRC
$(ROOT_BOOT_GRUB)/%: $(ROOT_BOOT_GRUB) %
$(INS.file)
+$(ROOT_USR_SBIN)/%: $(GRUB)/grub/grub $(ROOT_USR_SBIN)
+ $(INS.file)
+
$(ROOT_BOOT_GRUB):
$(INS.dir)
+$(ROOT_USR_SBIN):
+ $(INS.dir)
clean clobber: $(SUBDIRS)
diff --git a/usr/src/grub/Makefile.grub b/usr/src/grub/Makefile.grub
index 18354324ae..99942fa2ed 100644
--- a/usr/src/grub/Makefile.grub
+++ b/usr/src/grub/Makefile.grub
@@ -10,3 +10,4 @@ PLATFORM = i86pc
ROOT_BOOT_GRUB = $(ROOT)/boot/grub
ROOT_PLAT_GRUB = $(ROOT)/platform/$(PLATFORM)/boot/grub
ROOT_SRC = $(ROOT)/usr/share/src/grub
+ROOT_USR_SBIN = $(ROOT)/usr/sbin
diff --git a/usr/src/grub/grub-0.97/stage2/boot.c b/usr/src/grub/grub-0.97/stage2/boot.c
index cfc2336a4c..027de7709b 100644
--- a/usr/src/grub/grub-0.97/stage2/boot.c
+++ b/usr/src/grub/grub-0.97/stage2/boot.c
@@ -25,6 +25,8 @@
#include "imgact_aout.h"
#include "i386-elf.h"
+#define SAFE_LOAD_BASE 0xc800000
+
static int cur_addr;
entry_func entry_addr;
static struct mod_list mll[99];
@@ -773,6 +775,17 @@ load_module (char *module, char *arg)
{
int len;
+ /*
+ * XXX Workaround for RICHMOND-16: on some systems, the region
+ * [c700000, c800000) is corrupted by an unknown external (off-CPU) actor(s)
+ * during boot. To be on the safe side, we will simply ensure that every
+ * module is loaded above this region. Note that this means this particular
+ * boot loader supports only systems with at least 200 MB of DRAM plus the
+ * amount of space used by any modules.
+ */
+ if (cur_addr < SAFE_LOAD_BASE)
+ cur_addr = SAFE_LOAD_BASE;
+
/* if we are supposed to load on 4K boundaries */
cur_addr = (cur_addr + 0xFFF) & 0xFFFFF000;
diff --git a/usr/src/grub/grub-0.97/stage2/cmdline.c b/usr/src/grub/grub-0.97/stage2/cmdline.c
index 46c5fda027..6d5591e1de 100644
--- a/usr/src/grub/grub-0.97/stage2/cmdline.c
+++ b/usr/src/grub/grub-0.97/stage2/cmdline.c
@@ -212,8 +212,27 @@ run_script (char *script, char *heap)
intervention. */
if (fallback_entryno < 0)
{
- grub_printf ("\nPress any key to continue...");
- (void) getkey ();
+ int time1, time2 = -1;
+
+ grub_printf (
+ "\nRebooting in 2 minutes (press any key to continue)...");
+ grub_timeout = 120;
+
+ /* using RT clock now, need to initialize value */
+ while ((time1 = getrtsecs()) == 0xFF);
+
+ while (grub_timeout >= 0) {
+ if ((time1 = getrtsecs()) != time2 && time1 != 0xFF) {
+ time2 = time1;
+ grub_timeout--;
+ }
+
+ if (checkkey() >= 0)
+ break;
+ }
+
+ grub_printf ("\nresetting...");
+ grub_reboot();
}
return 1;
diff --git a/usr/src/head/Makefile b/usr/src/head/Makefile
index d13946c1b9..a5cd55825b 100644
--- a/usr/src/head/Makefile
+++ b/usr/src/head/Makefile
@@ -154,6 +154,7 @@ HDRS= $($(MACH)_HDRS) $(ATTRDB_HDRS) \
regex.h \
regexp.h \
resolv.h \
+ resolv_joy.h \
rje.h \
rtld_db.h \
sac.h \
@@ -215,6 +216,7 @@ HDRS= $($(MACH)_HDRS) $(ATTRDB_HDRS) \
xlocale.h \
xti.h \
xti_inet.h \
+ zdoor.h \
zone.h
ISOHDRS = \
diff --git a/usr/src/head/libzonecfg.h b/usr/src/head/libzonecfg.h
index d41dbb0520..0212224de5 100644
--- a/usr/src/head/libzonecfg.h
+++ b/usr/src/head/libzonecfg.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent, Inc. All rights reserved.
*/
#ifndef _LIBZONECFG_H
@@ -42,6 +43,7 @@ extern "C" {
#include <netinet/in.h>
#include <sys/socket.h>
#include <net/if.h>
+#include <sys/mac.h>
#include <stdio.h>
#include <rctl.h>
#include <zone.h>
@@ -127,6 +129,8 @@ extern "C" {
#define MAXAUTHS 4096
#define ZONE_MGMT_PROF "Zone Management"
+#define ZONE_INT32SZ 11 /* string to hold 32bit int. */
+
/* Owner, group, and mode (defined by packaging) for the config directory */
#define ZONE_CONFIG_UID 0 /* root */
#define ZONE_CONFIG_GID 3 /* sys */
@@ -150,9 +154,11 @@ extern "C" {
#define ALIAS_MAXSEMIDS "max-sem-ids"
#define ALIAS_MAXLOCKEDMEM "locked"
#define ALIAS_MAXSWAP "swap"
+#define ALIAS_MAXPHYSMEM "physical"
#define ALIAS_SHARES "cpu-shares"
#define ALIAS_CPUCAP "cpu-cap"
#define ALIAS_MAXPROCS "max-processes"
+#define ALIAS_ZFSPRI "zfs-io-priority"
/* Default name for zone detached manifest */
#define ZONE_DETACHED "SUNWdetached.xml"
@@ -191,15 +197,30 @@ struct zone_fstab {
char zone_fs_raw[MAXPATHLEN]; /* device to fsck */
};
+/*
+ * Generic resource attribute list.
+ * Key/value resource that can be attached to net or device.
+ */
+struct zone_res_attrtab {
+ char zone_res_attr_name[MAXNAMELEN];
+ char zone_res_attr_value[MAXNAMELEN];
+ struct zone_res_attrtab *zone_res_attr_next;
+};
+
struct zone_nwiftab {
char zone_nwif_address[INET6_ADDRSTRLEN]; /* shared-ip only */
char zone_nwif_allowed_address[INET6_ADDRSTRLEN]; /* excl-ip only */
char zone_nwif_physical[LIFNAMSIZ];
+ char zone_nwif_mac[MAXMACADDRLEN]; /* excl-ip only */
+ char zone_nwif_vlan_id[ZONE_INT32SZ]; /* excl-ip only */
+ char zone_nwif_gnic[LIFNAMSIZ]; /* excl-ip only */
char zone_nwif_defrouter[INET6_ADDRSTRLEN];
+ struct zone_res_attrtab *zone_nwif_attrp;
};
struct zone_devtab {
char zone_dev_match[MAXPATHLEN];
+ struct zone_res_attrtab *zone_dev_attrp;
};
struct zone_rctlvaltab {
@@ -230,10 +251,6 @@ struct zone_psettab {
char zone_importance[MAXNAMELEN];
};
-struct zone_mcaptab {
- char zone_physmem_cap[MAXNAMELEN];
-};
-
struct zone_pkgtab {
char zone_pkg_name[MAXNAMELEN];
char zone_pkg_version[ZONE_PKG_VERSMAX];
@@ -317,6 +334,8 @@ extern int zonecfg_set_bootargs(zone_dochandle_t, char *);
extern int zonecfg_get_sched_class(zone_dochandle_t, char *, size_t);
extern int zonecfg_set_sched(zone_dochandle_t, char *);
extern int zonecfg_get_dflt_sched_class(zone_dochandle_t, char *, int);
+extern zoneid_t zonecfg_get_did(zone_dochandle_t);
+extern void zonecfg_set_did(zone_dochandle_t);
/*
* Set/retrieve the brand for the zone
@@ -341,6 +360,15 @@ extern int zonecfg_find_mounts(char *, int(*)(const struct mnttab *,
void *), void *);
/*
+ * Resource key/value attributes (properties).
+ */
+extern int zonecfg_add_res_attr(struct zone_res_attrtab **,
+ struct zone_res_attrtab *);
+extern void zonecfg_free_res_attr_list(struct zone_res_attrtab *);
+extern int zonecfg_remove_res_attr(struct zone_res_attrtab **,
+ struct zone_res_attrtab *);
+
+/*
* Network interface configuration.
*/
extern int zonecfg_add_nwif(zone_dochandle_t, struct zone_nwiftab *);
@@ -422,13 +450,6 @@ extern int zonecfg_modify_pset(zone_dochandle_t, struct zone_psettab *);
extern int zonecfg_lookup_pset(zone_dochandle_t, struct zone_psettab *);
/*
- * mem-cap configuration.
- */
-extern int zonecfg_delete_mcap(zone_dochandle_t);
-extern int zonecfg_modify_mcap(zone_dochandle_t, struct zone_mcaptab *);
-extern int zonecfg_lookup_mcap(zone_dochandle_t, struct zone_mcaptab *);
-
-/*
* Temporary pool support functions.
*/
extern int zonecfg_destroy_tmp_pool(char *, char *, int);
@@ -485,7 +506,6 @@ extern int zonecfg_setdsent(zone_dochandle_t);
extern int zonecfg_getdsent(zone_dochandle_t, struct zone_dstab *);
extern int zonecfg_enddsent(zone_dochandle_t);
extern int zonecfg_getpsetent(zone_dochandle_t, struct zone_psettab *);
-extern int zonecfg_getmcapent(zone_dochandle_t, struct zone_mcaptab *);
extern int zonecfg_getpkgdata(zone_dochandle_t, uu_avl_pool_t *,
uu_avl_t *);
extern int zonecfg_setdevperment(zone_dochandle_t);
@@ -509,6 +529,7 @@ extern int zonecfg_set_limitpriv(zone_dochandle_t, char *);
* Higher-level routines.
*/
extern int zone_get_brand(char *, char *, size_t);
+extern zoneid_t zone_get_did(char *);
extern int zone_get_rootpath(char *, char *, size_t);
extern int zone_get_devroot(char *, char *, size_t);
extern int zone_get_zonepath(char *, char *, size_t);
@@ -517,7 +538,9 @@ extern int zone_set_state(char *, zone_state_t);
extern char *zone_state_str(zone_state_t);
extern int zonecfg_get_name_by_uuid(const uuid_t, char *, size_t);
extern int zonecfg_get_uuid(const char *, uuid_t);
+extern int zonecfg_set_uuid(const char *, const char *, const char *);
extern int zonecfg_default_brand(char *, size_t);
+extern int zonecfg_fix_obsolete(zone_dochandle_t);
/*
* Iterator for configured zones.
diff --git a/usr/src/head/netdb.h b/usr/src/head/netdb.h
index b26c26d40b..8d8a6a8b80 100644
--- a/usr/src/head/netdb.h
+++ b/usr/src/head/netdb.h
@@ -123,7 +123,10 @@ struct addrinfo {
struct addrinfo *ai_next; /* next structure in linked list */
};
-
+/*
+ * The flag 0x8000 is currently reserved for private use between libnsl and
+ * libsocket. See lib/libsocket/inet/getaddrinfo.c for more information.
+ */
/* addrinfo flags */
#define AI_PASSIVE 0x0008 /* intended for bind() + listen() */
#define AI_CANONNAME 0x0010 /* return canonical version of host */
diff --git a/usr/src/head/regexp.h b/usr/src/head/regexp.h
index 5336c81677..e98cd25e16 100644
--- a/usr/src/head/regexp.h
+++ b/usr/src/head/regexp.h
@@ -394,12 +394,12 @@ advance(const char *lp, const char *ep)
/*FALLTHRU*/
case CBRA:
- braslist[*ep++] = (char *)lp;
+ braslist[(int)*ep++] = (char *)lp;
continue;
/*FALLTHRU*/
case CKET:
- braelist[*ep++] = (char *)lp;
+ braelist[(int)*ep++] = (char *)lp;
continue;
/*FALLTHRU*/
@@ -477,8 +477,8 @@ advance(const char *lp, const char *ep)
/*FALLTHRU*/
case CBACK:
- bbeg = braslist[*ep];
- ct = braelist[*ep++] - bbeg;
+ bbeg = braslist[(int)*ep];
+ ct = braelist[(int)*ep++] - bbeg;
if (ecmp(bbeg, lp, ct)) {
lp += ct;
@@ -488,8 +488,8 @@ advance(const char *lp, const char *ep)
/*FALLTHRU*/
case CBACK | STAR:
- bbeg = braslist[*ep];
- ct = braelist[*ep++] - bbeg;
+ bbeg = braslist[(int)*ep];
+ ct = braelist[(int)*ep++] - bbeg;
curlp = lp;
while (ecmp(bbeg, lp, ct))
lp += ct;
diff --git a/usr/src/head/resolv_joy.h b/usr/src/head/resolv_joy.h
new file mode 100644
index 0000000000..262cf6121d
--- /dev/null
+++ b/usr/src/head/resolv_joy.h
@@ -0,0 +1,472 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T
+ * All Rights Reserved
+ *
+ * Portions of this source code were derived from Berkeley
+ * 4.3 BSD under license from the regents of the University of
+ * California.
+ */
+
+/*
+ * BIND 4.9.4:
+ */
+
+/*
+ * Portions Copyright (c) 1993 by Digital Equipment Corporation.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies, and that
+ * the name of Digital Equipment Corporation not be used in advertising or
+ * publicity pertaining to distribution of the document or software without
+ * specific, written prior permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
+ * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ * --Copyright--
+ *
+ * End BIND 4.9.4
+ */
+
+/*
+ * Copyright (c) 1983, 1987, 1989
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Portions Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
+ * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
+ * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+/*
+ * @(#)resolv.h 8.1 (Berkeley) 6/2/93
+ * $Id: resolv.h,v 8.52 2003/04/29 02:27:03 marka Exp $
+ */
+
+#ifndef _RESOLV_JOY_H
+#define _RESOLV_JOY_H
+
+#ifdef _RESOLV_H_
+#error "resolv.h and resolv_joy.h should not be used together"
+#endif
+
+#include <sys/param.h>
+
+#include <stdio.h>
+#include <arpa/nameser.h>
+#include <sys/socket.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Revision information. This is the release date in YYYYMMDD format.
+ * It can change every day so the right thing to do with it is use it
+ * in preprocessor commands such as "#if (__RES > 19931104)". Do not
+ * compare for equality; rather, use it to determine whether your resolver
+ * is new enough to contain a certain feature.
+ */
+
+#define __RES 20090302
+
+#pragma redefine_extname dn_expand joy_dn_expand
+#pragma redefine_extname res_nsearch joy_res_nsearch
+#pragma redefine_extname res_ninit joy_res_ninit
+#pragma redefine_extname res_ndestroy joy_res_ndestroy
+#pragma redefine_extname res_gethostbyaddr joy_res_gethostbyaddr
+#pragma redefine_extname res_gethostbyname joy_res_gethostbyname
+#pragma redefine_extname res_gethostbyname2 joy_res_gethostbyname2
+#pragma redefine_extname res_endhostent joy_res_endhostent
+#pragma redefine_extname res_sethostent joy_res_sethostent
+#pragma redefine_extname __res_override_retry __joy_res_override_retry
+#pragma redefine_extname __res_unset_no_hosts_fallback \
+ __joy_res_unset_no_hosts_fallback
+#pragma redefine_extname __res_set_no_hosts_fallback \
+ __joy_res_set_no_hosts_fallback
+#pragma redefine_extname __h_errno __joy_h_errno
+#pragma redefine_extname __ns_get16 __joy_ns_get16
+#pragma redefine_extname __ns_get32 __joy_ns_get32
+
+
+#define RES_SET_H_ERRNO(r, x) __h_errno_set(r, x)
+struct __res_state; /* forward */
+
+void __h_errno_set(struct __res_state *res, int err);
+
+/*
+ * Resolver configuration file.
+ * Normally not present, but may contain the address of the
+ * initial name server(s) to query and the domain search list.
+ */
+
+#ifndef _PATH_RESCONF
+#define _PATH_RESCONF "/etc/resolv.conf"
+#endif
+
+#ifdef __STDC__
+#ifndef __P
+#define __P(x) x
+#endif
+#else
+#ifndef __P
+#define __P(x) ()
+#endif
+#endif /* __STDC__ */
+
+typedef enum { res_goahead, res_nextns, res_modified, res_done, res_error }
+ res_sendhookact;
+
+typedef res_sendhookact (*res_send_qhook)__P((struct sockaddr * const *ns,
+ const uchar_t **query,
+ int *querylen,
+ uchar_t *ans,
+ int anssiz,
+ int *resplen));
+
+typedef res_sendhookact (*res_send_rhook)__P((const struct sockaddr *ns,
+ const uchar_t *query,
+ int querylen,
+ uchar_t *ans,
+ int anssiz,
+ int *resplen));
+
+struct res_sym {
+ int number; /* Identifying number, like T_MX */
+ const char *name; /* Its symbolic name, like "MX" */
+ const char *humanname; /* Its fun name, like "mail exchanger" */
+};
+
+/*
+ * Global defines and variables for resolver stub.
+ */
+/* ADDRSORT and MAXADDR retained for compatibility; not used */
+#define ADDRSORT 1 /* enable the address-sorting option */
+#define MAXADDR 10 /* max # addresses to sort by */
+
+#define MAXNS 32 /* max # name servers we'll track */
+#define MAXDFLSRCH 3 /* # default domain levels to try */
+#define MAXDNSRCH 6 /* max # domains in search path */
+#define LOCALDOMAINPARTS 2 /* min levels in name that is "local" */
+
+#define RES_TIMEOUT 5 /* min. seconds between retries */
+#define MAXRESOLVSORT 10 /* number of net to sort on */
+#define RES_MAXNDOTS 15 /* should reflect bit field size */
+#define RES_MAXRETRANS 30 /* only for resolv.conf/RES_OPTIONS */
+#define RES_MAXRETRY 5 /* only for resolv.conf/RES_OPTIONS */
+#define RES_DFLRETRY 2 /* Default #/tries. */
+#define RES_MAXTIME 65535 /* Infinity, in milliseconds. */
+
+struct __res_state_ext;
+
+struct __res_state {
+ int retrans; /* retransmission time interval */
+ int retry; /* number of times to retransmit */
+#ifdef __sun
+ uint_t options; /* option flags - see below. */
+#else
+ ulong_t options; /* option flags - see below. */
+#endif
+ int nscount; /* number of name servers */
+ struct sockaddr_in
+ nsaddr_list[MAXNS]; /* address of name server */
+#define nsaddr nsaddr_list[0] /* for backward compatibility */
+ ushort_t id; /* current packet id */
+ char *dnsrch[MAXDNSRCH+1]; /* components of domain to search */
+ char defdname[256]; /* default domain (deprecated) */
+#ifdef __sun
+ uint_t pfcode; /* RES_PRF_ flags - see below. */
+#else
+ ulong_t pfcode; /* RES_PRF_ flags - see below. */
+#endif
+ unsigned ndots:4; /* threshold for initial abs. query */
+ unsigned nsort:4; /* number of elements in sort_list[] */
+ char unused[3];
+ struct {
+ struct in_addr addr;
+ unsigned int mask;
+ } sort_list[MAXRESOLVSORT];
+ res_send_qhook qhook; /* query hook */
+ res_send_rhook rhook; /* response hook */
+ int res_h_errno; /* last one set for this context */
+ int _vcsock; /* PRIVATE: for res_send VC i/o */
+ uint_t _flags; /* PRIVATE: see below */
+ uint_t _pad; /* make _u 64 bit aligned */
+ union {
+ /* On an 32-bit arch this means 512b total. */
+ char pad[72 - 4*sizeof (int) - 2*sizeof (void *)];
+ struct {
+ uint16_t nscount;
+ uint16_t nstimes[MAXNS]; /* ms. */
+ int nssocks[MAXNS];
+ struct __res_state_ext *ext; /* extention for IPv6 */
+ uchar_t _rnd[16]; /* PRIVATE: random state */
+ } _ext;
+ } _u;
+};
+
+typedef struct __res_state *res_state;
+
+union res_sockaddr_union {
+ struct sockaddr_in sin;
+#ifdef IN6ADDR_ANY_INIT
+ struct sockaddr_in6 sin6;
+#endif
+#ifdef ISC_ALIGN64
+ int64_t __align64; /* 64bit alignment */
+#else
+ int32_t __align32; /* 32bit alignment */
+#endif
+ char __space[128]; /* max size */
+};
+
+/*
+ * Resolver flags (used to be discrete per-module statics ints).
+ */
+#define RES_F_VC 0x00000001 /* socket is TCP */
+#define RES_F_CONN 0x00000002 /* socket is connected */
+#define RES_F_EDNS0ERR 0x00000004 /* EDNS0 caused errors */
+#define RES_F__UNUSED 0x00000008 /* (unused) */
+#define RES_F_LASTMASK 0x000000F0 /* ordinal server of last res_nsend */
+#define RES_F_LASTSHIFT 4 /* bit position of LASTMASK "flag" */
+#define RES_GETLAST(res) (((res)._flags & RES_F_LASTMASK) >> RES_F_LASTSHIFT)
+
+/* res_findzonecut2() options */
+#define RES_EXHAUSTIVE 0x00000001 /* always do all queries */
+#define RES_IPV4ONLY 0x00000002 /* IPv4 only */
+#define RES_IPV6ONLY 0x00000004 /* IPv6 only */
+
+/*
+ * Resolver options (keep these in synch with res_debug.c, please)
+ */
+#define RES_INIT 0x00000001 /* address initialized */
+#define RES_DEBUG 0x00000002 /* print debug messages */
+#define RES_AAONLY 0x00000004 /* authoritative answers only (!IMPL) */
+#define RES_USEVC 0x00000008 /* use virtual circuit */
+#define RES_PRIMARY 0x00000010 /* query primary server only (!IMPL) */
+#define RES_IGNTC 0x00000020 /* ignore trucation errors */
+#define RES_RECURSE 0x00000040 /* recursion desired */
+#define RES_DEFNAMES 0x00000080 /* use default domain name */
+#define RES_STAYOPEN 0x00000100 /* Keep TCP socket open */
+#define RES_DNSRCH 0x00000200 /* search up local domain tree */
+#define RES_INSECURE1 0x00000400 /* type 1 security disabled */
+#define RES_INSECURE2 0x00000800 /* type 2 security disabled */
+#define RES_NOALIASES 0x00001000 /* shuts off HOSTALIASES feature */
+#define RES_USE_INET6 0x00002000 /* use/map IPv6 in gethostbyname() */
+#define RES_ROTATE 0x00004000 /* rotate ns list after each query */
+#define RES_NOCHECKNAME 0x00008000 /* do not check names for sanity. */
+#define RES_KEEPTSIG 0x00010000 /* do not strip TSIG records */
+#define RES_BLAST 0x00020000 /* blast all recursive servers */
+#define RES_NO_NIBBLE 0x00040000 /* disable IPv6 nibble mode reverse */
+#define RES_NO_BITSTRING 0x00080000 /* disable IPv6 bitstring mode revrse */
+#define RES_NOTLDQUERY 0x00100000 /* don't unqualified name as a tld */
+#define RES_USE_DNSSEC 0x00200000 /* use DNSSEC using OK bit in OPT */
+/* KAME extensions: use higher bit to avoid conflict with ISC use */
+#define RES_USE_DNAME 0x10000000 /* use DNAME */
+#define RES_USE_EDNS0 0x40000000 /* use EDNS0 if configured */
+#define RES_NO_NIBBLE2 0x80000000 /* disable alternate nibble lookup */
+
+#define RES_DEFAULT (RES_RECURSE | RES_DEFNAMES | RES_DNSRCH)
+
+/*
+ * Resolver "pfcode" values. Used by dig.
+ */
+#define RES_PRF_STATS 0x00000001
+#define RES_PRF_UPDATE 0x00000002
+#define RES_PRF_CLASS 0x00000004
+#define RES_PRF_CMD 0x00000008
+#define RES_PRF_QUES 0x00000010
+#define RES_PRF_ANS 0x00000020
+#define RES_PRF_AUTH 0x00000040
+#define RES_PRF_ADD 0x00000080
+#define RES_PRF_HEAD1 0x00000100
+#define RES_PRF_HEAD2 0x00000200
+#define RES_PRF_TTLID 0x00000400
+#define RES_PRF_HEADX 0x00000800
+#define RES_PRF_QUERY 0x00001000
+#define RES_PRF_REPLY 0x00002000
+#define RES_PRF_INIT 0x00004000
+#define RES_PRF_TRUNC 0x00008000
+/* 0x00010000 */
+
+/* Things involving an internal (static) resolver context. */
+#ifdef _REENTRANT
+extern struct __res_state *__res_state(void);
+#define _res (*__res_state())
+#else
+#ifndef __BIND_NOSTATIC
+extern struct __res_state _res;
+#endif
+#endif
+
+#ifndef __BIND_NOSTATIC
+void fp_nquery __P((const uchar_t *, int, FILE *));
+void fp_query __P((const uchar_t *, FILE *));
+const char *hostalias __P((const char *));
+void p_query __P((const uchar_t *));
+void res_close __P((void));
+int res_init __P((void));
+int res_isourserver __P((const struct sockaddr_in *));
+int res_mkquery __P((int, const char *, int, int, const uchar_t *,
+ int, const uchar_t *, uchar_t *, int));
+int res_query __P((const char *, int, int, uchar_t *, int));
+int res_querydomain __P((const char *, const char *, int, int,
+ uchar_t *, int));
+int res_search __P((const char *, int, int, uchar_t *, int));
+int res_send __P((const uchar_t *, int, uchar_t *, int));
+int res_sendsigned __P((const uchar_t *, int, ns_tsig_key *,
+ uchar_t *, int));
+#endif /* __BIND_NOSTATIC */
+
+extern const struct res_sym __p_key_syms[];
+extern const struct res_sym __p_cert_syms[];
+extern const struct res_sym __p_class_syms[];
+extern const struct res_sym __p_type_syms[];
+extern const struct res_sym __p_rcode_syms[];
+
+int res_hnok __P((const char *));
+int res_ownok __P((const char *));
+int res_mailok __P((const char *));
+int res_dnok __P((const char *));
+int sym_ston __P((const struct res_sym *, const char *, int *));
+const char *sym_ntos __P((const struct res_sym *, int, int *));
+const char *sym_ntop __P((const struct res_sym *, int, int *));
+int b64_ntop __P((uchar_t const *, size_t, char *, size_t));
+int b64_pton __P((char const *, uchar_t *, size_t));
+int loc_aton __P((const char *ascii, uchar_t *binary));
+const char *loc_ntoa __P((const uchar_t *binary, char *ascii));
+int dn_skipname __P((const uchar_t *, const uchar_t *));
+void putlong __P((unsigned int, uchar_t *));
+void putshort __P((unsigned short, uchar_t *));
+const char *p_class __P((int));
+const char *p_time __P((unsigned int));
+const char *p_type __P((int));
+const char *p_rcode __P((int));
+const char *p_sockun __P((union res_sockaddr_union, char *, size_t));
+const uchar_t *p_cdnname __P((const uchar_t *, const uchar_t *, int,
+ FILE *));
+const uchar_t *p_cdname __P((const uchar_t *, const uchar_t *, FILE *));
+const uchar_t *p_fqnname __P((const uchar_t *cp, const uchar_t *msg,
+ int, char *, int));
+const uchar_t *p_fqname __P((const uchar_t *, const uchar_t *, FILE *));
+const char *p_option __P((uint_t option));
+char *p_secstodate __P((uint_t));
+int dn_count_labels __P((const char *));
+int dn_comp __P((const char *, uchar_t *, int,
+ uchar_t **, uchar_t **));
+int dn_expand __P((const uchar_t *, const uchar_t *,
+ const uchar_t *, char *, int));
+void res_rndinit __P((res_state));
+uint_t res_randomid __P((void));
+uint_t res_nrandomid __P((res_state));
+int res_nameinquery __P((const char *, int, int,
+ const uchar_t *, const uchar_t *));
+int res_queriesmatch __P((const uchar_t *, const uchar_t *,
+ const uchar_t *, const uchar_t *));
+const char *p_section __P((int section, int opcode));
+
+
+/* Things involving a resolver context. */
+int res_ninit __P((res_state));
+int res_nisourserver __P((const res_state,
+ const struct sockaddr_in *));
+void fp_resstat __P((const res_state, FILE *));
+void res_pquery __P((const res_state, const uchar_t *, int, FILE *));
+const char *res_hostalias __P((const res_state, const char *,
+ char *, size_t));
+int res_nquery __P((res_state,
+ const char *, int, int, uchar_t *, int));
+int res_nsearch __P((res_state, const char *, int,
+ int, uchar_t *, int));
+int res_nquerydomain __P((res_state,
+ const char *, const char *, int, int,
+ uchar_t *, int));
+int res_nmkquery __P((res_state,
+ int, const char *, int, int, const uchar_t *,
+ int, const uchar_t *, uchar_t *, int));
+int res_nsend __P((res_state, const uchar_t *, int, uchar_t *,
+ int));
+int res_nsendsigned __P((res_state, const uchar_t *, int,
+ ns_tsig_key *, uchar_t *, int));
+int res_findzonecut __P((res_state, const char *, ns_class, int,
+ char *, size_t, struct in_addr *, int));
+int res_findzonecut2 __P((res_state, const char *, ns_class, int,
+ char *, size_t, union res_sockaddr_union *,
+ int));
+void res_nclose __P((res_state));
+int res_nopt __P((res_state, int, uchar_t *, int, int));
+int res_nopt_rdata __P((res_state, int, uchar_t *, int, uchar_t *,
+ ushort_t, ushort_t, uchar_t *));
+void res_send_setqhook __P((res_send_qhook hook));
+void res_send_setrhook __P((res_send_rhook hook));
+int __res_vinit __P((res_state, int));
+void res_destroyservicelist __P((void));
+const char *res_servicename __P((uint16_t port, const char *proto));
+const char *res_protocolname __P((int num));
+void res_destroyprotolist __P((void));
+void res_buildprotolist __P((void));
+const char *res_get_nibblesuffix __P((res_state));
+const char *res_get_nibblesuffix2 __P((res_state));
+void res_ndestroy __P((res_state));
+uint16_t res_nametoclass __P((const char *buf, int *success));
+uint16_t res_nametotype __P((const char *buf, int *success));
+void res_setservers __P((res_state,
+ const union res_sockaddr_union *, int));
+int res_getservers __P((res_state,
+ union res_sockaddr_union *, int));
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !_RESOLV_JOY_H */
diff --git a/usr/src/head/zdoor.h b/usr/src/head/zdoor.h
new file mode 100644
index 0000000000..f2d204042d
--- /dev/null
+++ b/usr/src/head/zdoor.h
@@ -0,0 +1,74 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Joyent, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _ZDOOR_H
+#define _ZDOOR_H
+
+#include <sys/types.h>
+#include <zone.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct zdoor_handle *zdoor_handle_t;
+
+typedef struct zdoor_cookie {
+ char *zdc_zonename;
+ char *zdc_service;
+ void *zdc_biscuit;
+} zdoor_cookie_t;
+
+typedef struct zdoor_result {
+ char *zdr_data;
+ size_t zdr_size;
+} zdoor_result_t;
+
+typedef zdoor_result_t *(*zdoor_callback) (zdoor_cookie_t *cookie,
+ char *argp, size_t arpg_sz);
+
+#define ZDOOR_OK 0
+#define ZDOOR_ERROR -1
+#define ZDOOR_NOT_GLOBAL_ZONE -2
+#define ZDOOR_ZONE_NOT_RUNNING -3
+#define ZDOOR_ZONE_FORBIDDEN -4
+#define ZDOOR_ARGS_ERROR -5
+#define ZDOOR_OUT_OF_MEMORY -6
+
+extern zdoor_handle_t zdoor_handle_init();
+
+extern int zdoor_open(zdoor_handle_t handle, const char *zonename,
+ const char *service, void *biscuit, zdoor_callback callback);
+
+extern void * zdoor_close(zdoor_handle_t handle, const char *zonename,
+ const char *service);
+
+extern void zdoor_handle_destroy(zdoor_handle_t handle);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZDOOR_H */
diff --git a/usr/src/head/zone.h b/usr/src/head/zone.h
index 34528a27f5..14af0f8ff3 100644
--- a/usr/src/head/zone.h
+++ b/usr/src/head/zone.h
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011 Joyent Inc. All rights reserved.
*/
#ifndef _ZONE_H
@@ -56,7 +57,7 @@ extern int zone_get_id(const char *, zoneid_t *);
/* System call API */
extern zoneid_t zone_create(const char *, const char *,
const struct priv_set *, const char *, size_t, const char *, size_t, int *,
- int, int, const bslabel_t *, int);
+ int, int, const bslabel_t *, int, zoneid_t);
extern int zone_boot(zoneid_t);
extern int zone_destroy(zoneid_t);
extern ssize_t zone_getattr(zoneid_t, int, void *, size_t);
@@ -69,6 +70,7 @@ extern int zone_add_datalink(zoneid_t, datalink_id_t);
extern int zone_remove_datalink(zoneid_t, datalink_id_t);
extern int zone_check_datalink(zoneid_t *, datalink_id_t);
extern int zone_list_datalink(zoneid_t, int *, datalink_id_t *);
+extern const char *zone_get_nroot(void);
#ifdef __cplusplus
}
diff --git a/usr/src/lib/Makefile b/usr/src/lib/Makefile
index 6d8588d839..372fc4327c 100644
--- a/usr/src/lib/Makefile
+++ b/usr/src/lib/Makefile
@@ -68,6 +68,7 @@ SUBDIRS += \
libcommputil \
libresolv \
libresolv2 .WAIT \
+ libresolv2_joy .WAIT \
libw .WAIT \
libintl .WAIT \
../cmd/sgs/librtld_db \
@@ -80,6 +81,8 @@ SUBDIRS += \
librt \
libadm \
libctf \
+ libbrand .WAIT \
+ libzonecfg .WAIT \
libdtrace \
libdtrace_jni \
libcurses \
@@ -158,8 +161,8 @@ SUBDIRS += \
libpcidb \
libm1 \
libm \
- libmvec
-
+ libmvec \
+ libvnd
SUBDIRS += \
passwdutil \
@@ -182,7 +185,6 @@ SUBDIRS += \
libxnet \
libipsecutil \
nsswitch \
- print \
libuutil \
libscf \
libinetsvc \
@@ -201,10 +203,10 @@ SUBDIRS += \
libwrap \
libxcurses \
libxcurses2 \
- libbrand .WAIT \
- libzonecfg \
+ libzdoor \
libzoneinfo \
libzonestat \
+ libsmartsshd \
libtsnet \
libtsol \
gss_mechs/mech_spnego \
@@ -219,7 +221,6 @@ SUBDIRS += \
raidcfg_plugins \
cfgadm_plugins \
libmail \
- lvm \
libsmedia \
libipp \
libdiskmgt \
@@ -236,10 +237,7 @@ SUBDIRS += \
libzfs_core \
libzfs \
libbe \
- pylibbe \
libzfs_jni \
- pyzfs \
- pysolaris \
libmapid \
brand \
policykit \
@@ -298,9 +296,6 @@ fm: \
#
NOWAIT_SUBDIRS= $(SUBDIRS:.WAIT=)
-DCSUBDIRS = \
- lvm
-
MSGSUBDIRS= \
abi \
auditd_plugins \
@@ -355,12 +350,11 @@ MSGSUBDIRS= \
libwanbootutil \
libzfs \
libzonecfg \
- lvm \
+ libzdoor \
+ libsmartsshd \
madv \
mpss \
pam_modules \
- pyzfs \
- pysolaris \
rpcsec_gss \
libreparse
MSGSUBDIRS += \
@@ -468,6 +462,7 @@ HDRSUBDIRS= \
libumem \
libunistat \
libuutil \
+ libvnd \
libwanboot \
libwanbootutil \
libwrap \
@@ -479,7 +474,6 @@ HDRSUBDIRS= \
libzonestat \
hal \
policykit \
- lvm \
pkcs11 \
passwdutil \
../cmd/sendmail/libmilter \
@@ -563,7 +557,7 @@ gss_mechs/mech_krb5: libgss libnsl libsocket libresolv pkcs11
libadt_jni: libbsm
libast: libsocket libm
libadutils: libldap5 libresolv libsocket libnsl
-nsswitch: libadutils libidmap
+nsswitch: libadutils libidmap libresolv2_joy
libbe: libzfs
libbsm: libtsol
libcmd: libsum libast libsocket libnsl
@@ -618,6 +612,8 @@ libpool: libnvpair libexacct
libpp: libast
libzonecfg: libc libsocket libnsl libuuid libnvpair libsysevent libsec \
libbrand libpool libscf
+libzdoor: libc libzonecfg libcontract
+libsmartsshd: libc libcontract
libproc: ../cmd/sgs/librtld_db ../cmd/sgs/libelf libctf libsaveargs
libproject: libpool libproc libsecdb
libtermcap: libcurses
@@ -633,8 +629,6 @@ librestart: libuutil libscf
libsaveargs: libdisasm
../cmd/sgs/libdl: ../cmd/sgs/libconv
../cmd/sgs/libelf: ../cmd/sgs/libconv
-pkcs11: libcryptoutil
-print: libldap5
udapl/udapl_tavor: udapl/libdat
libzfs: libdevid libgen libnvpair libuutil \
libadm libavl libefi libidmap libmd libzfs_core libm
@@ -659,9 +653,6 @@ sun_fc: libdevinfo libsysevent libnvpair
libsun_ima: libdevinfo libsysevent libnsl
sun_sas: libdevinfo libsysevent libnvpair libkstat libdevid
libgrubmgmt: libdevinfo libzfs libfstyp
-pylibbe: libbe libzfs
-pyzfs: libnvpair libzfs
-pysolaris: libsec libidmap
libreparse: libnvpair
libhotplug: libnvpair
cfgadm_plugins: libhotplug
diff --git a/usr/src/lib/Makefile.astmsg b/usr/src/lib/Makefile.astmsg
index 096805e825..ff8202d03a 100644
--- a/usr/src/lib/Makefile.astmsg
+++ b/usr/src/lib/Makefile.astmsg
@@ -47,8 +47,8 @@ MSGLIBNAME= $(LIBRARY:.a=)
ASTMSGCATALOG= $(ROOT)/usr/lib/locale/C/LC_MESSAGES/$(MSGLIBNAME)
$(DO_BUILD_AST_CATALOGS)ASTMSGCC= \
- PATH="/usr/ast/bin/:/bin:/usr/bin" \
- /usr/bin/ksh93 /usr/ast/bin/msgcc >>msgcc.out 2>&1
+ PATH="$(ASTBINDIR):/bin:/usr/bin" \
+ /usr/bin/ksh93 $(MSGCC) >>msgcc.out 2>&1
ASTMSGS= $(OBJECTS:%.o=msgs/%.mso)
diff --git a/usr/src/lib/Makefile.lib b/usr/src/lib/Makefile.lib
index 78ef2cd4b5..eeb89960f4 100644
--- a/usr/src/lib/Makefile.lib
+++ b/usr/src/lib/Makefile.lib
@@ -251,3 +251,15 @@ TARGETMACH= $(MACH)
# shouldn't override this - they should override $(CLOBBERFILES) instead.
#
CLOBBERTARGFILES= $(LIBS) $(DYNLIB) $(CLOBBERFILES)
+
+#
+# Define the default ctfdiff invocation used to check a list of types
+# supplied by a user of a library. The goal is to validate that a given
+# series of types is the same in both a 32-bit and 64-bit artifact. This
+# is only defined if we have a 64-bit build to do.
+#
+TYPECHECK_LIB32 = $(TYPECHECK_LIB:%=$(MACH)/%)
+TYPECHECK_LIB64 = $(TYPECHECK_LIB:%=$(MACH64)/%)
+TYPECHECK_LIST= $(TYPELIST:%=-T %)
+$(BUILD64)TYPECHECK.lib = $(CTFDIFF) -I $(TYPECHECK_LIST) $(TYPECHECK_LIB32) $(TYPECHECK_LIB64)
+TYPECHECK = $(TYPECHECK_LIB:%=%.typecheck)
diff --git a/usr/src/lib/Makefile.targ b/usr/src/lib/Makefile.targ
index 4769c64d54..61cb45969d 100644
--- a/usr/src/lib/Makefile.targ
+++ b/usr/src/lib/Makefile.targ
@@ -104,6 +104,9 @@ $(LINTLIB): $$(SRCS)
lintcheck: $$(SRCS)
$(LINT.c) $(LINTCHECKFLAGS) $(SRCS) $(LDLIBS)
+$(TYPECHECK): $(TYPECHECK_LIB32) $(TYPECHECK_LIB64)
+ $(TYPECHECK.lib)
+
clobber: clean
-$(RM) $(CLOBBERTARGFILES)
diff --git a/usr/src/lib/brand/Makefile b/usr/src/lib/brand/Makefile
index 05bfc54764..bd766f3f9d 100644
--- a/usr/src/lib/brand/Makefile
+++ b/usr/src/lib/brand/Makefile
@@ -30,7 +30,10 @@ include ../../Makefile.master
# Build everything in parallel; use .WAIT for dependencies
.PARALLEL:
-SUBDIRS= shared .WAIT sn1 solaris10 ipkg labeled $($(MACH)_SUBDIRS)
+i386_SUBDIRS= lx
+i386_MSGSUBDIRS= lx
+
+SUBDIRS= shared .WAIT sn1 sngl solaris10 ipkg labeled $($(MACH)_SUBDIRS)
MSGSUBDIRS= solaris10 shared $($(MACH)_MSGSUBDIRS)
all := TARGET= all
diff --git a/usr/src/lib/brand/lx/Makefile b/usr/src/lib/brand/lx/Makefile
new file mode 100644
index 0000000000..6643377ef6
--- /dev/null
+++ b/usr/src/lib/brand/lx/Makefile
@@ -0,0 +1,55 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+# Copyright 2014 Joyent, Inc. All rights reserved.
+#
+
+default: all
+
+include Makefile.lx
+
+# Build everything in parallel; use .WAIT for dependencies
+.PARALLEL:
+
+SUBDIRS= cmd librtld_db lx_support lx_brand lx_thunk netfiles zone \
+ lx_vdso testing .WAIT lx_nametoaddr
+MSGSUBDIRS= lx_brand lx_support zone
+
+all := TARGET= all
+install := TARGET= install
+clean := TARGET= clean
+clobber := TARGET= clobber
+lint := TARGET= lint
+_msg := TARGET= _msg
+
+.KEEP_STATE:
+
+all install clean clobber lint: $(SUBDIRS)
+
+_msg: $(MSGSUBDIRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/lib/brand/lx/Makefile.lx b/usr/src/lib/brand/lx/Makefile.lx
new file mode 100644
index 0000000000..4db4679cef
--- /dev/null
+++ b/usr/src/lib/brand/lx/Makefile.lx
@@ -0,0 +1,34 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# lib/brand/lx/Makefile.lx
+#
+# include global definitions
+
+BRAND= lx
+
+include $(SRC)/lib/brand/Makefile.brand
+
diff --git a/usr/src/lib/brand/lx/cmd/Makefile b/usr/src/lib/brand/lx/cmd/Makefile
new file mode 100644
index 0000000000..07ca9ff81b
--- /dev/null
+++ b/usr/src/lib/brand/lx/cmd/Makefile
@@ -0,0 +1,48 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+# Copyright 2014 Joyent, Inc. All rights reserved.
+#
+
+PROGS = lx_lockd lx_native lx_isaexec_wrapper lx_statd lx_thunk ifconfig
+
+include ../Makefile.lx
+
+# override the install directory
+ROOTBIN = $(ROOTBRANDDIR)
+CLOBBERFILES = $(ROOTPROGS)
+
+.KEEP_STATE:
+
+lint:
+
+all: $(PROGS)
+
+install: all $(ROOTPROGS)
+
+clean:
+ $(RM) $(PROGS)
+
+clobber: clean
+ $(RM) $(ROOTPROGS)
diff --git a/usr/src/lib/brand/lx/cmd/ifconfig.sh b/usr/src/lib/brand/lx/cmd/ifconfig.sh
new file mode 100755
index 0000000000..d8e83fadb6
--- /dev/null
+++ b/usr/src/lib/brand/lx/cmd/ifconfig.sh
@@ -0,0 +1,172 @@
+#!/bin/sh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+
+#
+# This script uses the native ifconfig, without the thunk library linked in,
+# to display NIC data in the Linux style. The native ifconfig with the thunk
+# library cannot be used by non-root users since the thunk library does a
+# chroot.
+#
+# Copyright 2014 Joyent, Inc. All rights reserved.
+#
+
+short=0
+ifname=""
+
+# This is running in Linux, there is likely no getopts command
+if [ $# -gt 0 ]; then
+ for i in "$@"
+ do
+ case "$i" in
+ "-a") # ignore - interfaces are brought up by the zone
+ ;;
+ "-s")
+ short=1
+ ;;
+ "-v") # ignore
+ ;;
+ -*) echo "Error: unsupported option $i"
+ exit 1
+ ;;
+ *)
+ if [ -n "$ifname" ]; then
+ echo "Error: interface configuration is not supported in a zone"
+ exit 1
+ fi
+ ifname=$i
+ ;;
+ esac
+ done
+fi
+
+LD_LIBRARY_PATH_32="/native/lib:/native/usr/lib"
+LD_BIND_NOW=1
+export LD_LIBRARY_PATH_32 LD_BIND_NOW
+
+# Start by getting the kstats we need
+/native/usr/lib/brand/lx/lx_native /native/lib/ld.so.1 \
+ /native/usr/bin/kstat -m link -c net | awk '{
+ if ($1 == "module:" && length(ifname) > 0)
+ printf("%s %s %s %s %s %s %s\n", ifname, ipckts, ierrs, rbytes,
+ opckts, oerrs, obytes)
+
+ if ($1 == "name:") ifname=$2
+ if ($1 == "ipackets") ipckts=$2
+ if ($1 == "ierrors") ierrs=$2
+ if ($1 == "rbytes") rbytes=$2
+ if ($1 == "opackets") opckts=$2
+ if ($1 == "oerrors") oerrs=$2
+ if ($1 == "obytes") obytes=$2
+} END {
+ if (length(ifname) > 0)
+ printf("%s %s %s %s %s %s %s\n", ifname, ipckts, ierrs, rbytes,
+ opckts, oerrs, obytes)
+}' >/tmp/ifstats.$$
+
+/native/usr/lib/brand/lx/lx_native /native/lib/ld.so.1 \
+ /native/usr/bin/kstat -m lo -c net | awk '{
+ if ($1 == "ipackets") ipckts=$2
+ if ($1 == "opackets") opckts=$2
+} END {
+ printf("lo0 %s 0 0 %s 0 0\n", ipckts, opckts)
+}' >>/tmp/ifstats.$$
+
+# Now get the interfaces and format the output
+/native/usr/lib/brand/lx/lx_native /native/lib/ld.so.1 /native/sbin/ifconfig |
+awk -v pid=$$ -v nm=$ifname -v short=$short 'BEGIN {
+ indent=" "
+ # load interface kstats
+ tfile="/tmp/ifstats." pid
+ while (getline < tfile > 0) {
+ stats[$1]=substr($0, length($1) + 2)
+ }
+
+ if (short)
+ printf("%s %s %s %s %s %s %s %s %s %s %s %s\n", "Iface", "MTU",
+ "Met", "RX-OK", "RX-ERR", "RX-DRP", "RX-OVR", "TX-OK", "TX-ERR",
+ "TX-DRP", "TX-OVR", "Flg")
+}
+
+function fmt() {
+ if (length(nm) > 0 && ifname != nm)
+ return
+
+ if (ifname == "lo0") {
+ encap="Local Loopback"
+ qlen=0
+ sflg="LRU"
+ } else {
+ encap="Ethernet"
+ qlen=1000
+ sflg="BMRU"
+ }
+
+ n = split(stats[ifname], s)
+ if (n != 6)
+ s[1] = s[2] = s[3] = s[4] = s[5] = s[6] = 0
+
+ if (short) {
+ printf("%-5s %5s %3s %5s %6s %6s %6s %5s %6s %6s %6s %s\n",
+ ifname, mtu, 0, s[1], s[2], 0, 0, s[4], s[5], 0, 0, sflg)
+ return
+ }
+
+ printf("%-9s Link encap:%s", ifname, encap)
+ if (length(enetaddr) > 0)
+ printf(" HWaddr %s", enetaddr)
+ printf("\n")
+ printf("%9s inet addr:%s", indent, inet)
+ if (length(bcast) > 0)
+ printf(" Bcast:%s", bcast)
+ printf(" Mask:%s\n", mask)
+ printf("%9s %s MTU:%s\n", indent, flags, mtu)
+ printf("%9s RX packets:%s errors:%s dropped:0 overruns:0 frame:0\n",
+ indent, s[1], s[2])
+ printf("%9s TX packets:%s errors:%s dropped:0 overruns:0 carrier:0\n",
+ indent, s[4], s[6])
+ printf("%9s collisons:0 txqueuelen:%s\n", indent, qlen)
+ printf("%9s RX bytes:%s TX bytes:%s\n", indent, s[3], s[6])
+ printf("\n")
+}
+
+{
+ if (substr($1, length($1), 1) == ":") {
+ if (length(ifname) > 0) {
+ # print prev entry and reset
+ fmt()
+ ifname=""
+ bcast=""
+ enetaddr=""
+ }
+
+ ifname=substr($1, 1, length($1) - 1)
+ mtu=$4
+ pos=index($2, "<") + 1
+ flags=substr($2, pos, length($2) - pos)
+ getline
+ inet=$2
+ mask=$4
+ if (NF > 4)
+ bcast=$6
+ else
+ bcast=""
+ } else if ($1 == "ether") {
+ enetaddr=$2
+ }
+}
+
+END {
+ if (length(ifname) > 0) {
+ fmt()
+ }
+}'
+
+rm -f /tmp/ifstats.$$
diff --git a/usr/src/lib/brand/lx/cmd/lx_isaexec_wrapper.sh b/usr/src/lib/brand/lx/cmd/lx_isaexec_wrapper.sh
new file mode 100644
index 0000000000..618570011c
--- /dev/null
+++ b/usr/src/lib/brand/lx/cmd/lx_isaexec_wrapper.sh
@@ -0,0 +1,59 @@
+#!/bin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2014, Joyent, Inc. All rights reserved.
+#
+
+#
+# All native executables must be run using the native linker. By default, the
+# kernel loads the linker at /lib/ld.so.1, which in an lx zone is the gcc
+# linker. Hence when we run the native executable below, we explicitly specify
+# /native/lib/ld.so.1 as our 32-bit linker and /native/lib/64/ld.so.1 as our
+# 64-bit linker. For convience we define "n" to be the native path prefix. The
+# initial lx_native argument is used as a way to tell the brand emulation that
+# it needs to set up the process to run as an unbranded process.
+#
+# If this script gets setup with a mode that makes it suid, then things won't
+# work because the script will be running with the incorrect name.
+#
+# XXX For now, we only do 32-bit
+#
+
+n=/native
+
+bname=`/usr/bin/basename $0`
+dname=`/usr/bin/dirname $0`
+echo $dname | grep "^/" >/dev/null || dname=`/bin/pwd`/$dname
+dname=`(cd $dname 2>/dev/null && /bin/pwd 2>/dev/null)`
+
+if [ ! -f $n$dname/$bname ]; then
+ echo "Error: \"$dname/$bname\" is not installed in the global zone"
+ exit 1
+fi
+
+exec $n/usr/lib/brand/lx/lx_native \
+ $n/lib/ld.so.1 \
+ -e LD_NOENVIRON=1 \
+ -e LD_NOCONFIG=1 \
+ -e LD_PRELOAD_32=$n/usr/lib/brand/lx/lx_thunk.so.1 \
+ -e LD_LIBRARY_PATH_32="$n/lib:$n/usr/lib" \
+ $n$dname/$bname "$@"
diff --git a/usr/src/lib/brand/lx/cmd/lx_lockd.sh b/usr/src/lib/brand/lx/cmd/lx_lockd.sh
new file mode 100644
index 0000000000..cb60d19749
--- /dev/null
+++ b/usr/src/lib/brand/lx/cmd/lx_lockd.sh
@@ -0,0 +1,36 @@
+#!/bin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+LD_LIBRARY_PATH=/usr/lib/brand/lx
+LD_PRELOAD=/native/usr/lib/brand/lx/lx_thunk.so.1
+LD_BIND_NOW=1
+export LD_LIBRARY_PATH LD_PRELOAD LD_BIND_NOW
+
+exec /native/usr/lib/brand/lx/lx_native \
+ /native/usr/lib/nfs/lockd -P -U 29 -G 29
diff --git a/usr/src/lib/brand/lx/cmd/lx_native.sh b/usr/src/lib/brand/lx/cmd/lx_native.sh
new file mode 100644
index 0000000000..8e8344a375
--- /dev/null
+++ b/usr/src/lib/brand/lx/cmd/lx_native.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+exit 0
diff --git a/usr/src/lib/brand/lx/cmd/lx_statd.sh b/usr/src/lib/brand/lx/cmd/lx_statd.sh
new file mode 100644
index 0000000000..998fd90af2
--- /dev/null
+++ b/usr/src/lib/brand/lx/cmd/lx_statd.sh
@@ -0,0 +1,36 @@
+#!/bin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+LD_LIBRARY_PATH=/usr/lib/brand/lx
+LD_PRELOAD=/native/usr/lib/brand/lx/lx_thunk.so.1
+LD_BIND_NOW=1
+export LD_LIBRARY_PATH LD_PRELOAD LD_BIND_NOW
+
+exec /native/usr/lib/brand/lx/lx_native \
+ /native/usr/lib/nfs/statd -P -U 29 -G 29
diff --git a/usr/src/lib/brand/lx/cmd/lx_thunk.sh b/usr/src/lib/brand/lx/cmd/lx_thunk.sh
new file mode 100644
index 0000000000..4e1e6cbc03
--- /dev/null
+++ b/usr/src/lib/brand/lx/cmd/lx_thunk.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+exec /native/usr/lib/brand/lx/lx_thunk
diff --git a/usr/src/lib/brand/lx/librtld_db/Makefile b/usr/src/lib/brand/lx/librtld_db/Makefile
new file mode 100644
index 0000000000..2fc0a818f6
--- /dev/null
+++ b/usr/src/lib/brand/lx/librtld_db/Makefile
@@ -0,0 +1,54 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+default: all
+
+include $(SRC)/lib/Makefile.lib
+
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+LINT_SUBDIRS= $(MACH)
+$(BUILD64)LINT_SUBDIRS += $(MACH64)
+
+all := TARGET= all
+clean := TARGET= clean
+clobber := TARGET= clobber
+install := TARGET= install
+lint := TARGET= lint
+
+.KEEP_STATE:
+
+all install clean clobber: $(SUBDIRS)
+
+lint: $(LINT_SUBDIRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/lib/brand/lx/librtld_db/Makefile.com b/usr/src/lib/brand/lx/librtld_db/Makefile.com
new file mode 100644
index 0000000000..202cc0fe7b
--- /dev/null
+++ b/usr/src/lib/brand/lx/librtld_db/Makefile.com
@@ -0,0 +1,83 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+LIBRARY = lx_librtld_db.a
+VERS = .1
+COBJS = lx_librtld_db.o
+OBJECTS = $(COBJS) $(COBJS64)
+
+include $(SRC)/lib/Makefile.lib
+include ../../Makefile.lx
+
+CSRCS = $(COBJS:%o=../common/%c)
+SRCS = $(CSRCS)
+
+SRCDIR = ../common
+UTSBASE = $(SRC)/uts
+
+#
+# ATTENTION:
+# Librtl_db brand plugin libraries should NOT directly invoke any
+# libproc.so interfaces or be linked against libproc. If a librtl_db
+# brand plugin library uses libproc.so interfaces then it may break
+# any other librtld_db consumers (like mdb) that tries to attach
+# to a branded process. The only safe interfaces that the a librtld_db
+# brand plugin library can use to access a target process are the
+# proc_service(3PROC) apis.
+#
+DYNFLAGS += $(VERSREF) -M../common/mapfile-vers
+LIBS = $(DYNLIB)
+LDLIBS += -lc -lrtld_db
+CFLAGS += $(CCVERBOSE)
+CPPFLAGS += -D_REENTRANT -I../ -I$(UTSBASE)/common/brand/lx \
+ -I$(SRC)/cmd/sgs/librtld_db/common \
+ -I$(SRC)/cmd/sgs/include \
+ -I$(SRC)/cmd/sgs/include/$(MACH)
+
+ROOTLIBDIR = $(ROOT)/usr/lib/brand/lx
+ROOTLIBDIR64 = $(ROOT)/usr/lib/brand/lx/$(MACH64)
+
+#
+# The top level Makefiles define define TEXT_DOMAIN. But librtld_db.so.1
+# isn't internationalized and this library won't be either. The only
+# messages that this library can generate are messages used for debugging
+# the operation of the library itself.
+#
+DTEXTDOM =
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+pics/%64.o: ../common/%.c
+ $(COMPILE.c) -D_ELF64 $(PICFLAGS) -o $@ $<
+ $(POST_PROCESS_O)
+
+include $(SRC)/lib/Makefile.targ
diff --git a/usr/src/lib/brand/lx/librtld_db/amd64/Makefile b/usr/src/lib/brand/lx/librtld_db/amd64/Makefile
new file mode 100644
index 0000000000..726e7ef6d3
--- /dev/null
+++ b/usr/src/lib/brand/lx/librtld_db/amd64/Makefile
@@ -0,0 +1,38 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+COBJS64 = lx_librtld_db64.o
+
+include ../Makefile.com
+include $(SRC)/lib/Makefile.lib.64
+
+DYNFLAGS += -Mmapfile-vers
+
+CLOBBERFILES = $(ROOTLIBDIR64)/$(DYNLIB)
+
+install: all $(ROOTLIBS64)
diff --git a/usr/src/lib/brand/lx/librtld_db/amd64/mapfile-vers b/usr/src/lib/brand/lx/librtld_db/amd64/mapfile-vers
new file mode 100644
index 0000000000..4893b02998
--- /dev/null
+++ b/usr/src/lib/brand/lx/librtld_db/amd64/mapfile-vers
@@ -0,0 +1,44 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+SUNWprivate_1.1 {
+ global:
+ rtld_db_brand_ops64;
+};
diff --git a/usr/src/lib/brand/lx/librtld_db/common/lx_librtld_db.c b/usr/src/lib/brand/lx/librtld_db/common/lx_librtld_db.c
new file mode 100644
index 0000000000..50cf3ca3ba
--- /dev/null
+++ b/usr/src/lib/brand/lx/librtld_db/common/lx_librtld_db.c
@@ -0,0 +1,851 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <sys/types.h>
+#include <sys/link.h>
+#include <libproc.h>
+#include <proc_service.h>
+#include <rtld_db.h>
+#include <synch.h>
+
+#include <sys/lx_brand.h>
+
+/*
+ * Overview of this library derived from the original "BrandZ" PSARC design
+ * document.
+ *
+ * Since Linux binaries are standard ELF objects, Illumos debug tools (i.e. mdb
+ * or ptools) are able to process them in essentially the same way that Illumos
+ * binaries are processed. The main objective is to retrieve symbols and
+ * thereby aid debugging and observability. Unfortunately, most Linux
+ * distributions strip(1) their binaries as a misguided "optimization" so the
+ * majority of the useful debugging information is lost.
+ *
+ * The debug tools use interfaces provided by librtld_db to debug live
+ * processes and core files. librtld_db discovers ELF objects which have been
+ * mapped into the target's address space and reports these back to the tool.
+ * The librtld_db library understands enough of the internals of the Illumos
+ * runtime linker to iterate over the linker's private link maps and process
+ * the objects it finds. librtld_db allows our tools to debug the Illumos
+ * portions of a branded process (e.g. the brand library, libc, etc.) but they
+ * can't understand any Linux objects that are mapped into the address space
+ * because the Illumos linker only has Illumos objects on its link maps.
+ *
+ * In order to give the tools visibility into Linux binaries, a brand helper
+ * framework is implemented in librtld_db. When librtld_db is asked to examine
+ * a branded target process or core file, it uses the AT_SUN_BRANDNAME aux
+ * vector to get our brand name (lx). It then dlopen-s this lx_librtld_db.so
+ * helper library.
+ *
+ * Once loaded, this helper library is responsible for finding any lx-specific
+ * information it needs, such as the Linux equivalent LDDATA aux entry and
+ * preparing to return details about the objects loaded into the address space
+ * by the Linux linker.
+ *
+ * When a debug tool asks to know what objects are loaded in the target,
+ * librtld_db walks the Illumos link maps and iterates over each object it
+ * finds there, handing information about each to the tool. It then calls down
+ * into this helper library, which does the same for the brand-specific objects
+ * used by the target.
+ *
+ * This debug-helper code contains a bunch of helpful ps_plog calls. To enable
+ * this output with the ptools (e.g. pmap) set LIBPROC_DEBUG=1 in your
+ * environment. To enable it with mdb set MDB_DEBUG=psvc in your environment.
+ */
+
+/*
+ * ATTENTION:
+ * Librtl_db brand plugin libraries should NOT directly invoke any
+ * libproc.so interfaces or be linked against libproc. If a librtl_db
+ * brand plugin library uses libproc.so interfaces then it may break
+ * any other librtld_db consumers (like mdb) that tries to attach
+ * to a branded process. The only safe interfaces that the a librtld_db
+ * brand plugin library can use to access a target process are the
+ * proc_service(3PROC) apis.
+ */
+
+/*
+ * M_DATA comes from some streams header file but is also redifined in
+ * _rtld_db.h, so nuke the old streams definition here.
+ */
+#ifdef M_DATA
+#undef M_DATA
+#endif /* M_DATA */
+
+/*
+ * For 32-bit versions of this library, this file gets compiled once.
+ * For 64-bit versions of this library, this file gets compiled twice,
+ * once with _ELF64 defined and once without. The expectation is that
+ * the 64-bit version of the library can properly deal with both 32-bit
+ * and 64-bit elf files, hence in the 64-bit library there are two copies
+ * of all the interfaces in this file, one set named *32 and one named *64.
+ *
+ * This also means that we need to be careful when declaring local pointers
+ * that point to objects in another processes address space, since these
+ * pointers may not match the current processes pointer width. Basically,
+ * we should avoid using data types that change size between 32 and 64 bit
+ * modes like: long, void *, uintptr_t, caddr_t, psaddr_t, size_t, etc.
+ * Instead we should declare all pointers as uint32_t. Then when we
+ * are compiled to deal with 64-bit targets we'll re-define uint32_t
+ * to be a uint64_t.
+ */
+#ifdef _LP64
+#ifdef _ELF64
+#define lx_ldb_get_dyns32 lx_ldb_get_dyns64
+#define lx_ldb_init32 lx_ldb_init64
+#define lx_ldb_fini32 lx_ldb_fini64
+#define lx_ldb_loadobj_iter32 lx_ldb_loadobj_iter64
+#define lx_ldb_getauxval32 lx_ldb_getauxval64
+#define lx_elf_props32 lx_elf_props64
+#define _rd_get_dyns32 _rd_get_dyns64
+#define _rd_get_ehdr32 _rd_get_ehdr64
+#define uint32_t uint64_t
+#define Elf32_Dyn Elf64_Dyn
+#define Elf32_Ehdr Elf64_Ehdr
+#define Elf32_Phdr Elf64_Phdr
+#endif /* _ELF64 */
+#endif /* _LP64 */
+
+/* Included from usr/src/cmd/sgs/librtld_db/common */
+#include <_rtld_db.h>
+
+typedef struct lx_rd {
+ rd_agent_t *lr_rap;
+ struct ps_prochandle *lr_php; /* proc handle pointer */
+ uint32_t lr_rdebug; /* address of lx r_debug */
+ uint32_t lr_exec; /* base address of executable */
+} lx_rd_t;
+
+typedef struct lx_link_map {
+ uint32_t lxm_addr; /* Base address shared object is loaded at. */
+ uint32_t lxm_name; /* Absolute file name object was found in. */
+ uint32_t lxm_ld; /* Dynamic section of the shared object. */
+ uint32_t lxm_next; /* Chain of loaded objects. */
+} lx_link_map_t;
+
+typedef struct lx_r_debug {
+ int r_version; /* Version number for this protocol. */
+ uint32_t r_map; /* Head of the chain of loaded objects. */
+
+ /*
+ * This is the address of a function internal to the run-time linker,
+ * that will always be called when the linker begins to map in a
+ * library or unmap it, and again when the mapping change is complete.
+ * The debugger can set a breakpoint at this address if it wants to
+ * notice shared object mapping changes.
+ */
+ uint32_t r_brk;
+ r_state_e r_state; /* defined the same way between lx/solaris */
+ uint32_t r_ldbase; /* Base address the linker is loaded at. */
+} lx_r_debug_t;
+
+static uint32_t
+lx_ldb_getauxval32(struct ps_prochandle *php, int type)
+{
+ const auxv_t *auxvp = NULL;
+
+ if (ps_pauxv(php, &auxvp) != PS_OK)
+ return ((uint32_t)-1);
+
+ while (auxvp->a_type != AT_NULL) {
+ if (auxvp->a_type == type)
+ return ((uint32_t)(uintptr_t)auxvp->a_un.a_ptr);
+ auxvp++;
+ }
+ return ((uint32_t)-1);
+}
+
+/*
+ * A key difference between the linux linker and ours' is that the linux
+ * linker adds the base address of segments to certain values in the
+ * segments' ELF header. As an example, look at the address of the
+ * DT_HASH hash table in a Solaris section - it is a relative address
+ * which locates the start of the hash table, relative to the beginning
+ * of the ELF file. However, when the linux linker loads a section, it
+ * modifies the in-memory ELF image by changing address of the hash
+ * table to be an absolute address. This is only done for libraries - not for
+ * executables.
+ *
+ * Solaris tools expect the relative address to remain relative, so
+ * here we will modify the in-memory ELF image so that it once again
+ * contains relative addresses.
+ *
+ * To accomplish this, we walk through all sections in the target.
+ * Linux sections are identified by pointing to the linux linker or libc in the
+ * DT_NEEDED section. For all matching sections, we subtract the segment
+ * base address to get back to relative addresses.
+ */
+static rd_err_e
+lx_ldb_get_dyns32(rd_helper_data_t rhd,
+ psaddr_t addr, void **dynpp, size_t *dynpp_sz)
+{
+ lx_rd_t *lx_rd = (lx_rd_t *)rhd;
+ rd_agent_t *rap = lx_rd->lr_rap;
+ Elf32_Ehdr ehdr;
+ Elf32_Dyn *dynp = NULL;
+ size_t dynp_sz;
+ uint_t ndyns;
+ int i;
+
+ ps_plog("lx_ldb_get_dyns: invoked for object at 0x%p", addr);
+
+ /* Read in a copy of the ehdr */
+ if (_rd_get_ehdr32(rap, addr, &ehdr, NULL) != RD_OK) {
+ ps_plog("lx_ldb_get_dyns: _rd_get_ehdr() failed");
+ return (RD_ERR);
+ }
+
+ /* read out the PT_DYNAMIC elements for this object */
+ if (_rd_get_dyns32(rap, addr, &dynp, &dynp_sz) != RD_OK) {
+ ps_plog("lx_ldb_get_dyns: _rd_get_dyns() failed");
+ return (RD_ERR);
+ }
+
+ /*
+ * From here on out if we encounter an error we'll just return
+ * success and pass back the unmolested dynamic elements that
+ * we've already obtained.
+ */
+ if (dynpp != NULL)
+ *dynpp = dynp;
+ if (dynpp_sz != NULL)
+ *dynpp_sz = dynp_sz;
+ ndyns = dynp_sz / sizeof (Elf32_Dyn);
+
+ /* If this isn't a dynamic object, there's nothing left todo */
+ if (ehdr.e_type != ET_DYN) {
+ ps_plog("lx_ldb_get_dyns: done: not a shared object");
+ return (RD_OK);
+ }
+
+ /*
+ * Before we blindly start changing dynamic section addresses
+ * we need to figure out if the current object that we're looking
+ * at is a linux object or a solaris object. To do this first
+ * we need to find the string tab dynamic section element.
+ */
+ for (i = 0; i < ndyns; i++) {
+ if (dynp[i].d_tag == DT_STRTAB)
+ break;
+ }
+ if (i == ndyns) {
+ ps_plog("lx_ldb_get_dyns: "
+ "failed to find string tab in the dynamic section");
+ return (RD_OK);
+ }
+
+ /*
+ * Check if the strtab value looks like an offset or an address.
+ * It's an offset if the value is less then the base address that
+ * the object is loaded at, or if the value is less than the offset
+ * of the section headers in the same elf object. This check isn't
+ * perfect, but in practice it's good enough.
+ */
+ if ((dynp[i].d_un.d_ptr < addr) ||
+ (dynp[i].d_un.d_ptr < ehdr.e_shoff)) {
+ ps_plog("lx_ldb_get_dyns: "
+ "doesn't appear to be an lx object");
+ return (RD_OK);
+ }
+
+ /*
+ * This seems to be a a linux object, so we'll patch up the dynamic
+ * section addresses
+ */
+ ps_plog("lx_ldb_get_dyns: "
+ "patching up lx object dynamic section addresses");
+ for (i = 0; i < ndyns; i++) {
+ switch (dynp[i].d_tag) {
+ case DT_PLTGOT:
+ case DT_HASH:
+ case DT_STRTAB:
+ case DT_SYMTAB:
+ case DT_RELA:
+ case DT_REL:
+ case DT_DEBUG:
+ case DT_JMPREL:
+ case DT_VERSYM:
+ if (dynp[i].d_un.d_val > addr) {
+ dynp[i].d_un.d_ptr -= addr;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ return (RD_OK);
+}
+
+static void
+lx_ldb_fini32(rd_helper_data_t rhd)
+{
+ lx_rd_t *lx_rd = (lx_rd_t *)rhd;
+ ps_plog("lx_ldb_fini: cleaning up lx helper");
+ free(lx_rd);
+}
+
+/*
+ * The linux linker has an r_debug structure somewhere in its data section that
+ * contains the address of the head of the link map list. To find this, we will
+ * use the DT_DEBUG token in the executable's dynamic section. The linux linker
+ * wrote the address of its r_debug structure to the DT_DEBUG dynamic entry. We
+ * get the address of the executable's program headers from the
+ * AT_SUN_BRAND_LX_PHDR aux vector entry. From there, we calculate the
+ * address of the Elf header, and from there we can easily get to the DT_DEBUG
+ * entry.
+ */
+static rd_helper_data_t
+lx_ldb_init32(rd_agent_t *rap, struct ps_prochandle *php)
+{
+ lx_rd_t *lx_rd;
+ uint32_t addr, phdr_addr, dyn_addr;
+ uint32_t symtab, strtab, offs;
+ uint32_t vaddr, memsz;
+ caddr_t mem;
+ Elf32_Dyn *dyn;
+ Elf32_Phdr phdr, *ph, *dph, *phdrs;
+ Elf32_Ehdr ehdr;
+ Elf32_Sym *sym;
+ int i, dyn_count;
+
+ lx_rd = calloc(sizeof (lx_rd_t), 1);
+ if (lx_rd == NULL) {
+ ps_plog("lx_ldb_init: cannot allocate memory");
+ return (NULL);
+ }
+ lx_rd->lr_rap = rap;
+ lx_rd->lr_php = php;
+
+ phdr_addr = lx_ldb_getauxval32(php, AT_SUN_BRAND_LX_PHDR);
+ if (phdr_addr == (uint32_t)-1) {
+ ps_plog("lx_ldb_init: no LX_PHDR found in aux vector");
+ return (NULL);
+ }
+ ps_plog("lx_ldb_init: found LX_PHDR auxv phdr at: 0x%p",
+ phdr_addr);
+
+ if (ps_pread(php, phdr_addr, &phdr, sizeof (phdr)) != PS_OK) {
+ ps_plog("lx_ldb_init: couldn't read phdr at 0x%p",
+ phdr_addr);
+ free(lx_rd);
+ return (NULL);
+ }
+
+ /* The ELF header should be before the program header in memory */
+ lx_rd->lr_exec = addr = phdr_addr - phdr.p_offset;
+ if (ps_pread(php, addr, &ehdr, sizeof (ehdr)) != PS_OK) {
+ ps_plog("lx_ldb_init: couldn't read ehdr at 0x%p",
+ lx_rd->lr_exec);
+ free(lx_rd);
+ return (NULL);
+ }
+ ps_plog("lx_ldb_init: read ehdr at: 0x%p", addr);
+ ps_plog("lx_ldb_init: ehdr t %d ent 0x%p poff 0x%p psize %d pnum %d",
+ ehdr.e_type, ehdr.e_entry, ehdr.e_phoff, ehdr.e_phentsize,
+ ehdr.e_phnum);
+
+ if ((phdrs = malloc(ehdr.e_phnum * ehdr.e_phentsize)) == NULL) {
+ ps_plog("lx_ldb_init: couldn't alloc phdrs memory");
+ free(lx_rd);
+ return (NULL);
+ }
+
+ if (ps_pread(php, phdr_addr, phdrs, ehdr.e_phnum * ehdr.e_phentsize) !=
+ PS_OK) {
+ ps_plog("lx_ldb_init: couldn't read phdrs at 0x%p",
+ phdr_addr);
+ free(lx_rd);
+ free(phdrs);
+ return (NULL);
+ }
+
+ /* program headers */
+ ps_plog("lx_ldb_init: read %d phdrs at: 0x%p",
+ ehdr.e_phnum, phdr_addr);
+
+ for (i = 0, ph = phdrs; i < ehdr.e_phnum; i++,
+ /*LINTED */
+ ph = (Elf32_Phdr *)((char *)ph + ehdr.e_phentsize)) {
+ ps_plog("lx_ldb_init: ph[%d] 0x%p type %d", i,
+ (phdr_addr + ((char *)ph - (char *)phdrs)), ph->p_type);
+ if (ph->p_type == PT_DYNAMIC)
+ break;
+ }
+ if (i == ehdr.e_phnum) {
+ ps_plog("lx_ldb_init: no PT_DYNAMIC in executable");
+ free(lx_rd);
+ free(phdrs);
+ return (NULL);
+ }
+ ps_plog("lx_ldb_init: found PT_DYNAMIC phdr[%d] at: 0x%p",
+ i, (phdr_addr + ((char *)ph - (char *)phdrs)));
+ ps_plog("lx_ldb_init: ph t 0x%x f 0x%x o 0x%p v 0x%p s %d",
+ ph->p_type, ph->p_flags, ph->p_offset, ph->p_vaddr, ph->p_filesz);
+
+ if ((dyn = malloc(ph->p_filesz)) == NULL) {
+ ps_plog("lx_ldb_init: couldn't alloc for PT_DYNAMIC");
+ free(lx_rd);
+ free(phdrs);
+ return (NULL);
+ }
+
+ /*
+ * Unclear why the dyn_addr works sometimes with one value and
+ * sometimes for the other, so we handle both cases.
+ */
+
+ dyn_count = ph->p_filesz / sizeof (Elf32_Dyn);
+ ps_plog("lx_ldb_init: dyn_count %d %d", dyn_count, sizeof (Elf32_Dyn));
+ dyn_addr = addr + ph->p_vaddr;
+ ps_plog("lx_ldb_init: dyn_addr 0x%p 0x%x = 0x%p",
+ addr, ph->p_offset, dyn_addr);
+ if (ps_pread(php, dyn_addr, dyn, ph->p_filesz) != PS_OK) {
+ ps_plog("lx_ldb_init: couldn't read dynamic at 0x%p, "
+ "trying dyn_addr 0x%p",
+ dyn_addr, ph->p_vaddr);
+
+ dyn_addr = ph->p_vaddr;
+ if (ps_pread(php, dyn_addr, dyn, ph->p_filesz) != PS_OK) {
+ ps_plog("lx_ldb_init: couldn't read dynamic at 0x%p",
+ dyn_addr);
+
+ free(lx_rd);
+ free(phdrs);
+ free(dyn);
+ return (NULL);
+ }
+ }
+ ps_plog("lx_ldb_init: read %d dynamic headers at: 0x%p",
+ dyn_count, dyn_addr);
+
+ for (i = 0; i < dyn_count; i++) {
+ if (dyn[i].d_tag == DT_DEBUG) {
+ lx_rd->lr_rdebug = dyn[i].d_un.d_ptr;
+ break;
+ }
+ }
+ free(phdrs);
+ free(dyn);
+
+ if (lx_rd->lr_rdebug != 0) {
+ ps_plog("lx_ldb_init: found DT_DEBUG: 0x%p", lx_rd->lr_rdebug);
+ return ((rd_helper_data_t)lx_rd);
+ }
+
+ ps_plog("lx_ldb_init: no DT_DEBUG found in exe; looking for r_debug");
+
+ /*
+ * If we didn't find DT_DEBUG, we're going to employ the same fallback
+ * as gdb: pawing through the dynamic linker's symbol table looking
+ * for the r_debug symbol.
+ */
+ addr = lx_ldb_getauxval32(php, AT_SUN_BRAND_LX_INTERP);
+
+ if (addr == (uint32_t)-1) {
+ ps_plog("lx_ldb_init: no interpreter; failing");
+ free(lx_rd);
+ return (NULL);
+ }
+
+ ps_plog("lx_ldb_init: reading interp ehdr at 0x%p", addr);
+
+ if (ps_pread(php, addr, &ehdr, sizeof (ehdr)) != PS_OK) {
+ ps_plog("lx_ldb_init: couldn't read interp ehdr at 0x%p", addr);
+ free(lx_rd);
+ return (NULL);
+ }
+
+ if (ehdr.e_type != ET_DYN) {
+ ps_plog("lx_ldb_init: interp ehdr not of type ET_DYN");
+ free(lx_rd);
+ return (NULL);
+ }
+
+ phdr_addr = addr + ehdr.e_phoff;
+
+ if ((phdrs = malloc(ehdr.e_phnum * ehdr.e_phentsize)) == NULL) {
+ ps_plog("lx_ldb_init: couldn't alloc interp phdrs memory");
+ free(lx_rd);
+ return (NULL);
+ }
+
+ if (ps_pread(php, phdr_addr, phdrs,
+ ehdr.e_phnum * ehdr.e_phentsize) != PS_OK) {
+ ps_plog("lx_ldb_init: couldn't read interp phdrs at 0x%p",
+ phdr_addr);
+ free(lx_rd);
+ free(phdrs);
+ return (NULL);
+ }
+
+ ps_plog("lx_ldb_init: read %d interp phdrs at: 0x%p",
+ ehdr.e_phnum, phdr_addr);
+
+ vaddr = (uint32_t)-1;
+ memsz = 0;
+
+ for (i = 0, ph = phdrs, dph = NULL; i < ehdr.e_phnum; i++,
+ /*LINTED */
+ ph = (Elf32_Phdr *)((char *)ph + ehdr.e_phentsize)) {
+ /*
+ * Keep track of our lowest PT_LOAD address, as this segment
+ * contains the DT_SYMTAB and DT_STRTAB.
+ */
+ if (ph->p_type == PT_LOAD && ph->p_vaddr < vaddr) {
+ vaddr = ph->p_vaddr;
+ memsz = ph->p_memsz;
+ }
+
+ if (ph->p_type == PT_DYNAMIC)
+ dph = ph;
+ }
+
+ if (vaddr == (uint32_t)-1 || memsz == 0) {
+ ps_plog("lx_ldb_init: no PT_LOAD section in interp");
+ free(lx_rd);
+ free(phdrs);
+ return (NULL);
+ }
+
+ ps_plog("lx_ldb_init: found interp PT_LOAD to be %d bytes at 0x%p",
+ memsz, vaddr);
+
+ if ((ph = dph) == NULL) {
+ ps_plog("lx_ldb_init: no PT_DYNAMIC in interp");
+ free(lx_rd);
+ free(phdrs);
+ return (NULL);
+ }
+
+ ps_plog("lx_ldb_init: found interp PT_DYNAMIC phdr[%d] at: 0x%p",
+ i, (phdr_addr + ((char *)ph - (char *)phdrs)));
+
+ if ((dyn = malloc(ph->p_filesz)) == NULL) {
+ ps_plog("lx_ldb_init: couldn't alloc for interp PT_DYNAMIC");
+ free(lx_rd);
+ free(phdrs);
+ return (NULL);
+ }
+
+ dyn_addr = addr + ph->p_offset;
+ dyn_count = ph->p_filesz / sizeof (Elf32_Dyn);
+
+ if (ps_pread(php, dyn_addr, dyn, ph->p_filesz) != PS_OK) {
+ ps_plog("lx_ldb_init: couldn't read interp dynamic at 0x%p",
+ dyn_addr);
+ free(lx_rd);
+ free(phdrs);
+ free(dyn);
+ return (NULL);
+ }
+
+ free(phdrs);
+
+ ps_plog("lx_ldb_init: read %d interp dynamic headers at: 0x%p",
+ dyn_count, dyn_addr);
+
+ /*
+ * As noted in lx_ldb_get_dyns32(), in Linux, the PT_DYNAMIC table
+ * is adjusted to represent absolute addresses instead of offsets.
+ * This is not true for the interpreter, however -- where the values
+ * will be represented as offsets from the lowest PT_LOAD p_vaddr.
+ */
+ symtab = strtab = (uint32_t)-1;
+
+ for (i = 0; i < dyn_count; i++) {
+ if (dyn[i].d_tag == DT_STRTAB)
+ strtab = dyn[i].d_un.d_ptr - vaddr;
+
+ if (dyn[i].d_tag == DT_SYMTAB)
+ symtab = dyn[i].d_un.d_ptr - vaddr;
+ }
+
+ free(dyn);
+
+ if (strtab == (uint32_t)-1 || strtab > memsz) {
+ ps_plog("lx_ldb_init: didn't find valid interp strtab");
+ free(lx_rd);
+ return (NULL);
+ }
+
+ if (symtab == (uint32_t)-1 || symtab > memsz) {
+ ps_plog("lx_ldb_init: didn't find valid interp symtab");
+ free(lx_rd);
+ return (NULL);
+ }
+
+ ps_plog("lx_ldb_init: strtab is 0x%p, symtab is 0x%p",
+ addr + strtab, addr + symtab);
+
+ if ((mem = malloc(memsz)) == NULL) {
+ ps_plog("lx_ldb_init: couldn't allocate interp "
+ "buffer of 0x%p bytes", memsz);
+ free(lx_rd);
+ return (NULL);
+ }
+
+ if (ps_pread(php, addr, mem, memsz) != PS_OK) {
+ ps_plog("lx_ldb_init: couldn't read interp at 0x%p", addr);
+ free(lx_rd);
+ free(mem);
+ return (NULL);
+ }
+
+ /*
+ * We make an assumption that is made elsewhere in the Linux linker:
+ * that the DT_SYMTAB immediately precedes the DT_STRTAB.
+ */
+ for (offs = symtab; offs < strtab; offs += sizeof (Elf32_Sym)) {
+ uint32_t p;
+
+ sym = (Elf32_Sym *)&mem[offs];
+
+ if (sym->st_name > memsz) {
+ ps_plog("lx_ldb_init: invalid st_name at sym 0x%p",
+ addr + offs);
+ continue;
+ }
+
+ p = sym->st_name;
+
+ if (strtab + p > memsz) {
+ ps_plog("lx_ldb_init: invalid symbol address 0x%p, "
+ "memsz 0x%p", strtab + p, memsz);
+ continue;
+ }
+
+ if (mem[strtab + p] == '\0')
+ continue;
+
+ /* Sometimes we're pointing into the middle of a symbol? */
+ while ((strtab + p) > 0 && (strtab + p) < memsz &&
+ mem[strtab + p] != '\0')
+ p--;
+ p++;
+
+ ps_plog("lx_ldb_init: interp symbol (0x%p) %s",
+ strtab + p, &mem[strtab + p]);
+
+ if (strcmp(&mem[strtab + p], "_r_debug") == 0)
+ break;
+ }
+
+ if (offs >= strtab) {
+ ps_plog("lx_ldb_init: no _r_debug found in interpreter");
+ free(mem);
+ free(lx_rd);
+ return (NULL);
+ }
+
+ lx_rd->lr_rdebug = (sym->st_value - vaddr) + addr;
+ ps_plog("lx_ldb_init: found _r_debug at 0x%p", lx_rd->lr_rdebug);
+ free(mem);
+
+ return ((rd_helper_data_t)lx_rd);
+}
+
+/*
+ * Given the address of an ELF object in the target, return its size and
+ * the proper link map ID.
+ */
+static size_t
+lx_elf_props32(struct ps_prochandle *php, uint32_t addr, psaddr_t *data_addr)
+{
+ Elf32_Ehdr ehdr;
+ Elf32_Phdr *phdrs, *ph;
+ int i;
+ uint32_t min = (uint32_t)-1;
+ uint32_t max = 0;
+ size_t sz = NULL;
+
+ if (ps_pread(php, addr, &ehdr, sizeof (ehdr)) != PS_OK) {
+ ps_plog("lx_elf_props: Couldn't read ELF header at 0x%p",
+ addr);
+ return (0);
+ }
+
+ if ((phdrs = malloc(ehdr.e_phnum * ehdr.e_phentsize)) == NULL)
+ return (0);
+
+ if (ps_pread(php, addr + ehdr.e_phoff, phdrs, ehdr.e_phnum *
+ ehdr.e_phentsize) != PS_OK) {
+ ps_plog("lx_elf_props: Couldn't read program headers at 0x%p",
+ addr + ehdr.e_phoff);
+ return (0);
+ }
+
+ for (i = 0, ph = phdrs; i < ehdr.e_phnum; i++,
+ /*LINTED */
+ ph = (Elf32_Phdr *)((char *)ph + ehdr.e_phentsize)) {
+
+ if (ph->p_type != PT_LOAD)
+ continue;
+
+ if ((ph->p_flags & (PF_W | PF_R)) == (PF_W | PF_R)) {
+ *data_addr = ph->p_vaddr;
+ if (ehdr.e_type == ET_DYN)
+ *data_addr += addr;
+ if (*data_addr & (ph->p_align - 1))
+ *data_addr = *data_addr & (~(ph->p_align -1));
+ }
+
+ if (ph->p_vaddr < min)
+ min = ph->p_vaddr;
+
+ if (ph->p_vaddr > max) {
+ max = ph->p_vaddr;
+ sz = ph->p_memsz + max - min;
+ if (sz & (ph->p_align - 1))
+ sz = (sz & (~(ph->p_align - 1))) + ph->p_align;
+ }
+ }
+
+ free(phdrs);
+ return (sz);
+}
+
+static int
+lx_ldb_loadobj_iter32(rd_helper_data_t rhd, rl_iter_f *cb, void *client_data)
+{
+ lx_rd_t *lx_rd = (lx_rd_t *)rhd;
+ struct ps_prochandle *php = lx_rd->lr_php;
+ lx_r_debug_t r_debug;
+ lx_link_map_t map;
+ uint32_t p = NULL;
+ int rc;
+ rd_loadobj_t exec;
+
+ if ((rc = ps_pread(php, (psaddr_t)lx_rd->lr_rdebug, &r_debug,
+ sizeof (r_debug))) != PS_OK) {
+ ps_plog("lx_ldb_loadobj_iter: "
+ "Couldn't read linux r_debug at 0x%p", lx_rd->lr_rdebug);
+ return (rc);
+ }
+
+ p = r_debug.r_map;
+
+ /*
+ * The first item on the link map list is for the executable, but it
+ * doesn't give us any useful information about it. We need to
+ * synthesize a rd_loadobj_t for the client.
+ *
+ * Linux doesn't give us the executable name, so we'll get it from
+ * the AT_EXECNAME entry instead.
+ */
+ if ((rc = ps_pread(php, (psaddr_t)p, &map, sizeof (map))) != PS_OK) {
+ ps_plog("lx_ldb_loadobj_iter: "
+ "Couldn't read linux link map at 0x%p", p);
+ return (rc);
+ }
+
+ bzero(&exec, sizeof (exec));
+ exec.rl_base = lx_rd->lr_exec;
+ exec.rl_dynamic = map.lxm_ld;
+ exec.rl_nameaddr = lx_ldb_getauxval32(php, AT_SUN_EXECNAME);
+ exec.rl_lmident = LM_ID_BASE;
+
+ exec.rl_bend = exec.rl_base +
+ lx_elf_props32(php, lx_rd->lr_exec, &exec.rl_data_base);
+
+ if ((*cb)(&exec, client_data) == 0) {
+ ps_plog("lx_ldb_loadobj_iter: "
+ "client callb failed for executable");
+ return (PS_ERR);
+ }
+ ps_plog("lx_ldb_loadobj_iter: exec base 0x%p dyn 0x%p",
+ exec.rl_base, exec.rl_dynamic);
+
+ for (p = map.lxm_next; p != NULL; p = map.lxm_next) {
+ rd_loadobj_t obj;
+
+ if ((rc = ps_pread(php, (psaddr_t)p, &map, sizeof (map))) !=
+ PS_OK) {
+ ps_plog("lx_ldb_loadobj_iter: "
+ "Couldn't read lk map at %p", p);
+ return (rc);
+ }
+
+ /*
+ * The linux link map has less information than the Solaris one.
+ * We need to go fetch the missing information from the ELF
+ * headers.
+ */
+
+ obj.rl_nameaddr = (psaddr_t)map.lxm_name;
+ obj.rl_base = map.lxm_addr;
+ obj.rl_refnameaddr = (psaddr_t)map.lxm_name;
+ obj.rl_plt_base = NULL;
+ obj.rl_plt_size = 0;
+ obj.rl_lmident = LM_ID_BASE;
+
+ ps_plog("lx_ldb_loadobj_iter: map base 0x%p 0x%p",
+ obj.rl_base, obj.rl_nameaddr);
+
+ /*
+ * Ugh - we have to walk the ELF stuff, find the PT_LOAD
+ * sections, and calculate the end of the file's mappings
+ * ourselves.
+ */
+
+ obj.rl_bend = map.lxm_addr +
+ lx_elf_props32(php, map.lxm_addr, &obj.rl_data_base);
+ obj.rl_padstart = obj.rl_base;
+ obj.rl_padend = obj.rl_bend;
+ obj.rl_dynamic = map.lxm_ld;
+ obj.rl_tlsmodid = 0;
+
+ ps_plog("lx_ldb_loadobj_iter: 0x%p to 0x%p",
+ obj.rl_base, obj.rl_bend);
+
+ if ((*cb)(&obj, client_data) == 0) {
+ ps_plog("lx_ldb_loadobj_iter: "
+ "Client callback failed on %s", map.lxm_name);
+ return (rc);
+ }
+ }
+ return (RD_OK);
+}
+
+/*
+ * Librtld_db plugin linkage struct.
+ *
+ * When we get loaded by librtld_db, it will look for the symbol below
+ * to find our plugin entry points.
+ */
+rd_helper_ops_t RTLD_DB_BRAND_OPS = {
+ LM_ID_BRAND,
+ lx_ldb_init32,
+ lx_ldb_fini32,
+ lx_ldb_loadobj_iter32,
+ lx_ldb_get_dyns32
+};
diff --git a/usr/src/lib/brand/lx/librtld_db/common/mapfile-vers b/usr/src/lib/brand/lx/librtld_db/common/mapfile-vers
new file mode 100644
index 0000000000..5e328d6075
--- /dev/null
+++ b/usr/src/lib/brand/lx/librtld_db/common/mapfile-vers
@@ -0,0 +1,58 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+{
+ global:
+ rtld_db_brand_ops32;
+ local:
+ *;
+};
+
+#Externally defined symbols
+{
+ global:
+ ps_pauxv = NODIRECT PARENT;
+ ps_pdmodel = NODIRECT PARENT;
+ ps_pglobal_lookup = NODIRECT PARENT;
+ ps_pglobal_sym = NODIRECT PARENT;
+ ps_plog = NODIRECT PARENT;
+ ps_pread = NODIRECT PARENT;
+ ps_pwrite = NODIRECT PARENT;
+};
diff --git a/usr/src/lib/brand/lx/librtld_db/i386/Makefile b/usr/src/lib/brand/lx/librtld_db/i386/Makefile
new file mode 100644
index 0000000000..b5f780c072
--- /dev/null
+++ b/usr/src/lib/brand/lx/librtld_db/i386/Makefile
@@ -0,0 +1,33 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../Makefile.com
+
+CLOBBERFILES = $(ROOTLIBDIR)/$(DYNLIB)
+
+install: all $(ROOTLIBS)
diff --git a/usr/src/lib/brand/lx/lx_brand/Makefile b/usr/src/lib/brand/lx/lx_brand/Makefile
new file mode 100644
index 0000000000..fc217b672c
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/Makefile
@@ -0,0 +1,49 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+# Copyright 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../../../Makefile.lib
+
+default: all
+
+SUBDIRS= $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+all := TARGET= all
+clean := TARGET= clean
+clobber := TARGET= clobber
+install := TARGET= install
+lint := TARGET= lint
+_msg := TARGET= _msg
+
+.KEEP_STATE:
+
+all install clean clobber lint _msg: $(SUBDIRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/lib/brand/lx/lx_brand/Makefile.com b/usr/src/lib/brand/lx/lx_brand/Makefile.com
new file mode 100644
index 0000000000..2c9021ac1d
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/Makefile.com
@@ -0,0 +1,103 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+# Copyright 2014 Joyent, Inc. All rights reserved.
+#
+
+LX_CMN = $(SRC)/common/brand/lx
+
+LIBRARY = lx_brand.a
+VERS = .1
+COBJS = capabilities.o \
+ clock.o \
+ clone.o \
+ debug.o \
+ dir.o \
+ file.o \
+ fcntl.o \
+ fork.o \
+ id.o \
+ iovec.o \
+ lx_brand.o \
+ lx_thunk_server.o \
+ mem.o \
+ misc.o \
+ module.o \
+ mount.o \
+ open.o \
+ pgrp.o \
+ poll_select.o \
+ priority.o \
+ ptrace.o \
+ rlimit.o \
+ sched.o \
+ sendfile.o \
+ signal.o \
+ socket.o \
+ stat.o \
+ statfs.o \
+ sysctl.o \
+ sysv_ipc.o \
+ time.o \
+ truncate.o \
+ wait.o \
+ xattr.o
+
+CMNOBJS = lx_signum.o
+ASOBJS = lx_handler.o lx_runexe.o lx_crt.o
+OBJECTS = $(CMNOBJS) $(COBJS) $(ASOBJS)
+
+include ../../Makefile.lx
+include ../../../../Makefile.lib
+
+CSRCS = $(COBJS:%o=../common/%c) $(CMNOBJS:%o=$(LX_CMN)/%c)
+ASSRCS = $(ASOBJS:%o=$(ISASRCDIR)/%s)
+SRCS = $(CSRCS) $(ASSRCS)
+
+SRCDIR = ../common
+UTSBASE = ../../../../../uts
+
+LIBS = $(DYNLIB)
+LDLIBS += -lc -lsocket -lmapmalloc -lproc -lrtld_db
+DYNFLAGS += $(DYNFLAGS_$(CLASS))
+DYNFLAGS += $(BLOCAL) $(ZNOVERSION) -Wl,-e_start -M../common/mapfile
+CFLAGS += $(CCVERBOSE)
+CPPFLAGS += -D_REENTRANT -I../ -I$(UTSBASE)/common/brand/lx -I$(LX_CMN)
+ASFLAGS = -P $(ASFLAGS_$(CURTYPE)) -D_ASM -I../ \
+ -I$(UTSBASE)/common/brand/lx
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+include ../../../../Makefile.targ
+
+pics/%.o: $(ISASRCDIR)/%.s
+ $(COMPILE.s) -o $@ $<
+ $(POST_PROCESS_O)
+
+pics/%.o: $(LX_CMN)/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
diff --git a/usr/src/lib/brand/lx/lx_brand/amd64/Makefile b/usr/src/lib/brand/lx/lx_brand/amd64/Makefile
new file mode 100644
index 0000000000..664cb8de56
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/amd64/Makefile
@@ -0,0 +1,51 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+# Copyright 2014 Joyent, Inc. All rights reserved.
+#
+#
+# lib/brand/lx/amd64/Makefile
+
+ISASRCDIR=.
+
+ASFLAGS += -P -D_ASM
+
+include ../Makefile.com
+include $(SRC)/lib/Makefile.lib.64
+
+DYNFLAGS += -Wl,-I/native/lib/64/ld.so.1
+CPPFLAGS += -D_SYSCALL32
+
+POFILE= lx_brand.po
+MSGFILES= $(CSRCS)
+
+ASSYMDEP_OBJS = lx_handler.o
+
+$(ASSYMDEP_OBJS:%=pics/%): assym.h
+
+OFFSETS = ../$(MACH64)/offsets.in
+
+assym.h: $(OFFSETS)
+ $(OFFSETS_CREATE) $(CTF_FLAGS) < $(OFFSETS) > $@
+
+CLOBBERFILES += assym.h
+
+install: all $(ROOTLIBS64)
+
+$(POFILE): $(MSGFILES)
+ $(BUILDPO.msgfiles)
+
+_msg: $(MSGDOMAINPOFILE)
+
+include $(SRC)/Makefile.msg.targ
diff --git a/usr/src/lib/brand/lx/lx_brand/amd64/lx_crt.s b/usr/src/lib/brand/lx/lx_brand/amd64/lx_crt.s
new file mode 100644
index 0000000000..13a88a3d50
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/amd64/lx_crt.s
@@ -0,0 +1,62 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/asm_linkage.h>
+
+#if defined(lint)
+
+void
+_start(void)
+{
+}
+
+#else /* lint */
+
+ /*
+ * C language startup routine for the lx brand shared library.
+ *
+ * That routine expects to be called with the following arguments:
+ * brand_init(int argc, char *argv[], char *envp[])
+ *
+ * There are no arguments explicitly passed to this entry point,
+ * routine, but we do know how our initial stack has been setup by
+ * the kernel. The stack format is documented in:
+ * usr/src/cmd/sgs/rtld/amd64/boot.s
+ *
+ * So this routine will troll through the stack to setup the argument
+ * values for the common brand library startup routine and then invoke
+ * it. This routine is modeled after the default crt1.s`_start()
+ * routines.
+ */
+ ENTRY_NP(_start)
+ pushq $0 / Build a stack frame. retpc = NULL
+ pushq $0 / fp = NULL
+ movq %rsp, %rbp / first stack frame
+
+ /*
+ * Calculate the location of the envp array by adding the size of
+ * the argv array to the start of the argv array.
+ */
+ movq 16(%rbp), %rdi / argc in %rdi (1st param)
+ leaq 24(%rbp), %rsi / &argv[0] in %rsi (2nd param)
+ leaq 32(%rbp,%rdi,8), %rdx / envp in %rdx (3rd param)
+ call lx_init
+
+ /* lx_init will never return. */
+ /*NOTREACHED*/
+ SET_SIZE(_start)
+#endif /* lint */
diff --git a/usr/src/lib/brand/lx/lx_brand/amd64/lx_handler.s b/usr/src/lib/brand/lx/lx_brand/amd64/lx_handler.s
new file mode 100644
index 0000000000..af8fae621f
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/amd64/lx_handler.s
@@ -0,0 +1,428 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/asm_linkage.h>
+#include <sys/regset.h>
+#include <sys/segments.h>
+#include <sys/syscall.h>
+#include <sys/lx_brand.h>
+
+#if defined(_ASM)
+#include <sys/lx_signal.h>
+#include <sys/lx_syscall.h>
+#endif /* _ASM */
+
+#include "assym.h"
+
+/* 64-bit signal syscall numbers */
+#define LX_SYS_sigreturn 513
+#define LX_SYS_rt_sigreturn 15
+
+/*
+ * Each JMP must occupy 16 bytes.
+ * The syscall offset is stored immediately above the red zone to avoid
+ * clobbering data there. Once lx_handler is reached, the stack will be
+ * advanced to account for both the red zone and the stored syscall offset.
+ */
+#define JMP \
+ movl $_CONST(. - lx_handler_table), -136(%rsp); \
+ jmp lx_handler; \
+ .align 16;
+
+#define JMP4 JMP; JMP; JMP; JMP
+#define JMP16 JMP4; JMP4; JMP4; JMP4
+#define JMP64 JMP16; JMP16; JMP16; JMP16
+#define JMP256 JMP64; JMP64; JMP64; JMP64
+
+/*
+ * Alternate jump table that turns on lx_traceflag before proceeding with
+ * the normal emulation routine.
+ */
+#define TJMP \
+ movl $_CONST(. - lx_handler_trace_table), -136(%rsp); \
+ jmp lx_handler_trace; \
+ .align 16;
+
+#define TJMP4 TJMP; TJMP; TJMP; TJMP
+#define TJMP16 TJMP4; TJMP4; TJMP4; TJMP4
+#define TJMP64 TJMP16; TJMP16; TJMP16; TJMP16
+#define TJMP256 TJMP64; TJMP64; TJMP64; TJMP64
+
+
+#if defined(lint)
+
+#include <sys/types.h>
+#include <sys/regset.h>
+#include <sys/signal.h>
+
+void
+lx_handler_table(void)
+{}
+
+void
+lx_handler(void)
+{}
+
+/* ARGSUSED */
+void
+lx_setup_clone(uintptr_t gs, void *retaddr, void *stk)
+{}
+
+/* ARGSUSED */
+void
+lx_sigdeliver(int sig, siginfo_t *sip, void *p, size_t stacksz,
+ void (*stack_frame_builder)(void), void (*lx_sighandler)(void),
+ uintptr_t gs)
+{}
+
+/* ARGSUSED */
+void
+lx_sigacthandler(int sig, siginfo_t *s, void *p)
+{}
+
+void
+lx_sigreturn_tramp(void)
+{}
+
+void
+lx_rt_sigreturn_tramp(void)
+{}
+
+/* ARGSUSED */
+void
+lx_sigreturn_tolibc(uintptr_t sp)
+{}
+
+#else /* lint */
+
+ /*
+ * On entry to this table, %rax will hold the return address. The
+ * location where we enter the table is a function of the system
+ * call number. The table needs the same alignment as the individual
+ * entries.
+ */
+ .align 16
+ ENTRY_NP(lx_handler_trace_table)
+ TJMP256
+ TJMP64
+ TJMP64
+ SET_SIZE(lx_handler_trace_table)
+
+ .align 16
+ ENTRY_NP(lx_handler_table)
+ JMP256
+ JMP64
+ JMP64
+ SET_SIZE(lx_handler_table)
+
+ ENTRY_NP(lx_handler_trace)
+ subq $136, %rsp /* skip red zone + syscall offset */
+ pushq %rsi
+ movq lx_traceflag@GOTPCREL(%rip), %rsi
+ movq $1, (%rsi)
+ popq %rsi
+ addq $136, %rsp
+ /*
+ * While we could just fall through to lx_handler(), we "tail-call" it
+ * instead to make ourselves a little more comprehensible to trace
+ * tools.
+ */
+ jmp lx_handler
+ SET_SIZE(lx_handler_trace)
+
+ ALTENTRY(lx_handler)
+ /*
+ * We are running on the Linux process's stack here so we have to
+ * account for the AMD64 ABI red zone of 128 bytes past the %rsp which
+ * the process can use as scratch space. In addition to the red zone,
+ * the syscall offset stored by the handler tables above must be
+ * accounted for. To that end, rsp is advanced by a further 8 bytes to
+ * include the syscall offset.
+ */
+ subq $136, %rsp /* red zone + syscall offset */
+
+ /*
+ * In order to keep the hander_table entries within 16 bytes, only 4
+ * bytes of the syscall offset are stored during dispatch.
+ * The upper 4 bytes are zeroed here to account for that.
+ */
+ movl $0, 4(%rsp)
+
+ /*
+ * %rbp isn't always going to be a frame pointer on Linux, but when
+ * it is, saving it here lets us have a coherent stack backtrace.
+ */
+ pushq %rbp
+
+ /*
+ * Fill in a lx_regs_t structure on the stack.
+ */
+ subq $SIZEOF_LX_REGS_T, %rsp
+
+ /*
+ * Save %rbp and then fill it with what would be its usual value as
+ * the frame pointer. The value we save for %rsp needs to be the
+ * stack pointer at the time of the syscall so we need to skip the
+ * red zone, saved %rbp and (what will be) the return address.
+ */
+ movq %rbp, LXR_RBP(%rsp)
+ movq %rsp, %rbp
+ addq $SIZEOF_LX_REGS_T, %rbp
+ movq %rbp, LXR_RSP(%rsp)
+ addq $144, LXR_RSP(%rsp) /* 128 byte red zone + 2 pointers */
+
+ movq $0, LXR_FS(%rsp)
+ movw %fs, LXR_FS(%rsp)
+ movq %rdi, LXR_RDI(%rsp)
+ movq %rsi, LXR_RSI(%rsp)
+ movq %rbx, LXR_RBX(%rsp)
+ movq %rdx, LXR_RDX(%rsp)
+ movq %rcx, LXR_RCX(%rsp)
+ movq %rax, LXR_RIP(%rsp) /* %rax holds the return addr. */
+ movq %r8, LXR_R8(%rsp)
+ movq %r9, LXR_R9(%rsp)
+ movq %r10, LXR_R10(%rsp)
+ movq %r11, LXR_R11(%rsp)
+ movq %r12, LXR_R12(%rsp)
+ movq %r13, LXR_R13(%rsp)
+ movq %r14, LXR_R14(%rsp)
+ movq %r15, LXR_R15(%rsp)
+
+ /*
+ * The kernel drops us into the middle of one of the tables above
+ * that then stores the table offset immediately above the 128 byte
+ * red zone and calls into lx_handler. That offset indicates the
+ * syscall number while %rax holds the return address for the syscall.
+ * We replace the value on the stack with the return address, and use
+ * the value to compute the system call number by dividing by the table
+ * entry size.
+ */
+ xchgq 8(%rbp), %rax /* just after the rbp we pushed */
+ shrq $4, %rax
+ movq %rax, LXR_RAX(%rsp)
+
+ /*
+ * Call lx_emulate() whose only argument is a pointer to the
+ * lx_regs_t structure we've placed on the stack.
+ */
+ movq %rsp, %rdi
+ call lx_emulate
+
+ /*
+ * We use this global symbol to identify this return site when
+ * walking the stack backtrace. It needs to remain immediately
+ * after the call to lx_emulate().
+ */
+ ALTENTRY(lx_emulate_done)
+
+ /*
+ * Restore the saved register state; we get %rbp and %rsp from
+ * the ordinary locations rather than the saved state.
+ */
+ movq LXR_RDI(%rsp), %rdi
+ movq LXR_RSI(%rsp), %rsi
+ movq LXR_RBX(%rsp), %rbx
+ movq LXR_RDX(%rsp), %rdx
+ movq LXR_RCX(%rsp), %rcx
+ movq LXR_RAX(%rsp), %rax
+ movq LXR_R8(%rsp), %r8
+ movq LXR_R9(%rsp), %r9
+ movq LXR_R10(%rsp), %r10
+ movq LXR_R11(%rsp), %r11
+ movq LXR_R12(%rsp), %r12
+ movq LXR_R13(%rsp), %r13
+ movq LXR_R14(%rsp), %r14
+ movq LXR_R15(%rsp), %r15
+ /* XXX movw LXR_FS(%rsp), %fs */
+
+ movq %rbp, %rsp
+ popq %rbp
+
+ /*
+ * Returning from lx_handler is complicated by our preservation of the
+ * red zone on the stack. The return address resides just above the
+ * red zone making it impossible to use 'retq' and return rsp to the
+ * correct value. Instead, rsp is manually moved to its original
+ * position and we jmp using the return address at the known stack
+ * offset above the red zone.
+ */
+ addq $136, %rsp /* red zone + return address */
+ jmpq *-136(%rsp)
+ SET_SIZE(lx_handler)
+
+ /*
+ * lx_setup_clone(lx_regs_t *regp, void *retaddr, void *stack)
+ * Restore the register state using arg0 (%rdi).
+ * Return to Linux app using arg1 (%rsi) with the Linux stack we got
+ * in arg2 (%rdx).
+ */
+ ENTRY_NP(lx_setup_clone)
+ /*
+ * arg0 is a ptr to an lx_regs_t struct. The AMD64 ABI says that the
+ * kernel clobbers %rcx and %r11 so we use those for working registers.
+ */
+ movq %rdi, %rcx /* arg0, use rcx as ptr */
+ movq %rsi, %r11 /* arg1, the return addr */
+ movq LXR_RDI(%rcx), %rdi
+ movq LXR_RSI(%rcx), %rsi
+ movq LXR_RBX(%rcx), %rbx
+ movq LXR_R8(%rcx), %r8
+ movq LXR_R9(%rcx), %r9
+ movq LXR_R10(%rcx), %r10
+ movq LXR_R12(%rcx), %r12
+ movq LXR_R13(%rcx), %r13
+ movq LXR_R14(%rcx), %r14
+ movq LXR_R15(%rcx), %r15
+
+ xorq %rbp, %rbp /* terminating stack */
+ popq %rax /* pop the clone_start() return address */
+ movq %rdx, %rsp /* arg2 is new stack pointer */
+ movq LXR_RDX(%rcx), %rdx
+ xorq %rax, %rax /* child returns 0 to SYS_clone() */
+ jmp *%r11 /* return to Linux app. using arg1 addr. */
+ SET_SIZE(lx_setup_clone)
+
+ /*
+ * lx_sigdeliver(int sig, siginfo_t *, ucontext_t *, int stack_size,
+ * void *stack_build_routine, void *signal_handler, void *glibc_gs)
+ *
+ * The final parameter (%gs) is ignored in the 64-bit code.
+ *
+ * we're called by:
+ * lx_call_user_handler(int sig, siginfo_t *sip, void *p)
+ *
+ * This routine allocates stack space for the lx_sigstack local
+ * variable structure, calls a routine to populate that structure, and
+ * then calls the Linux signal handler. This is written in assembly
+ * because of the way we directly jmp to the Linux signal handler
+ * with everything setup as if this function wasn't really here. We
+ * rely on the code in lx_rt_sigreturn() to cleanup the things we've
+ * pushed on the stack here.
+ *
+ * See lx_build_signal_frame() for the code which populates lx_sigstack.
+ *
+ * When we jump to the Linux signal handler, the stack will look
+ * like this:
+ *
+ * =================================================
+ * | %rbp |
+ * | =================================================
+ * | | stuff we saved in our prologue |
+ * | =================================================
+ * | | LX_SIGRT_MAGIC |
+ * | =================================================
+ * | | {unused word to maintain ABI stack alignment} |
+ * V =================================================
+ * | Linux local data built by lx stk_builder() |
+ * =================================================
+ *
+ * Unlike the 32-bit case, we don't reset %rbp before jumping into the
+ * Linux handler, since that would mean the handler would clobber our
+ * data in the stack frame it builds.
+ *
+ */
+ ENTRY_NP(lx_sigdeliver)
+ pushq %rbp
+ movq %rsp, %rbp
+ subq $0x40, %rsp /* an extra word to maintain alignmnt */
+ movq %rdi, -8(%rbp) /* sig */
+ movq %rsi, -16(%rbp) /* siginfo* */
+ movq %rdx, -24(%rbp) /* ucontext* */
+ movq %rcx, -32(%rbp) /* stack size */
+ movq %r8, -40(%rbp) /* stack builder */
+ movq %r9, -48(%rbp) /* Linux signal handler */
+
+ subq %rcx, %rsp /* create stack_size stack buffer */
+
+ movq $LX_SIGRT_MAGIC, %rcx /* load and place marker value onto */
+ movq %rcx, -56(%rbp) /* stack for lx_rt_sigreturn */
+
+ movq %rsp, %rcx /* arg3 - %rcx is stack pointer */
+ /* arg2 - %rdx is ucontext ptr */
+ /* arg1 - %rsi is siginfo ptr */
+ /* arg0 - %rdi is sig num */
+ call *%r8 /* stk_builder(sig, sip, ucp, sp) */
+
+ /* setup for jump to Linux signal hander */
+ movq -8(%rbp), %rdi /* arg0 %rdi is sig num */
+
+ /*
+ * If we had a NULL siginfo pointer as input then we never converted
+ * anything in the stack builder function and we need to pass along
+ * a null siginfo pointer to the Linux handler.
+ *
+ * arg1 %rsi is ptr to converted siginfo on stack or NULL
+ */
+ movq -16(%rbp), %rsi
+ cmp $0, %rsi
+ je 1f
+ movq %rsp, %rsi
+ addq $SI, %rsi
+1:
+ /*
+ * arg2 %rdx is ptr to converted ucontext on stk (uc member of
+ * lx_sigstack).
+ */
+ movq %rsp, %rdx
+ addq $UC, %rdx
+
+ movq -48(%rbp), %r9 /* fetch signal handler ptr */
+ jmp *%r9 /* jmp to the Linux signal handler */
+ SET_SIZE(lx_sigdeliver)
+
+ /*
+ * The libc routine that calls user signal handlers ends with a
+ * setcontext, so we would never return here even if we used a call
+ * rather than a jmp. However, we'll let the emulation unwind the stack
+ * with a brand call that combines the setcontext with the management
+ * of the syscall mode flag.
+ *
+ * Note that because libc_sigacthandler is an extern, it needs to be
+ * dereferenced via the GOT.
+ *
+ * IMPORTANT: Because libc apparently gets upset if extra data is
+ * left on its stack, this routine needs to be crafted
+ * in assembly so that the jmp to the libc interposer
+ * doesn't leave any cruft lying around.
+ *
+ * lx_sigacthandler(int sig, siginfo_t *s, void *p)
+ */
+ ENTRY_NP(lx_sigacthandler)
+ movq libc_sigacthandler@GOTPCREL(%rip), %rax
+ jmp *(%rax) /* jmp to libc's interposer */
+ SET_SIZE(lx_sigacthandler)
+
+ /*
+ * Trampoline code is called by the return at the end of a Linux
+ * signal handler to return control to the interrupted application
+ * via the lx_rt_sigreturn() syscall.
+ */
+ ENTRY_NP(lx_rt_sigreturn_tramp)
+ movq $LX_SYS_rt_sigreturn, %rax
+ syscall
+ SET_SIZE(lx_rt_sigreturn_tramp)
+
+ /*
+ * Manipulate the stack in the way necessary for it to appear to libc
+ * that the signal handler it invoked via call_user_handler() is
+ * returning.
+ */
+ ENTRY_NP(lx_sigreturn_tolibc)
+ movq %rdi, %rsp /* set %rsp to passed value */
+ popq %rbp /* restore proper %rbp */
+ ret /* return to lx_call_user_handler */
+ SET_SIZE(lx_sigreturn_tolibc)
+#endif /* lint */
diff --git a/usr/src/lib/brand/lx/lx_brand/amd64/lx_runexe.s b/usr/src/lib/brand/lx/lx_brand/amd64/lx_runexe.s
new file mode 100644
index 0000000000..70cd75cf41
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/amd64/lx_runexe.s
@@ -0,0 +1,46 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/asm_linkage.h>
+
+#if defined(lint)
+
+/*ARGSUSED*/
+void
+lx_runexe(void *argv, void *entry)
+{
+}
+
+#else /* lint */
+
+ /*
+ * Set our stack pointer, clear the general registers,
+ * and jump to the brand linker's entry point.
+ */
+ ENTRY_NP(lx_runexe)
+ movq %rdi, %rax / %rax = &argv[0]
+ movq %rsi, %rbx / Brand linker's entry point in %rbx
+ subq $8, %rax / Top of stack - must point at argc
+ movq %rax, %rsp / Set %rsp to what linkers expect
+
+ movq $0, %rdx
+
+ jmp *%rbx / And away we go...
+
+ /* target will never return. */
+ SET_SIZE(lx_runexe)
+#endif /* lint */
diff --git a/usr/src/lib/brand/lx/lx_brand/amd64/offsets.in b/usr/src/lib/brand/lx/lx_brand/amd64/offsets.in
new file mode 100644
index 0000000000..207063a886
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/amd64/offsets.in
@@ -0,0 +1,43 @@
+\
+\ This file and its contents are supplied under the terms of the
+\ Common Development and Distribution License ("CDDL"), version 1.0.
+\ You may only use this file in accordance with the terms of version
+\ 1.0 of the CDDL.
+\
+\ A full copy of the text of the CDDL should have accompanied this
+\ source. A copy of the CDDL is also available via the Internet at
+\ http://www.illumos.org/license/CDDL.
+\
+\ Copyright 2014 Joyent, Inc. All rights reserved.
+\
+
+#include <sys/lx_brand.h>
+#include <sys/lx_sigstack.h>
+
+lx_regs_t SIZEOF_LX_REGS_T
+ lxr_fs
+ lxr_rdi
+ lxr_rsi
+ lxr_rbp
+ lxr_rsp
+ lxr_rbx
+ lxr_rdx
+ lxr_rcx
+ lxr_rax
+ lxr_r8
+ lxr_r9
+ lxr_r10
+ lxr_r11
+ lxr_r12
+ lxr_r13
+ lxr_r14
+ lxr_r15
+ lxr_rip
+ lxr_orig_rax
+
+lx_sigstack_t SIZEOF_LX_SIGSTACK_T
+ retaddr
+ si
+ uc
+ fpstate
+ pad
diff --git a/usr/src/lib/brand/lx/lx_brand/common/capabilities.c b/usr/src/lib/brand/lx/lx_brand/common/capabilities.c
new file mode 100644
index 0000000000..81f3bbddb4
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/capabilities.c
@@ -0,0 +1,484 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * LX Brand emulation of capget/capset syscalls
+ */
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/lx_types.h>
+#include <sys/lx_syscall.h>
+#include <sys/syscall.h>
+#include <alloca.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/lx_misc.h>
+#include <priv.h>
+
+typedef struct {
+ uint32_t version;
+ int pid;
+} lx_cap_user_header_t;
+
+typedef struct {
+ uint32_t effective;
+ uint32_t permitted;
+ uint32_t inheritable;
+} lx_cap_user_data_t;
+
+typedef struct {
+ priv_set_t *p_effective;
+ priv_set_t *p_permitted;
+ priv_set_t *p_inheritable;
+} lx_cap_privs_t;
+
+#define LX_CAP_UPDATE_PERMITTED 0x1
+#define LX_CAP_UPDATE_EFFECTIVE 0x2
+#define LX_CAP_UPDATE_INHERITABLE 0x4
+
+
+#define LX_CAP_MAXLEN 2
+
+typedef struct {
+ uint32_t effective[LX_CAP_MAXLEN];
+ uint32_t permitted[LX_CAP_MAXLEN];
+ uint32_t inheritable[LX_CAP_MAXLEN];
+} lx_cap_data_t;
+
+#define LX_CAP_VERSION_1 0x19980330
+#define LX_CAP_VERSION_2 0x20071026 /* deprecated by Linux */
+#define LX_CAP_VERSION_3 0x20080522
+
+/*
+ * Even though we lack mappings for capabilities higher than 36, it's valuable
+ * to test all the way out to the end of the second field. This ensures that
+ * new capabilities we lack support for are not silently accepted.
+ */
+#define LX_CAP_MAX_CHECK 63
+#define LX_CAP_MAX_VALID 36
+
+#define LX_CAP_CAPISSET(id, cap) \
+ (((id < 32) && (((0x1 << id) & cap[0]) != 0)) || \
+ ((id >= 32) && (((0x1 << (id - 32) & cap[1]) != 0))))
+
+#define LX_CAP_CAPSET(id, cap) \
+ if (id < 32) { cap[0] |= (0x1 << id); } \
+ else { cap[1] |= (0x1 << (id - 32)); }
+
+static const char *lx_cap_map_chown[] = {
+ PRIV_FILE_CHOWN,
+ PRIV_FILE_CHOWN_SELF,
+ NULL
+};
+static const char *lx_cap_map_dac_override[] = {
+ PRIV_FILE_DAC_READ,
+ PRIV_FILE_DAC_WRITE,
+ PRIV_FILE_DAC_EXECUTE,
+ NULL
+};
+static const char *lx_cap_map_dac_read_search[] = {
+ PRIV_FILE_DAC_SEARCH,
+ PRIV_FILE_DAC_READ,
+ NULL
+};
+static const char *lx_cap_map_fowner[] = { PRIV_FILE_OWNER, NULL };
+static const char *lx_cap_map_fsetid[] = { PRIV_FILE_SETID, NULL };
+static const char *lx_cap_map_kill[] = { PRIV_PROC_OWNER, NULL };
+/*
+ * One way that Linux capabilities(7) differs from Illumos privileges(5) is
+ * that it distinguishes between setuid and setgroups rights. This will be a
+ * problem if an lx-branded process requests to drop only CAP_SETUID but not
+ * CAP_SETGID.
+ *
+ * In that case, CAP_SETUID will be maintained.
+ */
+static const char *lx_cap_map_setgid[] = { PRIV_PROC_SETID, NULL };
+static const char *lx_cap_map_setuid[] = { PRIV_PROC_SETID, NULL };
+static const char *lx_cap_map_linux_immutable[] = { PRIV_FILE_FLAG_SET, NULL };
+static const char *lx_cap_map_bind_service[] = { PRIV_NET_PRIVADDR, NULL };
+static const char *lx_cap_map_net_admin[] = {
+ PRIV_SYS_IPC_CONFIG,
+ PRIV_SYS_DL_CONFIG,
+ NULL
+};
+static const char *lx_cap_map_net_raw[] = {
+ PRIV_NET_RAWACCESS,
+ PRIV_NET_ICMPACCESS,
+ NULL
+};
+static const char *lx_cap_map_ipc_lock[] = { PRIV_PROC_LOCK_MEMORY, NULL };
+static const char *lx_cap_map_ipc_owner[] = {
+ PRIV_IPC_DAC_READ,
+ PRIV_IPC_DAC_WRITE,
+ PRIV_IPC_OWNER,
+ NULL
+};
+static const char *lx_cap_map_sys_chroot[] = { PRIV_PROC_CHROOT, NULL };
+static const char *lx_cap_map_sys_admin[] = {
+ PRIV_SYS_MOUNT,
+ PRIV_SYS_ADMIN,
+ NULL
+};
+static const char *lx_cap_map_sys_nice[] = { PRIV_PROC_PRIOUP, NULL };
+static const char *lx_cap_map_sys_resource[] = { PRIV_SYS_RESOURCE, NULL };
+static const char *lx_cap_map_audit_write[] = { PRIV_PROC_AUDIT, NULL };
+static const char *lx_cap_map_audit_control[] = { PRIV_SYS_AUDIT, NULL };
+
+/*
+ * Mapping of Linux capabilities -> Illumos privileges
+ * The ID definitions can be found in the Linux sources here:
+ * include/uapi/linux/capability.h
+ *
+ * Order is critical.
+ */
+static const char ** lx_cap_mapping[LX_CAP_MAX_VALID + 1] = {
+ lx_cap_map_chown, /* CAP_CHOWN */
+ lx_cap_map_dac_override, /* CAP_DAC_OVERRIDE */
+ lx_cap_map_dac_read_search, /* CAP_DAC_READ_SEARCH */
+ lx_cap_map_fowner, /* CAP_FOWNER */
+ lx_cap_map_fsetid, /* CAP_FSETID */
+ lx_cap_map_kill, /* CAP_KILL */
+ lx_cap_map_setgid, /* CAP_SETGID */
+ lx_cap_map_setuid, /* CAP_SETUID */
+ NULL, /* CAP_SETPCAP */
+ lx_cap_map_linux_immutable, /* CAP_LINUX_IMMUTABLE */
+ lx_cap_map_bind_service, /* CAP_BIND_SERVICE */
+ NULL, /* CAP_BROADCAST */
+ lx_cap_map_net_admin, /* CAP_NET_ADMIN */
+ lx_cap_map_net_raw, /* CAP_NET_RAW */
+ lx_cap_map_ipc_lock, /* CAP_IPC_LOCK */
+ lx_cap_map_ipc_owner, /* CAP_IPC_OWNER */
+ NULL, /* CAP_MODULE */
+ NULL, /* CAP_RAWIO */
+ lx_cap_map_sys_chroot, /* CAP_SYS_CHROOT */
+ NULL, /* CAP_PTRACE */
+ NULL, /* CAP_PACCT */
+ lx_cap_map_sys_admin, /* CAP_SYS_ADMIN */
+ NULL, /* CAP_BOOT */
+ lx_cap_map_sys_nice, /* CAP_SYS_NICE */
+ lx_cap_map_sys_resource, /* CAP_SYS_RESOURCE */
+ NULL, /* CAP_SYS_TIME */
+ NULL, /* CAP_SYS_TTY_CONFIG */
+ NULL, /* CAP_MKNOD */
+ NULL, /* CAP_LEASE */
+ lx_cap_map_audit_write, /* CAP_AUDIT_WRITE */
+ lx_cap_map_audit_control, /* CAP_AUDIT_CONTROL */
+ NULL, /* CAP_SETFCAP */
+ NULL, /* CAP_MAC_OVERRIDE */
+ NULL, /* CAP_MAC_ADMIN */
+ NULL, /* CAP_SYSLOG */
+ NULL, /* CAP_WAKE_ALARM */
+ NULL /* CAP_BLOCK_SUSPEND */
+};
+
+/* track priv_set_t size, set on entry to lx_capset/lx_capget */
+static unsigned int lx_cap_priv_size = 0;
+
+/* safely allocate priv_set_t triplet on the stack */
+#define LX_CAP_ALLOC_PRIVS(ptr) \
+ do { \
+ ptr = SAFE_ALLOCA(sizeof (lx_cap_privs_t) + \
+ (3 * lx_cap_priv_size)); \
+ if (ptr != NULL) { \
+ ptr->p_effective = (void *) ptr + \
+ sizeof (lx_cap_privs_t); \
+ ptr->p_permitted = (void *) ptr + \
+ sizeof (lx_cap_privs_t) + \
+ lx_cap_priv_size;\
+ ptr->p_inheritable = (void *) ptr + \
+ sizeof (lx_cap_privs_t) + \
+ 2 * lx_cap_priv_size; \
+ } \
+ } while (0)
+
+static long
+lx_cap_update_priv(priv_set_t *priv, const uint32_t cap[])
+{
+ int i, j;
+ boolean_t cap_set;
+ boolean_t priv_set;
+ boolean_t updated = B_FALSE;
+ for (i = 0; i <= LX_CAP_MAX_CHECK; i++) {
+ cap_set = LX_CAP_CAPISSET(i, cap);
+ if (lx_cap_mapping[i] == NULL || i > LX_CAP_MAX_VALID) {
+ /* don't allow setting unsupported caps */
+ if (cap_set)
+ return (-1);
+ else
+ continue;
+ }
+ for (j = 0; lx_cap_mapping[i][j] != NULL; j++) {
+ priv_set = priv_ismember(priv, lx_cap_mapping[i][j]);
+ if (priv_set && !cap_set) {
+ priv_delset(priv, lx_cap_mapping[i][j]);
+ updated = B_TRUE;
+ } else if (!priv_set && cap_set) {
+ priv_addset(priv, lx_cap_mapping[i][j]);
+ updated = B_TRUE;
+ }
+ }
+ }
+ if (updated)
+ return (1);
+ else
+ return (0);
+}
+
+static long
+lx_cap_to_priv(lx_cap_data_t *cap, lx_cap_privs_t *priv)
+{
+ long changes = 0;
+ long result;
+
+ result = lx_cap_update_priv(priv->p_permitted, cap->permitted);
+ if (result < 0)
+ return (-1);
+ else if (result > 0)
+ changes |= LX_CAP_UPDATE_PERMITTED;
+
+ result = lx_cap_update_priv(priv->p_effective, cap->effective);
+ if (result < 0)
+ return (-1);
+ else if (result > 0)
+ changes |= LX_CAP_UPDATE_EFFECTIVE;
+
+ result = lx_cap_update_priv(priv->p_inheritable, cap->inheritable);
+ if (result < 0)
+ return (-1);
+ else if (result > 0)
+ changes |= LX_CAP_UPDATE_INHERITABLE;
+
+ return (changes);
+}
+
+static void
+lx_cap_from_priv(const priv_set_t *priv, uint32_t cap[])
+{
+ int i, j;
+ boolean_t valid;
+ memset(cap, '\0', sizeof (uint32_t) * LX_CAP_MAXLEN);
+ for (i = 0; i <= LX_CAP_MAX_VALID; i++) {
+ if (lx_cap_mapping[i] == NULL) {
+ continue;
+ }
+ valid = B_TRUE;
+ for (j = 0; lx_cap_mapping[i][j] != NULL; j++) {
+ if (!priv_ismember(priv,
+ lx_cap_mapping[i][j])) {
+ valid = B_FALSE;
+ }
+ }
+ if (valid) {
+ LX_CAP_CAPSET(i, cap);
+ }
+ }
+}
+
+static long
+lx_cap_read_cap(const lx_cap_user_header_t *uhp, const lx_cap_user_data_t *udp,
+ lx_cap_data_t *cd)
+{
+ lx_cap_user_header_t uh;
+ lx_cap_user_data_t ud_buf;
+ int cap_count;
+ int i;
+
+ if (uucopy(uhp, &uh, sizeof (uh)) != 0)
+ return (-errno);
+
+ switch (uh.version) {
+ case LX_CAP_VERSION_1:
+ cap_count = 1;
+ break;
+ case LX_CAP_VERSION_2:
+ case LX_CAP_VERSION_3:
+ cap_count = 2;
+ break;
+ default:
+ return (-EINVAL);
+ }
+
+ /* Only allow capset on calling process */
+ if (uh.pid != 0 && uh.pid != getpid())
+ return (-EPERM);
+
+ /* zero the struct in case cap_count < 2 */
+ memset(cd, '\0', sizeof (lx_cap_data_t));
+
+ for (i = 0; i < cap_count; i++) {
+ if (uucopy(udp + i, &ud_buf, sizeof (ud_buf)) != 0)
+ return (-errno);
+ cd->permitted[i] = ud_buf.permitted;
+ cd->effective[i] = ud_buf.effective;
+ cd->inheritable[i] = ud_buf.inheritable;
+ }
+ return (0);
+}
+
+long
+lx_capget(uintptr_t p1, uintptr_t p2)
+{
+ const priv_impl_info_t *impl;
+ lx_cap_user_header_t *uhp = (lx_cap_user_header_t *)p1;
+ lx_cap_user_data_t *udp = (lx_cap_user_data_t *)p2;
+ lx_cap_user_header_t uh;
+ lx_cap_privs_t *privs;
+ lx_cap_data_t cd_result;
+ lx_cap_user_data_t cd_buf;
+ int cap_count;
+ int i;
+
+ if (lx_cap_priv_size == 0) {
+ impl = getprivimplinfo();
+ lx_cap_priv_size = sizeof (priv_chunk_t) * impl->priv_setsize;
+ }
+
+ if (uucopy(uhp, &uh, sizeof (uh)) != 0)
+ return (-errno);
+
+ switch (uh.version) {
+ case LX_CAP_VERSION_1:
+ cap_count = 1;
+ break;
+ case LX_CAP_VERSION_2:
+ case LX_CAP_VERSION_3:
+ cap_count = 2;
+ break;
+ default:
+ return (-EINVAL);
+ }
+
+ /*
+ * Only allow capget on the calling process.
+ * If a pid is specified, lie about being able to locate it.
+ */
+ if (uh.pid > 0 && uh.pid != getpid())
+ return (-ESRCH);
+ if (uh.pid < 0)
+ return (-EINVAL);
+
+ LX_CAP_ALLOC_PRIVS(privs);
+ if (privs == NULL)
+ return (-ENOMEM);
+
+ if (getppriv(PRIV_PERMITTED, privs->p_permitted) != 0)
+ return (-errno);
+ if (getppriv(PRIV_EFFECTIVE, privs->p_effective) != 0)
+ return (-errno);
+ if (getppriv(PRIV_INHERITABLE, privs->p_inheritable) != 0)
+ return (-errno);
+
+ lx_cap_from_priv(privs->p_permitted, cd_result.permitted);
+ lx_cap_from_priv(privs->p_effective, cd_result.effective);
+ lx_cap_from_priv(privs->p_inheritable, cd_result.inheritable);
+
+ /* convert to output format */
+ for (i = 0; i < cap_count; i++) {
+ cd_buf.effective = cd_result.effective[i];
+ cd_buf.permitted = cd_result.permitted[i];
+ cd_buf.inheritable = cd_result.inheritable[i];
+ if (uucopy(&cd_buf, udp + i, sizeof (cd_buf)) != 0)
+ return (-errno);
+ }
+
+ return (0);
+}
+
+long
+lx_capset(uintptr_t p1, uintptr_t p2)
+{
+ const priv_impl_info_t *impl;
+ lx_cap_data_t cd;
+ lx_cap_privs_t *privs;
+ long result;
+
+ if (lx_cap_priv_size == 0) {
+ impl = getprivimplinfo();
+ lx_cap_priv_size = sizeof (priv_chunk_t) * impl->priv_setsize;
+ }
+
+ /* verify header and read in desired capabilities */
+ result = lx_cap_read_cap((lx_cap_user_header_t *)p1,
+ (lx_cap_user_data_t *)p2, &cd);
+ if (result != 0)
+ return (result);
+
+ LX_CAP_ALLOC_PRIVS(privs);
+ if (privs == NULL)
+ return (-ENOMEM);
+
+ /* fetch current privs to compare against */
+ if (getppriv(PRIV_PERMITTED, privs->p_permitted) != 0)
+ return (-errno);
+ if (getppriv(PRIV_EFFECTIVE, privs->p_effective) != 0)
+ return (-errno);
+ if (getppriv(PRIV_INHERITABLE, privs->p_inheritable) != 0)
+ return (-errno);
+
+
+ result = lx_cap_to_priv(&cd, privs);
+ if (result < 0)
+ return (-EPERM);
+
+ /* report success if no changes needed */
+ if (result == 0)
+ return (0);
+
+ /* Ensure the effective/inheritable caps aren't > permitted */
+ if (!priv_issubset(privs->p_effective, privs->p_permitted) ||
+ !priv_issubset(privs->p_inheritable, privs->p_permitted))
+ return (-EPERM);
+
+ /*
+ * Here is where things become racy. Linux updates all three
+ * capability sets simultaneously in the capset syscall. In order to
+ * emulate capabilities via privileges, three setppriv operations are
+ * required in sequence. If one or two should fail, there is not a
+ * mechanism to convey the incomplete operation to the caller.
+ *
+ * We do two things to make this less risky:
+ * 1. Verify that both the desired effective and inheritable
+ * sets are subsets of the desired permitted set.
+ * 2. Perform the setppriv of the permitted set first.
+ *
+ * Should the setppriv(permitted) fail, we can safely bail out with an
+ * error. If it succeeds, the setppriv of effective and inheritable
+ * are likely to succeed given that they've been verified legal.
+ *
+ * If the partial error does happen, we'll be forced to report failure
+ * even though the privileges were altered.
+ */
+
+ if ((result & LX_CAP_UPDATE_PERMITTED) != 0) {
+ /* failure here is totally safe */
+ if (setppriv(PRIV_SET, PRIV_PERMITTED, privs->p_permitted) != 0)
+ return (-errno);
+ }
+ if ((result & LX_CAP_UPDATE_EFFECTIVE) != 0) {
+ /* failure here is a bummer */
+ if (setppriv(PRIV_SET, PRIV_EFFECTIVE, privs->p_effective) != 0)
+ return (-errno);
+ }
+ if ((result & LX_CAP_UPDATE_EFFECTIVE) != 0) {
+ /* failure here is a major bummer */
+ if (setppriv(PRIV_SET, PRIV_INHERITABLE,
+ privs->p_inheritable) != 0)
+ return (-errno);
+ }
+
+ return (0);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/clock.c b/usr/src/lib/brand/lx/lx_brand/common/clock.c
new file mode 100644
index 0000000000..898bfb5d59
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/clock.c
@@ -0,0 +1,303 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <errno.h>
+#include <string.h>
+#include <time.h>
+#include <sys/resource.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_syscall.h>
+#include <lx_signum.h>
+
+/*
+ * Translating from the Linux clock types to the Illumos types is a bit of a
+ * mess.
+ *
+ * Linux uses different values for it clock identifiers, so we have to do basic
+ * translations between the two. Thankfully, both Linux and Illumos implement
+ * the same POSIX SUSv3 clock types, so the semantics should be identical.
+ *
+ * However, CLOCK_REALTIME and CLOCK_HIGHRES (CLOCK_MONOTONIC) are the only two
+ * clock backends currently implemented on Illumos. Functions in the kernel
+ * that use the CLOCK_BACKEND macro will return an error for any clock type
+ * that does not exist in the clock_backend array. These functions are
+ * clock_settime, clock_gettime, clock_getres and timer_create.
+ *
+ * For reference, the kernel's clock_backend array looks like this:
+ *
+ * clock_backend[CLOCK_MAX] (6 entries)
+ * 0 __CLOCK_REALTIME0 valid ptr. (obs. same as CLOCK_REALTIME)
+ * 1 CLOCK_VIRTUAL NULL
+ * 2 CLOCK_THREAD_CPUTIME_ID NULL
+ * 3 CLOCK_REALTIME valid ptr.
+ * 4 CLOCK_MONOTONIC (CLOCK_HIGHRES) valid ptr.
+ * 5 CLOCK_PROCESS_CPUTIME_ID NULL
+ *
+ * See the comment on clock_highres_timer_create for full details but a zone
+ * needs the proc_clock_highres privilege to use the CLOCK_HIGHRES clock so it
+ * will generally be unusable by lx for timer_create.
+ */
+
+static int ltos_clock[] = {
+ CLOCK_REALTIME,
+ CLOCK_MONOTONIC,
+ CLOCK_PROCESS_CPUTIME_ID,
+ CLOCK_THREAD_CPUTIME_ID
+};
+
+/*
+ * Since the Illumos CLOCK_HIGHRES clock requires elevated privs, which can
+ * lead to a DOS, we use the only other option (CLOCK_REALTIME) when given
+ * LX_CLOCK_MONOTONIC.
+ */
+static int ltos_timer[] = {
+ CLOCK_REALTIME,
+ CLOCK_REALTIME,
+ CLOCK_THREAD_CPUTIME_ID, /* XXX thread, not process but fails */
+ CLOCK_THREAD_CPUTIME_ID
+};
+
+#define LX_CLOCK_REALTIME 0
+#define LX_CLOCK_MONOTONIC 1
+#define LX_CLOCK_PROCESS_CPUTIME_ID 2
+#define LX_CLOCK_THREAD_CPUTIME_ID 3
+
+#define LX_CLOCK_MAX (sizeof (ltos_clock) / sizeof (ltos_clock[0]))
+#define LX_TIMER_MAX (sizeof (ltos_timer) / sizeof (ltos_timer[0]))
+
+#define LX_SIGEV_PAD_SIZE ((64 - \
+ (sizeof (int) * 2 + sizeof (union sigval))) / sizeof (int))
+
+typedef struct {
+ union sigval lx_sigev_value; /* same layout for both */
+ int lx_sigev_signo;
+ int lx_sigev_notify;
+ union {
+ int lx_pad[LX_SIGEV_PAD_SIZE];
+ int lx_tid;
+ struct {
+ void (*lx_notify_function)(union sigval);
+ void *lx_notify_attribute;
+ } lx_sigev_thread;
+ } lx_sigev_un;
+} lx_sigevent_t;
+
+/* sigevent sigev_notify conversion table */
+static int ltos_sigev[] = {
+ SIGEV_SIGNAL,
+ SIGEV_NONE,
+ SIGEV_THREAD,
+ 0, /* Linux skips event 3 */
+ SIGEV_THREAD /* the Linux SIGEV_THREAD_ID */
+};
+
+#define LX_SIGEV_MAX (sizeof (ltos_sigev) / sizeof (ltos_sigev[0]))
+
+static long
+get_cputime(int who, struct timespec *tp)
+{
+ struct timespec ts;
+ struct rusage ru;
+ long ns;
+
+ if (getrusage(who, &ru) != 0)
+ return (-EINVAL);
+
+ ts.tv_sec = ru.ru_utime.tv_sec + ru.ru_stime.tv_sec;
+ ns = (ru.ru_utime.tv_usec + ru.ru_stime.tv_usec) * 1000;
+ if (ns > NANOSEC) {
+ ts.tv_sec += 1;
+ ns -= NANOSEC;
+ }
+ ts.tv_nsec = ns;
+ return ((uucopy(&ts, tp, sizeof (struct timespec)) < 0) ? -EFAULT : 0);
+}
+
+long
+lx_clock_gettime(int clock, struct timespec *tp)
+{
+ struct timespec ts;
+
+ if (tp == NULL)
+ return (-EFAULT);
+
+ if (clock == LX_CLOCK_PROCESS_CPUTIME_ID) {
+ return (get_cputime(RUSAGE_SELF, tp));
+ } else if (clock == LX_CLOCK_THREAD_CPUTIME_ID) {
+ return (get_cputime(RUSAGE_LWP, tp));
+ }
+
+ if (clock < 0 || clock > LX_CLOCK_MAX)
+ return (-EINVAL);
+
+ if (clock_gettime(ltos_clock[clock], &ts) < 0)
+ return (-errno);
+
+ return ((uucopy(&ts, tp, sizeof (struct timespec)) < 0) ? -EFAULT : 0);
+}
+
+long
+lx_clock_settime(int clock, struct timespec *tp)
+{
+ struct timespec ts;
+
+ if (clock < 0 || clock > LX_CLOCK_MAX)
+ return (-EINVAL);
+
+ if (uucopy(tp, &ts, sizeof (struct timespec)) < 0)
+ return (-EFAULT);
+
+ return ((clock_settime(ltos_clock[clock], &ts) < 0) ? -errno : 0);
+}
+
+long
+lx_clock_getres(int clock, struct timespec *tp)
+{
+ struct timespec ts;
+
+ if (clock < 0 || clock > LX_CLOCK_MAX)
+ return (-EINVAL);
+
+ if (clock_getres(ltos_clock[clock], &ts) < 0)
+ return (-errno);
+
+ /* the timespec pointer is allowed to be NULL */
+ if (tp == NULL)
+ return (0);
+
+ return ((uucopy(&ts, tp, sizeof (struct timespec)) < 0) ? -EFAULT : 0);
+}
+
+long
+lx_clock_nanosleep(int clock, int flags, struct timespec *rqtp,
+ struct timespec *rmtp)
+{
+ int ret = 0;
+ int err;
+ struct timespec rqt, rmt;
+
+ if (clock < 0 || clock > LX_CLOCK_MAX)
+ return (-EINVAL);
+
+ if (uucopy(rqtp, &rqt, sizeof (struct timespec)) < 0)
+ return (-EFAULT);
+
+ /* the TIMER_RELTIME and TIMER_ABSTIME flags are the same on Linux */
+ if ((err = clock_nanosleep(ltos_clock[clock], flags, &rqt, &rmt))
+ != 0) {
+ if (err != EINTR)
+ return (-err);
+ ret = -EINTR;
+ /*
+ * We fall through in case we have to pass back the remaining
+ * time.
+ */
+ }
+
+ /*
+ * Only copy values to rmtp if the timer is TIMER_RELTIME and rmtp is
+ * non-NULL.
+ */
+ if (((flags & TIMER_RELTIME) == TIMER_RELTIME) && (rmtp != NULL) &&
+ (uucopy(&rmt, rmtp, sizeof (struct timespec)) < 0))
+ return (-EFAULT);
+
+ return (ret);
+}
+
+/*ARGSUSED*/
+long
+lx_adjtimex(void *tp)
+{
+ return (-EPERM);
+}
+
+/*
+ * The Illumos timer_create man page says it accepts the following clocks:
+ * CLOCK_REALTIME (3) wall clock
+ * CLOCK_VIRTUAL (1) user CPU usage clock - No Backend
+ * CLOCK_PROF (2) user and system CPU usage clock - No Backend
+ * CLOCK_HIGHRES (4) non-adjustable, high-resolution clock
+ * However, in reality the Illumos timer_create only accepts CLOCK_REALTIME
+ * and CLOCK_HIGHRES, and since we can't use CLOCK_HIGHRES in a zone, we're
+ * down to one clock.
+ */
+long
+lx_timer_create(int clock, struct sigevent *lx_sevp, timer_t *tid)
+{
+ lx_sigevent_t lev;
+ struct sigevent sev;
+
+ if (clock < 0 || clock > LX_TIMER_MAX)
+ return (-EINVAL);
+
+ /* We have to convert the Linux sigevent layout to the Illumos layout */
+ if (uucopy(lx_sevp, &lev, sizeof (lev)) < 0)
+ return (-EFAULT);
+
+ if (lev.lx_sigev_notify < 0 || lev.lx_sigev_notify > LX_SIGEV_MAX)
+ return (-EINVAL);
+
+ sev.sigev_notify = ltos_sigev[lev.lx_sigev_notify];
+ sev.sigev_signo = ltos_signo[lev.lx_sigev_signo];
+ sev.sigev_value = lev.lx_sigev_value;
+
+ /*
+ * The sigevent sigev_notify_function and sigev_notify_attributes
+ * members are not used by timer_create, so no conversion is needed.
+ */
+
+ return ((timer_create(ltos_timer[clock], &sev, tid) < 0) ? -errno : 0);
+}
+
+long
+lx_timer_settime(timer_t tid, int flags, struct itimerspec *new_val,
+ struct itimerspec *old_val)
+{
+ return ((timer_settime(tid, flags, new_val, old_val) < 0) ? -errno : 0);
+}
+
+long
+lx_timer_gettime(timer_t tid, struct itimerspec *val)
+{
+ return ((timer_gettime(tid, val) < 0) ? -errno : 0);
+}
+
+long
+lx_timer_getoverrun(timer_t tid)
+{
+ int val;
+
+ val = timer_getoverrun(tid);
+ return ((val < 0) ? -errno : val);
+}
+
+long
+lx_timer_delete(timer_t tid)
+{
+ return ((timer_delete(tid) < 0) ? -errno : 0);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/clone.c b/usr/src/lib/brand/lx/lx_brand/common/clone.c
new file mode 100644
index 0000000000..4a77ceb01e
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/clone.c
@@ -0,0 +1,628 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <ucontext.h>
+#include <thread.h>
+#include <strings.h>
+#include <libintl.h>
+#include <sys/regset.h>
+#include <sys/syscall.h>
+#include <sys/inttypes.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/segments.h>
+#include <signal.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_types.h>
+#include <sys/lx_signal.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_thread.h>
+#include <sys/fork.h>
+
+#define LX_CSIGNAL 0x000000ff
+#define LX_CLONE_VM 0x00000100
+#define LX_CLONE_FS 0x00000200
+#define LX_CLONE_FILES 0x00000400
+#define LX_CLONE_SIGHAND 0x00000800
+#define LX_CLONE_PID 0x00001000
+#define LX_CLONE_PTRACE 0x00002000
+#define LX_CLONE_VFORK 0x00004000
+#define LX_CLONE_PARENT 0x00008000
+#define LX_CLONE_THREAD 0x00010000
+#define LX_CLONE_SYSVSEM 0x00040000
+#define LX_CLONE_SETTLS 0x00080000
+#define LX_CLONE_PARENT_SETTID 0x00100000
+#define LX_CLONE_CHILD_CLEARTID 0x00200000
+#define LX_CLONE_DETACH 0x00400000
+#define LX_CLONE_CHILD_SETTID 0x01000000
+
+#define SHARED_AS \
+ (LX_CLONE_VM | LX_CLONE_FS | LX_CLONE_FILES | LX_CLONE_SIGHAND \
+ | LX_CLONE_THREAD)
+#define CLONE_VFORK (LX_CLONE_VM | LX_CLONE_VFORK)
+#define CLONE_TD (LX_CLONE_THREAD|LX_CLONE_DETACH)
+
+#define IS_FORK(f) (((f) & SHARED_AS) == 0)
+#define IS_VFORK(f) (((f) & CLONE_VFORK) == CLONE_VFORK)
+
+#define LX_EXIT 1
+#define LX_EXIT_GROUP 2
+
+/*
+ * This is dicey. This seems to be an internal glibc structure, and not
+ * part of any external interface. Thus, it is subject to change without
+ * notice. FWIW, clone(2) itself seems to be an internal (or at least
+ * unstable) interface, since strace(1) shows it differently than the man
+ * page.
+ */
+struct lx_desc
+{
+ uint32_t entry_number;
+ uint32_t base_addr;
+ uint32_t limit;
+ uint32_t seg_32bit:1;
+ uint32_t contents:2;
+ uint32_t read_exec_only:1;
+ uint32_t limit_in_pages:1;
+ uint32_t seg_not_present:1;
+ uint32_t useable:1;
+ uint32_t empty:25;
+};
+
+struct clone_state {
+ void *c_retaddr; /* instr after clone()'s int80 */
+ int c_flags; /* flags to clone(2) */
+ int c_sig; /* signal to send on thread exit */
+ void *c_stk; /* %esp of new thread */
+ void *c_ptidp;
+ struct lx_desc *c_ldtinfo; /* thread-specific segment */
+ void *c_ctidp;
+#if defined(_LP64)
+ lx_regs_t c_regs; /* original register state */
+#else
+ uintptr_t c_gs; /* Linux's %gs */
+#endif
+ sigset_t c_sigmask; /* signal mask */
+ lx_affmask_t c_affmask; /* CPU affinity mask */
+ volatile int *c_clone_res; /* pid/error returned to cloner */
+};
+
+extern void lx_setup_clone(uintptr_t, void *, void *);
+
+/*
+ * Counter incremented when we vfork(2) ourselves, and decremented when the
+ * vfork(2)ed child exit(2)s or exec(2)s.
+ */
+static int is_vforked = 0;
+
+long
+lx_exit(uintptr_t p1)
+{
+ int ret, status = (int)p1;
+ lx_tsd_t *lx_tsd;
+
+ /*
+ * If we are a vfork(2)ed child, we need to exit as quickly and
+ * cleanly as possible to avoid corrupting our parent.
+ */
+ if (is_vforked != 0) {
+ is_vforked--;
+ _exit(status);
+ }
+
+ if ((ret = thr_getspecific(lx_tsd_key, (void **)&lx_tsd)) != 0)
+ lx_err_fatal("exit: unable to read thread-specific data: %s",
+ strerror(ret));
+
+ assert(lx_tsd != 0);
+
+ lx_tsd->lxtsd_exit = LX_EXIT;
+ lx_tsd->lxtsd_exit_status = status;
+
+ lx_ptrace_stop_if_option(LX_PTRACE_O_TRACEEXIT);
+
+ /*
+ * Block all signals in the exit context to avoid taking any signals
+ * (to the degree possible) while exiting.
+ */
+ (void) sigfillset(&lx_tsd->lxtsd_exit_context.uc_sigmask);
+
+ /*
+ * This thread is exiting. Restore the state of the thread to
+ * what it was before we started running linux code.
+ * For 64-bit code, since we know we are unwinding the stack back to
+ * lx_init, we need to unwind the syscall mode flag "stack" as well.
+ */
+#if defined(_LP64)
+ (void) syscall(SYS_brand, B_UNWIND_NTV_SYSC_FLAG);
+#endif
+ (void) setcontext(&lx_tsd->lxtsd_exit_context);
+
+ /*
+ * If we returned from the setcontext(2), something is very wrong.
+ */
+ lx_err_fatal("exit: unable to set exit context: %s", strerror(errno));
+
+ /*NOTREACHED*/
+ return (0);
+}
+
+long
+lx_group_exit(uintptr_t p1)
+{
+ int ret, status = (int)p1;
+ lx_tsd_t *lx_tsd;
+
+ /*
+ * If we are a vfork(2)ed child, we need to exit as quickly and
+ * cleanly as possible to avoid corrupting our parent.
+ */
+ if (is_vforked != 0) {
+ is_vforked--;
+ _exit(status);
+ }
+
+ if ((ret = thr_getspecific(lx_tsd_key, (void **)&lx_tsd)) != 0)
+ lx_err_fatal("group_exit: unable to read thread-specific "
+ "data: %s", strerror(ret));
+
+ assert(lx_tsd != 0);
+
+ lx_tsd->lxtsd_exit = LX_EXIT_GROUP;
+ lx_tsd->lxtsd_exit_status = status;
+
+ /*
+ * Block all signals in the exit context to avoid taking any signals
+ * (to the degree possible) while exiting.
+ */
+ (void) sigfillset(&lx_tsd->lxtsd_exit_context.uc_sigmask);
+
+ /*
+ * This thread is exiting. Restore the state of the thread to
+ * what it was before we started running linux code.
+ * For 64-bit code, since we know we are unwinding the stack back to
+ * lx_init, we need to unwind the syscall mode flag "stack" as well.
+ */
+#if defined(_LP64)
+ (void) syscall(SYS_brand, B_UNWIND_NTV_SYSC_FLAG);
+#endif
+ (void) setcontext(&lx_tsd->lxtsd_exit_context);
+
+ /*
+ * If we returned from the setcontext(2), something is very wrong.
+ */
+ lx_err_fatal("group_exits: unable to set exit context: %s",
+ strerror(errno));
+
+ /*NOTREACHED*/
+ return (0);
+}
+
+static void *
+clone_start(void *arg)
+{
+ int rval;
+ struct clone_state *cs = (struct clone_state *)arg;
+ lx_tsd_t lx_tsd;
+
+ /*
+ * Let the kernel finish setting up all the needed state for this
+ * new thread.
+ *
+ * We already created the thread using the thr_create(3C) library
+ * call, so most of the work required to emulate lx_clone(2) has
+ * been done by the time we get to this point. Instead of creating
+ * a new brandsys(2) subcommand to perform the last few bits of
+ * bookkeeping, we just use the lx_clone() slot in the syscall
+ * table.
+ */
+ lx_debug("\tre-vectoring to lx kernel module to complete lx_clone()");
+ lx_debug("\tLX_SYS_clone(0x%x, 0x%p, 0x%p, 0x%p, 0x%p)",
+ cs->c_flags, cs->c_stk, cs->c_ptidp, cs->c_ldtinfo, cs->c_ctidp);
+
+ rval = syscall(SYS_brand, B_IKE_SYSCALL + LX_EMUL_clone,
+ cs->c_flags, cs->c_stk, cs->c_ptidp, cs->c_ldtinfo, cs->c_ctidp,
+ NULL);
+
+ /*
+ * At this point the parent is waiting for cs->c_clone_res to go
+ * non-zero to indicate the thread has been cloned. The value set
+ * in cs->c_clone_res will be used for the return value from
+ * clone().
+ */
+ if (rval < 0) {
+ *(cs->c_clone_res) = -errno;
+ lx_debug("\tkernel clone failed, errno %d\n", errno);
+ return (NULL);
+ }
+
+ if (lx_sched_setaffinity(0, sizeof (cs->c_affmask),
+ (uintptr_t)&cs->c_affmask) != 0) {
+ *(cs->c_clone_res) = -errno;
+
+ lx_err_fatal("Unable to set affinity mask in child thread: %s",
+ strerror(errno));
+ }
+
+ /* Initialize the thread specific data for this thread. */
+ bzero(&lx_tsd, sizeof (lx_tsd));
+#if defined(_ILP32)
+ lx_tsd.lxtsd_gs = cs->c_gs;
+#endif
+
+ /*
+ * Use the address of the stack-allocated lx_tsd as the
+ * per-thread storage area to cache various values for later
+ * use.
+ *
+ * This address is only used by this thread, so there is no
+ * danger of other threads using this storage area, nor of it
+ * being accessed once this stack frame has been freed.
+ */
+ if (thr_setspecific(lx_tsd_key, &lx_tsd) != 0) {
+ *(cs->c_clone_res) = -errno;
+ lx_err_fatal("Unable to set thread-specific ptr for clone: %s",
+ strerror(rval));
+ }
+
+ /*
+ * Save the current context of this thread.
+ *
+ * We'll restore this context when this thread attempts to exit.
+ */
+ if (getcontext(&lx_tsd.lxtsd_exit_context) != 0) {
+ *(cs->c_clone_res) = -errno;
+
+ lx_err_fatal("Unable to initialize thread-specific exit "
+ "context: %s", strerror(errno));
+ }
+
+ /*
+ * Do the final stack twiddling, reset %gs, and return to the
+ * clone(2) path.
+ */
+ if (lx_tsd.lxtsd_exit == 0) {
+ if (sigprocmask(SIG_SETMASK, &cs->c_sigmask, NULL) < 0) {
+ *(cs->c_clone_res) = -errno;
+
+ lx_err_fatal("Unable to release held signals for child "
+ "thread: %s", strerror(errno));
+ }
+
+ /*
+ * Let the parent know that the clone has (effectively) been
+ * completed.
+ */
+ *(cs->c_clone_res) = rval;
+
+#if defined(_LP64)
+ (void) syscall(SYS_brand, B_CLR_NTV_SYSC_FLAG);
+ lx_setup_clone((uintptr_t)&cs->c_regs, cs->c_retaddr,
+ cs->c_stk);
+#else
+ lx_setup_clone(cs->c_gs, cs->c_retaddr, cs->c_stk);
+#endif
+
+ /* lx_setup_clone() should never return. */
+ assert(0);
+ }
+
+ /*
+ * We are here because the Linux application called the exit() or
+ * exit_group() system call. In turn the brand library did a
+ * setcontext() to jump to the thread context state saved in
+ * getcontext(), above.
+ */
+ if (lx_tsd.lxtsd_exit == LX_EXIT)
+ thr_exit((void *)(long)lx_tsd.lxtsd_exit_status);
+ else
+ exit(lx_tsd.lxtsd_exit_status);
+
+ assert(0);
+ /*NOTREACHED*/
+}
+
+/*
+ * See glibc sysdeps/unix/sysv/linux/x86_64/clone.S code for x64 argument order
+ * and the Linux kernel/fork.c code for the various ways arguments can be passed
+ * to the clone syscall (CONFIG_CLONE_BACKWARDS, et al).
+ */
+long
+lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
+ uintptr_t p5)
+{
+ struct clone_state *cs;
+ int flags = (int)p1;
+ void *cldstk = (void *)p2;
+ void *ptidp = (void *)p3;
+#if defined(_LP64)
+ void *ctidp = (void *)p4;
+ struct lx_desc *ldtinfo = (void *)p5;
+#else /* is 32bit */
+ struct lx_desc *ldtinfo = (void *)p4;
+ void *ctidp = (void *)p5;
+#endif
+ thread_t tid;
+ volatile int clone_res;
+ int sig;
+ int rval;
+ int pid;
+ lx_regs_t *rp;
+ sigset_t sigmask;
+ int fork_flags = 0;
+
+ if (flags & LX_CLONE_SETTLS) {
+ lx_debug("lx_clone(flags=0x%x stk=0x%p ptidp=0x%p ldt=0x%p "
+ "ctidp=0x%p", flags, cldstk, ptidp, ldtinfo, ctidp);
+ } else {
+ lx_debug("lx_clone(flags=0x%x stk=0x%p ptidp=0x%p)",
+ flags, cldstk, ptidp);
+ }
+
+ /*
+ * Only supported for pid 0 on Linux
+ */
+ if (flags & LX_CLONE_PID)
+ return (-EINVAL);
+
+ /*
+ * CLONE_THREAD requires CLONE_SIGHAND.
+ *
+ * CLONE_THREAD and CLONE_DETACHED must both be either set or cleared
+ * in kernel 2.4 and prior.
+ * In kernel 2.6 (and later) CLONE_DETACHED was dropped completely, so
+ * we no longer have this requirement.
+ */
+
+ if (flags & CLONE_TD) {
+ if (!(flags & LX_CLONE_SIGHAND))
+ return (-EINVAL);
+ if (strncmp(lx_release, "2.4", 3) == 0 &&
+ (flags & CLONE_TD) != CLONE_TD)
+ return (-EINVAL);
+ }
+
+ rp = lx_syscall_regs();
+
+ /* test if pointer passed by user are writable */
+ if (flags & LX_CLONE_PARENT_SETTID) {
+ if (uucopy(ptidp, &pid, sizeof (int)) != 0)
+ return (-EFAULT);
+ if (uucopy(&pid, ptidp, sizeof (int)) != 0)
+ return (-EFAULT);
+ }
+ if (flags & LX_CLONE_CHILD_SETTID) {
+ if (uucopy(ctidp, &pid, sizeof (int)) != 0)
+ return (-EFAULT);
+ if (uucopy(&pid, ctidp, sizeof (int)) != 0)
+ return (-EFAULT);
+ }
+
+ /* See if this is a fork() operation or a thr_create(). */
+ if (IS_FORK(flags) || IS_VFORK(flags)) {
+ if (flags & LX_CLONE_PARENT) {
+ lx_unsupported("clone(2) only supports CLONE_PARENT "
+ "for threads.\n");
+ return (-ENOTSUP);
+ }
+
+ if (flags & LX_CLONE_PTRACE)
+ lx_ptrace_fork();
+
+ if ((flags & LX_CSIGNAL) == 0)
+ fork_flags |= FORK_NOSIGCHLD;
+
+ if (flags & LX_CLONE_VFORK) {
+ is_vforked++;
+ rval = vforkx(fork_flags);
+ if (rval != 0)
+ is_vforked--;
+ } else {
+ rval = forkx(fork_flags);
+ if (rval == 0 && lx_is_rpm)
+ (void) sleep(lx_rpm_delay);
+ }
+
+ /*
+ * Since we've already forked, we can't do much if uucopy
+ * fails, so we just ignore failure. Failure is unlikely since
+ * we've tested the memory before we did the fork.
+ */
+ if (rval > 0 && (flags & LX_CLONE_PARENT_SETTID)) {
+ (void) uucopy(&rval, ptidp, sizeof (int));
+ }
+
+ if (rval == 0 && (flags & LX_CLONE_CHILD_SETTID)) {
+ /*
+ * lx_getpid should not fail, and if it does, there's
+ * not much we can do about it since we've already
+ * forked, so on failure, we just don't copy the
+ * memory.
+ */
+ pid = lx_getpid();
+ if (pid >= 0)
+ (void) uucopy(&pid, ctidp, sizeof (int));
+ }
+
+ /* Parent just returns */
+ if (rval != 0)
+ return ((rval < 0) ? -errno : rval);
+
+
+ /*
+ * Set up additional data in the lx_proc_data structure as
+ * necessary.
+ */
+ rval = syscall(SYS_brand, B_IKE_SYSCALL + LX_EMUL_clone,
+ flags, cldstk, ptidp, ldtinfo, ctidp, NULL);
+ if (rval < 0) {
+ return (rval);
+ }
+
+ /*
+ * lx_setup_clone() doesn't return below, so stop now, if
+ * necessary.
+ */
+ lx_ptrace_stop_if_option(LX_PTRACE_O_TRACECLONE);
+
+ /*
+ * If provided, the child needs its new stack set up.
+ */
+ if (cldstk) {
+#if defined(_LP64)
+ (void) syscall(SYS_brand, B_CLR_NTV_SYSC_FLAG);
+ lx_setup_clone((uintptr_t)rp, (void *)rp->lxr_rip,
+ cldstk);
+#else
+ lx_setup_clone(rp->lxr_gs, (void *)rp->lxr_eip, cldstk);
+#endif
+ /* lx_setup_clone() should never return. */
+ assert(0);
+ }
+
+ return (0);
+ }
+
+ /*
+ * We have very restricted support.... only exactly these flags are
+ * supported
+ */
+ if (((flags & SHARED_AS) != SHARED_AS)) {
+ lx_unsupported("clone(2) requires that all or none of "
+ "CLONE_VM, CLONE_FS, CLONE_FILES, CLONE_THREAD and "
+ "CLONE_SIGHAND be set.\n");
+ return (-ENOTSUP);
+ }
+
+ if (cldstk == NULL) {
+ lx_unsupported("clone(2) requires the caller to allocate the "
+ "child's stack.\n");
+ return (-ENOTSUP);
+ }
+
+ /*
+ * If we want a signal-on-exit, ensure that the signal is valid.
+ */
+ if ((sig = ltos_signo[flags & LX_CSIGNAL]) == -1) {
+ lx_unsupported("clone(2) passed unsupported signal: %d", sig);
+ return (-ENOTSUP);
+ }
+
+ /*
+ * To avoid malloc() here, we steal a part of the new thread's
+ * stack to store all the info that thread might need for
+ * initialization. We also make it 64-bit aligned for good
+ * measure.
+ */
+ cs = (struct clone_state *)
+ ((p2 - sizeof (struct clone_state)) & -((uintptr_t)8));
+ cs->c_flags = flags;
+ cs->c_sig = sig;
+ cs->c_stk = cldstk;
+ cs->c_ptidp = ptidp;
+ cs->c_ldtinfo = ldtinfo;
+ cs->c_ctidp = ctidp;
+ cs->c_clone_res = &clone_res;
+#if defined(_LP64)
+ /*
+ * The AMD64 ABI says that the kernel clobbers %rcx and %r11. We
+ * return a value in %rax. The new %rsp and %rip will be setup in
+ * lx_setup_clone. Thus, we don't worry about passing/restoring those
+ * registers.
+ */
+ cs->c_regs.lxr_rdi = rp->lxr_rdi;
+ cs->c_regs.lxr_rsi = rp->lxr_rsi;
+ cs->c_regs.lxr_rbx = rp->lxr_rbx;
+ cs->c_regs.lxr_rdx = rp->lxr_rdx;
+ cs->c_regs.lxr_rdi = rp->lxr_rdi;
+ cs->c_regs.lxr_r8 = rp->lxr_r8;
+ cs->c_regs.lxr_r9 = rp->lxr_r9;
+ cs->c_regs.lxr_r10 = rp->lxr_r10;
+ cs->c_regs.lxr_r12 = rp->lxr_r12;
+ cs->c_regs.lxr_r13 = rp->lxr_r13;
+ cs->c_regs.lxr_r14 = rp->lxr_r14;
+ cs->c_regs.lxr_r15 = rp->lxr_r15;
+#else
+ cs->c_gs = rp->lxr_gs;
+#endif
+
+ if (lx_sched_getaffinity(0, sizeof (cs->c_affmask),
+ (uintptr_t)&cs->c_affmask) == -1)
+ lx_err_fatal("Unable to get affinity mask for parent "
+ "thread: %s", strerror(errno));
+
+ /*
+ * We want the new thread to return directly to the return site for
+ * the system call.
+ */
+#if defined(_LP64)
+ cs->c_retaddr = (void *)rp->lxr_rip;
+#else
+ cs->c_retaddr = (void *)rp->lxr_eip;
+#endif
+ clone_res = 0;
+
+ (void) sigfillset(&sigmask);
+
+ /*
+ * Block all signals because the thread we create won't be able to
+ * properly handle them until it's fully set up.
+ */
+ if (sigprocmask(SIG_BLOCK, &sigmask, &cs->c_sigmask) < 0) {
+ lx_debug("lx_clone sigprocmask() failed: %s", strerror(errno));
+ return (-errno);
+ }
+
+ rval = thr_create(NULL, NULL, clone_start, cs, THR_DETACHED, &tid);
+
+ /*
+ * Release any pending signals
+ */
+ (void) sigprocmask(SIG_SETMASK, &cs->c_sigmask, NULL);
+
+ /*
+ * Wait for the child to be created and have its tid assigned.
+ */
+ if (rval == 0) {
+ while (clone_res == 0)
+ ;
+
+ rval = clone_res;
+ }
+
+ if (rval == 0)
+ lx_ptrace_stop_if_option(LX_PTRACE_O_TRACECLONE);
+
+ return (rval);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/debug.c b/usr/src/lib/brand/lx/lx_brand/common/debug.c
new file mode 100644
index 0000000000..77131c70d3
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/debug.c
@@ -0,0 +1,154 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <thread.h>
+#include <unistd.h>
+
+#include <sys/modctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <sys/lx_brand.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_misc.h>
+
+/* internal debugging state */
+static char *lx_debug_path = NULL; /* debug output file path */
+static char lx_debug_path_buf[MAXPATHLEN];
+
+void
+lx_debug_enable(void)
+{
+ /* send all debugging output to /dev/tty */
+ lx_debug_path = "/dev/tty";
+ lx_debug("lx_debug: debugging output enabled: %s", lx_debug_path);
+}
+
+void
+lx_debug_init(void)
+{
+ if (getenv("LX_DEBUG") == NULL)
+ return;
+
+ /*
+ * It's OK to use this value without any locking, as all callers can
+ * use the return value to decide whether extra work should be done
+ * before calling lx_debug().
+ *
+ * If debugging is disabled after a routine calls this function it
+ * doesn't really matter as lx_debug() will see debugging is disabled
+ * and will not output anything.
+ */
+ lx_debug_enabled = 1;
+
+ /* check if there's a debug log file specified */
+ lx_debug_path = getenv("LX_DEBUG_FILE");
+ if (lx_debug_path == NULL) {
+ /* send all debugging output to /dev/tty */
+ lx_debug_path = "/dev/tty";
+ }
+
+ (void) strlcpy(lx_debug_path_buf, lx_debug_path,
+ sizeof (lx_debug_path_buf));
+ lx_debug_path = lx_debug_path_buf;
+
+ lx_debug("lx_debug: debugging output ENABLED to path: \"%s\"",
+ lx_debug_path);
+}
+
+void
+lx_debug(const char *msg, ...)
+{
+ va_list ap;
+ char *buf;
+ int rv, fd, n;
+ int errno_backup;
+ int size = LX_MSG_MAXLEN + 1;
+
+ if (lx_debug_enabled == 0)
+ return;
+
+ /*
+ * If debugging is not enabled, we do not wish to have a large stack
+ * footprint. The buffer allocation is thus done conditionally,
+ * rather than as regular automatic storage.
+ */
+ if ((buf = SAFE_ALLOCA(size)) == NULL)
+ return;
+
+ errno_backup = errno;
+
+ /* prefix the message with pid/tid */
+ if ((n = snprintf(buf, size, "%u/%u: ", getpid(), thr_self())) == -1) {
+ errno = errno_backup;
+ return;
+ }
+
+ /* format the message */
+ va_start(ap, msg);
+ rv = vsnprintf(&buf[n], size - n, msg, ap);
+ va_end(ap);
+ if (rv == -1) {
+ errno = errno_backup;
+ return;
+ }
+
+ /* add a carrige return if there isn't one already */
+ if ((buf[strlen(buf) - 1] != '\n') &&
+ (strlcat(buf, "\n", size) >= size)) {
+ errno = errno_backup;
+ return;
+ }
+
+ /*
+ * Open the debugging output file. note that we don't protect
+ * ourselves against exec or fork1 here. if an mt process were
+ * to exec/fork1 while we're doing this they'd end up with an
+ * extra open desciptor in their fd space. a'well. shouldn't
+ * really matter.
+ */
+ if ((fd = open(lx_debug_path,
+ O_WRONLY|O_APPEND|O_CREAT|O_NDELAY|O_NOCTTY, 0666)) == -1) {
+ return;
+ }
+ (void) fchmod(fd, 0666);
+
+ /* we retry in case of EINTR */
+ do {
+ rv = write(fd, buf, strlen(buf));
+ } while ((rv == -1) && (errno == EINTR));
+ (void) fsync(fd);
+
+ (void) close(fd);
+ errno = errno_backup;
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/dir.c b/usr/src/lib/brand/lx/lx_brand/common/dir.c
new file mode 100644
index 0000000000..0fda763c19
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/dir.c
@@ -0,0 +1,275 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <string.h>
+#include <stddef.h>
+#include <errno.h>
+#include <unistd.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/dirent.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_syscall.h>
+
+#define LX_NAMEMAX 256
+
+struct lx_old_dirent {
+ long d_ino; /* not l_ino_t */
+ long d_off;
+ ushort_t d_reclen;
+ char d_name[LX_NAMEMAX];
+};
+
+/*
+ * The positioning of d_type depends on d_reclen, since it follows
+ * d_name which varies in size. Because d_type is not being emulated
+ * beyond DT_UNKNOWN (0), this can be achieved by zero-filling the
+ * structure to d_reclen bytes after the end of d_name.
+ */
+struct lx_dirent {
+ long d_ino;
+ long d_off;
+ ushort_t d_reclen;
+ char d_name[LX_NAMEMAX];
+ uchar_t d_type;
+};
+
+/* base definition of linux_dirent from readdir.c - sizeof is 12 */
+typedef struct {
+ ulong_t d_ino;
+ ulong_t d_off;
+ ushort_t d_reclen;
+ char d_name[1];
+} lx_linux_dirent_t;
+
+/*
+ * Record must be long enough to house d_name string, null terminator and
+ * d_type field. It's then padded to nearest 8-byte boundary
+ */
+#define LX_RECLEN(namelen) \
+ ((offsetof(struct lx_dirent, d_name) + 2 + (namelen) + 7) & ~7)
+
+/*
+ * Bytes after d_name string until d_reclen should be zeroed.
+ * Includes zero-terminating d_name
+ */
+#define LX_ZEROLEN(namelen) \
+ (LX_RECLEN(namelen) - \
+ ((offsetof(struct lx_dirent, d_name) + (namelen))))
+
+struct lx_dirent64 {
+ uint64_t d_ino;
+ int64_t d_off;
+ ushort_t d_reclen;
+ uchar_t d_type;
+ char d_name[LX_NAMEMAX];
+};
+
+#define LX_RECLEN64(namelen) \
+ ((offsetof(struct lx_dirent64, d_name) + 1 + (namelen) + 7) & ~7)
+
+/*
+ * Read in one dirent structure from fd into dirp.
+ * p3 (count) is ignored.
+ */
+/*ARGSUSED*/
+long
+lx_readdir(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ int fd = (int)p1;
+ struct lx_old_dirent *dirp = (struct lx_old_dirent *)p2;
+ uint_t count = sizeof (struct lx_old_dirent);
+ int rc = 0;
+ struct lx_old_dirent _ld;
+ struct dirent *sd = (struct dirent *)&_ld;
+
+ /*
+ * The return value from getdents is not applicable, as
+ * it might have squeezed more than one dirent in the buffer
+ * we provided.
+ *
+ * getdents() will deal with the case of dirp == NULL
+ */
+ if ((rc = getdents(fd, sd, count)) < 0)
+ return (-errno);
+
+ /*
+ * Set rc 1 (pass), or 0 (end of directory).
+ */
+ rc = (sd->d_reclen == 0) ? 0 : 1;
+
+ if (uucopy(sd, dirp, count) != 0)
+ return (-errno);
+
+ return (rc);
+}
+
+/*
+ * Read in dirent structures from p1 (fd) into p2 (buffer).
+ * p3 (count) is the size of the memory area.
+ */
+long
+lx_getdents(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ int fd = (int)p1;
+ void *buf = (void *)p2;
+ void *sbuf, *lbuf;
+ int lbufsz = (uint_t)p3;
+ int sbufsz;
+ int namelen;
+ struct dirent *sd;
+ struct lx_dirent *ld;
+ int bytes, rc;
+
+ /*
+ * readdir will pass in the full size, but some test code calls getdents
+ * directly and uses the bare struct. For these, just pretend we got
+ * a single full-size entry so we can obtain the proper errno.
+ */
+ if (lbufsz == sizeof (lx_linux_dirent_t))
+ lbufsz = sizeof (struct lx_dirent);
+
+ if (lbufsz < sizeof (struct lx_dirent))
+ return (-EINVAL);
+
+ /*
+ * The Linux dirent is bigger than the Solaris dirent. To
+ * avoid inadvertently consuming more of the directory than we can
+ * pass back to the Linux app, we hand the kernel a smaller buffer
+ * than the app handed us.
+ */
+ sbufsz = (lbufsz / 32) * 24;
+
+ sbuf = SAFE_ALLOCA(sbufsz);
+ lbuf = SAFE_ALLOCA(lbufsz);
+ if (sbuf == NULL || lbuf == NULL)
+ return (-ENOMEM);
+
+ if ((bytes = getdents(fd, sbuf, sbufsz)) < 0)
+ return (-errno);
+
+ /* munge the Solaris buffer to a linux buffer. */
+ sd = (struct dirent *)sbuf;
+ ld = (struct lx_dirent *)lbuf;
+ rc = 0;
+ while (bytes > 0) {
+ namelen = strlen(sd->d_name);
+ if (namelen >= LX_NAMEMAX)
+ namelen = LX_NAMEMAX - 1;
+ ld->d_ino = (uint64_t)sd->d_ino;
+ ld->d_off = (int64_t)sd->d_off;
+
+ (void) strncpy(ld->d_name, sd->d_name, namelen);
+ /*
+ * Zero out the rest of the record.
+ * This effective sets ld->d_type = 0 and zero-terminates d_name
+ */
+ memset(ld->d_name + namelen, '\0', LX_ZEROLEN(namelen));
+ ld->d_reclen = (ushort_t)LX_RECLEN(namelen);
+
+ bytes -= (int)sd->d_reclen;
+ rc += (int)ld->d_reclen;
+
+ sd = (struct dirent *)(void *)((caddr_t)sd + sd->d_reclen);
+ ld = (struct lx_dirent *)(void *)((caddr_t)ld + ld->d_reclen);
+ }
+
+ /* now copy the lbuf to the userland buffer */
+ assert(rc <= lbufsz);
+ if (uucopy(lbuf, buf, rc) != 0)
+ return (-EFAULT);
+
+ return (rc);
+}
+
+/*
+ * Read in dirent64 structures from p1 (fd) into p2 (buffer).
+ * p3 (count) is the size of the memory area.
+ */
+long
+lx_getdents64(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ int fd = (uint_t)p1;
+ void *buf = (void *)p2;
+ void *sbuf, *lbuf;
+ int lbufsz = (uint_t)p3;
+ int sbufsz;
+ int namelen;
+ struct dirent *sd;
+ struct lx_dirent64 *ld;
+ int bytes, rc;
+
+ if (lbufsz < sizeof (struct lx_dirent64))
+ return (-EINVAL);
+
+ /*
+ * The Linux dirent64 is bigger than the Solaris dirent64. To
+ * avoid inadvertently consuming more of the directory than we can
+ * pass back to the Linux app, we hand the kernel a smaller buffer
+ * than the app handed us.
+ */
+ sbufsz = (lbufsz / 32) * 24;
+
+ sbuf = SAFE_ALLOCA(sbufsz);
+ lbuf = SAFE_ALLOCA(lbufsz);
+ if (sbuf == NULL || lbuf == NULL)
+ return (-ENOMEM);
+
+ if ((bytes = getdents(fd, sbuf, sbufsz)) < 0)
+ return (-errno);
+
+ /* munge the Solaris buffer to a linux buffer. */
+ sd = (struct dirent *)sbuf;
+ ld = (struct lx_dirent64 *)lbuf;
+ rc = 0;
+ while (bytes > 0) {
+ namelen = strlen(sd->d_name);
+ if (namelen >= LX_NAMEMAX)
+ namelen = LX_NAMEMAX - 1;
+ ld->d_ino = (uint64_t)sd->d_ino;
+ ld->d_off = (int64_t)sd->d_off;
+ ld->d_type = 0;
+
+ (void) strncpy(ld->d_name, sd->d_name, namelen);
+ ld->d_name[namelen] = 0;
+ ld->d_reclen = (ushort_t)LX_RECLEN64(namelen);
+
+ bytes -= (int)sd->d_reclen;
+ rc += (int)ld->d_reclen;
+
+ sd = (struct dirent *)(void *)((caddr_t)sd + sd->d_reclen);
+ ld = (struct lx_dirent64 *)(void *)((caddr_t)ld + ld->d_reclen);
+ }
+
+ /* now copy the lbuf to the userland buffer */
+ assert(rc <= lbufsz);
+ if (uucopy(lbuf, buf, rc) != 0)
+ return (-EFAULT);
+
+ return (rc);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/fcntl.c b/usr/src/lib/brand/lx/lx_brand/common/fcntl.c
new file mode 100644
index 0000000000..a9f56064b4
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/fcntl.c
@@ -0,0 +1,455 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/filio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stropts.h>
+#include <libintl.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/stat.h>
+
+#include <sys/lx_fcntl.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_syscall.h>
+
+static int lx_fcntl_com(int fd, int cmd, ulong_t arg);
+static void ltos_flock(struct lx_flock *l, struct flock *s);
+static void stol_flock(struct flock *s, struct lx_flock *l);
+static void ltos_flock64(struct lx_flock64 *l, struct flock64 *s);
+static void stol_flock64(struct flock64 *s, struct lx_flock64 *l);
+static short ltos_type(short l_type);
+static short stol_type(short l_type);
+static int lx_fcntl_getfl(int fd);
+static int lx_fcntl_setfl(int fd, ulong_t arg);
+
+long
+lx_dup2(uintptr_t p1, uintptr_t p2)
+{
+ int oldfd = (int)p1;
+ int newfd = (int)p2;
+ int rc;
+
+ rc = fcntl(oldfd, F_DUP2FD, newfd);
+ return ((rc == -1) ? -errno : rc);
+}
+
+long
+lx_dup3(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ int oldfd = (int)p1;
+ int newfd = (int)p2;
+ int flags = (int)p3;
+ int rc;
+
+ /* The only valid flag is O_CLOEXEC. */
+ if (flags & ~LX_O_CLOEXEC)
+ return (-EINVAL);
+
+ if (oldfd == newfd)
+ return (-EINVAL);
+
+ rc = fcntl(oldfd, (flags == 0) ? F_DUP2FD : F_DUP2FD_CLOEXEC, newfd);
+ return ((rc == -1) ? -errno : rc);
+}
+
+long
+lx_fcntl(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ int fd = (int)p1;
+ int cmd = (int)p2;
+ ulong_t arg = (ulong_t)p3;
+ struct lx_flock lxflk;
+ struct flock fl;
+ int lk = 0;
+ int rc;
+
+ /*
+ * The 64-bit fcntl commands must go through fcntl64().
+ */
+ if (cmd == LX_F_GETLK64 || cmd == LX_F_SETLK64 ||
+ cmd == LX_F_SETLKW64)
+ return (-EINVAL);
+
+ if (cmd == LX_F_SETSIG || cmd == LX_F_GETSIG || cmd == LX_F_SETLEASE ||
+ cmd == LX_F_GETLEASE) {
+ lx_unsupported("unsupported fcntl command: %d", cmd);
+ return (-ENOTSUP);
+ }
+
+ if (cmd == LX_F_GETLK || cmd == LX_F_SETLK ||
+ cmd == LX_F_SETLKW) {
+ if (uucopy((void *)p3, (void *)&lxflk,
+ sizeof (struct lx_flock)) != 0)
+ return (-errno);
+ lk = 1;
+ ltos_flock(&lxflk, &fl);
+ arg = (ulong_t)&fl;
+ }
+
+ rc = lx_fcntl_com(fd, cmd, arg);
+
+ if (lk)
+ stol_flock(&fl, (struct lx_flock *)p3);
+
+ return (rc);
+}
+
+long
+lx_fcntl64(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ int fd = (int)p1;
+ int cmd = (int)p2;
+ struct lx_flock lxflk;
+ struct lx_flock64 lxflk64;
+ struct flock fl;
+ struct flock64 fl64;
+ int rc;
+
+ if (cmd == LX_F_SETSIG || cmd == LX_F_GETSIG || cmd == LX_F_SETLEASE ||
+ cmd == LX_F_GETLEASE) {
+ lx_unsupported("unsupported fcntl64 command: %d", cmd);
+ return (-ENOTSUP);
+ }
+
+ if (cmd == LX_F_GETLK || cmd == LX_F_SETLK || cmd == LX_F_SETLKW) {
+ if (uucopy((void *)p3, (void *)&lxflk,
+ sizeof (struct lx_flock)) != 0)
+ return (-errno);
+ ltos_flock(&lxflk, &fl);
+ rc = lx_fcntl_com(fd, cmd, (ulong_t)&fl);
+ stol_flock(&fl, (struct lx_flock *)p3);
+ } else if (cmd == LX_F_GETLK64 || cmd == LX_F_SETLKW64 || \
+ cmd == LX_F_SETLK64) {
+ if (uucopy((void *)p3, (void *)&lxflk64,
+ sizeof (struct lx_flock64)) != 0)
+ return (-errno);
+ ltos_flock64(&lxflk64, &fl64);
+ rc = lx_fcntl_com(fd, cmd, (ulong_t)&fl64);
+ stol_flock64(&fl64, (struct lx_flock64 *)p3);
+ } else {
+ rc = lx_fcntl_com(fd, cmd, (ulong_t)p3);
+ }
+
+ return (rc);
+}
+
+static int
+lx_fcntl_com(int fd, int cmd, ulong_t arg)
+{
+ int rc = 0;
+
+ switch (cmd) {
+ case LX_F_DUPFD:
+ rc = fcntl(fd, F_DUPFD, arg);
+ break;
+
+ case LX_F_DUPFD_CLOEXEC:
+ rc = fcntl(fd, F_DUPFD_CLOEXEC, arg);
+ break;
+
+ case LX_F_GETFD:
+ rc = fcntl(fd, F_GETFD, 0);
+ break;
+
+ case LX_F_SETFD:
+ rc = fcntl(fd, F_SETFD, arg);
+ break;
+
+ case LX_F_GETFL:
+ rc = lx_fcntl_getfl(fd);
+ break;
+
+ case LX_F_SETFL:
+ rc = lx_fcntl_setfl(fd, arg);
+ break;
+
+ case LX_F_GETLK:
+ rc = fcntl(fd, F_GETLK, arg);
+ break;
+
+ case LX_F_SETLK:
+ rc = fcntl(fd, F_SETLK, arg);
+ break;
+
+ case LX_F_SETLKW:
+ rc = fcntl(fd, F_SETLKW, arg);
+ break;
+
+ case LX_F_GETLK64:
+ rc = fcntl(fd, F_GETLK64, arg);
+ break;
+
+ case LX_F_SETLK64:
+ rc = fcntl(fd, F_SETLK64, arg);
+ break;
+
+ case LX_F_SETLKW64:
+ rc = fcntl(fd, F_SETLKW64, arg);
+ break;
+
+ case LX_F_SETOWN:
+ rc = fcntl(fd, F_SETOWN, arg);
+ break;
+
+ case LX_F_GETOWN:
+ rc = fcntl(fd, F_GETOWN, arg);
+ break;
+
+ default:
+ return (-EINVAL);
+ }
+
+ return ((rc == -1) ? -errno : rc);
+}
+
+
+#define LTOS_FLOCK(l, s) \
+{ \
+ s->l_type = ltos_type(l->l_type); \
+ s->l_whence = l->l_whence; \
+ s->l_start = l->l_start; \
+ s->l_len = l->l_len; \
+ s->l_sysid = 0; /* not defined in linux */ \
+ s->l_pid = (pid_t)l->l_pid; \
+}
+
+#define STOL_FLOCK(s, l) \
+{ \
+ l->l_type = stol_type(s->l_type); \
+ l->l_whence = s->l_whence; \
+ l->l_start = s->l_start; \
+ l->l_len = s->l_len; \
+ l->l_pid = (int)s->l_pid; \
+}
+
+static void
+ltos_flock(struct lx_flock *l, struct flock *s)
+{
+ LTOS_FLOCK(l, s)
+}
+
+static void
+stol_flock(struct flock *s, struct lx_flock *l)
+{
+ STOL_FLOCK(s, l)
+}
+
+static void
+ltos_flock64(struct lx_flock64 *l, struct flock64 *s)
+{
+ LTOS_FLOCK(l, s)
+}
+
+static void
+stol_flock64(struct flock64 *s, struct lx_flock64 *l)
+{
+ STOL_FLOCK(s, l)
+}
+
+static short
+ltos_type(short l_type)
+{
+ switch (l_type) {
+ case LX_F_RDLCK:
+ return (F_RDLCK);
+ case LX_F_WRLCK:
+ return (F_WRLCK);
+ case LX_F_UNLCK:
+ return (F_UNLCK);
+ default:
+ return (-1);
+ }
+}
+
+static short
+stol_type(short l_type)
+{
+ switch (l_type) {
+ case F_RDLCK:
+ return (LX_F_RDLCK);
+ case F_WRLCK:
+ return (LX_F_WRLCK);
+ case F_UNLCK:
+ return (LX_F_UNLCK);
+ default:
+ /* can't ever happen */
+ return (0);
+ }
+}
+
+int
+lx_fcntl_getfl(int fd)
+{
+ int retval;
+ int rc;
+
+ retval = fcntl(fd, F_GETFL, 0);
+
+ if ((retval & O_ACCMODE) == O_RDONLY)
+ rc = LX_O_RDONLY;
+ else if ((retval & O_ACCMODE) == O_WRONLY)
+ rc = LX_O_WRONLY;
+ else
+ rc = LX_O_RDWR;
+ /* O_NDELAY != O_NONBLOCK, so we need to check for both */
+ if (retval & O_NDELAY)
+ rc |= LX_O_NDELAY;
+ if (retval & O_NONBLOCK)
+ rc |= LX_O_NONBLOCK;
+ if (retval & O_APPEND)
+ rc |= LX_O_APPEND;
+ if (retval & O_SYNC)
+ rc |= LX_O_SYNC;
+ if (retval & O_LARGEFILE)
+ rc |= LX_O_LARGEFILE;
+ if (retval & FASYNC)
+ rc |= LX_O_ASYNC;
+
+ return (rc);
+}
+
+int
+lx_fcntl_setfl(int fd, ulong_t arg)
+{
+ int new_arg;
+
+ new_arg = 0;
+ /* LX_O_NDELAY == LX_O_NONBLOCK, so we only check for one */
+ if (arg & LX_O_NDELAY)
+ new_arg |= O_NONBLOCK;
+ if (arg & LX_O_APPEND)
+ new_arg |= O_APPEND;
+ if (arg & LX_O_SYNC)
+ new_arg |= O_SYNC;
+ if (arg & LX_O_LARGEFILE)
+ new_arg |= O_LARGEFILE;
+ if (arg & LX_O_ASYNC)
+ new_arg |= FASYNC;
+
+ return ((fcntl(fd, F_SETFL, new_arg) == 0) ? 0 : -errno);
+}
+
+/*
+ * flock() applies or removes an advisory lock on the file
+ * associated with the file descriptor fd.
+ *
+ * Stolen verbatim from usr/src/ucblib/libucb/port/sys/flock.c
+ *
+ * operation is: LX_LOCK_SH, LX_LOCK_EX, LX_LOCK_UN, LX_LOCK_NB
+ */
+long
+lx_flock(uintptr_t p1, uintptr_t p2)
+{
+ int fd = (int)p1;
+ int operation = (int)p2;
+ struct flock fl;
+ int cmd;
+ int ret;
+
+ /* In non-blocking lock, use F_SETLK for cmd, F_SETLKW otherwise */
+ if (operation & LX_LOCK_NB) {
+ cmd = F_SETLK;
+ operation &= ~LX_LOCK_NB; /* turn off this bit */
+ } else
+ cmd = F_SETLKW;
+
+ switch (operation) {
+ case LX_LOCK_UN:
+ fl.l_type = F_UNLCK;
+ break;
+ case LX_LOCK_SH:
+ fl.l_type = F_RDLCK;
+ break;
+ case LX_LOCK_EX:
+ fl.l_type = F_WRLCK;
+ break;
+ default:
+ return (-EINVAL);
+ }
+
+ fl.l_whence = 0;
+ fl.l_start = 0;
+ fl.l_len = 0;
+
+ ret = fcntl(fd, cmd, &fl);
+
+ if (ret == -1 && errno == EACCES)
+ return (-EWOULDBLOCK);
+
+ return ((ret == -1) ? -errno : ret);
+}
+
+/*
+ * Based on Illumos posix_fadvise which does nothing. The only difference is
+ * that on Linux an fd refering to a pipe or FIFO returns EINVAL.
+ * The Linux POSIX_FADV_* values are the same as the Illumos values.
+ * See how glibc calls fadvise64; the offeset is a 64bit value, but the length
+ * is not, whereas fadvise64_64 passes both the offset and length as 64bit
+ * values.
+ */
+/* ARGSUSED */
+long
+lx_fadvise64(uintptr_t p1, off64_t p2, uintptr_t p3, uintptr_t p4)
+{
+ int fd = (int)p1;
+ int advice = (int)p4;
+ int32_t len = (int32_t)p3;
+ struct stat64 statb;
+
+ switch (advice) {
+ case POSIX_FADV_NORMAL:
+ case POSIX_FADV_RANDOM:
+ case POSIX_FADV_SEQUENTIAL:
+ case POSIX_FADV_WILLNEED:
+ case POSIX_FADV_DONTNEED:
+ case POSIX_FADV_NOREUSE:
+ break;
+ default:
+ return (-EINVAL);
+ }
+ if (len < 0)
+ return (-EINVAL);
+ if (fstat64(fd, &statb) != 0)
+ return (-EBADF);
+ if (S_ISFIFO(statb.st_mode))
+ return (-ESPIPE);
+ return (0);
+}
+
+long
+lx_fadvise64_64(uintptr_t p1, off64_t p2, off64_t p3, uintptr_t p4)
+{
+
+ if (p3 < 0)
+ return (-EINVAL);
+
+ return (lx_fadvise64(p1, p2, 0, p4));
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/file.c b/usr/src/lib/brand/lx/lx_brand/common/file.c
new file mode 100644
index 0000000000..56201035ff
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/file.c
@@ -0,0 +1,918 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/fstyp.h>
+#include <sys/fsid.h>
+
+#include <errno.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/vnode.h>
+#include <fcntl.h>
+#include <string.h>
+#include <utime.h>
+#include <atomic.h>
+#include <sys/syscall.h>
+
+#include <sys/lx_syscall.h>
+#include <sys/lx_types.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_fcntl.h>
+
+#define LX_UTIME_NOW ((1l << 30) - 1l)
+#define LX_UTIME_OMIT ((1l << 30) - 2l)
+
+static int
+install_checkpath(uintptr_t p1)
+{
+ int saved_errno = errno;
+ char path[MAXPATHLEN];
+
+ /*
+ * The "dev" RPM package wants to modify /dev/pts, but /dev/pts is a
+ * lofs mounted copy of /native/dev/pts, so that won't work.
+ *
+ * Instead, if we're trying to modify /dev/pts from install mode, just
+ * act as if it succeded.
+ */
+ if (uucopystr((void *)p1, path, MAXPATHLEN) == -1)
+ return (-errno);
+
+ if (strcmp(path, "/dev/pts") == 0)
+ return (0);
+
+ errno = saved_errno;
+ return (-errno);
+}
+
+/*
+ * Convert linux LX_AT_* flags to solaris AT_* flags but skip verifying allowed
+ * flags have been passed. This also allows EACCESS/REMOVEDIR to be translated
+ * correctly since on linux they have the same value.
+ *
+ * Some code can actually pass in other bits in the flag. We may have to simply
+ * ignore these, as indicated by the enforce parameter. See lx_fchmodat for
+ * another example of this type of behavior.
+ */
+int
+ltos_at_flag(int lflag, int allow, boolean_t enforce)
+{
+ int sflag = 0;
+
+ if ((lflag & LX_AT_EACCESS) && (allow & AT_EACCESS)) {
+ lflag &= ~LX_AT_EACCESS;
+ sflag |= AT_EACCESS;
+ }
+
+ if ((lflag & LX_AT_REMOVEDIR) && (allow & AT_REMOVEDIR)) {
+ lflag &= ~LX_AT_REMOVEDIR;
+ sflag |= AT_REMOVEDIR;
+ }
+
+ if ((lflag & LX_AT_SYMLINK_NOFOLLOW) && (allow & AT_SYMLINK_NOFOLLOW)) {
+ lflag &= ~LX_AT_SYMLINK_NOFOLLOW;
+ sflag |= AT_SYMLINK_NOFOLLOW;
+ }
+
+ /* right now solaris doesn't have a _FOLLOW flag, so use a fake one */
+ if ((lflag & LX_AT_SYMLINK_FOLLOW) && (allow & LX_AT_SYMLINK_FOLLOW)) {
+ lflag &= ~LX_AT_SYMLINK_FOLLOW;
+ sflag |= LX_AT_SYMLINK_FOLLOW;
+ }
+
+ /* If lflag is not zero than some flags did not hit the above code. */
+ if (enforce && lflag)
+ return (-EINVAL);
+
+ return (sflag);
+}
+
+
+/*
+ * Miscellaneous file-related system calls.
+ */
+
+/*
+ * Linux creates half-duplex pipes and Illumos creates full-duplex pipes.
+ * Thus, to get the correct semantics, we need to setup pipes in the kernel's
+ * lx brand module.
+ */
+
+long
+lx_pipe2(uintptr_t p1, uintptr_t p2)
+{
+ int flags = 0;
+ int r;
+
+ if (p2 & LX_O_NONBLOCK) {
+ flags |= O_NONBLOCK;
+ p2 &= ~LX_O_NONBLOCK;
+ }
+ if (p2 & LX_O_CLOEXEC) {
+ flags |= O_CLOEXEC;
+ p2 &= ~LX_O_CLOEXEC;
+ }
+ if (p2 != 0)
+ return (-EINVAL);
+
+ r = syscall(SYS_brand, B_IKE_SYSCALL + LX_EMUL_pipe2, p1, flags);
+
+ return ((r == -1) ? -errno : r);
+}
+
+/*
+ * On Linux, even root cannot create a link to a directory, so we have to
+ * add an explicit check.
+ */
+long
+lx_link(uintptr_t p1, uintptr_t p2)
+{
+ char *from = (char *)p1;
+ char *to = (char *)p2;
+ struct stat64 statbuf;
+
+ if ((stat64(from, &statbuf) == 0) && S_ISDIR(statbuf.st_mode))
+ return (-EPERM);
+
+ return (link(from, to) ? -errno : 0);
+}
+
+/*
+ * On Linux, an unlink of a directory returns EISDIR, not EPERM.
+ */
+long
+lx_unlink(uintptr_t p)
+{
+ char *pathname = (char *)p;
+ struct stat64 statbuf;
+
+ if ((lstat64(pathname, &statbuf) == 0) && S_ISDIR(statbuf.st_mode))
+ return (-EISDIR);
+
+ return (unlink(pathname) ? -errno : 0);
+}
+
+long
+lx_unlinkat(uintptr_t ext1, uintptr_t p1, uintptr_t p2)
+{
+ int atfd = (int)ext1;
+ char *pathname = (char *)p1;
+ int flag = (int)p2;
+ struct stat64 statbuf;
+
+ if (atfd == LX_AT_FDCWD)
+ atfd = AT_FDCWD;
+
+ flag = ltos_at_flag(flag, AT_REMOVEDIR, B_TRUE);
+ if (flag < 0)
+ return (-EINVAL);
+
+ if (!(flag & AT_REMOVEDIR)) {
+ /* Behave like unlink() */
+ if ((fstatat64(atfd, pathname, &statbuf, AT_SYMLINK_NOFOLLOW) ==
+ 0) && S_ISDIR(statbuf.st_mode))
+ return (-EISDIR);
+ }
+
+ return (unlinkat(atfd, pathname, flag) ? -errno : 0);
+}
+
+/*
+ * fsync() and fdatasync() - On Illumos, these calls translate into a common
+ * fdsync() syscall with a different parameter. fsync is handled in the
+ * fsync wrapper.
+ */
+long
+lx_fsync(uintptr_t fd)
+{
+ int fildes = (int)fd;
+ struct stat64 statbuf;
+
+ if ((fstat64(fildes, &statbuf) == 0) &&
+ (S_ISCHR(statbuf.st_mode) || S_ISFIFO(statbuf.st_mode)))
+ return (-EINVAL);
+
+ return (fsync((int)fd) ? -errno : 0);
+}
+
+long
+lx_fdatasync(uintptr_t fd)
+{
+ int fildes = (int)fd;
+ struct stat64 statbuf;
+
+ if ((fstat64(fildes, &statbuf) == 0) &&
+ (S_ISCHR(statbuf.st_mode) || S_ISFIFO(statbuf.st_mode)))
+ return (-EINVAL);
+
+ return (fdatasync((int)fd) ? -errno : 0);
+}
+
+/*
+ * Linux, unlike Solaris, ALWAYS resets the setuid and setgid bits on a
+ * chown/fchown regardless of whether it was done by root or not. Therefore,
+ * we must do extra work after each chown/fchown call to emulate this behavior.
+ */
+#define SETUGID (S_ISUID | S_ISGID)
+
+/*
+ * [lf]chown16() - Translate the uid/gid and pass onto the real functions.
+ */
+long
+lx_chown16(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ char *filename = (char *)p1;
+ struct stat64 statbuf;
+
+ if (chown(filename, LX_UID16_TO_UID32((lx_gid16_t)p2),
+ LX_GID16_TO_GID32((lx_gid16_t)p3)))
+ return (-errno);
+
+ if (stat64(filename, &statbuf) == 0) {
+ statbuf.st_mode &= ~S_ISUID;
+ if (statbuf.st_mode & S_IXGRP)
+ statbuf.st_mode &= ~S_ISGID;
+ (void) chmod(filename, (statbuf.st_mode & MODEMASK));
+ }
+
+ return (0);
+}
+
+long
+lx_fchown16(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ int fd = (int)p1;
+ struct stat64 statbuf;
+
+ if (fchown(fd, LX_UID16_TO_UID32((lx_gid16_t)p2),
+ LX_GID16_TO_GID32((lx_gid16_t)p3)))
+ return (-errno);
+
+ if (fstat64(fd, &statbuf) == 0) {
+ statbuf.st_mode &= ~S_ISUID;
+ if (statbuf.st_mode & S_IXGRP)
+ statbuf.st_mode &= ~S_ISGID;
+ (void) fchmod(fd, (statbuf.st_mode & MODEMASK));
+ }
+
+ return (0);
+}
+
+long
+lx_lchown16(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ return (lchown((char *)p1, LX_UID16_TO_UID32((lx_gid16_t)p2),
+ LX_GID16_TO_GID32((lx_gid16_t)p3)) ? -errno : 0);
+}
+
+long
+lx_chown(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ char *filename = (char *)p1;
+ struct stat64 statbuf;
+ int ret;
+
+ ret = chown(filename, (uid_t)p2, (gid_t)p3);
+
+ if (ret < 0) {
+ /*
+ * If chown() failed and we're in install mode, return success
+ * if the the reason we failed was because the source file
+ * didn't actually exist or if we're trying to modify /dev/pts.
+ */
+ if ((lx_install != 0) &&
+ ((errno == ENOENT) || (install_checkpath(p1) == 0)))
+ return (0);
+
+ return (-errno);
+ }
+
+ if (stat64(filename, &statbuf) == 0) {
+ statbuf.st_mode &= ~S_ISUID;
+ if (statbuf.st_mode & S_IXGRP)
+ statbuf.st_mode &= ~S_ISGID;
+ (void) chmod(filename, (statbuf.st_mode & MODEMASK));
+ }
+
+ return (0);
+}
+
+long
+lx_fchown(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ int fd = (int)p1;
+ struct stat64 statbuf;
+
+ if (fchown(fd, (uid_t)p2, (gid_t)p3))
+ return (-errno);
+
+ if (fstat64(fd, &statbuf) == 0) {
+ statbuf.st_mode &= ~S_ISUID;
+ if (statbuf.st_mode & S_IXGRP)
+ statbuf.st_mode &= ~S_ISGID;
+ (void) fchmod(fd, (statbuf.st_mode & MODEMASK));
+ }
+
+ return (0);
+}
+
+long
+lx_chmod(uintptr_t p1, uintptr_t p2)
+{
+ int ret;
+
+ ret = chmod((const char *)p1, (mode_t)p2);
+
+ if (ret < 0) {
+ /*
+ * If chown() failed and we're in install mode, return success
+ * if the the reason we failed was because the source file
+ * didn't actually exist or if we're trying to modify /dev/pts.
+ */
+ if ((lx_install != 0) &&
+ ((errno == ENOENT) || (install_checkpath(p1) == 0)))
+ return (0);
+
+ return (-errno);
+ }
+
+ return (0);
+}
+
+long
+lx_utime(uintptr_t p1, uintptr_t p2)
+{
+ int ret;
+
+ ret = utime((const char *)p1, (const struct utimbuf *)p2);
+
+ if (ret < 0) {
+ /*
+ * If chown() failed and we're in install mode, return success
+ * if the the reason we failed was because the source file
+ * didn't actually exist or if we're trying to modify /dev/pts.
+ */
+ if ((lx_install != 0) &&
+ ((errno == ENOENT) || (install_checkpath(p1) == 0)))
+ return (0);
+
+ return (-errno);
+ }
+
+ return (0);
+}
+
+#if defined(_ILP32)
+/*
+ * llseek() - The Linux implementation takes an additional parameter, which is
+ * the resulting position in the file.
+ */
+long
+lx_llseek(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
+ uintptr_t p5)
+{
+ offset_t ret;
+ offset_t *res = (offset_t *)p4;
+
+ /* SEEK_DATA and SEEK_HOLE are only valid in Solaris */
+ if ((int)p5 > SEEK_END)
+ return (-EINVAL);
+
+ if ((ret = llseek((int)p1, LX_32TO64(p3, p2), p5)) < 0)
+ return (-errno);
+
+ *res = ret;
+ return (0);
+}
+#endif
+
+/*
+ * seek() - For 32-bit lx, when the resultant file offset cannot be represented
+ * in 32 bits, Linux performs the seek but Illumos doesn't, though both set
+ * EOVERFLOW. We call llseek() and then check to see if we need to return
+ * EOVERFLOW.
+ */
+long
+lx_lseek(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ offset_t offset = (offset_t)(off_t)(p2); /* sign extend */
+ offset_t ret;
+#if defined(_ILP32)
+ off_t ret32;
+#endif
+
+ /* SEEK_DATA and SEEK_HOLE are only valid in Illumos */
+ if ((int)p3 > SEEK_END)
+ return (-EINVAL);
+
+ if ((ret = llseek((int)p1, offset, p3)) < 0)
+ return (-errno);
+
+#if defined(_LP64)
+ return (ret);
+#else
+ ret32 = (off_t)ret;
+ if ((offset_t)ret32 == ret)
+ return (ret32);
+ else
+ return (-EOVERFLOW);
+#endif
+}
+
+/*
+ * Neither Illumos nor Linux actually returns anything to the caller, but glibc
+ * expects to see SOME value returned, so placate it and return 0.
+ */
+long
+lx_sync(void)
+{
+ sync();
+ return (0);
+}
+
+long
+lx_rmdir(uintptr_t p1)
+{
+ int r;
+ char *nm = (char *)p1;
+
+ r = rmdir(nm);
+ if (r < 0) {
+ int terr = errno;
+
+ /*
+ * On both Illumos and Linux rmdir returns EINVAL if the last
+ * component of the path is '.', but on Illumos we also return
+ * this errno if we're trying to remove the CWD. Unfortunately,
+ * at least the LTP test suite assumes that it can rmdir the
+ * CWD, so we need handle this. We try to get out of the
+ * directory we're trying to remove.
+ */
+ if (terr == EINVAL) {
+ int l;
+
+ l = strlen(nm);
+ if (l >= 2 && !(nm[l - 2] == '/' && nm[l - 1] == '.')) {
+ if (chdir("..") == 0 && rmdir(nm) == 0) {
+ return (0);
+ }
+ }
+ }
+
+ return ((terr == EEXIST) ? -ENOTEMPTY : -terr);
+ }
+ return (0);
+}
+
+/*
+ * Exactly the same as Illumos' sysfs(2), except Linux numbers their fs indices
+ * starting at 0, and Illumos starts at 1.
+ */
+long
+lx_sysfs(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ int option = (int)p1;
+ int res;
+
+ /*
+ * Linux actually doesn't have #defines for these; their sysfs(2)
+ * man page literally defines the "option" field as being 1, 2 or 3,
+ * corresponding to Solaris' GETFSIND, GETFSTYP and GETNFSTYP,
+ * respectively.
+ */
+ switch (option) {
+ case 1:
+ if ((res = sysfs(GETFSIND, (const char *)p2)) < 0)
+ return (-errno);
+
+ return (res - 1);
+
+ case 2:
+ if ((res = sysfs(GETFSTYP, (int)p2 + 1,
+ (char *)p3)) < 0)
+ return (-errno);
+
+ return (0);
+
+ case 3:
+ if ((res = sysfs(GETNFSTYP)) < 0)
+ return (-errno);
+
+ return (res);
+
+ default:
+ break;
+ }
+
+ return (-EINVAL);
+}
+
+/*
+ * For Illumos, access() does this:
+ * If the process has appropriate privileges, an implementation may indicate
+ * success for X_OK even if none of the execute file permission bits are set.
+ *
+ * But for Linux, access() does this:
+ * If the calling process is privileged (i.e., its real UID is zero), then
+ * an X_OK check is successful for a regular file if execute permission is
+ * enabled for any of the file owner, group, or other.
+ *
+ * Linux used to behave more like Illumos on older kernels:
+ * In kernel 2.4 (and earlier) there is some strangeness in the handling
+ * of X_OK tests for superuser. If all categories of execute permission
+ * are disabled for a nondirectory file, then the only access() test that
+ * returns -1 is when mode is specified as just X_OK; if R_OK or W_OK is
+ * also specified in mode, then access() returns 0 for such files. Early
+ * 2.6 kernels (up to and including 2.6.3) also behaved in the same way as
+ * kernel 2.4.
+ *
+ * So we need to handle the case where a privileged process is checking for
+ * X_OK but none of the execute bits are set on the file. We'll keep the old
+ * 2.4 behavior for 2.4 emulation but use the new behavior for any other
+ * kernel rev. (since 2.6.3 is uninteresting, no relevant distros use that).
+ */
+long
+lx_access(uintptr_t p1, uintptr_t p2)
+{
+ char *path = (char *)p1;
+ int mode = (mode_t)p2;
+ int ret;
+
+ ret = access(path, mode);
+
+ if (ret == 0 && (mode & X_OK) && strncmp(lx_release, "2.4", 3) != 0 &&
+ getuid() == 0) {
+ /* check for incorrect execute success */
+ struct stat64 sb;
+
+ if (stat64(path, &sb) < 0)
+ return (-errno);
+
+ if ((sb.st_mode & S_IFMT) == S_IFREG &&
+ !(sb.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) {
+ /* no execute bits are set in the mode */
+ return (-EACCES);
+ }
+ }
+
+ return (ret ? -errno : 0);
+
+}
+
+long
+lx_faccessat(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
+{
+ int atfd = (int)p1;
+ char *path = (char *)p2;
+ int mode = (mode_t)p3;
+ int flag = (int)p4;
+
+ if (atfd == LX_AT_FDCWD)
+ atfd = AT_FDCWD;
+
+ flag = ltos_at_flag(flag, AT_EACCESS, B_FALSE);
+ if (flag < 0)
+ return (-EINVAL);
+
+ return (faccessat(atfd, path, mode, flag) ? -errno : 0);
+}
+
+long
+lx_futimesat(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ int atfd = (int)p1;
+ char *path = (char *)p2;
+ struct timeval *times = (struct timeval *)p3;
+
+ if (atfd == LX_AT_FDCWD)
+ atfd = AT_FDCWD;
+
+ return (futimesat(atfd, path, times) ? -errno : 0);
+}
+
+/*
+ * From the utimensat man page:
+ * On Linux, futimens() is a library function implemented on top of the
+ * utimensat() system call. To support this, the Linux utimensat() system
+ * call implements a nonstandard feature: if pathname is NULL, then the
+ * call modifies the timestamps of the file referred to by the file
+ * descriptor dirfd (which may refer to any type of file). Using this
+ * feature, the call futimens(fd, times) is implemented as:
+ *
+ * utimensat(fd, NULL, times, 0);
+ *
+ * Some of the returns fail here. Linux allows the time to be modified if:
+ *
+ * the caller must have write access to the file
+ * or
+ * the caller's effective user ID must match the owner of the file
+ * or
+ * the caller must have appropriate privileges
+ *
+ * We behave differently. We fail with EPERM if:
+ *
+ * the calling process's euid has write access to the file but does not match
+ * the owner of the file and the calling process does not have the
+ * appropriate privileges
+ *
+ * This causes some of the LTP utimensat tests to fail because they expect an
+ * unprivileged process can update the time on a file it can write but does not
+ * own. There are also other LTP failures when the test uses attributes
+ * (e.g. chattr a+) and expects a failure, but we succeed.
+ */
+long
+lx_utimensat(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
+{
+ int fd = (int)p1;
+ const char *path = (const char *)p2;
+ const timespec_t *times = (const timespec_t *)p3;
+ timespec_t ts[2];
+ int flag = (int)p4;
+
+ if (times != NULL) {
+ if (uucopy((void *)p3, ts, sizeof (ts)) == -1)
+ return (-errno);
+
+ if (ts[0].tv_nsec == LX_UTIME_NOW)
+ ts[0].tv_nsec = UTIME_NOW;
+ if (ts[1].tv_nsec == LX_UTIME_NOW)
+ ts[1].tv_nsec = UTIME_NOW;
+
+ if (ts[0].tv_nsec == LX_UTIME_OMIT)
+ ts[0].tv_nsec = UTIME_OMIT;
+ if (ts[1].tv_nsec == LX_UTIME_OMIT)
+ ts[1].tv_nsec = UTIME_OMIT;
+
+ times = (const timespec_t *)ts;
+ }
+
+ if (flag == LX_AT_SYMLINK_NOFOLLOW)
+ flag = AT_SYMLINK_NOFOLLOW;
+
+ if (fd == LX_AT_FDCWD)
+ fd = AT_FDCWD;
+
+ if (path == NULL) {
+ return (futimens(fd, times) ? -errno : 0);
+ } else {
+ return (utimensat(fd, path, times, flag) ? -errno : 0);
+ }
+}
+
+/*
+ * Constructs an absolute path string in buf from the path of fd and the
+ * relative path string pointed to by "p1". This is required for emulating
+ * *at() system calls.
+ * Example:
+ * If the path of fd is "/foo/bar" and path is "etc" the string returned is
+ * "/foo/bar/etc", if the fd is a file fd then it fails with ENOTDIR.
+ * If path is absolute then no modifcations are made to it when copied.
+ */
+static int
+getpathat(int fd, uintptr_t p1, char *outbuf, size_t outbuf_size)
+{
+ char pathbuf[MAXPATHLEN];
+ char fdpathbuf[MAXPATHLEN];
+ char *fdpath;
+ struct stat64 statbuf;
+
+ if (uucopystr((void *)p1, pathbuf, MAXPATHLEN) == -1)
+ return (-errno);
+
+ /* If the path is absolute then we can early out */
+ if ((pathbuf[0] == '/') || (fd == LX_AT_FDCWD)) {
+ (void) strlcpy(outbuf, pathbuf, outbuf_size);
+ return (0);
+ }
+
+ fdpath = lx_fd_to_path(fd, fdpathbuf, sizeof (fdpathbuf));
+ if (fdpath == NULL)
+ return (-EBADF);
+
+ if ((fstat64(fd, &statbuf) < 0))
+ return (-EBADF);
+
+ if (!S_ISDIR(statbuf.st_mode))
+ return (-ENOTDIR);
+
+ if (snprintf(outbuf, outbuf_size, "%s/%s", fdpath, pathbuf) >
+ (outbuf_size-1))
+ return (-ENAMETOOLONG);
+
+ return (0);
+}
+
+long
+lx_mkdirat(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ int atfd = (int)p1;
+ mode_t mode = (mode_t)p3;
+ char pathbuf[MAXPATHLEN];
+ int ret;
+
+ ret = getpathat(atfd, p2, pathbuf, sizeof (pathbuf));
+ if (ret < 0)
+ return (ret);
+
+ return (mkdir(pathbuf, mode) ? -errno : 0);
+}
+
+long
+lx_mknodat(uintptr_t ext1, uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ int atfd = (int)ext1;
+ char pathbuf[MAXPATHLEN];
+ int ret;
+
+ ret = getpathat(atfd, p1, pathbuf, sizeof (pathbuf));
+ if (ret < 0)
+ return (ret);
+
+ return (lx_mknod((uintptr_t)pathbuf, p2, p3));
+}
+
+long
+lx_symlinkat(uintptr_t p1, uintptr_t ext1, uintptr_t p2)
+{
+ int atfd = (int)ext1;
+ char pathbuf[MAXPATHLEN];
+ int ret;
+
+ ret = getpathat(atfd, p2, pathbuf, sizeof (pathbuf));
+ if (ret < 0) {
+ if (ret == -EBADF) {
+ /*
+ * Try to figure out correct Linux errno. We know path
+ * is relative. Check if we have a fd for a dir which
+ * has been removed.
+ */
+ if (atfd != -1 && lx_fd_to_path(atfd, pathbuf,
+ sizeof (pathbuf)) == NULL)
+ ret = -ENOENT;
+ }
+ return (ret);
+ }
+
+ return (symlink((char *)p1, pathbuf) ? -errno : 0);
+}
+
+long
+lx_linkat(uintptr_t ext1, uintptr_t p1, uintptr_t ext2, uintptr_t p2,
+ uintptr_t p3)
+{
+ int atfd1 = (int)ext1;
+ int atfd2 = (int)ext2;
+ char pathbuf1[MAXPATHLEN];
+ char pathbuf2[MAXPATHLEN];
+ int ret;
+
+ /*
+ * The flag specifies whether the hardlink will point to a symlink or
+ * not, on solaris the default behaviour of link() is to dereference a
+ * symlink and there is no obvious way to trigger the other behaviour.
+ * So for now we just ignore this flag and act like link().
+ */
+ int flag = p3;
+
+ /* return proper error for invalid flags */
+ if ((flag & ~(LX_AT_SYMLINK_FOLLOW | LX_AT_EMPTY_PATH)) != 0)
+ return (-EINVAL);
+
+ ret = getpathat(atfd1, p1, pathbuf1, sizeof (pathbuf1));
+ if (ret < 0) {
+ if (ret == -EBADF) {
+ /*
+ * Try to figure out correct Linux errno. We know path
+ * is relative. Check if we have a fd for a dir which
+ * has been removed.
+ */
+ if (atfd1 != -1 && lx_fd_to_path(atfd1, pathbuf1,
+ sizeof (pathbuf1)) == NULL)
+ ret = -ENOENT;
+ }
+ return (ret);
+ }
+
+ ret = getpathat(atfd2, p2, pathbuf2, sizeof (pathbuf2));
+ if (ret < 0) {
+ if (ret == -EBADF) {
+ /*
+ * Try to figure out correct Linux errno. We know path
+ * is relative. Check if we have a fd for a dir which
+ * has been removed.
+ */
+ if (atfd2 != -1 && lx_fd_to_path(atfd2, pathbuf2,
+ sizeof (pathbuf2)) == NULL)
+ ret = -ENOENT;
+ }
+ return (ret);
+ }
+
+ return (lx_link((uintptr_t)pathbuf1, (uintptr_t)pathbuf2));
+}
+
+long
+lx_readlink(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ int ret;
+
+ if ((size_t)p3 <= 0)
+ return (-EINVAL);
+
+ ret = readlink((char *)p1, (char *)p2, (size_t)p3);
+ if (ret < 0)
+ return (-errno);
+
+ return (ret);
+}
+
+long
+lx_readlinkat(uintptr_t ext1, uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ int atfd = (int)ext1;
+ char pathbuf[MAXPATHLEN];
+ int ret;
+
+ if ((size_t)p3 <= 0)
+ return (-EINVAL);
+
+ ret = getpathat(atfd, p1, pathbuf, sizeof (pathbuf));
+ if (ret < 0)
+ return (ret);
+
+ ret = readlink(pathbuf, (char *)p2, (size_t)p3);
+ if (ret < 0)
+ return (-errno);
+
+ return (ret);
+}
+
+long
+lx_fchownat(uintptr_t ext1, uintptr_t p1, uintptr_t p2, uintptr_t p3,
+ uintptr_t p4)
+{
+ int flag;
+ int atfd = (int)ext1;
+ char pathbuf[MAXPATHLEN];
+ int ret;
+
+ flag = ltos_at_flag(p4, AT_SYMLINK_NOFOLLOW, B_TRUE);
+ if (flag < 0)
+ return (-EINVAL);
+
+ ret = getpathat(atfd, p1, pathbuf, sizeof (pathbuf));
+ if (ret < 0)
+ return (ret);
+
+ if (flag & AT_SYMLINK_NOFOLLOW)
+ return (lchown(pathbuf, (uid_t)p2, (gid_t)p3) ? -errno : 0);
+ else
+ return (lx_chown((uintptr_t)pathbuf, p2, p3));
+}
+
+/*ARGSUSED*/
+long
+lx_fchmodat(uintptr_t ext1, uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ int atfd = (int)ext1;
+ char pathbuf[MAXPATHLEN];
+ int ret;
+
+ /*
+ * It seems that at least some versions of glibc do not set or clear
+ * the flags arg, so checking them will result in random behaviour.
+ */
+
+ ret = getpathat(atfd, p1, pathbuf, sizeof (pathbuf));
+ if (ret < 0)
+ return (ret);
+
+ return (lx_chmod((uintptr_t)pathbuf, p2));
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/fork.c b/usr/src/lib/brand/lx/lx_brand/common/fork.c
new file mode 100644
index 0000000000..6e72e2c87e
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/fork.c
@@ -0,0 +1,72 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <errno.h>
+#include <unistd.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_syscall.h>
+
+/*
+ * fork() and vfork()
+ *
+ * These cannot be pass thru system calls because we need libc to do its own
+ * initialization or else bad things will happen (i.e. ending up with a bad
+ * schedctl page). On Linux, there is no such thing as forkall(), so we use
+ * fork1() here.
+ */
+long
+lx_fork(void)
+{
+ int ret = fork1();
+
+ if (ret == 0) {
+ if (lx_is_rpm)
+ (void) sleep(lx_rpm_delay);
+ lx_ptrace_stop_if_option(LX_PTRACE_O_TRACEFORK);
+ }
+
+ return (ret == -1 ? -errno : ret);
+}
+
+/*
+ * For vfork(), we have a serious problem because the child is not allowed to
+ * return from the current frame because it will corrupt the parent's stack.
+ * Since the semantics of vfork() are rather ill-defined (other than "it's
+ * faster than fork"), we should theoretically be safe by falling back to
+ * fork1().
+ */
+long
+lx_vfork(void)
+{
+ int ret = fork1();
+
+ if (ret == 0) {
+ lx_ptrace_stop_if_option(LX_PTRACE_O_TRACEVFORK);
+ }
+
+ return (ret == -1 ? -errno : ret);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/id.c b/usr/src/lib/brand/lx/lx_brand/common/id.c
new file mode 100644
index 0000000000..cd5baefa7d
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/id.c
@@ -0,0 +1,271 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <fcntl.h>
+#include <procfs.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/zone.h>
+#include <sys/lx_types.h>
+#include <sys/lx_syscall.h>
+#include <sys/cred_impl.h>
+#include <sys/policy.h>
+#include <sys/ucred.h>
+#include <sys/syscall.h>
+#include <alloca.h>
+#include <errno.h>
+#include <ucred.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/lx_misc.h>
+
+
+long
+lx_setuid16(uintptr_t uid)
+{
+ return ((setuid(LX_UID16_TO_UID32((lx_uid16_t)uid))) ? -errno : 0);
+}
+
+long
+lx_getuid16(void)
+{
+ return ((int)LX_UID32_TO_UID16(getuid()));
+}
+
+long
+lx_setgid16(uintptr_t gid)
+{
+ return ((setgid(LX_GID16_TO_GID32((lx_gid16_t)gid))) ? -errno : 0);
+}
+
+long
+lx_getgid16(void)
+{
+ return ((int)LX_GID32_TO_GID16(getgid()));
+}
+
+long
+lx_geteuid16(void)
+{
+ return ((int)LX_UID32_TO_UID16(geteuid()));
+}
+
+long
+lx_getegid16(void)
+{
+ return ((int)LX_GID32_TO_GID16(getegid()));
+}
+
+long
+lx_geteuid(void)
+{
+ return ((int)geteuid());
+}
+
+long
+lx_getegid(void)
+{
+ return ((int)getegid());
+}
+
+long
+lx_getresuid(uintptr_t ruid, uintptr_t euid, uintptr_t suid)
+{
+ lx_uid_t lx_ruid, lx_euid, lx_suid;
+ ucred_t *cr;
+ size_t sz;
+
+ /*
+ * We allocate a ucred_t ourselves rather than call ucred_get(3C)
+ * because ucred_get() calls malloc(3C), which the brand library cannot
+ * use. Because we allocate the space with SAFE_ALLOCA(), there's
+ * no need to free it when we're done.
+ */
+ sz = ucred_size();
+ cr = (ucred_t *)SAFE_ALLOCA(sz);
+ if (cr == NULL)
+ return (-ENOMEM);
+
+ if (syscall(SYS_ucredsys, UCREDSYS_UCREDGET, P_MYID, cr) != 0)
+ return (-errno);
+
+ if (((lx_ruid = (lx_uid_t)ucred_getruid(cr)) == (lx_uid_t)-1) ||
+ ((lx_euid = (lx_uid_t)ucred_geteuid(cr)) == (lx_uid_t)-1) ||
+ ((lx_suid = (lx_uid_t)ucred_getsuid(cr)) == (lx_uid_t)-1)) {
+ return (-errno);
+ }
+
+ if (uucopy(&lx_ruid, (void *)ruid, sizeof (lx_uid_t)) != 0)
+ return (-errno);
+
+ if (uucopy(&lx_euid, (void *)euid, sizeof (lx_uid_t)) != 0)
+ return (-errno);
+
+ return ((uucopy(&lx_suid, (void *)suid, sizeof (lx_uid_t)) != 0)
+ ? -errno : 0);
+}
+
+long
+lx_getresuid16(uintptr_t ruid16, uintptr_t euid16, uintptr_t suid16)
+{
+ lx_uid_t lx_ruid, lx_euid, lx_suid;
+ lx_uid16_t lx_ruid16, lx_euid16, lx_suid16;
+ int rv;
+
+ if ((rv = lx_getresuid((uintptr_t)&lx_ruid, (uintptr_t)&lx_euid,
+ (uintptr_t)&lx_suid)) != 0)
+ return (rv);
+
+ lx_ruid16 = LX_UID32_TO_UID16(lx_ruid);
+ lx_euid16 = LX_UID32_TO_UID16(lx_euid);
+ lx_suid16 = LX_UID32_TO_UID16(lx_suid);
+
+ if (uucopy(&lx_ruid16, (void *)ruid16, sizeof (lx_uid16_t)) != 0)
+ return (-errno);
+
+ if (uucopy(&lx_euid16, (void *)euid16, sizeof (lx_uid16_t)) != 0)
+ return (-errno);
+
+ return ((uucopy(&lx_suid16, (void *)suid16, sizeof (lx_uid16_t)) != 0)
+ ? -errno : 0);
+}
+
+long
+lx_getresgid(uintptr_t rgid, uintptr_t egid, uintptr_t sgid)
+{
+ ucred_t *cr;
+ lx_gid_t lx_rgid, lx_egid, lx_sgid;
+ size_t sz;
+
+ /*
+ * We allocate a ucred_t ourselves rather than call ucred_get(3C)
+ * because ucred_get() calls malloc(3C), which the brand library cannot
+ * use. Because we allocate the space with SAFE_ALLOCA(), there's
+ * no need to free it when we're done.
+ */
+ sz = ucred_size();
+ cr = (ucred_t *)SAFE_ALLOCA(sz);
+ if (cr == NULL)
+ return (-ENOMEM);
+
+ if (syscall(SYS_ucredsys, UCREDSYS_UCREDGET, P_MYID, cr) != 0)
+ return (-errno);
+
+ if (((lx_rgid = (lx_gid_t)ucred_getrgid(cr)) == (lx_gid_t)-1) ||
+ ((lx_egid = (lx_gid_t)ucred_getegid(cr)) == (lx_gid_t)-1) ||
+ ((lx_sgid = (lx_gid_t)ucred_getsgid(cr)) == (lx_gid_t)-1)) {
+ return (-errno);
+ }
+
+ if (uucopy(&lx_rgid, (void *)rgid, sizeof (lx_gid_t)) != 0)
+ return (-errno);
+
+ if (uucopy(&lx_egid, (void *)egid, sizeof (lx_gid_t)) != 0)
+ return (-errno);
+
+ return ((uucopy(&lx_sgid, (void *)sgid, sizeof (lx_gid_t)) != 0)
+ ? -errno : 0);
+}
+
+long
+lx_getresgid16(uintptr_t rgid16, uintptr_t egid16, uintptr_t sgid16)
+{
+ lx_gid_t lx_rgid, lx_egid, lx_sgid;
+ lx_gid16_t lx_rgid16, lx_egid16, lx_sgid16;
+ int rv;
+
+ if ((rv = lx_getresgid((uintptr_t)&lx_rgid, (uintptr_t)&lx_egid,
+ (uintptr_t)&lx_sgid)) != 0)
+ return (rv);
+
+ lx_rgid16 = LX_UID32_TO_UID16(lx_rgid);
+ lx_egid16 = LX_UID32_TO_UID16(lx_egid);
+ lx_sgid16 = LX_UID32_TO_UID16(lx_sgid);
+
+ if (uucopy(&lx_rgid16, (void *)rgid16, sizeof (lx_gid16_t)) != 0)
+ return (-errno);
+
+ if (uucopy(&lx_egid16, (void *)egid16, sizeof (lx_gid16_t)) != 0)
+ return (-errno);
+
+ return ((uucopy(&lx_sgid16, (void *)sgid16, sizeof (lx_gid16_t)) != 0)
+ ? -errno : 0);
+}
+
+long
+lx_setreuid16(uintptr_t ruid, uintptr_t euid)
+{
+ return ((setreuid(LX_UID16_TO_UID32((lx_uid16_t)ruid),
+ LX_UID16_TO_UID32((lx_uid16_t)euid))) ? -errno : 0);
+}
+
+long
+lx_setregid16(uintptr_t rgid, uintptr_t egid)
+{
+ return ((setregid(LX_UID16_TO_UID32((lx_gid16_t)rgid),
+ LX_UID16_TO_UID32((lx_gid16_t)egid))) ? -errno : 0);
+}
+
+/*
+ * The lx brand cannot support the setfs[ug]id16/setfs[ug]id calls as that
+ * would require significant rework of Solaris' privilege mechanisms, so
+ * instead return the current effective [ug]id.
+ *
+ * In Linux, fsids track effective IDs, so returning the effective IDs works
+ * as a substitute; returning the current value also denotes failure of the
+ * call if the caller had specified something different. We don't need to
+ * worry about setting error codes because the Linux calls don't set any.
+ */
+/*ARGSUSED*/
+long
+lx_setfsuid16(uintptr_t fsuid16)
+{
+ return (lx_geteuid16());
+}
+
+/*ARGSUSED*/
+long
+lx_setfsgid16(uintptr_t fsgid16)
+{
+ return (lx_getegid16());
+}
+
+/*ARGSUSED*/
+long
+lx_setfsuid(uintptr_t fsuid)
+{
+ return (geteuid());
+}
+
+/*ARGSUSED*/
+long
+lx_setfsgid(uintptr_t fsgid)
+{
+ return (getegid());
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/iovec.c b/usr/src/lib/brand/lx/lx_brand/common/iovec.c
new file mode 100644
index 0000000000..d4b74e8daf
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/iovec.c
@@ -0,0 +1,283 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <errno.h>
+#include <unistd.h>
+#include <sys/uio.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <alloca.h>
+#include <string.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_types.h>
+
+static int
+lx_is_directory(int fd)
+{
+ struct stat64 sbuf;
+
+ if (fstat64(fd, &sbuf) < 0)
+ sbuf.st_mode = 0;
+
+ return ((sbuf.st_mode & S_IFMT) == S_IFDIR);
+}
+
+#if defined(_LP64)
+long
+lx_pread(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
+{
+ int fd = (int)p1;
+ void *buf = (void *)p2;
+ size_t nbyte = (size_t)p3;
+ off64_t off = (off64_t)p4;
+ ssize_t ret;
+
+ if (lx_is_directory(fd))
+ return (-EISDIR);
+
+ ret = pread64(fd, buf, nbyte, off);
+
+ if (ret < 0)
+ return (-errno);
+
+ return (ret);
+}
+
+/*
+ * On Linux, the pwrite(2) system call behaves identically to Solaris except
+ * in the case of the file being opened with O_APPEND. In that case Linux's
+ * pwrite(2) ignores the offset parameter and instead appends the data to the
+ * file without modifying the current seek pointer.
+ */
+long
+lx_pwrite(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
+{
+ int fd = (int)p1;
+ void *buf = (void *)p2;
+ size_t nbyte = (size_t)p3;
+ off64_t off = (off64_t)p4;
+ ssize_t ret;
+ int rval;
+ struct stat64 statbuf;
+
+ if ((rval = fcntl(fd, F_GETFL, 0)) < 0)
+ return (-errno);
+
+ if (!(rval & O_APPEND)) {
+ ret = pwrite64(fd, buf, nbyte, off);
+ } else if ((ret = fstat64(fd, &statbuf)) == 0) {
+ ret = pwrite64(fd, buf, nbyte, statbuf.st_size);
+ }
+
+ if (ret < 0)
+ return (-errno);
+
+ return (ret);
+}
+
+#else /* 32 bit */
+
+long
+lx_pread64(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, uintptr_t p5)
+{
+ int fd = (int)p1;
+ void *buf = (void *)p2;
+ size_t nbyte = (size_t)p3;
+ uintptr_t off_lo = p4;
+ uintptr_t off_hi = p5;
+ ssize_t ret;
+
+ if (lx_is_directory(fd))
+ return (-EISDIR);
+
+ ret = pread64(fd, buf, nbyte, (off64_t)LX_32TO64(off_lo, off_hi));
+
+ if (ret < 0)
+ return (-errno);
+
+ return (ret);
+}
+
+/*
+ * On Linux, the pwrite(2) system call behaves identically to Solaris except
+ * in the case of the file being opened with O_APPEND. In that case Linux's
+ * pwrite(2) ignores the offset parameter and instead appends the data to the
+ * file without modifying the current seek pointer.
+ */
+long
+lx_pwrite64(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
+ uintptr_t p5)
+{
+ int fd = (int)p1;
+ void *buf = (void *)p2;
+ size_t nbyte = (size_t)p3;
+ uintptr_t off_lo = p4;
+ uintptr_t off_hi = p5;
+ ssize_t ret;
+ int rval;
+ struct stat64 statbuf;
+
+ if ((rval = fcntl(fd, F_GETFL, 0)) < 0)
+ return (-errno);
+
+ if (!(rval & O_APPEND)) {
+ ret = pwrite64(fd, buf, nbyte,
+ (off64_t)LX_32TO64(off_lo, off_hi));
+ } else if ((ret = fstat64(fd, &statbuf)) == 0) {
+ ret = pwrite64(fd, buf, nbyte, statbuf.st_size);
+ }
+
+ if (ret < 0)
+ return (-errno);
+
+ return (ret);
+}
+#endif
+
+/*
+ * Implementation of Linux readv() and writev() system calls.
+ *
+ * The Linux system calls differ from the Solaris system calls in a few key
+ * areas:
+ *
+ * - On Solaris, the maximum number of I/O vectors that can be passed to readv()
+ * or writev() is IOV_MAX (16). Linux has a much larger restriction (1024).
+ *
+ * - Passing 0 as a vector count is an error on Solaris, but on Linux results
+ * in a return value of 0. Even though the man page says the opposite.
+ *
+ * - If the Nth vector results in an error, Solaris will return an error code
+ * for the entire operation. Linux only returns an error if there has been
+ * no data transferred yet. Otherwise, it returns the number of bytes
+ * transferred up until that point.
+ *
+ * In order to accomodate these differences, we implement these functions as a
+ * series of ordinary read() or write() calls.
+ */
+
+#define LX_IOV_MAX 1024 /* Also called MAX_IOVEC */
+
+static int
+lx_iovec_copy_and_check(const struct iovec *iovp, struct iovec *iov, int count)
+{
+#if defined(_ILP32)
+ int i;
+ ssize_t cnt = 0;
+#endif
+
+ if (uucopy(iovp, (void *)iov, count * sizeof (struct iovec)) != 0)
+ return (-errno);
+
+#if defined(_ILP32)
+ for (i = 0; i < count; i++) {
+ cnt += iov[i].iov_len;
+ if (iov[i].iov_len < 0 || cnt < 0)
+ return (-EINVAL);
+ }
+#endif
+
+ return (0);
+}
+
+long
+lx_readv(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ int fd = (int)p1;
+ const struct iovec *iovp = (const struct iovec *)p2;
+ int count = (int)p3;
+ struct iovec *iov;
+ ssize_t total = 0, ret;
+ int i;
+
+ if (count == 0)
+ return (0);
+
+ if (count < 0 || count > LX_IOV_MAX)
+ return (-EINVAL);
+
+ if (lx_is_directory(fd))
+ return (-EISDIR);
+
+ iov = SAFE_ALLOCA(count * sizeof (struct iovec));
+ if (iov == NULL)
+ return (-ENOMEM);
+ if ((ret = lx_iovec_copy_and_check(iovp, iov, count)) != 0)
+ return (ret);
+
+ for (i = 0; i < count; i++) {
+ ret = read(fd, iov[i].iov_base, iov[i].iov_len);
+
+ if (ret < 0) {
+ if (total > 0)
+ return (total);
+ return (-errno);
+ }
+
+ total += ret;
+ }
+
+ return (total);
+}
+
+long
+lx_writev(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ int fd = (int)p1;
+ const struct iovec *iovp = (const struct iovec *)p2;
+ int count = (int)p3;
+ struct iovec *iov;
+ ssize_t total = 0, ret;
+ int i;
+
+ if (count == 0)
+ return (0);
+
+ if (count < 0 || count > LX_IOV_MAX)
+ return (-EINVAL);
+
+ iov = SAFE_ALLOCA(count * sizeof (struct iovec));
+ if (iov == NULL)
+ return (-ENOMEM);
+ if ((ret = lx_iovec_copy_and_check(iovp, iov, count)) != 0)
+ return (ret);
+
+ for (i = 0; i < count; i++) {
+ ret = write(fd, iov[i].iov_base, iov[i].iov_len);
+
+ if (ret < 0) {
+ if (total > 0)
+ return (total);
+ return (-errno);
+ }
+
+ total += ret;
+ }
+
+ return (total);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c b/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c
new file mode 100644
index 0000000000..84e115bd4e
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c
@@ -0,0 +1,1775 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/syscall.h>
+#include <sys/utsname.h>
+#include <sys/inttypes.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <sys/fstyp.h>
+#include <sys/fsid.h>
+#include <sys/systm.h>
+#include <sys/auxv.h>
+#include <sys/frame.h>
+#include <zone.h>
+#include <sys/brand.h>
+#include <sys/epoll.h>
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <errno.h>
+#include <syslog.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <synch.h>
+#include <libelf.h>
+#include <libgen.h>
+#include <pthread.h>
+#include <utime.h>
+#include <dirent.h>
+#include <ucontext.h>
+#include <libintl.h>
+#include <locale.h>
+
+#include <sys/lx_misc.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_types.h>
+#include <sys/lx_stat.h>
+#include <sys/lx_statfs.h>
+#include <sys/lx_signal.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_thread.h>
+#include <sys/lx_thunk_server.h>
+
+/*
+ * General emulation guidelines.
+ *
+ * Once the emulation handler has been installed onto the process, we need to
+ * be concerned about system calls made by the emulation, as well as any
+ * library calls which in turn make system calls. This is actually only an
+ * issue for the 64-bit case, since the kernel sycall entry point is common for
+ * both Illumos and Linux. The trampoline code in the kernel needs some way to
+ * distinguish when it should bounce out for emulation (Linux system call) vs.
+ * stay in the kernel (emulation system call). For the 32-bit case Linux uses
+ * int80 for system calls which is orthogonal to all of the Illumos system call
+ * entry points and thus there is no issue.
+ *
+ * To cope with this for the 64-bit case, we maintain a mode flag on each
+ * LWP so we can tell when a system call comes from Linux. We then set the mode
+ * flag to Illumos so that all future system calls from the emulation are
+ * handled correctly. The emulation must reset the mode when it is ready to
+ * return control to Linux. This is done via the B_CLR_NTV_SYSC_FLAG brand
+ * call. There is additional complexity with this mode switching in the
+ * case of a user-defined signal handler. This is described in the signal
+ * emulation code comments.
+ *
+ * *** Setting errno
+ *
+ * This emulation library is loaded onto a seperate link map from the
+ * application whose address space we're running in. The Linux libc errno is
+ * independent of our native libc errno. To pass back an error the emulation
+ * function should return -errno back to the Linux caller.
+ *
+ * *** General considerations
+ *
+ * The lx brand interposes on _all_ system calls. Linux system calls that need
+ * special handling in the kernel are redirected back to the kernel via the
+ * in-kernel emulation (IKE) mechanism which uses a range of the brand system
+ * call command number to determine which in-kernel lx function to invoke.
+ *
+ * *** DTrace
+ *
+ * The lx-syscall DTrace provider (see lx_systrace_attach in
+ * uts/common/brand/lx/dtrace/lx_systrace.c) works as follows:
+ *
+ * When probes are enabled:
+ * lx_systrace_enable -> lx_brand_systrace_enable
+ *
+ * This enables the trace jump table in the kernel (see
+ * uts/intel/brand/lx/lx_brand_asm.s which has the functions
+ * lx_brand_int80_enable and lx_brand_syscall_enable, and the corresponding
+ * patch points lx_brand_int80_patch_point and lx_brand_syscall_patch_point).
+ *
+ * The library code defines lx_handler_table and lx_handler_trace_table
+ * in the i386 and amd64 lx_handler.s code.
+ *
+ * The trace jump table enables lx_traceflag which is used in the lx_emulate
+ * function to make the B_SYSENTRY/B_SYSRETURN brandsys syscalls. These in turn
+ * will call lx_systrace_entry_ptr/lx_systrace_return_ptr so that we can DTrace
+ * the Linux syscalls via the provider.
+ *
+ * When probes are disbaled, we undo the patch points via:
+ * lx_systrace_disable -> lx_brand_systrace_disable
+ */
+
+
+/*
+ * Map Illumos errno to the Linux equivalent.
+ */
+static int stol_errno[] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 42, 43, 44, 45, 46,
+ 47, 48, 49, 50, 51, 35, 47, 22, 38, 22, /* 49 */
+ 52, 53, 54, 55, 56, 57, 58, 59, 22, 22,
+ 61, 61, 62, 63, 64, 65, 66, 67, 68, 69,
+ 70, 71, 22, 22, 72, 22, 22, 74, 36, 75,
+ 76, 77, 78, 79, 80, 81, 82, 83, 84, 38,
+ 40, 85, 86, 39, 87, 88, 89, 90, 91, 92, /* 99 */
+ 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
+ 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
+ 93, 94, 95, 96, 97, 98, 99, 100, 101, 102,
+ 103, 104, 105, 106, 107, 22, 22, 22, 22, 22,
+ 22, 22, 22, 108, 109, 110, 111, 112, 113, 114, /* 149 */
+ 115, 116
+};
+
+char lx_release[LX_VERS_MAX];
+char lx_cmd_name[MAXNAMLEN];
+
+/*
+ * Map a linux locale ending string to the solaris equivalent.
+ */
+struct lx_locale_ending {
+ const char *linux_end; /* linux ending string */
+ const char *solaris_end; /* to transform with this string */
+ int le_size; /* linux ending string length */
+ int se_size; /* solaris ending string length */
+};
+
+#define l2s_locale(lname, sname) \
+ {(lname), (sname), sizeof ((lname)) - 1, sizeof ((sname)) - 1}
+
+#define MAXLOCALENAMELEN 30
+#if !defined(TEXT_DOMAIN) /* should be defined by cc -D */
+#define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
+#endif
+
+/*
+ * This flag is part of the registration with the in-kernel brand module. It's
+ * used in lx_handler() to determine if we should go back into the kernel after
+ * a system call in case the kernel needs to perform some post-syscall work
+ * like tracing for example.
+ */
+int lx_traceflag;
+
+#define LX_SYS_NOSYS_REASON 0x07
+#define LX_SYS_EBPARG6 0x08
+#define LX_SYS_IKE 0x10
+
+#define LX_IKE(sysnum) ((long(*)(void))LX_EMUL_##sysnum)
+
+/*
+ * Flags that denote the specific reason that we don't have a particular
+ * system call. These reasons are only valid if the function is NULL.
+ */
+#define NOSYS_NULL 0
+#define NOSYS_NONE 1
+#define NOSYS_NO_EQUIV 2
+#define NOSYS_KERNEL 3
+#define NOSYS_UNDOC 4
+#define NOSYS_OBSOLETE 5
+#define NOSYS_MAX 5
+
+#if NOSYS_MAX > LX_SYS_NOSYS_REASON
+#error NOSYS reason codes must fit in LX_SYS_NOSYS_REASON
+#endif
+
+static char *nosys_reasons[] = {
+ "Not done yet",
+ "No such Linux system call",
+ "No equivalent Solaris functionality",
+ "Reads/modifies Linux kernel state",
+ "Undocumented and/or rarely used system call",
+ "Unsupported, obsolete system call"
+};
+
+/*
+ * Most syscalls return an int but some return something else, typically a
+ * ssize_t. This can be either an int or a long, depending on if we're compiled
+ * for 32-bit or 64-bit. To correctly propagate the -errno return code in the
+ * 64-bit case, we declare all emulation wrappers will return a long. Thus,
+ * when we save the return value into the %eax or %rax register and return to
+ * Linux, we will have the right size value in both the 32 and 64 bit cases.
+ */
+
+struct lx_sysent {
+ char *sy_name;
+ long (*sy_callc)();
+ char sy_flags;
+ char sy_narg;
+};
+
+static struct lx_sysent sysents[LX_NSYSCALLS + 1];
+
+static uintptr_t stack_bottom;
+
+#if defined(_LP64)
+long lx_fsb;
+long lx_fs;
+#endif
+int lx_install = 0; /* install mode enabled if non-zero */
+boolean_t lx_is_rpm = B_FALSE;
+int lx_rpm_delay = 1;
+int lx_strict = 0; /* "strict" mode enabled if non-zero */
+int lx_verbose = 0; /* verbose mode enabled if non-zero */
+int lx_debug_enabled = 0; /* debugging output enabled if non-zero */
+
+pid_t zoneinit_pid; /* zone init PID */
+long max_pid; /* native maximum PID */
+
+thread_key_t lx_tsd_key;
+
+int
+lx_errno(int err)
+{
+ if (err >= sizeof (stol_errno) / sizeof (stol_errno[0])) {
+ lx_debug("invalid errno %d\n", err);
+ assert(0);
+ }
+
+ return (stol_errno[err]);
+}
+
+int
+uucopy_unsafe(const void *src, void *dst, size_t n)
+{
+ bcopy(src, dst, n);
+ return (0);
+}
+
+int
+uucopystr_unsafe(const void *src, void *dst, size_t n)
+{
+ (void) strncpy((char *)src, dst, n);
+ return (0);
+}
+
+static void
+i_lx_msg(int fd, char *msg, va_list ap)
+{
+ int i;
+ char buf[LX_MSG_MAXLEN];
+
+ /* LINTED [possible expansion issues] */
+ i = vsnprintf(buf, sizeof (buf), msg, ap);
+ buf[LX_MSG_MAXLEN - 1] = '\0';
+ if (i == -1)
+ return;
+
+ /* if debugging is enabled, send this message to debug output */
+ if (lx_debug_enabled != 0)
+ lx_debug(buf);
+
+ if (fd == 2) {
+ /*
+ * We let the user choose whether or not to see these
+ * messages on the console.
+ */
+ if (lx_verbose == 0)
+ return;
+ }
+
+ /* we retry in case of EINTR */
+ do {
+ i = write(fd, buf, strlen(buf));
+ } while ((i == -1) && (errno == EINTR));
+}
+
+/*PRINTFLIKE1*/
+void
+lx_err(char *msg, ...)
+{
+ va_list ap;
+
+ assert(msg != NULL);
+
+ va_start(ap, msg);
+ i_lx_msg(STDERR_FILENO, msg, ap);
+ va_end(ap);
+}
+
+/*
+ * This is just a non-zero exit value which also isn't one that would allow
+ * us to easily detect if a branded process exited because of a recursive
+ * fatal error.
+ */
+#define LX_ERR_FATAL 42
+
+/*
+ * Our own custom version of abort(), this routine will be used in place
+ * of the one located in libc. The primary difference is that this version
+ * will first reset the signal handler for SIGABRT to SIG_DFL, ensuring the
+ * SIGABRT sent causes us to dump core and is not caught by a user program.
+ */
+void
+abort(void)
+{
+ static int aborting = 0;
+
+ struct sigaction sa;
+ sigset_t sigmask;
+
+ /* watch out for recursive calls to this function */
+ if (aborting != 0)
+ exit(LX_ERR_FATAL);
+
+ aborting = 1;
+
+ /*
+ * Block all signals here to avoid taking any signals while exiting
+ * in an effort to avoid any strange user interaction with our death.
+ */
+ (void) sigfillset(&sigmask);
+ (void) sigprocmask(SIG_BLOCK, &sigmask, NULL);
+
+ /*
+ * Our own version of abort(3C) that we know will never call
+ * a user-installed SIGABRT handler first. We WANT to die.
+ *
+ * Do this by resetting the handler to SIG_DFL, and releasing any
+ * held SIGABRTs.
+ *
+ * If no SIGABRTs are pending, send ourselves one.
+ *
+ * The while loop is a bit of overkill, but abort(3C) does it to
+ * assure it never returns so we will as well.
+ */
+ (void) sigemptyset(&sa.sa_mask);
+ sa.sa_sigaction = SIG_DFL;
+ sa.sa_flags = 0;
+
+ for (;;) {
+ (void) sigaction(SIGABRT, &sa, NULL);
+ (void) sigrelse(SIGABRT);
+ (void) thr_kill(thr_self(), SIGABRT);
+ }
+
+ /*NOTREACHED*/
+}
+
+/*PRINTFLIKE1*/
+void
+lx_msg(char *msg, ...)
+{
+ va_list ap;
+
+ assert(msg != NULL);
+ va_start(ap, msg);
+ i_lx_msg(STDOUT_FILENO, msg, ap);
+ va_end(ap);
+}
+
+/*PRINTFLIKE1*/
+void
+lx_err_fatal(char *msg, ...)
+{
+ va_list ap;
+
+ assert(msg != NULL);
+
+ va_start(ap, msg);
+ i_lx_msg(STDERR_FILENO, msg, ap);
+ va_end(ap);
+ abort();
+}
+
+/*
+ * See if it is safe to alloca() sz bytes. Return 1 for yes, 0 for no.
+ */
+int
+lx_check_alloca(size_t sz)
+{
+ uintptr_t sp = (uintptr_t)&sz;
+ uintptr_t end = sp - sz;
+
+ return ((end < sp) && (end >= stack_bottom));
+}
+
+/*PRINTFLIKE1*/
+void
+lx_unsupported(char *msg, ...)
+{
+ va_list ap;
+ char dmsg[256];
+ int lastc;
+
+ assert(msg != NULL);
+
+ /* make a brand call so we can easily dtrace unsupported actions */
+ va_start(ap, msg);
+ /* LINTED [possible expansion issues] */
+ (void) vsnprintf(dmsg, sizeof (dmsg), msg, ap);
+ dmsg[255] = '\0';
+ lastc = strlen(dmsg) - 1;
+ if (dmsg[lastc] == '\n')
+ dmsg[lastc] = '\0';
+ (void) syscall(SYS_brand, B_UNSUPPORTED, dmsg);
+ va_end(ap);
+
+ /* send the msg to the error stream */
+ va_start(ap, msg);
+ i_lx_msg(STDERR_FILENO, msg, ap);
+ va_end(ap);
+
+ /*
+ * If the user doesn't trust the application to responsibly
+ * handle ENOTSUP, we kill the application.
+ */
+ if (lx_strict)
+ (void) kill(getpid(), SIGSYS);
+}
+
+extern void lx_runexe(void *argv, void *entry);
+int lx_init(int argc, char *argv[], char *envp[]);
+
+static int
+lx_emulate_args(lx_regs_t *rp, struct lx_sysent *s, uintptr_t *args)
+{
+#if defined(_LP64)
+ /*
+ * Note: Syscall argument passing is different from function call
+ * argument passing on amd64. For function calls, the fourth arg is
+ * passed via %rcx, but for system calls the 4th arg is passed via %r10.
+ * This is because in amd64, the syscall instruction puts the lower
+ * 32 bits of %rflags in %r11 and puts the %rip value to %rcx.
+ *
+ * Appendix A of the amd64 ABI (Linux conventions) states that syscalls
+ * are limited to 6 args and no arg is passed on the stack.
+ */
+ args[0] = rp->lxr_rdi;
+ args[1] = rp->lxr_rsi;
+ args[2] = rp->lxr_rdx;
+ args[3] = rp->lxr_r10;
+ args[4] = rp->lxr_r8;
+ args[5] = rp->lxr_r9;
+#else
+ /*
+ * If the system call takes 6 args, then libc has stashed them in
+ * memory at the address contained in %ebx. Except for some syscalls
+ * which store the 6th argument in %ebp.
+ */
+ if (s->sy_narg == 6 && !(s->sy_flags & LX_SYS_EBPARG6)) {
+ if (uucopy((void *)rp->lxr_ebx, args,
+ sizeof (args[0]) * 6) != 0)
+ return (-stol_errno[errno]);
+ } else {
+ args[0] = rp->lxr_ebx;
+ args[1] = rp->lxr_ecx;
+ args[2] = rp->lxr_edx;
+ args[3] = rp->lxr_esi;
+ args[4] = rp->lxr_edi;
+ args[5] = rp->lxr_ebp;
+ }
+#endif
+
+ return (0);
+}
+
+void
+lx_emulate(lx_regs_t *rp)
+{
+ struct lx_sysent *s;
+ uintptr_t args[6];
+#if defined(_ILP32)
+ uintptr_t gs = rp->lxr_gs & 0xffff; /* %gs is only 16 bits */
+#endif
+ int syscall_num;
+ long ret;
+
+#if defined(_LP64)
+ syscall_num = rp->lxr_rax;
+#else
+ syscall_num = rp->lxr_eax;
+#endif
+
+ /*
+ * lx_brand_int80_callback() or lx_brand_syscall_callback() ensures
+ * that the syscall_num is sane; Use it as is.
+ */
+ assert(syscall_num >= 0);
+ assert(syscall_num < (sizeof (sysents) / sizeof (sysents[0])));
+ s = &sysents[syscall_num];
+
+ if ((ret = lx_emulate_args(rp, s, args)) != 0)
+ goto out;
+
+ /*
+ * If the tracing flag is enabled we call into the brand-specific
+ * kernel module to handle the tracing activity (DTrace or ptrace).
+ * It would be tempting to perform DTrace activity in the brand
+ * module's syscall trap callback, rather than having to return
+ * to the kernel here, but -- since argument encoding can vary
+ * according to the specific system call -- that would require
+ * replicating the knowledge of argument decoding in the kernel
+ * module as well as here in the brand library.
+ */
+ if (lx_traceflag != 0) {
+ /*
+ * Part of the ptrace "interface" is that on syscall entry
+ * %rax / %eax should be reported as -ENOSYS while the
+ * orig_rax / orig_eax field of the user structure needs to
+ * contain the actual system call number. If we end up stopping
+ * here, the controlling process will dig the lx_regs_t
+ * structure out of our stack.
+ */
+#if defined(_LP64)
+ rp->lxr_orig_rax = syscall_num;
+ rp->lxr_rax = -stol_errno[ENOSYS];
+#else
+ rp->lxr_orig_eax = syscall_num;
+ rp->lxr_eax = -stol_errno[ENOSYS];
+#endif
+
+ (void) syscall(SYS_brand, B_SYSENTRY, syscall_num, args);
+
+ /*
+ * The external tracer may have modified the arguments to this
+ * system call. Refresh the argument cache to account for this.
+ */
+ if ((ret = lx_emulate_args(rp, s, args)) != 0)
+ goto out;
+ }
+
+ if (s->sy_callc == NULL) {
+ int reason = s->sy_flags & LX_SYS_NOSYS_REASON;
+ lx_unsupported("unimplemented syscall #%d (%s): %s\n",
+ syscall_num, s->sy_name, nosys_reasons[reason]);
+ ret = -stol_errno[ENOTSUP];
+ goto out;
+ }
+
+ if (lx_debug_enabled != 0) {
+ const char *fmt = NULL;
+
+ switch (s->sy_narg) {
+ case 0:
+ fmt = "calling %s()";
+ break;
+ case 1:
+ fmt = "calling %s(0x%p)";
+ break;
+ case 2:
+ fmt = "calling %s(0x%p, 0x%p)";
+ break;
+ case 3:
+ fmt = "calling %s(0x%p, 0x%p, 0x%p)";
+ break;
+ case 4:
+ fmt = "calling %s(0x%p, 0x%p, 0x%p, 0x%p)";
+ break;
+ case 5:
+ fmt = "calling %s(0x%p, 0x%p, 0x%p, 0x%p, 0x%p)";
+ break;
+ case 6:
+ fmt = "calling %s(0x%p, 0x%p, 0x%p, 0x%p, 0x%p, 0x%p)";
+ break;
+ }
+
+ lx_debug(fmt, s->sy_name, args[0], args[1], args[2], args[3],
+ args[4], args[5]);
+ }
+
+ /*
+ * On 64-bit code, the %gs will be 0 in both native and Linux code.
+ */
+#if defined(_ILP32)
+ if (gs != LWPGS_SEL) {
+ lx_tsd_t *lx_tsd;
+
+ /*
+ * While a %gs of 0 is technically legal (as long as the
+ * application never dereferences memory using %gs), Solaris
+ * has its own ideas as to how a zero %gs should be handled in
+ * _update_sregs(), such that any 32-bit user process with a
+ * %gs of zero running on a system with a 64-bit kernel will
+ * have its %gs hidden base register stomped on on return from
+ * a system call, leaving an incorrect base address in place
+ * until the next time %gs is actually reloaded (forcing a
+ * reload of the base address from the appropriate descriptor
+ * table.)
+ *
+ * Of course the kernel will once again stomp on THAT base
+ * address when returning from a system call, resulting in an
+ * an application segmentation fault.
+ *
+ * To avoid this situation, disallow a save of a zero %gs
+ * here in order to try and capture any Linux process that
+ * attempts to make a syscall with a zero %gs installed.
+ */
+ assert(gs != 0);
+
+ if ((ret = thr_getspecific(lx_tsd_key,
+ (void **)&lx_tsd)) != 0)
+ lx_err_fatal("lx_emulate: unable to read "
+ "thread-specific data: %s", strerror(ret));
+
+ assert(lx_tsd != 0);
+
+ lx_tsd->lxtsd_gs = gs;
+
+ lx_debug("lx_emulate(): gsp 0x%p, saved gs: 0x%x", lx_tsd, gs);
+ }
+#endif /* _ILP32 */
+
+ if (s->sy_flags & LX_SYS_IKE) {
+ lx_debug("\tsyscall %d re-vectoring to lx kernel module "
+ "for %s()", syscall_num, s->sy_name);
+
+ if ((ret = syscall(SYS_brand, B_IKE_SYSCALL,
+ (uintptr_t)s->sy_callc, args)) == -1)
+ ret = -errno;
+ } else {
+ ret = s->sy_callc(args[0], args[1], args[2],
+ args[3], args[4], args[5]);
+ }
+
+ if (ret > -65536 && ret < 65536)
+ lx_debug("\t= %d", ret);
+ else
+ lx_debug("\t= 0x%x", ret);
+
+ /*
+ * If the return value is between -1 and -4095 then it's an errno, so
+ * we translate the Illumos error number into the Linux equivalent.
+ */
+ if (ret < 0 && ret > -4096) {
+ if (-ret >= sizeof (stol_errno) / sizeof (stol_errno[0])) {
+ lx_debug("Invalid return value from emulated "
+ "syscall %d (%s): %d\n",
+ syscall_num, s->sy_name, ret);
+ assert(0);
+ }
+
+ ret = -stol_errno[-ret];
+ }
+
+out:
+ /*
+ * For 32-bit, %eax holds the return code from the system call. For
+ * 64-bit, %rax holds the return code.
+ */
+#if defined(_LP64)
+ rp->lxr_rax = ret;
+#else
+ rp->lxr_eax = ret;
+#endif
+
+ /*
+ * If the trace flag is set, bounce into the kernel to let it do
+ * any necessary tracing (DTrace or ptrace).
+ */
+ if (lx_traceflag != 0) {
+#if defined(_LP64)
+ rp->lxr_orig_rax = syscall_num;
+#else
+ rp->lxr_orig_eax = syscall_num;
+#endif
+ (void) syscall(SYS_brand, B_SYSRETURN, syscall_num, ret);
+ }
+
+#if defined(_LP64)
+ /*
+ * For 64-bit code this must be the last thing we do in the emulation
+ * code path before we return back to the Linux program. This will
+ * disable native syscalls so the next time a syscall happens on this
+ * thread, it will come back into the emulation. We can omit the extra
+ * syscall overhead in the 32-bit case.
+ */
+ (void) syscall(SYS_brand, B_CLR_NTV_SYSC_FLAG);
+#endif
+}
+
+static void
+lx_close_fh(FILE *file)
+{
+ int fd, fd_new;
+
+ if (file == NULL)
+ return;
+
+ if ((fd = fileno(file)) < 0)
+ return;
+
+ fd_new = dup(fd);
+ if (fd_new == -1)
+ return;
+
+ (void) fclose(file);
+ (void) dup2(fd_new, fd);
+ (void) close(fd_new);
+}
+
+
+extern int set_l10n_alternate_root(char *path);
+
+#if defined(_LP64)
+static void *
+map_vdso()
+{
+ int fd;
+ mmapobj_result_t mpp[10]; /* we know the size of our lib */
+ mmapobj_result_t *smpp = mpp;
+ uint_t mapnum = 10;
+
+ if ((fd = open("/native/usr/lib/brand/lx/amd64/lx_vdso.so.1",
+ O_RDONLY)) == -1)
+ lx_err_fatal("couldn't open lx_vdso.so.1");
+
+ if (mmapobj(fd, MMOBJ_INTERPRET, smpp, &mapnum, NULL) == -1)
+ lx_err_fatal("couldn't mmapobj lx_vdso.so.1");
+
+ (void) close(fd);
+
+ /* assume first segment is the base of the mapping */
+ return (smpp->mr_addr);
+}
+#endif
+
+/*ARGSUSED*/
+int
+lx_init(int argc, char *argv[], char *envp[])
+{
+ char *r;
+ auxv_t *ap;
+ long *p;
+ int err;
+ lx_elf_data_t edp;
+ lx_brand_registration_t reg;
+ static lx_tsd_t lx_tsd;
+#if defined(_LP64)
+ void *vdso_hdr;
+#endif
+
+ stack_bottom = 2 * sysconf(_SC_PAGESIZE);
+
+ /*
+ * We need to shutdown all libc stdio. libc stdio normally goes to
+ * file descriptors, but since we're actually part of a linux
+ * process we don't own these file descriptors and we can't make
+ * any assumptions about their state.
+ */
+ lx_close_fh(stdin);
+ lx_close_fh(stdout);
+ lx_close_fh(stderr);
+
+ lx_debug_init();
+
+ r = getenv("LX_RELEASE");
+ if (r == NULL) {
+ if (zone_getattr(getzoneid(), LX_KERN_VERSION_NUM, lx_release,
+ sizeof (lx_release)) != sizeof (lx_release))
+ (void) strlcpy(lx_release, "2.4.21", LX_VERS_MAX);
+ } else {
+ (void) strlcpy(lx_release, r, 128);
+ }
+
+ lx_debug("lx_release: %s\n", lx_release);
+
+ /*
+ * Should we kill an application that attempts an unimplemented
+ * system call?
+ */
+ if (getenv("LX_STRICT") != NULL) {
+ lx_strict = 1;
+ lx_debug("STRICT mode enabled.\n");
+ }
+
+ /*
+ * Are we in install mode?
+ */
+ if (getenv("LX_INSTALL") != NULL) {
+ lx_install = 1;
+ lx_debug("INSTALL mode enabled.\n");
+ }
+
+ /*
+ * Should we attempt to send messages to the screen?
+ */
+ if (getenv("LX_VERBOSE") != NULL) {
+ lx_verbose = 1;
+ lx_debug("VERBOSE mode enabled.\n");
+ }
+
+ /* needed in wait4(), get it once since it never changes */
+ max_pid = sysconf(_SC_MAXPID);
+
+ (void) strlcpy(lx_cmd_name, basename(argv[0]), sizeof (lx_cmd_name));
+ lx_debug("executing linux process: %s", argv[0]);
+ lx_debug("branding myself and setting handler to 0x%p",
+ (void *)lx_handler_table);
+
+ /*
+ * The version of rpm that ships with CentOS/RHEL 3.x has a race
+ * condition in it. If it creates a child process to run a
+ * post-install script, and that child process completes too
+ * quickly, it will disappear before the parent notices. This
+ * causes the parent to hang forever waiting for the already dead
+ * child to die. I'm sure there's a Lazarus joke buried in here
+ * somewhere.
+ *
+ * Anyway, as a workaround, we make every child of an 'rpm' process
+ * sleep for 1 second, giving the parent a chance to enter its
+ * wait-for-the-child-to-die loop. Thay may be the hackiest trick
+ * in all of our Linux emulation code - and that's saying
+ * something.
+ */
+ if (strcmp("rpm", basename(argv[0])) == NULL)
+ lx_is_rpm = B_TRUE;
+
+ reg.lxbr_version = LX_VERSION;
+ reg.lxbr_handler = (void *)&lx_handler_table;
+ reg.lxbr_tracehandler = (void *)&lx_handler_trace_table;
+ reg.lxbr_traceflag = (void *)&lx_traceflag;
+
+ /*
+ * Register the address of the user-space handler with the lx brand
+ * module. As a side-effect this leaves the thread in native syscall
+ * mode so that it's ok to continue to make syscalls during setup. We
+ * need to switch to Linux mode at the end of initialization.
+ */
+ if (syscall(SYS_brand, B_REGISTER, &reg))
+ lx_err_fatal("failed to brand the process");
+
+ /* Look up the PID that serves as init for this zone */
+ if ((err = lx_lpid_to_spid(1, &zoneinit_pid)) < 0)
+ lx_err_fatal("Unable to find PID for zone init process: %s",
+ strerror(err));
+
+ /*
+ * Ubuntu init will fail if its TERM environment variable is not set
+ * so if we are running init, and TERM is not set, we set term and
+ * reexec so that the new environment variable is propagated to the
+ * linux application stack.
+ */
+ if ((getpid() == zoneinit_pid) && (getenv("TERM") == NULL)) {
+ if (setenv("TERM", "vt100", 1) < 0 || execv(argv[0], argv) < 0)
+ lx_err_fatal("failed to set TERM");
+ }
+
+ /*
+ * Upload data about the lx executable from the kernel.
+ */
+ if (syscall(SYS_brand, B_ELFDATA, (void *)&edp))
+ lx_err_fatal("failed to get required ELF data from the kernel");
+
+ if (lx_stat_init() != 0)
+ lx_err_fatal("failed to setup the stat translator");
+
+ if (lx_statfs_init() != 0)
+ lx_err_fatal("failed to setup the statfs translator");
+
+#if defined(_LP64)
+ vdso_hdr = map_vdso();
+#endif
+
+ /*
+ * Find the aux vector on the stack.
+ */
+ p = (long *)envp;
+ while (*p != NULL)
+ p++;
+ /*
+ * p is now pointing at the 0 word after the environ pointers. After
+ * that is the aux vectors.
+ */
+ p++;
+ for (ap = (auxv_t *)p; ap->a_type != 0; ap++) {
+ switch (ap->a_type) {
+ case AT_BASE:
+ ap->a_un.a_val = edp.ed_base;
+ break;
+ case AT_ENTRY:
+ ap->a_un.a_val = edp.ed_entry;
+ break;
+ case AT_PHDR:
+ ap->a_un.a_val = edp.ed_phdr;
+ break;
+ case AT_PHENT:
+ ap->a_un.a_val = edp.ed_phent;
+ break;
+ case AT_PHNUM:
+ ap->a_un.a_val = edp.ed_phnum;
+ break;
+#if defined(_LP64)
+ case AT_SUN_BRAND_LX_SYSINFO_EHDR:
+ ap->a_type = AT_SYSINFO_EHDR;
+ ap->a_un.a_val = (long)vdso_hdr;
+ break;
+#endif
+ default:
+ break;
+ }
+ }
+
+ /* Do any thunk server initalization. */
+ lxt_server_init(argc, argv);
+
+ /* Setup signal handler information. */
+ if (lx_siginit())
+ lx_err_fatal("failed to initialize lx signals for the "
+ "branded process");
+
+ /* Setup thread-specific data area for managing linux threads. */
+ if ((err = thr_keycreate(&lx_tsd_key, NULL)) != 0)
+ lx_err_fatal("thr_keycreate(lx_tsd_key) failed: %s",
+ strerror(err));
+
+ lx_debug("thr_keycreate created lx_tsd_key (%d)", lx_tsd_key);
+
+ /* Initialize the thread specific data for this thread. */
+ bzero(&lx_tsd, sizeof (lx_tsd));
+#if defined(_ILP32)
+ /* start with %gs having the native libc value */
+ lx_tsd.lxtsd_gs = LWPGS_SEL;
+#endif
+
+ if ((err = thr_setspecific(lx_tsd_key, &lx_tsd)) != 0)
+ lx_err_fatal("Unable to initialize thread-specific data: %s",
+ strerror(err));
+
+ /*
+ * Save the current context of this thread.
+ * We'll restore this context when this thread attempts to exit.
+ */
+ if (getcontext(&lx_tsd.lxtsd_exit_context) != 0)
+ lx_err_fatal("Unable to initialize thread-specific exit "
+ "context: %s", strerror(errno));
+
+ if (lx_tsd.lxtsd_exit == 0) {
+#if defined(_LP64)
+ /* Switch to Linux syscall mode */
+ (void) syscall(SYS_brand, B_CLR_NTV_SYSC_FLAG);
+#endif
+
+ lx_runexe(argv, (void *)edp.ed_ldentry);
+ /* lx_runexe() never returns. */
+ assert(0);
+ }
+
+ /*
+ * We are here because the Linux application called the exit() or
+ * exit_group() system call. In turn the brand library did a
+ * setcontext() to jump to the thread context state we saved above.
+ */
+ if (lx_tsd.lxtsd_exit == 1)
+ thr_exit((void *)(long)lx_tsd.lxtsd_exit_status);
+ else
+ exit(lx_tsd.lxtsd_exit_status);
+
+ assert(0);
+
+ /*NOTREACHED*/
+ return (0);
+}
+
+/*
+ * Walk back through the stack until we find the lx_emulate() frame.
+ */
+lx_regs_t *
+lx_syscall_regs(void)
+{
+ /* LINTED - alignment */
+ struct frame *fr = (struct frame *)_getfp();
+
+ while (fr->fr_savpc != (uintptr_t)&lx_emulate_done) {
+ fr = (struct frame *)fr->fr_savfp;
+ assert(fr->fr_savpc != NULL);
+ }
+
+#if defined(_LP64)
+ /*
+ * This is %rbp, update to be at the end of the frame for correct
+ * struct offsets. lx_emulate only takes one parameter, a pointer to
+ * lx_regs_t.
+ */
+ return ((lx_regs_t *)(fr->fr_savfp - sizeof (lx_regs_t)));
+#else
+ return ((lx_regs_t *)((uintptr_t *)fr)[2]);
+#endif
+}
+
+int
+lx_lpid_to_spair(pid_t lpid, pid_t *spid, lwpid_t *slwp)
+{
+ pid_t pid;
+ lwpid_t tid;
+
+ if (lpid == 0) {
+ pid = getpid();
+ tid = thr_self();
+ } else {
+ if (syscall(SYS_brand, B_LPID_TO_SPAIR, lpid, &pid, &tid) < 0)
+ return (-errno);
+
+ /*
+ * If the returned pid is -1, that indicates we tried to
+ * look up the PID for init, but that process no longer
+ * exists.
+ */
+ if (pid == -1)
+ return (-ESRCH);
+ }
+
+ if (uucopy(&pid, spid, sizeof (pid_t)) != 0)
+ return (-errno);
+
+ if (uucopy(&tid, slwp, sizeof (lwpid_t)) != 0)
+ return (-errno);
+
+ return (0);
+}
+
+int
+lx_lpid_to_spid(pid_t lpid, pid_t *spid)
+{
+ lwpid_t slwp;
+
+ return (lx_lpid_to_spair(lpid, spid, &slwp));
+}
+
+char *
+lx_fd_to_path(int fd, char *buf, int buf_size)
+{
+ char path_proc[MAXPATHLEN];
+ pid_t pid;
+ int n;
+
+ assert((buf != NULL) && (buf_size >= 0));
+
+ if (fd < 0)
+ return (NULL);
+
+ if ((pid = getpid()) == -1)
+ return (NULL);
+
+ (void) snprintf(path_proc, MAXPATHLEN,
+ "/native/proc/%d/path/%d", pid, fd);
+
+ if ((n = readlink(path_proc, buf, buf_size - 1)) == -1)
+ return (NULL);
+ buf[n] = '\0';
+
+ return (buf);
+}
+
+#if defined(_LP64)
+/* The following is the 64-bit syscall table */
+
+static struct lx_sysent sysents[] = {
+ {"read", LX_IKE(read), LX_SYS_IKE, 3}, /* 0 */
+ {"write", lx_write, 0, 3}, /* 1 */
+ {"open", lx_open, 0, 3}, /* 2 */
+ {"close", lx_close, 0, 1}, /* 3 */
+ {"stat", lx_stat64, 0, 2}, /* 4 */
+ {"fstat", lx_fstat64, 0, 2}, /* 5 */
+ {"lstat", lx_lstat64, 0, 2}, /* 6 */
+ {"poll", lx_poll, 0, 3}, /* 7 */
+ {"lseek", lx_lseek, 0, 3}, /* 8 */
+ {"mmap", lx_mmap, 0, 6}, /* 9 */
+ {"mprotect", lx_mprotect, 0, 3}, /* 10 */
+ {"munmap", lx_munmap, 0, 2}, /* 11 */
+ {"brk", LX_IKE(brk), LX_SYS_IKE, 1}, /* 12 */
+ {"rt_sigaction", lx_rt_sigaction, 0, 4}, /* 13 */
+ {"rt_sigprocmask", lx_rt_sigprocmask, 0, 4}, /* 14 */
+ {"rt_sigreturn", lx_rt_sigreturn, 0, 0}, /* 15 */
+ {"ioctl", LX_IKE(ioctl), LX_SYS_IKE, 3}, /* 16 */
+ {"pread64", lx_pread, 0, 4}, /* 17 */
+ {"pwrite64", lx_pwrite, 0, 4}, /* 18 */
+ {"readv", lx_readv, 0, 3}, /* 19 */
+ {"writev", lx_writev, 0, 3}, /* 20 */
+ {"access", lx_access, 0, 2}, /* 21 */
+ {"pipe", LX_IKE(pipe), LX_SYS_IKE, 1}, /* 22 */
+ {"select", lx_select, 0, 5}, /* 23 */
+ {"sched_yield", lx_yield, 0, 0}, /* 24 */
+ {"mremap", lx_remap, 0, 5}, /* 25 */
+ {"msync", lx_msync, 0, 3}, /* 26 */
+ {"mincore", lx_mincore, 0, 3}, /* 27 */
+ {"madvise", lx_madvise, 0, 3}, /* 28 */
+ {"shmget", lx_shmget, 0, 3}, /* 29 */
+ {"shmat", lx_shmat, 0, 4}, /* 30 */
+ {"shmctl", lx_shmctl, 0, 3}, /* 31 */
+ {"dup", lx_dup, 0, 1}, /* 32 */
+ {"dup2", lx_dup2, 0, 2}, /* 33 */
+ {"pause", lx_pause, 0, 0}, /* 34 */
+ {"nanosleep", lx_nanosleep, 0, 2}, /* 35 */
+ {"getitimer", lx_getitimer, 0, 2}, /* 36 */
+ {"alarm", lx_alarm, 0, 1}, /* 37 */
+ {"setitimer", lx_setitimer, 0, 3}, /* 38 */
+ {"getpid", lx_getpid, 0, 0}, /* 39 */
+ {"sendfile", lx_sendfile64, 0, 4}, /* 40 */
+ {"socket", lx_socket, 0, 3}, /* 41 */
+ {"connect", lx_connect, 0, 3}, /* 42 */
+ {"accept", lx_accept, 0, 3}, /* 43 */
+ {"sendto", lx_sendto, 0, 6}, /* 44 */
+ {"recvfrom", lx_recvfrom, 0, 6}, /* 45 */
+ {"sendmsg", lx_sendmsg, 0, 3}, /* 46 */
+ {"recvmsg", lx_recvmsg, 0, 3}, /* 47 */
+ {"shutdown", lx_shutdown, 0, 2}, /* 48 */
+ {"bind", lx_bind, 0, 3}, /* 49 */
+ {"listen", lx_listen, 0, 2}, /* 50 */
+ {"getsockname", lx_getsockname, 0, 3}, /* 51 */
+ {"getpeername", lx_getpeername, 0, 3}, /* 52 */
+ {"socketpair", lx_socketpair, 0, 4}, /* 53 */
+ {"setsockopt", lx_setsockopt, 0, 5}, /* 54 */
+ {"getsockopt", lx_getsockopt, 0, 5}, /* 55 */
+ {"clone", lx_clone, 0, 5}, /* 56 */
+ {"fork", lx_fork, 0, 0}, /* 57 */
+ {"vfork", lx_vfork, 0, 0}, /* 58 */
+ {"execve", lx_execve, 0, 3}, /* 59 */
+ {"exit", lx_exit, 0, 1}, /* 60 */
+ {"wait4", lx_wait4, 0, 4}, /* 61 */
+ {"kill", LX_IKE(kill), LX_SYS_IKE, 2}, /* 62 */
+ {"uname", lx_uname, 0, 1}, /* 63 */
+ {"semget", lx_semget, 0, 3}, /* 64 */
+ {"semop", lx_semop, 0, 3}, /* 65 */
+ {"semctl", lx_semctl, 0, 4}, /* 66 */
+ {"shmdt", lx_shmdt, 0, 1}, /* 67 */
+ {"msgget", lx_msgget, 0, 2}, /* 68 */
+ {"msgsnd", lx_msgsnd, 0, 4}, /* 69 */
+ {"msgrcv", lx_msgrcv, 0, 5}, /* 70 */
+ {"msgctl", lx_msgctl, 0, 3}, /* 71 */
+ {"fcntl", lx_fcntl64, 0, 3}, /* 72 */
+ {"flock", lx_flock, 0, 2}, /* 73 */
+ {"fsync", lx_fsync, 0, 1}, /* 74 */
+ {"fdatasync", lx_fdatasync, 0, 1}, /* 75 */
+ {"truncate", lx_truncate, 0, 2}, /* 76 */
+ {"ftruncate", lx_ftruncate, 0, 2}, /* 77 */
+ {"getdents", lx_getdents, 0, 3}, /* 78 */
+ {"getcwd", lx_getcwd, 0, 2}, /* 79 */
+ {"chdir", lx_chdir, 0, 1}, /* 80 */
+ {"fchdir", lx_fchdir, 0, 1}, /* 81 */
+ {"rename", lx_rename, 0, 2}, /* 82 */
+ {"mkdir", lx_mkdir, 0, 2}, /* 83 */
+ {"rmdir", lx_rmdir, 0, 1}, /* 84 */
+ {"creat", lx_creat, 0, 2}, /* 85 */
+ {"link", lx_link, 0, 2}, /* 86 */
+ {"unlink", lx_unlink, 0, 1}, /* 87 */
+ {"symlink", lx_symlink, 0, 2}, /* 88 */
+ {"readlink", lx_readlink, 0, 3}, /* 89 */
+ {"chmod", lx_chmod, 0, 2}, /* 90 */
+ {"fchmod", lx_fchmod, 0, 2}, /* 91 */
+ {"chown", lx_chown, 0, 3}, /* 92 */
+ {"fchown", lx_fchown, 0, 3}, /* 93 */
+ {"lchown", lx_lchown, 0, 3}, /* 94 */
+ {"umask", lx_umask, 0, 1}, /* 95 */
+ {"gettimeofday", lx_gettimeofday, 0, 2}, /* 96 */
+ {"getrlimit", lx_getrlimit, 0, 2}, /* 97 */
+ {"getrusage", lx_getrusage, 0, 2}, /* 98 */
+ {"sysinfo", LX_IKE(sysinfo), LX_SYS_IKE, 1}, /* 99 */
+ {"times", lx_times, 0, 1}, /* 100 */
+ {"ptrace", lx_ptrace, 0, 4}, /* 101 */
+ {"getuid", lx_getuid, 0, 0}, /* 102 */
+ {"syslog", lx_syslog, 0, 3}, /* 103 */
+ {"getgid", lx_getgid, 0, 0}, /* 104 */
+ {"setuid", lx_setuid, 0, 1}, /* 105 */
+ {"setgid", lx_setgid, 0, 1}, /* 106 */
+ {"geteuid", lx_geteuid, 0, 0}, /* 107 */
+ {"getegid", lx_getegid, 0, 0}, /* 108 */
+ {"setpgid", lx_setpgid, 0, 2}, /* 109 */
+ {"getppid", LX_IKE(getppid), LX_SYS_IKE, 0}, /* 110 */
+ {"getpgrp", lx_getpgrp, 0, 0}, /* 111 */
+ {"setsid", lx_setsid, 0, 0}, /* 112 */
+ {"setreuid", lx_setreuid, 0, 0}, /* 113 */
+ {"setregid", lx_setregid, 0, 0}, /* 114 */
+ {"getgroups", lx_getgroups, 0, 2}, /* 115 */
+ {"setgroups", lx_setgroups, 0, 2}, /* 116 */
+ {"setresuid", LX_IKE(setresuid), LX_SYS_IKE, 3}, /* 117 */
+ {"getresuid", lx_getresuid, 0, 3}, /* 118 */
+ {"setresgid", LX_IKE(setresgid), LX_SYS_IKE, 3}, /* 119 */
+ {"getresgid", lx_getresgid, 0, 3}, /* 120 */
+ {"getpgid", lx_getpgid, 0, 1}, /* 121 */
+ {"setfsuid", lx_setfsuid, 0, 1}, /* 122 */
+ {"setfsgid", lx_setfsgid, 0, 1}, /* 123 */
+ {"getsid", lx_getsid, 0, 1}, /* 124 */
+ {"capget", lx_capget, 0, 2}, /* 125 */
+ {"capset", lx_capset, 0, 2}, /* 126 */
+ {"rt_sigpending", lx_rt_sigpending, 0, 2}, /* 127 */
+ {"rt_sigtimedwait", lx_rt_sigtimedwait, 0, 4}, /* 128 */
+ {"rt_sigqueueinfo", lx_rt_sigqueueinfo, 0, 3}, /* 129 */
+ {"rt_sigsuspend", lx_rt_sigsuspend, 0, 2}, /* 130 */
+ {"sigaltstack", lx_sigaltstack, 0, 2}, /* 131 */
+ {"utime", lx_utime, 0, 2}, /* 132 */
+ {"mknod", lx_mknod, 0, 3}, /* 133 */
+ {"uselib", NULL, NOSYS_KERNEL, 0}, /* 134 */
+ {"personality", lx_personality, 0, 1}, /* 135 */
+ {"ustat", NULL, NOSYS_OBSOLETE, 2}, /* 136 */
+ {"statfs", lx_statfs, 0, 2}, /* 137 */
+ {"fstatfs", lx_fstatfs, 0, 2}, /* 138 */
+ {"sysfs", lx_sysfs, 0, 3}, /* 139 */
+ {"getpriority", lx_getpriority, 0, 2}, /* 140 */
+ {"setpriority", lx_setpriority, 0, 3}, /* 141 */
+ {"sched_setparam", lx_sched_setparam, 0, 2}, /* 142 */
+ {"sched_getparam", lx_sched_getparam, 0, 2}, /* 143 */
+ {"sched_setscheduler", lx_sched_setscheduler, 0, 3}, /* 144 */
+ {"sched_getscheduler", lx_sched_getscheduler, 0, 1}, /* 145 */
+ {"sched_get_priority_max", lx_sched_get_priority_max, 0, 1}, /* 146 */
+ {"sched_get_priority_min", lx_sched_get_priority_min, 0, 1}, /* 147 */
+ {"sched_rr_get_interval", lx_sched_rr_get_interval, 0, 2}, /* 148 */
+ {"mlock", lx_mlock, 0, 2}, /* 149 */
+ {"munlock", lx_munlock, 0, 2}, /* 150 */
+ {"mlockall", lx_mlockall, 0, 1}, /* 151 */
+ {"munlockall", lx_munlockall, 0, 0}, /* 152 */
+ {"vhangup", lx_vhangup, 0, 0}, /* 153 */
+ {"modify_ldt", LX_IKE(modify_ldt), LX_SYS_IKE, 3}, /* 154 */
+ {"pivot_root", NULL, NOSYS_KERNEL, 0}, /* 155 */
+ {"sysctl", lx_sysctl, 0, 1}, /* 156 */
+ {"prctl", lx_prctl, 0, 5}, /* 157 */
+ {"arch_prctl", LX_IKE(arch_prctl), LX_SYS_IKE, 2}, /* 158 */
+ {"adjtimex", lx_adjtimex, 0, 1}, /* 159 */
+ {"setrlimit", lx_setrlimit, 0, 2}, /* 160 */
+ {"chroot", lx_chroot, 0, 1}, /* 161 */
+ {"sync", lx_sync, 0, 0}, /* 162 */
+ {"acct", NULL, NOSYS_NO_EQUIV, 0}, /* 163 */
+ {"settimeofday", lx_settimeofday, 0, 2}, /* 164 */
+ {"mount", lx_mount, 0, 5}, /* 165 */
+ {"umount2", lx_umount2, 0, 2}, /* 166 */
+ {"swapon", NULL, NOSYS_KERNEL, 0}, /* 167 */
+ {"swapoff", NULL, NOSYS_KERNEL, 0}, /* 168 */
+ {"reboot", lx_reboot, 0, 4}, /* 169 */
+ {"sethostname", lx_sethostname, 0, 2}, /* 170 */
+ {"setdomainname", lx_setdomainname, 0, 2}, /* 171 */
+ {"iopl", NULL, NOSYS_NO_EQUIV, 0}, /* 172 */
+ {"ioperm", NULL, NOSYS_NO_EQUIV, 0}, /* 173 */
+ {"create_module", NULL, NOSYS_KERNEL, 0}, /* 174 */
+ {"init_module", NULL, NOSYS_KERNEL, 0}, /* 175 */
+ {"delete_module", NULL, NOSYS_KERNEL, 0}, /* 176 */
+ {"get_kernel_syms", NULL, NOSYS_KERNEL, 0}, /* 177 */
+ {"query_module", lx_query_module, NOSYS_KERNEL, 5}, /* 178 */
+ {"quotactl", NULL, NOSYS_KERNEL, 0}, /* 179 */
+ {"nfsservctl", NULL, NOSYS_KERNEL, 0}, /* 180 */
+ {"getpmsg", NULL, NOSYS_OBSOLETE, 0}, /* 181 */
+ {"putpmsg", NULL, NOSYS_OBSOLETE, 0}, /* 182 */
+ {"afs_syscall", NULL, NOSYS_KERNEL, 0}, /* 183 */
+ {"tux", NULL, NOSYS_NO_EQUIV, 0}, /* 184 */
+ {"security", NULL, NOSYS_NO_EQUIV, 0}, /* 185 */
+ {"gettid", LX_IKE(gettid), LX_SYS_IKE, 0}, /* 186 */
+ {"readahead", NULL, NOSYS_NO_EQUIV, 0}, /* 187 */
+ {"setxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 188 */
+ {"lsetxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 189 */
+ {"fsetxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 190 */
+ {"getxattr", lx_xattr4, 0, 4}, /* 191 */
+ {"lgetxattr", lx_xattr4, 0, 4}, /* 192 */
+ {"fgetxattr", lx_xattr4, 0, 4}, /* 193 */
+ {"listxattr", lx_xattr3, 0, 3}, /* 194 */
+ {"llistxattr", lx_xattr3, 0, 3}, /* 195 */
+ {"flistxattr", lx_xattr3, 0, 3}, /* 196 */
+ {"removexattr", lx_xattr2, 0, 2}, /* 197 */
+ {"lremovexattr", lx_xattr2, 0, 2}, /* 198 */
+ {"fremovexattr", lx_xattr2, 0, 2}, /* 199 */
+ {"tkill", LX_IKE(tkill), LX_SYS_IKE, 2}, /* 200 */
+ {"time", lx_time, 0, 1}, /* 201 */
+ {"futex", LX_IKE(futex), LX_SYS_IKE, 6}, /* 202 */
+ {"sched_setaffinity", lx_sched_setaffinity, 0, 3}, /* 203 */
+ {"sched_getaffinity", lx_sched_getaffinity, 0, 3}, /* 204 */
+ {"set_thread_area", LX_IKE(set_thread_area), LX_SYS_IKE, 1}, /* 205 */
+ {"io_setup", NULL, NOSYS_NO_EQUIV, 0}, /* 206 */
+ {"io_destroy", NULL, NOSYS_NO_EQUIV, 0}, /* 207 */
+ {"io_getevents", NULL, NOSYS_NO_EQUIV, 0}, /* 208 */
+ {"io_submit", NULL, NOSYS_NO_EQUIV, 0}, /* 209 */
+ {"io_cancel", NULL, NOSYS_NO_EQUIV, 0}, /* 210 */
+ {"get_thread_area", LX_IKE(get_thread_area), LX_SYS_IKE, 1}, /* 211 */
+ {"lookup_dcookie", NULL, NOSYS_NO_EQUIV, 0}, /* 212 */
+ {"epoll_create", lx_epoll_create, 0, 1}, /* 213 */
+ {"epoll_ctl_old", NULL, NOSYS_NULL, 0}, /* 214 */
+ {"epoll_wait_old", NULL, NOSYS_NULL, 0}, /* 215 */
+ {"remap_file_pages", NULL, NOSYS_NO_EQUIV, 0}, /* 216 */
+ {"getdents64", lx_getdents64, 0, 3}, /* 217 */
+ {"set_tid_address", LX_IKE(set_tid_address), LX_SYS_IKE, 1}, /* 218 */
+ {"restart_syscall", NULL, NOSYS_NULL, 0}, /* 219 */
+ {"semtimedop", lx_semtimedop, 0, 4}, /* 220 */
+ {"fadvise64", lx_fadvise64_64, 0, 4}, /* 221 */
+ {"timer_create", lx_timer_create, 0, 3}, /* 222 */
+ {"timer_settime", lx_timer_settime, 0, 4}, /* 223 */
+ {"timer_gettime", lx_timer_gettime, 0, 2}, /* 224 */
+ {"timer_getoverrun", lx_timer_getoverrun, 0, 1}, /* 225 */
+ {"timer_delete", lx_timer_delete, 0, 1}, /* 226 */
+ {"clock_settime", lx_clock_settime, 0, 2}, /* 227 */
+ {"clock_gettime", lx_clock_gettime, 0, 2}, /* 228 */
+ {"clock_getres", lx_clock_getres, 0, 2}, /* 229 */
+ {"clock_nanosleep", lx_clock_nanosleep, 0, 4}, /* 230 */
+ {"exit_group", lx_group_exit, 0, 1}, /* 231 */
+ {"epoll_wait", lx_epoll_wait, 0, 4}, /* 232 */
+ {"epoll_ctl", lx_epoll_ctl, 0, 4}, /* 233 */
+ {"tgkill", LX_IKE(tgkill), LX_SYS_IKE, 3}, /* 234 */
+ {"utimes", lx_utimes, 0, 2}, /* 235 */
+ {"vserver", NULL, NOSYS_NULL, 0}, /* 236 */
+ {"mbind", NULL, NOSYS_NULL, 0}, /* 237 */
+ {"set_mempolicy", NULL, NOSYS_NULL, 0}, /* 238 */
+ {"get_mempolicy", NULL, NOSYS_NULL, 0}, /* 239 */
+ {"mq_open", NULL, NOSYS_NULL, 0}, /* 240 */
+ {"mq_unlink", NULL, NOSYS_NULL, 0}, /* 241 */
+ {"mq_timedsend", NULL, NOSYS_NULL, 0}, /* 242 */
+ {"mq_timedreceive", NULL, NOSYS_NULL, 0}, /* 243 */
+ {"mq_notify", NULL, NOSYS_NULL, 0}, /* 244 */
+ {"mq_getsetattr", NULL, NOSYS_NULL, 0}, /* 245 */
+ {"kexec_load", NULL, NOSYS_NULL, 0}, /* 246 */
+ {"waitid", lx_waitid, 0, 4}, /* 247 */
+ {"add_key", NULL, NOSYS_NULL, 0}, /* 248 */
+ {"request_key", NULL, NOSYS_NULL, 0}, /* 249 */
+ {"keyctl", NULL, NOSYS_NULL, 0}, /* 250 */
+ {"ioprio_set", NULL, NOSYS_NULL, 0}, /* 251 */
+ {"ioprio_get", NULL, NOSYS_NULL, 0}, /* 252 */
+ {"inotify_init", lx_inotify_init, 0, 0}, /* 253 */
+ {"inotify_add_watch", lx_inotify_add_watch, 0, 3}, /* 254 */
+ {"inotify_rm_watch", lx_inotify_rm_watch, 0, 2}, /* 255 */
+ {"migrate_pages", NULL, NOSYS_NULL, 0}, /* 256 */
+ {"openat", lx_openat, 0, 4}, /* 257 */
+ {"mkdirat", lx_mkdirat, 0, 3}, /* 258 */
+ {"mknodat", lx_mknodat, 0, 4}, /* 259 */
+ {"fchownat", lx_fchownat, 0, 5}, /* 260 */
+ {"futimesat", lx_futimesat, 0, 3}, /* 261 */
+ {"fstatat64", lx_fstatat64, 0, 4}, /* 262 */
+ {"unlinkat", lx_unlinkat, 0, 3}, /* 263 */
+ {"renameat", lx_renameat, 0, 4}, /* 264 */
+ {"linkat", lx_linkat, 0, 5}, /* 265 */
+ {"symlinkat", lx_symlinkat, 0, 3}, /* 266 */
+ {"readlinkat", lx_readlinkat, 0, 4}, /* 267 */
+ {"fchmodat", lx_fchmodat, 0, 4}, /* 268 */
+ {"faccessat", lx_faccessat, 0, 4}, /* 269 */
+ {"pselect6", lx_pselect6, 0, 6}, /* 270 */
+ {"ppoll", lx_ppoll, 0, 5}, /* 271 */
+ {"unshare", NULL, NOSYS_NULL, 0}, /* 272 */
+ {"set_robust_list", NULL, NOSYS_NULL, 0}, /* 273 */
+ {"get_robust_list", NULL, NOSYS_NULL, 0}, /* 274 */
+ {"splice", NULL, NOSYS_NULL, 0}, /* 275 */
+ {"tee", NULL, NOSYS_NULL, 0}, /* 276 */
+ {"sync_file_range", NULL, NOSYS_NULL, 0}, /* 277 */
+ {"vmsplice", NULL, NOSYS_NULL, 0}, /* 278 */
+ {"move_pages", NULL, NOSYS_NULL, 0}, /* 279 */
+ {"utimensat", lx_utimensat, 0, 4}, /* 280 */
+ {"epoll_pwait", lx_epoll_pwait, 0, 5}, /* 281 */
+ {"signalfd", NULL, NOSYS_NULL, 0}, /* 282 */
+ {"timerfd_create", NULL, NOSYS_NULL, 0}, /* 283 */
+ {"eventfd", NULL, NOSYS_NULL, 0}, /* 284 */
+ {"fallocate", NULL, NOSYS_NULL, 0}, /* 285 */
+ {"timerfd_settime", NULL, NOSYS_NULL, 0}, /* 286 */
+ {"timerfd_gettime", NULL, NOSYS_NULL, 0}, /* 287 */
+ {"accept4", lx_accept4, 0, 4}, /* 288 */
+ {"signalfd4", NULL, NOSYS_NULL, 0}, /* 289 */
+ {"eventfd2", NULL, NOSYS_NULL, 0}, /* 290 */
+ {"epoll_create1", lx_epoll_create1, 0, 1}, /* 291 */
+ {"dup3", lx_dup3, 0, 3}, /* 292 */
+ {"pipe2", lx_pipe2, 0, 2}, /* 293 */
+ {"inotify_init1", lx_inotify_init1, 0, 1}, /* 294 */
+ {"preadv", NULL, NOSYS_NULL, 0}, /* 295 */
+ {"pwritev", NULL, NOSYS_NULL, 0}, /* 296 */
+ {"rt_tgsigqueueinfo", lx_rt_tgsigqueueinfo, 0, 4}, /* 297 */
+ {"perf_event_open", NULL, NOSYS_NULL, 0}, /* 298 */
+ {"recvmmsg", NULL, NOSYS_NULL, 0}, /* 299 */
+ {"fanotify_init", NULL, NOSYS_NULL, 0}, /* 300 */
+ {"fanotify_mark", NULL, NOSYS_NULL, 0}, /* 301 */
+ {"prlimit64", lx_prlimit64, 0, 4}, /* 302 */
+ {"name_to_handle_at", NULL, NOSYS_NULL, 0}, /* 303 */
+ {"open_by_handle_at", NULL, NOSYS_NULL, 0}, /* 304 */
+ {"clock_adjtime", NULL, NOSYS_NULL, 0}, /* 305 */
+ {"syncfs", NULL, NOSYS_NULL, 0}, /* 306 */
+ {"sendmmsg", NULL, NOSYS_NULL, 0}, /* 307 */
+ {"setns", NULL, NOSYS_NULL, 0}, /* 309 */
+ {"getcpu", lx_getcpu, 0, 3}, /* 309 */
+ {"process_vm_readv", NULL, NOSYS_NULL, 0}, /* 310 */
+ {"process_vm_writev", NULL, NOSYS_NULL, 0}, /* 311 */
+ {"kcmp", NULL, NOSYS_NULL, 0}, /* 312 */
+ {"finit_module", NULL, NOSYS_NULL, 0}, /* 313 */
+ {"sched_setattr", NULL, NOSYS_NULL, 0}, /* 314 */
+ {"sched_getattr", NULL, NOSYS_NULL, 0}, /* 315 */
+ {"renameat2", NULL, NOSYS_NULL, 0}, /* 316 */
+
+ /* XXX TBD gap then x32 syscalls from 512 - 544 */
+};
+
+#else
+/* The following is the 32-bit syscall table */
+
+static struct lx_sysent sysents[] = {
+ {"nosys", NULL, NOSYS_NONE, 0}, /* 0 */
+ {"exit", lx_exit, 0, 1}, /* 1 */
+ {"fork", lx_fork, 0, 0}, /* 2 */
+ {"read", LX_IKE(read), LX_SYS_IKE, 3}, /* 3 */
+ {"write", lx_write, 0, 3}, /* 4 */
+ {"open", lx_open, 0, 3}, /* 5 */
+ {"close", lx_close, 0, 1}, /* 6 */
+ {"waitpid", lx_waitpid, 0, 3}, /* 7 */
+ {"creat", lx_creat, 0, 2}, /* 8 */
+ {"link", lx_link, 0, 2}, /* 9 */
+ {"unlink", lx_unlink, 0, 1}, /* 10 */
+ {"execve", lx_execve, 0, 3}, /* 11 */
+ {"chdir", lx_chdir, 0, 1}, /* 12 */
+ {"time", lx_time, 0, 1}, /* 13 */
+ {"mknod", lx_mknod, 0, 3}, /* 14 */
+ {"chmod", lx_chmod, 0, 2}, /* 15 */
+ {"lchown16", lx_lchown16, 0, 3}, /* 16 */
+ {"break", NULL, NOSYS_OBSOLETE, 0}, /* 17 */
+ {"stat", NULL, NOSYS_OBSOLETE, 0}, /* 18 */
+ {"lseek", lx_lseek, 0, 3}, /* 19 */
+ {"getpid", lx_getpid, 0, 0}, /* 20 */
+ {"mount", lx_mount, 0, 5}, /* 21 */
+ {"umount", lx_umount, 0, 1}, /* 22 */
+ {"setuid16", lx_setuid16, 0, 1}, /* 23 */
+ {"getuid16", lx_getuid16, 0, 0}, /* 24 */
+ {"stime", lx_stime, 0, 1}, /* 25 */
+ {"ptrace", lx_ptrace, 0, 4}, /* 26 */
+ {"alarm", lx_alarm, 0, 1}, /* 27 */
+ {"fstat", NULL, NOSYS_OBSOLETE, 0}, /* 28 */
+ {"pause", lx_pause, 0, 0}, /* 29 */
+ {"utime", lx_utime, 0, 2}, /* 30 */
+ {"stty", NULL, NOSYS_OBSOLETE, 0}, /* 31 */
+ {"gtty", NULL, NOSYS_OBSOLETE, 0}, /* 32 */
+ {"access", lx_access, 0, 2}, /* 33 */
+ {"nice", lx_nice, 0, 1}, /* 34 */
+ {"ftime", NULL, NOSYS_OBSOLETE, 0}, /* 35 */
+ {"sync", lx_sync, 0, 0}, /* 36 */
+ {"kill", LX_IKE(kill), LX_SYS_IKE, 2}, /* 37 */
+ {"rename", lx_rename, 0, 2}, /* 38 */
+ {"mkdir", lx_mkdir, 0, 2}, /* 39 */
+ {"rmdir", lx_rmdir, 0, 1}, /* 40 */
+ {"dup", lx_dup, 0, 1}, /* 41 */
+ {"pipe", LX_IKE(pipe), LX_SYS_IKE, 1}, /* 42 */
+ {"times", lx_times, 0, 1}, /* 43 */
+ {"prof", NULL, NOSYS_OBSOLETE, 0}, /* 44 */
+ {"brk", LX_IKE(brk), LX_SYS_IKE, 1}, /* 45 */
+ {"setgid16", lx_setgid16, 0, 1}, /* 46 */
+ {"getgid16", lx_getgid16, 0, 0}, /* 47 */
+ {"signal", lx_signal, 0, 2}, /* 48 */
+ {"geteuid16", lx_geteuid16, 0, 0}, /* 49 */
+ {"getegid16", lx_getegid16, 0, 0}, /* 50 */
+ {"acct", NULL, NOSYS_NO_EQUIV, 0}, /* 51 */
+ {"umount2", lx_umount2, 0, 2}, /* 52 */
+ {"lock", NULL, NOSYS_OBSOLETE, 0}, /* 53 */
+ {"ioctl", LX_IKE(ioctl), LX_SYS_IKE, 3}, /* 54 */
+ {"fcntl", lx_fcntl, 0, 3}, /* 55 */
+ {"mpx", NULL, NOSYS_OBSOLETE, 0}, /* 56 */
+ {"setpgid", lx_setpgid, 0, 2}, /* 57 */
+ {"ulimit", NULL, NOSYS_OBSOLETE, 0}, /* 58 */
+ {"olduname", NULL, NOSYS_OBSOLETE, 0}, /* 59 */
+ {"umask", lx_umask, 0, 1}, /* 60 */
+ {"chroot", lx_chroot, 0, 1}, /* 61 */
+ {"ustat", NULL, NOSYS_OBSOLETE, 2}, /* 62 */
+ {"dup2", lx_dup2, 0, 2}, /* 63 */
+ {"getppid", LX_IKE(getppid), LX_SYS_IKE, 0}, /* 64 */
+ {"getpgrp", lx_getpgrp, 0, 0}, /* 65 */
+ {"setsid", lx_setsid, 0, 0}, /* 66 */
+ {"sigaction", lx_sigaction, 0, 3}, /* 67 */
+ {"sgetmask", NULL, NOSYS_OBSOLETE, 0}, /* 68 */
+ {"ssetmask", NULL, NOSYS_OBSOLETE, 0}, /* 69 */
+ {"setreuid16", lx_setreuid16, 0, 2}, /* 70 */
+ {"setregid16", lx_setregid16, 0, 2}, /* 71 */
+ {"sigsuspend", lx_sigsuspend, 0, 1}, /* 72 */
+ {"sigpending", lx_sigpending, 0, 1}, /* 73 */
+ {"sethostname", lx_sethostname, 0, 2}, /* 74 */
+ {"setrlimit", lx_setrlimit, 0, 2}, /* 75 */
+ {"getrlimit", lx_oldgetrlimit, 0, 2}, /* 76 */
+ {"getrusage", lx_getrusage, 0, 2}, /* 77 */
+ {"gettimeofday", lx_gettimeofday, 0, 2}, /* 78 */
+ {"settimeofday", lx_settimeofday, 0, 2}, /* 79 */
+ {"getgroups16", lx_getgroups16, 0, 2}, /* 80 */
+ {"setgroups16", lx_setgroups16, 0, 2}, /* 81 */
+ {"select", NULL, NOSYS_OBSOLETE, 0}, /* 82 */
+ {"symlink", lx_symlink, 0, 2}, /* 83 */
+ {"oldlstat", NULL, NOSYS_OBSOLETE, 0}, /* 84 */
+ {"readlink", lx_readlink, 0, 3}, /* 85 */
+ {"uselib", NULL, NOSYS_KERNEL, 0}, /* 86 */
+ {"swapon", NULL, NOSYS_KERNEL, 0}, /* 87 */
+ {"reboot", lx_reboot, 0, 4}, /* 88 */
+ {"readdir", lx_readdir, 0, 3}, /* 89 */
+ {"mmap", lx_mmap, 0, 6}, /* 90 */
+ {"munmap", lx_munmap, 0, 2}, /* 91 */
+ {"truncate", lx_truncate, 0, 2}, /* 92 */
+ {"ftruncate", lx_ftruncate, 0, 2}, /* 93 */
+ {"fchmod", lx_fchmod, 0, 2}, /* 94 */
+ {"fchown16", lx_fchown16, 0, 3}, /* 95 */
+ {"getpriority", lx_getpriority, 0, 2}, /* 96 */
+ {"setpriority", lx_setpriority, 0, 3}, /* 97 */
+ {"profil", NULL, NOSYS_NO_EQUIV, 0}, /* 98 */
+ {"statfs", lx_statfs, 0, 2}, /* 99 */
+ {"fstatfs", lx_fstatfs, 0, 2}, /* 100 */
+ {"ioperm", NULL, NOSYS_NO_EQUIV, 0}, /* 101 */
+ {"socketcall", lx_socketcall, 0, 2}, /* 102 */
+ {"syslog", lx_syslog, 0, 3}, /* 103 */
+ {"setitimer", lx_setitimer, 0, 3}, /* 104 */
+ {"getitimer", lx_getitimer, 0, 2}, /* 105 */
+ {"stat", lx_stat, 0, 2}, /* 106 */
+ {"lstat", lx_lstat, 0, 2}, /* 107 */
+ {"fstat", lx_fstat, 0, 2}, /* 108 */
+ {"uname", NULL, NOSYS_OBSOLETE, 0}, /* 109 */
+ {"oldiopl", NULL, NOSYS_NO_EQUIV, 0}, /* 110 */
+ {"vhangup", lx_vhangup, 0, 0}, /* 111 */
+ {"idle", NULL, NOSYS_NO_EQUIV, 0}, /* 112 */
+ {"vm86old", NULL, NOSYS_OBSOLETE, 0}, /* 113 */
+ {"wait4", lx_wait4, 0, 4}, /* 114 */
+ {"swapoff", NULL, NOSYS_KERNEL, 0}, /* 115 */
+ {"sysinfo", lx_sysinfo32, 0, 1}, /* 116 */
+ {"ipc", lx_ipc, 0, 5}, /* 117 */
+ {"fsync", lx_fsync, 0, 1}, /* 118 */
+ {"sigreturn", lx_sigreturn, 0, 1}, /* 119 */
+ {"clone", lx_clone, 0, 5}, /* 120 */
+ {"setdomainname", lx_setdomainname, 0, 2}, /* 121 */
+ {"uname", lx_uname, 0, 1}, /* 122 */
+ {"modify_ldt", LX_IKE(modify_ldt), LX_SYS_IKE, 3}, /* 123 */
+ {"adjtimex", lx_adjtimex, 0, 1}, /* 124 */
+ {"mprotect", lx_mprotect, 0, 3}, /* 125 */
+ {"sigprocmask", lx_sigprocmask, 0, 3}, /* 126 */
+ {"create_module", NULL, NOSYS_KERNEL, 0}, /* 127 */
+ {"init_module", NULL, NOSYS_KERNEL, 0}, /* 128 */
+ {"delete_module", NULL, NOSYS_KERNEL, 0}, /* 129 */
+ {"get_kernel_syms", NULL, NOSYS_KERNEL, 0}, /* 130 */
+ {"quotactl", NULL, NOSYS_KERNEL, 0}, /* 131 */
+ {"getpgid", lx_getpgid, 0, 1}, /* 132 */
+ {"fchdir", lx_fchdir, 0, 1}, /* 133 */
+ {"bdflush", NULL, NOSYS_KERNEL, 0}, /* 134 */
+ {"sysfs", lx_sysfs, 0, 3}, /* 135 */
+ {"personality", lx_personality, 0, 1}, /* 136 */
+ {"afs_syscall", NULL, NOSYS_KERNEL, 0}, /* 137 */
+ {"setfsuid16", lx_setfsuid16, 0, 1}, /* 138 */
+ {"setfsgid16", lx_setfsgid16, 0, 1}, /* 139 */
+ {"llseek", lx_llseek, 0, 5}, /* 140 */
+ {"getdents", lx_getdents, 0, 3}, /* 141 */
+ {"select", lx_select, 0, 5}, /* 142 */
+ {"flock", lx_flock, 0, 2}, /* 143 */
+ {"msync", lx_msync, 0, 3}, /* 144 */
+ {"readv", lx_readv, 0, 3}, /* 145 */
+ {"writev", lx_writev, 0, 3}, /* 146 */
+ {"getsid", lx_getsid, 0, 1}, /* 147 */
+ {"fdatasync", lx_fdatasync, 0, 1}, /* 148 */
+ {"sysctl", lx_sysctl, 0, 1}, /* 149 */
+ {"mlock", lx_mlock, 0, 2}, /* 150 */
+ {"munlock", lx_munlock, 0, 2}, /* 151 */
+ {"mlockall", lx_mlockall, 0, 1}, /* 152 */
+ {"munlockall", lx_munlockall, 0, 0}, /* 153 */
+ {"sched_setparam", lx_sched_setparam, 0, 2}, /* 154 */
+ {"sched_getparam", lx_sched_getparam, 0, 2}, /* 155 */
+ {"sched_setscheduler", lx_sched_setscheduler, 0, 3}, /* 156 */
+ {"sched_getscheduler", lx_sched_getscheduler, 0, 1}, /* 157 */
+ {"sched_yield", lx_yield, 0, 0}, /* 158 */
+ {"sched_get_priority_max", lx_sched_get_priority_max, 0, 1}, /* 159 */
+ {"sched_get_priority_min", lx_sched_get_priority_min, 0, 1}, /* 160 */
+ {"sched_rr_get_interval", lx_sched_rr_get_interval, 0, 2}, /* 161 */
+ {"nanosleep", lx_nanosleep, 0, 2}, /* 162 */
+ {"mremap", lx_remap, 0, 5}, /* 163 */
+ {"setresuid16", LX_IKE(setresuid16), LX_SYS_IKE, 3}, /* 164 */
+ {"getresuid16", lx_getresuid16, 0, 3}, /* 165 */
+ {"vm86", NULL, NOSYS_NO_EQUIV, 0}, /* 166 */
+ {"query_module", lx_query_module, NOSYS_KERNEL, 5}, /* 167 */
+ {"poll", lx_poll, 0, 3}, /* 168 */
+ {"nfsservctl", NULL, NOSYS_KERNEL, 0}, /* 169 */
+ {"setresgid16", LX_IKE(setresgid16), LX_SYS_IKE, 3}, /* 170 */
+ {"getresgid16", lx_getresgid16, 0, 3}, /* 171 */
+ {"prctl", lx_prctl, 0, 5}, /* 172 */
+ {"rt_sigreturn", lx_rt_sigreturn, 0, 0}, /* 173 */
+ {"rt_sigaction", lx_rt_sigaction, 0, 4}, /* 174 */
+ {"rt_sigprocmask", lx_rt_sigprocmask, 0, 4}, /* 175 */
+ {"rt_sigpending", lx_rt_sigpending, 0, 2}, /* 176 */
+ {"rt_sigtimedwait", lx_rt_sigtimedwait, 0, 4}, /* 177 */
+ {"rt_sigqueueinfo", lx_rt_sigqueueinfo, 0, 3}, /* 178 */
+ {"rt_sigsuspend", lx_rt_sigsuspend, 0, 2}, /* 179 */
+ {"pread64", lx_pread64, 0, 5}, /* 180 */
+ {"pwrite64", lx_pwrite64, 0, 5}, /* 181 */
+ {"chown16", lx_chown16, 0, 3}, /* 182 */
+ {"getcwd", lx_getcwd, 0, 2}, /* 183 */
+ {"capget", lx_capget, 0, 2}, /* 184 */
+ {"capset", lx_capset, 0, 2}, /* 185 */
+ {"sigaltstack", lx_sigaltstack, 0, 2}, /* 186 */
+ {"sendfile", lx_sendfile, 0, 4}, /* 187 */
+ {"getpmsg", NULL, NOSYS_OBSOLETE, 0}, /* 188 */
+ {"putpmsg", NULL, NOSYS_OBSOLETE, 0}, /* 189 */
+ {"vfork", lx_vfork, 0, 0}, /* 190 */
+ {"getrlimit", lx_getrlimit, 0, 2}, /* 191 */
+ {"mmap2", lx_mmap2, LX_SYS_EBPARG6, 6}, /* 192 */
+ {"truncate64", lx_truncate64, 0, 3}, /* 193 */
+ {"ftruncate64", lx_ftruncate64, 0, 3}, /* 194 */
+ {"stat64", lx_stat64, 0, 2}, /* 195 */
+ {"lstat64", lx_lstat64, 0, 2}, /* 196 */
+ {"fstat64", lx_fstat64, 0, 2}, /* 197 */
+ {"lchown", lx_lchown, 0, 3}, /* 198 */
+ {"getuid", lx_getuid, 0, 0}, /* 199 */
+ {"getgid", lx_getgid, 0, 0}, /* 200 */
+ {"geteuid", lx_geteuid, 0, 0}, /* 201 */
+ {"getegid", lx_getegid, 0, 0}, /* 202 */
+ {"setreuid", lx_setreuid, 0, 0}, /* 203 */
+ {"setregid", lx_setregid, 0, 0}, /* 204 */
+ {"getgroups", lx_getgroups, 0, 2}, /* 205 */
+ {"setgroups", lx_setgroups, 0, 2}, /* 206 */
+ {"fchown", lx_fchown, 0, 3}, /* 207 */
+ {"setresuid", LX_IKE(setresuid), LX_SYS_IKE, 3}, /* 208 */
+ {"getresuid", lx_getresuid, 0, 3}, /* 209 */
+ {"setresgid", LX_IKE(setresgid), LX_SYS_IKE, 3}, /* 210 */
+ {"getresgid", lx_getresgid, 0, 3}, /* 211 */
+ {"chown", lx_chown, 0, 3}, /* 212 */
+ {"setuid", lx_setuid, 0, 1}, /* 213 */
+ {"setgid", lx_setgid, 0, 1}, /* 214 */
+ {"setfsuid", lx_setfsuid, 0, 1}, /* 215 */
+ {"setfsgid", lx_setfsgid, 0, 1}, /* 216 */
+ {"pivot_root", NULL, NOSYS_KERNEL, 0}, /* 217 */
+ {"mincore", lx_mincore, 0, 3}, /* 218 */
+ {"madvise", lx_madvise, 0, 3}, /* 219 */
+ {"getdents64", lx_getdents64, 0, 3}, /* 220 */
+ {"fcntl64", lx_fcntl64, 0, 3}, /* 221 */
+ {"tux", NULL, NOSYS_NO_EQUIV, 0}, /* 222 */
+ {"security", NULL, NOSYS_NO_EQUIV, 0}, /* 223 */
+ {"gettid", LX_IKE(gettid), LX_SYS_IKE, 0}, /* 224 */
+ {"readahead", NULL, NOSYS_NO_EQUIV, 0}, /* 225 */
+ {"setxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 226 */
+ {"lsetxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 227 */
+ {"fsetxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 228 */
+ {"getxattr", lx_xattr4, 0, 4}, /* 229 */
+ {"lgetxattr", lx_xattr4, 0, 4}, /* 230 */
+ {"fgetxattr", lx_xattr4, 0, 4}, /* 231 */
+ {"listxattr", lx_xattr3, 0, 3}, /* 232 */
+ {"llistxattr", lx_xattr3, 0, 3}, /* 233 */
+ {"flistxattr", lx_xattr3, 0, 3}, /* 234 */
+ {"removexattr", lx_xattr2, 0, 2}, /* 235 */
+ {"lremovexattr", lx_xattr2, 0, 2}, /* 236 */
+ {"fremovexattr", lx_xattr2, 0, 2}, /* 237 */
+ {"tkill", LX_IKE(tkill), LX_SYS_IKE, 2}, /* 238 */
+ {"sendfile64", lx_sendfile64, 0, 4}, /* 239 */
+ {"futex", LX_IKE(futex), LX_SYS_IKE | LX_SYS_EBPARG6, 6}, /* 240 */
+ {"sched_setaffinity", lx_sched_setaffinity, 0, 3}, /* 241 */
+ {"sched_getaffinity", lx_sched_getaffinity, 0, 3}, /* 242 */
+ {"set_thread_area", LX_IKE(set_thread_area), LX_SYS_IKE, 1}, /* 243 */
+ {"get_thread_area", LX_IKE(get_thread_area), LX_SYS_IKE, 1}, /* 244 */
+ {"io_setup", NULL, NOSYS_NO_EQUIV, 0}, /* 245 */
+ {"io_destroy", NULL, NOSYS_NO_EQUIV, 0}, /* 246 */
+ {"io_getevents", NULL, NOSYS_NO_EQUIV, 0}, /* 247 */
+ {"io_submit", NULL, NOSYS_NO_EQUIV, 0}, /* 248 */
+ {"io_cancel", NULL, NOSYS_NO_EQUIV, 0}, /* 249 */
+ {"fadvise64", lx_fadvise64, 0, 4}, /* 250 */
+ {"nosys", NULL, 0, 0}, /* 251 */
+ {"group_exit", lx_group_exit, 0, 1}, /* 252 */
+ {"lookup_dcookie", NULL, NOSYS_NO_EQUIV, 0}, /* 253 */
+ {"epoll_create", lx_epoll_create, 0, 1}, /* 254 */
+ {"epoll_ctl", lx_epoll_ctl, 0, 4}, /* 255 */
+ {"epoll_wait", lx_epoll_wait, 0, 4}, /* 256 */
+ {"remap_file_pages", NULL, NOSYS_NO_EQUIV, 0}, /* 257 */
+ {"set_tid_address", LX_IKE(set_tid_address), LX_SYS_IKE, 1}, /* 258 */
+ {"timer_create", lx_timer_create, 0, 3}, /* 259 */
+ {"timer_settime", lx_timer_settime, 0, 4}, /* 260 */
+ {"timer_gettime", lx_timer_gettime, 0, 2}, /* 261 */
+ {"timer_getoverrun", lx_timer_getoverrun, 0, 1}, /* 262 */
+ {"timer_delete", lx_timer_delete, 0, 1}, /* 263 */
+ {"clock_settime", lx_clock_settime, 0, 2}, /* 264 */
+ {"clock_gettime", lx_clock_gettime, 0, 2}, /* 265 */
+ {"clock_getres", lx_clock_getres, 0, 2}, /* 266 */
+ {"clock_nanosleep", lx_clock_nanosleep, 0, 4}, /* 267 */
+ {"statfs64", lx_statfs64, 0, 2}, /* 268 */
+ {"fstatfs64", lx_fstatfs64, 0, 2}, /* 269 */
+ {"tgkill", LX_IKE(tgkill), LX_SYS_IKE, 3}, /* 270 */
+
+ /* The following system calls only exist in kernel 2.6 and greater */
+ {"utimes", lx_utimes, 0, 2}, /* 271 */
+ {"fadvise64_64", lx_fadvise64_64, 0, 4}, /* 272 */
+ {"vserver", NULL, NOSYS_NULL, 0}, /* 273 */
+ {"mbind", NULL, NOSYS_NULL, 0}, /* 274 */
+ {"get_mempolicy", NULL, NOSYS_NULL, 0}, /* 275 */
+ {"set_mempolicy", NULL, NOSYS_NULL, 0}, /* 276 */
+ {"mq_open", NULL, NOSYS_NULL, 0}, /* 277 */
+ {"mq_unlink", NULL, NOSYS_NULL, 0}, /* 278 */
+ {"mq_timedsend", NULL, NOSYS_NULL, 0}, /* 279 */
+ {"mq_timedreceive", NULL, NOSYS_NULL, 0}, /* 280 */
+ {"mq_notify", NULL, NOSYS_NULL, 0}, /* 281 */
+ {"mq_getsetattr", NULL, NOSYS_NULL, 0}, /* 282 */
+ {"kexec_load", NULL, NOSYS_NULL, 0}, /* 283 */
+ {"waitid", lx_waitid, 0, 4}, /* 284 */
+ {"sys_setaltroot", NULL, NOSYS_NULL, 0}, /* 285 */
+ {"add_key", NULL, NOSYS_NULL, 0}, /* 286 */
+ {"request_key", NULL, NOSYS_NULL, 0}, /* 287 */
+ {"keyctl", NULL, NOSYS_NULL, 0}, /* 288 */
+ {"ioprio_set", NULL, NOSYS_NULL, 0}, /* 289 */
+ {"ioprio_get", NULL, NOSYS_NULL, 0}, /* 290 */
+ {"inotify_init", lx_inotify_init, 0, 0}, /* 291 */
+ {"inotify_add_watch", lx_inotify_add_watch, 0, 3}, /* 292 */
+ {"inotify_rm_watch", lx_inotify_rm_watch, 0, 2}, /* 293 */
+ {"migrate_pages", NULL, NOSYS_NULL, 0}, /* 294 */
+ {"openat", lx_openat, 0, 4}, /* 295 */
+ {"mkdirat", lx_mkdirat, 0, 3}, /* 296 */
+ {"mknodat", lx_mknodat, 0, 4}, /* 297 */
+ {"fchownat", lx_fchownat, 0, 5}, /* 298 */
+ {"futimesat", lx_futimesat, 0, 3}, /* 299 */
+ {"fstatat64", lx_fstatat64, 0, 4}, /* 300 */
+ {"unlinkat", lx_unlinkat, 0, 3}, /* 301 */
+ {"renameat", lx_renameat, 0, 4}, /* 302 */
+ {"linkat", lx_linkat, 0, 5}, /* 303 */
+ {"symlinkat", lx_symlinkat, 0, 3}, /* 304 */
+ {"readlinkat", lx_readlinkat, 0, 4}, /* 305 */
+ {"fchmodat", lx_fchmodat, 0, 4}, /* 306 */
+ {"faccessat", lx_faccessat, 0, 4}, /* 307 */
+ {"pselect6", lx_pselect6, LX_SYS_EBPARG6, 6}, /* 308 */
+ {"ppoll", lx_ppoll, 0, 5}, /* 309 */
+ {"unshare", NULL, NOSYS_NULL, 0}, /* 310 */
+ {"set_robust_list", NULL, NOSYS_NULL, 0}, /* 311 */
+ {"get_robust_list", NULL, NOSYS_NULL, 0}, /* 312 */
+ {"splice", NULL, NOSYS_NULL, 0}, /* 313 */
+ {"sync_file_range", NULL, NOSYS_NULL, 0}, /* 314 */
+ {"tee", NULL, NOSYS_NULL, 0}, /* 315 */
+ {"vmsplice", NULL, NOSYS_NULL, 0}, /* 316 */
+ {"move_pages", NULL, NOSYS_NULL, 0}, /* 317 */
+ {"getcpu", lx_getcpu, 0, 3}, /* 318 */
+ {"epoll_pwait", lx_epoll_pwait, 0, 5}, /* 319 */
+ {"utimensat", lx_utimensat, 0, 4}, /* 320 */
+ {"signalfd", NULL, NOSYS_NULL, 0}, /* 321 */
+ {"timerfd_create", NULL, NOSYS_NULL, 0}, /* 322 */
+ {"eventfd", NULL, NOSYS_NULL, 0}, /* 323 */
+ {"fallocate", NULL, NOSYS_NULL, 0}, /* 324 */
+ {"timerfd_settime", NULL, NOSYS_NULL, 0}, /* 325 */
+ {"timerfd_gettime", NULL, NOSYS_NULL, 0}, /* 326 */
+ {"signalfd4", NULL, NOSYS_NULL, 0}, /* 327 */
+ {"eventfd2", NULL, NOSYS_NULL, 0}, /* 328 */
+ {"epoll_create1", lx_epoll_create1, 0, 1}, /* 329 */
+ {"dup3", lx_dup3, 0, 3}, /* 330 */
+ {"pipe2", lx_pipe2, 0, 2}, /* 331 */
+ {"inotify_init1", lx_inotify_init1, 0, 1}, /* 332 */
+ {"preadv", NULL, NOSYS_NULL, 0}, /* 333 */
+ {"pwritev", NULL, NOSYS_NULL, 0}, /* 334 */
+ {"rt_tgsigqueueinfo", lx_rt_tgsigqueueinfo, 0, 4}, /* 335 */
+ {"perf_event_open", NULL, NOSYS_NULL, 0}, /* 336 */
+ {"recvmmsg", NULL, NOSYS_NULL, 0}, /* 337 */
+ {"fanotify_init", NULL, NOSYS_NULL, 0}, /* 338 */
+ {"fanotify_mark", NULL, NOSYS_NULL, 0}, /* 339 */
+ {"prlimit64", lx_prlimit64, 0, 4}, /* 340 */
+ {"name_to_handle_at", NULL, NOSYS_NULL, 0}, /* 341 */
+ {"open_by_handle_at", NULL, NOSYS_NULL, 0}, /* 342 */
+ {"clock_adjtime", NULL, NOSYS_NULL, 0}, /* 343 */
+ {"syncfs", NULL, NOSYS_NULL, 0}, /* 344 */
+ {"sendmmsg", NULL, NOSYS_NULL, 0}, /* 345 */
+ {"setns", NULL, NOSYS_NULL, 0}, /* 346 */
+ {"process_vm_readv", NULL, NOSYS_NULL, 0}, /* 347 */
+ {"process_vm_writev", NULL, NOSYS_NULL, 0}, /* 348 */
+ {"kcmp", NULL, NOSYS_NULL, 0}, /* 349 */
+ {"finit_module", NULL, NOSYS_NULL, 0}, /* 350 */
+ {"sched_setattr", NULL, NOSYS_NULL, 0}, /* 351 */
+ {"sched_getattr", NULL, NOSYS_NULL, 0}, /* 352 */
+};
+#endif
diff --git a/usr/src/lib/brand/lx/lx_brand/common/lx_thunk_server.c b/usr/src/lib/brand/lx/lx_brand/common/lx_thunk_server.c
new file mode 100644
index 0000000000..02bfe48e01
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/lx_thunk_server.c
@@ -0,0 +1,1033 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * The BrandZ Linux thunking server.
+ *
+ * The interfaces defined in this file form the server side of a bridge
+ * to allow native solaris process to access Linux services. Currently
+ * the Linux services that is made accessible by these interfaces here
+ * are:
+ * - Linux host <-> address naming services
+ * - Linux service <-> port naming services
+ * - Linux syslog
+ *
+ * Access to all these services is provided through a doors server.
+ * Currently the only client of these interfaces and the process that
+ * initially starts up the doors server is lx_thunk.so.
+ *
+ * lx_thunk.so is a native solaris library that is loaded into native
+ * solaris process that need to run inside a Linux zone and have access
+ * to Linux services. When lx_thunk.so receives a request that requires
+ * accessing Linux services it creates a "thunk server" process by
+ * forking and executing the following shell script (which runs as
+ * a branded /bin/sh Linux process):
+ * /native/usr/lib/brand/lx/lx_thunk
+ *
+ * The first and only thing this shell script attempts to do is re-exec
+ * itself. The brand library will detect when this script attempts to
+ * re-exec itself and take control of the process. The exec() system
+ * call made by the Linux shell will never return.
+ *
+ * At this point the process becomes a "thunk server" process.
+ * The first thing it does is a bunch of initialization:
+ *
+ * - Sanity check that a file descriptor based communication mechanism
+ * needed talk to the parent process is correctly initialized.
+ *
+ * - Verify that two predetermined file descriptors are FIFOs.
+ * These FIFOs will be used to establish communications with
+ * the client program that spawned us and which will be sending
+ * us requests.
+ *
+ * - Use existing debugging libraries (libproc.so, librtld_db.so,
+ * and the BrandZ lx plug-in to librtld_db.so) and /native/proc to
+ * walk the Linux link maps in our own address space to determine
+ * the address of the Linux dlsym() function.
+ *
+ * - Use the native Linux dlsym() function to look up other symbols
+ * (for both functions and variables) that we will need access
+ * to service thunking requests.
+ *
+ * - Create a doors server and notify the parent process that we
+ * are ready to service requests.
+ *
+ * - Enter a service loop and wait for requests.
+ *
+ * At this point the lx_thunk process is ready to service door
+ * based requests. When door service request is received the
+ * following happens inside the lx_thunk process:
+ *
+ * - The doors server function is is invoked on a new solaris thread
+ * that the kernel injects into the lx_thunk process. We sanity
+ * check the incoming request, place it on a service queue, and
+ * wait for notification that the request has been completed.
+ *
+ * - A Linux thread takes this request off the service queue
+ * and dispatches it to a service function that will:
+ * - Decode the request.
+ * - Handle the request by invoking native Linux interfaces.
+ * - Encode the results for the request.
+ *
+ * - The Linux thread then notifies the requesting doors server
+ * thread that the request has been completed and goes to sleep
+ * until it receives another request.
+ *
+ * - the solaris door server thread returns the results of the
+ * operation to the caller.
+ *
+ * Notes:
+ *
+ * - The service request hand off operation from the solaris doors thread to
+ * the "Linux thread" is required because only "Linux threads" can call
+ * into Linux code. In this context a "Linux thread" is a thread that
+ * is either the initial thread of a Linux process or a thread that was
+ * created by calling the Linux version of thread_create(). The reason
+ * for this restriction is that any thread that invokes Linux code needs
+ * to have been initialized in the Linux threading libraries and have
+ * things like Linux thread local storage properly setup.
+ *
+ * But under solaris all door server threads are created and destroyed
+ * dynamically. This means that when a doors server function is invoked,
+ * it is invoked via a thread that hasn't been initialized in the Linux
+ * environment and there for can't call directly into Linux code.
+ *
+ * - Currently when a thunk server process is starting up, it communicated
+ * with it's parent via two FIFOs. These FIFOs are setup by the
+ * lx_thunk.so library. After creating the FIFOs and starting the lx_thunk
+ * server, lx_thunk.so writes the name of the file that the door should
+ * be attached to to the first pipe. The lx_thunk server reads in this
+ * value, initialized the server, fattach()s it to the file request by
+ * lx_thunk.so and does a write to the second FIFO to let lx_thunk.so
+ * know that the server is ready to take requests.
+ *
+ * This negotiation could be simplified to use only use one FIFO.
+ * lx_thunk.so would attempt to read from the FIFO and the lx_thunk
+ * server process could send the new door server file descriptor
+ * to this process via an I_SENDFD ioctl (see streamio.7I).
+ *
+ * - The lx_thunk server process will exit when the client process
+ * that it's handling requests for exists. (ie, when there are no
+ * more open file handles to the doors server.)
+ */
+
+#include <assert.h>
+#include <door.h>
+#include <errno.h>
+#include <libproc.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_thread.h>
+#include <sys/lx_thunk_server.h>
+#include <sys/varargs.h>
+#include <thread.h>
+#include <unistd.h>
+
+/*
+ * Generic interfaces used for looking up and calling Linux functions.
+ */
+typedef struct __lx_handle_dlsym *lx_handle_dlsym_t;
+typedef struct __lx_handle_sym *lx_handle_sym_t;
+
+uintptr_t lx_call0(lx_handle_sym_t);
+uintptr_t lx_call1(lx_handle_sym_t, uintptr_t);
+uintptr_t lx_call2(lx_handle_sym_t, uintptr_t, uintptr_t);
+uintptr_t lx_call3(lx_handle_sym_t, uintptr_t, uintptr_t, uintptr_t);
+uintptr_t lx_call4(lx_handle_sym_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t);
+uintptr_t lx_call5(lx_handle_sym_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t, uintptr_t);
+uintptr_t lx_call6(lx_handle_sym_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t, uintptr_t, uintptr_t);
+uintptr_t lx_call7(lx_handle_sym_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+uintptr_t lx_call8(lx_handle_sym_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+
+/*
+ * Flag indicating if this process is destined to become a thunking
+ * server process.
+ */
+static int lxt_server_processes = 0;
+
+/*
+ * Linux function call defines and handles.
+ */
+static lx_handle_dlsym_t lxh_init = NULL;
+
+#define LXTH_GETHOSTBYNAME_R 0
+#define LXTH_GETHOSTBYADDR_R 1
+#define LXTH_GETSERVBYNAME_R 2
+#define LXTH_GETSERVBYPORT_R 3
+#define LXTH_OPENLOG 4
+#define LXTH_SYSLOG 5
+#define LXTH_CLOSELOG 6
+#define LXTH_PROGNAME 7
+
+static struct lxt_handles {
+ int lxth_index;
+ char *lxth_name;
+ lx_handle_sym_t lxth_handle;
+} lxt_handles[] = {
+ { LXTH_GETHOSTBYNAME_R, "gethostbyname_r", NULL },
+ { LXTH_GETHOSTBYADDR_R, "gethostbyaddr_r", NULL },
+ { LXTH_GETSERVBYNAME_R, "getservbyname_r", NULL },
+ { LXTH_GETSERVBYPORT_R, "getservbyport_r", NULL },
+ { LXTH_OPENLOG, "openlog", NULL },
+ { LXTH_SYSLOG, "syslog", NULL },
+ { LXTH_CLOSELOG, "closelog", NULL },
+ { LXTH_PROGNAME, "__progname", NULL },
+ { -1, NULL, NULL },
+};
+
+/*
+ * Door server operations dispatch functions and table.
+ *
+ * When the doors server get's a request for a particlar operation
+ * this dispatch table controls what function will be invoked to
+ * service the request. The function is invoked via Linux thread
+ * so that it can call into native Linux code if necessary.
+ */
+static void lxt_server_gethost(lxt_server_arg_t *request, size_t request_size,
+ char **door_result, size_t *door_result_size);
+static void lxt_server_getserv(lxt_server_arg_t *request, size_t request_size,
+ char **door_result, size_t *door_result_size);
+static void lxt_server_openlog(lxt_server_arg_t *request, size_t request_size,
+ char **door_result, size_t *door_result_size);
+static void lxt_server_syslog(lxt_server_arg_t *request, size_t request_size,
+ char **door_result, size_t *door_result_size);
+static void lxt_server_closelog(lxt_server_arg_t *request, size_t request_size,
+ char **door_result, size_t *door_result_size);
+
+typedef void (*lxt_op_func_t)(lxt_server_arg_t *request, size_t request_size,
+ char **door_result, size_t *door_result_size);
+
+static struct lxt_operations {
+ int lxto_index;
+ lxt_op_func_t lxto_fp;
+} lxt_operations[] = {
+ { LXT_SERVER_OP_PING, NULL },
+ { LXT_SERVER_OP_NAME2HOST, lxt_server_gethost },
+ { LXT_SERVER_OP_ADDR2HOST, lxt_server_gethost },
+ { LXT_SERVER_OP_NAME2SERV, lxt_server_getserv },
+ { LXT_SERVER_OP_PORT2SERV, lxt_server_getserv },
+ { LXT_SERVER_OP_OPENLOG, lxt_server_openlog },
+ { LXT_SERVER_OP_SYSLOG, lxt_server_syslog },
+ { LXT_SERVER_OP_CLOSELOG, lxt_server_closelog },
+};
+
+/*
+ * Structures for passing off requests from doors threads (which are
+ * solaris threads) to a Linux thread that that can handle them.
+ */
+typedef struct lxt_req {
+ lxt_server_arg_t *lxtr_request;
+ size_t lxtr_request_size;
+ char *lxtr_result;
+ size_t lxtr_result_size;
+ int lxtr_complete;
+ cond_t lxtr_complete_cv;
+} lxt_req_t;
+
+static mutex_t lxt_req_lock = DEFAULTMUTEX;
+static cond_t lxt_req_cv = DEFAULTCV;
+static lxt_req_t *lxt_req_ptr = NULL;
+
+static mutex_t lxt_pid_lock = DEFAULTMUTEX;
+static pid_t lxt_pid = NULL;
+
+/*
+ * Interfaces used to call from lx_brand.so into Linux code.
+ */
+typedef struct lookup_cb_arg {
+ struct ps_prochandle *lca_ph;
+ caddr_t lca_ptr;
+} lookup_cb_arg_t;
+
+static int
+/*ARGSUSED*/
+lookup_cb(void *data, const prmap_t *pmp, const char *object)
+{
+ lookup_cb_arg_t *lcap = (lookup_cb_arg_t *)data;
+ prsyminfo_t si;
+ GElf_Sym sym;
+
+ if (Pxlookup_by_name(lcap->lca_ph,
+ LM_ID_BASE, object, "dlsym", &sym, &si) != 0)
+ return (0);
+
+ if (sym.st_shndx == SHN_UNDEF)
+ return (0);
+
+ /*
+ * XXX: we should be more paranoid and verify that the symbol
+ * we just looked up is libdl.so.2`dlsym
+ */
+ lcap->lca_ptr = (caddr_t)(uintptr_t)sym.st_value;
+ return (1);
+}
+
+lx_handle_dlsym_t
+lx_call_init(void)
+{
+ struct ps_prochandle *ph;
+ lookup_cb_arg_t lca;
+ extern int __libc_threaded;
+ int err;
+
+ lx_debug("lx_call_init(): looking up Linux dlsym");
+
+ /*
+ * The handle is really the address of the Linux "dlsym" function.
+ * Once we have this address we can call into the Linux "dlsym"
+ * function to lookup other functions. It's the initial lookup
+ * of "dlsym" that's difficult. To do this we'll leverage the
+ * brand support that we added to librtld_db. We're going
+ * to fire up a seperate native solaris process that will
+ * attach to us via libproc/librtld_db and lookup the symbol
+ * for us.
+ */
+
+ /* Make sure we're single threaded. */
+ if (__libc_threaded) {
+ lx_debug("lx_call_init() fail: "
+ "process must be single threaded");
+ return (NULL);
+ }
+
+ /* Tell libproc.so where the real procfs is mounted. */
+ Pset_procfs_path("/native/proc");
+
+ /* Tell librtld_db.so where the real /native is */
+ (void) rd_ctl(RD_CTL_SET_HELPPATH, "/native");
+
+ /* Grab ourselves but don't stop ourselves. */
+ if ((ph = Pgrab(getpid(),
+ PGRAB_FORCE | PGRAB_RDONLY | PGRAB_NOSTOP, &err)) == NULL) {
+ lx_debug("lx_call_init() fail: Pgrab failed: %s",
+ Pgrab_error(err));
+ return (NULL);
+ }
+
+ lca.lca_ph = ph;
+ lca.lca_ptr = NULL;
+ if (Pobject_iter(ph, lookup_cb, &lca) == -1) {
+ lx_debug("lx_call_init() fail: couldn't find Linux dlsym");
+ return (NULL);
+ }
+
+ lx_debug("lx_call_init(): Linux dlsym = 0x%p", lca.lca_ptr);
+ return ((lx_handle_dlsym_t)lca.lca_ptr);
+}
+
+#define LX_RTLD_DEFAULT ((void *)0)
+#define LX_RTLD_NEXT ((void *) -1l)
+
+lx_handle_sym_t
+lx_call_dlsym(lx_handle_dlsym_t lxh_dlsym, const char *str)
+{
+ lx_handle_sym_t result;
+ lx_debug("lx_call_dlsym: calling Linux dlsym for: %s", str);
+ result = (lx_handle_sym_t)lx_call2((lx_handle_sym_t)lxh_dlsym,
+ (uintptr_t)LX_RTLD_DEFAULT, (uintptr_t)str);
+ lx_debug("lx_call_dlsym: Linux sym: \"%s\" = 0x%p", str, result);
+ return (result);
+}
+
+static uintptr_t
+/*ARGSUSED*/
+lx_call(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2,
+ uintptr_t p3, uintptr_t p4, uintptr_t p5, uintptr_t p6, uintptr_t p7,
+ uintptr_t p8)
+{
+ typedef uintptr_t (*fp8_t)(uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+ lx_regs_t *rp;
+ uintptr_t ret;
+ fp8_t lx_funcp = (fp8_t)lx_ch;
+#if defined(_ILP32)
+ long cur_gs;
+#endif
+
+ rp = lx_syscall_regs();
+
+#if defined(_ILP32)
+ lx_debug("lx_call: loading Linux gs, rp = 0x%p, gs = 0x%p",
+ rp, rp->lxr_gs);
+ lx_swap_gs(rp->lxr_gs, &cur_gs);
+#endif
+
+ lx_debug("lx_call: calling to Linux code at 0x%p", lx_ch);
+ ret = lx_funcp(p1, p2, p3, p4, p5, p6, p7, p8);
+
+#if defined(_ILP32)
+ lx_swap_gs(cur_gs, &rp->lxr_gs);
+#endif
+
+ lx_debug("lx_call: returned from Linux code at 0x%p (%p)", lx_ch, ret);
+ return (ret);
+}
+
+uintptr_t
+lx_call0(lx_handle_sym_t lx_ch)
+{
+ return (lx_call(lx_ch, 0, 0, 0, 0, 0, 0, 0, 0));
+}
+
+uintptr_t
+lx_call1(lx_handle_sym_t lx_ch, uintptr_t p1)
+{
+ return (lx_call(lx_ch, p1, 0, 0, 0, 0, 0, 0, 0));
+}
+
+uintptr_t
+lx_call2(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2)
+{
+ return (lx_call(lx_ch, p1, p2, 0, 0, 0, 0, 0, 0));
+}
+
+uintptr_t
+lx_call3(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ return (lx_call(lx_ch, p1, p2, p3, 0, 0, 0, 0, 0));
+}
+
+uintptr_t
+lx_call4(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2, uintptr_t p3,
+ uintptr_t p4)
+{
+ return (lx_call(lx_ch, p1, p2, p3, p4, 0, 0, 0, 0));
+}
+
+uintptr_t
+lx_call5(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2, uintptr_t p3,
+ uintptr_t p4, uintptr_t p5)
+{
+ return (lx_call(lx_ch, p1, p2, p3, p4, p5, 0, 0, 0));
+}
+
+uintptr_t
+lx_call6(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2, uintptr_t p3,
+ uintptr_t p4, uintptr_t p5, uintptr_t p6)
+{
+ return (lx_call(lx_ch, p1, p2, p3, p4, p5, p6, 0, 0));
+}
+
+uintptr_t
+lx_call7(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2, uintptr_t p3,
+ uintptr_t p4, uintptr_t p5, uintptr_t p6, uintptr_t p7)
+{
+ return (lx_call(lx_ch, p1, p2, p3, p4, p5, p6, p7, 0));
+}
+
+uintptr_t
+lx_call8(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2, uintptr_t p3,
+ uintptr_t p4, uintptr_t p5, uintptr_t p6, uintptr_t p7, uintptr_t p8)
+{
+ return (lx_call(lx_ch, p1, p2, p3, p4, p5, p6, p7, p8));
+}
+
+/*
+ * Linux Thunking Interfaces - Server Side
+ */
+static int
+lxt_gethost_arg_check(lxt_gethost_arg_t *x, int x_size)
+{
+ if (x_size != sizeof (*x) + x->lxt_gh_buf_len - 1)
+ return (-1);
+
+ if ((x->lxt_gh_token_len < 0) || (x->lxt_gh_buf_len < 0))
+ return (-1);
+
+ /* Token and buf should use up all the storage. */
+ if ((x->lxt_gh_token_len + x->lxt_gh_buf_len) != x->lxt_gh_storage_len)
+ return (-1);
+
+ return (0);
+}
+
+static void
+lxt_server_gethost(lxt_server_arg_t *request, size_t request_size,
+ char **door_result, size_t *door_result_size)
+{
+ lxt_gethost_arg_t *data;
+ struct hostent *result, *rv;
+ int token_len, buf_len, type, data_size, i;
+ char *token, *buf;
+ int h_errnop;
+
+ assert((request->lxt_sa_op == LXT_SERVER_OP_NAME2HOST) ||
+ (request->lxt_sa_op == LXT_SERVER_OP_ADDR2HOST));
+
+ /*LINTED*/
+ data = (lxt_gethost_arg_t *)&request->lxt_sa_data[0];
+ data_size = request_size - sizeof (*request) - 1;
+
+ if (!lxt_gethost_arg_check(data, data_size)) {
+ lx_debug("lxt_server_gethost: invalid request");
+ *door_result = NULL;
+ *door_result_size = 0;
+ return;
+ }
+
+ /* Unpack the arguments. */
+ type = data->lxt_gh_type;
+ token = &data->lxt_gh_storage[0];
+ token_len = data->lxt_gh_token_len;
+ result = &data->lxt_gh_result;
+ buf = &data->lxt_gh_storage[data->lxt_gh_token_len];
+ buf_len = data->lxt_gh_buf_len - data->lxt_gh_token_len;
+
+ if (request->lxt_sa_op == LXT_SERVER_OP_NAME2HOST) {
+ (void) lx_call6(lxt_handles[LXTH_GETHOSTBYNAME_R].lxth_handle,
+ (uintptr_t)token, (uintptr_t)result,
+ (uintptr_t)buf, buf_len, (uintptr_t)&rv,
+ (uintptr_t)&h_errnop);
+ } else {
+ (void) lx_call8(lxt_handles[LXTH_GETHOSTBYADDR_R].lxth_handle,
+ (uintptr_t)token, token_len, type, (uintptr_t)result,
+ (uintptr_t)buf, buf_len, (uintptr_t)&rv,
+ (uintptr_t)&h_errnop);
+ }
+
+ if (rv == NULL) {
+ /* the lookup failed */
+ request->lxt_sa_success = 0;
+ request->lxt_sa_errno = errno;
+ data->lxt_gh_h_errno = h_errnop;
+ *door_result = (char *)request;
+ *door_result_size = request_size;
+ return;
+ }
+ request->lxt_sa_success = 1;
+ request->lxt_sa_errno = 0;
+ data->lxt_gh_h_errno = 0;
+
+ /*
+ * The result structure that we would normally return contains a
+ * bunch of pointers, but those pointers are useless to our caller
+ * since they are in a different address space. So before returning
+ * we'll convert all the result pointers into offsets. The caller
+ * can then map the offsets back into pointers.
+ */
+ for (i = 0; result->h_aliases[i] != NULL; i++) {
+ result->h_aliases[i] =
+ LXT_PTR_TO_OFFSET(result->h_aliases[i], buf);
+ }
+ for (i = 0; result->h_addr_list[i] != NULL; i++) {
+ result->h_addr_list[i] =
+ LXT_PTR_TO_OFFSET(result->h_addr_list[i], buf);
+ }
+ result->h_name = LXT_PTR_TO_OFFSET(result->h_name, buf);
+ result->h_aliases = LXT_PTR_TO_OFFSET(result->h_aliases, buf);
+ result->h_addr_list = LXT_PTR_TO_OFFSET(result->h_addr_list, buf);
+
+ *door_result = (char *)request;
+ *door_result_size = request_size;
+}
+
+static int
+lxt_getserv_arg_check(lxt_getserv_arg_t *x, int x_size)
+{
+ if (x_size != sizeof (*x) + x->lxt_gs_buf_len - 1)
+ return (-1);
+
+ if ((x->lxt_gs_token_len < 0) || (x->lxt_gs_buf_len < 0))
+ return (-1);
+
+ /* Token and buf should use up all the storage. */
+ if ((x->lxt_gs_token_len + x->lxt_gs_buf_len) != x->lxt_gs_storage_len)
+ return (-1);
+
+ return (0);
+}
+
+static void
+lxt_server_getserv(lxt_server_arg_t *request, size_t request_size,
+ char **door_result, size_t *door_result_size)
+{
+ lxt_getserv_arg_t *data;
+ struct servent *result, *rv;
+ int token_len, buf_len, data_size, i, port;
+ char *token, *buf, *proto = NULL;
+
+ assert((request->lxt_sa_op == LXT_SERVER_OP_NAME2SERV) ||
+ (request->lxt_sa_op == LXT_SERVER_OP_PORT2SERV));
+
+ /*LINTED*/
+ data = (lxt_getserv_arg_t *)&request->lxt_sa_data[0];
+ data_size = request_size - sizeof (*request) - 1;
+
+ if (!lxt_getserv_arg_check(data, data_size)) {
+ lx_debug("lxt_server_getserv: invalid request");
+ *door_result = NULL;
+ *door_result_size = 0;
+ return;
+ }
+
+ /* Unpack the arguments. */
+ token = &data->lxt_gs_storage[0];
+ token_len = data->lxt_gs_token_len;
+ result = &data->lxt_gs_result;
+ buf = &data->lxt_gs_storage[data->lxt_gs_token_len];
+ buf_len = data->lxt_gs_buf_len - data->lxt_gs_token_len;
+ if (strlen(data->lxt_gs_proto) > 0)
+ proto = data->lxt_gs_proto;
+
+ /* Do more sanity checks */
+ if ((request->lxt_sa_op == LXT_SERVER_OP_PORT2SERV) &&
+ (token_len != sizeof (int))) {
+ lx_debug("lxt_server_getserv: invalid request");
+ *door_result = NULL;
+ *door_result_size = 0;
+ return;
+ }
+
+ if (request->lxt_sa_op == LXT_SERVER_OP_NAME2SERV) {
+ (void) lx_call6(lxt_handles[LXTH_GETSERVBYNAME_R].lxth_handle,
+ (uintptr_t)token, (uintptr_t)proto, (uintptr_t)result,
+ (uintptr_t)buf, buf_len, (uintptr_t)&rv);
+ } else {
+ bcopy(token, &port, sizeof (int));
+ (void) lx_call6(lxt_handles[LXTH_GETSERVBYPORT_R].lxth_handle,
+ port, (uintptr_t)proto, (uintptr_t)result,
+ (uintptr_t)buf, buf_len, (uintptr_t)&rv);
+ }
+
+ if (rv == NULL) {
+ /* the lookup failed */
+ request->lxt_sa_success = 0;
+ request->lxt_sa_errno = errno;
+ *door_result = (char *)request;
+ *door_result_size = request_size;
+ return;
+ }
+ request->lxt_sa_success = 1;
+ request->lxt_sa_errno = 0;
+
+ /*
+ * The result structure that we would normally return contains a
+ * bunch of pointers, but those pointers are useless to our caller
+ * since they are in a different address space. So before returning
+ * we'll convert all the result pointers into offsets. The caller
+ * can then map the offsets back into pointers.
+ */
+ for (i = 0; result->s_aliases[i] != NULL; i++) {
+ result->s_aliases[i] =
+ LXT_PTR_TO_OFFSET(result->s_aliases[i], buf);
+ }
+ result->s_proto = LXT_PTR_TO_OFFSET(result->s_proto, buf);
+ result->s_aliases = LXT_PTR_TO_OFFSET(result->s_aliases, buf);
+ result->s_name = LXT_PTR_TO_OFFSET(result->s_name, buf);
+
+ *door_result = (char *)request;
+ *door_result_size = request_size;
+}
+
+static void
+/*ARGSUSED*/
+lxt_server_openlog(lxt_server_arg_t *request, size_t request_size,
+ char **door_result, size_t *door_result_size)
+{
+ lxt_openlog_arg_t *data;
+ int data_size;
+ static char ident[128];
+
+ assert(request->lxt_sa_op == LXT_SERVER_OP_OPENLOG);
+
+ /*LINTED*/
+ data = (lxt_openlog_arg_t *)&request->lxt_sa_data[0];
+ data_size = request_size - sizeof (*request);
+
+ if (data_size != sizeof (*data)) {
+ lx_debug("lxt_server_openlog: invalid request");
+ *door_result = NULL;
+ *door_result_size = 0;
+ return;
+ }
+
+ /*
+ * Linux expects that the ident pointer passed to openlog()
+ * points to a static string that won't go away. Linux
+ * saves the pointer and references with syslog() is called.
+ * Hence we'll make a local copy of the ident string here.
+ */
+ (void) mutex_lock(&lxt_pid_lock);
+ (void) strlcpy(ident, data->lxt_ol_ident, sizeof (ident));
+ (void) mutex_unlock(&lxt_pid_lock);
+
+ /* Call Linx openlog(). */
+ (void) lx_call3(lxt_handles[LXTH_OPENLOG].lxth_handle,
+ (uintptr_t)ident, data->lxt_ol_logopt, data->lxt_ol_facility);
+
+ request->lxt_sa_success = 1;
+ request->lxt_sa_errno = 0;
+ *door_result = (char *)request;
+ *door_result_size = request_size;
+}
+
+static void
+/*ARGSUSED*/
+lxt_server_syslog(lxt_server_arg_t *request, size_t request_size,
+ char **door_result, size_t *door_result_size)
+{
+ lxt_syslog_arg_t *data;
+ int data_size;
+ char *progname_ptr_new;
+ char *progname_ptr_old;
+
+ assert(request->lxt_sa_op == LXT_SERVER_OP_SYSLOG);
+
+ /*LINTED*/
+ data = (lxt_syslog_arg_t *)&request->lxt_sa_data[0];
+ data_size = request_size - sizeof (*request);
+
+ if (data_size != sizeof (*data)) {
+ lx_debug("lxt_server_openlog: invalid request");
+ *door_result = NULL;
+ *door_result_size = 0;
+ return;
+ }
+ progname_ptr_new = data->lxt_sl_progname;
+
+ (void) mutex_lock(&lxt_pid_lock);
+
+ /*
+ * Ensure the message has the correct pid.
+ * We do this by telling our getpid() system call to return a
+ * different value.
+ */
+ lxt_pid = data->lxt_sl_pid;
+
+ /*
+ * Ensure the message has the correct program name.
+ * Normally instead of a program name an "ident" string is
+ * used, this is the string passed to openlog(). But if
+ * openlog() wasn't called before syslog() then Linux
+ * syslog() will attempt to use the program name as
+ * the ident string, and the program name is determined
+ * by looking at the __progname variable. So we'll just
+ * update the Linux __progname variable while we do the
+ * call.
+ */
+ (void) uucopy(lxt_handles[LXTH_PROGNAME].lxth_handle,
+ &progname_ptr_old, sizeof (char *));
+ (void) uucopy(&progname_ptr_new,
+ lxt_handles[LXTH_PROGNAME].lxth_handle, sizeof (char *));
+
+ /* Call Linux syslog(). */
+ (void) lx_call2(lxt_handles[LXTH_SYSLOG].lxth_handle,
+ data->lxt_sl_priority, (uintptr_t)data->lxt_sl_message);
+
+ /* Restore pid and program name. */
+ (void) uucopy(&progname_ptr_old,
+ lxt_handles[LXTH_PROGNAME].lxth_handle, sizeof (char *));
+ lxt_pid = NULL;
+
+ (void) mutex_unlock(&lxt_pid_lock);
+
+ request->lxt_sa_success = 1;
+ request->lxt_sa_errno = 0;
+ *door_result = (char *)request;
+ *door_result_size = request_size;
+}
+
+static void
+/*ARGSUSED*/
+lxt_server_closelog(lxt_server_arg_t *request, size_t request_size,
+ char **door_result, size_t *door_result_size)
+{
+ int data_size;
+
+ assert(request->lxt_sa_op == LXT_SERVER_OP_CLOSELOG);
+
+ data_size = request_size - sizeof (*request);
+ if (data_size != 0) {
+ lx_debug("lxt_server_closelog: invalid request");
+ *door_result = NULL;
+ *door_result_size = 0;
+ return;
+ }
+
+ /* Call Linux closelog(). */
+ (void) lx_call0(lxt_handles[LXTH_CLOSELOG].lxth_handle);
+
+ request->lxt_sa_success = 1;
+ request->lxt_sa_errno = 0;
+ *door_result = (char *)request;
+ *door_result_size = request_size;
+}
+
+static void
+/*ARGSUSED*/
+lxt_server(void *cookie, char *argp, size_t request_size,
+ door_desc_t *dp, uint_t n_desc)
+{
+ /*LINTED*/
+ lxt_server_arg_t *request = (lxt_server_arg_t *)argp;
+ lxt_req_t lxt_req;
+ char *door_path = cookie;
+
+ /* Check if there's no callers left */
+ if (argp == DOOR_UNREF_DATA) {
+ (void) fdetach(door_path);
+ (void) unlink(door_path);
+ lx_debug("lxt_thunk_server: no clients, exiting");
+ exit(0);
+ }
+
+ /* Sanity check the incomming request. */
+ if (request_size < sizeof (*request)) {
+ /* the lookup failed */
+ lx_debug("lxt_thunk_server: invalid request size");
+ (void) door_return(NULL, 0, NULL, 0);
+ return;
+ }
+
+ if ((request->lxt_sa_op < LXT_SERVER_OP_MIN) ||
+ (request->lxt_sa_op > LXT_SERVER_OP_MAX)) {
+ lx_debug("lxt_thunk_server: invalid request op");
+ (void) door_return(NULL, 0, NULL, 0);
+ return;
+ }
+
+ /* Handle ping requests immediatly, return here. */
+ if (request->lxt_sa_op == LXT_SERVER_OP_PING) {
+ lx_debug("lxt_thunk_server: handling ping request");
+ request->lxt_sa_success = 1;
+ (void) door_return((char *)request, request_size, NULL, 0);
+ return;
+ }
+
+ lx_debug("lxt_thunk_server: hand off request to Linux thread, "
+ "request = 0x%p", request);
+
+ /* Pack the request up so we can pass it to a Linux thread. */
+ lxt_req.lxtr_request = request;
+ lxt_req.lxtr_request_size = request_size;
+ lxt_req.lxtr_result = NULL;
+ lxt_req.lxtr_result_size = 0;
+ lxt_req.lxtr_complete = 0;
+ (void) cond_init(&lxt_req.lxtr_complete_cv, USYNC_THREAD, NULL);
+
+ /* Pass the request onto a Linux thread. */
+ (void) mutex_lock(&lxt_req_lock);
+ while (lxt_req_ptr != NULL)
+ (void) cond_wait(&lxt_req_cv, &lxt_req_lock);
+ lxt_req_ptr = &lxt_req;
+ (void) cond_broadcast(&lxt_req_cv);
+
+ /* Wait for the request to be completed. */
+ while (lxt_req.lxtr_complete == 0)
+ (void) cond_wait(&lxt_req.lxtr_complete_cv, &lxt_req_lock);
+ assert(lxt_req_ptr != &lxt_req);
+ (void) mutex_unlock(&lxt_req_lock);
+
+ lx_debug("lxt_thunk_server: hand off request completed, "
+ "request = 0x%p", request);
+
+ /*
+ * If door_return() is successfull it never returns, so if we made
+ * it here there was some kind of error, but there's nothing we can
+ * really do about it.
+ */
+ (void) door_return(
+ lxt_req.lxtr_result, lxt_req.lxtr_result_size, NULL, 0);
+}
+
+static void
+lxt_server_loop(void)
+{
+ lxt_req_t *lxt_req;
+ lxt_server_arg_t *request;
+ size_t request_size;
+ char *door_result;
+ size_t door_result_size;
+
+ for (;;) {
+ /* Wait for a request from a doors server thread. */
+ (void) mutex_lock(&lxt_req_lock);
+ while (lxt_req_ptr == NULL)
+ (void) cond_wait(&lxt_req_cv, &lxt_req_lock);
+
+ /* We got a request, get a local pointer to it. */
+ lxt_req = lxt_req_ptr;
+ lxt_req_ptr = NULL;
+ (void) cond_broadcast(&lxt_req_cv);
+ (void) mutex_unlock(&lxt_req_lock);
+
+ /* Get a pointer to the request. */
+ request = lxt_req->lxtr_request;
+ request_size = lxt_req->lxtr_request_size;
+
+ lx_debug("lxt_server_loop: Linux thread request recieved, "
+ "request = %p", request);
+
+ /* Dispatch the request. */
+ assert((request->lxt_sa_op > LXT_SERVER_OP_PING) ||
+ (request->lxt_sa_op < LXT_SERVER_OP_MAX));
+ lxt_operations[request->lxt_sa_op].lxto_fp(
+ request, request_size, &door_result, &door_result_size);
+
+ lx_debug("lxt_server_loop: Linux thread request completed, "
+ "request = %p", request);
+
+ (void) mutex_lock(&lxt_req_lock);
+
+ /* Set the result pointers for the calling door thread. */
+ lxt_req->lxtr_result = door_result;
+ lxt_req->lxtr_result_size = door_result_size;
+
+ /* Let the door thread know we're done. */
+ lxt_req->lxtr_complete = 1;
+ (void) cond_signal(&lxt_req->lxtr_complete_cv);
+
+ (void) mutex_unlock(&lxt_req_lock);
+ }
+ /*NOTREACHED*/
+}
+
+static void
+lxt_server_enter(int fifo1_wr, int fifo2_rd)
+{
+ struct stat stat;
+ char door_path[MAXPATHLEN];
+ int i, dfd, junk = 0;
+
+ /*
+ * Do some sanity checks. Make sure we've got the fifos
+ * we need passed to us on the correct file descriptors.
+ */
+ if ((fstat(fifo1_wr, &stat) != 0) ||
+ ((stat.st_mode & S_IFMT) != S_IFIFO) ||
+ (fstat(fifo2_rd, &stat) != 0) ||
+ ((stat.st_mode & S_IFMT) != S_IFIFO)) {
+ lx_err("lx_thunk server aborting, can't contact parent");
+ exit(-1);
+ }
+
+ /*
+ * Get the initial Linux call handle so we can invoke other
+ * Linux calls.
+ */
+ lxh_init = lx_call_init();
+ if (lxh_init == NULL) {
+ lx_err("lx_thunk server aborting, failed Linux call init");
+ exit(-1);
+ }
+
+ /* Now lookup other Linux symbols we'll need access to. */
+ for (i = 0; lxt_handles[i].lxth_name != NULL; i++) {
+ assert(lxt_handles[i].lxth_index == i);
+ if ((lxt_handles[i].lxth_handle = lx_call_dlsym(lxh_init,
+ lxt_handles[i].lxth_name)) == NULL) {
+ lx_err("lx_thunk server aborting, "
+ "failed Linux symbol lookup: %s",
+ lxt_handles[i].lxth_name);
+ exit(-1);
+ }
+ }
+
+ /* get the path to the door server */
+ if (read(fifo2_rd, door_path, sizeof (door_path)) < 0) {
+ lx_err("lxt_server_enter: failed to get door path");
+ exit(-1);
+ }
+ (void) close(fifo2_rd);
+
+ /* Create the door server. */
+ if ((dfd = door_create(lxt_server, door_path,
+ DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) {
+ lx_err("lxt_server_enter: door_create() failed");
+ exit(-1);
+ }
+
+ /* Attach the door to a file system path. */
+ (void) fdetach(door_path);
+ if (fattach(dfd, door_path) < 0) {
+ lx_err("lxt_server_enter: fattach() failed");
+ exit(-1);
+ }
+
+ /* The door server is ready, signal this via a fifo write */
+ (void) write(fifo1_wr, &junk, 1);
+ (void) close(fifo1_wr);
+
+ lx_debug("lxt_server_enter: doors server initialized");
+ lxt_server_loop();
+ /*NOTREACHED*/
+}
+
+void
+lxt_server_exec_check(void)
+{
+ if (lxt_server_processes == 0)
+ return;
+
+ /*
+ * We're a thunk server process, so we take over control of
+ * the current Linux process here.
+ */
+ lx_debug("lx_thunk server initalization starting");
+ lxt_server_enter(LXT_SERVER_FIFO_WR_FD, LXT_SERVER_FIFO_RD_FD);
+ /*NOTREACHED*/
+}
+
+void
+lxt_server_init(int argc, char *argv[])
+{
+ /*
+ * The thunk server process is a shell script named LXT_SERVER_BINARY.
+ * It is executed without any parameters. Since it's a shell script
+ * the arguments passed to the shell's main entry point are:
+ * 1) the name of the shell
+ * 2) the name of the script to execute
+ *
+ * So to check if we're the thunk server process we first check
+ * for the expected number of arduments and then we'll look at
+ * the second parameter to see if it's LXT_SERVER_BINARY.
+ */
+ if ((argc != 2) ||
+ (strcmp(argv[1], LXT_SERVER_BINARY) != 0))
+ return;
+
+ lxt_server_processes = 1;
+ lx_debug("lx_thunk server detected, delaying initalization");
+}
+
+int
+lxt_server_pid(int *pid)
+{
+ if (lxt_server_processes == 0)
+ return (0);
+ *pid = lxt_pid;
+ return (1);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/mapfile b/usr/src/lib/brand/lx/lx_brand/common/mapfile
new file mode 100644
index 0000000000..0663f4bc19
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/mapfile
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+#
+# Scope everything local -- our .init section is our only public interface.
+#
+{
+ local:
+ *;
+};
diff --git a/usr/src/lib/brand/lx/lx_brand/common/mapfile-vers b/usr/src/lib/brand/lx/lx_brand/common/mapfile-vers
new file mode 100644
index 0000000000..0663f4bc19
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/mapfile-vers
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+#
+# Scope everything local -- our .init section is our only public interface.
+#
+{
+ local:
+ *;
+};
diff --git a/usr/src/lib/brand/lx/lx_brand/common/mem.c b/usr/src/lib/brand/lx/lx_brand/common/mem.c
new file mode 100644
index 0000000000..cebe2ec5bb
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/mem.c
@@ -0,0 +1,381 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <procfs.h>
+#include <sys/mman.h>
+#include <sys/param.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_syscall.h>
+
+/*
+ * There are two forms of mmap, mmap() and mmap2(). The only difference is that
+ * the final argument to mmap2() specifies the number of pages, not bytes.
+ * Linux has a number of additional flags, but they are all deprecated. We also
+ * ignore the MAP_GROWSDOWN flag, which has no equivalent on Solaris.
+ *
+ * The Linux mmap() returns ENOMEM in some cases where Solaris returns
+ * EOVERFLOW, so we translate the errno as necessary.
+ */
+
+int pagesize; /* needed for mmap2() */
+
+#define LX_MAP_ANONYMOUS 0x00020
+#define LX_MAP_LOCKED 0x02000
+#define LX_MAP_NORESERVE 0x04000
+#define LX_MAP_32BIT 0x00040
+
+#define LX_MADV_REMOVE 9
+#define LX_MADV_DONTFORK 10
+#define LX_MADV_DOFORK 11
+#define LX_MADV_MERGEABLE 12
+#define LX_MADV_UNMERGEABLE 13
+#define LX_MADV_HUGEPAGE 14
+#define LX_MADV_NOHUGEPAGE 15
+#define LX_MADV_DONTDUMP 16
+#define LX_MADV_DODUMP 17
+
+static int
+ltos_mmap_flags(int flags)
+{
+ int new_flags;
+
+ new_flags = flags & (MAP_TYPE | MAP_FIXED);
+
+ if (flags & LX_MAP_ANONYMOUS)
+ new_flags |= MAP_ANONYMOUS;
+ if (flags & LX_MAP_NORESERVE)
+ new_flags |= MAP_NORESERVE;
+
+#if defined(_LP64)
+ if (flags & LX_MAP_32BIT)
+ new_flags |= MAP_32BIT;
+#endif
+
+ return (new_flags);
+}
+
+static void *
+mmap_common(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
+ uintptr_t p5, off64_t p6)
+{
+ void *addr = (void *)p1;
+ size_t len = p2;
+ int prot = p3;
+ int flags = p4;
+ int fd = p5;
+ off64_t off = p6;
+ void *ret;
+
+ if (lx_debug_enabled != 0) {
+ char *path, path_buf[MAXPATHLEN];
+
+ path = lx_fd_to_path(fd, path_buf, sizeof (path_buf));
+ if (path == NULL)
+ path = "?";
+
+ lx_debug("\tmmap_common(): fd = %d - %s", fd, path);
+ }
+
+ /*
+ * Under Linux, the file descriptor is ignored when mapping zfod
+ * anonymous memory, On Solaris, we want the fd set to -1 for the
+ * same functionality.
+ */
+ if (flags & LX_MAP_ANONYMOUS)
+ fd = -1;
+
+ /*
+ * This is totally insane. The NOTES section in the linux mmap(2) man
+ * page claims that on some architectures, read protection may
+ * automatically include exec protection. It has been observed on a
+ * native linux system that the /proc/<pid>/maps file does indeed
+ * show that segments mmap'd from userland (such as libraries mapped in
+ * by the dynamic linker) all have exec the permission set, even for
+ * data segments.
+ */
+ if (prot & PROT_READ)
+ prot |= PROT_EXEC;
+
+ ret = mmap64(addr, len, prot, ltos_mmap_flags(flags), fd, off);
+
+ if (ret == MAP_FAILED)
+ return ((void *)(long)(errno == EOVERFLOW ? -ENOMEM : -errno));
+
+ if (flags & LX_MAP_LOCKED)
+ (void) mlock(ret, len);
+
+ return (ret);
+}
+
+long
+lx_mmap(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
+ uintptr_t p5, uintptr_t p6)
+{
+ return ((ssize_t)mmap_common(p1, p2, p3, p4, p5, (off64_t)p6));
+}
+
+long
+lx_mmap2(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
+ uintptr_t p5, uintptr_t p6)
+{
+ if (pagesize == 0)
+ pagesize = sysconf(_SC_PAGESIZE);
+
+ return ((ssize_t)mmap_common(p1, p2, p3, p4, p5,
+ (off64_t)p6 * pagesize));
+}
+
+
+/*
+ * The locking family of system calls, as well as msync(), are identical. On
+ * Solaris, they are layered on top of the memcntl syscall, so they cannot be
+ * pass-thru.
+ */
+long
+lx_mlock(uintptr_t addr, uintptr_t len)
+{
+ uintptr_t addr1 = addr & PAGEMASK;
+ uintptr_t len1 = len + (addr & PAGEOFFSET);
+
+ return (mlock((void *)addr1, (size_t)len1) ? -errno : 0);
+}
+
+long
+lx_mlockall(uintptr_t flags)
+{
+ return (mlockall(flags) ? -errno : 0);
+}
+
+long
+lx_munlock(uintptr_t addr, uintptr_t len)
+{
+ uintptr_t addr1 = addr & PAGEMASK;
+ uintptr_t len1 = len + (addr & PAGEOFFSET);
+
+ return (munlock((void *)addr1, (size_t)len1) ? -errno : 0);
+}
+
+long
+lx_munlockall(void)
+{
+ return (munlockall() ? -errno : 0);
+}
+
+long
+lx_msync(uintptr_t addr, uintptr_t len, uintptr_t flags)
+{
+ return (msync((void *)addr, (size_t)len, flags) ? -errno : 0);
+}
+
+/*
+ * Illumos and Linux overlap on the basic flags, and are disjoint on the rest.
+ * Linux also allows the length to be zero, while Illumos does not.
+ */
+long
+lx_madvise(uintptr_t start, uintptr_t len, uintptr_t advice)
+{
+ int ret;
+
+ if (len == 0)
+ return (0);
+
+ /* approximately similar */
+ if (advice == LX_MADV_REMOVE)
+ advice = MADV_FREE;
+
+ switch (advice) {
+ case MADV_NORMAL:
+ case MADV_RANDOM:
+ case MADV_SEQUENTIAL:
+ case MADV_WILLNEED:
+ case MADV_DONTNEED:
+ case MADV_FREE:
+ ret = madvise((void *)start, len, advice);
+ if (ret == -1) {
+ if (errno == EBUSY)
+ return (-EINVAL);
+ return (-errno);
+ } else {
+ return (0);
+ }
+
+ /* harmless to pretend these work */
+ case LX_MADV_DONTFORK:
+ case LX_MADV_DOFORK:
+ case LX_MADV_HUGEPAGE:
+ case LX_MADV_NOHUGEPAGE:
+ case LX_MADV_DONTDUMP:
+ case LX_MADV_DODUMP:
+ return (0);
+
+ /* we'll return an error for the rest of the Linux flags */
+ default:
+ return (-EINVAL);
+ }
+}
+
+/*
+ * mprotect() is identical except that we ignore the Linux flags PROT_GROWSDOWN
+ * and PROT_GROWSUP, which have no equivalent on Solaris.
+ */
+#define LX_PROT_GROWSDOWN 0x01000000
+#define LX_PROT_GROWSUP 0x02000000
+
+long
+lx_mprotect(uintptr_t start, uintptr_t len, uintptr_t prot)
+{
+ prot &= ~(LX_PROT_GROWSUP | LX_PROT_GROWSDOWN);
+
+ return (mprotect((void *)start, len, prot) ? -errno : 0);
+}
+
+#define LX_MREMAP_MAYMOVE 1 /* mapping can be moved */
+#define LX_MREMAP_FIXED 2 /* address is fixed */
+
+long
+lx_remap(uintptr_t old_address, uintptr_t old_size,
+ uintptr_t new_size, uintptr_t flags, uintptr_t new_address)
+{
+ int prot = 0, oflags, mflags = 0, len, fd;
+ prmap_t map;
+ uintptr_t rval;
+ char path[256], buf[MAXPATHLEN + 1];
+
+ /*
+ * The kernel doesn't actually support mremap(), so to emulate it,
+ * we're going to mmap() the underlying object with the new size.
+ * We don't actually have a file descriptor (and indeed, the mapped
+ * file may not exist in any file system name space), so we'll
+ * find the path via the object's entry in /proc/self/path. There are
+ * many reasons why this might fail; generally, we'll return EINVAL,
+ * but in some cases we'll return ENOMEM.
+ */
+ if ((fd = open("/native/proc/self/map", O_RDONLY)) == -1)
+ return (-EINVAL);
+
+ do {
+ if (read(fd, &map, sizeof (map)) < sizeof (map)) {
+ /*
+ * This is either a short read or we've hit the end
+ * of the mappings. Either way, our passed mapping is
+ * invalid; return EINVAL.
+ */
+ (void) close(fd);
+ return (-EINVAL);
+ }
+ } while (map.pr_vaddr != old_address || map.pr_size != old_size);
+
+ (void) close(fd);
+
+ if (!(map.pr_mflags & MA_SHARED)) {
+ /*
+ * If this is a private mapping, we're not going to remap it.
+ */
+ return (-EINVAL);
+ }
+
+ if (map.pr_mflags & (MA_ISM | MA_SHM)) {
+ /*
+ * If this is either ISM or System V shared memory, we're not
+ * going to remap it.
+ */
+ return (-EINVAL);
+ }
+
+ if (!(flags & LX_MREMAP_MAYMOVE)) {
+ /*
+ * If we're not allowed to move this mapping, we're going to
+ * act as if we can't expand it.
+ */
+ return (-ENOMEM);
+ }
+
+ oflags = (map.pr_mflags & MA_WRITE) ? O_RDWR : O_RDONLY;
+
+ if (map.pr_mapname[0] == '\0') {
+ /*
+ * This is likely an anonymous mapping.
+ */
+ return (-EINVAL);
+ }
+
+ (void) snprintf(path, sizeof (path),
+ "/native/proc/self/path/%s", map.pr_mapname);
+
+ if ((len = readlink(path, buf, sizeof (buf))) == -1 ||
+ len == sizeof (buf)) {
+ /*
+ * If we failed to read the link, the path might not exist.
+ */
+ return (-EINVAL);
+ }
+ buf[len] = '\0';
+
+ if ((fd = open(buf, oflags)) == -1) {
+ /*
+ * If we failed to open the object, it may be because it's
+ * not named (i.e., it's anonymous) or because we somehow
+ * don't have permissions. Either way, we're going to kick
+ * it back with EINVAL.
+ */
+ return (-EINVAL);
+ }
+
+ if (map.pr_mflags & MA_WRITE)
+ prot |= PROT_WRITE;
+
+ if (map.pr_mflags & MA_READ)
+ prot |= PROT_READ;
+
+ if (map.pr_mflags & MA_EXEC)
+ prot |= PROT_EXEC;
+
+ mflags = MAP_SHARED;
+
+ if (new_address != NULL && (flags & LX_MREMAP_FIXED)) {
+ mflags |= MAP_FIXED;
+ } else {
+ new_address = NULL;
+ }
+
+ rval = (uintptr_t)mmap((void *)new_address, new_size,
+ prot, mflags, fd, map.pr_offset);
+ (void) close(fd);
+
+ if ((void *)rval == MAP_FAILED)
+ return ((long)-ENOMEM);
+
+ /*
+ * Our mapping succeeded; we're now going to rip down the old mapping.
+ */
+ (void) munmap((void *)old_address, old_size);
+
+ return (rval);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/misc.c b/usr/src/lib/brand/lx/lx_brand/common/misc.c
new file mode 100644
index 0000000000..7064ed64b4
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/misc.c
@@ -0,0 +1,1109 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <assert.h>
+#include <alloca.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <strings.h>
+#include <macros.h>
+#include <sys/brand.h>
+#include <sys/reboot.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/sysmacros.h>
+#include <sys/systeminfo.h>
+#include <sys/types.h>
+#include <sys/epoll.h>
+#include <sys/mman.h>
+#include <sys/shm.h>
+#include <sys/lx_types.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_stat.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_thunk_server.h>
+#include <sys/lx_fcntl.h>
+#include <sys/inotify.h>
+#include <thread.h>
+#include <unistd.h>
+#include <libintl.h>
+#include <zone.h>
+#include <priv.h>
+
+extern int sethostname(char *, int);
+
+struct lx_sysinfo {
+ int64_t si_uptime; /* Seconds since boot */
+ uint64_t si_loads[3]; /* 1, 5, and 15 minute avg runq length */
+ uint64_t si_totalram; /* Total memory size */
+ uint64_t si_freeram; /* Available memory */
+ uint64_t si_sharedram; /* Shared memory */
+ uint64_t si_bufferram; /* Buffer memory */
+ uint64_t si_totalswap; /* Total swap space */
+ uint64_t si_freeswap; /* Avail swap space */
+ uint16_t si_procs; /* Process count */
+ uint16_t si_pad; /* Padding */
+ uint64_t si_totalhigh; /* High memory size */
+ uint64_t si_freehigh; /* Avail high memory */
+ uint32_t si_mem_unit; /* Unit size of memory fields */
+};
+
+struct lx_sysinfo32 {
+ int32_t si_uptime; /* Seconds since boot */
+ uint32_t si_loads[3]; /* 1, 5, and 15 minute avg runq length */
+ uint32_t si_totalram; /* Total memory size */
+ uint32_t si_freeram; /* Available memory */
+ uint32_t si_sharedram; /* Shared memory */
+ uint32_t si_bufferram; /* Buffer memory */
+ uint32_t si_totalswap; /* Total swap space */
+ uint32_t si_freeswap; /* Avail swap space */
+ uint16_t si_procs; /* Process count */
+ uint16_t si_pad; /* Padding */
+ uint32_t si_totalhigh; /* High memory size */
+ uint32_t si_freehigh; /* Avail high memory */
+ uint32_t si_mem_unit; /* Unit size of memory fields */
+};
+
+extern long lx_sysinfo(struct lx_sysinfo *sip);
+
+/* ARGUSED */
+long
+lx_rename(uintptr_t p1, uintptr_t p2)
+{
+ int ret;
+
+ ret = rename((const char *)p1, (const char *)p2);
+
+ if (ret < 0) {
+ /*
+ * If rename(2) failed and we're in install mode, return
+ * success if the the reason we failed was either because the
+ * source file didn't actually exist or if it was because we
+ * tried to rename it to be the name of a device currently in
+ * use (resulting in an EBUSY.)
+ *
+ * To help install along further, if the failure was due
+ * to an EBUSY, delete the original file so we don't leave
+ * extra files lying around.
+ */
+ if (lx_install != 0) {
+ if (errno == ENOENT)
+ return (0);
+
+ if (errno == EBUSY) {
+ (void) unlink((const char *)p1);
+ return (0);
+ }
+ }
+
+ return (-errno);
+ }
+
+ return (0);
+}
+
+long
+lx_renameat(uintptr_t ext1, uintptr_t p1, uintptr_t ext2, uintptr_t p2)
+{
+ int ret;
+ int atfd1 = (int)ext1;
+ int atfd2 = (int)ext2;
+
+ if (atfd1 == LX_AT_FDCWD)
+ atfd1 = AT_FDCWD;
+
+ if (atfd2 == LX_AT_FDCWD)
+ atfd2 = AT_FDCWD;
+
+ ret = renameat(atfd1, (const char *)p1, atfd2, (const char *)p2);
+
+ if (ret < 0) {
+ /* see lx_rename() for why we check lx_install */
+ if (lx_install != 0) {
+ if (errno == ENOENT)
+ return (0);
+
+ if (errno == EBUSY) {
+ (void) unlinkat(ext1, (const char *)p1, 0);
+ return (0);
+ }
+ }
+
+ return (-errno);
+ }
+
+ return (0);
+}
+
+/*ARGSUSED*/
+long
+lx_reboot(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
+{
+ int magic = (int)p1;
+ int magic2 = (int)p2;
+ uint_t flag = (int)p3;
+ int rc;
+
+ if (magic != LINUX_REBOOT_MAGIC1)
+ return (-EINVAL);
+ if (magic2 != LINUX_REBOOT_MAGIC2 && magic2 != LINUX_REBOOT_MAGIC2A &&
+ magic2 != LINUX_REBOOT_MAGIC2B && magic2 != LINUX_REBOOT_MAGIC2C &&
+ magic2 != LINUX_REBOOT_MAGIC2D)
+ return (-EINVAL);
+
+ if (geteuid() != 0)
+ return (-EPERM);
+
+ switch (flag) {
+ case LINUX_REBOOT_CMD_CAD_ON:
+ case LINUX_REBOOT_CMD_CAD_OFF:
+ /* ignored */
+ rc = 0;
+ break;
+ case LINUX_REBOOT_CMD_POWER_OFF:
+ case LINUX_REBOOT_CMD_HALT:
+ rc = reboot(RB_HALT, NULL);
+ break;
+ case LINUX_REBOOT_CMD_RESTART:
+ case LINUX_REBOOT_CMD_RESTART2:
+ /* RESTART2 may need more work */
+ lx_msg("Restarting system.\n");
+ rc = reboot(RB_AUTOBOOT, NULL);
+ break;
+ default:
+ return (-EINVAL);
+ }
+
+ return ((rc == -1) ? -errno : rc);
+}
+
+/*
+ * getcwd() - Linux syscall semantics are slightly different; we need to return
+ * the length of the pathname copied (+ 1 for the terminating NULL byte.)
+ */
+long
+lx_getcwd(uintptr_t p1, uintptr_t p2)
+{
+ char *buf;
+ size_t buflen = (size_t)p2;
+ size_t copylen, local_len;
+ size_t len = 0;
+
+ if ((getcwd((char *)p1, (size_t)p2)) == NULL)
+ return (-errno);
+
+ /*
+ * We need the length of the pathname getcwd() copied but we never want
+ * to dereference a Linux pointer for any reason.
+ *
+ * Thus, to get the string length we will uucopy() up to copylen
+ * bytes at a time into a local buffer and will walk each chunk looking
+ * for the string-terminating NULL byte.
+ *
+ * We can use strlen() to find the length of the string in the
+ * local buffer by delimiting the buffer with a NULL byte in the
+ * last element that will never be overwritten.
+ */
+ copylen = min(buflen, MAXPATHLEN + 1);
+ buf = SAFE_ALLOCA(copylen + 1);
+ if (buf == NULL)
+ return (-ENOMEM);
+ buf[copylen] = '\0';
+
+ for (;;) {
+ if (uucopy((char *)p1 + len, buf, copylen) != 0)
+ return (-errno);
+
+ local_len = strlen(buf);
+ len += local_len;
+
+ /*
+ * If the strlen() is less than copylen, we found the
+ * real end of the string -- not the NULL byte used to
+ * delimit the end of our buffer.
+ */
+ if (local_len != copylen)
+ break;
+
+ /* prepare to check the next chunk of the string */
+ buflen -= copylen;
+ copylen = min(buflen, copylen);
+ }
+
+ return (len + 1);
+}
+
+long
+lx_uname(uintptr_t p1)
+{
+ struct lx_utsname *un = (struct lx_utsname *)p1;
+ char buf[LX_SYS_UTS_LN + 1];
+
+ if (gethostname(un->nodename, sizeof (un->nodename)) == -1)
+ return (-errno);
+
+ (void) strlcpy(un->sysname, LX_UNAME_SYSNAME, LX_SYS_UTS_LN);
+ (void) strlcpy(un->release, lx_release, LX_SYS_UTS_LN);
+ (void) strlcpy(un->version, LX_UNAME_VERSION, LX_SYS_UTS_LN);
+ (void) strlcpy(un->machine, LX_UNAME_MACHINE, LX_SYS_UTS_LN);
+ if ((sysinfo(SI_SRPC_DOMAIN, buf, LX_SYS_UTS_LN) < 0))
+ un->domainname[0] = '\0';
+ else
+ (void) strlcpy(un->domainname, buf, LX_SYS_UTS_LN);
+
+ return (0);
+}
+
+/*
+ * {get,set}groups16() - Handle the conversion between 16-bit Linux gids and
+ * 32-bit Solaris gids.
+ */
+long
+lx_getgroups16(uintptr_t p1, uintptr_t p2)
+{
+ int count = (int)p1;
+ lx_gid16_t *grouplist = (lx_gid16_t *)p2;
+ gid_t *grouplist32;
+ int ret;
+ int i;
+
+ if (count < 0)
+ return (-EINVAL);
+
+ grouplist32 = SAFE_ALLOCA(count * sizeof (gid_t));
+ if (grouplist32 == NULL && count > 0)
+ return (-ENOMEM);
+ if ((ret = getgroups(count, grouplist32)) < 0)
+ return (-errno);
+
+ /* we must not modify the list if the incoming count was 0 */
+ if (count > 0) {
+ for (i = 0; i < ret; i++)
+ grouplist[i] = LX_GID32_TO_GID16(grouplist32[i]);
+ }
+
+ return (ret);
+}
+
+long
+lx_setgroups16(uintptr_t p1, uintptr_t p2)
+{
+ int count = (int)p1;
+ lx_gid16_t *grouplist = (lx_gid16_t *)p2;
+ gid_t *grouplist32;
+ int i;
+
+ grouplist32 = SAFE_ALLOCA(count * sizeof (gid_t));
+ if (grouplist32 == NULL)
+ return (-ENOMEM);
+ for (i = 0; i < count; i++)
+ grouplist32[i] = LX_GID16_TO_GID32(grouplist[i]);
+
+ /* order matters here to get the correct errno back */
+ if (count > NGROUPS_MAX_DEFAULT)
+ return (-EINVAL);
+
+ return (setgroups(count, grouplist32) ? -errno : 0);
+}
+
+/*
+ * personality() - Solaris doesn't support Linux personalities, but we have to
+ * emulate enough to show that we support the basic personality.
+ */
+#define LX_PER_LINUX 0x0
+
+long
+lx_personality(uintptr_t p1)
+{
+ int per = (int)p1;
+
+ switch (per) {
+ case -1:
+ /* Request current personality */
+ return (LX_PER_LINUX);
+ case LX_PER_LINUX:
+ return (0);
+ default:
+ return (-EINVAL);
+ }
+}
+
+/*
+ * mknod() - Since we don't have the SYS_CONFIG privilege within a zone, the
+ * only mode we have to support is S_IFIFO. We also have to distinguish between
+ * an invalid type and insufficient privileges.
+ */
+#define LX_S_IFMT 0170000
+#define LX_S_IFDIR 0040000
+#define LX_S_IFCHR 0020000
+#define LX_S_IFBLK 0060000
+#define LX_S_IFREG 0100000
+#define LX_S_IFIFO 0010000
+#define LX_S_IFLNK 0120000
+#define LX_S_IFSOCK 0140000
+
+/*ARGSUSED*/
+long
+lx_mknod(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ char *path = (char *)p1;
+ lx_dev_t lx_dev = (lx_dev_t)p3;
+ struct sockaddr_un sockaddr;
+ struct stat statbuf;
+ mode_t mode, type;
+ dev_t dev;
+ int fd;
+
+ type = ((mode_t)p2 & LX_S_IFMT);
+ mode = ((mode_t)p2 & 07777);
+
+ switch (type) {
+ case 0:
+ case LX_S_IFREG:
+ /* create a regular file */
+ if (stat(path, &statbuf) == 0)
+ return (-EEXIST);
+
+ if (errno != ENOENT)
+ return (-errno);
+
+ if ((fd = creat(path, mode)) < 0)
+ return (-errno);
+
+ (void) close(fd);
+ return (0);
+
+ case LX_S_IFSOCK:
+ /*
+ * Create a UNIX domain socket.
+ *
+ * Most programmers aren't even aware you can do this.
+ *
+ * Note you can also do this via Solaris' mknod(2), but
+ * Linux allows anyone who can create a UNIX domain
+ * socket via bind(2) to create one via mknod(2);
+ * Solaris requires the caller to be privileged.
+ */
+ if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0)
+ return (-errno);
+
+ if (stat(path, &statbuf) == 0)
+ return (-EEXIST);
+
+ if (errno != ENOENT)
+ return (-errno);
+
+ if (uucopy(path, &sockaddr.sun_path,
+ sizeof (sockaddr.sun_path)) < 0)
+ return (-errno);
+
+ /* assure NULL termination of sockaddr.sun_path */
+ sockaddr.sun_path[sizeof (sockaddr.sun_path) - 1] = '\0';
+ sockaddr.sun_family = AF_UNIX;
+
+ if (bind(fd, (struct sockaddr *)&sockaddr,
+ strlen(sockaddr.sun_path) +
+ sizeof (sockaddr.sun_family)) < 0)
+ return (-errno);
+
+ (void) close(fd);
+ return (0);
+
+ case LX_S_IFIFO:
+ dev = 0;
+ break;
+
+ case LX_S_IFCHR:
+ case LX_S_IFBLK:
+ /*
+ * The "dev" RPM package wants to create all possible Linux
+ * device nodes, so just report its mknod()s as having
+ * succeeded if we're in install mode.
+ */
+ if (lx_install != 0) {
+ lx_debug("lx_mknod: install mode spoofed creation of "
+ "Linux device [%lld, %lld]\n",
+ LX_GETMAJOR(lx_dev), LX_GETMINOR(lx_dev));
+
+ return (0);
+ }
+
+ dev = makedevice(LX_GETMAJOR(lx_dev), LX_GETMINOR(lx_dev));
+ break;
+
+ default:
+ return (-EINVAL);
+ }
+
+ return (mknod(path, mode | type, dev) ? -errno : 0);
+}
+
+long
+lx_sethostname(uintptr_t p1, uintptr_t p2)
+{
+ char *name = (char *)p1;
+ int len = (size_t)p2;
+
+ return (sethostname(name, len) ? -errno : 0);
+}
+
+long
+lx_setdomainname(uintptr_t p1, uintptr_t p2)
+{
+ char *name = (char *)p1;
+ int len = (size_t)p2;
+ long rval;
+
+ if (len < 0 || len >= LX_SYS_UTS_LN)
+ return (-EINVAL);
+
+ rval = sysinfo(SI_SET_SRPC_DOMAIN, name, len);
+
+ return ((rval < 0) ? -errno : 0);
+}
+
+long
+lx_getpid(void)
+{
+ int pid;
+
+ /* First call the thunk server hook. */
+ if (lxt_server_pid(&pid) != 0)
+ return (pid);
+
+ pid = syscall(SYS_brand, B_IKE_SYSCALL + LX_EMUL_getpid);
+ return ((pid == -1) ? -errno : pid);
+}
+
+long
+lx_execve(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ char *filename = (char *)p1;
+ char **argv = (char **)p2;
+ char **envp = (char **)p3;
+ char *nullist[] = { NULL };
+ char path[64];
+
+ /* First call the thunk server hook. */
+ lxt_server_exec_check();
+
+ /* Get a copy of the executable we're trying to run */
+ path[0] = '\0';
+ (void) uucopystr(filename, path, sizeof (path));
+
+ /* Check if we're trying to run a native binary */
+ if (strncmp(path, "/native/usr/lib/brand/lx/lx_native",
+ sizeof (path)) == 0) {
+ /* Skip the first element in the argv array */
+ argv++;
+
+ /*
+ * The name of the new program to execute was the first
+ * parameter passed to lx_native.
+ */
+ if (uucopy(argv, &filename, sizeof (char *)) != 0)
+ return (-errno);
+
+ (void) syscall(SYS_brand, B_EXEC_NATIVE, filename, argv, envp,
+ NULL, NULL, NULL);
+ return (-errno);
+ }
+
+ if (argv == NULL)
+ argv = nullist;
+
+ lx_ptrace_stop_if_option(LX_PTRACE_O_TRACEEXEC);
+
+ /*
+ * Emulate PR_SET_KEEPCAPS which is reset on execve. If this is not done
+ * the emulated capabilities could be reduced more than expected.
+ */
+ (void) setpflags(PRIV_AWARE_RESET, 1);
+
+ /* This is a normal exec call. */
+ (void) execve(filename, argv, envp);
+
+ return (-errno);
+}
+
+long
+lx_setgroups(uintptr_t p1, uintptr_t p2)
+{
+ int ng = (int)p1;
+ gid_t *glist = NULL;
+ int i, r;
+
+ lx_debug("\tlx_setgroups(%d, 0x%p", ng, p2);
+
+ if (ng > 0) {
+ if ((glist = (gid_t *)SAFE_ALLOCA(ng * sizeof (gid_t))) == NULL)
+ return (-ENOMEM);
+
+ if (uucopy((void *)p2, glist, ng * sizeof (gid_t)) != 0)
+ return (-errno);
+
+ /*
+ * Linux doesn't check the validity of the group IDs, but
+ * Solaris does. Change any invalid group IDs to a known, valid
+ * value (yuck).
+ */
+ for (i = 0; i < ng; i++) {
+ if (glist[i] > MAXUID)
+ glist[i] = MAXUID;
+ }
+ }
+
+ /* order matters here to get the correct errno back */
+ if (ng > NGROUPS_MAX_DEFAULT)
+ return (-EINVAL);
+
+ r = syscall(SYS_brand, B_IKE_SYSCALL + LX_EMUL_setgroups,
+ ng, glist);
+
+ return ((r == -1) ? -errno : r);
+}
+
+/*
+ * Linux currently defines 42 options for prctl (PR_CAPBSET_READ,
+ * PR_CAPBSET_DROP, etc.). Most of these are not emulated.
+ */
+long
+lx_prctl(int option, uintptr_t arg2, uintptr_t arg3,
+ uintptr_t arg4, uintptr_t arg5)
+{
+ psinfo_t psinfo;
+ size_t fnamelen = sizeof (psinfo.pr_fname);
+ size_t psargslen = sizeof (psinfo.pr_psargs);
+ int fd;
+
+ if (option == LX_PR_SET_KEEPCAPS) {
+ /*
+ * The closest illumos analog to SET_KEEPCAPS is the PRIV_AWARE
+ * flag. There are probably some cases where it's not exactly
+ * the same, but this will do for a first try.
+ */
+ if (arg2 == 0) {
+ if (setpflags(PRIV_AWARE_RESET, 1) != 0)
+ return (-errno);
+ } else {
+ if (setpflags(PRIV_AWARE, 1) != 0)
+ return (-errno);
+ }
+ return (0);
+ }
+
+ if (option != LX_PR_SET_NAME) {
+ lx_unsupported("prctl option %d", option);
+ return (-ENOSYS);
+ }
+
+ /*
+ * In Linux, PR_SET_NAME sets the name of the thread, not the process.
+ * Due to the historical quirks of Linux's asinine thread model, this
+ * name is effectively the name of the process (as visible via ps(1))
+ * if the thread is the first of its task group. The first thread is
+ * therefore special, and to best mimic Linux semantics (and absent a
+ * notion of per-LWP names), we do nothing (but return success) on LWPs
+ * other than LWP 1.
+ */
+ if (thr_self() != 1)
+ return (0);
+
+ if (uucopy((void *)arg2, psinfo.pr_fname,
+ MIN(LX_PR_SET_NAME_NAMELEN, fnamelen)) != 0)
+ return (-errno);
+
+ psinfo.pr_fname[fnamelen - 1] = '\0';
+
+ if (uucopy((void *)arg2, psinfo.pr_psargs,
+ MIN(LX_PR_SET_NAME_NAMELEN, psargslen)) != 0)
+ return (-errno);
+
+ psinfo.pr_psargs[psargslen - 1] = '\0';
+
+ if ((fd = open("/native/proc/self/psinfo", O_WRONLY)) < 0)
+ return (-errno);
+
+ if (pwrite(fd, psinfo.pr_fname, fnamelen,
+ (uintptr_t)psinfo.pr_fname - (uintptr_t)&psinfo) != fnamelen) {
+ (void) close(fd);
+ return (-EIO);
+ }
+
+ if (pwrite(fd, psinfo.pr_psargs, psargslen,
+ (uintptr_t)psinfo.pr_psargs - (uintptr_t)&psinfo) != psargslen) {
+ (void) close(fd);
+ return (-EIO);
+ }
+
+ (void) close(fd);
+
+ return (0);
+}
+
+/*
+ * For syslog(), as there is no kernel and nothing to log, we simply emulate a
+ * kernel cyclic buffer (LOG_BUF_LEN) of 0 bytes, only handling errors for bad
+ * input. All actions except 3 and 10 require CAP_SYS_ADMIN or CAP_SYSLOG, in
+ * lieu of full capabilities support for now we just perform an euid check.
+ */
+long
+lx_syslog(int type, char *bufp, int len)
+{
+ if (type < 0 || type > 10)
+ return (-EINVAL);
+
+ if ((type != 3 && type != 10) && (geteuid() != 0))
+ return (-EPERM);
+
+ if ((type >= 2 && type <= 4) && (bufp == NULL || len < 0))
+ return (-EINVAL);
+
+ if ((type == 8) && (len < 1 || len > 8))
+ return (-EINVAL);
+
+ return (0);
+}
+
+long
+lx_sysinfo32(uintptr_t arg)
+{
+ struct lx_sysinfo32 *sip = (struct lx_sysinfo32 *)arg;
+ struct lx_sysinfo32 si;
+ struct lx_sysinfo sil;
+ int i;
+
+ if (syscall(SYS_brand, B_IKE_SYSCALL + LX_EMUL_sysinfo, &sil) != 0)
+ return (-errno);
+
+ si.si_uptime = sil.si_uptime;
+
+ for (i = 0; i < 3; i++) {
+ if ((sil.si_loads[i]) > 0x7fffffff)
+ si.si_loads[i] = 0x7fffffff;
+ else
+ si.si_loads[i] = sil.si_loads[i];
+ }
+
+ si.si_procs = sil.si_procs;
+ si.si_totalram = sil.si_totalram;
+ si.si_freeram = sil.si_freeram;
+ si.si_totalswap = sil.si_totalswap;
+ si.si_freeswap = sil.si_freeswap;
+ si.si_mem_unit = sil.si_mem_unit;
+
+ si.si_bufferram = sil.si_bufferram;
+ si.si_sharedram = sil.si_sharedram;
+
+ si.si_totalhigh = sil.si_totalhigh;
+ si.si_freehigh = sil.si_freehigh;
+
+ if (uucopy(&si, sip, sizeof (si)) != 0)
+ return (-errno);
+
+ return (0);
+}
+
+/*
+ * The following are pass-through functions but we need to return the correct
+ * long so that the errno propagates back to the Linux code correctly.
+ */
+
+long
+lx_alarm(unsigned int seconds)
+{
+ int r;
+
+ r = alarm(seconds);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_close(int fildes)
+{
+ int r;
+
+ r = close(fildes);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_chdir(const char *path)
+{
+ int r;
+
+ r = chdir(path);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_chroot(const char *path)
+{
+ int r;
+
+ r = chroot(path);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_creat(const char *path, mode_t mode)
+{
+ int r;
+
+ r = creat(path, mode);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_dup(int fildes)
+{
+ int r;
+
+ r = dup(fildes);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_epoll_pwait(int epfd, void *events, int maxevents, int timeout,
+ const sigset_t *sigmask)
+{
+ int r;
+
+ r = epoll_pwait(epfd, events, maxevents, timeout, sigmask);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_epoll_create(int size)
+{
+ int r;
+
+ r = epoll_create(size);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_epoll_create1(int flags)
+{
+ int r;
+
+ r = epoll_create1(flags);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_epoll_wait(int epfd, void *events, int maxevents, int timeout)
+{
+ int r;
+
+ r = epoll_wait(epfd, events, maxevents, timeout);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_fchdir(int fildes)
+{
+ int r;
+
+ r = fchdir(fildes);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_fchmod(int fildes, mode_t mode)
+{
+ int r;
+
+ r = fchmod(fildes, mode);
+ return ((r == -1) ? -errno : r);
+}
+
+/*
+ * We support neither the second argument (NUMA node), nor the third (obsolete
+ * pre-2.6.24 caching functionality which was ultimately broken).
+ */
+long
+lx_getcpu(unsigned int *cpu, uintptr_t p2, uintptr_t p3)
+{
+ psinfo_t psinfo;
+ int procfd;
+ unsigned int curcpu;
+
+ if ((procfd = open("/native/proc/self/psinfo", O_RDONLY)) == -1)
+ return (-errno);
+
+ if (read(procfd, &psinfo, sizeof (psinfo_t)) == -1)
+ return (-errno);
+
+ curcpu = psinfo.pr_lwp.pr_onpro;
+
+ return ((uucopy(&curcpu, cpu, sizeof (curcpu)) != 0) ? -errno : 0);
+}
+
+long
+lx_getgid(void)
+{
+ int r;
+
+ r = getgid();
+ return (r);
+}
+
+long
+lx_getgroups(int gidsetsize, gid_t *grouplist)
+{
+ int r;
+
+ r = getgroups(gidsetsize, grouplist);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_getitimer(int which, struct itimerval *value)
+{
+ int r;
+
+ r = getitimer(which, value);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_getuid(void)
+{
+ int r;
+
+ r = getuid();
+ return (r);
+}
+
+long
+lx_inotify_add_watch(int fd, const char *pathname, uint32_t mask)
+{
+ int r;
+
+ r = inotify_add_watch(fd, pathname, mask);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_inotify_init(void)
+{
+ int r;
+
+ r = inotify_init();
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_inotify_init1(int flags)
+{
+ int r;
+
+ r = inotify_init1(flags);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_inotify_rm_watch(int fd, int wd)
+{
+ int r;
+
+ r = inotify_rm_watch(fd, wd);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_lchown(const char *path, uid_t owner, gid_t group)
+{
+ int r;
+
+ r = lchown(path, owner, group);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_mincore(caddr_t addr, size_t len, char *vec)
+{
+ int r;
+
+ r = mincore(addr, len, vec);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_mkdir(const char *path, mode_t mode)
+{
+ int r;
+
+ r = mkdir(path, mode);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_munmap(void *addr, size_t len)
+{
+ int r;
+
+ r = munmap(addr, len);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_nice(int incr)
+{
+ int r;
+
+ r = nice(incr);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_nanosleep(const struct timespec *rqtp, struct timespec *rmtp)
+{
+ int r;
+
+ r = nanosleep(rqtp, rmtp);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_pause(void)
+{
+ int r;
+
+ r = pause();
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_setgid(gid_t gid)
+{
+ int r;
+
+ r = setgid(gid);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_setuid(uid_t uid)
+{
+ int r;
+
+ r = setuid(uid);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_setregid(gid_t rgid, gid_t egid)
+{
+ int r;
+
+ r = setregid(rgid, egid);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_setreuid(uid_t ruid, uid_t euid)
+{
+ int r;
+
+ r = setreuid(ruid, euid);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_shmdt(char *shmaddr)
+{
+ int r;
+
+ r = shmdt(shmaddr);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_stime(const time_t *tp)
+{
+ int r;
+
+ r = stime(tp);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_symlink(const char *name1, const char *name2)
+{
+ int r;
+
+ r = symlink(name1, name2);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_umask(mode_t cmask)
+{
+ int r;
+
+ r = umask(cmask);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_utimes(const char *path, const struct timeval times[2])
+{
+ int r;
+
+ r = utimes(path, times);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_write(int fildes, const void *buf, size_t nbyte)
+{
+ int r;
+
+ r = write(fildes, buf, nbyte);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_yield(void)
+{
+
+ yield();
+ return (0);
+}
+
+long
+lx_vhangup(void)
+{
+ if (geteuid() != 0)
+ return (-EPERM);
+
+ vhangup();
+
+ return (0);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/module.c b/usr/src/lib/brand/lx/lx_brand/common/module.c
new file mode 100644
index 0000000000..78a593712f
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/module.c
@@ -0,0 +1,90 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * We don't support Linux modules, but we have to emulate enough of the system
+ * calls to show that we don't have any modules installed.
+ */
+
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_syscall.h>
+
+/*
+ * For query_module(), we provide an empty list of modules, and return ENOENT
+ * on any request for a specific module.
+ */
+#define LX_QM_MODULES 1
+#define LX_QM_DEPS 2
+#define LX_QM_REFS 3
+#define LX_QM_SYMBOLS 4
+#define LX_QM_INFO 5
+
+/*ARGSUSED*/
+long
+lx_query_module(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
+ uintptr_t p5)
+{
+ /*
+ * parameter p1 is the 'name' argument.
+ */
+ int which = (int)p2;
+ char *buf = (char *)p3;
+ size_t bufsize = (size_t)p4;
+ size_t *ret = (size_t *)p5;
+
+ switch (which) {
+ case 0:
+ /*
+ * Special case: always return 0
+ */
+ return (0);
+
+ case LX_QM_MODULES:
+ /*
+ * Generate an empty list of modules.
+ */
+ if (bufsize && buf)
+ buf[0] = '\0';
+ if (ret)
+ *ret = 0;
+ return (0);
+
+ case LX_QM_DEPS:
+ case LX_QM_REFS:
+ case LX_QM_SYMBOLS:
+ case LX_QM_INFO:
+ /*
+ * Any requests for specific module information return ENOENT.
+ */
+ return (-ENOENT);
+
+ default:
+ return (-EINVAL);
+ }
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/mount.c b/usr/src/lib/brand/lx/lx_brand/common/mount.c
new file mode 100644
index 0000000000..34f8b859f1
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/mount.c
@@ -0,0 +1,748 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <alloca.h>
+#include <assert.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <signal.h>
+#include <string.h>
+#include <strings.h>
+#include <nfs/mount.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <sys/lx_autofs.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_mount.h>
+
+/*
+ * support definitions
+ */
+union fh_buffer {
+ struct nfs_fid fh2;
+ struct nfs_fh3 fh3;
+ char fh_data[NFS3_FHSIZE + 2];
+};
+
+typedef enum mount_opt_type {
+ MOUNT_OPT_INVALID = 0,
+ MOUNT_OPT_NORMAL = 1, /* option value: none */
+ MOUNT_OPT_UINT = 2 /* option value: unsigned int */
+} mount_opt_type_t;
+
+typedef struct mount_opt {
+ char *mo_name;
+ mount_opt_type_t mo_type;
+} mount_opt_t;
+
+
+/*
+ * Globals
+ */
+mount_opt_t lofs_options[] = {
+ { NULL, MOUNT_OPT_INVALID }
+};
+
+mount_opt_t lx_proc_options[] = {
+ { NULL, MOUNT_OPT_INVALID }
+};
+
+mount_opt_t lx_tmpfs_options[] = {
+ { NULL, MOUNT_OPT_INVALID }
+};
+
+mount_opt_t lx_autofs_options[] = {
+ { LX_MNTOPT_FD, MOUNT_OPT_UINT },
+ { LX_MNTOPT_PGRP, MOUNT_OPT_UINT },
+ { LX_MNTOPT_MINPROTO, MOUNT_OPT_UINT },
+ { LX_MNTOPT_MAXPROTO, MOUNT_OPT_UINT },
+};
+
+
+/*
+ * i_lx_opt_verify() - Check the mount options.
+ *
+ * You might wonder why we're being so strict about the mount options
+ * we allow. The reason is that normally all mount option verification
+ * is done by the Solaris userland mount command. Once mount options
+ * are passed to the kernel, invalid options are simply ignored. So
+ * if we actually want to catch requests for functionality that we
+ * don't support, or if we want to make sure that we don't randomly
+ * enable options that we haven't check to make sure they have the
+ * same syntax on Linux and Solaris, we need to reject any options
+ * we don't know to be ok here.
+ */
+static int
+i_lx_opt_verify(char *opts, mount_opt_t *mop)
+{
+ int opts_len = strlen(opts);
+ char *opts_tmp, *opt;
+ int opt_len, i;
+
+ assert((opts != NULL) && (mop != NULL));
+
+ /* If no options were specified, there's no problem. */
+ if (opts_len == 0)
+ return (1);
+
+ /* If no options are allowed, fail. */
+ if (mop[0].mo_name == NULL)
+ return (0);
+
+ /* Don't accept leading or trailing ','. */
+ if ((opts[0] == ',') || (opts[opts_len] == ','))
+ return (0);
+
+ /* Don't accept sequential ','. */
+ for (i = 1; i < opts_len; i++)
+ if ((opts[i - 1] == ',') && (opts[i] == ','))
+ return (0);
+
+ /*
+ * We're going to use strtok() which modifies the target
+ * string so make a temporary copy.
+ */
+ opts_tmp = SAFE_ALLOCA(opts_len);
+ if (opts_tmp == NULL)
+ return (-1);
+ bcopy(opts, opts_tmp, opts_len + 1);
+
+ /* Verify each prop one at a time. */
+ opt = strtok(opts_tmp, ",");
+ opt_len = strlen(opt);
+ for (;;) {
+
+ /* Check for matching option/value pair. */
+ for (i = 0; mop[i].mo_name != NULL; i++) {
+ char *ovalue;
+ int ovalue_len, mo_len;
+
+ /* If the options is too short don't bother comparing */
+ mo_len = strlen(mop[i].mo_name);
+ if (opt_len < mo_len) {
+ /* Keep trying to find a match. */
+ continue;
+ }
+
+ /* Compare the option to an allowed option. */
+ if (strncmp(mop[i].mo_name, opt, mo_len) != 0) {
+ /* Keep trying to find a match. */
+ continue;
+ }
+
+ if (mop[i].mo_type == MOUNT_OPT_NORMAL) {
+ /* The option doesn't take a value. */
+ if (opt_len == mo_len) {
+ /* This option is ok. */
+ break;
+ } else {
+ /* Keep trying to find a match. */
+ continue;
+ }
+ }
+
+ /* This options takes a value. */
+ if ((opt_len == mo_len) || (opt[mo_len] != '=')) {
+ /* Keep trying to find a match. */
+ continue;
+ }
+
+ /* We have an option match. Verify option value. */
+ ovalue = &opt[mo_len] + 1;
+ ovalue_len = strlen(ovalue);
+
+ /* Value can't be zero length string. */
+ if (ovalue_len == 0)
+ return (0);
+
+ if (mop[i].mo_type == MOUNT_OPT_UINT) {
+ int j;
+ /* Verify that value is an unsigned int. */
+ for (j = 0; j < ovalue_len; j++)
+ if (!isdigit(ovalue[j]))
+ return (0);
+ } else {
+ /* Unknown option type specified. */
+ assert(0);
+ }
+
+ /* The option is ok. */
+ break;
+ }
+
+ /* If there were no matches this is an unsupported option. */
+ if (mop[i].mo_name == NULL)
+ return (0);
+
+ /* This option is ok, move onto the next option. */
+ if ((opt = strtok(NULL, ",")) == NULL)
+ break;
+ opt_len = strlen(opt);
+ };
+
+ /* We verified all the options. */
+ return (1);
+}
+
+static int
+i_add_option(char *option, char *buf, size_t buf_size)
+{
+ char *fmt_str = NULL;
+
+ assert((option != NULL) && (strlen(option) > 0));
+ assert((buf != NULL) && (buf_size > 0));
+
+ if (buf[0] == '\0') {
+ fmt_str = "%s";
+ } else {
+ fmt_str = ",%s";
+ }
+
+ buf_size -= strlen(buf);
+ buf += strlen(buf);
+
+ /*LINTED*/
+ if (snprintf(buf, buf_size, fmt_str, option) > (buf_size - 1))
+ return (-EOVERFLOW);
+ return (0);
+}
+
+static int
+i_add_option_int(char *option, int val, char *buf, size_t buf_size)
+{
+ char *fmt_str = NULL;
+
+ assert((option != NULL) && (strlen(option) > 0));
+ assert((buf != NULL) && (buf_size > 0));
+
+ if (buf[0] == '\0') {
+ fmt_str = "%s=%d";
+ } else {
+ fmt_str = ",%s=%d";
+ }
+
+ buf_size -= strlen(buf);
+ buf += strlen(buf);
+
+ /*LINTED*/
+ if (snprintf(buf, buf_size, fmt_str, option, val) > (buf_size - 1))
+ return (-EOVERFLOW);
+ return (0);
+}
+
+static int
+i_make_nfs_args(lx_nfs_mount_data_t *lx_nmd, struct nfs_args *nfs_args,
+ struct netbuf *nfs_args_addr, struct knetconfig *nfs_args_knconf,
+ union fh_buffer *nfs_args_fh, struct sec_data *nfs_args_secdata,
+ char *fstype, char *options, int options_size)
+{
+ struct stat statbuf;
+ int i, rv, use_tcp;
+
+ /* Sanity check the incomming Linux request. */
+ if ((lx_nmd->nmd_rsize < 0) || (lx_nmd->nmd_wsize < 0) ||
+ (lx_nmd->nmd_timeo < 0) || (lx_nmd->nmd_retrans < 0) ||
+ (lx_nmd->nmd_acregmin < 0) || (lx_nmd->nmd_acregmax < 0) ||
+ (lx_nmd->nmd_acdirmax < 0)) {
+ return (-EINVAL);
+ }
+
+ /*
+ * Additional sanity checks of incomming request.
+ *
+ * Some of the sanity checks below should probably return
+ * EINVAL (or some other error code) instead or ENOTSUP,
+ * but without experiminting on Linux to see how it
+ * deals with certain strange values there is no way
+ * to really know what we should return, hence we return
+ * ENOTSUP to tell us that eventually if we see some
+ * application hitting the problem we can go to a real
+ * Linux system, figure out how it deals with the situation
+ * and update our code to handle it in the same fashion.
+ */
+ if (lx_nmd->nmd_version != 4) {
+ lx_unsupported("unsupported nfs mount request, "
+ "unrecognized NFS mount structure: %d\n",
+ lx_nmd->nmd_version);
+ return (-ENOTSUP);
+ }
+ if ((lx_nmd->nmd_flags & ~LX_NFS_MOUNT_SUPPORTED) != 0) {
+ lx_unsupported("unsupported nfs mount request, "
+ "flags: 0x%x\n", lx_nmd->nmd_flags);
+ return (-ENOTSUP);
+ }
+ if (lx_nmd->nmd_addr.sin_family != AF_INET) {
+ lx_unsupported("unsupported nfs mount request, "
+ "transport address family: 0x%x\n",
+ lx_nmd->nmd_addr.sin_family);
+ return (-ENOTSUP);
+ }
+ for (i = 0; i < LX_NMD_MAXHOSTNAMELEN; i++) {
+ if (lx_nmd->nmd_hostname[i] == '\0')
+ break;
+ }
+ if (i == 0) {
+ lx_unsupported("unsupported nfs mount request, "
+ "no hostname specified\n");
+ return (-ENOTSUP);
+ }
+ if (i == LX_NMD_MAXHOSTNAMELEN) {
+ lx_unsupported("unsupported nfs mount request, "
+ "hostname not terminated\n");
+ return (-ENOTSUP);
+ }
+ if (lx_nmd->nmd_namlen < i) {
+ lx_unsupported("unsupported nfs mount request, "
+ "invalid namlen value: 0x%x\n", lx_nmd->nmd_namlen);
+ return (-ENOTSUP);
+ }
+ if (lx_nmd->nmd_bsize != 0) {
+ lx_unsupported("unsupported nfs mount request, "
+ "bsize value: 0x%x\n", lx_nmd->nmd_bsize);
+ return (-ENOTSUP);
+ }
+
+ /* Initialize and clear the output structure pointers passed in. */
+ bzero(nfs_args, sizeof (*nfs_args));
+ bzero(nfs_args_addr, sizeof (*nfs_args_addr));
+ bzero(nfs_args_knconf, sizeof (*nfs_args_knconf));
+ bzero(nfs_args_fh, sizeof (*nfs_args_fh));
+ bzero(nfs_args_secdata, sizeof (*nfs_args_secdata));
+ nfs_args->addr = nfs_args_addr;
+ nfs_args->knconf = nfs_args_knconf;
+ nfs_args->fh = (caddr_t)nfs_args_fh;
+ nfs_args->nfs_ext_u.nfs_extB.secdata = nfs_args_secdata;
+
+ /* Check if we're using tcp. */
+ use_tcp = (lx_nmd->nmd_flags & LX_NFS_MOUNT_TCP) ? 1 : 0;
+
+ /*
+ * These seem to be the default flags used by Solaris for v2 and v3
+ * nfs mounts.
+ *
+ * Don't bother with NFSMNT_TRYRDMA since we always specify a
+ * transport (either udp or tcp).
+ */
+ nfs_args->flags = NFSMNT_NEWARGS | NFSMNT_KNCONF | NFSMNT_INT |
+ NFSMNT_HOSTNAME;
+
+ /* Translate some Linux mount flags into Solaris mount flags. */
+ if (lx_nmd->nmd_flags & LX_NFS_MOUNT_SOFT)
+ nfs_args->flags |= NFSMNT_SOFT;
+ if (lx_nmd->nmd_flags & LX_NFS_MOUNT_INTR)
+ nfs_args->flags |= NFSMNT_INT;
+ if (lx_nmd->nmd_flags & LX_NFS_MOUNT_POSIX)
+ nfs_args->flags |= NFSMNT_POSIX;
+ if (lx_nmd->nmd_flags & LX_NFS_MOUNT_NOCTO)
+ nfs_args->flags |= NFSMNT_NOCTO;
+ if (lx_nmd->nmd_flags & LX_NFS_MOUNT_NOAC)
+ nfs_args->flags |= NFSMNT_NOAC;
+ if (lx_nmd->nmd_flags & LX_NFS_MOUNT_NONLM)
+ nfs_args->flags |= NFSMNT_LLOCK;
+
+ if ((lx_nmd->nmd_flags & LX_NFS_MOUNT_VER3) != 0) {
+ (void) strcpy(fstype, "nfs3");
+ if ((rv = i_add_option_int("vers", 3,
+ options, options_size)) != 0)
+ return (rv);
+
+ if (lx_nmd->nmd_root.lx_fh3_length >
+ sizeof (nfs_args_fh->fh3.fh3_u.data)) {
+ lx_unsupported("unsupported nfs mount request, "
+ "nfs file handle length: 0x%x\n",
+ lx_nmd->nmd_root.lx_fh3_length);
+ return (-ENOTSUP);
+ }
+
+ /* Set the v3 file handle info. */
+ nfs_args_fh->fh3.fh3_length = lx_nmd->nmd_root.lx_fh3_length;
+ bcopy(&lx_nmd->nmd_root.lx_fh3_data,
+ nfs_args_fh->fh3.fh3_u.data,
+ lx_nmd->nmd_root.lx_fh3_length);
+ } else {
+ /*
+ * Assume nfs v2. Note that this could also be a v1
+ * mount request but there doesn't seem to be any difference
+ * in the parameters passed to the Linux mount system
+ * call for v1 or v2 mounts so there is no way of really
+ * knowing.
+ */
+ (void) strcpy(fstype, "nfs");
+ if ((rv = i_add_option_int("vers", 2,
+ options, options_size)) != 0)
+ return (rv);
+
+ /* Solaris seems to add this flag when using v2. */
+ nfs_args->flags |= NFSMNT_SECDEFAULT;
+
+ /* Set the v2 file handle info. */
+ bcopy(&lx_nmd->nmd_old_root,
+ nfs_args_fh, sizeof (nfs_args_fh->fh2));
+ }
+
+ /*
+ * We can't use getnetconfig() here because there is no netconfig
+ * database in linux.
+ */
+ nfs_args_knconf->knc_protofmly = "inet";
+ if (use_tcp) {
+ /*
+ * TCP uses NC_TPI_COTS_ORD semantics.
+ * See /etc/netconfig.
+ */
+ nfs_args_knconf->knc_semantics = NC_TPI_COTS_ORD;
+ nfs_args_knconf->knc_proto = "tcp";
+ if ((rv = i_add_option("proto=tcp",
+ options, options_size)) != 0)
+ return (rv);
+ if (stat("/dev/tcp", &statbuf) != 0)
+ return (-errno);
+ nfs_args_knconf->knc_rdev = statbuf.st_rdev;
+ } else {
+ /*
+ * Assume UDP. UDP uses NC_TPI_CLTS semantics.
+ * See /etc/netconfig.
+ */
+ nfs_args_knconf->knc_semantics = NC_TPI_CLTS;
+ nfs_args_knconf->knc_proto = "udp";
+ if ((rv = i_add_option("proto=udp",
+ options, options_size)) != 0)
+ return (rv);
+ if (stat("/dev/udp", &statbuf) != 0)
+ return (-errno);
+ nfs_args_knconf->knc_rdev = statbuf.st_rdev;
+ }
+
+ /* Set the server address. */
+ nfs_args_addr->maxlen = nfs_args_addr->len =
+ sizeof (struct sockaddr_in);
+ nfs_args_addr->buf = (char *)&lx_nmd->nmd_addr;
+
+ /* Set the server hostname string. */
+ nfs_args->hostname = lx_nmd->nmd_hostname;
+
+ /* Translate Linux nfs mount parameters into Solaris mount options. */
+ if (lx_nmd->nmd_rsize != LX_NMD_DEFAULT_RSIZE) {
+ if ((rv = i_add_option_int("rsize", lx_nmd->nmd_rsize,
+ options, options_size)) != 0)
+ return (rv);
+ nfs_args->rsize = lx_nmd->nmd_rsize;
+ nfs_args->flags |= NFSMNT_RSIZE;
+ }
+ if (lx_nmd->nmd_wsize != LX_NMD_DEFAULT_WSIZE) {
+ if ((rv = i_add_option_int("wsize", lx_nmd->nmd_wsize,
+ options, options_size)) != 0)
+ return (rv);
+ nfs_args->wsize = lx_nmd->nmd_wsize;
+ nfs_args->flags |= NFSMNT_WSIZE;
+ }
+ if ((rv = i_add_option_int("timeo", lx_nmd->nmd_timeo,
+ options, options_size)) != 0)
+ return (rv);
+ nfs_args->timeo = lx_nmd->nmd_timeo;
+ nfs_args->flags |= NFSMNT_TIMEO;
+ if ((rv = i_add_option_int("retrans", lx_nmd->nmd_retrans,
+ options, options_size)) != 0)
+ return (rv);
+ nfs_args->retrans = lx_nmd->nmd_retrans;
+ nfs_args->flags |= NFSMNT_RETRANS;
+ if ((rv = i_add_option_int("acregmin", lx_nmd->nmd_acregmin,
+ options, options_size)) != 0)
+ return (rv);
+ nfs_args->acregmin = lx_nmd->nmd_acregmin;
+ nfs_args->flags |= NFSMNT_ACREGMIN;
+ if ((rv = i_add_option_int("acregmax", lx_nmd->nmd_acregmax,
+ options, options_size)) != 0)
+ return (rv);
+ nfs_args->acregmax = lx_nmd->nmd_acregmax;
+ nfs_args->flags |= NFSMNT_ACREGMAX;
+ if ((rv = i_add_option_int("acdirmin", lx_nmd->nmd_acdirmin,
+ options, options_size)) != 0)
+ return (rv);
+ nfs_args->acdirmin = lx_nmd->nmd_acdirmin;
+ nfs_args->flags |= NFSMNT_ACDIRMIN;
+ if ((rv = i_add_option_int("acdirmax", lx_nmd->nmd_acdirmax,
+ options, options_size)) != 0)
+ return (rv);
+ nfs_args->acdirmax = lx_nmd->nmd_acdirmax;
+ nfs_args->flags |= NFSMNT_ACDIRMAX;
+
+ /* We only support nfs with a security type of AUTH_SYS. */
+ nfs_args->nfs_args_ext = NFS_ARGS_EXTB;
+ nfs_args_secdata->secmod = AUTH_SYS;
+ nfs_args_secdata->rpcflavor = AUTH_SYS;
+ nfs_args_secdata->flags = 0;
+ nfs_args_secdata->uid = 0;
+ nfs_args_secdata->data = NULL;
+ nfs_args->nfs_ext_u.nfs_extB.next = NULL;
+
+ /*
+ * The Linux nfs mount command seems to pass an open socket fd
+ * to the kernel during the mount system call. We don't need
+ * this fd on Solaris so just close it.
+ */
+ (void) close(lx_nmd->nmd_fd);
+
+ return (0);
+}
+
+long
+lx_mount(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
+ uintptr_t p5)
+{
+ /* Linux input arguments. */
+ const char *sourcep = (const char *)p1;
+ const char *targetp = (const char *)p2;
+ const char *fstypep = (const char *)p3;
+ unsigned int flags = (unsigned int)p4;
+ const void *datap = (const void *)p5;
+
+ /* Variables needed for all mounts. */
+ char source[MAXPATHLEN], target[MAXPATHLEN];
+ char fstype[MAXPATHLEN], options[MAXPATHLEN];
+ int sflags, rv;
+
+ /* Variables needed for nfs mounts. */
+ lx_nfs_mount_data_t lx_nmd;
+ struct nfs_args nfs_args;
+ struct netbuf nfs_args_addr;
+ struct knetconfig nfs_args_knconf;
+ union fh_buffer nfs_args_fh;
+ struct sec_data nfs_args_secdata;
+ char *sdataptr = NULL;
+ int sdatalen = 0;
+
+ /* Initialize Solaris mount arguments. */
+ sflags = MS_OPTIONSTR;
+ options[0] = '\0';
+ sdatalen = 0;
+
+ /* Copy in parameters that are always present. */
+ rv = uucopystr((void *)sourcep, &source, sizeof (source));
+ if ((rv == -1) || (rv == sizeof (source)))
+ return (-EFAULT);
+
+ rv = uucopystr((void *)targetp, &target, sizeof (target));
+ if ((rv == -1) || (rv == sizeof (target)))
+ return (-EFAULT);
+
+ rv = uucopystr((void *)fstypep, &fstype, sizeof (fstype));
+ if ((rv == -1) || (rv == sizeof (fstype)))
+ return (-EFAULT);
+
+ lx_debug("\tlinux mount source: %s", source);
+ lx_debug("\tlinux mount target: %s", target);
+ lx_debug("\tlinux mount fstype: %s", fstype);
+
+ /* Make sure we support the requested mount flags. */
+ if ((flags & ~LX_MS_SUPPORTED) != 0) {
+ lx_unsupported("unsupported mount flags: 0x%x", flags);
+ return (-ENOTSUP);
+ }
+
+ /* Do filesystem specific mount work. */
+ if (flags & LX_MS_BIND) {
+
+ /* If MS_BIND is set, we turn this into a lofs mount. */
+ (void) strcpy(fstype, "lofs");
+
+ /* Copy in Linux mount options. */
+ if (datap != NULL) {
+ rv = uucopystr((void *)datap,
+ options, sizeof (options));
+ if ((rv == -1) || (rv == sizeof (options)))
+ return (-EFAULT);
+ }
+ lx_debug("\tlinux mount options: \"%s\"", options);
+
+ /* Verify Linux mount options. */
+ if (i_lx_opt_verify(options, lofs_options) == 0) {
+ lx_unsupported("unsupported lofs mount options: %s",
+ options);
+ return (-ENOTSUP);
+ }
+ } else if (strcmp(fstype, "tmpfs") == 0) {
+
+ /* Copy in Linux mount options. */
+ if (datap != NULL) {
+ rv = uucopystr((void *)datap,
+ options, sizeof (options));
+ if ((rv == -1) || (rv == sizeof (options)))
+ return (-EFAULT);
+ }
+ lx_debug("\tlinux mount options: \"%s\"", options);
+
+ /* Verify Linux mount options. */
+ if (i_lx_opt_verify(options, lx_tmpfs_options) == 0) {
+ lx_unsupported("unsupported tmpfs mount options: %s",
+ options);
+ return (-ENOTSUP);
+ }
+ } else if (strcmp(fstype, "proc") == 0) {
+ struct stat64 sb;
+
+ /* Translate proc mount requests to lx_proc requests. */
+ (void) strcpy(fstype, "lx_proc");
+
+ /* Copy in Linux mount options. */
+ if (datap != NULL) {
+ rv = uucopystr((void *)datap,
+ options, sizeof (options));
+ if ((rv == -1) || (rv == sizeof (options)))
+ return (-EFAULT);
+ }
+ lx_debug("\tlinux mount options: \"%s\"", options);
+
+ /* Verify Linux mount options. */
+ if (i_lx_opt_verify(options, lx_proc_options) == 0) {
+ lx_unsupported("unsupported proc mount options: %s",
+ options);
+ return (-ENOTSUP);
+ }
+
+ /* If mounting proc over itself, just return ok */
+ if (stat64(target, &sb) == 0 &&
+ strcmp(sb.st_fstype, "lx_proc") == 0) {
+ return (0);
+ }
+ } else if (strcmp(fstype, "autofs") == 0) {
+
+ /* Translate autofs mount requests to lx_afs requests. */
+ (void) strcpy(fstype, LX_AUTOFS_NAME);
+
+ /* Copy in Linux mount options. */
+ if (datap != NULL) {
+ rv = uucopystr((void *)datap,
+ options, sizeof (options));
+ if ((rv == -1) || (rv == sizeof (options)))
+ return (-EFAULT);
+ }
+ lx_debug("\tlinux mount options: \"%s\"", options);
+
+ /* Verify Linux mount options. */
+ if (i_lx_opt_verify(options, lx_autofs_options) == 0) {
+ lx_unsupported("unsupported autofs mount options: %s",
+ options);
+ return (-ENOTSUP);
+ }
+ } else if (strcmp(fstype, "nfs") == 0) {
+
+ /*
+ * Copy in Linux mount options. Note that for Linux
+ * nfs mounts the mount options pointer (which normally
+ * points to a string) points to a structure.
+ */
+ if (uucopy((void *)datap, &lx_nmd, sizeof (lx_nmd)) < 0)
+ return (-errno);
+
+ /*
+ * For Solaris nfs mounts, the kernel expects a special
+ * strucutre, but a pointer to this structure is passed
+ * in via an extra parameter (sdataptr below.)
+ */
+ if ((rv = i_make_nfs_args(&lx_nmd, &nfs_args,
+ &nfs_args_addr, &nfs_args_knconf, &nfs_args_fh,
+ &nfs_args_secdata, fstype,
+ options, sizeof (options))) != 0)
+ return (rv);
+
+ /*
+ * For nfs mounts we need to tell the mount system call
+ * to expect extra parameters.
+ */
+ sflags |= MS_DATA;
+ sdataptr = (char *)&nfs_args;
+ sdatalen = sizeof (nfs_args);
+ } else {
+ lx_unsupported("unsupported mount filesystem type: %s", fstype);
+ return (-ENOTSUP);
+ }
+
+ /* Convert some Linux flags to Solaris flags. */
+ if (flags & LX_MS_RDONLY)
+ sflags |= MS_RDONLY;
+ if (flags & LX_MS_NOSUID)
+ sflags |= MS_NOSUID;
+ if (flags & LX_MS_REMOUNT)
+ sflags |= MS_REMOUNT;
+
+ /* Convert some Linux flags to Solaris option strings. */
+ if ((flags & LX_MS_NODEV) &&
+ ((rv = i_add_option("nodev", options, sizeof (options))) != 0))
+ return (rv);
+ if ((flags & LX_MS_NOEXEC) &&
+ ((rv = i_add_option("noexec", options, sizeof (options))) != 0))
+ return (rv);
+ if ((flags & LX_MS_NOATIME) &&
+ ((rv = i_add_option("noatime", options, sizeof (options))) != 0))
+ return (rv);
+
+ lx_debug("\tsolaris mount fstype: %s", fstype);
+ lx_debug("\tsolaris mount options: \"%s\"", options);
+
+ return (mount(source, target, sflags, fstype, sdataptr, sdatalen,
+ options, sizeof (options)) ? -errno : 0);
+}
+
+/*
+ * umount() is identical, though it is implemented on top of umount2() in
+ * Solaris so it cannot be a pass-thru system call.
+ */
+long
+lx_umount(uintptr_t p1)
+{
+ return (umount((char *)p1) ? -errno : 0);
+}
+
+/*
+ * The Linux umount2() system call is identical but has a different value for
+ * MNT_FORCE (the logical equivalent to MS_FORCE).
+ */
+#define LX_MNT_FORCE 0x1
+
+long
+lx_umount2(uintptr_t p1, uintptr_t p2)
+{
+ char *path = (char *)p1;
+ int flags = 0;
+
+ if (p2 & ~LX_MNT_FORCE)
+ return (-EINVAL);
+
+ if (p2 & LX_MNT_FORCE)
+ flags |= MS_FORCE;
+
+ return (umount2(path, flags) ? -errno : 0);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/open.c b/usr/src/lib/brand/lx/lx_brand/common/open.c
new file mode 100644
index 0000000000..0a643bf3ad
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/open.c
@@ -0,0 +1,204 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/inttypes.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <libintl.h>
+#include <stdio.h>
+
+#include <sys/lx_types.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_fcntl.h>
+#include <sys/lx_misc.h>
+
+static int
+ltos_open_flags(uintptr_t p2)
+{
+ int flags;
+
+ if ((p2 & O_ACCMODE) == LX_O_RDONLY)
+ flags = O_RDONLY;
+ else if ((p2 & O_ACCMODE) == LX_O_WRONLY)
+ flags = O_WRONLY;
+ else
+ flags = O_RDWR;
+
+ if (p2 & LX_O_CREAT) {
+ flags |= O_CREAT;
+ }
+
+ if (p2 & LX_O_EXCL)
+ flags |= O_EXCL;
+ if (p2 & LX_O_NOCTTY)
+ flags |= O_NOCTTY;
+ if (p2 & LX_O_TRUNC)
+ flags |= O_TRUNC;
+ if (p2 & LX_O_APPEND)
+ flags |= O_APPEND;
+ if (p2 & LX_O_NONBLOCK)
+ flags |= O_NONBLOCK;
+ if (p2 & LX_O_SYNC)
+ flags |= O_SYNC;
+ if (p2 & LX_O_LARGEFILE)
+ flags |= O_LARGEFILE;
+ if (p2 & LX_O_NOFOLLOW)
+ flags |= O_NOFOLLOW;
+ if (p2 & LX_O_CLOEXEC)
+ flags |= O_CLOEXEC;
+
+ /*
+ * Linux uses the LX_O_DIRECT flag to do raw, synchronous I/O to the
+ * device backing the fd in question. Solaris doesn't have similar
+ * functionality, but we can attempt to simulate it using the flags
+ * (O_RSYNC|O_SYNC) and directio(3C).
+ *
+ * The LX_O_DIRECT flag also requires that the transfer size and
+ * alignment of I/O buffers be a multiple of the logical block size for
+ * the underlying file system, but frankly there isn't an easy way to
+ * support that functionality without doing something like adding an
+ * fcntl(2) flag to denote LX_O_DIRECT mode.
+ *
+ * Since LX_O_DIRECT is merely a performance advisory, we'll just
+ * emulate what we can and trust that the only applications expecting
+ * an error when performing I/O from a misaligned buffer or when
+ * passing a transfer size is not a multiple of the underlying file
+ * system block size will be test suites.
+ */
+ if (p2 & LX_O_DIRECT)
+ flags |= (O_RSYNC|O_SYNC);
+
+ return (flags);
+}
+
+static int
+lx_open_postprocess(int fd, uintptr_t p2)
+{
+ struct stat64 statbuf;
+
+ /*
+ * Check the file type AFTER opening the file to avoid a race condition
+ * where the file we want to open could change types between a stat64()
+ * and an open().
+ */
+ if (p2 & LX_O_DIRECTORY) {
+ if (fstat64(fd, &statbuf) < 0) {
+ int ret = -errno;
+
+ (void) close(fd);
+ return (ret);
+ } else if (!S_ISDIR(statbuf.st_mode)) {
+ (void) close(fd);
+ return (-ENOTDIR);
+ }
+ }
+
+ if (p2 & LX_O_DIRECT)
+ (void) directio(fd, DIRECTIO_ON);
+
+ /*
+ * Set the ASYNC flag if passsed.
+ */
+ if (p2 & LX_O_ASYNC) {
+ if (fcntl(fd, F_SETFL, FASYNC) < 0) {
+ int ret = -errno;
+
+ (void) close(fd);
+ return (ret);
+ }
+ }
+
+ return (fd);
+}
+
+long
+lx_openat(uintptr_t ext1, uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ int atfd = (int)ext1;
+ int flags, fd;
+ mode_t mode = 0;
+ char *path = (char *)p1;
+
+ if (atfd == LX_AT_FDCWD)
+ atfd = AT_FDCWD;
+
+ flags = ltos_open_flags(p2);
+
+ if (flags & O_CREAT) {
+ mode = (mode_t)p3;
+ }
+
+ lx_debug("\topenat(%d, %s, 0%o, 0%o)", atfd, path, flags, mode);
+
+ if ((fd = openat(atfd, path, flags, mode)) < 0)
+ return (-errno);
+
+ return (lx_open_postprocess(fd, p2));
+}
+
+long
+lx_open(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ int flags, fd;
+ mode_t mode = 0;
+ char *path = (char *)p1;
+
+ /*
+ * We'll check the file type again after opening the file (see the
+ * explanation in lx_open_postprocess), but we also need to check BEFORE
+ * to avoid the very hang O_DIRECTORY is trying to avoid for opendir(3)
+ * when given a FIFO.
+ */
+ if (p2 & LX_O_DIRECTORY) {
+ struct stat64 statbuf;
+
+ if (lstat64(path, &statbuf) < 0) {
+ return (-errno);
+ }
+
+ if (!S_ISDIR(statbuf.st_mode)) {
+ return (-ENOTDIR);
+ }
+ }
+
+ flags = ltos_open_flags(p2);
+
+ if (flags & O_CREAT) {
+ mode = (mode_t)p3;
+ }
+
+ lx_debug("\topen(%s, 0%o, 0%o)", path, flags, mode);
+
+ if ((fd = open(path, flags, mode)) < 0)
+ return (-errno);
+
+ return (lx_open_postprocess(fd, p2));
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/pgrp.c b/usr/src/lib/brand/lx/lx_brand/common/pgrp.c
new file mode 100644
index 0000000000..33b4bb7667
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/pgrp.c
@@ -0,0 +1,172 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_syscall.h>
+
+long
+lx_getpgrp(void)
+{
+ int ret;
+
+ ret = getpgrp();
+
+ /*
+ * If the pgrp is that of the init process, return the value Linux
+ * expects.
+ */
+ if (ret == zoneinit_pid)
+ return (LX_INIT_PGID);
+
+ return ((ret == -1) ? -errno : ret);
+}
+
+long
+lx_getpgid(uintptr_t p1)
+{
+ pid_t spid;
+ int pid = (int)p1;
+ int ret;
+
+ if (pid < 0)
+ return (-ESRCH);
+
+ /*
+ * If the supplied pid matches that of the init process, return
+ * the pgid Linux expects.
+ */
+ if (pid == zoneinit_pid)
+ return (LX_INIT_PGID);
+
+ if ((ret = lx_lpid_to_spid(pid, &spid)) < 0)
+ return (ret);
+
+ ret = getpgid(spid);
+
+ /*
+ * If the pgid is that of the init process, return the value Linux
+ * expects.
+ */
+ if (ret == zoneinit_pid)
+ return (LX_INIT_PGID);
+
+ return ((ret == -1) ? -errno : ret);
+}
+
+long
+lx_setpgid(uintptr_t p1, uintptr_t p2)
+{
+ pid_t pid = (pid_t)p1;
+ pid_t pgid = (pid_t)p2;
+ pid_t spid, spgid;
+ int ret;
+
+ if (pid < 0)
+ return (-ESRCH);
+
+ if (pgid < 0)
+ return (-EINVAL);
+
+ if ((ret = lx_lpid_to_spid(pid, &spid)) < 0)
+ return (ret);
+
+ if (pgid == 0)
+ spgid = spid;
+ else if ((ret = lx_lpid_to_spid(pgid, &spgid)) < 0)
+ return (ret);
+
+ ret = setpgid(spid, spgid);
+
+ if (ret != 0 && errno == EPERM) {
+ /*
+ * On Linux, calling setpgid with a desired pgid that is equal
+ * to the current pgid of the process, no error is emitted.
+ * This differs slightly from illumos which will return EPERM.
+ *
+ * To emulate the Linux behavior, we check specifically for
+ * matching pgids if an EPERM is encountered.
+ */
+ if (spgid == getpgid(spid))
+ return (0);
+ else
+ return (-EPERM);
+ }
+
+ return ((ret == 0) ? 0 : -errno);
+}
+
+long
+lx_getsid(uintptr_t p1)
+{
+ pid_t spid;
+ int pid = (int)p1;
+ int ret;
+
+ if (pid < 0)
+ return (-ESRCH);
+
+ /*
+ * If the supplied matches that of the init process, return the value
+ * Linux expects.
+ */
+ if (pid == zoneinit_pid)
+ return (LX_INIT_SID);
+
+ if ((ret = lx_lpid_to_spid(pid, &spid)) < 0)
+ return (ret);
+
+ ret = getsid(spid);
+
+ /*
+ * If the sid is that of the init process, return the value Linux
+ * expects.
+ */
+ if (ret == zoneinit_pid)
+ return (LX_INIT_SID);
+
+ return ((ret == -1) ? -errno : ret);
+}
+
+long
+lx_setsid(void)
+{
+ int ret;
+
+ ret = setsid();
+
+ /*
+ * If the pgid is that of the init process, return the value Linux
+ * expects.
+ */
+ if (ret == zoneinit_pid)
+ return (LX_INIT_SID);
+
+ return ((ret == -1) ? -errno : ret);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/poll_select.c b/usr/src/lib/brand/lx/lx_brand/common/poll_select.c
new file mode 100644
index 0000000000..a69c3bb56f
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/poll_select.c
@@ -0,0 +1,269 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+#include <assert.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <alloca.h>
+#include <signal.h>
+#include <strings.h>
+#include <sys/param.h>
+#include <sys/brand.h>
+#include <sys/poll.h>
+#include <sys/syscall.h>
+#include <sys/epoll.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_poll.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_misc.h>
+
+#if defined(_ILP32)
+extern int select_large_fdset(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0,
+ struct timeval *tv);
+#endif
+
+long
+lx_select(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
+ uintptr_t p5)
+{
+ int nfds = (int)p1;
+ fd_set *rfdsp = NULL;
+ fd_set *wfdsp = NULL;
+ fd_set *efdsp = NULL;
+ struct timeval tv, *tvp = NULL;
+ int fd_set_len = howmany(nfds, 8);
+ int r;
+ hrtime_t start = NULL, end;
+
+ lx_debug("\tselect(%d, 0x%p, 0x%p, 0x%p, 0x%p)", p1, p2, p3, p4, p5);
+
+ if (nfds > 0) {
+ if (p2 != NULL) {
+ rfdsp = SAFE_ALLOCA(fd_set_len);
+ if (rfdsp == NULL)
+ return (-ENOMEM);
+ if (uucopy((void *)p2, rfdsp, fd_set_len) != 0)
+ return (-errno);
+ }
+ if (p3 != NULL) {
+ wfdsp = SAFE_ALLOCA(fd_set_len);
+ if (wfdsp == NULL)
+ return (-ENOMEM);
+ if (uucopy((void *)p3, wfdsp, fd_set_len) != 0)
+ return (-errno);
+ }
+ if (p4 != NULL) {
+ efdsp = SAFE_ALLOCA(fd_set_len);
+ if (efdsp == NULL)
+ return (-ENOMEM);
+ if (uucopy((void *)p4, efdsp, fd_set_len) != 0)
+ return (-errno);
+ }
+ }
+ if (p5 != NULL) {
+ tvp = &tv;
+ if (uucopy((void *)p5, &tv, sizeof (tv)) != 0)
+ return (-errno);
+ start = gethrtime();
+ }
+
+#if defined(_LP64)
+ r = select(nfds, rfdsp, wfdsp, efdsp, tvp);
+#else
+ if (nfds >= FD_SETSIZE)
+ r = select_large_fdset(nfds, rfdsp, wfdsp, efdsp, tvp);
+ else
+ r = select(nfds, rfdsp, wfdsp, efdsp, tvp);
+#endif
+ if (r < 0)
+ return (-errno);
+
+ if (tvp != NULL) {
+ long long tv_total;
+
+ /*
+ * Linux updates the timeval parameter for select() calls
+ * with the amount of time that left before the select
+ * would have timed out.
+ */
+ end = gethrtime();
+ tv_total = (tv.tv_sec * MICROSEC) + tv.tv_usec;
+ tv_total -= ((end - start) / (NANOSEC / MICROSEC));
+ if (tv_total < 0) {
+ tv.tv_sec = 0;
+ tv.tv_usec = 0;
+ } else {
+ tv.tv_sec = tv_total / MICROSEC;
+ tv.tv_usec = tv_total % MICROSEC;
+ }
+
+ if (uucopy(&tv, (void *)p5, sizeof (tv)) != 0)
+ return (-errno);
+ }
+
+ if ((rfdsp != NULL) && (uucopy(rfdsp, (void *)p2, fd_set_len) != 0))
+ return (-errno);
+ if ((wfdsp != NULL) && (uucopy(wfdsp, (void *)p3, fd_set_len) != 0))
+ return (-errno);
+ if ((efdsp != NULL) && (uucopy(efdsp, (void *)p4, fd_set_len) != 0))
+ return (-errno);
+
+ return (r);
+}
+
+long
+lx_poll(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ struct pollfd *lfds, *sfds;
+ nfds_t nfds = (nfds_t)p2;
+ int fds_size, i, rval, revents;
+
+ /*
+ * Deal with the NULL fds[] case.
+ */
+ if (nfds == 0 && p1 == NULL) {
+ if ((rval = poll(NULL, 0, (int)p3)) < 0)
+ return (-errno);
+
+ return (rval);
+ }
+
+ /*
+ * Note: we are assuming that the Linux and Illumos pollfd
+ * structures are identical. Copy in the linux poll structure.
+ */
+ fds_size = sizeof (struct pollfd) * nfds;
+ lfds = (struct pollfd *)SAFE_ALLOCA(fds_size);
+ if (lfds == NULL)
+ return (-ENOMEM);
+ if (uucopy((void *)p1, lfds, fds_size) != 0)
+ return (-errno);
+
+ /*
+ * The poll system call modifies the poll structures passed in
+ * so we'll need to make an extra copy of them.
+ */
+ sfds = (struct pollfd *)SAFE_ALLOCA(fds_size);
+ if (sfds == NULL)
+ return (-ENOMEM);
+
+ /* Convert the Linux events bitmask into the Illumos equivalent. */
+ for (i = 0; i < nfds; i++) {
+ /*
+ * If the caller is polling for an unsupported event, we
+ * have to bail out.
+ */
+ if (lfds[i].events & ~LX_POLL_SUPPORTED_EVENTS) {
+ lx_unsupported("unsupported poll events requested: "
+ "events=0x%x", lfds[i].events);
+ return (-ENOTSUP);
+ }
+
+ sfds[i].fd = lfds[i].fd;
+ sfds[i].events = lfds[i].events & LX_POLL_COMMON_EVENTS;
+ if (lfds[i].events & LX_POLLWRNORM)
+ sfds[i].events |= POLLWRNORM;
+ if (lfds[i].events & LX_POLLWRBAND)
+ sfds[i].events |= POLLWRBAND;
+ if (lfds[i].events & LX_POLLRDHUP)
+ sfds[i].events |= POLLRDHUP;
+ sfds[i].revents = 0;
+ }
+
+ lx_debug("\tpoll(0x%p, %u, %d)", sfds, nfds, (int)p3);
+
+ if ((rval = poll(sfds, nfds, (int)p3)) < 0)
+ return (-errno);
+
+ /* Convert the Illumos revents bitmask into the Linux equivalent */
+ for (i = 0; i < nfds; i++) {
+ revents = sfds[i].revents & LX_POLL_COMMON_EVENTS;
+ if (sfds[i].revents & POLLWRBAND)
+ revents |= LX_POLLWRBAND;
+ if (sfds[i].revents & POLLRDHUP)
+ revents |= LX_POLLRDHUP;
+
+ /*
+ * Be careful because on Illumos POLLOUT and POLLWRNORM
+ * are defined to the same values but on Linux they
+ * are not.
+ */
+ if (sfds[i].revents & POLLOUT) {
+ if ((lfds[i].events & LX_POLLOUT) == 0)
+ revents &= ~LX_POLLOUT;
+ if (lfds[i].events & LX_POLLWRNORM)
+ revents |= LX_POLLWRNORM;
+ }
+
+ lfds[i].revents = revents;
+ }
+
+ /* Copy out the results */
+ if (uucopy(lfds, (void *)p1, fds_size) != 0)
+ return (-errno);
+
+ return (rval);
+}
+
+long
+lx_epoll_ctl(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
+{
+ int epfd = (int)p1;
+ int op = (int)p2;
+ int fd = (int)p3;
+ int rval;
+ struct epoll_event ev;
+
+ /*
+ * Linux limits the max number of open files to 1m so we can also test
+ * for this.
+ */
+ if (epfd < 0 || fd < 0 || epfd > (1024 * 1024) || fd > (1024 * 1024))
+ return (-EBADF);
+
+ if (epfd == fd)
+ return (-EINVAL);
+
+ /*
+ * Unlike the base epoll_ctl, we need to return a fault if the
+ * event pointer is invalid.
+ */
+ if (op != EPOLL_CTL_DEL) {
+ if (uucopy((void *)p4, &ev, sizeof (ev)) != 0)
+ return (-errno);
+ }
+
+ rval = epoll_ctl(epfd, op, fd, (struct epoll_event *)p4);
+ return ((rval < 0) ? -errno : rval);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/priority.c b/usr/src/lib/brand/lx/lx_brand/common/priority.c
new file mode 100644
index 0000000000..48b61d6bdc
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/priority.c
@@ -0,0 +1,95 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_types.h>
+#include <sys/resource.h>
+#include <sys/lx_misc.h>
+
+long
+lx_getpriority(uintptr_t p1, uintptr_t p2)
+{
+ int which = (int)p1;
+ id_t who = (id_t)p2;
+ int ret;
+
+ /*
+ * The only valid values for 'which' are positive integers, and unlike
+ * Solaris, linux doesn't support anything past PRIO_USER.
+ */
+ if (which < 0 || which > PRIO_USER)
+ return (-EINVAL);
+
+ lx_debug("\tgetpriority(%d, %d)", which, who);
+
+ errno = 0;
+
+ if ((which == PRIO_PROCESS) && (who == 1))
+ who = zoneinit_pid;
+
+ ret = getpriority(which, who);
+ if (ret == -1 && errno != 0) {
+ /*
+ * Linux does not return EINVAL for invalid 'who' values, it
+ * returns ESRCH instead. We already validated 'which' above.
+ */
+ if (errno == EINVAL)
+ errno = ESRCH;
+ return (-errno);
+ }
+
+ /*
+ * The return value of the getpriority syscall is biased by 20 to avoid
+ * returning negative values when successful.
+ */
+ return (20 - ret);
+}
+
+long
+lx_setpriority(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ int which = (int)p1;
+ id_t who = (id_t)p2;
+ int prio = (int)p3;
+ int rval;
+
+ if (which > PRIO_USER)
+ return (-EINVAL);
+
+ lx_debug("\tsetpriority(%d, %d, %d)", which, who, prio);
+
+ if ((which == PRIO_PROCESS) && (who == 1))
+ who = zoneinit_pid;
+
+ rval = setpriority(which, who, prio);
+
+ return ((rval == -1) ? -errno : rval);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/ptrace.c b/usr/src/lib/brand/lx/lx_brand/common/ptrace.c
new file mode 100644
index 0000000000..e43a38973a
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/ptrace.c
@@ -0,0 +1,2357 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_signal.h>
+#include <sys/lx_thread.h>
+#include <sys/lwp.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <procfs.h>
+#include <sys/frame.h>
+#include <strings.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <sys/wait.h>
+#include <sys/auxv.h>
+#include <thread.h>
+#include <pthread.h>
+#include <synch.h>
+#include <elf.h>
+#include <ieeefp.h>
+#include <assert.h>
+#include <libintl.h>
+
+/*
+ * Linux ptrace compatibility.
+ *
+ * The brand support for ptrace(2) is built on top of the Solaris /proc
+ * interfaces, mounted at /native/proc in the zone. This gets quite
+ * complicated due to the way ptrace works and the Solaris realization of the
+ * Linux threading model.
+ *
+ * ptrace can only interact with a process if we are tracing it, and it is
+ * currently stopped. There are two ways a process can begin tracing another
+ * process:
+ *
+ * PTRACE_TRACEME
+ *
+ * A child process can use PTRACE_TRACEME to indicate that it wants to be
+ * traced by the parent. This sets the ptrace compatibility flag in /proc
+ * which causes ths ptrace consumer to be notified through the wait(2)
+ * system call of events of interest. PTRACE_TRACEME is typically used by
+ * the debugger by forking a process, using PTRACE_TRACEME, and finally
+ * doing an exec of the specified program.
+ *
+ *
+ * PTRACE_ATTACH
+ *
+ * We can attach to a process using PTRACE_ATTACH. This is considerably
+ * more complicated than the previous case. On Linux, the traced process is
+ * effectively reparented to the ptrace consumer so that event notification
+ * can go through the normal wait(2) system call. Solaris has no such
+ * ability to reparent a process (nor should it) so some trickery was
+ * required.
+ *
+ * When the ptrace consumer uses PTRACE_ATTACH it forks a monitor child
+ * process. The monitor enables the /proc ptrace flag for itself and uses
+ * the native /proc mechanisms to observe the traced process and wait for
+ * events of interest. When the traced process stops, the monitor process
+ * sends itself a SIGTRAP thus rousting its parent process (the ptrace
+ * consumer) out of wait(2). We then translate the process id and status
+ * code from wait(2) to those of the traced process.
+ *
+ * To detach from the process we just have to clean up tracing flags and
+ * clean up the monitor.
+ *
+ * ptrace can only interact with a process if we have traced it, and it is
+ * currently stopped (see is_traced()). For threads, there's no way to
+ * distinguish whether ptrace() has been called for all threads or some
+ * subset. Since most clients will be tracing all threads, and erroneously
+ * allowing ptrace to access a non-traced thread is non-fatal (or at least
+ * would be fatal on linux), we ignore this aspect of the problem.
+ */
+
+#define LX_PTRACE_TRACEME 0
+#define LX_PTRACE_PEEKTEXT 1
+#define LX_PTRACE_PEEKDATA 2
+#define LX_PTRACE_PEEKUSER 3
+#define LX_PTRACE_POKETEXT 4
+#define LX_PTRACE_POKEDATA 5
+#define LX_PTRACE_POKEUSER 6
+#define LX_PTRACE_CONT 7
+#define LX_PTRACE_KILL 8
+#define LX_PTRACE_SINGLESTEP 9
+#define LX_PTRACE_GETREGS 12
+#define LX_PTRACE_SETREGS 13
+#define LX_PTRACE_GETFPREGS 14
+#define LX_PTRACE_SETFPREGS 15
+#define LX_PTRACE_ATTACH 16
+#define LX_PTRACE_DETACH 17
+#define LX_PTRACE_GETFPXREGS 18
+#define LX_PTRACE_SETFPXREGS 19
+#define LX_PTRACE_SYSCALL 24
+#define LX_PTRACE_SETOPTIONS 0x4200
+
+/* execve syscall numbers for 64-bit vs. 32-bit */
+#if defined(_LP64)
+#define LX_SYS_execve 520
+#else
+#define LX_SYS_execve 11
+#endif
+
+/*
+ * This corresponds to the user_i387_struct Linux structure.
+ */
+typedef struct lx_user_fpregs {
+ long lxuf_cwd;
+ long lxuf_swd;
+ long lxuf_twd;
+ long lxuf_fip;
+ long lxuf_fcs;
+ long lxuf_foo;
+ long lxuf_fos;
+ long lxuf_st_space[20];
+} lx_user_fpregs_t;
+
+/*
+ * This corresponds to the user_fxsr_struct Linux structure.
+ */
+typedef struct lx_user_fpxregs {
+ uint16_t lxux_cwd;
+ uint16_t lxux_swd;
+ uint16_t lxux_twd;
+ uint16_t lxux_fop;
+ long lxux_fip;
+ long lxux_fcs;
+ long lxux_foo;
+ long lxux_fos;
+ long lxux_mxcsr;
+ long lxux_reserved;
+ long lxux_st_space[32];
+ long lxux_xmm_space[32];
+ long lxux_padding[56];
+} lx_user_fpxregs_t;
+
+/*
+ * This corresponds to the user_regs_struct Linux structure.
+ */
+#if defined(_LP64)
+typedef struct lx_user_regs {
+ long lxur_r15;
+ long lxur_r14;
+ long lxur_r13;
+ long lxur_r12;
+ long lxur_rbp;
+ long lxur_rbx;
+ long lxur_r11;
+ long lxur_r10;
+ long lxur_r9;
+ long lxur_r8;
+ long lxur_rax;
+ long lxur_rcx;
+ long lxur_rdx;
+ long lxur_rsi;
+ long lxur_rdi;
+ long lxur_orig_rax;
+ long lxur_rip;
+ long lxur_xcs;
+ long lxur_rflags;
+ long lxur_rsp;
+ long lxur_xss;
+ long lxur_xfs_base;
+ long lxur_xgs_base;
+ long lxur_xds;
+ long lxur_xes;
+ long lxur_xfs;
+ long lxur_xgs;
+} lx_user_regs_t;
+#else
+typedef struct lx_user_regs {
+ long lxur_ebx;
+ long lxur_ecx;
+ long lxur_edx;
+ long lxur_esi;
+ long lxur_edi;
+ long lxur_ebp;
+ long lxur_eax;
+ long lxur_xds;
+ long lxur_xes;
+ long lxur_xfs;
+ long lxur_xgs;
+ long lxur_orig_eax;
+ long lxur_eip;
+ long lxur_xcs;
+ long lxur_eflags;
+ long lxur_esp;
+ long lxur_xss;
+} lx_user_regs_t;
+#endif
+
+typedef struct lx_user {
+ lx_user_regs_t lxu_regs;
+ int lxu_fpvalid;
+ lx_user_fpregs_t lxu_i387;
+ ulong_t lxu_tsize;
+ ulong_t lxu_dsize;
+ ulong_t lxu_ssize;
+ ulong_t lxu_start_code;
+ ulong_t lxu_start_stack;
+ long lxu_signal;
+ int lxu_reserved;
+ lx_user_regs_t *lxu_ar0;
+ lx_user_fpregs_t *lxu_fpstate;
+ ulong_t lxu_magic;
+ char lxu_comm[32];
+ int lxu_debugreg[8];
+} lx_user_t;
+
+typedef struct ptrace_monitor_map {
+ struct ptrace_monitor_map *pmm_next; /* next pointer */
+ pid_t pmm_monitor; /* monitor child process */
+ pid_t pmm_target; /* traced Linux pid */
+ pid_t pmm_pid; /* Solaris pid */
+ lwpid_t pmm_lwpid; /* Solaris lwpid */
+ uint_t pmm_exiting; /* detached */
+} ptrace_monitor_map_t;
+
+typedef struct ptrace_state_map {
+ struct ptrace_state_map *psm_next; /* next pointer */
+ pid_t psm_pid; /* Solaris pid */
+ uintptr_t psm_debugreg[8]; /* debug registers */
+} ptrace_state_map_t;
+
+static ptrace_monitor_map_t *ptrace_monitor_map = NULL;
+static ptrace_state_map_t *ptrace_state_map = NULL;
+static mutex_t ptrace_map_mtx = DEFAULTMUTEX;
+
+extern void *_START_;
+
+static sigset_t blockable_sigs;
+
+#pragma init(ptrace_init)
+void
+ptrace_init(void)
+{
+ (void) sigfillset(&blockable_sigs);
+ (void) sigdelset(&blockable_sigs, SIGKILL);
+ (void) sigdelset(&blockable_sigs, SIGSTOP);
+}
+
+/*
+ * Given a pid, open the named file under /native/proc/<pid>/name using the
+ * given mode.
+ */
+static int
+open_procfile(pid_t pid, int mode, const char *name)
+{
+ char path[MAXPATHLEN];
+
+ (void) snprintf(path, sizeof (path), "/native/proc/%d/%s", pid, name);
+
+ return (open(path, mode));
+}
+
+/*
+ * Given a pid and lwpid, open the named file under
+ * /native/proc/<pid>/<lwpid>/name using the given mode.
+ */
+static int
+open_lwpfile(pid_t pid, lwpid_t lwpid, int mode, const char *name)
+{
+ char path[MAXPATHLEN];
+
+ (void) snprintf(path, sizeof (path), "/native/proc/%d/lwp/%d/%s",
+ pid, lwpid, name);
+
+ return (open(path, mode));
+}
+
+static int
+get_status(pid_t pid, pstatus_t *psp)
+{
+ int fd;
+
+ if ((fd = open_procfile(pid, O_RDONLY, "status")) < 0)
+ return (-ESRCH);
+
+ if (read(fd, psp, sizeof (pstatus_t)) != sizeof (pstatus_t)) {
+ (void) close(fd);
+ return (-EIO);
+ }
+
+ (void) close(fd);
+
+ return (0);
+}
+
+static int
+get_lwpstatus(pid_t pid, lwpid_t lwpid, lwpstatus_t *lsp)
+{
+ int fd;
+
+ if ((fd = open_lwpfile(pid, lwpid, O_RDONLY, "lwpstatus")) < 0)
+ return (-ESRCH);
+
+ if (read(fd, lsp, sizeof (lwpstatus_t)) != sizeof (lwpstatus_t)) {
+ (void) close(fd);
+ return (-EIO);
+ }
+
+ (void) close(fd);
+
+ return (0);
+}
+
+static uintptr_t
+syscall_regs(int fd, uintptr_t fp, pid_t pid)
+{
+ uintptr_t addr, done;
+ struct frame fr;
+ auxv_t auxv;
+ int afd;
+#if defined(_LP64)
+ Elf64_Phdr phdr;
+#elif defined(_ILP32)
+ Elf32_Phdr phdr;
+#endif
+
+ /*
+ * Try to walk the stack looking for a return address that corresponds
+ * to the traced process's lx_emulate_done symbol. This relies on the
+ * fact that the brand library in the traced process is the same as the
+ * brand library in this process (indeed, this is true of all processes
+ * in a given branded zone).
+ */
+
+ /*
+ * Find the base address for the brand library in the traced process
+ * by grabbing the AT_PHDR auxv entry, reading in the program header
+ * at that location and subtracting off the p_vaddr member. We use
+ * this to compute the location of lx_emulate done in the traced
+ * process.
+ */
+ if ((afd = open_procfile(pid, O_RDONLY, "auxv")) < 0)
+ return (0);
+
+ do {
+ if (read(afd, &auxv, sizeof (auxv)) != sizeof (auxv)) {
+ (void) close(afd);
+ return (0);
+ }
+ } while (auxv.a_type != AT_PHDR);
+
+ (void) close(afd);
+
+ if (pread(fd, &phdr, sizeof (phdr), auxv.a_un.a_val) != sizeof (phdr)) {
+ lx_debug("failed to read brand library's phdr");
+ return (0);
+ }
+
+ addr = auxv.a_un.a_val - phdr.p_vaddr;
+ done = (uintptr_t)&lx_emulate_done - (uintptr_t)&_START_ + addr;
+
+ fr.fr_savfp = fp;
+
+ do {
+ addr = fr.fr_savfp;
+ if (pread(fd, &fr, sizeof (fr), addr) != sizeof (fr)) {
+ lx_debug("ptrace read failed for stack walk");
+ return (0);
+ }
+
+ if (addr >= fr.fr_savfp) {
+ lx_debug("ptrace stack not monotonically increasing "
+ "%p %p (%p)", addr, fr.fr_savfp, done);
+ return (0);
+ }
+ } while (fr.fr_savpc != done);
+
+ /*
+ * The first argument to lx_emulate is known to be an lx_regs_t
+ * structure and the ABI specifies that it will be placed on the stack
+ * immediately preceeding the return address.
+ */
+ addr += sizeof (fr);
+
+ /*
+ * On i386 we need to perform an additional read as we used the stack
+ * to pass the argument to lx_emulate. On amd64 we passed the argument
+ * in %rdi so addr already contains the correct address.
+ */
+#if defined(_ILP32)
+ if (pread(fd, &addr, sizeof (addr), addr) != sizeof (addr)) {
+ lx_debug("ptrace stack failed to read register set address");
+ return (0);
+ }
+#endif
+
+ return (addr);
+}
+
+static int
+getregs(pid_t pid, lwpid_t lwpid, lx_user_regs_t *rp)
+{
+ lwpstatus_t status;
+ uintptr_t addr;
+ int fd, ret;
+
+ if ((ret = get_lwpstatus(pid, lwpid, &status)) != 0)
+ return (ret);
+
+ if ((fd = open_procfile(pid, O_RDONLY, "as")) < 0)
+ return (-ESRCH);
+
+ /*
+ * If we find the syscall regs (and are therefore in an emulated
+ * syscall, use the register set at given address. Otherwise, use the
+ * registers as reported by /proc.
+ */
+ if ((addr = syscall_regs(fd, status.pr_reg[REG_FP], pid)) != 0) {
+ lx_regs_t regs;
+
+ if (pread(fd, &regs, sizeof (regs), addr) != sizeof (regs)) {
+ (void) close(fd);
+ lx_debug("ptrace failed to read register set");
+ return (-EIO);
+ }
+
+ (void) close(fd);
+
+#if defined(_LP64)
+ rp->lxur_r15 = regs.lxr_r15;
+ rp->lxur_r14 = regs.lxr_r14;
+ rp->lxur_r13 = regs.lxr_r13;
+ rp->lxur_r12 = regs.lxr_r12;
+ rp->lxur_rbp = regs.lxr_rbp;
+ rp->lxur_rbx = regs.lxr_rbx;
+ rp->lxur_r11 = regs.lxr_r11;
+ rp->lxur_r10 = regs.lxr_r10;
+ rp->lxur_r9 = regs.lxr_r9;
+ rp->lxur_r8 = regs.lxr_r8;
+ rp->lxur_rax = regs.lxr_rax;
+ rp->lxur_rcx = regs.lxr_rcx;
+ rp->lxur_rdx = regs.lxr_rdx;
+ rp->lxur_rsi = regs.lxr_rsi;
+ rp->lxur_rdi = regs.lxr_rdi;
+ rp->lxur_orig_rax = regs.lxr_orig_rax;
+ rp->lxur_rip = regs.lxr_rip;
+ rp->lxur_xcs = status.pr_reg[REG_CS];
+ rp->lxur_rflags = status.pr_reg[REG_RFL];
+ rp->lxur_rsp = regs.lxr_rsp;
+ rp->lxur_xss = status.pr_reg[REG_SS];
+ rp->lxur_xfs_base = status.pr_reg[REG_FSBASE];
+ rp->lxur_xgs_base = status.pr_reg[REG_GSBASE];
+ rp->lxur_xds = status.pr_reg[REG_DS];
+ rp->lxur_xes = status.pr_reg[REG_ES];
+ rp->lxur_xfs = regs.lxr_fs;
+ rp->lxur_xgs = status.pr_reg[REG_GS];
+#elif defined(_ILP32)
+ rp->lxur_ebx = regs.lxr_ebx;
+ rp->lxur_ecx = regs.lxr_ecx;
+ rp->lxur_edx = regs.lxr_edx;
+ rp->lxur_esi = regs.lxr_esi;
+ rp->lxur_edi = regs.lxr_edi;
+ rp->lxur_ebp = regs.lxr_ebp;
+ rp->lxur_eax = regs.lxr_eax;
+ rp->lxur_xds = status.pr_reg[DS];
+ rp->lxur_xes = status.pr_reg[ES];
+ rp->lxur_xfs = status.pr_reg[FS];
+ rp->lxur_xgs = regs.lxr_gs;
+ rp->lxur_orig_eax = regs.lxr_orig_eax;
+ rp->lxur_eip = regs.lxr_eip;
+ rp->lxur_xcs = status.pr_reg[CS];
+ rp->lxur_eflags = status.pr_reg[EFL];
+ rp->lxur_esp = regs.lxr_esp;
+ rp->lxur_xss = status.pr_reg[SS];
+#endif
+
+ } else {
+ (void) close(fd);
+
+#if defined(_LP64)
+ rp->lxur_r15 = status.pr_reg[REG_R15];
+ rp->lxur_r14 = status.pr_reg[REG_R14];
+ rp->lxur_r13 = status.pr_reg[REG_R13];
+ rp->lxur_r12 = status.pr_reg[REG_R12];
+ rp->lxur_rbp = status.pr_reg[REG_RBP];
+ rp->lxur_rbx = status.pr_reg[REG_RBX];
+ rp->lxur_r11 = status.pr_reg[REG_R11];
+ rp->lxur_r10 = status.pr_reg[REG_R10];
+ rp->lxur_r9 = status.pr_reg[REG_R9];
+ rp->lxur_r8 = status.pr_reg[REG_R8];
+ rp->lxur_rax = status.pr_reg[REG_RAX];
+ rp->lxur_rcx = status.pr_reg[REG_RCX];
+ rp->lxur_rdx = status.pr_reg[REG_RDX];
+ rp->lxur_rsi = status.pr_reg[REG_RSI];
+ rp->lxur_rdi = status.pr_reg[REG_RDI];
+ rp->lxur_orig_rax = 0;
+ rp->lxur_rip = status.pr_reg[REG_RIP];
+ rp->lxur_xcs = status.pr_reg[REG_CS];
+ rp->lxur_rflags = status.pr_reg[REG_RFL];
+ rp->lxur_rsp = status.pr_reg[REG_RSP];
+ rp->lxur_xss = status.pr_reg[REG_SS];
+ rp->lxur_xfs = status.pr_reg[REG_FSBASE];
+ rp->lxur_xgs = status.pr_reg[REG_GSBASE];
+ rp->lxur_xds = status.pr_reg[REG_DS];
+ rp->lxur_xes = status.pr_reg[REG_ES];
+ rp->lxur_xfs = status.pr_reg[REG_FSBASE];
+ rp->lxur_xgs = status.pr_reg[REG_GSBASE];
+#elif defined(_ILP32)
+ rp->lxur_ebx = status.pr_reg[EBX];
+ rp->lxur_ecx = status.pr_reg[ECX];
+ rp->lxur_edx = status.pr_reg[EDX];
+ rp->lxur_esi = status.pr_reg[ESI];
+ rp->lxur_edi = status.pr_reg[EDI];
+ rp->lxur_ebp = status.pr_reg[EBP];
+ rp->lxur_eax = status.pr_reg[EAX];
+ rp->lxur_xds = status.pr_reg[DS];
+ rp->lxur_xes = status.pr_reg[ES];
+ rp->lxur_xfs = status.pr_reg[FS];
+ rp->lxur_xgs = status.pr_reg[GS];
+ rp->lxur_orig_eax = 0;
+ rp->lxur_eip = status.pr_reg[EIP];
+ rp->lxur_xcs = status.pr_reg[CS];
+ rp->lxur_eflags = status.pr_reg[EFL];
+ rp->lxur_esp = status.pr_reg[UESP];
+ rp->lxur_xss = status.pr_reg[SS];
+#endif
+
+ /*
+ * If the target process has just returned from exec, it's not
+ * going to be sitting in the emulation function. In that case
+ * we need to manually fake up the values for %eax and orig_eax
+ * to indicate a successful return and that the traced process
+ * had called execve (respectively).
+ */
+ if (status.pr_why == PR_SYSEXIT &&
+ status.pr_what == SYS_execve) {
+#if defined(_LP64)
+ rp->lxur_rax = 0;
+ rp->lxur_orig_rax = LX_SYS_execve;
+#elif defined(_ILP32)
+ rp->lxur_eax = 0;
+ rp->lxur_orig_eax = LX_SYS_execve;
+#endif
+ }
+ }
+
+ return (0);
+}
+
+static int
+setregs(pid_t pid, lwpid_t lwpid, const lx_user_regs_t *rp)
+{
+ long ctl[1 + sizeof (prgregset_t) / sizeof (long)];
+ lwpstatus_t status;
+ uintptr_t addr;
+ int fd, ret;
+
+ if ((ret = get_lwpstatus(pid, lwpid, &status)) != 0)
+ return (ret);
+
+ if ((fd = open_procfile(pid, O_RDWR, "as")) < 0)
+ return (-ESRCH);
+
+ /*
+ * If we find the syscall regs (and are therefore in an emulated
+ * syscall, modify the register set at given address and set the
+ * remaining registers through the /proc interface. Otherwise just use
+ * the /proc interface to set register values;
+ */
+ if ((addr = syscall_regs(fd, status.pr_reg[REG_FP], pid)) != 0) {
+#if defined(_ILP32)
+ lx_regs_t regs;
+
+ regs.lxr_ebx = rp->lxur_ebx;
+ regs.lxr_ecx = rp->lxur_ecx;
+ regs.lxr_edx = rp->lxur_edx;
+ regs.lxr_esi = rp->lxur_esi;
+ regs.lxr_edi = rp->lxur_edi;
+ regs.lxr_ebp = rp->lxur_ebp;
+ regs.lxr_eax = rp->lxur_eax;
+ regs.lxr_gs = rp->lxur_xgs;
+ regs.lxr_orig_eax = rp->lxur_orig_eax;
+ regs.lxr_eip = rp->lxur_eip;
+ regs.lxr_esp = rp->lxur_esp;
+
+ if (pwrite(fd, &regs, sizeof (regs), addr) != sizeof (regs)) {
+ (void) close(fd);
+ lx_debug("ptrace failed to write register set");
+ return (-EIO);
+ }
+#endif
+
+ (void) close(fd);
+
+#if defined(_ILP32)
+ status.pr_reg[DS] = rp->lxur_xds;
+ status.pr_reg[ES] = rp->lxur_xes;
+ status.pr_reg[FS] = rp->lxur_xfs;
+ status.pr_reg[CS] = rp->lxur_xcs;
+ status.pr_reg[EFL] = rp->lxur_eflags;
+ status.pr_reg[SS] = rp->lxur_xss;
+#endif
+
+ } else {
+ (void) close(fd);
+
+#if defined(_ILP32)
+ status.pr_reg[EBX] = rp->lxur_ebx;
+ status.pr_reg[ECX] = rp->lxur_ecx;
+ status.pr_reg[EDX] = rp->lxur_edx;
+ status.pr_reg[ESI] = rp->lxur_esi;
+ status.pr_reg[EDI] = rp->lxur_edi;
+ status.pr_reg[EBP] = rp->lxur_ebp;
+ status.pr_reg[EAX] = rp->lxur_eax;
+ status.pr_reg[DS] = rp->lxur_xds;
+ status.pr_reg[ES] = rp->lxur_xes;
+ status.pr_reg[FS] = rp->lxur_xfs;
+ status.pr_reg[GS] = rp->lxur_xgs;
+ status.pr_reg[EIP] = rp->lxur_eip;
+ status.pr_reg[CS] = rp->lxur_xcs;
+ status.pr_reg[EFL] = rp->lxur_eflags;
+ status.pr_reg[UESP] = rp->lxur_esp;
+ status.pr_reg[SS] = rp->lxur_xss;
+ status.pr_reg[SS] = rp->lxur_xss;
+#endif
+ }
+
+ if ((fd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl")) < 0)
+ return (-ESRCH);
+
+ ctl[0] = PCSREG;
+ bcopy(status.pr_reg, &ctl[1], sizeof (prgregset_t));
+
+ if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) {
+ (void) close(fd);
+ return (-EIO);
+ }
+
+ (void) close(fd);
+
+ return (0);
+}
+
+static int
+getfpregs(pid_t pid, lwpid_t lwpid, lx_user_fpregs_t *rp)
+{
+ lwpstatus_t status;
+ struct _fpstate *fp;
+#if defined(_ILP32)
+ char *data;
+ int i;
+#endif
+ int ret;
+
+ if ((ret = get_lwpstatus(pid, lwpid, &status)) != 0)
+ return (ret);
+
+ fp = (struct _fpstate *)&status.pr_fpreg.fp_reg_set.fpchip_state;
+
+#if defined(_ILP32)
+ rp->lxuf_cwd = fp->cw;
+ rp->lxuf_swd = fp->sw;
+ rp->lxuf_twd = fp->tag;
+ rp->lxuf_fip = fp->ipoff;
+ rp->lxuf_fcs = fp->cssel;
+ rp->lxuf_foo = fp->dataoff;
+ rp->lxuf_fos = fp->datasel;
+
+ /*
+ * The Linux structure uses 10 bytes per floating-point register.
+ */
+ data = (char *)&rp->lxuf_st_space[0];
+ for (i = 0; i < 8; i++) {
+ bcopy(&fp->_st[i], data, 10);
+ data += 10;
+ }
+#endif
+
+ return (0);
+}
+
+static int
+setfpregs(pid_t pid, lwpid_t lwpid, const lx_user_fpregs_t *rp)
+{
+ lwpstatus_t status;
+ struct {
+ long cmd;
+ prfpregset_t regs;
+ } ctl;
+#if defined(_ILP32)
+ struct _fpstate *fp = (struct _fpstate *)&ctl.regs;
+ char *data;
+ int i;
+#endif
+ int ret, fd;
+
+ if ((ret = get_lwpstatus(pid, lwpid, &status)) != 0)
+ return (ret);
+
+ bcopy(&status.pr_fpreg, &ctl.regs, sizeof (ctl.regs));
+
+#if defined(_ILP32)
+ fp->cw = rp->lxuf_cwd;
+ fp->sw = rp->lxuf_swd;
+ fp->tag = rp->lxuf_twd;
+ fp->ipoff = rp->lxuf_fip;
+ fp->cssel = rp->lxuf_fcs;
+ fp->dataoff = rp->lxuf_foo;
+ fp->datasel = rp->lxuf_fos;
+
+ /*
+ * The Linux structure uses 10 bytes per floating-point register.
+ */
+ data = (char *)&rp->lxuf_st_space[0];
+ for (i = 0; i < 8; i++) {
+ bcopy(data, &fp->_st[i], 10);
+ data += 10;
+ }
+#endif
+
+ if ((fd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl")) < 0)
+ return (-ESRCH);
+
+ ctl.cmd = PCSFPREG;
+ if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) {
+ (void) close(fd);
+ return (-EIO);
+ }
+
+ (void) close(fd);
+
+ return (0);
+}
+
+
+static int
+getfpxregs(pid_t pid, lwpid_t lwpid, lx_user_fpxregs_t *rp)
+{
+#if defined(_ILP32)
+ lwpstatus_t status;
+ struct _fpstate *fp;
+ int ret, i;
+
+ if ((ret = get_lwpstatus(pid, lwpid, &status)) != 0)
+ return (ret);
+
+ fp = (struct _fpstate *)&status.pr_fpreg.fp_reg_set.fpchip_state;
+
+ rp->lxux_cwd = (uint16_t)fp->cw;
+ rp->lxux_swd = (uint16_t)fp->sw;
+ rp->lxux_twd = (uint16_t)fp->tag;
+ rp->lxux_fop = (uint16_t)(fp->cssel >> 16);
+ rp->lxux_fip = fp->ipoff;
+ rp->lxux_fcs = (uint16_t)fp->cssel;
+ rp->lxux_foo = fp->dataoff;
+ rp->lxux_fos = fp->datasel;
+ rp->lxux_mxcsr = status.pr_fpreg.fp_reg_set.fpchip_state.mxcsr;
+
+ bcopy(fp->xmm, rp->lxux_xmm_space, sizeof (rp->lxux_xmm_space));
+ bzero(rp->lxux_st_space, sizeof (rp->lxux_st_space));
+ for (i = 0; i < 8; i++) {
+ bcopy(&fp->_st[i], &rp->lxux_st_space[i * 4],
+ sizeof (fp->_st[i]));
+ }
+#endif
+
+ return (0);
+}
+
+static int
+setfpxregs(pid_t pid, lwpid_t lwpid, const lx_user_fpxregs_t *rp)
+{
+#if defined(_ILP32)
+ lwpstatus_t status;
+ struct {
+ long cmd;
+ prfpregset_t regs;
+ } ctl;
+ struct _fpstate *fp = (struct _fpstate *)&ctl.regs;
+ int ret, i, fd;
+
+ if ((ret = get_lwpstatus(pid, lwpid, &status)) != 0)
+ return (ret);
+
+ bcopy(&status.pr_fpreg, &ctl.regs, sizeof (ctl.regs));
+
+ fp->cw = rp->lxux_cwd;
+ fp->sw = rp->lxux_swd;
+ fp->tag = rp->lxux_twd;
+ fp->ipoff = rp->lxux_fip;
+ fp->cssel = rp->lxux_fcs | (rp->lxux_fop << 16);
+ fp->dataoff = rp->lxux_foo;
+ fp->datasel = rp->lxux_fos;
+
+ bcopy(rp->lxux_xmm_space, fp->xmm, sizeof (rp->lxux_xmm_space));
+ for (i = 0; i < 8; i++) {
+ bcopy(&rp->lxux_st_space[i * 4], &fp->_st[i],
+ sizeof (fp->_st[i]));
+ }
+
+ if ((fd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl")) < 0)
+ return (-ESRCH);
+
+ ctl.cmd = PCSFPREG;
+ if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) {
+ (void) close(fd);
+ return (-EIO);
+ }
+
+ (void) close(fd);
+#endif
+
+ return (0);
+}
+
+/*
+ * Solaris does not allow a process to manipulate its own or some
+ * other process's debug registers. Linux ptrace(2) allows this
+ * and gdb manipulates them for its watchpoint implementation.
+ *
+ * We keep a pseudo set of debug registers for each traced process
+ * and map their contents into the appropriate PCWATCH /proc
+ * operations when they are activated by gdb.
+ *
+ * To understand how the debug registers work on x86 machines,
+ * see section 13.1 of the AMD x86-64 Architecture Programmer's
+ * Manual, Volume 2, System Programming.
+ */
+static uintptr_t *
+debug_registers(pid_t pid)
+{
+ ptrace_state_map_t *p;
+
+ (void) mutex_lock(&ptrace_map_mtx);
+ for (p = ptrace_state_map; p != NULL; p = p->psm_next) {
+ if (p->psm_pid == pid)
+ break;
+ }
+ if (p == NULL && (p = malloc(sizeof (*p))) != NULL) {
+ bzero(p, sizeof (*p));
+ p->psm_pid = pid;
+ p->psm_next = ptrace_state_map;
+ p->psm_debugreg[6] = 0xffff0ff0; /* read as ones */
+ ptrace_state_map = p;
+ }
+ (void) mutex_unlock(&ptrace_map_mtx);
+ return (p != NULL? p->psm_debugreg : NULL);
+}
+
+static void
+free_debug_registers(pid_t pid)
+{
+ ptrace_state_map_t **pp;
+ ptrace_state_map_t *p;
+
+ /* ASSERT(MUTEX_HELD(&ptrace_map_mtx) */
+ for (pp = &ptrace_state_map; (p = *pp) != NULL; pp = &p->psm_next) {
+ if (p->psm_pid == pid) {
+ *pp = p->psm_next;
+ free(p);
+ break;
+ }
+ }
+}
+
+static int
+setup_watchpoints(pid_t pid, uintptr_t *debugreg)
+{
+ int dr7 = debugreg[7];
+ int lrw;
+ int fd;
+ size_t size = NULL;
+ prwatch_t prwatch[4];
+ int nwatch;
+ int i;
+ int wflags = NULL;
+ int error;
+ struct {
+ long req;
+ prwatch_t prwatch;
+ } ctl;
+
+ /* find all watched areas */
+ if ((fd = open_procfile(pid, O_RDONLY, "watch")) < 0)
+ return (-ESRCH);
+ nwatch = read(fd, prwatch, sizeof (prwatch)) / sizeof (prwatch_t);
+ (void) close(fd);
+ if ((fd = open_procfile(pid, O_WRONLY, "ctl")) < 0)
+ return (-ESRCH);
+ /* clear all watched areas */
+ for (i = 0; i < nwatch; i++) {
+ ctl.req = PCWATCH;
+ ctl.prwatch = prwatch[i];
+ ctl.prwatch.pr_wflags = 0;
+ if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) {
+ error = -errno;
+ (void) close(fd);
+ return (error);
+ }
+ }
+ /* establish all new watched areas */
+ for (i = 0; i < 4; i++) {
+ if ((dr7 & (1 << (2 * i))) == 0) /* enabled? */
+ continue;
+ lrw = (dr7 >> (16 + (4 * i))) & 0xf;
+ switch (lrw >> 2) { /* length */
+ case 0: size = 1; break;
+ case 1: size = 2; break;
+ case 2: size = 8; break;
+ case 3: size = 4; break;
+ }
+ switch (lrw & 0x3) { /* mode */
+ case 0: wflags = WA_EXEC; break;
+ case 1: wflags = WA_WRITE; break;
+ case 2: continue;
+ case 3: wflags = WA_READ | WA_WRITE; break;
+ }
+ ctl.req = PCWATCH;
+ ctl.prwatch.pr_vaddr = debugreg[i];
+ ctl.prwatch.pr_size = size;
+ ctl.prwatch.pr_wflags = wflags | WA_TRAPAFTER;
+ if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) {
+ error = -errno;
+ (void) close(fd);
+ return (error);
+ }
+ }
+ (void) close(fd);
+ return (0);
+}
+
+/*
+ * Returns TRUE if the process is traced, FALSE otherwise. This is only true
+ * if the process is currently stopped, and has been traced using
+ * PTRACE_TRACEME, PTRACE_ATTACH or one of the Linux-specific trace options.
+ */
+static int
+is_traced(pid_t pid)
+{
+ ptrace_monitor_map_t *p;
+ pstatus_t status;
+ uint_t curr_opts;
+
+ /*
+ * First get the stop options since that is an indication that the
+ * process is being traced.
+ */
+ if (syscall(SYS_brand, B_PTRACE_EXT_OPTS, B_PTRACE_EXT_OPTS_GET, pid,
+ &curr_opts) != 0)
+ return (0);
+
+ if (get_status(pid, &status) != 0)
+ return (0);
+
+ if ((status.pr_flags & PR_PTRACE || curr_opts != 0) &&
+ (status.pr_ppid == getpid()) &&
+ (status.pr_lwp.pr_flags & PR_ISTOP))
+ return (1);
+
+ (void) mutex_lock(&ptrace_map_mtx);
+ for (p = ptrace_monitor_map; p != NULL; p = p->pmm_next) {
+ if (p->pmm_target == pid) {
+ (void) mutex_unlock(&ptrace_map_mtx);
+ return (1);
+ }
+ }
+ (void) mutex_unlock(&ptrace_map_mtx);
+
+ return (0);
+}
+
+static int
+ptrace_trace_common(int fd)
+{
+ struct {
+ long cmd;
+ union {
+ long flags;
+ sigset_t signals;
+ fltset_t faults;
+ } arg;
+ } ctl;
+ size_t size;
+
+ ctl.cmd = PCSTRACE;
+ prfillset(&ctl.arg.signals);
+ size = sizeof (long) + sizeof (sigset_t);
+ if (write(fd, &ctl, size) != size)
+ return (-1);
+
+ ctl.cmd = PCSFAULT;
+ premptyset(&ctl.arg.faults);
+ size = sizeof (long) + sizeof (fltset_t);
+ if (write(fd, &ctl, size) != size)
+ return (-1);
+
+ ctl.cmd = PCUNSET;
+ ctl.arg.flags = PR_FORK;
+ size = sizeof (long) + sizeof (long);
+ if (write(fd, &ctl, size) != size)
+ return (-1);
+
+ return (0);
+}
+
+/*
+ * Notify that parent that we wish to be traced. This is the equivalent of:
+ *
+ * 1. Stop on all signals, and nothing else
+ * 2. Turn off inherit-on-fork flag
+ * 3. Set ptrace compatible flag
+ *
+ * If we are not the main thread, then the client is trying to request behavior
+ * by which one of its own thread is to be traced. We don't support this mode
+ * of operation.
+ */
+static int
+ptrace_traceme(void)
+{
+ int fd, ret;
+ int error;
+ long ctl[2];
+ pstatus_t status;
+ pid_t pid = getpid();
+
+ if (_lwp_self() != 1) {
+ lx_unsupported("thread %d calling PTRACE_TRACEME is "
+ "unsupported", _lwp_self());
+ return (-ENOTSUP);
+ }
+
+ if ((ret = get_status(pid, &status)) != 0)
+ return (ret);
+
+ /*
+ * Why would a process try to do this twice? I'm not sure, but there's
+ * a conformance test which wants this to fail just so.
+ */
+ if (status.pr_flags & PR_PTRACE)
+ return (-EPERM);
+
+ if ((fd = open_procfile(pid, O_WRONLY, "ctl")) < 0)
+ return (-errno);
+
+ ctl[0] = PCSET;
+ ctl[1] = PR_PTRACE;
+ error = 0;
+ if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl) ||
+ ptrace_trace_common(fd) != 0)
+ error = -errno;
+
+ (void) close(fd);
+ return (error);
+}
+
+/*
+ * Read a word of data from the given address. Because this is a process-wide
+ * action, we don't need the lwpid.
+ */
+static long
+ptrace_peek(pid_t pid, uintptr_t addr, long *ret)
+{
+ int fd;
+ long data;
+
+ if (!is_traced(pid))
+ return (-ESRCH);
+
+ if ((fd = open_procfile(pid, O_RDONLY, "as")) < 0)
+ return (-ESRCH);
+
+ if (pread(fd, &data, sizeof (data), addr) != sizeof (data)) {
+ (void) close(fd);
+ return (-EIO);
+ }
+
+ (void) close(fd);
+
+ if (uucopy(&data, ret, sizeof (data)) != 0)
+ return (-errno);
+
+ return (0);
+}
+
+#define LX_USER_BOUND(m) \
+(offsetof(lx_user_t, m) + sizeof (((lx_user_t *)NULL)->m))
+
+static int
+ptrace_peek_user(pid_t pid, lwpid_t lwpid, uintptr_t off, int *ret)
+{
+ int err, data;
+ uintptr_t *debugreg;
+ int dreg;
+
+ if (!is_traced(pid))
+ return (-ESRCH);
+
+ /*
+ * The offset specified by the user is an offset into the Linux
+ * user structure (seriously). Rather than constructing a full
+ * user structure, we figure out which part of the user structure
+ * the offset is in, and fill in just that component.
+ */
+ if (off < LX_USER_BOUND(lxu_regs)) {
+ lx_user_regs_t regs;
+
+ if ((err = getregs(pid, lwpid, &regs)) != 0)
+ return (err);
+
+ data = *(int *)((uintptr_t)&regs + off -
+ offsetof(lx_user_t, lxu_regs));
+
+ } else if (off < LX_USER_BOUND(lxu_fpvalid)) {
+ lx_err("offset = %lu\n", off);
+ assert(0);
+ } else if (off < LX_USER_BOUND(lxu_i387)) {
+ lx_user_fpregs_t regs;
+
+ if ((err = getfpregs(pid, lwpid, &regs)) != 0)
+ return (err);
+
+ data = *(int *)((uintptr_t)&regs + off -
+ offsetof(lx_user_t, lxu_i387));
+
+ } else if (off < LX_USER_BOUND(lxu_tsize)) {
+ lx_err("offset = %lu\n", off);
+ assert(0);
+ } else if (off < LX_USER_BOUND(lxu_dsize)) {
+ lx_err("offset = %lu\n", off);
+ assert(0);
+ } else if (off < LX_USER_BOUND(lxu_ssize)) {
+ lx_err("offset = %lu\n", off);
+ assert(0);
+ } else if (off < LX_USER_BOUND(lxu_start_code)) {
+ lx_err("offset = %lu\n", off);
+ assert(0);
+ } else if (off < LX_USER_BOUND(lxu_start_stack)) {
+ lx_err("offset = %lu\n", off);
+ assert(0);
+ } else if (off < LX_USER_BOUND(lxu_signal)) {
+ lx_err("offset = %lu\n", off);
+ assert(0);
+ } else if (off < LX_USER_BOUND(lxu_reserved)) {
+ lx_err("offset = %lu\n", off);
+ assert(0);
+ } else if (off < LX_USER_BOUND(lxu_ar0)) {
+ lx_err("offset = %lu\n", off);
+ assert(0);
+ } else if (off < LX_USER_BOUND(lxu_fpstate)) {
+ lx_err("offset = %lu\n", off);
+ assert(0);
+ } else if (off < LX_USER_BOUND(lxu_magic)) {
+ lx_err("offset = %lu\n", off);
+ assert(0);
+ } else if (off < LX_USER_BOUND(lxu_comm)) {
+ lx_err("offset = %lu\n", off);
+ assert(0);
+ } else if (off < LX_USER_BOUND(lxu_debugreg)) {
+ dreg = (off - offsetof(lx_user_t, lxu_debugreg)) / sizeof (int);
+ if (dreg == 4) /* aliased */
+ dreg = 6;
+ else if (dreg == 5) /* aliased */
+ dreg = 7;
+ if ((debugreg = debug_registers(pid)) != NULL)
+ data = debugreg[dreg];
+ else
+ data = 0;
+ } else {
+ lx_unsupported("unsupported ptrace peek user offset: 0x%x\n",
+ off);
+ assert(0);
+ return (-ENOTSUP);
+ }
+
+ if (uucopy(&data, ret, sizeof (data)) != 0)
+ return (-errno);
+
+ return (0);
+}
+
+/*
+ * Write a word of data to the given address. Because this is a process-wide
+ * action, we don't need the lwpid. Returns EINVAL if the address is not
+ * word-aligned.
+ */
+static int
+ptrace_poke(pid_t pid, uintptr_t addr, int data)
+{
+ int fd;
+
+ if (!is_traced(pid))
+ return (-ESRCH);
+
+ if (addr & 0x3)
+ return (-EINVAL);
+
+ if ((fd = open_procfile(pid, O_WRONLY, "as")) < 0)
+ return (-ESRCH);
+
+ if (pwrite(fd, &data, sizeof (data), addr) != sizeof (data)) {
+ (void) close(fd);
+ return (-EIO);
+ }
+
+ (void) close(fd);
+ return (0);
+}
+
+static int
+ptrace_poke_user(pid_t pid, lwpid_t lwpid, uintptr_t off, int data)
+{
+ lx_user_regs_t regs;
+ int err = 0;
+ uintptr_t *debugreg;
+ int dreg;
+
+ if (!is_traced(pid))
+ return (-ESRCH);
+
+ if (off & 0x3)
+ return (-EINVAL);
+
+ if (off < offsetof(lx_user_t, lxu_regs) + sizeof (lx_user_regs_t)) {
+ if ((err = getregs(pid, lwpid, &regs)) != 0)
+ return (err);
+ *(int *)((uintptr_t)&regs + off -
+ offsetof(lx_user_t, lxu_regs)) = data;
+ return (setregs(pid, lwpid, &regs));
+ }
+
+ if (off >= offsetof(lx_user_t, lxu_debugreg) &&
+ off < offsetof(lx_user_t, lxu_debugreg) + 8 * sizeof (int)) {
+ dreg = (off - offsetof(lx_user_t, lxu_debugreg)) / sizeof (int);
+ if (dreg == 4) /* aliased */
+ dreg = 6;
+ else if (dreg == 5) /* aliased */
+ dreg = 7;
+ if ((debugreg = debug_registers(pid)) != NULL) {
+ debugreg[dreg] = data;
+ if (dreg == 7)
+ err = setup_watchpoints(pid, debugreg);
+ }
+ return (err);
+ }
+
+ lx_unsupported("unsupported ptrace poke user offset: 0x%x\n", off);
+ assert(0);
+ return (-ENOTSUP);
+}
+
+static int
+ptrace_cont_common(int fd, int sig, int run, int step)
+{
+ long ctl[1 + 1 + sizeof (siginfo_t) / sizeof (long) + 2];
+ long *ctlp = ctl;
+ size_t size;
+
+ assert(0 <= sig && sig <= LX_NSIG);
+ assert(!step || run);
+
+ /*
+ * Clear the current signal.
+ */
+ *ctlp++ = PCCSIG;
+
+ /*
+ * Send a signal if one was specified.
+ */
+ if (sig != 0 && sig != LX_SIGSTOP) {
+ siginfo_t *infop;
+
+ *ctlp++ = PCSSIG;
+ infop = (siginfo_t *)ctlp;
+ bzero(infop, sizeof (siginfo_t));
+ infop->si_signo = ltos_signo[sig];
+
+ ctlp += sizeof (siginfo_t) / sizeof (long);
+ }
+
+ /*
+ * If run is true, set the lwp running.
+ */
+ if (run) {
+ *ctlp++ = PCRUN;
+ *ctlp++ = step ? PRSTEP : 0;
+ }
+
+ size = (char *)ctlp - (char *)&ctl[0];
+ assert(size <= sizeof (ctl));
+
+ if (write(fd, ctl, size) != size) {
+ lx_debug("failed to continue %s", strerror(errno));
+ return (-EIO);
+ }
+
+ return (0);
+}
+
+static int
+ptrace_cont_monitor(ptrace_monitor_map_t *p)
+{
+ long ctl[2];
+ int fd;
+
+ fd = open_procfile(p->pmm_monitor, O_WRONLY, "ctl");
+ if (fd < 0) {
+ lx_debug("failed to open monitor ctl %d",
+ errno);
+ return (-EIO);
+ }
+
+ ctl[0] = PCRUN;
+ ctl[1] = PRCSIG;
+ if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl)) {
+ (void) close(fd);
+ return (-EIO);
+ }
+
+ (void) close(fd);
+
+ return (0);
+}
+
+static int
+ptrace_cont(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig, int step)
+{
+ ptrace_monitor_map_t *p;
+ uintptr_t *debugreg;
+ int fd, ret;
+
+ if (!is_traced(pid))
+ return (-ESRCH);
+
+ if (sig < 0 || sig > LX_NSIG)
+ return (-EINVAL);
+
+ if ((fd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl")) < 0)
+ return (-ESRCH);
+
+ if ((ret = ptrace_cont_common(fd, sig, 1, step)) != 0) {
+ (void) close(fd);
+ return (ret);
+ }
+
+ (void) close(fd);
+
+ /* kludge: use debugreg[4] to remember the single-step flag */
+ if ((debugreg = debug_registers(pid)) != NULL)
+ debugreg[4] = step;
+
+ /*
+ * Check for a monitor and get it moving if we find it. If any of the
+ * /proc operations fail, we're kind of sunk so just return an error.
+ */
+ (void) mutex_lock(&ptrace_map_mtx);
+ for (p = ptrace_monitor_map; p != NULL; p = p->pmm_next) {
+ if (p->pmm_target == lxpid) {
+ if ((ret = ptrace_cont_monitor(p)) != 0)
+ return (ret);
+ break;
+ }
+ }
+ (void) mutex_unlock(&ptrace_map_mtx);
+
+ return (0);
+}
+
+/*
+ * If a monitor exists for this traced process, dispose of it.
+ * First turn off its ptrace flag so we won't be notified of its
+ * impending demise. We ignore errors for this step since they
+ * indicate only that the monitor has been damaged due to pilot
+ * error. Then kill the monitor, and wait for it. If the wait
+ * succeeds we can dispose of the corpse, otherwise another thread's
+ * wait call has collected it and we need to set a flag in the
+ * structure so that if can be picked up in wait.
+ */
+static void
+monitor_kill(pid_t lxpid, pid_t pid)
+{
+ ptrace_monitor_map_t *p, **pp;
+ pid_t mpid;
+ int fd;
+ long ctl[2];
+
+ (void) mutex_lock(&ptrace_map_mtx);
+ free_debug_registers(pid);
+ for (pp = &ptrace_monitor_map; (p = *pp) != NULL; pp = &p->pmm_next) {
+ if (p->pmm_target == lxpid) {
+ mpid = p->pmm_monitor;
+ if ((fd = open_procfile(mpid, O_WRONLY, "ctl")) >= 0) {
+ ctl[0] = PCUNSET;
+ ctl[1] = PR_PTRACE;
+ (void) write(fd, ctl, sizeof (ctl));
+ (void) close(fd);
+ }
+
+ (void) kill(mpid, SIGKILL);
+
+ if (waitpid(mpid, NULL, 0) == mpid) {
+ *pp = p->pmm_next;
+ free(p);
+ } else {
+ p->pmm_exiting = 1;
+ }
+
+ break;
+ }
+ }
+ (void) mutex_unlock(&ptrace_map_mtx);
+}
+
+static int
+ptrace_kill(pid_t lxpid, pid_t pid)
+{
+ int ret;
+
+ if (!is_traced(pid))
+ return (-ESRCH);
+
+ ret = kill(pid, SIGKILL);
+
+ /* kill off the monitor process, if any */
+ monitor_kill(lxpid, pid);
+
+ return (ret);
+}
+
+static int
+ptrace_step(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig)
+{
+ return (ptrace_cont(lxpid, pid, lwpid, sig, 1));
+}
+
+static int
+ptrace_getregs(pid_t pid, lwpid_t lwpid, uintptr_t addr)
+{
+ lx_user_regs_t regs;
+ int ret;
+
+ if (!is_traced(pid))
+ return (-ESRCH);
+
+ if ((ret = getregs(pid, lwpid, &regs)) != 0)
+ return (ret);
+
+ if (uucopy(&regs, (void *)addr, sizeof (regs)) != 0)
+ return (-errno);
+
+ return (0);
+}
+
+static int
+ptrace_setregs(pid_t pid, lwpid_t lwpid, uintptr_t addr)
+{
+ lx_user_regs_t regs;
+
+ if (!is_traced(pid))
+ return (-ESRCH);
+
+ if (uucopy((void *)addr, &regs, sizeof (regs)) != 0)
+ return (-errno);
+
+ return (setregs(pid, lwpid, &regs));
+}
+
+static int
+ptrace_getfpregs(pid_t pid, lwpid_t lwpid, uintptr_t addr)
+{
+ lx_user_fpregs_t regs;
+ int ret;
+
+ if (!is_traced(pid))
+ return (-ESRCH);
+
+ if ((ret = getfpregs(pid, lwpid, &regs)) != 0)
+ return (ret);
+
+ if (uucopy(&regs, (void *)addr, sizeof (regs)) != 0)
+ return (-errno);
+
+ return (0);
+}
+
+static int
+ptrace_setfpregs(pid_t pid, lwpid_t lwpid, uintptr_t addr)
+{
+ lx_user_fpregs_t regs;
+
+ if (!is_traced(pid))
+ return (-ESRCH);
+
+ if (uucopy((void *)addr, &regs, sizeof (regs)) != 0)
+ return (-errno);
+
+ return (setfpregs(pid, lwpid, &regs));
+}
+
+static int
+ptrace_getfpxregs(pid_t pid, lwpid_t lwpid, uintptr_t addr)
+{
+ lx_user_fpxregs_t regs;
+ int ret;
+
+ if (!is_traced(pid))
+ return (-ESRCH);
+
+ if ((ret = getfpxregs(pid, lwpid, &regs)) != 0)
+ return (ret);
+
+ if (uucopy(&regs, (void *)addr, sizeof (regs)) != 0)
+ return (-errno);
+
+ return (0);
+}
+
+static int
+ptrace_setfpxregs(pid_t pid, lwpid_t lwpid, uintptr_t addr)
+{
+ lx_user_fpxregs_t regs;
+
+ if (!is_traced(pid))
+ return (-ESRCH);
+
+ if (uucopy((void *)addr, &regs, sizeof (regs)) != 0)
+ return (-errno);
+
+ return (setfpxregs(pid, lwpid, &regs));
+}
+
+static void __NORETURN
+ptrace_monitor(int fd)
+{
+ struct {
+ long cmd;
+ union {
+ long flags;
+ sigset_t signals;
+ fltset_t faults;
+ } arg;
+ } ctl;
+ size_t size;
+ int monfd;
+ int rv;
+
+ monfd = open_procfile(getpid(), O_WRONLY, "ctl");
+
+ ctl.cmd = PCSTRACE; /* trace only SIGTRAP */
+ premptyset(&ctl.arg.signals);
+ praddset(&ctl.arg.signals, SIGTRAP);
+ size = sizeof (long) + sizeof (sigset_t);
+ (void) write(monfd, &ctl, size); /* can't fail */
+
+ ctl.cmd = PCSFAULT;
+ premptyset(&ctl.arg.faults);
+ size = sizeof (long) + sizeof (fltset_t);
+ (void) write(monfd, &ctl, size); /* can't fail */
+
+ ctl.cmd = PCUNSET;
+ ctl.arg.flags = PR_FORK;
+ size = sizeof (long) + sizeof (long);
+ (void) write(monfd, &ctl, size); /* can't fail */
+
+ ctl.cmd = PCSET; /* wait()able by the parent */
+ ctl.arg.flags = PR_PTRACE;
+ size = sizeof (long) + sizeof (long);
+ (void) write(monfd, &ctl, size); /* can't fail */
+
+ (void) close(monfd);
+
+ ctl.cmd = PCWSTOP;
+ size = sizeof (long);
+
+ for (;;) {
+ /*
+ * Wait for the traced process to stop.
+ */
+ if (write(fd, &ctl, size) != size) {
+ rv = (errno == ENOENT)? 0 : 1;
+ lx_debug("monitor failed to wait for LWP to stop: %s",
+ strerror(errno));
+ _exit(rv);
+ }
+
+ lx_debug("monitor caught traced LWP");
+
+ /*
+ * Pull the ptrace trigger by sending ourself a SIGTRAP. This
+ * will cause this, the monitor process, to stop which will
+ * cause the parent's waitid(2) call to return this process
+ * id. In lx_wait(), we remap the monitor process's pid and
+ * status to those of the traced LWP. When the parent process
+ * uses ptrace to resume the traced LWP, it will additionally
+ * restart this process.
+ */
+ (void) _lwp_kill(_lwp_self(), SIGTRAP);
+
+ lx_debug("monitor was resumed");
+ }
+}
+
+static int
+ptrace_attach_common(int fd, pid_t lxpid, pid_t pid, lwpid_t lwpid, int run)
+{
+ pid_t child;
+ ptrace_monitor_map_t *p;
+ sigset_t unblock;
+ pstatus_t status;
+ long ctl[1 + sizeof (sysset_t) / sizeof (long) + 2];
+ long *ctlp = ctl;
+ size_t size;
+ sysset_t *sysp;
+ int ret;
+
+ /*
+ * We're going to need this structure so better to fail now before its
+ * too late to turn back.
+ */
+ if ((p = malloc(sizeof (ptrace_monitor_map_t))) == NULL)
+ return (-EIO);
+
+ if ((ret = get_status(pid, &status)) != 0) {
+ free(p);
+ return (ret);
+ }
+
+ /*
+ * If this process is already traced, bail.
+ */
+ if (status.pr_flags & PR_PTRACE) {
+ free(p);
+ return (-EPERM);
+ }
+
+ /*
+ * Turn on the appropriate tracing flags. It's exceedingly unlikely
+ * that this operation will fail; any failure would probably be due
+ * to another /proc consumer mucking around.
+ */
+ if (ptrace_trace_common(fd) != 0) {
+ free(p);
+ return (-EIO);
+ }
+
+ /*
+ * Native ptrace automatically catches processes when they exec so we
+ * have to do that explicitly here.
+ */
+ *ctlp++ = PCSEXIT;
+ sysp = (sysset_t *)ctlp;
+ ctlp += sizeof (sysset_t) / sizeof (long);
+ premptyset(sysp);
+ praddset(sysp, SYS_execve);
+ if (run) {
+ *ctlp++ = PCRUN;
+ *ctlp++ = 0;
+ }
+
+ size = (char *)ctlp - (char *)&ctl[0];
+
+ if (write(fd, ctl, size) != size) {
+ free(p);
+ return (-EIO);
+ }
+
+ /*
+ * Spawn the monitor proceses to notify this process of events of
+ * interest in the traced process. We block signals here both so
+ * we're not interrupted during this operation and so that the
+ * monitor process doesn't accept signals.
+ */
+ (void) sigprocmask(SIG_BLOCK, &blockable_sigs, &unblock);
+ if ((child = fork1()) == 0)
+ ptrace_monitor(fd);
+ (void) sigprocmask(SIG_SETMASK, &unblock, NULL);
+
+ if (child == -1) {
+ lx_debug("failed to fork monitor process\n");
+ free(p);
+ return (-EIO);
+ }
+
+ p->pmm_monitor = child;
+ p->pmm_target = lxpid;
+ p->pmm_pid = pid;
+ p->pmm_lwpid = lwpid;
+ p->pmm_exiting = 0;
+
+ (void) mutex_lock(&ptrace_map_mtx);
+ p->pmm_next = ptrace_monitor_map;
+ ptrace_monitor_map = p;
+ (void) mutex_unlock(&ptrace_map_mtx);
+
+ return (0);
+}
+
+static int
+ptrace_attach(pid_t lxpid, pid_t pid, lwpid_t lwpid)
+{
+ int fd, ret;
+ long ctl;
+
+ /*
+ * Linux doesn't let you trace process 1 -- go figure.
+ */
+ if (lxpid == 1)
+ return (-EPERM);
+
+ if ((fd = open_lwpfile(pid, lwpid, O_WRONLY | O_EXCL, "lwpctl")) < 0)
+ return (errno == EBUSY ? -EPERM : -ESRCH);
+
+ ctl = PCSTOP;
+ if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) {
+ lx_err("failed to stop %d/%d\n", (int)pid, (int)lwpid);
+ assert(0);
+ }
+
+ ret = ptrace_attach_common(fd, lxpid, pid, lwpid, 0);
+
+ (void) close(fd);
+
+ return (ret);
+}
+
+static int
+ptrace_detach(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig)
+{
+ long ctl[2];
+ int fd, ret;
+
+ if (!is_traced(pid))
+ return (-ESRCH);
+
+ if (sig < 0 || sig > LX_NSIG)
+ return (-EINVAL);
+
+ if ((fd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl")) < 0)
+ return (-ESRCH);
+
+ /*
+ * The /proc ptrace flag may not be set, but we clear it
+ * unconditionally since doing so doesn't hurt anything.
+ */
+ ctl[0] = PCUNSET;
+ ctl[1] = PR_PTRACE;
+ if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl)) {
+ (void) close(fd);
+ return (-EIO);
+ }
+
+ /*
+ * Clear the brand-specific system call tracing flag to ensure that
+ * the target doesn't stop unexpectedly some time in the future.
+ */
+ if ((ret = syscall(SYS_brand, B_PTRACE_SYSCALL, pid, lwpid, 0)) != 0) {
+ (void) close(fd);
+ return (-ret);
+ }
+
+ /* kill off the monitor process, if any */
+ monitor_kill(lxpid, pid);
+
+ /*
+ * Turn on the run-on-last-close flag so that all tracing flags will be
+ * cleared when we close the control file descriptor.
+ */
+ ctl[0] = PCSET;
+ ctl[1] = PR_RLC;
+ if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl)) {
+ (void) close(fd);
+ return (-EIO);
+ }
+
+ /*
+ * Clear the current signal (if any) and possibly send the traced
+ * process a new signal.
+ */
+ ret = ptrace_cont_common(fd, sig, 0, 0);
+
+ (void) close(fd);
+
+ return (ret);
+}
+
+static int
+ptrace_syscall(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig)
+{
+ int ret;
+
+ if (!is_traced(pid))
+ return (-ESRCH);
+
+ if ((ret = syscall(SYS_brand, B_PTRACE_SYSCALL, pid, lwpid, 1)) != 0)
+ return (-ret);
+
+ return (ptrace_cont(lxpid, pid, lwpid, sig, 0));
+}
+
+static int
+ptrace_setoptions(pid_t pid, int options)
+{
+ int ret;
+ int fd;
+ int error = 0;
+ struct {
+ long cmd;
+ union {
+ long flags;
+ sigset_t signals;
+ fltset_t faults;
+ } arg;
+ } ctl;
+ size_t size;
+ pstatus_t status;
+
+ if ((ret = get_status(pid, &status)) != 0)
+ return (ret);
+
+ if ((fd = open_procfile(pid, O_WRONLY, "ctl")) < 0)
+ return (-errno);
+
+ /* since we're doing option tracing now, only catch sigtrap */
+ if (error == 0) {
+ ctl.cmd = PCSTRACE;
+ premptyset(&ctl.arg.signals);
+ praddset(&ctl.arg.signals, SIGTRAP);
+ size = sizeof (long) + sizeof (sigset_t);
+ if (write(fd, &ctl, size) != size)
+ error = -errno;
+ }
+
+ (void) close(fd);
+
+ if (error != 0)
+ return (error);
+
+ ret = syscall(SYS_brand, B_PTRACE_EXT_OPTS, B_PTRACE_EXT_OPTS_SET, pid,
+ options);
+
+ return (-ret);
+}
+
+void
+lx_ptrace_stop_if_option(int option)
+{
+ pid_t pid;
+ uint_t curr_opts;
+
+ pid = getpid();
+ if (pid == 1)
+ pid = zoneinit_pid;
+
+ /* first we have to see if the stop option is set for this process */
+ if (syscall(SYS_brand, B_PTRACE_EXT_OPTS, B_PTRACE_EXT_OPTS_GET, pid,
+ &curr_opts) != 0)
+ return;
+
+ /*
+ * If we just forked/cloned, then the trace flags only carry over to
+ * the child if the specific flag was enabled on the parent. For
+ * example, if only TRACEFORK is enabled and we clone, then we must
+ * clear the trace flags. If TRACEFORK is enabled and we fork, then we
+ * keep the flags.
+ */
+ if ((option == LX_PTRACE_O_TRACECLONE ||
+ option == LX_PTRACE_O_TRACEFORK ||
+ option == LX_PTRACE_O_TRACEVFORK) && (curr_opts & option) == 0) {
+ (void) syscall(SYS_brand, B_PTRACE_EXT_OPTS,
+ B_PTRACE_EXT_OPTS_SET, pid, 0);
+ }
+
+ /* now if the option is/was set, this brand call will stop us */
+ if (curr_opts & option)
+ (void) syscall(SYS_brand, B_PTRACE_STOP_FOR_OPT, option);
+}
+
+long
+lx_ptrace(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
+{
+ pid_t pid, lxpid = (pid_t)p2;
+ lwpid_t lwpid;
+
+ if ((p1 != LX_PTRACE_TRACEME) &&
+ (lx_lpid_to_spair(lxpid, &pid, &lwpid) < 0))
+ return (-ESRCH);
+
+ switch (p1) {
+ case LX_PTRACE_TRACEME:
+ return (ptrace_traceme());
+
+ case LX_PTRACE_PEEKTEXT:
+ case LX_PTRACE_PEEKDATA:
+ return (ptrace_peek(pid, p3, (long *)p4));
+
+ case LX_PTRACE_PEEKUSER:
+ return (ptrace_peek_user(pid, lwpid, p3, (int *)p4));
+
+ case LX_PTRACE_POKETEXT:
+ case LX_PTRACE_POKEDATA:
+ return (ptrace_poke(pid, p3, (int)p4));
+
+ case LX_PTRACE_POKEUSER:
+ return (ptrace_poke_user(pid, lwpid, p3, (int)p4));
+
+ case LX_PTRACE_CONT:
+ return (ptrace_cont(lxpid, pid, lwpid, (int)p4, 0));
+
+ case LX_PTRACE_KILL:
+ return (ptrace_kill(lxpid, pid));
+
+ case LX_PTRACE_SINGLESTEP:
+ return (ptrace_step(lxpid, pid, lwpid, (int)p4));
+
+ case LX_PTRACE_GETREGS:
+ return (ptrace_getregs(pid, lwpid, p4));
+
+ case LX_PTRACE_SETREGS:
+ return (ptrace_setregs(pid, lwpid, p4));
+
+ case LX_PTRACE_GETFPREGS:
+ return (ptrace_getfpregs(pid, lwpid, p4));
+
+ case LX_PTRACE_SETFPREGS:
+ return (ptrace_setfpregs(pid, lwpid, p4));
+
+ case LX_PTRACE_ATTACH:
+ return (ptrace_attach(lxpid, pid, lwpid));
+
+ case LX_PTRACE_DETACH:
+ return (ptrace_detach(lxpid, pid, lwpid, (int)p4));
+
+ case LX_PTRACE_GETFPXREGS:
+ return (ptrace_getfpxregs(pid, lwpid, p4));
+
+ case LX_PTRACE_SETFPXREGS:
+ return (ptrace_setfpxregs(pid, lwpid, p4));
+
+ case LX_PTRACE_SYSCALL:
+ return (ptrace_syscall(lxpid, pid, lwpid, (int)p4));
+
+ case LX_PTRACE_SETOPTIONS:
+ return (ptrace_setoptions(pid, (int)p4));
+
+ default:
+ return (-EINVAL);
+ }
+}
+
+void
+lx_ptrace_fork(void)
+{
+ /*
+ * Send a special signal (that has no Linux equivalent) to indicate
+ * that we're in this particularly special case. The signal will be
+ * ignored by this process, but noticed by /proc consumers tracing
+ * this process.
+ */
+ (void) _lwp_kill(_lwp_self(), SIGWAITING);
+}
+
+static void
+ptrace_catch_fork(pid_t pid, int monitor)
+{
+ long ctl[14 + 2 * sizeof (sysset_t) / sizeof (long)];
+ long *ctlp;
+ sysset_t *sysp;
+ size_t size;
+ pstatus_t ps;
+ pid_t child;
+ int fd, err;
+
+ /*
+ * If any of this fails, we're really sunk since the child
+ * will be stuck in the middle of lx_ptrace_fork().
+ * Fortunately it's practically assured to succeed unless
+ * something is seriously wrong on the system.
+ */
+ if ((fd = open_procfile(pid, O_WRONLY, "ctl")) < 0) {
+ lx_debug("lx_catch_fork: failed to control %d",
+ (int)pid);
+ return;
+ }
+
+ /*
+ * Turn off the /proc PR_PTRACE flag so the parent doesn't get
+ * spurious wake ups while we're working our dark magic. Arrange to
+ * catch the process when it exits from fork, and turn on the /proc
+ * inherit-on-fork flag so we catcht the child as well. We then run
+ * the process, wait for it to stop on the fork1(2) call and reset
+ * the tracing flags to their original state.
+ */
+ ctlp = ctl;
+ *ctlp++ = PCCSIG;
+ if (!monitor) {
+ *ctlp++ = PCUNSET;
+ *ctlp++ = PR_PTRACE;
+ }
+ *ctlp++ = PCSET;
+ *ctlp++ = PR_FORK;
+ *ctlp++ = PCSEXIT;
+ sysp = (sysset_t *)ctlp;
+ ctlp += sizeof (sysset_t) / sizeof (long);
+ premptyset(sysp);
+ praddset(sysp, SYS_forksys); /* fork1() is forksys(0, 0) */
+ *ctlp++ = PCRUN;
+ *ctlp++ = 0;
+ *ctlp++ = PCWSTOP;
+ if (!monitor) {
+ *ctlp++ = PCSET;
+ *ctlp++ = PR_PTRACE;
+ }
+ *ctlp++ = PCUNSET;
+ *ctlp++ = PR_FORK;
+ *ctlp++ = PCSEXIT;
+ sysp = (sysset_t *)ctlp;
+ ctlp += sizeof (sysset_t) / sizeof (long);
+ premptyset(sysp);
+ if (monitor)
+ praddset(sysp, SYS_execve);
+
+ size = (char *)ctlp - (char *)&ctl[0];
+ assert(size <= sizeof (ctl));
+
+ if (write(fd, ctl, size) != size) {
+ (void) close(fd);
+ lx_debug("lx_catch_fork: failed to set %d running",
+ (int)pid);
+ return;
+ }
+
+ /*
+ * Get the status so we can find the value returned from fork1() --
+ * the child process's pid.
+ */
+ if (get_status(pid, &ps) != 0) {
+ (void) close(fd);
+ lx_debug("lx_catch_fork: failed to get status for %d",
+ (int)pid);
+ return;
+ }
+
+ child = (pid_t)ps.pr_lwp.pr_reg[R_R0];
+
+ /*
+ * We're done with the parent -- off you go.
+ */
+ ctl[0] = PCRUN;
+ ctl[1] = 0;
+ size = 2 * sizeof (long);
+
+ if (write(fd, ctl, size) != size) {
+ (void) close(fd);
+ lx_debug("lx_catch_fork: failed to set %d running",
+ (int)pid);
+ return;
+ }
+
+ (void) close(fd);
+
+ /*
+ * If fork1(2) failed, we're done.
+ */
+ if (child < 0) {
+ lx_debug("lx_catch_fork: fork1 failed");
+ return;
+ }
+
+ /*
+ * Now we need to screw with the child process.
+ */
+ if ((fd = open_lwpfile(child, 1, O_WRONLY, "lwpctl")) < 0) {
+ lx_debug("lx_catch_fork: failed to control %d",
+ (int)child);
+ return;
+ }
+
+ ctlp = ctl;
+ *ctlp++ = PCUNSET;
+ *ctlp++ = PR_FORK;
+ *ctlp++ = PCSEXIT;
+ sysp = (sysset_t *)ctlp;
+ ctlp += sizeof (sysset_t) / sizeof (long);
+ premptyset(sysp);
+ size = (char *)ctlp - (char *)&ctl[0];
+
+ if (write(fd, ctl, size) != size) {
+ (void) close(fd);
+ lx_debug("lx_catch_fork: failed to clear trace flags for %d",
+ (int)child);
+ return;
+ }
+
+ /*
+ * Now treat the child as though we had attached to it explicitly.
+ */
+ err = ptrace_attach_common(fd, child, child, 1, 1);
+ assert(err == 0);
+
+ (void) close(fd);
+}
+
+static void
+set_dr6(pid_t pid, siginfo_t *infop)
+{
+ uintptr_t *debugreg;
+ uintptr_t addr;
+ uintptr_t base;
+ size_t size = NULL;
+ int dr7;
+ int lrw;
+ int i;
+
+ if ((debugreg = debug_registers(pid)) == NULL)
+ return;
+
+ debugreg[6] = 0xffff0ff0; /* read as ones */
+ switch (infop->si_code) {
+ case TRAP_TRACE:
+ debugreg[6] |= 0x4000; /* single-step */
+ break;
+ case TRAP_RWATCH:
+ case TRAP_WWATCH:
+ case TRAP_XWATCH:
+ dr7 = debugreg[7];
+ addr = (uintptr_t)infop->si_addr;
+ for (i = 0; i < 4; i++) {
+ if ((dr7 & (1 << (2 * i))) == 0) /* enabled? */
+ continue;
+ lrw = (dr7 >> (16 + (4 * i))) & 0xf;
+ switch (lrw >> 2) { /* length */
+ case 0: size = 1; break;
+ case 1: size = 2; break;
+ case 2: size = 8; break;
+ case 3: size = 4; break;
+ }
+ base = debugreg[i];
+ if (addr >= base && addr < base + size)
+ debugreg[6] |= (1 << i);
+ }
+ /*
+ * Were we also attempting a single-step?
+ * (kludge: we use debugreg[4] for this flag.)
+ */
+ if (debugreg[4])
+ debugreg[6] |= 0x4000;
+ break;
+ default:
+ break;
+ }
+}
+
+/*
+ * This is called from the emulation of the wait4, waitpid and waitid system
+ * calls to take into account:
+ * - the monitor processes which we spawn to observe other processes from
+ * ptrace_attach().
+ * - the extended si_status result we can get when extended ptrace options
+ * are enabled.
+ */
+int
+lx_ptrace_wait(siginfo_t *infop)
+{
+ ptrace_monitor_map_t *p, **pp;
+ pid_t lxpid, pid = infop->si_pid;
+ lwpid_t lwpid;
+ int fd;
+ pstatus_t status;
+
+ /*
+ * If the process observed by waitid(2) corresponds to the monitor
+ * process for a traced thread, we need to rewhack the siginfo_t to
+ * look like it came from the traced thread with the flags set
+ * according to the current state.
+ */
+ (void) mutex_lock(&ptrace_map_mtx);
+ for (pp = &ptrace_monitor_map; (p = *pp) != NULL; pp = &p->pmm_next) {
+ if (p->pmm_monitor == pid) {
+ assert(infop->si_code == CLD_EXITED ||
+ infop->si_code == CLD_KILLED ||
+ infop->si_code == CLD_DUMPED ||
+ infop->si_code == CLD_TRAPPED);
+ goto found;
+ }
+ }
+ (void) mutex_unlock(&ptrace_map_mtx);
+
+ if (infop->si_code == CLD_TRAPPED) {
+ /*
+ * If the traced process got a SIGWAITING, we must be in the
+ * middle of a clone(2) with CLONE_PTRACE set.
+ */
+ if (infop->si_status == SIGWAITING) {
+ ptrace_catch_fork(pid, 0);
+ return (-1);
+ }
+
+ /*
+ * If the traced process got a SIGTRAP then Linux ptrace
+ * options might have been set, so setup the extended
+ * si_status to contain the (possible) event.
+ */
+ if (infop->si_status == SIGTRAP) {
+ uint_t event;
+
+ if (syscall(SYS_brand, B_PTRACE_EXT_OPTS,
+ B_PTRACE_EXT_OPTS_EVT, pid, &event) == 0)
+ infop->si_status |= event;
+ }
+ }
+
+ if (get_status(pid, &status) == 0 &&
+ (status.pr_lwp.pr_flags & PR_STOPPED) &&
+ status.pr_lwp.pr_why == PR_SIGNALLED &&
+ status.pr_lwp.pr_info.si_signo == SIGTRAP)
+ set_dr6(pid, &status.pr_lwp.pr_info);
+
+ return (0);
+
+found:
+ /*
+ * If the monitor is in the exiting state, ignore the event and free
+ * the monitor structure if the monitor has exited. By returning -1 we
+ * indicate to the caller that this was a spurious return from
+ * waitid(2) and that it should ignore the result and try again.
+ */
+ if (p->pmm_exiting) {
+ if (infop->si_code == CLD_EXITED ||
+ infop->si_code == CLD_KILLED ||
+ infop->si_code == CLD_DUMPED) {
+ *pp = p->pmm_next;
+ (void) mutex_unlock(&ptrace_map_mtx);
+ free(p);
+ }
+ return (-1);
+ }
+
+ lxpid = p->pmm_target;
+ pid = p->pmm_pid;
+ lwpid = p->pmm_lwpid;
+ (void) mutex_unlock(&ptrace_map_mtx);
+
+ /*
+ * If we can't find the traced process, kill off its monitor.
+ */
+ if ((fd = open_lwpfile(pid, lwpid, O_RDONLY, "lwpstatus")) < 0) {
+ assert(errno == ENOENT);
+ monitor_kill(lxpid, pid);
+ infop->si_code = CLD_EXITED;
+ infop->si_status = 0;
+ infop->si_pid = lxpid;
+ return (0);
+ }
+
+ if (read(fd, &status.pr_lwp, sizeof (status.pr_lwp)) !=
+ sizeof (status.pr_lwp)) {
+ lx_err("read lwpstatus failed %d %s", fd, strerror(errno));
+ assert(0);
+ }
+
+ (void) close(fd);
+
+ /*
+ * If the traced process isn't stopped, this is a truly spurious
+ * event probably caused by another /proc consumer tracing the
+ * monitor.
+ */
+ if (!(status.pr_lwp.pr_flags & PR_STOPPED)) {
+ (void) ptrace_cont_monitor(p);
+ return (-1);
+ }
+
+ switch (status.pr_lwp.pr_why) {
+ case PR_SIGNALLED:
+ /*
+ * If the traced process got a SIGWAITING, we must be in the
+ * middle of a clone(2) with CLONE_PTRACE set.
+ */
+ if (status.pr_lwp.pr_what == SIGWAITING) {
+ ptrace_catch_fork(lxpid, 1);
+ (void) ptrace_cont_monitor(p);
+ return (-1);
+ }
+ infop->si_code = CLD_TRAPPED;
+ infop->si_status = status.pr_lwp.pr_what;
+ if (status.pr_lwp.pr_info.si_signo == SIGTRAP)
+ set_dr6(pid, &status.pr_lwp.pr_info);
+ break;
+
+ case PR_REQUESTED:
+ /*
+ * Make it look like the traced process stopped on an
+ * event of interest.
+ */
+ infop->si_code = CLD_TRAPPED;
+ infop->si_status = SIGTRAP;
+ break;
+
+ case PR_JOBCONTROL:
+ /*
+ * Ignore this as it was probably caused by another /proc
+ * consumer tracing the monitor.
+ */
+ (void) ptrace_cont_monitor(p);
+ return (-1);
+
+ case PR_SYSEXIT:
+ /*
+ * Processes traced via a monitor (rather than using the
+ * native Solaris ptrace support) explicitly trace returns
+ * from exec system calls since it's an implicit ptrace
+ * trace point. Accordingly we need to present a process
+ * in that state as though it had reached the ptrace trace
+ * point.
+ */
+ if (status.pr_lwp.pr_what == SYS_execve) {
+ infop->si_code = CLD_TRAPPED;
+ infop->si_status = SIGTRAP;
+ break;
+ }
+
+ /*FALLTHROUGH*/
+
+ case PR_SYSENTRY:
+ case PR_FAULTED:
+ case PR_SUSPENDED:
+ default:
+ lx_err("didn't expect %d (%d %d)", status.pr_lwp.pr_why,
+ status.pr_lwp.pr_what, status.pr_lwp.pr_flags);
+ assert(0);
+ }
+
+ infop->si_pid = lxpid;
+
+ return (0);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/rlimit.c b/usr/src/lib/brand/lx/lx_brand/common/rlimit.c
new file mode 100644
index 0000000000..61f7afe2f0
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/rlimit.c
@@ -0,0 +1,566 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <errno.h>
+#include <strings.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/systm.h>
+#include <sys/sysconfig.h>
+#include <rctl.h>
+#include <limits.h>
+#include <sys/lx_types.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_syscall.h>
+
+#define LX_RLIMIT_CPU 0
+#define LX_RLIMIT_FSIZE 1
+#define LX_RLIMIT_DATA 2
+#define LX_RLIMIT_STACK 3
+#define LX_RLIMIT_CORE 4
+#define LX_RLIMIT_RSS 5
+#define LX_RLIMIT_NPROC 6
+#define LX_RLIMIT_NOFILE 7
+#define LX_RLIMIT_MEMLOCK 8
+#define LX_RLIMIT_AS 9
+#define LX_RLIMIT_LOCKS 10 /* NA limit on locks, early 2.4 only */
+#define LX_RLIMIT_SIGPENDING 11
+#define LX_RLIMIT_MSGQUEUE 12
+#define LX_RLIMIT_NICE 13 /* NA ceiling for nice */
+#define LX_RLIMIT_RTPRIO 14 /* NA ceiling on the RT priority */
+#define LX_RLIMIT_RTTIME 15 /* NA cpu limit for RT proc. */
+
+#define LX_RLIMIT_NLIMITS 16
+
+typedef struct {
+ ulong_t rlim_cur;
+ ulong_t rlim_max;
+} lx_rlimit_t;
+
+typedef struct {
+ uint64_t rlim_cur;
+ uint64_t rlim_max;
+} lx_rlimit64_t;
+
+/*
+ * Linux supports many of the same resources that we do, but on Illumos some
+ * are rctls. Instead of using rlimit, we use rctls for all of the limits.
+ * This table is used to translate Linux resource limit keys into the Illumos
+ * rctl.
+ */
+static char *l_to_rctl[LX_RLIMIT_NLIMITS] = {
+ "process.max-cpu-time", /* 0 CPU */
+ "process.max-file-size", /* 1 FSIZE */
+ "process.max-data-size", /* 2 DATA */
+ "process.max-stack-size", /* 3 STACK */
+ "process.max-core-size", /* 4 CORE */
+ "zone.max-physical-memory", /* 5 RSS */
+ "zone.max-lwps", /* 6 NPROC */
+ "process.max-file-descriptor", /* 7 NOFILE */
+ "zone.max-locked-memory", /* 8 MEMLOCK */
+ "process.max-address-space", /* 9 AS */
+ NULL, /* 10 LOCKS */
+ "process.max-sigqueue-size", /* 11 SIGPENDING */
+ "process.max-msg-messages", /* 12 MSGQUEUE */
+ NULL, /* 13 NICE */
+ NULL, /* 14 RTPRIO */
+ NULL /* 15 RTTIME */
+};
+
+/*
+ * Magic values Linux uses to indicate infinity
+ */
+#define LX_RLIM_INFINITY_O (0x7fffffffUL)
+#define LX_RLIM_INFINITY_N ULONG_MAX
+#define LX_RLIM64_INFINITY (~0ULL)
+
+#define BIG_INFINITY_O (0x7fffffffLL)
+#define BIG_INFINITY_N ULONG_MAX
+
+/*
+ * Array to store the rlimits that we track but do not enforce.
+ */
+static lx_rlimit64_t fake_limits[LX_RLIMIT_NLIMITS] = {
+ 0, 0, /* 0 CPU */
+ 0, 0, /* 1 FSIZE */
+ 0, 0, /* 2 DATA */
+ 0, 0, /* 3 STACK */
+ 0, 0, /* 4 CORE */
+ 0, 0, /* 5 RSS */
+ 0, 0, /* 6 NPROC */
+ 0, 0, /* 7 NOFILE */
+ 0, 0, /* 8 MEMLOCK */
+ 0, 0, /* 9 AS */
+ LX_RLIM64_INFINITY, LX_RLIM64_INFINITY, /* 10 LOCKS */
+ 0, 0, /* 11 SIGPENDING */
+ 0, 0, /* 12 MSGQUEUE */
+ 20, 20, /* 13 NICE */
+ LX_RLIM64_INFINITY, LX_RLIM64_INFINITY, /* 14 RTPRIO */
+ LX_RLIM64_INFINITY, LX_RLIM64_INFINITY /* 15 RTTIME */
+};
+
+static int
+getrlimit_common(int resource, uint64_t *rlim_curp, uint64_t *rlim_maxp)
+{
+ char *rctl;
+ rctlblk_t *rblk;
+ int64_t cur = -1;
+ boolean_t cur_inf = B_FALSE;
+ int64_t max = -1;
+ boolean_t max_inf = B_FALSE;
+
+ if (resource < 0 || resource >= LX_RLIMIT_NLIMITS)
+ return (-EINVAL);
+
+ rctl = l_to_rctl[resource];
+ if (rctl == NULL) {
+ switch (resource) {
+ case LX_RLIMIT_LOCKS:
+ case LX_RLIMIT_NICE:
+ case LX_RLIMIT_RTPRIO:
+ case LX_RLIMIT_RTTIME:
+ *rlim_maxp = fake_limits[resource].rlim_max;
+ *rlim_curp = fake_limits[resource].rlim_cur;
+ return (0);
+ default:
+ lx_unsupported("Unsupported resource type %d\n",
+ resource);
+ return (-ENOTSUP);
+ }
+ }
+
+ /*
+ * The brand library cannot use malloc(3C) so we allocate the space
+ * with SAFE_ALLOCA(). Thus there's no need to free it when we're done.
+ */
+ rblk = (rctlblk_t *)SAFE_ALLOCA(rctlblk_size());
+
+ if (getrctl(rctl, NULL, rblk, RCTL_FIRST) == -1)
+ return (-errno);
+
+ do {
+ switch (rctlblk_get_privilege(rblk)) {
+ case RCPRIV_BASIC:
+ cur = rctlblk_get_value(rblk);
+ if (rctlblk_get_local_flags(rblk) &
+ RCTL_LOCAL_MAXIMAL &&
+ rctlblk_get_global_flags(rblk) &
+ RCTL_GLOBAL_INFINITE)
+ cur_inf = B_TRUE;
+ break;
+ case RCPRIV_PRIVILEGED:
+ max = rctlblk_get_value(rblk);
+ if (rctlblk_get_local_flags(rblk) &
+ RCTL_LOCAL_MAXIMAL &&
+ rctlblk_get_global_flags(rblk) &
+ RCTL_GLOBAL_INFINITE)
+ max_inf = B_TRUE;
+ break;
+ }
+ } while (getrctl(rctl, rblk, rblk, RCTL_NEXT) != -1);
+
+ /* Confirm we got values. For many rctls "basic" is not set. */
+ if (max == -1)
+ max = LX_RLIM64_INFINITY;
+ if (cur == -1)
+ cur = max;
+
+ if (resource == LX_RLIMIT_STACK && cur > LX_RLIM_INFINITY_O) {
+ /*
+ * Stunningly, Linux has somehow managed to confuse the concept
+ * of a "limit" with that of a "default" -- and the value of
+ * RLIMIT_STACK is used by NPTL as the _default_ stack size if
+ * it isn't specified. (!!) Even for a system that prides
+ * itself on slapdash castles of junk, this is an amazingly
+ * willful act of incompetence -- and one that is gleefully
+ * confessed in the pthread_create() man page: "if the
+ * RLIMIT_STACK soft resource limit at the time the program
+ * started has any value other than 'unlimited', then it
+ * determines the default stack size of new threads." A
+ * typical stack limit for us is 32TB; if it needs to be said,
+ * setting the default stack size to be 32TB doesn't work so
+ * well! Of course, glibc dropping a deuce in its pants
+ * becomes our problem -- so to prevent smelly accidents we
+ * tell Linux that any stack limit over the old (32-bit) values
+ * for infinity are just infinitely large.
+ */
+ cur_inf = B_TRUE;
+ max_inf = B_TRUE;
+ }
+
+ if (cur_inf)
+ *rlim_curp = LX_RLIM64_INFINITY;
+ else
+ *rlim_curp = cur;
+
+ if (max_inf)
+ *rlim_maxp = LX_RLIM64_INFINITY;
+ else
+ *rlim_maxp = max;
+
+ return (0);
+}
+
+/*
+ * This is the 'new' getrlimit, variously called getrlimit or ugetrlimit
+ * in Linux headers and code. The only difference between this and the old
+ * getrlimit (variously called getrlimit or old_getrlimit) is the value of
+ * RLIM_INFINITY, which is smaller for the older version. Modern code will
+ * use this version by default.
+ */
+long
+lx_getrlimit(uintptr_t p1, uintptr_t p2)
+{
+ int resource = (int)p1;
+ lx_rlimit_t *rlp = (lx_rlimit_t *)p2;
+ int rv;
+ lx_rlimit_t rl;
+ uint64_t rlim_cur, rlim_max;
+
+ rv = getrlimit_common(resource, &rlim_cur, &rlim_max);
+ if (rv != 0)
+ return (rv);
+
+ if (rlim_cur == LX_RLIM64_INFINITY)
+ rl.rlim_cur = LX_RLIM_INFINITY_N;
+ else if (rlim_cur > BIG_INFINITY_N)
+ rl.rlim_cur = LX_RLIM_INFINITY_N;
+ else
+ rl.rlim_cur = (ulong_t)rlim_cur;
+
+ if (rlim_max == LX_RLIM64_INFINITY)
+ rl.rlim_max = LX_RLIM_INFINITY_N;
+ else if (rlim_max > BIG_INFINITY_N)
+ rl.rlim_max = LX_RLIM_INFINITY_N;
+ else
+ rl.rlim_max = (ulong_t)rlim_max;
+
+ if ((uucopy(&rl, rlp, sizeof (rl))) != 0)
+ return (-errno);
+
+ return (0);
+}
+
+/*
+ * This is the 'old' getrlimit, variously called getrlimit or old_getrlimit
+ * in Linux headers and code. The only difference between this and the new
+ * getrlimit (variously called getrlimit or ugetrlimit) is the value of
+ * RLIM_INFINITY, which is smaller for the older version.
+ */
+long
+lx_oldgetrlimit(uintptr_t p1, uintptr_t p2)
+{
+ int resource = (int)p1;
+ lx_rlimit_t *rlp = (lx_rlimit_t *)p2;
+ int rv;
+ lx_rlimit_t rl;
+ uint64_t rlim_cur, rlim_max;
+
+ rv = getrlimit_common(resource, &rlim_cur, &rlim_max);
+ if (rv != 0)
+ return (rv);
+
+ if (rlim_cur == LX_RLIM64_INFINITY)
+ rl.rlim_cur = LX_RLIM_INFINITY_O;
+ else if (rlim_cur > BIG_INFINITY_O)
+ rl.rlim_cur = LX_RLIM_INFINITY_O;
+ else
+ rl.rlim_cur = (ulong_t)rlim_cur;
+
+ if (rlim_max == LX_RLIM64_INFINITY)
+ rl.rlim_max = LX_RLIM_INFINITY_O;
+ else if (rlim_max > BIG_INFINITY_O)
+ rl.rlim_max = LX_RLIM_INFINITY_O;
+ else
+ rl.rlim_max = (ulong_t)rlim_max;
+
+ if ((uucopy(&rl, rlp, sizeof (rl))) != 0)
+ return (-errno);
+
+ return (0);
+}
+
+static uint64_t
+get_rctl_max(char *rctl)
+{
+ rctlblk_t *rblk;
+ uint64_t inf;
+
+ /*
+ * The brand library cannot use malloc(3C) so we allocate the space
+ * with SAFE_ALLOCA(). Thus there's no need to free it when we're done.
+ */
+ rblk = (rctlblk_t *)SAFE_ALLOCA(rctlblk_size());
+
+ if (getrctl(rctl, NULL, rblk, RCTL_FIRST) == -1)
+ return (-errno);
+
+ do {
+ switch (rctlblk_get_privilege(rblk)) {
+ case RCPRIV_BASIC:
+ case RCPRIV_PRIVILEGED:
+ inf = rctlblk_get_value(rblk);
+ if (rctlblk_get_local_flags(rblk) &
+ RCTL_LOCAL_MAXIMAL &&
+ rctlblk_get_global_flags(rblk) &
+ RCTL_GLOBAL_INFINITE)
+ return (inf);
+ break;
+
+ case RCPRIV_SYSTEM:
+ inf = rctlblk_get_value(rblk);
+ return (inf);
+ break;
+ }
+ } while (getrctl(rctl, rblk, rblk, RCTL_NEXT) != -1);
+
+ /* Somehow we have no max, use the Linux infinite value */
+ return (LX_RLIM64_INFINITY);
+}
+
+static int
+set_rctl(char *rctl, uint64_t value, rctl_priv_t priv)
+{
+ rctlblk_t *oblk, *nblk;
+ boolean_t priv_deny = B_FALSE;
+ int priv_sig = 0;
+
+ if (value == LX_RLIM64_INFINITY)
+ value = get_rctl_max(rctl);
+
+ /*
+ * The brand library cannot use malloc(3C) so we allocate the space
+ * with SAFE_ALLOCA(). Thus there's no need to free it when we're done.
+ */
+ oblk = (rctlblk_t *)SAFE_ALLOCA(rctlblk_size());
+ nblk = (rctlblk_t *)SAFE_ALLOCA(rctlblk_size());
+
+ if (getrctl(rctl, NULL, oblk, RCTL_FIRST) == -1)
+ return (-errno);
+
+ do {
+ if (rctlblk_get_privilege(oblk) == RCPRIV_PRIVILEGED &&
+ rctlblk_get_local_action(oblk, &priv_sig) & RCTL_LOCAL_DENY)
+ priv_deny = B_TRUE;
+
+ if (rctlblk_get_privilege(oblk) != priv)
+ continue;
+
+ /* we're already at this value, nothing to do */
+ if (rctlblk_get_value(oblk) == value)
+ return (0);
+
+ /* non-root cannot raise privileged limit */
+ if (priv == RCPRIV_PRIVILEGED && geteuid() != 0 &&
+ value > rctlblk_get_value(oblk))
+ return (-EPERM);
+
+ bcopy(oblk, nblk, rctlblk_size());
+ rctlblk_set_value(nblk, value);
+ if (setrctl(rctl, oblk, nblk, RCTL_REPLACE) == -1)
+ return (-errno);
+ return (0);
+ } while (getrctl(rctl, oblk, oblk, RCTL_NEXT) != -1);
+
+ /* not there, add it */
+ bzero(nblk, rctlblk_size());
+ rctlblk_set_value(nblk, value);
+ rctlblk_set_privilege(nblk, priv);
+ if (priv_deny) {
+ rctlblk_set_local_action(nblk, RCTL_LOCAL_DENY, 0);
+ } else {
+ rctlblk_set_local_action(nblk, RCTL_LOCAL_SIGNAL, priv_sig);
+ }
+
+ if (setrctl(rctl, NULL, nblk, RCTL_INSERT) == -1)
+ return (-errno);
+
+ return (0);
+}
+
+static int
+setrlimit_common(int resource, uint64_t rlim_cur, uint64_t rlim_max)
+{
+ int rv;
+ char *rctl;
+
+ if (resource < 0 || resource >= LX_RLIMIT_NLIMITS)
+ return (-EINVAL);
+
+ rctl = l_to_rctl[resource];
+ if (rctl == NULL) {
+ switch (resource) {
+ case LX_RLIMIT_LOCKS:
+ case LX_RLIMIT_NICE:
+ case LX_RLIMIT_RTPRIO:
+ case LX_RLIMIT_RTTIME:
+ fake_limits[resource].rlim_max = rlim_max;
+ fake_limits[resource].rlim_cur = rlim_cur;
+ return (0);
+ }
+
+ lx_unsupported("Unsupported resource type %d\n", resource);
+ return (-ENOTSUP);
+ }
+
+ /*
+ * If we're emulating the value via a zone rctl, we can't set that
+ * from within the zone. Lie and say we set the value.
+ */
+ if (strncmp(rctl, "zone.", 5) == 0)
+ return (0);
+
+ /*
+ * On Ubuntu at least, the login and sshd processes expect to set this
+ * limit to 16k and login will fail if this fails. On Illumos we have a
+ * system limit of 8k and normally the privileged limit is 512. We
+ * simply pretend this works to allow login to work.
+ */
+ if (strcmp(rctl, "process.max-sigqueue-size") == 0 && rlim_max > 8192)
+ return (0);
+
+ /*
+ * Linux limits the max number of open files to 1m and there is a test
+ * for this.
+ */
+ if (resource == LX_RLIMIT_NOFILE && rlim_max > (1024 * 1024))
+ return (-EPERM);
+
+ if ((rv = set_rctl(rctl, rlim_max, RCPRIV_PRIVILEGED)) != 0)
+ return (rv);
+
+ return (set_rctl(rctl, rlim_cur, RCPRIV_BASIC));
+}
+
+long
+lx_setrlimit(uintptr_t p1, uintptr_t p2)
+{
+ int resource = (int)p1;
+ lx_rlimit_t rl;
+ uint64_t rlim_cur, rlim_max;
+
+ if (uucopy((void *)p2, &rl, sizeof (rl)) != 0)
+ return (-errno);
+
+ if ((rl.rlim_max != LX_RLIM_INFINITY_N &&
+ rl.rlim_cur == LX_RLIM_INFINITY_N) ||
+ rl.rlim_cur > rl.rlim_max)
+ return (-EINVAL);
+
+ if (rl.rlim_cur == LX_RLIM_INFINITY_N)
+ rlim_cur = LX_RLIM64_INFINITY;
+ else
+ rlim_cur = rl.rlim_cur;
+
+ if (rl.rlim_max == LX_RLIM_INFINITY_N)
+ rlim_max = LX_RLIM64_INFINITY;
+ else
+ rlim_max = rl.rlim_max;
+
+ return (setrlimit_common(resource, rlim_cur, rlim_max));
+}
+
+/*
+ * From the man page:
+ * The Linux-specific prlimit() system call combines and extends the
+ * functionality of setrlimit() and getrlimit(). It can be used to both set
+ * and get the resource limits of an arbitrary process.
+ *
+ * If pid is 0, then the call applies to the calling process.
+ */
+long
+lx_prlimit64(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
+{
+ pid_t pid = (pid_t)p1;
+ int resource = (int)p2;
+ lx_rlimit64_t *nrlp = (lx_rlimit64_t *)p3;
+ lx_rlimit64_t *orlp = (lx_rlimit64_t *)p4;
+ int rv = 0;
+ uint64_t rlim_cur, rlim_max;
+ lx_rlimit64_t nrl, orl;
+
+ if (pid != 0) {
+ /* XXX TBD if needed */
+ lx_unsupported("setting prlimit %d for another process\n",
+ resource);
+ return (-ENOTSUP);
+ }
+
+ if (orlp != NULL) {
+ /* we first get the current limits */
+ rv = getrlimit_common(resource, &rlim_cur, &rlim_max);
+ if (rv != 0)
+ return (rv);
+ }
+
+ if (nrlp != NULL) {
+ if (uucopy((void *)p3, &nrl, sizeof (nrl)) != 0)
+ return (-errno);
+
+ if ((nrl.rlim_max != LX_RLIM64_INFINITY &&
+ nrl.rlim_cur == LX_RLIM64_INFINITY) ||
+ nrl.rlim_cur > nrl.rlim_max)
+ return (-EINVAL);
+
+ rv = setrlimit_common(resource, nrl.rlim_cur, nrl.rlim_max);
+ }
+
+ if (rv == 0 && orlp != NULL) {
+ /* now return the original limits, if necessary */
+ orl.rlim_cur = rlim_cur;
+ orl.rlim_max = rlim_max;
+
+ if ((uucopy(&orl, orlp, sizeof (orl))) != 0)
+ rv = -errno;
+ }
+
+ return (rv);
+}
+
+/*
+ * We lucked out here. Linux and Solaris have exactly the same
+ * rusage structures.
+ */
+long
+lx_getrusage(uintptr_t p1, uintptr_t p2)
+{
+ int who = (int)p1;
+ struct rusage *rup = (struct rusage *)p2;
+ int rv, swho;
+
+ if (who == LX_RUSAGE_SELF)
+ swho = RUSAGE_SELF;
+ else if (who == LX_RUSAGE_CHILDREN)
+ swho = RUSAGE_CHILDREN;
+ else if (who == LX_RUSAGE_THREAD)
+ swho = RUSAGE_LWP;
+ else
+ return (-EINVAL);
+
+ rv = getrusage(swho, rup);
+
+ return (rv < 0 ? -errno : 0);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/sched.c b/usr/src/lib/brand/lx/lx_brand/common/sched.c
new file mode 100644
index 0000000000..62d7c8d170
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/sched.c
@@ -0,0 +1,624 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/cred_impl.h>
+#include <sys/ucred.h>
+#include <ucred.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <errno.h>
+#include <sched.h>
+#include <strings.h>
+#include <pthread.h>
+#include <time.h>
+#include <thread.h>
+#include <alloca.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_sched.h>
+
+/* Linux only has three valid policies, SCHED_FIFO, SCHED_RR and SCHED_OTHER */
+static int
+validate_policy(int policy)
+{
+ switch (policy) {
+ case LX_SCHED_FIFO:
+ return (SCHED_FIFO);
+
+ case LX_SCHED_RR:
+ return (SCHED_RR);
+
+ case LX_SCHED_OTHER:
+ return (SCHED_OTHER);
+
+ default:
+ lx_debug("validate_policy: illegal policy: %d", policy);
+ return (-EINVAL);
+ }
+}
+
+/*
+ * Check to see if we have the permissions to set scheduler parameters and
+ * policy, based on Linux' demand that such commands fail with errno set to
+ * EPERM if the current euid is not the euid or ruid of the process in
+ * question.
+ */
+static int
+check_schedperms(pid_t pid)
+{
+ size_t sz;
+ ucred_t *cr;
+ uid_t euid;
+
+ euid = geteuid();
+
+ if (pid == getpid()) {
+ /*
+ * If we're the process to be checked, simply check the euid
+ * against our ruid.
+ */
+ if (euid != getuid())
+ return (-EPERM);
+
+ return (0);
+ }
+
+ /*
+ * We allocate a ucred_t ourselves rather than call ucred_get(3C)
+ * because ucred_get() calls malloc(3C), which the brand library cannot
+ * use. Because we allocate the space with SAFE_ALLOCA(), there's
+ * no need to free it when we're done.
+ */
+ sz = ucred_size();
+ cr = (ucred_t *)SAFE_ALLOCA(sz);
+
+ if (cr == NULL)
+ return (-ENOMEM);
+
+ /*
+ * If we can't access the process' credentials, fail with errno EPERM
+ * as the call would not have succeeded anyway.
+ */
+ if (syscall(SYS_ucredsys, UCREDSYS_UCREDGET, pid, cr) != 0)
+ return ((errno == EACCES) ? -EPERM : -errno);
+
+ if ((euid != ucred_geteuid(cr)) && (euid != ucred_getruid(cr)))
+ return (-EPERM);
+
+ return (0);
+}
+
+static int
+ltos_sparam(int policy, struct lx_sched_param *lsp, struct sched_param *sp)
+{
+ struct lx_sched_param ls;
+ int smin = sched_get_priority_min(policy);
+ int smax = sched_get_priority_max(policy);
+
+ if (uucopy(lsp, &ls, sizeof (struct lx_sched_param)) != 0)
+ return (-errno);
+
+ bzero(sp, sizeof (struct sched_param));
+
+ /*
+ * Linux has a fixed priority range, 0 - 99, which we need to convert to
+ * Solaris's dynamic range. Linux considers lower numbers to be
+ * higher priority, so we'll invert the priority within Solaris's range.
+ *
+ * The formula to convert between ranges is:
+ *
+ * L * (smax - smin)
+ * S = ----------------- + smin
+ * (lmax - lmin)
+ *
+ * where S is the Solaris equivalent of the linux priority L.
+ *
+ * To invert the priority, we use:
+ * S' = smax - S + smin
+ *
+ * Together, these two formulas become:
+ *
+ * L * (smax - smin)
+ * S = smax - ----------------- + 2smin
+ * 99
+ */
+ sp->sched_priority = smax -
+ ((ls.lx_sched_prio * (smax - smin)) / LX_PRI_MAX) + 2*smin;
+
+ lx_debug("ltos_sparam: linux prio %d = Solaris prio %d "
+ "(Solaris range %d,%d)\n", ls.lx_sched_prio, sp->sched_priority,
+ smin, smax);
+
+ return (0);
+}
+
+static int
+stol_sparam(int policy, struct sched_param *sp, struct lx_sched_param *lsp)
+{
+ struct lx_sched_param ls;
+ int smin = sched_get_priority_min(policy);
+ int smax = sched_get_priority_max(policy);
+
+ if (policy == SCHED_OTHER) {
+ /*
+ * In Linux, the only valid SCHED_OTHER scheduler priority is 0
+ */
+ ls.lx_sched_prio = 0;
+ } else {
+ /*
+ * Convert Solaris's dynamic, inverted priority range to the
+ * fixed Linux range of 1 - 99.
+ *
+ * The formula is (see above):
+ *
+ * (smax - s + 2smin) * 99
+ * l = -----------------------
+ * smax - smin
+ */
+ ls.lx_sched_prio = ((smax - sp->sched_priority + 2*smin) *
+ LX_PRI_MAX) / (smax - smin);
+ }
+
+ lx_debug("stol_sparam: policy %d: Solaris prio %d = linux prio %d "
+ "(Solaris range %d,%d)\n", policy,
+ sp->sched_priority, ls.lx_sched_prio, smin, smax);
+
+ return ((uucopy(&ls, lsp, sizeof (struct lx_sched_param)) != 0)
+ ? -errno : 0);
+}
+
+#define BITINDEX(ind) (ind / (sizeof (ulong_t) * 8))
+#define BITSHIFT(ind) (1 << (ind % (sizeof (ulong_t) * 8)))
+
+/* ARGSUSED */
+long
+lx_sched_getaffinity(uintptr_t pid, uintptr_t len, uintptr_t maskp)
+{
+ int sz;
+ ulong_t *lmask, *zmask;
+ int i;
+
+ sz = syscall(SYS_brand, B_GET_AFFINITY_MASK, pid, len, maskp);
+ if (sz == -1)
+ return (-errno);
+
+ /*
+ * If the target LWP hasn't ever had an affinity mask set, the kernel
+ * will return a mask of all 0's. If that is the case we must build a
+ * default mask that has all valid bits turned on.
+ */
+ lmask = SAFE_ALLOCA(sz);
+ zmask = SAFE_ALLOCA(sz);
+ if (lmask == NULL || zmask == NULL)
+ return (-ENOMEM);
+
+ bzero(zmask, sz);
+
+ if (uucopy((void *)maskp, lmask, sz) != 0)
+ return (-EFAULT);
+
+ if (bcmp(lmask, zmask, sz) != 0)
+ return (sz);
+
+ for (i = 0; i < sz * 8; i++) {
+ if (p_online(i, P_STATUS) != -1) {
+ lmask[BITINDEX(i)] |= BITSHIFT(i);
+ }
+ }
+
+ if (uucopy(lmask, (void *)maskp, sz) != 0)
+ return (-EFAULT);
+
+ return (sz);
+}
+
+/* ARGSUSED */
+long
+lx_sched_setaffinity(uintptr_t pid, uintptr_t len, uintptr_t maskp)
+{
+ int ret;
+ int sz;
+ int i;
+ int found;
+ ulong_t *lmask;
+ pid_t s_pid;
+ lwpid_t s_tid;
+ processorid_t cpuid = NULL;
+
+ if ((pid_t)pid < 0)
+ return (-EINVAL);
+
+ if (lx_lpid_to_spair(pid, &s_pid, &s_tid) < 0)
+ return (-ESRCH);
+
+ /*
+ * We only support setting affinity masks for threads in
+ * the calling process.
+ */
+ if (s_pid != getpid())
+ return (-EPERM);
+
+ /*
+ * First, get the minimum bitmask size from the kernel.
+ */
+ sz = syscall(SYS_brand, B_GET_AFFINITY_MASK, 0, 0, 0);
+ if (sz == -1)
+ return (-errno);
+
+ lmask = SAFE_ALLOCA(sz);
+ if (lmask == NULL)
+ return (-ENOMEM);
+
+ if (uucopy((void *)maskp, lmask, sz) != 0)
+ return (-EFAULT);
+
+ /*
+ * Make sure the mask contains at least one processor that is
+ * physically on the system. Reduce the user's mask to the set of
+ * physically present CPUs. Keep track of how many valid
+ * bits are set in the user's mask.
+ */
+
+ for (found = 0, i = 0; i < sz * 8; i++) {
+ if (p_online(i, P_STATUS) == -1) {
+ /*
+ * This CPU doesn't exist, so clear this bit from
+ * the user's mask.
+ */
+ lmask[BITINDEX(i)] &= ~BITSHIFT(i);
+ continue;
+ }
+
+ if ((lmask[BITINDEX(i)] & BITSHIFT(i)) == BITSHIFT(i)) {
+ found++;
+ cpuid = i;
+ }
+ }
+
+ if (found == 0) {
+ lx_debug("\tlx_sched_setaffinity: mask has no present CPUs\n");
+ return (-EINVAL);
+ }
+
+ /*
+ * If only one bit is set, bind the thread to that procesor;
+ * otherwise, clear the binding.
+ */
+ if (found == 1) {
+ lx_debug("\tlx_sched_setaffinity: binding thread %d to cpu%d\n",
+ s_tid, cpuid);
+ if (processor_bind(P_LWPID, s_tid, cpuid, NULL) != 0)
+ /*
+ * It could be that the requested processor is offline,
+ * so we'll just abandon our good-natured attempt to
+ * bind to it.
+ */
+ lx_debug("couldn't bind LWP %d to cpu %d: %s\n", s_tid,
+ cpuid, strerror(errno));
+ } else {
+ lx_debug("\tlx_sched_setaffinity: clearing thr %d binding\n",
+ s_tid);
+ if (processor_bind(P_LWPID, s_tid, PBIND_NONE, NULL) != 0) {
+ lx_debug("couldn't clear CPU binding for LWP %d: %s\n",
+ s_tid, strerror(errno));
+ }
+ }
+
+ /*
+ * Finally, ask the kernel to make a note of our current (though fairly
+ * meaningless) affinity mask.
+ */
+ ret = syscall(SYS_brand, B_SET_AFFINITY_MASK, pid, sz, lmask);
+
+ return ((ret == 0) ? 0 : -errno);
+}
+
+long
+lx_sched_getparam(uintptr_t pid, uintptr_t param)
+{
+ int policy, ret;
+ pid_t s_pid;
+ lwpid_t s_tid;
+
+ struct sched_param sp;
+
+ if (((pid_t)pid < 0) || (param == NULL))
+ return (-EINVAL);
+
+ if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
+ return (-ESRCH);
+
+ /*
+ * If we're attempting to get information on our own process, we can
+ * get data on a per-thread basis; if not, punt and use the specified
+ * pid.
+ */
+ if (s_pid == getpid()) {
+ if ((ret = pthread_getschedparam(s_tid, &policy, &sp)) != 0)
+ return (-ret);
+ } else {
+ if (sched_getparam(s_pid, &sp) == -1)
+ return (-errno);
+
+ if ((policy = sched_getscheduler(s_pid)) < 0)
+ return (-errno);
+ }
+
+ /*
+ * Make sure that any non-SCHED_FIFO non-SCHED_RR scheduler is mapped
+ * onto SCHED_OTHER.
+ */
+ if (policy != SCHED_FIFO && policy != SCHED_RR)
+ policy = SCHED_OTHER;
+
+ return (stol_sparam(policy, &sp, (struct lx_sched_param *)param));
+}
+
+long
+lx_sched_setparam(uintptr_t pid, uintptr_t param)
+{
+ int err, policy;
+ pid_t s_pid;
+ lwpid_t s_tid;
+ struct lx_sched_param lp;
+ struct sched_param sp;
+
+ if (((pid_t)pid < 0) || (param == NULL))
+ return (-EINVAL);
+
+ if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
+ return (-ESRCH);
+
+ if (s_pid == getpid()) {
+ struct sched_param dummy;
+
+ if ((err = pthread_getschedparam(s_tid, &policy, &dummy)) != 0)
+ return (-err);
+ } else
+ if ((policy = sched_getscheduler(s_pid)) < 0)
+ return (-errno);
+
+ lx_debug("sched_setparam(): current policy %d", policy);
+
+ if (uucopy((void *)param, &lp, sizeof (lp)) != 0)
+ return (-errno);
+
+ /*
+ * In Linux, the only valid SCHED_OTHER scheduler priority is 0
+ */
+ if ((policy == SCHED_OTHER) && (lp.lx_sched_prio != 0))
+ return (-EINVAL);
+
+ if ((err = ltos_sparam(policy, (struct lx_sched_param *)&lp,
+ &sp)) != 0)
+ return (err);
+
+ /*
+ * Check if we're allowed to change the scheduler for the process.
+ *
+ * If we're operating on a thread, we can't just call
+ * pthread_setschedparam() because as all threads reside within a
+ * single Solaris process, Solaris will allow the modification
+ *
+ * If we're operating on a process, we can't just call sched_setparam()
+ * because Solaris will allow the call to succeed if the scheduler
+ * parameters do not differ from those being installed, but Linux wants
+ * the call to fail.
+ */
+ if ((err = check_schedperms(s_pid)) != 0)
+ return (err);
+
+ if (s_pid == getpid())
+ return (((err = pthread_setschedparam(s_tid, policy, &sp)) != 0)
+ ? -err : 0);
+
+ return ((sched_setparam(s_pid, &sp) == -1) ? -errno : 0);
+}
+
+long
+lx_sched_rr_get_interval(uintptr_t pid, uintptr_t timespec)
+{
+ struct timespec ts;
+ pid_t s_pid;
+
+ if ((pid_t)pid < 0)
+ return (-EINVAL);
+
+ if (lx_lpid_to_spid((pid_t)pid, &s_pid) < 0)
+ return (-ESRCH);
+
+ if (uucopy((struct timespec *)timespec, &ts,
+ sizeof (struct timespec)) != 0)
+ return (-errno);
+
+ return ((sched_rr_get_interval(s_pid, &ts) == -1) ? -errno : 0);
+}
+
+long
+lx_sched_getscheduler(uintptr_t pid)
+{
+ int policy, rv;
+ pid_t s_pid;
+ lwpid_t s_tid;
+
+ if ((pid_t)pid < 0)
+ return (-EINVAL);
+
+ if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
+ return (-ESRCH);
+
+ if (s_pid == getpid()) {
+ struct sched_param dummy;
+
+ if ((rv = pthread_getschedparam(s_tid, &policy, &dummy)) != 0)
+ return (-rv);
+ } else
+ if ((policy = sched_getscheduler(s_pid)) < 0)
+ return (-errno);
+
+ /*
+ * Linux only supports certain policies; avoid confusing apps with
+ * alien policies.
+ */
+ switch (policy) {
+ case SCHED_FIFO:
+ return (LX_SCHED_FIFO);
+ case SCHED_OTHER:
+ return (LX_SCHED_OTHER);
+ case SCHED_RR:
+ return (LX_SCHED_RR);
+ default:
+ break;
+ }
+
+ return (LX_SCHED_OTHER);
+}
+
+long
+lx_sched_setscheduler(uintptr_t pid, uintptr_t policy, uintptr_t param)
+{
+ int rt_pol;
+ int rv;
+ pid_t s_pid;
+ lwpid_t s_tid;
+ struct lx_sched_param lp;
+
+ struct sched_param sp;
+
+ if (((pid_t)pid < 0) || (param == NULL))
+ return (-EINVAL);
+
+ if ((rt_pol = validate_policy((int)policy)) < 0)
+ return (rt_pol);
+
+ if ((rv = ltos_sparam(policy, (struct lx_sched_param *)param,
+ &sp)) != 0)
+ return (rv);
+
+ if (uucopy((void *)param, &lp, sizeof (lp)) != 0)
+ return (-errno);
+
+ if (rt_pol == LX_SCHED_OTHER) {
+ /*
+ * In Linux, the only valid SCHED_OTHER scheduler priority is 0
+ */
+ if (lp.lx_sched_prio != 0)
+ return (-EINVAL);
+
+ /*
+ * If we're already SCHED_OTHER, there's nothing else to do.
+ */
+ if (lx_sched_getscheduler(pid) == LX_SCHED_OTHER)
+ return (0);
+ }
+
+ if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
+ return (-ESRCH);
+
+ /*
+ * Check if we're allowed to change the scheduler for the process.
+ *
+ * If we're operating on a thread, we can't just call
+ * pthread_setschedparam() because as all threads reside within a
+ * single Solaris process, Solaris will allow the modification.
+ *
+ * If we're operating on a process, we can't just call
+ * sched_setscheduler() because Solaris will allow the call to succeed
+ * if the scheduler and scheduler parameters do not differ from those
+ * being installed, but Linux wants the call to fail.
+ */
+ if ((rv = check_schedperms(s_pid)) != 0)
+ return (rv);
+
+ if (s_pid == getpid()) {
+ struct sched_param param;
+ int pol;
+
+ if ((pol = sched_getscheduler(s_pid)) == -1)
+ return (-errno);
+
+ /*
+ * sched_setscheduler() returns the previous scheduling policy
+ * on success, so call pthread_getschedparam() to get the
+ * current thread's scheduling policy and return that if the
+ * call to pthread_setschedparam() succeeds.
+ */
+ if ((rv = pthread_getschedparam(s_tid, &pol, &param)) != 0)
+ return (-rv);
+
+ return (((rv = pthread_setschedparam(s_tid, rt_pol, &sp)) != 0)
+ ? -rv : pol);
+ }
+
+ return (((rv = sched_setscheduler(s_pid, rt_pol, &sp)) == -1)
+ ? -errno : rv);
+}
+
+long
+lx_sched_get_priority_min(uintptr_t policy)
+{
+ /*
+ * In Linux, the only valid SCHED_OTHER scheduler priority is 0.
+ * Linux scheduling priorities are not alterable, so there is no
+ * Solaris translation necessary.
+ */
+ switch (policy) {
+ case LX_SCHED_FIFO:
+ case LX_SCHED_RR:
+ return (LX_SCHED_PRIORITY_MIN_RRFIFO);
+ case LX_SCHED_OTHER:
+ return (LX_SCHED_PRIORITY_MIN_OTHER);
+ default:
+ break;
+ }
+ return (-EINVAL);
+}
+
+long
+lx_sched_get_priority_max(uintptr_t policy)
+{
+ /*
+ * In Linux, the only valid SCHED_OTHER scheduler priority is 0
+ * Linux scheduling priorities are not alterable, so there is no
+ * Solaris translation necessary.
+ */
+ switch (policy) {
+ case LX_SCHED_FIFO:
+ case LX_SCHED_RR:
+ return (LX_SCHED_PRIORITY_MAX_RRFIFO);
+ case LX_SCHED_OTHER:
+ return (LX_SCHED_PRIORITY_MAX_OTHER);
+ default:
+ break;
+ }
+ return (-EINVAL);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/sendfile.c b/usr/src/lib/brand/lx/lx_brand/common/sendfile.c
new file mode 100644
index 0000000000..0ab0e881ba
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/sendfile.c
@@ -0,0 +1,125 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * lx_sendfile() and lx_sendfile64() are primarily branded versions of the
+ * library calls available in the Solaris libsendfile (see sendfile(3EXT)).
+ */
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/sendfile.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_syscall.h>
+
+#if defined(_ILP32)
+long
+lx_sendfile(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
+{
+ sysret_t rval;
+ off_t off = 0;
+ off_t *offp = (off_t *)p3;
+ int error;
+ struct sendfilevec sfv;
+ size_t xferred;
+ size_t sz = (size_t)p4;
+
+ if (offp == NULL) {
+ /* If no offp, we must use the current offset */
+ if ((off = lseek((int)p2, 0, SEEK_CUR)) == -1)
+ return (-errno);
+ } else {
+ if (sz > 0 && uucopy(offp, &off, sizeof (off)) != 0)
+ return (-errno);
+ }
+
+ sfv.sfv_fd = p2;
+ sfv.sfv_flag = 0;
+ sfv.sfv_off = off;
+ sfv.sfv_len = sz;
+ error = __systemcall(&rval, SYS_sendfilev, SENDFILEV, p1, &sfv,
+ 1, &xferred);
+
+ if (error == 0 && xferred > 0) {
+ if (offp == NULL) {
+ /* If no offp, we must adjust current offset */
+ if (lseek((int)p2, xferred, SEEK_CUR) == -1)
+ return (-errno);
+ } else {
+ off += xferred;
+ error = uucopy(&off, offp, sizeof (off));
+ }
+ }
+
+ return (error ? -error : (int)rval.sys_rval1);
+}
+#endif
+
+long
+lx_sendfile64(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
+{
+ sysret_t rval;
+ off64_t off = 0;
+ off64_t *offp = (off64_t *)p3;
+ size_t sz = (size_t)p4;
+ int error;
+ struct sendfilevec64 sfv;
+ size_t xferred;
+
+ if (offp == NULL) {
+ /* If no offp, we must use the current offset */
+ if ((off = lseek((int)p2, 0, SEEK_CUR)) == -1)
+ return (-errno);
+ } else {
+ if (sz > 0 && uucopy(offp, &off, sizeof (off)) != 0)
+ return (-errno);
+ }
+
+ sfv.sfv_fd = p2;
+ sfv.sfv_flag = 0;
+ sfv.sfv_off = off;
+ sfv.sfv_len = sz;
+ xferred = 0;
+ error = __systemcall(&rval, SYS_sendfilev, SENDFILEV64, p1, &sfv,
+ 1, &xferred);
+
+ if (error == 0 && xferred > 0) {
+ if (offp == NULL) {
+ /* If no offp, we must adjust current offset */
+ if (lseek((int)p2, xferred, SEEK_CUR) == -1)
+ return (-errno);
+ } else {
+ off += xferred;
+ error = uucopy(&off, offp, sizeof (off));
+ }
+ }
+
+ return (error ? -error : (int)rval.sys_rval1);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/signal.c b/usr/src/lib/brand/lx/lx_brand/common/signal.c
new file mode 100644
index 0000000000..1ba48cc0ea
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/signal.c
@@ -0,0 +1,2326 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/segments.h>
+#include <sys/lx_types.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_poll.h>
+#include <sys/lx_signal.h>
+#include <sys/lx_sigstack.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_thread.h>
+#include <sys/syscall.h>
+#include <assert.h>
+#include <errno.h>
+#include <poll.h>
+#include <rctl.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <thread.h>
+#include <ucontext.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <libintl.h>
+#include <ieeefp.h>
+
+#if defined(_ILP32)
+extern int pselect_large_fdset(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0,
+ const timespec_t *tsp, const sigset_t *sp);
+#else
+static int lx_setcontext(const ucontext_t *ucp);
+#endif
+
+/*
+ * Delivering signals to a Linux process is complicated by differences in
+ * signal numbering, stack structure and contents, and the action taken when a
+ * signal handler exits. In addition, many signal-related structures, such as
+ * sigset_ts, vary between Illumos and Linux.
+ *
+ * To support user-level signal handlers, the brand uses a double layer of
+ * indirection to process and deliver signals to branded threads.
+ *
+ * When a Linux process sends a signal using the kill(2) system call, we must
+ * translate the signal into the Illumos equivalent before handing control off
+ * to the standard signalling mechanism. When a signal is delivered to a Linux
+ * process, we translate the signal number from Illumos to back to Linux.
+ * Translating signals both at generation and delivery time ensures both that
+ * Illumos signals are sent properly to Linux applications and that signals'
+ * default behavior works as expected.
+ *
+ * In a normal Illumos process, signal delivery is interposed on for any thread
+ * registering a signal handler by libc. Libc needs to do various bits of magic
+ * to provide thread-safe critical regions, so it registers its own handler,
+ * named sigacthandler(), using the sigaction(2) system call. When a signal is
+ * received, sigacthandler() is called, and after some processing, libc turns
+ * around and calls the user's signal handler via a routine named
+ * call_user_handler().
+ *
+ * Adding a Linux branded thread to the mix complicates things somewhat.
+ *
+ * First (for 32-bit code), when a thread receives a signal, it may be running
+ * with a Linux value in the x86 %gs segment register as opposed to the value
+ * Illumos threads expect; if control were passed directly to Illumos code,
+ * such as libc's sigacthandler(), that code would experience a segmentation
+ * fault the first time it tried to dereference a memory location using %gs.
+ *
+ * For 64-bit code the %gs is usually 0 for both native and Linux code and the
+ * thread pointer for both Illumos and Linux libc is referenced off the %fsbase
+ * register, as per the AMD64 ABI. When a thread receives a signal, it may be
+ * running with the Linux value in the x86 %fsbase register as opposed to the
+ * value Illumos libc expects. Switching the %fsbase value is handled in the
+ * kernel module at the same time as we switch the syscall mode flag. We track
+ * the syscall mode flag in the kernel using the per-lwp br_scms integer so we
+ * can save/restore the correct mode at the end of the signal handling. The
+ * flag value is saved/restored in the per-thread br_scms variable which is
+ * used like a stack to push/pop the flag bit as we take signals and return.
+ *
+ * Second, the signal number translation referenced above must take place.
+ * Further, for 32-bit code, as was the case with Illumos libc, before the
+ * Linux signal handler is called, the value of the %gs segment register MUST
+ * be restored to the value Linux code expects.
+ *
+ * This need to translate signal numbers (and manipulate the %gs register)
+ * means that with standard Illumos libc, following a signal from generation to
+ * delivery looks something like:
+ *
+ * kernel ->
+ * sigacthandler() ->
+ * call_user_handler() ->
+ * user signal handler
+ *
+ * but for the brand's Linux threads, this would look like:
+ *
+ * kernel ->
+ * lx_sigacthandler() ->
+ * sigacthandler() ->
+ * call_user_handler() ->
+ * lx_call_user_handler() ->
+ * lx_sigdeliver() JMP to
+ * Linux user signal handler
+ *
+ * The new addtions are:
+ *
+ * lx_sigacthandler
+ * ================
+ * This routine is responsible for setting the %gs segment register to the
+ * value 32-bit Illumos code expects (it does nothing in 64-bit code) and
+ * jumping to Illumos' libc signal interposition handler, sigacthandler().
+ *
+ * lx_call_user_handler
+ * ====================
+ * This routine is responsible for translating Illumos signal numbers to
+ * their Linux equivalents, building a Linux signal stack based on the
+ * information Illumos has provided, and passing the stack to the
+ * registered Linux signal handler. It is, in effect, the Linux thread
+ * equivalent to libc's call_user_handler().
+ *
+ * Installing lx_sigacthandler() is a bit tricky, as normally libc's
+ * sigacthandler() routine is hidden from user programs. To facilitate this, a
+ * libc private function is used; setsigacthandler():
+ *
+ * void setsigacthandler(void (*new_handler)(int, siginfo_t *, void *),
+ * void (**old_handler)(int, siginfo_t *, void *)
+ * int (*brsetctxt)(const ucontext_t *))
+ *
+ * The routine works by modifying the per-thread data structure (uberdata) in
+ * libc that keeps track of the address of its own interposition handler with
+ * the address passed in; the old handler's address is returned in the pointer
+ * pointed to by the second argument, if it is non-NULL, mimicking the behavior
+ * of sigaction() itself. In a similar way, this function can also set a
+ * replacement handler for the libc __setcontext call which is made by libc's
+ * setcontext() when returning from a signal handler. Using this we can hook
+ * in to managing the syscall mode flag for 64-bit code when returning to the
+ * interrupted code. Once setsigacthandler() has been executed, all future
+ * branded threads this thread may create will automatically have the proper
+ * interposition handler(s) invoked as the result of a normal sigaction() call.
+ *
+ * Note that none of this interposition is necessary unless a Linux thread
+ * registers a user signal handler, as the default action for all signals is the
+ * same between Illumos and Linux save for one signal, SIGPWR. For this reason,
+ * the brand ALWAYS installs its own internal signal handler for SIGPWR that
+ * translates the action to the Linux default, to terminate the process.
+ * (Illumos' default action is to ignore SIGPWR.)
+ *
+ * It is also important to note that when signals are not translated, the brand
+ * relies upon code interposing upon the wait(2) system call to translate
+ * signals to their proper values for any Linux threads retrieving the status
+ * of others. So while the Illumos signal number for a particular signal is set
+ * in a process' data structures (and would be returned as the result of say,
+ * WTERMSIG()), the brand's interposiiton upon wait(2) is responsible for
+ * translating the value WTERMSIG() would return from a Illumos signal number
+ * to the appropriate Linux value.
+ *
+ * lx_call_user_handler() calls lx_sigdeliver with a helper function (typically
+ * lx_build_signal_frame) which builds a stack frame for the 32-bit Linux
+ * signal handler, or populates a local (on the stack) structure for the 64-bit
+ * Linux signal handler, then jmp's into the handler. The stack at that time
+ * looks like this:
+ *
+ * =================================================
+ * | | LX_SIGRT_MAGIC |
+ * | =================================================
+ * | | Linux signal frame (32-bit) or local data |
+ * V | (64-bit) built by stack_builder() |
+ * =================================================
+ *
+ * The process of returning to an interrupted thread of execution from a user
+ * signal handler is entirely different between Illumos and Linux. While
+ * Illumos generally expects to set the context to the interrupted one on a
+ * normal return from a signal handler, in the normal case Linux instead calls
+ * code that calls a specific Linux system call, rt_sigreturn(2) (or it also
+ * can call sigreturn(2) in 32-bit code). Thus when a Linux signal handler
+ * completes execution, instead of returning through what would in libc be a
+ * call to setcontext(2), the rt_sigreturn(2) Linux system call is responsible
+ * for accomplishing much the same thing. It's for this reason that the stack
+ * frame we build has the lx_(rt_)sigreturn_tramp code on the top of the
+ * stack.
+ *
+ * The lx_rt_sigreturn() function will handle the syscall, do its cleanup,
+ * then return to the libc signal handling code (call_user_handler) so that
+ * libc can use setcontext() to get back to the point where things were
+ * interrupted. However, for the 64-bit case, due to the syscall mode switching,
+ * we cannot simply let the libc setcontext() take us back because we may also
+ * have to switch the syscall mode back to Linux (it depends on where we were
+ * when we took the signal). For the 64-bit case we used setsigacthandler()
+ * to setup a libc replacement function on __setcontext(). This is the
+ * lx_setcontext() function. This function uses a brand call (B_SIGNAL_RETURN)
+ * which combines the syscall mode switching and setcontext handling in the lx
+ * kernel module.
+ *
+ * An additional oddity in the signal return code is that in the stack builder
+ * function we push some x86 code onto the bottom of the stack that looks like
+ * it invokes the Linux (rt)_sigreturn syscall. This is needed by gdb to
+ * tell that it's in a signal handler. The code looks like this:
+ *
+ * 32-bit 64-bit
+ * -------------------------------- -----------------------------
+ * mov LX_SYS_rt_sigreturn, %eax movq LX_SYS_rt_sigreturn, %rax
+ * int $0x80 syscall
+ *
+ * We also use these same functions (lx_rt_sigreturn_tramp or
+ * lx_sigreturn_tramp) to actually return from the signal handler.
+ *
+ * (Note that this trampoline code actually lives in a proper executable segment
+ * and not on the stack, but gdb checks for the exact code sequence of the
+ * trampoline code on the stack to determine whether it is in a signal stack
+ * frame or not. Really.)
+ *
+ * When the 32-bit Linux user signal handler is eventually called, the stack
+ * frame looks like this (in the case of a "modern" signal stack; see the
+ * lx_sigstack structure definition):
+ *
+ * =========================================================
+ * | | Trampoline code (marker for gdb, not really executed) |
+ * | =========================================================
+ * | | Linux struct _fpstate |
+ * | =========================================================
+ * V | Linux ucontext_t | <--+
+ * ========================================================= |
+ * | Linux siginfo_t | <--|-----+
+ * ========================================================= | |
+ * | Pointer to Linux ucontext_t (or NULL) (sigaction arg2)| ---+ |
+ * ========================================================= |
+ * | Pointer to Linux siginfo_t (or NULL) (sigaction arg1)| ---------+
+ * =========================================================
+ * | Linux signal number (sigaction arg0)|
+ * =========================================================
+ * | Pointer to signal return code (trampoline code) |
+ * =========================================================
+ *
+ * The 64-bit stack-local data looks like this:
+ *
+ * =========================================================
+ * | | Trampoline code (marker for gdb, not really executed) |
+ * | =========================================================
+ * | | Linux struct _fpstate |
+ * | =========================================================
+ * V | Linux ucontext_t | %rdx arg2
+ * =========================================================
+ * | Linux siginfo_t | %rsi arg1
+ * =========================================================
+ * | Pointer to signal return code (trampoline code) |
+ * =========================================================
+ *
+ * As usual in 64-bit code, %rdi is arg0 which is the signal number.
+ *
+ * As mentioned above, the brand intercepts the Linux (rt_)sigreturn(2) system
+ * call. This turns into some stack cleanup and a call to lx_sigreturn_tolibc()
+ * which returns through the libc call stack that Illumos expects, with the
+ * caveat that 64-bit code combines the __setcontext and syscall mode switch
+ * via a brand call. This returns the thread executing the code back to the
+ * location originally interrupted by receipt of the signal.
+ */
+
+
+struct lx_oldsigstack {
+ void (*retaddr)(); /* address of real lx_sigreturn code */
+ int sig; /* signal number */
+ lx_sigcontext_t sigc; /* saved user context */
+ lx_fpstate_t fpstate; /* saved FP state */
+ int sig_extra; /* signal mask for signals [32 .. NSIG - 1] */
+ char trampoline[8]; /* code for trampoline to lx_sigreturn() */
+};
+
+/*
+ * libc_sigacthandler is set to the address of the libc signal interposition
+ * routine, sigacthandler().
+ */
+void (*libc_sigacthandler)(int, siginfo_t *, void*);
+
+/*
+ * The lx_sighandlers structure needs to be a global due to the semantics of
+ * clone().
+ *
+ * If CLONE_SIGHAND is set, the calling process and child share signal
+ * handlers, and if either calls sigaction(2) it should change the behavior
+ * in the other thread. Each thread does, however, have its own signal mask
+ * and set of pending signals.
+ *
+ * If CLONE_SIGHAND is not set, the child process should inherit a copy of
+ * the signal handlers at the time of the clone() but later calls to
+ * sigaction(2) should only affect the individual thread calling it.
+ *
+ * This maps perfectly to a thr_create(3C) thread semantic in the first
+ * case and a fork(2)-type semantic in the second case. By making
+ * lx_sighandlers global, we automatically get the correct behavior.
+ */
+static lx_sighandlers_t lx_sighandlers;
+
+/*
+ * Cache result of process.max-file-descriptor to avoid calling getrctl()
+ * for each lx_ppoll().
+ */
+static rlim_t maxfd = 0;
+
+/*
+ * stol_stack() and ltos_stack() convert between Illumos and Linux stack_t
+ * structures.
+ *
+ * These routines are needed because although the two structures have the same
+ * contents, their contents are declared in a different order, so the content
+ * of the structures cannot be copied with a simple bcopy().
+ */
+static void
+stol_stack(stack_t *fr, lx_stack_t *to)
+{
+ to->ss_sp = fr->ss_sp;
+ to->ss_flags = fr->ss_flags;
+ to->ss_size = fr->ss_size;
+}
+
+static void
+ltos_stack(lx_stack_t *fr, stack_t *to)
+{
+ to->ss_sp = fr->ss_sp;
+ to->ss_flags = fr->ss_flags;
+ to->ss_size = fr->ss_size;
+}
+
+static int
+ltos_sigset(lx_sigset_t *lx_sigsetp, sigset_t *s_sigsetp)
+{
+ lx_sigset_t l;
+ int lx_sig, sig;
+
+ if (uucopy(lx_sigsetp, &l, sizeof (lx_sigset_t)) != 0)
+ return (-errno);
+
+ (void) sigemptyset(s_sigsetp);
+
+ for (lx_sig = 1; lx_sig <= LX_NSIG; lx_sig++) {
+ if (lx_sigismember(&l, lx_sig) &&
+ ((sig = ltos_signo[lx_sig]) > 0))
+ (void) sigaddset(s_sigsetp, sig);
+ }
+
+ return (0);
+}
+
+static int
+stol_sigset(sigset_t *s_sigsetp, lx_sigset_t *lx_sigsetp)
+{
+ lx_sigset_t l;
+ int sig, lx_sig;
+
+ bzero(&l, sizeof (lx_sigset_t));
+
+ for (sig = 1; sig < NSIG; sig++) {
+ if (sigismember(s_sigsetp, sig) &&
+ ((lx_sig = stol_signo[sig]) > 0))
+ lx_sigaddset(&l, lx_sig);
+ }
+
+ return ((uucopy(&l, lx_sigsetp, sizeof (lx_sigset_t)) != 0)
+ ? -errno : 0);
+}
+
+#if defined(_ILP32)
+static int
+ltos_osigset(lx_osigset_t *lx_osigsetp, sigset_t *s_sigsetp)
+{
+ lx_osigset_t lo;
+ int lx_sig, sig;
+
+ if (uucopy(lx_osigsetp, &lo, sizeof (lx_osigset_t)) != 0)
+ return (-errno);
+
+ (void) sigemptyset(s_sigsetp);
+
+ for (lx_sig = 1; lx_sig <= OSIGSET_NBITS; lx_sig++)
+ if ((lo & OSIGSET_BITSET(lx_sig)) &&
+ ((sig = ltos_signo[lx_sig]) > 0))
+ (void) sigaddset(s_sigsetp, sig);
+
+ return (0);
+}
+
+static int
+stol_osigset(sigset_t *s_sigsetp, lx_osigset_t *lx_osigsetp)
+{
+ lx_osigset_t lo = 0;
+ int lx_sig, sig;
+
+ /*
+ * Note that an lx_osigset_t can only represent the signals from
+ * [1 .. OSIGSET_NBITS], so even though a signal may be present in the
+ * Illumos sigset_t, it may not be representable as a bit in the
+ * lx_osigset_t.
+ */
+ for (sig = 1; sig < NSIG; sig++)
+ if (sigismember(s_sigsetp, sig) &&
+ ((lx_sig = stol_signo[sig]) > 0) &&
+ (lx_sig <= OSIGSET_NBITS))
+ lo |= OSIGSET_BITSET(lx_sig);
+
+ return ((uucopy(&lo, lx_osigsetp, sizeof (lx_osigset_t)) != 0)
+ ? -errno : 0);
+}
+#endif
+
+static int
+stol_sigcode(int si_code)
+{
+ switch (si_code) {
+ case SI_USER:
+ return (LX_SI_USER);
+ case SI_LWP:
+ return (LX_SI_TKILL);
+ case SI_QUEUE:
+ return (LX_SI_QUEUE);
+ case SI_TIMER:
+ return (LX_SI_TIMER);
+ case SI_ASYNCIO:
+ return (LX_SI_ASYNCIO);
+ case SI_MESGQ:
+ return (LX_SI_MESGQ);
+ default:
+ return (si_code);
+ }
+}
+
+static int
+ltos_sigcode(int si_code)
+{
+ switch (si_code) {
+ case LX_SI_USER:
+ return (SI_USER);
+ case LX_SI_TKILL:
+ return (SI_LWP);
+ case LX_SI_QUEUE:
+ return (SI_QUEUE);
+ case LX_SI_TIMER:
+ return (SI_TIMER);
+ case LX_SI_ASYNCIO:
+ return (SI_ASYNCIO);
+ case LX_SI_MESGQ:
+ return (SI_MESGQ);
+ default:
+ return (LX_SI_CODE_NOT_EXIST);
+ }
+}
+
+int
+stol_siginfo(siginfo_t *siginfop, lx_siginfo_t *lx_siginfop)
+{
+ int ret = 0;
+ lx_siginfo_t lx_siginfo;
+
+ bzero(&lx_siginfo, sizeof (*lx_siginfop));
+
+ if ((lx_siginfo.lsi_signo = stol_signo[siginfop->si_signo]) <= 0) {
+ /*
+ * Depending on the caller we may still need to get a usable
+ * converted siginfo struct.
+ */
+ lx_siginfo.lsi_signo = LX_SIGKILL;
+ errno = EINVAL;
+ ret = -1;
+ }
+
+ lx_siginfo.lsi_code = stol_sigcode(siginfop->si_code);
+ lx_siginfo.lsi_errno = siginfop->si_errno;
+
+ switch (lx_siginfo.lsi_signo) {
+ /*
+ * Semantics ARE defined for SIGKILL, but since
+ * we can't catch it, we can't translate it. :-(
+ */
+ case LX_SIGPOLL:
+ lx_siginfo.lsi_band = siginfop->si_band;
+ lx_siginfo.lsi_fd = siginfop->si_fd;
+ break;
+
+ case LX_SIGCHLD:
+ lx_siginfo.lsi_pid = siginfop->si_pid;
+ lx_siginfo.lsi_status = siginfop->si_status;
+ lx_siginfo.lsi_utime = siginfop->si_utime;
+ lx_siginfo.lsi_stime = siginfop->si_stime;
+ break;
+
+ case LX_SIGILL:
+ case LX_SIGBUS:
+ case LX_SIGFPE:
+ case LX_SIGSEGV:
+ lx_siginfo.lsi_addr = siginfop->si_addr;
+ break;
+
+ default:
+ lx_siginfo.lsi_pid = siginfop->si_pid;
+ lx_siginfo.lsi_uid =
+ LX_UID32_TO_UID16(siginfop->si_uid);
+ break;
+ }
+
+ if (uucopy(&lx_siginfo, lx_siginfop, sizeof (lx_siginfo_t)) != 0)
+ return (-errno);
+ return ((ret != 0) ? -errno : 0);
+}
+
+static void
+stol_fpstate(fpregset_t *fpr, lx_fpstate_t *lfpr)
+{
+ size_t copy_len;
+
+#if defined(_LP64)
+ /*
+ * The 64-bit Illumos struct fpregset_t and lx_fpstate_t are identical
+ * so just bcopy() those entries (see usr/src/uts/intel/sys/regset.h
+ * for __amd64's struct fpu).
+ */
+ copy_len = sizeof (fpr->fp_reg_set.fpchip_state);
+ bcopy(fpr, lfpr, copy_len);
+
+#else /* is _ILP32 */
+ struct _fpstate *fpsp = (struct _fpstate *)fpr;
+
+ /*
+ * The Illumos struct _fpstate and lx_fpstate_t are identical from the
+ * beginning of the structure to the lx_fpstate_t "magic" field, so
+ * just bcopy() those entries.
+ */
+ copy_len = (size_t)&(((lx_fpstate_t *)0)->magic);
+ bcopy(fpsp, lfpr, copy_len);
+
+ /*
+ * These fields are all only significant for the first 16 bits.
+ */
+ lfpr->cw &= 0xffff; /* x87 control word */
+ lfpr->tag &= 0xffff; /* x87 tag word */
+ lfpr->cssel &= 0xffff; /* cs selector */
+ lfpr->datasel &= 0xffff; /* ds selector */
+
+ /*
+ * Linux wants the x87 status word field to contain the value of the
+ * x87 saved exception status word.
+ */
+ lfpr->sw = lfpr->status & 0xffff; /* x87 status word */
+
+ lfpr->mxcsr = fpsp->mxcsr;
+
+ if (fpsp->mxcsr != 0) {
+ /*
+ * Linux uses the "magic" field to denote whether the XMM
+ * registers contain legal data or not. Since we can't get to
+ * %cr4 from userland to check the status of the OSFXSR bit,
+ * check the mxcsr field to see if it's 0, which it should
+ * never be on a system with the OXFXSR bit enabled.
+ */
+ lfpr->magic = LX_X86_FXSR_MAGIC;
+ bcopy(fpsp->xmm, lfpr->_xmm, sizeof (lfpr->_xmm));
+ } else {
+ lfpr->magic = LX_X86_FXSR_NONE;
+ }
+#endif
+}
+
+static void
+ltos_fpstate(lx_fpstate_t *lfpr, fpregset_t *fpr)
+{
+ size_t copy_len;
+
+#if defined(_LP64)
+ /*
+ * The 64-bit Illumos struct fpregset_t and lx_fpstate_t are identical
+ * so just bcopy() those entries (see usr/src/uts/intel/sys/regset.h
+ * for __amd64's struct fpu).
+ */
+ copy_len = sizeof (fpr->fp_reg_set.fpchip_state);
+ bcopy(lfpr, fpr, copy_len);
+
+#else /* is _ILP32 */
+ struct _fpstate *fpsp = (struct _fpstate *)fpr;
+
+ /*
+ * The lx_fpstate_t and Illumos struct _fpstate are identical from the
+ * beginning of the structure to the struct _fpstate "mxcsr" field, so
+ * just bcopy() those entries.
+ *
+ * Note that we do NOT have to propogate changes the user may have made
+ * to the "status" word back to the "sw" word, unlike the way we have
+ * to deal with processing the ESP and UESP register values on return
+ * from a signal handler.
+ */
+ copy_len = (size_t)&(((struct _fpstate *)0)->mxcsr);
+ bcopy(lfpr, fpsp, copy_len);
+
+ /*
+ * These fields are all only significant for the first 16 bits.
+ */
+ fpsp->cw &= 0xffff; /* x87 control word */
+ fpsp->sw &= 0xffff; /* x87 status word */
+ fpsp->tag &= 0xffff; /* x87 tag word */
+ fpsp->cssel &= 0xffff; /* cs selector */
+ fpsp->datasel &= 0xffff; /* ds selector */
+ fpsp->status &= 0xffff; /* saved status */
+
+ fpsp->mxcsr = lfpr->mxcsr;
+
+ if (lfpr->magic == LX_X86_FXSR_MAGIC)
+ bcopy(lfpr->_xmm, fpsp->xmm, sizeof (fpsp->xmm));
+#endif
+}
+
+/*
+ * The brand needs a lx version of this because the format of the lx stack_t
+ * differs from the Illumos stack_t not really in content but in ORDER,
+ * so we can't simply pass pointers and expect things to work (sigh...)
+ */
+long
+lx_sigaltstack(uintptr_t nsp, uintptr_t osp)
+{
+ lx_stack_t ls;
+ stack_t newsstack, oldsstack;
+ stack_t *nssp = (nsp ? &newsstack : NULL);
+ stack_t *ossp = (osp ? &oldsstack : NULL);
+
+ if (nsp) {
+ if (uucopy((void *)nsp, &ls, sizeof (lx_stack_t)) != 0)
+ return (-errno);
+
+ if ((ls.ss_flags & LX_SS_DISABLE) == 0 &&
+ ls.ss_size < LX_MINSIGSTKSZ)
+ return (-ENOMEM);
+
+ newsstack.ss_sp = (int *)ls.ss_sp;
+ newsstack.ss_size = (long)ls.ss_size;
+ newsstack.ss_flags = ls.ss_flags;
+ }
+
+ if (sigaltstack(nssp, ossp) != 0)
+ return (-errno);
+
+ if (osp) {
+ ls.ss_sp = (void *)oldsstack.ss_sp;
+ ls.ss_size = (size_t)oldsstack.ss_size;
+ ls.ss_flags = oldsstack.ss_flags;
+
+ if (uucopy(&ls, (void *)osp, sizeof (lx_stack_t)) != 0)
+ return (-errno);
+ }
+
+ return (0);
+}
+
+#if defined(_ILP32)
+/*
+ * The following routines are needed because sigset_ts and siginfo_ts are
+ * different in format between Linux and Illumos.
+ *
+ * Note that there are two different lx_sigset structures, lx_sigset_ts and
+ * lx_osigset_ts:
+ *
+ * + An lx_sigset_t is the equivalent of a Illumos sigset_t and supports
+ * more than 32 signals.
+ *
+ * + An lx_osigset_t is simply a uint32_t, so it by definition only supports
+ * 32 signals.
+ *
+ * When there are two versions of a routine, one prefixed with lx_rt_ and
+ * one prefixed with lx_ alone, in GENERAL the lx_rt_ routines deal with
+ * lx_sigset_ts while the lx_ routines deal with lx_osigset_ts. Unfortunately,
+ * this is not always the case (e.g. lx_sigreturn() vs. lx_rt_sigreturn())
+ */
+long
+lx_sigpending(uintptr_t sigpend)
+{
+ sigset_t sigpendset;
+
+ if (sigpending(&sigpendset) != 0)
+ return (-errno);
+
+ return (stol_osigset(&sigpendset, (lx_osigset_t *)sigpend));
+}
+#endif
+
+long
+lx_rt_sigpending(uintptr_t sigpend, uintptr_t setsize)
+{
+ sigset_t sigpendset;
+
+ if ((size_t)setsize != sizeof (lx_sigset_t))
+ return (-EINVAL);
+
+ if (sigpending(&sigpendset) != 0)
+ return (-errno);
+
+ return (stol_sigset(&sigpendset, (lx_sigset_t *)sigpend));
+}
+
+/*
+ * Create a common routine to encapsulate all of the sigprocmask code,
+ * as the only difference between lx_sigprocmask() and lx_rt_sigprocmask()
+ * is the usage of lx_osigset_ts vs. lx_sigset_ts, as toggled in the code by
+ * the setting of the "sigset_type" flag.
+ */
+static int
+lx_sigprocmask_common(uintptr_t how, uintptr_t l_setp, uintptr_t l_osetp,
+ uintptr_t sigset_type)
+{
+ int err = 0;
+ sigset_t set, oset;
+ sigset_t *s_setp = NULL;
+ sigset_t *s_osetp;
+
+ if (l_setp) {
+ switch (how) {
+ case LX_SIG_BLOCK:
+ how = SIG_BLOCK;
+ break;
+
+ case LX_SIG_UNBLOCK:
+ how = SIG_UNBLOCK;
+ break;
+
+ case LX_SIG_SETMASK:
+ how = SIG_SETMASK;
+ break;
+
+ default:
+ return (-EINVAL);
+ }
+
+ s_setp = &set;
+
+ /* Only 32-bit code passes other than USE_SIGSET */
+ if (sigset_type == USE_SIGSET)
+ err = ltos_sigset((lx_sigset_t *)l_setp, s_setp);
+#if defined(_ILP32)
+ else
+ err = ltos_osigset((lx_osigset_t *)l_setp, s_setp);
+#endif
+
+ if (err != 0)
+ return (err);
+ }
+
+ s_osetp = (l_osetp ? &oset : NULL);
+
+ /*
+ * In a multithreaded environment, a call to sigprocmask(2) should
+ * only affect the current thread's signal mask so we don't need to
+ * explicitly call thr_sigsetmask(3C) here.
+ */
+ if (sigprocmask(how, s_setp, s_osetp) != 0)
+ return (-errno);
+
+ if (l_osetp) {
+ if (sigset_type == USE_SIGSET)
+ err = stol_sigset(s_osetp, (lx_sigset_t *)l_osetp);
+#if defined(_ILP32)
+ else
+ err = stol_osigset(s_osetp, (lx_osigset_t *)l_osetp);
+#endif
+
+ if (err != 0) {
+ /*
+ * Encountered a fault while writing to the old signal
+ * mask buffer, so unwind the signal mask change made
+ * above.
+ */
+ (void) sigprocmask(how, s_osetp, (sigset_t *)NULL);
+ return (err);
+ }
+ }
+
+ return (0);
+}
+
+#if defined(_ILP32)
+long
+lx_sigprocmask(uintptr_t how, uintptr_t setp, uintptr_t osetp)
+{
+ return (lx_sigprocmask_common(how, setp, osetp, USE_OSIGSET));
+}
+#endif
+
+long
+lx_rt_sigprocmask(uintptr_t how, uintptr_t setp, uintptr_t osetp,
+ uintptr_t setsize)
+{
+ if ((size_t)setsize != sizeof (lx_sigset_t))
+ return (-EINVAL);
+
+ return (lx_sigprocmask_common(how, setp, osetp, USE_SIGSET));
+}
+
+#if defined(_ILP32)
+long
+lx_sigsuspend(uintptr_t set)
+{
+ sigset_t s_set;
+
+ if (ltos_osigset((lx_osigset_t *)set, &s_set) != 0)
+ return (-errno);
+
+ return ((sigsuspend(&s_set) == -1) ? -errno : 0);
+}
+#endif
+
+long
+lx_rt_sigsuspend(uintptr_t set, uintptr_t setsize)
+{
+ sigset_t s_set;
+
+ if ((size_t)setsize != sizeof (lx_sigset_t))
+ return (-EINVAL);
+
+ if (ltos_sigset((lx_sigset_t *)set, &s_set) != 0)
+ return (-errno);
+
+ return ((sigsuspend(&s_set) == -1) ? -errno : 0);
+}
+
+long
+lx_rt_sigwaitinfo(uintptr_t set, uintptr_t sinfo, uintptr_t setsize)
+{
+ sigset_t s_set;
+ siginfo_t s_sinfo, *s_sinfop;
+ int rc;
+
+ lx_sigset_t *setp = (lx_sigset_t *)set;
+ lx_siginfo_t *sinfop = (lx_siginfo_t *)sinfo;
+
+ if ((size_t)setsize != sizeof (lx_sigset_t))
+ return (-EINVAL);
+
+ if (ltos_sigset(setp, &s_set) != 0)
+ return (-errno);
+
+ s_sinfop = (sinfop == NULL) ? NULL : &s_sinfo;
+
+ if ((rc = sigwaitinfo(&s_set, s_sinfop)) == -1)
+ return (-errno);
+
+ if (s_sinfop == NULL)
+ return (stol_signo[rc]);
+
+ return ((stol_siginfo(s_sinfop, sinfop) != 0)
+ ? -errno : stol_signo[rc]);
+}
+
+long
+lx_rt_sigtimedwait(uintptr_t set, uintptr_t sinfo, uintptr_t toutp,
+ uintptr_t setsize)
+{
+ sigset_t s_set;
+ siginfo_t s_sinfo, *s_sinfop;
+ int rc;
+
+ lx_sigset_t *setp = (lx_sigset_t *)set;
+ lx_siginfo_t *sinfop = (lx_siginfo_t *)sinfo;
+
+ if ((size_t)setsize != sizeof (lx_sigset_t))
+ return (-EINVAL);
+
+ if (ltos_sigset(setp, &s_set) != 0)
+ return (-errno);
+
+ s_sinfop = (sinfop == NULL) ? NULL : &s_sinfo;
+
+ /*
+ * "If timeout is the NULL pointer, the behavior is unspecified."
+ * Match what LTP expects.
+ */
+ if ((rc = sigtimedwait(&s_set, s_sinfop,
+ (struct timespec *)toutp)) == -1)
+ return (toutp == NULL ? -EINTR : -errno);
+
+ if (s_sinfop == NULL)
+ return (stol_signo[rc]);
+
+ return ((stol_siginfo(s_sinfop, sinfop) != 0)
+ ? -errno : stol_signo[rc]);
+}
+
+#if defined(_ILP32)
+/*
+ * Intercept the Linux sigreturn() syscall to turn it into the return through
+ * the libc call stack that Illumos expects.
+ *
+ * When control returns to libc's call_user_handler() routine, a setcontext(2)
+ * will be done that returns thread execution to the point originally
+ * interrupted by receipt of the signal.
+ *
+ * This is only used by 32-bit code.
+ */
+long
+lx_sigreturn(void)
+{
+ struct lx_oldsigstack *lx_ossp;
+ lx_sigset_t lx_sigset;
+ lx_regs_t *rp;
+ ucontext_t *ucp;
+ uintptr_t sp;
+
+ rp = lx_syscall_regs();
+
+ /*
+ * NOTE: The sp saved in the context is eight bytes off of where we
+ * need it to be (either due to trampoline or the copying of
+ * sp = uesp, not clear which).
+ */
+ sp = (uintptr_t)rp->lxr_esp - 8;
+
+ /*
+ * At this point, the stack pointer should point to the struct
+ * lx_oldsigstack that lx_build_old_signal_frame() constructed and
+ * placed on the stack. We need to reference it a bit later, so
+ * save a pointer to it before incrementing our copy of the sp.
+ */
+ lx_ossp = (struct lx_oldsigstack *)sp;
+ sp += sizeof (struct lx_oldsigstack);
+
+ /*
+ * lx_sigdeliver() pushes LX_SIGRT_MAGIC on the stack before it
+ * creates the struct lx_oldsigstack.
+ *
+ * If we don't find it here, the stack's been corrupted and we need to
+ * kill ourselves.
+ */
+ if (*(uint32_t *)sp != LX_SIGRT_MAGIC)
+ lx_err_fatal("sp @ 0x%p, expected 0x%x, found 0x%x!",
+ sp, LX_SIGRT_MAGIC, *(uint32_t *)sp);
+
+ sp += sizeof (uint32_t);
+
+ /*
+ * For signal mask handling to be done properly, this call needs to
+ * return to the libc routine that originally called the signal handler
+ * rather than directly set the context back to the place the signal
+ * interrupted execution as the original Linux code would do.
+ *
+ * Here *sp points to the Illumos ucontext_t, so we need to copy
+ * machine registers the Linux signal handler may have modified
+ * back to the Illumos version.
+ */
+ ucp = (ucontext_t *)(*(ssize_t *)sp);
+
+ /*
+ * General registers copy across as-is, except Linux expects that
+ * changes made to uc_mcontext.gregs[ESP] will be reflected when the
+ * interrupted thread resumes execution after the signal handler. To
+ * emulate this behavior, we must modify uc_mcontext.gregs[UESP] to
+ * match uc_mcontext.gregs[ESP] as Illumos will restore the UESP
+ * value to ESP.
+ */
+ lx_ossp->sigc.sc_esp_at_signal = lx_ossp->sigc.sc_esp;
+ bcopy(&lx_ossp->sigc, &ucp->uc_mcontext, sizeof (gregset_t));
+
+ /* copy back FP regs if present */
+ if (lx_ossp->sigc.sc_fpstate != NULL)
+ ltos_fpstate(&lx_ossp->fpstate, &ucp->uc_mcontext.fpregs);
+
+ /* convert Linux signal mask back to its Illumos equivalent */
+ bzero(&lx_sigset, sizeof (lx_sigset_t));
+ lx_sigset.__bits[0] = lx_ossp->sigc.sc_mask;
+ lx_sigset.__bits[1] = lx_ossp->sig_extra;
+ (void) ltos_sigset(&lx_sigset, &ucp->uc_sigmask);
+
+ /*
+ * At this point sp contains the value of the stack pointer when
+ * lx_call_user_handler() was called.
+ *
+ * Pop one more value off the stack and pass the new sp to
+ * lx_sigreturn_tolibc(), which will in turn manipulate the x86
+ * registers to make it appear to libc's call_user_handler() as if the
+ * handler it had called returned.
+ */
+ sp += sizeof (uint32_t);
+ lx_debug("calling lx_sigreturn_tolibc(0x%p)", sp);
+ lx_sigreturn_tolibc(sp);
+
+ /*NOTREACHED*/
+ return (0);
+}
+#endif
+
+/*
+ * This signal return syscall is used by both 32-bit and 64-bit code.
+ */
+long
+lx_rt_sigreturn(void)
+{
+ struct lx_sigstack *lx_ssp;
+ lx_regs_t *rp;
+ lx_ucontext_t *lx_ucp;
+ ucontext_t *ucp;
+ uintptr_t sp;
+
+ /* Get the registers at the emulated Linux rt_sigreturn syscall */
+ rp = lx_syscall_regs();
+
+#if defined(_ILP32)
+ /*
+ * For 32-bit
+ *
+ * NOTE: Because of the silly compatibility measures done in the
+ * signal trampoline code to make sure the stack holds the
+ * _exact same_ instruction sequence Linux does, we have to
+ * manually "pop" some extra instructions off the stack here
+ * before passing the stack address to the syscall because the
+ * trampoline code isn't allowed to do it due to the gdb
+ * compatability issues.
+ *
+ * No, I'm not kidding.
+ *
+ * The sp saved in the context is eight bytes off of where we
+ * need it to be (either due to trampoline or the copying of
+ * sp = uesp, not clear which but looks like the uesp case), so
+ * the need to pop the extra four byte instruction means we need
+ * to subtract a net four bytes from the sp before "popping" the
+ * struct lx_sigstack off the stack.
+ *
+ * This will yield the value the stack pointer had before
+ * lx_sigdeliver() created the stack frame for the Linux signal
+ * handler.
+ */
+ sp = (uintptr_t)rp->lxr_esp - 4;
+#else
+ /*
+ * We need to make an adjustment for 64-bit code as well. Since 64-bit
+ * does not use the trampoline, it's probably for the same reason as
+ * alluded to above.
+ */
+ sp = (uintptr_t)rp->lxr_rsp - 8;
+#endif
+
+ /*
+ * At this point, the stack pointer should point to the struct
+ * lx_sigstack that lx_build_signal_frame() constructed and
+ * placed on the stack. We need to reference it a bit later, so
+ * save a pointer to it before incrementing our copy of the sp.
+ */
+ lx_ssp = (struct lx_sigstack *)sp;
+ sp += sizeof (struct lx_sigstack);
+
+ /*
+ * We handle 32 vs. 64 bit differently here, but first, lx_sigdeliver()
+ * pushed LX_SIGRT_MAGIC on the stack before it created the
+ * struct lx_sigstack (and possibly struct lx_fpstate_t).
+ *
+ * If we don't find LX_SIGRT_MAGIC here, the stack's been corrupted and
+ * we need to kill ourselves.
+ *
+ * Check for and remove LX_SIGRT_MAGIC from the stack.
+ */
+#if defined(_LP64)
+ /* account for extra word used in lx_sigdeliver for stack alignment */
+ sp += 8;
+
+ if (*(uint64_t *)sp != LX_SIGRT_MAGIC)
+ lx_err_fatal("sp @ 0x%p, expected 0x%x, found 0x%x!",
+ sp, LX_SIGRT_MAGIC, *(uint32_t *)sp);
+ sp += sizeof (uint64_t);
+
+ /*
+ * Now *(sp + 24) points to the Illumos ucontext_t (working backwards
+ * through the Linux signal hander, the stack builder, and the stack
+ * size) which we saved on the stack in the lx_sigdeliver assembly
+ * prologue before we pushed LX_SIGRT_MAGIC, so we need to copy machine
+ * registers the Linux signal handler may have modified back to the
+ * Illumos version.
+ */
+ ucp = (ucontext_t *)(*(ssize_t *)(sp + 24));
+
+ lx_ucp = &lx_ssp->uc;
+
+ /*
+ * General register layout is completely different.
+ */
+ ucp->uc_mcontext.gregs[REG_R15] = lx_ucp->uc_sigcontext.sc_r15;
+ ucp->uc_mcontext.gregs[REG_R14] = lx_ucp->uc_sigcontext.sc_r14;
+ ucp->uc_mcontext.gregs[REG_R13] = lx_ucp->uc_sigcontext.sc_r13;
+ ucp->uc_mcontext.gregs[REG_R12] = lx_ucp->uc_sigcontext.sc_r12;
+ ucp->uc_mcontext.gregs[REG_R11] = lx_ucp->uc_sigcontext.sc_r11;
+ ucp->uc_mcontext.gregs[REG_R10] = lx_ucp->uc_sigcontext.sc_r10;
+ ucp->uc_mcontext.gregs[REG_R9] = lx_ucp->uc_sigcontext.sc_r9;
+ ucp->uc_mcontext.gregs[REG_R8] = lx_ucp->uc_sigcontext.sc_r8;
+ ucp->uc_mcontext.gregs[REG_RDI] = lx_ucp->uc_sigcontext.sc_rdi;
+ ucp->uc_mcontext.gregs[REG_RSI] = lx_ucp->uc_sigcontext.sc_rsi;
+ ucp->uc_mcontext.gregs[REG_RBP] = lx_ucp->uc_sigcontext.sc_rbp;
+ ucp->uc_mcontext.gregs[REG_RBX] = lx_ucp->uc_sigcontext.sc_rbx;
+ ucp->uc_mcontext.gregs[REG_RDX] = lx_ucp->uc_sigcontext.sc_rdx;
+ ucp->uc_mcontext.gregs[REG_RCX] = lx_ucp->uc_sigcontext.sc_rcx;
+ ucp->uc_mcontext.gregs[REG_RAX] = lx_ucp->uc_sigcontext.sc_rax;
+ ucp->uc_mcontext.gregs[REG_TRAPNO] = lx_ucp->uc_sigcontext.sc_trapno;
+ ucp->uc_mcontext.gregs[REG_ERR] = lx_ucp->uc_sigcontext.sc_err;
+ ucp->uc_mcontext.gregs[REG_RIP] = lx_ucp->uc_sigcontext.sc_rip;
+ ucp->uc_mcontext.gregs[REG_CS] = lx_ucp->uc_sigcontext.sc_cs;
+ ucp->uc_mcontext.gregs[REG_RFL] = lx_ucp->uc_sigcontext.sc_eflags;
+ ucp->uc_mcontext.gregs[REG_RSP] = lx_ucp->uc_sigcontext.sc_rsp;
+ ucp->uc_mcontext.gregs[REG_SS] = lx_ucp->uc_sigcontext.sc_pad0;
+ ucp->uc_mcontext.gregs[REG_FS] = lx_ucp->uc_sigcontext.sc_fs;
+ ucp->uc_mcontext.gregs[REG_GS] = lx_ucp->uc_sigcontext.sc_gs;
+
+#else /* is _ILP32 */
+ if (*(uint32_t *)sp != LX_SIGRT_MAGIC)
+ lx_err_fatal("sp @ 0x%p, expected 0x%x, found 0x%x!",
+ sp, LX_SIGRT_MAGIC, *(uint32_t *)sp);
+ sp += sizeof (uint32_t);
+
+ /*
+ * Here *sp points to the Illumos ucontext_t which was saved on stack
+ * right before we pushed LX_SIGRT_MAGIC in the 32-bit lx_sigdeliver
+ * assembly code. We need to copy machine registers the Linux signal
+ * handler may have modified back to the Illumos version.
+ */
+ ucp = (ucontext_t *)(*(ssize_t *)sp);
+
+ lx_ucp = lx_ssp->ucp;
+
+ /*
+ * Illumos and Linux both follow the SysV i386 ABI layout for the
+ * mcontext.
+ *
+ * General registers copy across as-is, except Linux expects that
+ * changes made to uc_mcontext.gregs[ESP] will be reflected when the
+ * interrupted thread resumes execution after the signal handler. To
+ * emulate this behavior, we must modify uc_mcontext.gregs[UESP] to
+ * match uc_mcontext.gregs[ESP] as Illumos will restore the UESP value
+ * to ESP.
+ */
+ lx_ucp->uc_sigcontext.sc_esp_at_signal = lx_ucp->uc_sigcontext.sc_esp;
+
+ bcopy(&lx_ucp->uc_sigcontext, &ucp->uc_mcontext.gregs,
+ sizeof (gregset_t));
+#endif
+
+ if (lx_ucp->uc_sigcontext.sc_fpstate != NULL)
+ ltos_fpstate(lx_ucp->uc_sigcontext.sc_fpstate,
+ &ucp->uc_mcontext.fpregs);
+
+ /*
+ * Convert the Linux signal mask and stack back to their
+ * Illumos equivalents.
+ */
+ (void) ltos_sigset(&lx_ucp->uc_sigmask, &ucp->uc_sigmask);
+ ltos_stack(&lx_ucp->uc_stack, &ucp->uc_stack);
+
+ /*
+ * For signal mask handling to be done properly, this function must
+ * return to the libc call_user_handler() routine that originally
+ * called the signal handler, rather than directly set the context back
+ * to the place the signal interrupted execution, as the original Linux
+ * code would do.
+ *
+ * For the 64-bit case we can't simply let call_user_handler() invoke
+ * __setcontext() since we need to also manage the syscall mode. Thus
+ * we use the lx_setcontext callback hook into libc to manage this via
+ * a brand call which combines the setcontext with setting the mode
+ * switch.
+ */
+#if defined(_LP64)
+ /*
+ * At this point sp points to the end of the stack frame we constructed
+ * on entry to lx_sigdeliver. Pop this frame off the stack.
+ */
+ sp += 0x30;
+
+#else
+ /*
+ * At this point sp points to the ucontext_t pointer we pushed on the
+ * stack right before we pushed LX_SIGRT_MAGIC in lx_sigdeliver. Pop
+ * this value off the stack.
+ */
+ sp += sizeof (uint32_t);
+#endif
+
+ /*
+ * At this point sp points to the base frame we had on entry to
+ * lx_sigdeliver (%ebp/%rbp at TOS, return address next).
+ *
+ * Pass the new sp to lx_sigreturn_tolibc(), which will in turn
+ * manipulate the x86 registers to make it appear that
+ * lx_call_user_handler() has returned. This will then take us directly
+ * back to libc's call_user_handler().
+ */
+ lx_debug("calling lx_sigreturn_tolibc(0x%p)", sp);
+ lx_sigreturn_tolibc(sp);
+
+ /*NOTREACHED*/
+ return (0);
+}
+
+#if defined(_LP64)
+static int
+lx_setcontext(const ucontext_t *ucp)
+{
+ extern int lx_traceflag;
+
+ /*
+ * Since we don't return via lx_emulate, issue a trace msg here if
+ * necessary. We know this is only called in the 64-bit rt_sigreturn
+ * code path to the syscall number is 15.
+ */
+ if (lx_traceflag != 0) {
+ (void) syscall(SYS_brand, B_SYSRETURN, 15, 0);
+ }
+ return (syscall(SYS_brand, B_SIGNAL_RETURN, ucp));
+}
+#endif
+
+
+#if defined(_ILP32)
+/*
+ * Build signal frame for processing for "old" (legacy) Linux signals
+ * This stack-builder function is only used by 32-bit code.
+ */
+static void
+lx_build_old_signal_frame(int lx_sig, siginfo_t *sip, void *p, void *sp)
+{
+ extern void lx_sigreturn_tramp();
+
+ lx_sigset_t lx_sigset;
+ ucontext_t *ucp = (ucontext_t *)p;
+ struct lx_sigaction *lxsap;
+ struct lx_oldsigstack *lx_ossp = sp;
+
+ lx_debug("building old signal frame for lx sig %d at 0x%p", lx_sig, sp);
+
+ lx_ossp->sig = lx_sig;
+ lxsap = &lx_sighandlers.lx_sa[lx_sig];
+ lx_debug("lxsap @ 0x%p", lxsap);
+
+ if (lxsap && (lxsap->lxsa_flags & LX_SA_RESTORER) &&
+ lxsap->lxsa_restorer) {
+ lx_ossp->retaddr = lxsap->lxsa_restorer;
+ lx_debug("lxsa_restorer exists @ 0x%p", lx_ossp->retaddr);
+ } else {
+ lx_ossp->retaddr = lx_sigreturn_tramp;
+ lx_debug("lx_ossp->retaddr set to 0x%p", lx_sigreturn_tramp);
+ }
+
+ lx_debug("osf retaddr = 0x%p", lx_ossp->retaddr);
+
+ /* convert Illumos signal mask and stack to their Linux equivalents */
+ (void) stol_sigset(&ucp->uc_sigmask, &lx_sigset);
+ lx_ossp->sigc.sc_mask = lx_sigset.__bits[0];
+ lx_ossp->sig_extra = lx_sigset.__bits[1];
+
+ /*
+ * General registers copy across as-is, except Linux expects that
+ * uc_mcontext.gregs[ESP] == uc_mcontext.gregs[UESP] on receipt of a
+ * signal.
+ */
+ bcopy(&ucp->uc_mcontext, &lx_ossp->sigc, sizeof (gregset_t));
+ lx_ossp->sigc.sc_esp = lx_ossp->sigc.sc_esp_at_signal;
+
+ /*
+ * cr2 contains the faulting address, and Linux only sets cr2 for a
+ * a segmentation fault.
+ */
+ lx_ossp->sigc.sc_cr2 = (((lx_sig == LX_SIGSEGV) && (sip)) ?
+ (uintptr_t)sip->si_addr : 0);
+
+ /* convert FP regs if present */
+ if (ucp->uc_flags & UC_FPU) {
+ stol_fpstate(&ucp->uc_mcontext.fpregs, &lx_ossp->fpstate);
+ lx_ossp->sigc.sc_fpstate = &lx_ossp->fpstate;
+ } else {
+ lx_ossp->sigc.sc_fpstate = NULL;
+ }
+
+ /*
+ * Believe it or not, gdb wants to SEE the trampoline code on the
+ * bottom of the stack to determine whether the stack frame belongs to
+ * a signal handler, even though this code is no longer actually
+ * called.
+ *
+ * You can't make this stuff up.
+ */
+ bcopy((void *)lx_sigreturn_tramp, lx_ossp->trampoline,
+ sizeof (lx_ossp->trampoline));
+}
+#endif
+
+/*
+ * Build stack frame (32-bit) or stack local data (64-bit) for processing for
+ * modern Linux signals. This is the only stack-builder function for 64-bit
+ * code (32-bit code also calls this when using "modern" signals).
+ */
+static void
+lx_build_signal_frame(int lx_sig, siginfo_t *sip, void *p, void *sp)
+{
+ extern void lx_rt_sigreturn_tramp();
+
+ lx_ucontext_t *lx_ucp;
+ ucontext_t *ucp = (ucontext_t *)p;
+ struct lx_sigstack *lx_ssp = sp;
+ struct lx_sigaction *lxsap;
+
+ lx_debug("building signal frame for lx sig %d at 0x%p", lx_sig, sp);
+
+ lx_ucp = &lx_ssp->uc;
+#if defined(_ILP32)
+ lx_ssp->ucp = lx_ucp;
+ lx_ssp->sig = lx_sig;
+#endif
+
+ lxsap = &lx_sighandlers.lx_sa[lx_sig];
+ lx_debug("lxsap @ 0x%p", lxsap);
+
+ if (lxsap && (lxsap->lxsa_flags & LX_SA_RESTORER) &&
+ lxsap->lxsa_restorer) {
+ /*
+ * lxsa_restorer is explicitly set by sigaction in 32-bit code
+ * but it can also be implicitly set for both 32 and 64 bit
+ * code via lx_sigaction_common when we bcopy the user-supplied
+ * lx_sigaction element into the proper slot in the sighandler
+ * array.
+ */
+ lx_ssp->retaddr = lxsap->lxsa_restorer;
+ lx_debug("lxsa_restorer exists @ 0x%p", lx_ssp->retaddr);
+ } else {
+ lx_ssp->retaddr = lx_rt_sigreturn_tramp;
+ lx_debug("lx_ssp->retaddr set to 0x%p", lx_rt_sigreturn_tramp);
+ }
+
+ /* Linux has these fields but always clears them to 0 */
+ lx_ucp->uc_flags = 0;
+ lx_ucp->uc_link = NULL;
+
+ /* convert Illumos signal mask and stack to their Linux equivalents */
+ (void) stol_sigset(&ucp->uc_sigmask, &lx_ucp->uc_sigmask);
+ stol_stack(&ucp->uc_stack, &lx_ucp->uc_stack);
+
+#if defined(_LP64)
+ /*
+ * General register layout is completely different.
+ */
+ lx_ucp->uc_sigcontext.sc_r8 = ucp->uc_mcontext.gregs[REG_R8];
+ lx_ucp->uc_sigcontext.sc_r9 = ucp->uc_mcontext.gregs[REG_R9];
+ lx_ucp->uc_sigcontext.sc_r10 = ucp->uc_mcontext.gregs[REG_R10];
+ lx_ucp->uc_sigcontext.sc_r11 = ucp->uc_mcontext.gregs[REG_R11];
+ lx_ucp->uc_sigcontext.sc_r12 = ucp->uc_mcontext.gregs[REG_R12];
+ lx_ucp->uc_sigcontext.sc_r13 = ucp->uc_mcontext.gregs[REG_R13];
+ lx_ucp->uc_sigcontext.sc_r14 = ucp->uc_mcontext.gregs[REG_R14];
+ lx_ucp->uc_sigcontext.sc_r15 = ucp->uc_mcontext.gregs[REG_R15];
+ lx_ucp->uc_sigcontext.sc_rdi = ucp->uc_mcontext.gregs[REG_RDI];
+ lx_ucp->uc_sigcontext.sc_rsi = ucp->uc_mcontext.gregs[REG_RSI];
+ lx_ucp->uc_sigcontext.sc_rbp = ucp->uc_mcontext.gregs[REG_RBP];
+ lx_ucp->uc_sigcontext.sc_rbx = ucp->uc_mcontext.gregs[REG_RBX];
+ lx_ucp->uc_sigcontext.sc_rdx = ucp->uc_mcontext.gregs[REG_RDX];
+ lx_ucp->uc_sigcontext.sc_rax = ucp->uc_mcontext.gregs[REG_RAX];
+ lx_ucp->uc_sigcontext.sc_rcx = ucp->uc_mcontext.gregs[REG_RCX];
+ lx_ucp->uc_sigcontext.sc_rsp = ucp->uc_mcontext.gregs[REG_RSP];
+ lx_ucp->uc_sigcontext.sc_rip = ucp->uc_mcontext.gregs[REG_RIP];
+ lx_ucp->uc_sigcontext.sc_eflags = ucp->uc_mcontext.gregs[REG_RFL];
+ lx_ucp->uc_sigcontext.sc_cs = ucp->uc_mcontext.gregs[REG_CS];
+ lx_ucp->uc_sigcontext.sc_gs = ucp->uc_mcontext.gregs[REG_GS];
+ lx_ucp->uc_sigcontext.sc_fs = ucp->uc_mcontext.gregs[REG_FS];
+ lx_ucp->uc_sigcontext.sc_pad0 = ucp->uc_mcontext.gregs[REG_SS];
+ lx_ucp->uc_sigcontext.sc_err = ucp->uc_mcontext.gregs[REG_ERR];
+ lx_ucp->uc_sigcontext.sc_trapno = ucp->uc_mcontext.gregs[REG_TRAPNO];
+
+#else /* is _ILP32 */
+ /*
+ * General registers copy across as-is, except Linux expects that
+ * uc_mcontext.gregs[ESP] == uc_mcontext.gregs[UESP] on receipt of a
+ * signal.
+ */
+ bcopy(&ucp->uc_mcontext, &lx_ucp->uc_sigcontext, sizeof (gregset_t));
+ lx_ucp->uc_sigcontext.sc_esp = lx_ucp->uc_sigcontext.sc_esp_at_signal;
+#endif
+
+ /*
+ * cr2 contains the faulting address, which Linux only sets for a
+ * a segmentation fault.
+ */
+ lx_ucp->uc_sigcontext.sc_cr2 = ((lx_sig == LX_SIGSEGV) && (sip)) ?
+ (uintptr_t)sip->si_addr : 0;
+
+ /*
+ * Point the lx_siginfo_t pointer to the signal stack's lx_siginfo_t
+ * if there was a Illumos siginfo_t to convert, otherwise set it to
+ * NULL. For 64-bit code a NULL sip is handled in the lx_deliver
+ * assembly code.
+ */
+#if defined(_ILP32)
+ if (sip != NULL)
+ lx_ssp->sip = &lx_ssp->si;
+ else
+ lx_ssp->sip = NULL;
+#endif
+
+ /*
+ * This should only return an error if the signum is invalid but that
+ * also gets converted into a LX_SIGKILL by this function.
+ */
+ if (sip != NULL)
+ (void) stol_siginfo(sip, &lx_ssp->si);
+ else
+ bzero(&lx_ssp->si, sizeof (lx_siginfo_t));
+
+ /* convert FP regs if present */
+ if (ucp->uc_flags & UC_FPU) {
+ /*
+ * Copy FP regs to the appropriate place in the the lx_sigstack
+ * structure.
+ */
+ stol_fpstate(&ucp->uc_mcontext.fpregs, &lx_ssp->fpstate);
+ lx_ucp->uc_sigcontext.sc_fpstate = &lx_ssp->fpstate;
+ } else {
+ lx_ucp->uc_sigcontext.sc_fpstate = NULL;
+ }
+
+#if defined(_ILP32)
+ /*
+ * Believe it or not, gdb wants to SEE the sigreturn code on the
+ * top of the stack to determine whether the stack frame belongs to
+ * a signal handler, even though this code is not actually called.
+ *
+ * You can't make this stuff up.
+ */
+ bcopy((void *)lx_rt_sigreturn_tramp, lx_ssp->trampoline,
+ sizeof (lx_ssp->trampoline));
+#endif
+
+#if defined(_LP64)
+ /*
+ * For the 64-bit code this must be the last syscall we do in the
+ * emulation code path before we return back to the Linux signal
+ * handler. This will disable native syscalls so the next time a
+ * syscall happens on this thread, it will come back into the emulation.
+ */
+ (void) syscall(SYS_brand, B_CLR_NTV_SYSC_FLAG);
+#endif
+
+ /* We return to lx_sigdeliver to jump into the Linux signal handler */
+}
+
+/*
+ * This is the second level interposition handler for Linux signals.
+ */
+static void
+lx_call_user_handler(int sig, siginfo_t *sip, void *p)
+{
+ void (*user_handler)();
+ void (*stk_builder)();
+#if defined(_ILP32)
+ lx_tsd_t *lx_tsd;
+ int err;
+#endif
+ struct lx_sigaction *lxsap;
+ ucontext_t *ucp = (ucontext_t *)p;
+ uintptr_t gs;
+ size_t stksize;
+ int lx_sig;
+
+ /*
+ * If Illumos signal has no Linux equivalent, effectively ignore it.
+ */
+ if ((lx_sig = stol_signo[sig]) == -1) {
+ lx_unsupported("caught Illumos signal %d, no Linux equivalent",
+ sig);
+ return;
+ }
+
+ lx_debug("interpose caught Illumos signal %d, translating to Linux "
+ "signal %d", sig, lx_sig);
+
+ lxsap = &lx_sighandlers.lx_sa[lx_sig];
+ lx_debug("lxsap @ 0x%p", lxsap);
+
+ if ((sig == SIGPWR) && (lxsap->lxsa_handler == SIG_DFL)) {
+ /*
+ * Linux SIG_DFL for SIGPWR is to terminate. The lx wait
+ * emulation will translate SIGPWR to LX_SIGPWR.
+ */
+ (void) syscall(SYS_brand, B_EXIT_AS_SIG, SIGPWR);
+ /* This should never return */
+ assert(0);
+ }
+
+ if (lxsap->lxsa_handler == SIG_DFL || lxsap->lxsa_handler == SIG_IGN)
+ lx_err_fatal("lxsa_handler set to %s? How?!?!?",
+ (lxsap->lxsa_handler == SIG_DFL) ? "SIG_DFL" : "SIG_IGN");
+
+#if defined(_LP64)
+ /* %gs is ignored in the 64-bit lx_sigdeliver */
+ gs = 0;
+
+ stksize = sizeof (struct lx_sigstack);
+ stk_builder = lx_build_signal_frame;
+
+#else
+ if ((err = thr_getspecific(lx_tsd_key, (void **)&lx_tsd)) != 0)
+ lx_err_fatal("lx_call_user_handler: unable to read "
+ "thread-specific data: %s", strerror(err));
+
+ assert(lx_tsd != 0);
+
+ gs = lx_tsd->lxtsd_gs & 0xffff; /* gs is only 16 bits */
+
+ /*
+ * Any zero %gs value should be caught when a save is attempted in
+ * lx_emulate(), but this extra check will catch any zero values due to
+ * bugs in the library. This is only applicable to 32-bit code.
+ */
+ assert(gs != 0);
+
+ if (lxsap->lxsa_flags & LX_SA_SIGINFO) {
+ stksize = sizeof (struct lx_sigstack);
+ stk_builder = lx_build_signal_frame;
+ } else {
+ stksize = sizeof (struct lx_oldsigstack);
+ stk_builder = lx_build_old_signal_frame;
+ }
+#endif
+
+ user_handler = lxsap->lxsa_handler;
+
+ lx_debug("delivering %d (lx %d) to handler at 0x%p with gs 0x%x", sig,
+ lx_sig, lxsap->lxsa_handler, gs);
+
+ if (lxsap->lxsa_flags & LX_SA_RESETHAND)
+ lxsap->lxsa_handler = SIG_DFL;
+
+ /*
+ * lx_sigdeliver() doesn't return, so it relies on the Linux signal
+ * handler to clean up the stack, reset the current signal mask and
+ * make a system call (sigreturn or rt_sigreturn) which is intended to
+ * return to the code interrupted by the signal. The emulation will
+ * catch that syscall, finish it's own cleanup, then actually return
+ * back through here via lx_sigreturn_tolibc(), which leads us back
+ * into libc and then back to the point where we were interrupted.
+ */
+ lx_sigdeliver(lx_sig, sip, ucp, stksize, stk_builder, user_handler, gs);
+}
+
+/*
+ * Common routine to modify sigaction characteristics of a thread.
+ *
+ * We shouldn't need any special locking code here as we actually use our copy
+ * of libc's sigaction() to do all the real work, so its thread locking should
+ * take care of any issues for us.
+ */
+static int
+lx_sigaction_common(int lx_sig, struct lx_sigaction *lxsp,
+ struct lx_sigaction *olxsp)
+{
+ struct lx_sigaction *lxsap;
+ struct sigaction sa;
+
+ if (lx_sig <= 0 || lx_sig > LX_NSIG)
+ return (-EINVAL);
+
+ lxsap = &lx_sighandlers.lx_sa[lx_sig];
+ lx_debug("&lx_sighandlers.lx_sa[%d] = 0x%p", lx_sig, lxsap);
+
+ if ((olxsp != NULL) &&
+ ((uucopy(lxsap, olxsp, sizeof (struct lx_sigaction))) != 0))
+ return (-errno);
+
+ if (lxsp != NULL) {
+ int err, sig;
+ struct lx_sigaction lxsa;
+ sigset_t new_set, oset;
+
+ if (uucopy(lxsp, &lxsa, sizeof (struct lx_sigaction)) != 0)
+ return (-errno);
+
+ if ((sig = ltos_signo[lx_sig]) != -1) {
+ /*
+ * Block this signal while messing with its dispostion
+ */
+ (void) sigemptyset(&new_set);
+ (void) sigaddset(&new_set, sig);
+
+ if (sigprocmask(SIG_BLOCK, &new_set, &oset) < 0) {
+ err = errno;
+ lx_debug("unable to block signal %d: %s", sig,
+ strerror(err));
+ return (-err);
+ }
+
+ /*
+ * We don't really need the old signal disposition at
+ * this point, but this weeds out signals that would
+ * cause sigaction() to return an error before we change
+ * anything other than the current signal mask.
+ */
+ if (sigaction(sig, NULL, &sa) < 0) {
+ err = errno;
+ lx_debug("sigaction() to get old "
+ "disposition for signal %d failed: "
+ "%s", sig, strerror(err));
+ (void) sigprocmask(SIG_SETMASK, &oset, NULL);
+ return (-err);
+ }
+
+ if ((lxsa.lxsa_handler != SIG_DFL) &&
+ (lxsa.lxsa_handler != SIG_IGN)) {
+ sa.sa_handler = lx_call_user_handler;
+
+ /*
+ * The interposition signal handler needs the
+ * information provided via the SA_SIGINFO flag.
+ */
+ sa.sa_flags = SA_SIGINFO;
+
+ if (lxsa.lxsa_flags & LX_SA_NOCLDSTOP)
+ sa.sa_flags |= SA_NOCLDSTOP;
+ if (lxsa.lxsa_flags & LX_SA_NOCLDWAIT)
+ sa.sa_flags |= SA_NOCLDWAIT;
+ if (lxsa.lxsa_flags & LX_SA_ONSTACK)
+ sa.sa_flags |= SA_ONSTACK;
+ if (lxsa.lxsa_flags & LX_SA_RESTART)
+ sa.sa_flags |= SA_RESTART;
+ if (lxsa.lxsa_flags & LX_SA_NODEFER)
+ sa.sa_flags |= SA_NODEFER;
+
+ /*
+ * Can't use RESETHAND with SIGPWR due to
+ * different default actions between Linux
+ * and Illumos.
+ */
+ if ((sig != SIGPWR) &&
+ (lxsa.lxsa_flags & LX_SA_RESETHAND))
+ sa.sa_flags |= SA_RESETHAND;
+
+ if (ltos_sigset(&lxsa.lxsa_mask,
+ &sa.sa_mask) != 0) {
+ err = errno;
+ (void) sigprocmask(SIG_SETMASK, &oset,
+ NULL);
+ return (-err);
+ }
+
+ lx_debug("interposing handler @ 0x%p for "
+ "signal %d (lx %d), flags 0x%x",
+ lxsa.lxsa_handler, sig, lx_sig,
+ lxsa.lxsa_flags);
+
+ if (sigaction(sig, &sa, NULL) < 0) {
+ err = errno;
+ lx_debug("sigaction() to set new "
+ "disposition for signal %d failed: "
+ "%s", sig, strerror(err));
+ (void) sigprocmask(SIG_SETMASK, &oset,
+ NULL);
+ return (-err);
+ }
+ } else if ((sig != SIGPWR) ||
+ ((sig == SIGPWR) &&
+ (lxsa.lxsa_handler == SIG_IGN))) {
+ /*
+ * There's no need to interpose for SIG_DFL or
+ * SIG_IGN so just call our copy of libc's
+ * sigaction(), but don't allow SIG_DFL for
+ * SIGPWR due to differing default actions
+ * between Linux and Illumos.
+ *
+ * Get the previous disposition first so things
+ * like sa_mask and sa_flags are preserved over
+ * a transition to SIG_DFL or SIG_IGN, which is
+ * what Linux expects.
+ */
+
+ sa.sa_handler = lxsa.lxsa_handler;
+
+ if (sigaction(sig, &sa, NULL) < 0) {
+ err = errno;
+ lx_debug("sigaction(%d, %s) failed: %s",
+ sig, ((sa.sa_handler == SIG_DFL) ?
+ "SIG_DFL" : "SIG_IGN"),
+ strerror(err));
+ (void) sigprocmask(SIG_SETMASK, &oset,
+ NULL);
+ return (-err);
+ }
+ }
+ } else {
+ lx_debug("Linux signal with no kill support "
+ "specified: %d", lx_sig);
+ }
+
+ /*
+ * Save the new disposition for the signal in the global
+ * lx_sighandlers structure.
+ */
+ bcopy(&lxsa, lxsap, sizeof (struct lx_sigaction));
+
+ /*
+ * Reset the signal mask to what we came in with if
+ * we were modifying a kill-supported signal.
+ */
+ if (sig != -1)
+ (void) sigprocmask(SIG_SETMASK, &oset, NULL);
+ }
+
+ return (0);
+}
+
+#if defined(_ILP32)
+/*
+ * sigaction is only used in 32-bit code.
+ */
+long
+lx_sigaction(uintptr_t lx_sig, uintptr_t actp, uintptr_t oactp)
+{
+ int val;
+ struct lx_sigaction sa, osa;
+ struct lx_sigaction *sap, *osap;
+ struct lx_osigaction *osp;
+
+ sap = (actp ? &sa : NULL);
+ osap = (oactp ? &osa : NULL);
+
+ /*
+ * If we have a source pointer, convert source lxsa_mask from
+ * lx_osigset_t to lx_sigset_t format.
+ */
+ if (sap) {
+ osp = (struct lx_osigaction *)actp;
+ sap->lxsa_handler = osp->lxsa_handler;
+
+ bzero(&sap->lxsa_mask, sizeof (lx_sigset_t));
+
+ for (val = 1; val <= OSIGSET_NBITS; val++)
+ if (osp->lxsa_mask & OSIGSET_BITSET(val))
+ (void) lx_sigaddset(&sap->lxsa_mask, val);
+
+ sap->lxsa_flags = osp->lxsa_flags;
+ sap->lxsa_restorer = osp->lxsa_restorer;
+ }
+
+ if ((val = lx_sigaction_common(lx_sig, sap, osap)))
+ return (val);
+
+ /*
+ * If we have a save pointer, convert the old lxsa_mask from
+ * lx_sigset_t to lx_osigset_t format.
+ */
+ if (osap) {
+ osp = (struct lx_osigaction *)oactp;
+
+ osp->lxsa_handler = osap->lxsa_handler;
+
+ bzero(&osp->lxsa_mask, sizeof (osp->lxsa_mask));
+ for (val = 1; val <= OSIGSET_NBITS; val++)
+ if (lx_sigismember(&osap->lxsa_mask, val))
+ osp->lxsa_mask |= OSIGSET_BITSET(val);
+
+ osp->lxsa_flags = osap->lxsa_flags;
+ osp->lxsa_restorer = osap->lxsa_restorer;
+ }
+
+ return (0);
+}
+#endif
+
+long
+lx_rt_sigaction(uintptr_t lx_sig, uintptr_t actp, uintptr_t oactp,
+ uintptr_t setsize)
+{
+ /*
+ * The "new" rt_sigaction call checks the setsize
+ * parameter.
+ */
+ if ((size_t)setsize != sizeof (lx_sigset_t))
+ return (-EINVAL);
+
+ return (lx_sigaction_common(lx_sig, (struct lx_sigaction *)actp,
+ (struct lx_sigaction *)oactp));
+}
+
+#if defined(_ILP32)
+/*
+ * Convert signal syscall to a call to the lx_sigaction() syscall
+ * Only used in 32-bit code.
+ */
+long
+lx_signal(uintptr_t lx_sig, uintptr_t handler)
+{
+ struct sigaction act;
+ struct sigaction oact;
+ int rc;
+
+ /*
+ * Use sigaction to mimic SYSV signal() behavior; glibc will
+ * actually call sigaction(2) itself, so we're really reaching
+ * back for signal(2) semantics here.
+ */
+ bzero(&act, sizeof (act));
+ act.sa_handler = (void (*)())handler;
+ act.sa_flags = SA_RESETHAND | SA_NODEFER;
+
+ rc = lx_sigaction(lx_sig, (uintptr_t)&act, (uintptr_t)&oact);
+ return ((rc == 0) ? ((ssize_t)oact.sa_handler) : rc);
+}
+#endif
+
+#if defined(_ILP32)
+/*
+ * This is only used in 32-bit code and is called by the assembly routine
+ * lx_sigacthandler.
+ *
+ * This C routine saves the passed %gs value into the thread-specific save area.
+ */
+void
+lx_sigsavegs(uintptr_t signalled_gs)
+{
+ lx_tsd_t *lx_tsd;
+ int err;
+
+ signalled_gs &= 0xffff; /* gs is only 16 bits */
+
+ /*
+ * While a %gs of 0 is technically legal (as long as the application
+ * never dereferences memory using %gs), Illumos has its own ideas as
+ * to how a zero %gs should be handled in _update_sregs(), such that
+ * any 32-bit user process with a %gs of zero running on a system with
+ * a 64-bit kernel will have its %gs hidden base register stomped on on
+ * return from a system call, leaving an incorrect base address in
+ * place until the next time %gs is actually reloaded (forcing a reload
+ * of the base address from the appropriate descriptor table.)
+ *
+ * Of course the kernel will once again stomp on THAT base address when
+ * returning from a system call, resulting in an application
+ * segmentation fault.
+ *
+ * To avoid this situation, disallow a save of a zero %gs here in order
+ * to try and capture any Linux process that takes a signal with a zero
+ * %gs installed.
+ */
+ assert(signalled_gs != 0);
+
+ if (signalled_gs != LWPGS_SEL) {
+ if ((err = thr_getspecific(lx_tsd_key,
+ (void **)&lx_tsd)) != 0)
+ lx_err_fatal("sigsavegs: unable to read "
+ "thread-specific data: %s", strerror(err));
+
+ assert(lx_tsd != 0);
+
+ lx_tsd->lxtsd_gs = signalled_gs;
+ lx_debug("lx_sigsavegs(): gsp 0x%p, saved gs: 0x%x\n",
+ lx_tsd, signalled_gs);
+ }
+}
+#endif
+
+int
+lx_siginit(void)
+{
+ extern void set_setcontext_enforcement(int);
+ extern void lx_sigacthandler(int, siginfo_t *, void *);
+
+ struct sigaction sa;
+ sigset_t new_set, oset;
+ int lx_sig, sig;
+
+ /*
+ * Block all signals possible while setting up the signal imposition
+ * mechanism.
+ */
+ (void) sigfillset(&new_set);
+
+ if (sigprocmask(SIG_BLOCK, &new_set, &oset) < 0)
+ lx_err_fatal("unable to block signals while setting up "
+ "imposition mechanism: %s", strerror(errno));
+
+ /*
+ * Ignore any signals that have no Linux analog so that those
+ * signals cannot be sent to Linux processes from the global zone
+ */
+ for (sig = 1; sig < NSIG; sig++)
+ if (stol_signo[sig] < 0)
+ (void) sigignore(sig);
+
+ /*
+ * As mentioned previously, when a user signal handler is installed
+ * via sigaction(), libc interposes on the mechanism by actually
+ * installing an internal routine sigacthandler() as the signal
+ * handler. On receipt of the signal, libc does some thread-related
+ * processing via sigacthandler(), then calls the registered user
+ * signal handler on behalf of the user.
+ *
+ * For 32-bit code we need to interpose on that mechanism to make sure
+ * the correct %gs segment register value is installed before the libc
+ * routine is called, otherwise the libc code will die with a
+ * segmentation fault.
+ *
+ * For 64-bit code we overload the %gs register as a mechanism to pass
+ * the syscall mode flag out of the kernel.
+ *
+ * The private libc routine setsigacthandler() will set our
+ * interposition routine, lx_sigacthandler(), as the default
+ * "sigacthandler" routine for all new signal handlers for this
+ * thread. We also use this in 64-bit code to set the libc interposition
+ * routine for setting the context when returning from a signal handler.
+ * This is needed so we can combine changing the syscall mode flag and
+ * doing __setcontext() in one call.
+ */
+#if defined(_LP64)
+ setsigacthandler(lx_sigacthandler, &libc_sigacthandler, lx_setcontext);
+#else
+ setsigacthandler(lx_sigacthandler, &libc_sigacthandler, NULL);
+#endif
+ lx_debug("lx_sigacthandler installed, libc_sigacthandler = 0x%p",
+ libc_sigacthandler);
+
+ /*
+ * Mark any signals that are ignored as ignored in our interposition
+ * handler array
+ */
+ for (lx_sig = 1; lx_sig <= LX_NSIG; lx_sig++) {
+ if (((sig = ltos_signo[lx_sig]) != -1) &&
+ (sigaction(sig, NULL, &sa) < 0))
+ lx_err_fatal("unable to determine previous disposition "
+ "for signal %d: %s", sig, strerror(errno));
+
+ if (sa.sa_handler == SIG_IGN) {
+ lx_debug("marking signal %d (lx %d) as SIG_IGN",
+ sig, lx_sig);
+ lx_sighandlers.lx_sa[lx_sig].lxsa_handler = SIG_IGN;
+ }
+ }
+
+ /*
+ * Have our interposition handler handle SIGPWR to start with,
+ * as it has a default action of terminating the process in Linux
+ * but its default is to be ignored in Illumos.
+ */
+ (void) sigemptyset(&sa.sa_mask);
+ sa.sa_sigaction = lx_call_user_handler;
+ sa.sa_flags = SA_SIGINFO;
+
+ if (sigaction(SIGPWR, &sa, NULL) < 0)
+ lx_err_fatal("sigaction(SIGPWR) failed: %s", strerror(errno));
+
+ /*
+ * Illumos' libc forces certain register values in the ucontext_t
+ * used to restore a post-signal user context to be those Illumos
+ * expects; however that is not what we want to happen if the signal
+ * was taken while branded code was executing, so we must disable
+ * that behavior.
+ */
+ set_setcontext_enforcement(0);
+
+ /*
+ * Reset the signal mask to what we came in with
+ */
+ (void) sigprocmask(SIG_SETMASK, &oset, NULL);
+
+ lx_debug("interposition handler setup for SIGPWR");
+ return (0);
+}
+
+/*
+ * This code stongly resemebles lx_poll(), but is here to be able to take
+ * advantage of the Linux signal helper routines.
+ */
+long
+lx_ppoll(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, uintptr_t p5)
+{
+ struct pollfd *lfds, *sfds;
+ nfds_t nfds = (nfds_t)p2;
+ timespec_t ts, *tsp = NULL;
+ int fds_size, i, rval, revents;
+ lx_sigset_t lxsig, *lxsigp = NULL;
+ sigset_t sigset, *sp = NULL;
+ rctlblk_t *rblk;
+
+ lx_debug("\tppoll(0x%p, %d, 0x%p, 0x%p, %d)", p1, p2, p3, p4, p5);
+
+ if (p3 != NULL) {
+ if (uucopy((void *)p3, &ts, sizeof (ts)) != 0)
+ return (-errno);
+
+ tsp = &ts;
+ }
+
+ if (p4 != NULL) {
+ if (uucopy((void *)p4, &lxsig, sizeof (lxsig)) != 0)
+ return (-errno);
+
+ lxsigp = &lxsig;
+ if ((size_t)p5 != sizeof (lx_sigset_t))
+ return (-EINVAL);
+
+ if (lxsigp) {
+ if ((rval = ltos_sigset(lxsigp, &sigset)) != 0)
+ return (rval);
+
+ sp = &sigset;
+ }
+ }
+
+ /*
+ * Deal with the NULL fds[] case.
+ */
+ if (nfds == 0 || p1 == NULL) {
+ if ((rval = ppoll(NULL, 0, tsp, sp)) < 0)
+ return (-errno);
+
+ return (rval);
+ }
+
+ if (maxfd == 0) {
+ if ((rblk = (rctlblk_t *)SAFE_ALLOCA(rctlblk_size())) == NULL)
+ return (-ENOMEM);
+
+ if (getrctl("process.max-file-descriptor", NULL, rblk,
+ RCTL_FIRST) == -1)
+ return (-EINVAL);
+
+ maxfd = rctlblk_get_value(rblk);
+ }
+
+ if (nfds > maxfd)
+ return (-EINVAL);
+
+ /*
+ * Note: we are assuming that the Linux and Illumos pollfd
+ * structures are identical. Copy in the Linux poll structure.
+ */
+ fds_size = sizeof (struct pollfd) * nfds;
+ lfds = (struct pollfd *)SAFE_ALLOCA(fds_size);
+ if (lfds == NULL)
+ return (-ENOMEM);
+ if (uucopy((void *)p1, lfds, fds_size) != 0)
+ return (-errno);
+
+ /*
+ * The poll system call modifies the poll structures passed in
+ * so we'll need to make an extra copy of them.
+ */
+ sfds = (struct pollfd *)SAFE_ALLOCA(fds_size);
+ if (sfds == NULL)
+ return (-ENOMEM);
+
+ /* Convert the Linux events bitmask into the Illumos equivalent. */
+ for (i = 0; i < nfds; i++) {
+ /*
+ * If the caller is polling for an unsupported event, we
+ * have to bail out.
+ */
+ if (lfds[i].events & ~LX_POLL_SUPPORTED_EVENTS) {
+ lx_unsupported("unsupported poll events requested: "
+ "events=0x%x", lfds[i].events);
+ return (-ENOTSUP);
+ }
+
+ sfds[i].fd = lfds[i].fd;
+ sfds[i].events = lfds[i].events & LX_POLL_COMMON_EVENTS;
+ if (lfds[i].events & LX_POLLWRNORM)
+ sfds[i].events |= POLLWRNORM;
+ if (lfds[i].events & LX_POLLWRBAND)
+ sfds[i].events |= POLLWRBAND;
+ if (lfds[i].events & LX_POLLRDHUP)
+ sfds[i].events |= POLLRDHUP;
+ sfds[i].revents = 0;
+ }
+
+ if ((rval = ppoll(sfds, nfds, tsp, sp) < 0))
+ return (-errno);
+
+ /* Convert the Illumos revents bitmask into the Linux equivalent */
+ for (i = 0; i < nfds; i++) {
+ revents = sfds[i].revents & LX_POLL_COMMON_EVENTS;
+ if (sfds[i].revents & POLLWRBAND)
+ revents |= LX_POLLWRBAND;
+ if (sfds[i].revents & POLLRDHUP)
+ revents |= LX_POLLRDHUP;
+
+ /*
+ * Be careful because on Illumos POLLOUT and POLLWRNORM
+ * are defined to the same values but on Linux they
+ * are not.
+ */
+ if (sfds[i].revents & POLLOUT) {
+ if ((lfds[i].events & LX_POLLOUT) == 0)
+ revents &= ~LX_POLLOUT;
+ if (lfds[i].events & LX_POLLWRNORM)
+ revents |= LX_POLLWRNORM;
+ }
+
+ lfds[i].revents = revents;
+ }
+
+ /* Copy out the results */
+ if (uucopy(lfds, (void *)p1, fds_size) != 0)
+ return (-errno);
+
+ return (rval);
+}
+
+/*
+ * This code stongly resemebles lx_select(), but is here to be able to take
+ * advantage of the Linux signal helper routines.
+ */
+long
+lx_pselect6(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
+ uintptr_t p5, uintptr_t p6)
+{
+ int nfds = (int)p1;
+ fd_set *rfdsp = NULL;
+ fd_set *wfdsp = NULL;
+ fd_set *efdsp = NULL;
+ timespec_t ts, *tsp = NULL;
+ int fd_set_len = howmany(nfds, 8);
+ int r;
+ sigset_t sigset, *sp = NULL;
+
+ lx_debug("\tpselect6(%d, 0x%p, 0x%p, 0x%p, 0x%p, 0x%p)",
+ p1, p2, p3, p4, p4, p6);
+
+ if (nfds > 0) {
+ if (p2 != NULL) {
+ rfdsp = SAFE_ALLOCA(fd_set_len);
+ if (rfdsp == NULL)
+ return (-ENOMEM);
+ if (uucopy((void *)p2, rfdsp, fd_set_len) != 0)
+ return (-errno);
+ }
+ if (p3 != NULL) {
+ wfdsp = SAFE_ALLOCA(fd_set_len);
+ if (wfdsp == NULL)
+ return (-ENOMEM);
+ if (uucopy((void *)p3, wfdsp, fd_set_len) != 0)
+ return (-errno);
+ }
+ if (p4 != NULL) {
+ efdsp = SAFE_ALLOCA(fd_set_len);
+ if (efdsp == NULL)
+ return (-ENOMEM);
+ if (uucopy((void *)p4, efdsp, fd_set_len) != 0)
+ return (-errno);
+ }
+ }
+
+ if (p5 != NULL) {
+ if (uucopy((void *)p5, &ts, sizeof (ts)) != 0)
+ return (-errno);
+
+ tsp = &ts;
+ }
+
+ if (p6 != NULL) {
+ /*
+ * To force the number of arguments to be no more than six,
+ * Linux bundles both the sigset and the size into a structure
+ * that becomes the sixth argument.
+ */
+ struct {
+ lx_sigset_t *addr;
+ size_t size;
+ } lx_sigset;
+
+ if (uucopy((void *)p6, &lx_sigset, sizeof (lx_sigset)) != 0)
+ return (-errno);
+
+ /*
+ * Yes, that's right: Linux forces a size to be passed only
+ * so it can check that it's the size of a sigset_t.
+ */
+ if (lx_sigset.size != sizeof (lx_sigset_t))
+ return (-EINVAL);
+
+ /*
+ * This is where we check if the sigset is *really* NULL.
+ */
+ if (lx_sigset.addr) {
+ if ((r = ltos_sigset(lx_sigset.addr, &sigset)) != 0)
+ return (r);
+
+ sp = &sigset;
+ }
+ }
+
+#if defined(_LP64)
+ r = pselect(nfds, rfdsp, wfdsp, efdsp, tsp, sp);
+#else
+ if (nfds >= FD_SETSIZE)
+ r = pselect_large_fdset(nfds, rfdsp, wfdsp, efdsp, tsp, sp);
+ else
+ r = pselect(nfds, rfdsp, wfdsp, efdsp, tsp, sp);
+#endif
+
+ if (r < 0)
+ return (-errno);
+
+ /*
+ * For pselect6(), we don't honor the strange Linux select() semantics
+ * with respect to the timestruc parameter because glibc ignores it
+ * anyway -- just copy out the fd pointers and return.
+ */
+ if ((rfdsp != NULL) && (uucopy(rfdsp, (void *)p2, fd_set_len) != 0))
+ return (-errno);
+ if ((wfdsp != NULL) && (uucopy(wfdsp, (void *)p3, fd_set_len) != 0))
+ return (-errno);
+ if ((efdsp != NULL) && (uucopy(efdsp, (void *)p4, fd_set_len) != 0))
+ return (-errno);
+
+ return (r);
+}
+
+/*
+ * The first argument is the pid (Linux tgid) to send the signal to, second
+ * argument is the signal to send (an lx signal), and third is the siginfo_t
+ * with extra information. We translate the code and signal only from the
+ * siginfo_t, and leave everything else the same as it gets passed through the
+ * signalling system. This is enough to get sigqueue working. See Linux man
+ * page rt_sigqueueinfo(2).
+ */
+long
+lx_rt_sigqueueinfo(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ pid_t tgid = (pid_t)p1;
+ int lx_sig = (int)p2;
+ int sig;
+ lx_siginfo_t lx_siginfo;
+ siginfo_t siginfo;
+ int s_code;
+ pid_t s_pid;
+
+ if (uucopy((void *)p3, &lx_siginfo, sizeof (lx_siginfo_t)) != 0)
+ return (-EFAULT);
+ s_code = ltos_sigcode(lx_siginfo.lsi_code);
+ if (s_code == LX_SI_CODE_NOT_EXIST)
+ return (-EINVAL);
+ if (lx_sig < 0 || lx_sig > LX_NSIG || (sig = ltos_signo[lx_sig]) < 0) {
+ return (-EINVAL);
+ }
+ /*
+ * This case (when trying to kill pid 0) just has a different errno
+ * returned in illumos than in Linux.
+ */
+ if (tgid == 0)
+ return (-ESRCH);
+ if (lx_lpid_to_spid(tgid, &s_pid) != 0)
+ return (-ESRCH);
+ if (SI_CANQUEUE(s_code)) {
+ return ((syscall(SYS_sigqueue, s_pid, sig,
+ lx_siginfo.lsi_value, s_code, 0) == -1) ?
+ (-errno): 0);
+ } else {
+ /*
+ * This case is unlikely, as the main entry point is through
+ * sigqueue, which always has a queuable si_code.
+ */
+ siginfo.si_signo = sig;
+ siginfo.si_code = s_code;
+ siginfo.si_pid = lx_siginfo.lsi_pid;
+ siginfo.si_value = lx_siginfo.lsi_value;
+ siginfo.si_uid = lx_siginfo.lsi_uid;
+ return ((syscall(SYS_brand, B_IKE_SYSCALL +
+ LX_EMUL_rt_sigqueueinfo, tgid, sig, &siginfo)) ?
+ (-errno) : 0);
+ }
+
+}
+
+/*
+ * Adds an additional argument for which thread within a thread group to send
+ * the signal to (added as the second argument).
+ */
+long
+lx_rt_tgsigqueueinfo(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
+{
+ pid_t tgid = (pid_t)p1;
+ pid_t tid = (pid_t)p2;
+ int lx_sig = (int)p3;
+ int sig;
+ lx_siginfo_t lx_siginfo;
+ siginfo_t siginfo;
+ int si_code;
+
+ if (uucopy((void *)p4, &lx_siginfo, sizeof (lx_siginfo_t)) != 0)
+ return (-EFAULT);
+ if (lx_sig < 0 || lx_sig > LX_NSIG || (sig = ltos_signo[lx_sig]) < 0) {
+ return (-EINVAL);
+ }
+ si_code = ltos_sigcode(lx_siginfo.lsi_code);
+ if (si_code == LX_SI_CODE_NOT_EXIST)
+ return (-EINVAL);
+ /*
+ * Check for invalid tgid and tids. That appears to be only negatives
+ * and 0 values. Everything else that doesn't exist is instead ESRCH.
+ */
+ if (tgid <= 0 || tid <= 0)
+ return (-EINVAL);
+ siginfo.si_signo = sig;
+ siginfo.si_code = si_code;
+ siginfo.si_pid = lx_siginfo.lsi_pid;
+ siginfo.si_value = lx_siginfo.lsi_value;
+ siginfo.si_uid = lx_siginfo.lsi_uid;
+
+ return ((syscall(SYS_brand, B_IKE_SYSCALL +
+ LX_EMUL_rt_tgsigqueueinfo, tgid, tid, sig, &siginfo)) ?
+ (-errno) : 0);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/socket.c b/usr/src/lib/brand/lx/lx_brand/common/socket.c
new file mode 100644
index 0000000000..dd3f5171e1
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/socket.c
@@ -0,0 +1,2144 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <libintl.h>
+#include <strings.h>
+#include <alloca.h>
+#include <ucred.h>
+#include <limits.h>
+
+#include <sys/param.h>
+#include <sys/brand.h>
+#include <sys/syscall.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/un.h>
+#include <netinet/tcp.h>
+#include <netinet/igmp.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_socket.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_misc.h>
+
+/*
+ * This string is used to prefix all abstract namespace Unix sockets, ie all
+ * abstract namespace sockets are converted to regular sockets in the /tmp
+ * directory with .ABSK_ prefixed to their names.
+ */
+#define ABST_PRFX "/tmp/.ABSK_"
+#define ABST_PRFX_LEN 11
+
+#define LX_DEV_LOG "/dev/log"
+#define LX_DEV_LOG_REDIRECT "/var/run/.dev_log_redirect"
+#define LX_DEV_LOG_REDIRECT_LEN 18
+
+typedef enum {
+ lxa_none,
+ lxa_abstract,
+ lxa_devlog
+} lx_addr_type_t;
+
+#ifdef __i386
+
+static int lx_socket32(ulong_t *);
+static int lx_bind32(ulong_t *);
+static int lx_connect32(ulong_t *);
+static int lx_listen32(ulong_t *);
+static int lx_accept32(ulong_t *);
+static int lx_getsockname32(ulong_t *);
+static int lx_getpeername32(ulong_t *);
+static int lx_socketpair32(ulong_t *);
+static int lx_send(ulong_t *);
+static int lx_recv(ulong_t *);
+static int lx_sendto32(ulong_t *);
+static int lx_recvfrom32(ulong_t *);
+static int lx_shutdown32(ulong_t *);
+static int lx_setsockopt32(ulong_t *);
+static int lx_getsockopt32(ulong_t *);
+static int lx_sendmsg32(ulong_t *);
+static int lx_recvmsg32(ulong_t *);
+static int lx_accept4_32(ulong_t *);
+static int lx_recvmmsg32(ulong_t *);
+static int lx_sendmmsg32(ulong_t *);
+
+typedef int (*sockfn_t)(ulong_t *);
+
+static struct {
+ sockfn_t s_fn; /* Function implementing the subcommand */
+ int s_nargs; /* Number of arguments the function takes */
+} sockfns[] = {
+ lx_socket32, 3,
+ lx_bind32, 3,
+ lx_connect32, 3,
+ lx_listen32, 2,
+ lx_accept32, 3,
+ lx_getsockname32, 3,
+ lx_getpeername32, 3,
+ lx_socketpair32, 4,
+ lx_send, 4,
+ lx_recv, 4,
+ lx_sendto32, 6,
+ lx_recvfrom32, 6,
+ lx_shutdown32, 2,
+ lx_setsockopt32, 5,
+ lx_getsockopt32, 5,
+ lx_sendmsg32, 3,
+ lx_recvmsg32, 3,
+ lx_accept4_32, 4,
+ lx_recvmmsg32, 5,
+ lx_sendmmsg32, 4
+};
+#endif /* __i386 */
+
+/*
+ * What follows are a series of tables we use to translate Linux constants
+ * into equivalent Illumos constants and back again. I wish this were
+ * cleaner, more programmatic, and generally nicer. Sadly, life is messy,
+ * and Unix networking even more so.
+ */
+static const int ltos_family[LX_AF_MAX + 1] = {
+ AF_UNSPEC, AF_UNIX, AF_INET, AF_CCITT, AF_IPX,
+ AF_APPLETALK, AF_NOTSUPPORTED, AF_OSI, AF_NOTSUPPORTED,
+ AF_X25, AF_INET6, AF_CCITT, AF_DECnet,
+ AF_802, AF_POLICY, AF_KEY, AF_LX_NETLINK,
+ AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED,
+ AF_NOTSUPPORTED, AF_SNA, AF_NOTSUPPORTED, AF_NOTSUPPORTED,
+ AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED,
+ AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED
+};
+
+#define LTOS_FAMILY(d) ((d) <= LX_AF_MAX ? ltos_family[(d)] : AF_INVAL)
+
+static const int ltos_socktype[LX_SOCK_PACKET + 1] = {
+ SOCK_NOTSUPPORTED, SOCK_STREAM, SOCK_DGRAM, SOCK_RAW,
+ SOCK_RDM, SOCK_SEQPACKET, SOCK_NOTSUPPORTED, SOCK_NOTSUPPORTED,
+ SOCK_NOTSUPPORTED, SOCK_NOTSUPPORTED, SOCK_NOTSUPPORTED
+};
+
+#define LTOS_SOCKTYPE(t) \
+ ((t) <= LX_SOCK_PACKET ? ltos_socktype[(t)] : SOCK_INVAL)
+
+static const int stol_socktype[SOCK_SEQPACKET + 1] = {
+ SOCK_NOTSUPPORTED, LX_SOCK_DGRAM, LX_SOCK_STREAM, SOCK_NOTSUPPORTED,
+ LX_SOCK_RAW, LX_SOCK_RDM, LX_SOCK_SEQPACKET
+};
+
+/*
+ * Linux socket option type definitions
+ *
+ * The protocol `levels` are well defined (see in.h) The option values are
+ * not so well defined. Linux often uses different values vs. Illumos
+ * although they mean the same thing. For example, IP_TOS in Linux is
+ * defined as value 1 but in Illumos it is defined as value 3. This table
+ * maps all the Protocol levels to their options and maps them between
+ * Linux and Illumos and vice versa. Hence the reason for the complexity.
+ */
+
+typedef struct lx_proto_opts {
+ const int *proto; /* Linux to Illumos mapping table */
+ int maxentries; /* max entries in this table */
+} lx_proto_opts_t;
+
+#define OPTNOTSUP -1 /* we don't support it */
+
+/*
+ * Linux Illumos
+ * ----- -------
+ * IP_TOS 1 IP_TOS 3
+ * IP_TTL 2 IP_TTL 4
+ * IP_HDRINCL 3 IP_HDRINCL 2
+ * IP_OPTIONS 4 IP_OPTIONS 1
+ * IP_ROUTER_ALERT 5
+ * IP_RECVOPTS 6 IP_RECVOPTS 5
+ * IP_RETOPTS 7 IP_RETOPTS 8
+ * IP_PKTINFO 8
+ * IP_PKTOPTIONS 9
+ * IP_MTU_DISCOVER 10 emulated for traceroute
+ * IP_RECVERR 11 emulated for traceroute
+ * IP_RECVTTL 12 IP_RECVTTL 11
+ * IP_RECVTOS 13
+ * IP_MTU 14
+ * IP_FREEBIND 15
+ * IP_IPSEC_POLICY 16
+ * IP_XFRM_POLICY 17
+ * IP_PASSSEC 18
+ * IP_TRANSPARENT 19
+ * IP_ORIGDSTADDR 20
+ * IP_MINTTL 21
+ * IP_NODEFRAG 22
+ *
+ * apparent gap
+ *
+ * IP_MULTICAST_IF 32 IP_MULTICAST_IF 16
+ * IP_MULTICAST_TTL 33 IP_MULTICAST_TTL 17
+ * IP_MULTICAST_LOOP 34 IP_MULTICAST_LOOP 18
+ * IP_ADD_MEMBERSHIP 35 IP_ADD_MEMBERSHIP 19
+ * IP_DROP_MEMBERSHIP 36 IP_DROP_MEMBERSHIP 20
+ * IP_UNBLOCK_SOURCE 37 IP_UNBLOCK_SOURCE 22
+ * IP_BLOCK_SOURCE 38 IP_BLOCK_SOURCE 21
+ * IP_ADD_SOURCE_MEMBERSHIP 39 IP_ADD_SOURCE_MEMBERSHIP 23
+ * IP_DROP_SOURCE_MEMBERSHIP 40 IP_DROP_SOURCE_MEMBERSHIP 24
+ * IP_MSFILTER 41
+ * MCAST_JOIN_GROUP 42 -> MCAST_JOIN_GROUP
+ * MCAST_BLOCK_SOURCE 43 -> MCAST_BLOCK_SOURCE
+ * MCAST_UNBLOCK_SOURCE 44 -> MCAST_UNBLOCK_SOURCE
+ * MCAST_LEAVE_GROUP 45 -> MCAST_LEAVE_GROUP
+ * MCAST_JOIN_SOURCE_GROUP 46 -> MCAST_JOIN_SOURCE_GROUP
+ * MCAST_LEAVE_SOURCE_GROUP 47 -> MCAST_LEAVE_SOURCE_GROUP
+ * MCAST_MSFILTER 48
+ * IP_MULTICAST_ALL 49
+ * IP_UNICAST_IF 50
+ *
+ * The Illumos options preceeded by '->' can be added but we might also need
+ * emulation to convert the ip_mreq_source struct.
+ */
+static const int ltos_ip_sockopts[LX_IP_UNICAST_IF + 1] = {
+ OPTNOTSUP, IP_TOS, IP_TTL, IP_HDRINCL, /* 3 */
+ IP_OPTIONS, OPTNOTSUP, IP_RECVOPTS, IP_RETOPTS, /* 7 */
+ OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, /* 11 */
+ IP_RECVTTL, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, /* 15 */
+ OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, /* 19 */
+ OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, /* 23 */
+ OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, /* 27 */
+ OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, /* 31 */
+ IP_MULTICAST_IF, IP_MULTICAST_TTL, /* 33 */
+ IP_MULTICAST_LOOP, IP_ADD_MEMBERSHIP, /* 35 */
+ IP_DROP_MEMBERSHIP, IP_UNBLOCK_SOURCE, /* 37 */
+ IP_BLOCK_SOURCE, IP_ADD_SOURCE_MEMBERSHIP, /* 39 */
+ OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, /* 43 */
+ OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, /* 47 */
+ OPTNOTSUP, OPTNOTSUP, OPTNOTSUP /* 50 */
+};
+
+/*
+ *
+ * TCP socket option mapping:
+ *
+ * Linux Illumos
+ * ----- -------
+ * TCP_NODELAY 1 TCP_NODELAY 1
+ * TCP_MAXSEG 2 TCP_MAXSEG 2
+ * TCP_CORK 3 TCP_CORK 24
+ * TCP_KEEPIDLE 4 TCP_KEEPIDLE 34
+ * TCP_KEEPINTVL 5 TCP_KEEPINTVL 36
+ * TCP_KEEPCNT 6 TCP_KEEPCNT 35
+ * TCP_SYNCNT 7
+ * TCP_LINGER2 8 TCP_LINGER2 28
+ * TCP_DEFER_ACCEPT 9
+ * TCP_WINDOW_CLAMP 10
+ * TCP_INFO 11
+ * TCP_QUICKACK 12
+ * TCP_CONGESTION 13
+ * TCP_MD5SIG 14
+ * TCP_THIN_LINEAR_TIMEOUTS 16
+ * TCP_THIN_DUPACK 17
+ * TCP_USER_TIMEOUT 18
+ * TCP_REPAIR 19
+ * TCP_REPAIR_QUEUE 20
+ * TCP_QUEUE_SEQ 21
+ * TCP_REPAIR_OPTIONS 22
+ * TCP_FASTOPEN 23
+ * TCP_TIMESTAMP 24
+ * TCP_NOTSENT_LOWAT 25
+ */
+
+static const int ltos_tcp_sockopts[LX_TCP_NOTSENT_LOWAT + 1] = {
+ OPTNOTSUP, TCP_NODELAY, TCP_MAXSEG, TCP_CORK, /* 0-3 */
+ TCP_KEEPIDLE, TCP_KEEPINTVL, TCP_KEEPCNT, OPTNOTSUP, /* 4-7 */
+ TCP_LINGER2, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, /* 8-11 */
+ OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, /* 12-15 */
+ OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, /* 16-19 */
+ OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, /* 20-23 */
+ OPTNOTSUP, OPTNOTSUP /* 24-25 */
+};
+
+static const int ltos_igmp_sockopts[IGMP_MTRACE + 1] = {
+ OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP,
+ OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP,
+ IGMP_MINLEN, OPTNOTSUP, OPTNOTSUP, /* XXX: was IGMP_TIMER_SCALE */
+ OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP,
+ OPTNOTSUP, OPTNOTSUP, IGMP_MEMBERSHIP_QUERY,
+ IGMP_V1_MEMBERSHIP_REPORT, IGMP_DVMRP,
+ IGMP_PIM, OPTNOTSUP, IGMP_V2_MEMBERSHIP_REPORT,
+ IGMP_V2_LEAVE_GROUP, OPTNOTSUP, OPTNOTSUP,
+ OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP,
+ IGMP_MTRACE_RESP, IGMP_MTRACE
+};
+
+/*
+ * Socket option mapping:
+ *
+ * Linux Illumos
+ * ----- -------
+ * SO_DEBUG 1 SO_DEBUG 0x0001
+ * SO_REUSEADDR 2 SO_REUSEADDR 0x0004
+ * SO_TYPE 3 SO_TYPE 0x1008
+ * SO_ERROR 4 SO_ERROR 0x1007
+ * SO_DONTROUTE 5 SO_DONTROUTE 0x0010
+ * SO_BROADCAST 6 SO_BROADCAST 0x0020
+ * SO_SNDBUF 7 SO_SNDBUF 0x1001
+ * SO_RCVBUF 8 SO_RCVBUF 0x1002
+ * SO_KEEPALIVE 9 SO_KEEPALIVE 0x0008
+ * SO_OOBINLINE 10 SO_OOBINLINE 0x0100
+ * SO_NO_CHECK 11
+ * SO_PRIORITY 12
+ * SO_LINGER 13 SO_LINGER 0x0080
+ * SO_BSDCOMPAT 14 ignored by linux, emulation returns 0
+ * SO_REUSEPORT 15
+ * SO_PASSCRED 16 SO_RECVUCRED 0x0400
+ * SO_PEERCRED 17 emulated with getpeerucred
+ * SO_RCVLOWAT 18 SO_RCVLOWAT 0x1004
+ * SO_SNDLOWAT 19 SO_SNDLOWAT 0x1003
+ * SO_RCVTIMEO 20 SO_RCVTIMEO 0x1006
+ * SO_SNDTIMEO 21 SO_SNDTIMEO 0x1005
+ * SO_SECURITY_AUTHENTICATION 22
+ * SO_SECURITY_ENCRYPTION_TRANSPORT 23
+ * SO_SECURITY_ENCRYPTION_NETWORK 24
+ * SO_BINDTODEVICE 25
+ * SO_ATTACH_FILTER 26 SO_ATTACH_FILTER 0x40000001
+ * SO_DETACH_FILTER 27 SO_DETACH_FILTER 0x40000002
+ * SO_PEERNAME 28
+ * SO_TIMESTAMP 29 SO_TIMESTAMP 0x1013
+ * SO_ACCEPTCONN 30 SO_ACCEPTCONN 0x0002
+ * SO_PEERSEC 31
+ * SO_SNDBUFFORCE 32
+ * SO_RCVBUFFORCE 33
+ * SO_PASSSEC 34
+ * SO_TIMESTAMPNS 35
+ * SO_MARK 36
+ * SO_TIMESTAMPING 37
+ * SO_PROTOCOL 38 SO_PROTOTYPE 0x1009
+ * SO_DOMAIN 39 SO_DOMAIN 0x100c
+ * SO_RXQ_OVFL 40
+ * SO_WIFI_STATUS 41
+ * SO_PEEK_OFF 42
+ * SO_NOFCS 43
+ * SO_LOCK_FILTER 44
+ * SO_SELECT_ERR_QUEUE 45
+ * SO_BUSY_POLL 46
+ * SO_MAX_PACING_RATE 47
+ * SO_BPF_EXTENSIONS 48
+ */
+static const int ltos_socket_sockopts[LX_SO_BPF_EXTENSIONS + 1] = {
+ OPTNOTSUP, SO_DEBUG, SO_REUSEADDR, SO_TYPE, /* 3 */
+ SO_ERROR, SO_DONTROUTE, SO_BROADCAST, SO_SNDBUF, /* 7 */
+ SO_RCVBUF, SO_KEEPALIVE, SO_OOBINLINE, OPTNOTSUP, /* 11 */
+ OPTNOTSUP, SO_LINGER, OPTNOTSUP, OPTNOTSUP, /* 15 */
+ SO_RECVUCRED, OPTNOTSUP, SO_RCVLOWAT, SO_SNDLOWAT, /* 19 */
+ SO_RCVTIMEO, SO_SNDTIMEO, OPTNOTSUP, OPTNOTSUP, /* 23 */
+ OPTNOTSUP, OPTNOTSUP, SO_ATTACH_FILTER, SO_DETACH_FILTER, /* 27 */
+ OPTNOTSUP, SO_TIMESTAMP, SO_ACCEPTCONN, OPTNOTSUP, /* 31 */
+ OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, /* 35 */
+ OPTNOTSUP, OPTNOTSUP, SO_PROTOTYPE, SO_DOMAIN, /* 39 */
+ OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, /* 43 */
+ OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, /* 47 */
+ OPTNOTSUP /* 48 */
+};
+
+/*
+ * See the Linux raw.7 man page for description of the socket options.
+ * In Linux ICMP_FILTER is defined as 1 in include/uapi/linux/icmp.h
+ */
+static const int ltos_raw_sockopts[LX_ICMP_FILTER + 1] = {
+ OPTNOTSUP, OPTNOTSUP
+};
+
+#define PROTO_SOCKOPTS(opts) \
+ { (opts), sizeof ((opts)) / sizeof ((opts)[0]) }
+
+/*
+ * [gs]etsockopt options mapping tables
+ */
+static lx_proto_opts_t ip_sockopts_tbl = PROTO_SOCKOPTS(ltos_ip_sockopts);
+static lx_proto_opts_t socket_sockopts_tbl =
+ PROTO_SOCKOPTS(ltos_socket_sockopts);
+static lx_proto_opts_t igmp_sockopts_tbl = PROTO_SOCKOPTS(ltos_igmp_sockopts);
+static lx_proto_opts_t tcp_sockopts_tbl = PROTO_SOCKOPTS(ltos_tcp_sockopts);
+static lx_proto_opts_t raw_sockopts_tbl = PROTO_SOCKOPTS(ltos_raw_sockopts);
+
+/*
+ * Lifted from socket.h, since these definitions are contained within
+ * _KERNEL guards.
+ */
+#define _CMSG_HDR_ALIGNMENT 4
+#define _CMSG_HDR_ALIGN(x) (((uintptr_t)(x) + _CMSG_HDR_ALIGNMENT - 1) & \
+ ~(_CMSG_HDR_ALIGNMENT - 1))
+#define _CMSG_DATA_ALIGN(x) \
+ (((uintptr_t)(x) + sizeof (int) - 1) & ~(sizeof (int) - 1))
+
+#define CMSG_FIRSTHDR(m) \
+ (((m)->msg_controllen < sizeof (struct cmsghdr)) ? \
+ (struct cmsghdr *)0 : (struct cmsghdr *)((m)->msg_control))
+
+#define CMSG_NXTHDR(m, c) \
+ (((c) == 0) ? CMSG_FIRSTHDR(m) : \
+ ((((uintptr_t)_CMSG_HDR_ALIGN((char *)(c) + \
+ ((struct cmsghdr *)(c))->cmsg_len) + sizeof (struct cmsghdr)) > \
+ (((uintptr_t)((struct lx_msghdr *)(m))->msg_control) + \
+ ((uintptr_t)((struct lx_msghdr *)(m))->msg_controllen))) ? \
+ ((struct cmsghdr *)0) : \
+ ((struct cmsghdr *)_CMSG_HDR_ALIGN((char *)(c) + \
+ ((struct cmsghdr *)(c))->cmsg_len))))
+
+#define CMSG_LEN(l) \
+ ((unsigned int)_CMSG_DATA_ALIGN(sizeof (struct cmsghdr)) + (l))
+
+#define CMSG_DATA(c) \
+ ((unsigned char *)_CMSG_DATA_ALIGN((struct cmsghdr *)(c) + 1))
+
+#define LX_TO_SOL 1
+#define SOL_TO_LX 2
+
+#define LX_AF_NETLINK 16
+#define LX_NETLINK_KOBJECT_UEVENT 15
+#define LX_NETLINK_ROUTE 0
+
+typedef struct {
+ sa_family_t nl_family;
+ unsigned short nl_pad;
+ uint32_t nl_pid;
+ uint32_t nl_groups;
+} lx_sockaddr_nl_t;
+
+#if defined(_LP64)
+/*
+ * For 32-bit code the Illumos and Linux cmsghdr structure definition is the
+ * same, but for 64-bit Linux code the cmsg_len value is a long instead of an
+ * int. As a result, we need to go through a bunch of work to transform the
+ * csmgs back and forth.
+ */
+typedef struct {
+ long cmsg_len;
+ int cmsg_level;
+ int cmsg_type;
+} lx_cmsghdr64_t;
+
+/*
+ * When converting from Illumos to Linux we don't know in advance how many
+ * control msgs we recv, but we do know that the Linux header is 4 bytes
+ * bigger, plus any additional alignment bytes. We'll take a guess and assume
+ * there is not 64 msgs (1 is common) and alloc an extra 256 bytes.
+ */
+#define LX_CMSG_EXTRA 256
+
+#define LX_CMSG_HDR_ALIGN(x) \
+ (((uintptr_t)(x) + sizeof (long) - 1) & ~(sizeof (long) - 1))
+
+#define LX_CMSG_DATA_ALIGN(x) \
+ (((uintptr_t)(x) + sizeof (int) - 1) & ~(sizeof (int) - 1))
+
+#define LX_CMSG_DATA(c) \
+ ((unsigned char *)LX_CMSG_DATA_ALIGN((lx_cmsghdr64_t *)(c) + 1))
+
+#define LX_CMSG_FIRSTHDR(m) \
+ (((m)->msg_controllen < sizeof (lx_cmsghdr64_t)) ? \
+ (lx_cmsghdr64_t *)NULL : (lx_cmsghdr64_t *)((m)->msg_control))
+
+#define LX_CMSG_LEN(l) (LX_CMSG_HDR_ALIGN(sizeof (lx_cmsghdr64_t)) + (l))
+
+#define LX_CMSG_NXTHDR(m, c) \
+ (((c) == 0) ? LX_CMSG_FIRSTHDR(m) : \
+ ((((uintptr_t)LX_CMSG_HDR_ALIGN((char *)(c) + \
+ ((lx_cmsghdr64_t *)(c))->cmsg_len) + sizeof (lx_cmsghdr64_t)) > \
+ (((uintptr_t)((struct lx_msghdr *)(m))->msg_control) + \
+ ((uintptr_t)((struct lx_msghdr *)(m))->msg_controllen))) ? \
+ ((lx_cmsghdr64_t *)0) : \
+ ((lx_cmsghdr64_t *)LX_CMSG_HDR_ALIGN((char *)(c) + \
+ ((lx_cmsghdr64_t *)(c))->cmsg_len))))
+
+
+static void
+ltos_xform_cmsgs(struct lx_msghdr *msg, struct cmsghdr *ntv_cmsg)
+{
+ lx_cmsghdr64_t *lcmsg, *last;
+ struct cmsghdr *cmsg, *lp;
+ int nlen = 0;
+
+ cmsg = ntv_cmsg;
+ lcmsg = LX_CMSG_FIRSTHDR(msg);
+ while (lcmsg != NULL) {
+ cmsg->cmsg_len =
+ CMSG_LEN(lcmsg->cmsg_len - sizeof (lx_cmsghdr64_t));
+ cmsg->cmsg_level = lcmsg->cmsg_level;
+ cmsg->cmsg_type = lcmsg->cmsg_type;
+
+ bcopy((void *)LX_CMSG_DATA(lcmsg), (void *)CMSG_DATA(cmsg),
+ lcmsg->cmsg_len - sizeof (lx_cmsghdr64_t));
+
+ last = lcmsg;
+ lcmsg = LX_CMSG_NXTHDR(msg, last);
+
+ lp = cmsg;
+ cmsg = CMSG_NXTHDR(msg, lp);
+
+ nlen += (int)((uint64_t)cmsg - (uint64_t)lp);
+ }
+
+ msg->msg_control = ntv_cmsg;
+ msg->msg_controllen = nlen;
+}
+
+static int
+stol_xform_cmsgs(struct lx_msghdr *msg, lx_cmsghdr64_t *lx_cmsg)
+{
+ lx_cmsghdr64_t *lcmsg, *last;
+ struct cmsghdr *cmsg, *lp;
+ int nlen = 0;
+ int err = 0;
+
+ lcmsg = lx_cmsg;
+ cmsg = CMSG_FIRSTHDR(msg);
+ while (cmsg != NULL && err == 0) {
+ lcmsg->cmsg_len =
+ LX_CMSG_LEN(cmsg->cmsg_len - sizeof (struct cmsghdr));
+ lcmsg->cmsg_level = cmsg->cmsg_level;
+ lcmsg->cmsg_type = cmsg->cmsg_type;
+
+ bcopy((void *)CMSG_DATA(cmsg), (void *)LX_CMSG_DATA(lcmsg),
+ cmsg->cmsg_len - sizeof (struct cmsghdr));
+
+ lp = cmsg;
+ cmsg = CMSG_NXTHDR(msg, lp);
+
+ last = lcmsg;
+ lcmsg = LX_CMSG_NXTHDR(msg, last);
+
+ nlen += (int)((uint64_t)lcmsg - (uint64_t)last);
+
+ if (nlen > (msg->msg_controllen + LX_CMSG_EXTRA))
+ err = ENOTSUP;
+ }
+
+ if (err) {
+ lx_unsupported("stol_xform_cmsgs exceeded the allocation "
+ "%d %d\n", nlen, (msg->msg_controllen + LX_CMSG_EXTRA));
+ } else {
+ msg->msg_control = lx_cmsg;
+ msg->msg_controllen = nlen;
+ }
+ return (err);
+}
+#endif
+
+static int
+convert_cmsgs(int direction, struct lx_msghdr *msg, void *new_cmsg,
+ char *caller)
+{
+ struct cmsghdr *cmsg, *last;
+ int err = 0;
+ int level = 0;
+ int type = 0;
+
+#if defined(_LP64)
+ if (direction == LX_TO_SOL) {
+ ltos_xform_cmsgs(msg, (struct cmsghdr *)new_cmsg);
+ }
+#endif
+
+ cmsg = CMSG_FIRSTHDR(msg);
+ while (cmsg != NULL && err == 0) {
+ level = cmsg->cmsg_level;
+ type = cmsg->cmsg_type;
+
+ if (direction == LX_TO_SOL) {
+ if (cmsg->cmsg_level == LX_SOL_SOCKET) {
+ cmsg->cmsg_level = SOL_SOCKET;
+ if (cmsg->cmsg_type == LX_SCM_RIGHTS)
+ cmsg->cmsg_type = SCM_RIGHTS;
+ else if (cmsg->cmsg_type == LX_SCM_CRED)
+ cmsg->cmsg_type = SCM_UCRED;
+ else if (cmsg->cmsg_type == LX_SCM_TIMESTAMP)
+ cmsg->cmsg_type = SCM_TIMESTAMP;
+ else
+ err = ENOTSUP;
+ } else {
+ err = ENOTSUP;
+ }
+ } else {
+ if (cmsg->cmsg_level == SOL_SOCKET) {
+ cmsg->cmsg_level = LX_SOL_SOCKET;
+ if (cmsg->cmsg_type == SCM_RIGHTS)
+ cmsg->cmsg_type = LX_SCM_RIGHTS;
+ else if (cmsg->cmsg_type == SCM_UCRED)
+ cmsg->cmsg_type = LX_SCM_CRED;
+ else if (cmsg->cmsg_type == SCM_TIMESTAMP)
+ cmsg->cmsg_type = LX_SCM_TIMESTAMP;
+ else
+ err = ENOTSUP;
+ } else {
+ err = ENOTSUP;
+ }
+ }
+
+ last = cmsg;
+ cmsg = CMSG_NXTHDR(msg, last);
+ }
+ if (err)
+ lx_unsupported("Unsupported socket control message %d "
+ "(%d) in %s\n.", type, level, caller);
+
+#if defined(_LP64)
+ if (direction == SOL_TO_LX && err == 0) {
+ err = stol_xform_cmsgs(msg, (lx_cmsghdr64_t *)new_cmsg);
+ }
+#endif
+
+ return (err);
+}
+
+/*
+ * We may need a different size socket address vs. the one passed in.
+ */
+static int
+calc_addr_size(struct sockaddr *a, int nlen, lx_addr_type_t *type)
+{
+ struct sockaddr name;
+ size_t fsize = sizeof (name.sa_family);
+
+ if (uucopy(a, &name, sizeof (struct sockaddr)) != 0)
+ return (-errno);
+
+ if (name.sa_family != AF_UNIX) {
+ *type = lxa_none;
+ return (nlen);
+ }
+
+ /*
+ * Handle Linux abstract sockets, which are Unix sockets whose path
+ * begins with a NULL character.
+ */
+ if (name.sa_data[0] == '\0') {
+ *type = lxa_abstract;
+ return (nlen + ABST_PRFX_LEN);
+ }
+
+ /*
+ * For /dev/log, we need to create the Unix domain socket away from
+ * the (unwritable) /dev.
+ */
+ if (strncmp(name.sa_data, LX_DEV_LOG, nlen - fsize) == 0) {
+ *type = lxa_devlog;
+ return (nlen + LX_DEV_LOG_REDIRECT_LEN);
+ }
+
+ *type = lxa_none;
+ return (nlen);
+}
+
+/*
+ * If inaddr is an abstract namespace Unix socket, this function expects addr
+ * to have enough memory to hold the expanded socket name, ie it must be of
+ * size *len + ABST_PRFX_LEN. If inaddr is a netlink socket then we expect
+ * addr to have enough memory to hold an Unix socket address.
+ */
+static int
+convert_sockaddr(struct sockaddr *addr, socklen_t *len,
+ struct sockaddr *inaddr, socklen_t inlen, lx_addr_type_t type)
+{
+ sa_family_t family;
+ int lx_in6_len;
+ int size;
+ int i, orig_len;
+
+ /*
+ * Note that if the buffer at inaddr is ever smaller than inlen bytes,
+ * we may erroneously return EFAULT rather than a possible EINVAL
+ * as the copy comes before the various checks as to whether inlen
+ * is of the proper length for the socket type.
+ *
+ * This isn't an issue at present because all callers to this routine
+ * do meet that constraint.
+ */
+ if ((int)inlen < 0)
+ return (-EINVAL);
+ if (uucopy(inaddr, addr, inlen) != 0)
+ return (-errno);
+
+ family = LTOS_FAMILY(addr->sa_family);
+
+ switch (family) {
+ case (sa_family_t)AF_NOTSUPPORTED:
+ return (-EPROTONOSUPPORT);
+ case (sa_family_t)AF_INVAL:
+ return (-EAFNOSUPPORT);
+ case AF_INET:
+ size = sizeof (struct sockaddr);
+
+ if (inlen < size)
+ return (-EINVAL);
+
+ *len = size;
+ break;
+
+ case AF_INET6:
+ /*
+ * The Solaris sockaddr_in6 has one more 32-bit
+ * field than the Linux version.
+ */
+ size = sizeof (struct sockaddr_in6);
+ lx_in6_len = size - sizeof (uint32_t);
+
+ if (inlen != lx_in6_len)
+ return (-EINVAL);
+
+ *len = (sizeof (struct sockaddr_in6));
+ bzero((char *)addr + lx_in6_len, sizeof (uint32_t));
+ break;
+
+ case AF_UNIX:
+ if (inlen > sizeof (struct sockaddr_un))
+ return (-EINVAL);
+
+ *len = inlen;
+
+ /*
+ * In order to support /dev/log -- a Unix domain socket
+ * used for logging that has had its path hard-coded
+ * far and wide -- we need to relocate the socket
+ * into a writable filesystem. This also necessitates
+ * some cleanup in bind(); see lx_bind() for details.
+ */
+ if (type == lxa_devlog) {
+ *len = inlen + LX_DEV_LOG_REDIRECT_LEN;
+ strcpy(addr->sa_data, LX_DEV_LOG_REDIRECT);
+ break;
+ }
+
+ /*
+ * Linux supports abstract Unix sockets, which are
+ * simply sockets that do not exist on the file system.
+ * These sockets are denoted by beginning the path with
+ * a NULL character. To support these, we strip out the
+ * leading NULL character and change the path to point
+ * to a real place in /tmp directory, by prepending
+ * ABST_PRFX and replacing all illegal characters with
+ * '_'.
+ */
+ if (type == lxa_abstract) {
+ /*
+ * inlen is the entire size of the sockaddr_un
+ * data structure, including the sun_family, so
+ * we need to subtract this out. We subtract
+ * 1 since we want to overwrite the leadin NULL
+ * character, and thus do not include it in the
+ * length.
+ */
+ orig_len = inlen - sizeof (addr->sa_family) - 1;
+
+ /*
+ * Since abstract paths can contain illegal
+ * filename characters, we simply replace these
+ * with '_'
+ */
+ for (i = 1; i < orig_len + 1; i++) {
+ if (addr->sa_data[i] == '\0' ||
+ addr->sa_data[i] == '/')
+ addr->sa_data[i] = '_';
+ }
+
+ /*
+ * prepend ABST_PRFX to file name, minus the
+ * leading NULL character. This places the
+ * socket as a hidden file in the /tmp
+ * directory.
+ */
+ (void) memmove(addr->sa_data + ABST_PRFX_LEN,
+ addr->sa_data + 1, orig_len);
+ bcopy(ABST_PRFX, addr->sa_data, ABST_PRFX_LEN);
+
+ /*
+ * Since abstract socket paths may not be NULL
+ * terminated, we must explicitly NULL terminate
+ * our string.
+ */
+ addr->sa_data[orig_len + ABST_PRFX_LEN] = '\0';
+
+ /*
+ * Make len reflect the new len of our string.
+ * Although we removed the NULL character at the
+ * beginning of the string, we added a NULL
+ * character to the end, so the net gain in
+ * length is simply ABST_PRFX_LEN.
+ */
+ *len = inlen + ABST_PRFX_LEN;
+ }
+ break;
+
+ default:
+ *len = inlen;
+ }
+
+ addr->sa_family = family;
+ return (0);
+}
+
+static int
+convert_sock_args(int in_dom, int in_type, int in_protocol, int *out_dom,
+ int *out_type, int *out_options)
+{
+ int domain, type, options;
+
+ if (in_dom < 0 || in_type < 0 || in_protocol < 0)
+ return (-EINVAL);
+
+ domain = LTOS_FAMILY(in_dom);
+ if (domain == AF_NOTSUPPORTED || domain == AF_UNSPEC)
+ return (-EAFNOSUPPORT);
+ if (domain == AF_INVAL)
+ return (-EINVAL);
+
+ type = LTOS_SOCKTYPE(in_type & LX_SOCK_TYPE_MASK);
+ if (type == SOCK_NOTSUPPORTED)
+ return (-ESOCKTNOSUPPORT);
+ if (type == SOCK_INVAL)
+ return (-EINVAL);
+
+ /*
+ * Linux does not allow the app to specify IP Protocol for raw
+ * sockets. Solaris does, so bail out here.
+ */
+ if (domain == AF_INET && type == SOCK_RAW && in_protocol == IPPROTO_IP)
+ return (-ESOCKTNOSUPPORT);
+
+ options = 0;
+ if (in_type & LX_SOCK_NONBLOCK)
+ options |= SOCK_NONBLOCK;
+ if (in_type & LX_SOCK_CLOEXEC)
+ options |= SOCK_CLOEXEC;
+
+ *out_dom = domain;
+ *out_type = type;
+ *out_options = options;
+ return (0);
+}
+
+static int
+convert_sockflags(int lx_flags, char *call)
+{
+ int solaris_flags = 0;
+
+ if (lx_flags & LX_MSG_OOB) {
+ solaris_flags |= MSG_OOB;
+ lx_flags &= ~LX_MSG_OOB;
+ }
+
+ if (lx_flags & LX_MSG_PEEK) {
+ solaris_flags |= MSG_PEEK;
+ lx_flags &= ~LX_MSG_PEEK;
+ }
+
+ if (lx_flags & LX_MSG_DONTROUTE) {
+ solaris_flags |= MSG_DONTROUTE;
+ lx_flags &= ~LX_MSG_DONTROUTE;
+ }
+
+ if (lx_flags & LX_MSG_CTRUNC) {
+ solaris_flags |= MSG_CTRUNC;
+ lx_flags &= ~LX_MSG_CTRUNC;
+ }
+
+ if (lx_flags & LX_MSG_PROXY) {
+ lx_unsupported("%s: unsupported socket flag MSG_PROXY", call);
+ lx_flags &= ~LX_MSG_PROXY;
+ }
+
+ if (lx_flags & LX_MSG_TRUNC) {
+ solaris_flags |= MSG_TRUNC;
+ lx_flags &= ~LX_MSG_TRUNC;
+ }
+
+ if (lx_flags & LX_MSG_DONTWAIT) {
+ solaris_flags |= MSG_DONTWAIT;
+ lx_flags &= ~LX_MSG_DONTWAIT;
+ }
+
+ if (lx_flags & LX_MSG_EOR) {
+ solaris_flags |= MSG_EOR;
+ lx_flags &= ~LX_MSG_EOR;
+ }
+
+ if (lx_flags & LX_MSG_WAITALL) {
+ solaris_flags |= MSG_WAITALL;
+ lx_flags &= ~LX_MSG_WAITALL;
+ }
+
+ if (lx_flags & LX_MSG_FIN) {
+ lx_unsupported("%s: unsupported socket flag MSG_FIN", call);
+ lx_flags &= ~LX_MSG_FIN;
+ }
+
+ if (lx_flags & LX_MSG_SYN) {
+ lx_unsupported("%s: unsupported socket flag MSG_SYN", call);
+ lx_flags &= ~LX_MSG_SYN;
+ }
+
+ if (lx_flags & LX_MSG_CONFIRM) {
+ /*
+ * See the Linux arp.7 and sendmsg.2 man pages. We can ignore
+ * this option.
+ */
+ lx_flags &= ~LX_MSG_CONFIRM;
+ }
+
+ if (lx_flags & LX_MSG_RST) {
+ lx_unsupported("%s: unsupported socket flag MSG_RST", call);
+ lx_flags &= ~LX_MSG_RST;
+ }
+
+ if (lx_flags & LX_MSG_ERRQUEUE) {
+ lx_unsupported("%s: unsupported socket flag MSG_ERRQUEUE",
+ call);
+ lx_flags &= ~LX_MSG_ERRQUEUE;
+ }
+
+ if (lx_flags & LX_MSG_NOSIGNAL) {
+ /* MSG_NOSIGNAL handled within each caller */
+ lx_flags &= ~LX_MSG_NOSIGNAL;
+ }
+
+ if (lx_flags & LX_MSG_MORE) {
+ lx_unsupported("%s: unsupported socket flag MSG_MORE", call);
+ lx_flags &= ~LX_MSG_MORE;
+ }
+
+ if (lx_flags & LX_MSG_WAITFORONE) {
+ lx_unsupported("%s: unsupported socket flag MSG_WAITFORONE",
+ call);
+ lx_flags &= ~LX_MSG_WAITFORONE;
+ }
+
+ if (lx_flags & LX_MSG_FASTOPEN) {
+ lx_unsupported("%s: unsupported socket flag MSG_FASTOPEN",
+ call);
+ lx_flags &= ~LX_MSG_FASTOPEN;
+ }
+
+ if (lx_flags & LX_MSG_CMSG_CLOEXEC) {
+ lx_unsupported("%s: unsupported socket flag MSG_CMSG_CLOEXEC",
+ call);
+ lx_flags &= ~LX_MSG_CMSG_CLOEXEC;
+ }
+
+ if (lx_flags != 0)
+ lx_unsupported("%s: unknown socket flag(s) 0x%x", call,
+ lx_flags);
+
+ return (solaris_flags);
+}
+
+long
+lx_socket(int domain, int type, int protocol)
+{
+ int options;
+ int fd;
+ int err;
+
+ err = convert_sock_args(domain, type, protocol,
+ &domain, &type, &options);
+ if (err != 0)
+ return (err);
+
+ lx_debug("\tsocket(%d, %d, %d)", domain, type, protocol);
+
+ fd = socket(domain, type | options, protocol);
+
+ if (fd >= 0)
+ return (fd);
+
+ if (errno == EPROTONOSUPPORT)
+ return (-ESOCKTNOSUPPORT);
+
+ return (-errno);
+}
+
+long
+lx_bind(int sockfd, void *np, int nl)
+{
+ struct stat64 statbuf;
+ struct sockaddr *name;
+ socklen_t len;
+ int r, r2, ret, tmperrno;
+ int nlen;
+ lx_addr_type_t type;
+ struct stat sb;
+
+ if ((nlen = calc_addr_size(np, nl, &type)) < 0)
+ return (nlen);
+
+ if ((name = SAFE_ALLOCA(nlen)) == NULL)
+ return (-EINVAL);
+
+ if ((r = convert_sockaddr(name, &len, np, nl, type)) < 0)
+ return (r);
+
+ /*
+ * There are two types of Unix domain sockets for which we need to
+ * do some special handling with respect to bind: abstract namespace
+ * sockets and /dev/log. Abstract namespace sockets are simply Unix
+ * domain sockets that do not exist on the filesystem; we emulate them
+ * by changing their paths in convert_sockaddr() to point to real
+ * file names in the filesystem. /dev/log is a special Unix domain
+ * socket that is used for system logging. On us, /dev isn't writable,
+ * so we rewrite these sockets in convert_sockaddr() to point to a
+ * writable file (defined by LX_DEV_LOG_REDIRECT). In both cases, we
+ * introduce a new problem with respect to cleanup: abstract namespace
+ * sockets don't need to be cleaned up (when they are closed they are
+ * removed) and /dev/log can't be cleaned up because it's in the
+ * non-writable /dev. We solve these problems by cleaning up here in
+ * lx_bind(): before we create the socket, we check to see if it
+ * exists. If it does, we attempt to connect to it to see if it is in
+ * use, or just left over from a previous lx_bind() call. If we are
+ * unable to connect, we assume it is not in use and remove the file,
+ * then continue on as if the file never existed.
+ */
+ if ((type == lxa_abstract || type == lxa_devlog) &&
+ stat(name->sa_data, &sb) == 0 && S_ISSOCK(sb.st_mode)) {
+ if ((r2 = socket(AF_UNIX, SOCK_STREAM, 0)) < 0)
+ return (-ENOSR);
+ ret = connect(r2, name, len);
+ tmperrno = errno;
+ if (close(r2) < 0)
+ return (-EINVAL);
+
+ /*
+ * if we can't connect to the socket, assume no one is using it
+ * and remove it, otherwise assume it is in use and return
+ * EADDRINUSE.
+ */
+ if ((ret < 0) && (tmperrno == ECONNREFUSED)) {
+ if (unlink(name->sa_data) < 0) {
+ return (-EADDRINUSE);
+ }
+ } else {
+ return (-EADDRINUSE);
+ }
+ }
+
+ lx_debug("\tbind(%d, 0x%p, %d)", sockfd, name, len);
+
+ if (name->sa_family == AF_UNIX)
+ lx_debug("\t\tAF_UNIX, path = %s", name->sa_data);
+
+ r = bind(sockfd, name, len);
+
+ /*
+ * Linux returns EADDRINUSE for attempts to bind to Unix domain
+ * sockets that aren't sockets.
+ */
+ if ((r < 0) && (errno == EINVAL) && (name->sa_family == AF_UNIX) &&
+ ((stat64(name->sa_data, &statbuf) == 0) &&
+ (!S_ISSOCK(statbuf.st_mode))))
+ return (-EADDRINUSE);
+
+ return ((r < 0) ? -errno : r);
+}
+
+long
+lx_connect(int sockfd, void *np, int nl)
+{
+ struct sockaddr *name;
+ socklen_t len;
+ int r;
+ int nlen;
+ lx_addr_type_t type;
+
+ if ((nlen = calc_addr_size(np, nl, &type)) < 0)
+ return (nlen);
+
+ if ((name = SAFE_ALLOCA(nlen)) == NULL)
+ return (-EINVAL);
+
+ if ((r = convert_sockaddr(name, &len, np, nl, type)) < 0)
+ return (r);
+
+ lx_debug("\tconnect(%d, 0x%p, %d)", sockfd, name, len);
+
+ if (name->sa_family == AF_UNIX)
+ lx_debug("\t\tAF_UNIX, path = %s", name->sa_data);
+
+ r = connect(sockfd, name, len);
+
+ return ((r < 0) ? -errno : r);
+}
+
+long
+lx_listen(int sockfd, int backlog)
+{
+ int r;
+
+ lx_debug("\tlisten(%d, %d)", sockfd, backlog);
+ r = listen(sockfd, backlog);
+
+ return ((r < 0) ? -errno : r);
+}
+
+long
+lx_accept(int sockfd, void *name, int *nlp)
+{
+ socklen_t namelen = 0;
+ int r;
+
+ lx_debug("\taccept(%d, 0x%p, 0x%p", sockfd, (struct sockaddr *)name,
+ nlp);
+
+ /*
+ * The Linux man page says that -1 is returned and errno is set to
+ * EFAULT if the "name" address is bad, but it is silent on what to
+ * set errno to if the "namelen" address is bad. Experimentation
+ * shows that Linux (at least the 2.4.21 kernel in CentOS) actually
+ * sets errno to EINVAL in both cases.
+ *
+ * Note that we must first check the name pointer, as the Linux
+ * docs state nothing is copied out if the "name" pointer is NULL.
+ * If it is NULL, we don't care about the namelen pointer's value
+ * or about dereferencing it.
+ *
+ * Happily, Solaris' accept(3SOCKET) treats NULL name pointers and
+ * zero namelens the same way.
+ */
+ if ((name != NULL) &&
+ (uucopy((void *)nlp, &namelen, sizeof (socklen_t)) != 0))
+ return ((errno == EFAULT) ? -EINVAL : -errno);
+
+ lx_debug("\taccept namelen = %d", namelen);
+
+ if ((r = accept(sockfd, (struct sockaddr *)name, &namelen)) < 0)
+ return ((errno == EFAULT) ? -EINVAL : -errno);
+
+ lx_debug("\taccept namelen returned %d bytes", namelen);
+
+ /*
+ * In Linux, accept()ed sockets do not inherit anything set by
+ * fcntl(), so filter those out.
+ */
+ if (fcntl(r, F_SETFL, 0) < 0)
+ return (-errno);
+
+ /*
+ * Once again, a bad "namelen" address sets errno to EINVAL, not
+ * EFAULT. If namelen was zero, there's no need to copy a zero back
+ * out.
+ *
+ * Logic might dictate that we should check if we can write to
+ * the namelen pointer earlier so we don't accept a pending connection
+ * only to fail the call because we can't write the namelen value back
+ * out. However, testing shows Linux does indeed fail the call after
+ * accepting the connection so we must behave in a compatible manner.
+ */
+ if ((name != NULL) && (namelen != 0) &&
+ (uucopy(&namelen, (void *)nlp, sizeof (socklen_t)) != 0))
+ return ((errno == EFAULT) ? -EINVAL : -errno);
+
+ return (r);
+}
+
+long
+lx_getsockname(int sockfd, void *np, int *nlp)
+{
+ struct sockaddr *name = NULL;
+ socklen_t namelen, namelen_orig;
+
+ if (uucopy((void *)nlp, &namelen, sizeof (socklen_t)) != 0)
+ return (-errno);
+ namelen_orig = namelen;
+
+ lx_debug("\tgetsockname(%d, 0x%p, 0x%p (=%d))", sockfd,
+ (struct sockaddr *)np, nlp, namelen);
+
+ if (namelen > 0) {
+ if ((name = SAFE_ALLOCA(namelen)) == NULL)
+ return (-EINVAL);
+ bzero(name, namelen);
+ }
+
+ if (getsockname(sockfd, name, &namelen) < 0)
+ return (-errno);
+
+ /*
+ * If the name that getsockname() wants to return is larger
+ * than namelen, getsockname() will copy out the maximum amount
+ * of data possible and then update namelen to indicate the
+ * actually size of all the data that it wanted to copy out.
+ */
+ if (uucopy(name, np, namelen_orig) != 0)
+ return (-errno);
+ if (uucopy(&namelen, (void *)nlp, sizeof (socklen_t)) != 0)
+ return (-errno);
+
+ return (0);
+}
+
+long
+lx_getpeername(int sockfd, void *np, int *nlp)
+{
+ struct sockaddr *name;
+ socklen_t namelen;
+
+ if (uucopy((void *)nlp, &namelen, sizeof (socklen_t)) != 0)
+ return (-errno);
+
+ lx_debug("\tgetpeername(%d, 0x%p, 0x%p (=%d))", sockfd,
+ (struct sockaddr *)np, nlp, namelen);
+
+ /* LTP can pass -1 but we'll limit the allocation to a page */
+ if ((uint32_t)namelen > 4096)
+ return (-EINVAL);
+
+ /*
+ * Linux returns EFAULT in this case, even if the namelen parameter
+ * is 0 (some test cases use -1, so we check for that too). This check
+ * will not catch other illegal addresses, but the benefit catching a
+ * non-null illegal address here is not worth the cost of another
+ * system call.
+ */
+ if (np == NULL || np == (void *)-1)
+ return (-EFAULT);
+
+ if ((name = SAFE_ALLOCA(namelen)) == NULL)
+ return (-EINVAL);
+ if ((getpeername(sockfd, name, &namelen)) < 0)
+ return (-errno);
+
+ if (uucopy(name, np, namelen) != 0)
+ return (-errno);
+
+ if (uucopy(&namelen, (void *)nlp, sizeof (socklen_t)) != 0)
+ return (-errno);
+
+ return (0);
+}
+
+long
+lx_socketpair(int domain, int type, int protocol, int *sv)
+{
+ int options;
+ int fds[2];
+ int r;
+
+ r = convert_sock_args(domain, type, protocol, &domain, &type, &options);
+ if (r != 0)
+ return (r);
+
+ lx_debug("\tsocketpair(%d, %d, %d, 0x%p)", domain, type, protocol, sv);
+
+ r = socketpair(domain, type | options, protocol, fds);
+
+ if (r == 0) {
+ if (uucopy(fds, sv, sizeof (fds)) != 0) {
+ r = errno;
+ (void) close(fds[0]);
+ (void) close(fds[1]);
+ return (-r);
+ }
+ return (0);
+ }
+
+ if (errno == EPROTONOSUPPORT)
+ return (-ESOCKTNOSUPPORT);
+
+ return (-errno);
+}
+
+long
+lx_sendto(int sockfd, void *buf, size_t len, int flags, void *lto, int tolen)
+{
+ struct sockaddr *to = NULL;
+ ssize_t r;
+ socklen_t tlen = (socklen_t)tolen;
+ int nlen;
+ lx_addr_type_t type;
+
+ int nosigpipe = flags & LX_MSG_NOSIGNAL;
+ struct sigaction newact, oact;
+
+ if (lto != NULL) {
+ if (tolen < 0)
+ return (-EINVAL);
+
+ if ((nlen = calc_addr_size(lto, tolen, &type)) < 0)
+ return (nlen);
+
+ if ((to = SAFE_ALLOCA(nlen)) == NULL)
+ return (-EINVAL);
+
+ if ((r = convert_sockaddr(to, &tlen, lto, tlen, type)) < 0)
+ return (r);
+ }
+
+
+ lx_debug("\tsendto(%d, 0x%p, 0x%d, 0x%x, 0x%x, %d)", sockfd, buf, len,
+ flags, to, tlen);
+
+ flags = convert_sockflags(flags, "sendto");
+
+ /*
+ * If nosigpipe is set, we want to emulate the Linux action of
+ * not sending a SIGPIPE to the caller if the remote socket has
+ * already been closed.
+ *
+ * As SIGPIPE is a directed signal sent only to the thread that
+ * performed the action, we can emulate this behavior by momentarily
+ * resetting the action for SIGPIPE to SIG_IGN, performing the socket
+ * call, and resetting the action back to its previous value.
+ */
+ if (nosigpipe) {
+ newact.sa_handler = SIG_IGN;
+ newact.sa_flags = 0;
+ (void) sigemptyset(&newact.sa_mask);
+
+ if (sigaction(SIGPIPE, &newact, &oact) < 0)
+ lx_err_fatal("sendto(): could not ignore SIGPIPE to "
+ "emulate LX_MSG_NOSIGNAL");
+ }
+
+ r = sendto(sockfd, buf, len, flags, to, tolen);
+
+ if ((nosigpipe) && (sigaction(SIGPIPE, &oact, NULL) < 0))
+ lx_err_fatal("sendto(): could not reset SIGPIPE handler to "
+ "emulate LX_MSG_NOSIGNAL");
+
+ if (r < 0) {
+ /*
+ * according to the man page and LTP, the expected error in
+ * this case is EPIPE.
+ */
+ if (errno == ENOTCONN)
+ return (-EPIPE);
+ else
+ return (-errno);
+ }
+ return (r);
+}
+
+long
+lx_recvfrom(int sockfd, void *buf, size_t len, int flags, void *from,
+ int *from_lenp)
+{
+ ssize_t r;
+
+ int nosigpipe = flags & LX_MSG_NOSIGNAL;
+ struct sigaction newact, oact;
+
+ lx_debug("\trecvfrom(%d, 0x%p, 0x%d, 0x%x, 0x%p, 0x%p)", sockfd, buf,
+ len, flags, from, from_lenp);
+
+ /* LTP expects EINVAL when from_len == -1 */
+ if (from_lenp != NULL) {
+ int flen;
+
+ if (uucopy(from_lenp, &flen, sizeof (int)) != 0)
+ return (-errno);
+ if (flen == -1)
+ return (-EINVAL);
+ }
+
+ /*
+ * LTP sometimes passes -1 for the flags but expects a different
+ * failure result for something else that is wrong.
+ */
+ if (flags != -1 && flags & LX_MSG_ERRQUEUE)
+ return (-EAGAIN);
+ flags = convert_sockflags(flags, "recvfrom");
+
+ /*
+ * If nosigpipe is set, we want to emulate the Linux action of
+ * not sending a SIGPIPE to the caller if the remote socket has
+ * already been closed.
+ *
+ * As SIGPIPE is a directed signal sent only to the thread that
+ * performed the action, we can emulate this behavior by momentarily
+ * resetting the action for SIGPIPE to SIG_IGN, performing the socket
+ * call, and resetting the action back to its previous value.
+ */
+ if (nosigpipe) {
+ newact.sa_handler = SIG_IGN;
+ newact.sa_flags = 0;
+ (void) sigemptyset(&newact.sa_mask);
+
+ if (sigaction(SIGPIPE, &newact, &oact) < 0)
+ lx_err_fatal("recvfrom(): could not ignore SIGPIPE "
+ "to emulate LX_MSG_NOSIGNAL");
+ }
+
+ r = recvfrom(sockfd, buf, len, flags, (struct sockaddr *)from,
+ from_lenp);
+
+ if ((nosigpipe) && (sigaction(SIGPIPE, &oact, NULL) < 0))
+ lx_err_fatal("recvfrom(): could not reset SIGPIPE handler to "
+ "emulate LX_MSG_NOSIGNAL");
+
+ return ((r < 0) ? -errno : r);
+}
+
+long
+lx_shutdown(int sockfd, int how)
+{
+ int r;
+
+ lx_debug("\tshutdown(%d, %d)", sockfd, how);
+ r = shutdown(sockfd, how);
+
+ return ((r < 0) ? -errno : r);
+}
+
+static lx_proto_opts_t *
+get_proto_opt_tbl(int level)
+{
+ switch (level) {
+ case LX_IPPROTO_IP: return (&ip_sockopts_tbl);
+ case LX_SOL_SOCKET: return (&socket_sockopts_tbl);
+ case LX_IPPROTO_IGMP: return (&igmp_sockopts_tbl);
+ case LX_IPPROTO_TCP: return (&tcp_sockopts_tbl);
+ case LX_IPPROTO_RAW: return (&raw_sockopts_tbl);
+ default:
+ lx_unsupported("Unsupported sockopt level %d", level);
+ return (NULL);
+ }
+}
+
+long
+lx_setsockopt(int sockfd, int level, int optname, void *optval, int optlen)
+{
+ int internal_opt;
+ uchar_t internal_uchar;
+ int r;
+ lx_proto_opts_t *proto_opts;
+ boolean_t converted = B_FALSE;
+
+ lx_debug("\tsetsockopt(%d, %d, %d, 0x%p, %d)", sockfd, level, optname,
+ optval, optlen);
+
+ /*
+ * The kernel returns EFAULT for all invalid addresses except NULL,
+ * for which it returns EINVAL. Linux wants EFAULT for NULL too.
+ */
+ if (optval == NULL)
+ return (-EFAULT);
+
+ if (level > LX_IPPROTO_RAW || level == LX_IPPROTO_UDP)
+ return (-ENOPROTOOPT);
+
+ if ((proto_opts = get_proto_opt_tbl(level)) == NULL)
+ return (-ENOPROTOOPT);
+
+ if (optname <= 0 || optname >= proto_opts->maxentries) {
+ lx_unsupported("Unsupported sockopt %d, proto %d", optname,
+ level);
+ return (-ENOPROTOOPT);
+ }
+
+ if (level == LX_IPPROTO_IP) {
+ /*
+ * Ping sets this option to receive errors on raw sockets.
+ * Currently we just ignore it to make ping happy. From the
+ * Linux ip.7 man page:
+ * For raw sockets, IP_RECVERR enables passing of all
+ * received ICMP errors to the application.
+ */
+ if (optname == LX_IP_RECVERR &&
+ strcmp(lx_cmd_name, "ping") == 0)
+ return (0);
+
+ if (optname == LX_IP_RECVERR &&
+ strcmp(lx_cmd_name, "traceroute") == 0)
+ return (0);
+
+ if (optname == LX_IP_MTU_DISCOVER &&
+ strcmp(lx_cmd_name, "traceroute") == 0) {
+ /*
+ * The native traceroute uses IP_DONTFRAG. Set this
+ * and ignore LX_IP_MTU_DISCOVER for traceroute.
+ */
+ optname = IP_DONTFRAG;
+ converted = B_TRUE;
+ }
+
+ /*
+ * For IP_MULTICAST_TTL and IP_MULTICAST_LOOP, Linux defines
+ * the option value to be an integer while we define it to be
+ * an unsigned character. To prevent the kernel from spitting
+ * back an error on an illegal length, verify that the option
+ * value is less than UCHAR_MAX and then swizzle it.
+ */
+ if (optname == LX_IP_MULTICAST_TTL ||
+ optname == LX_IP_MULTICAST_LOOP) {
+ if (optlen != sizeof (int))
+ return (-EINVAL);
+
+ if (uucopy(optval, &internal_opt, sizeof (int)) != 0)
+ return (-errno);
+
+ if (internal_opt > UCHAR_MAX)
+ return (-EINVAL);
+
+ internal_uchar = (uchar_t)internal_opt;
+ optval = &internal_uchar;
+ optlen = sizeof (uchar_t);
+ }
+ } else if (level == LX_SOL_SOCKET) {
+ /* Linux ignores this option. */
+ if (optname == LX_SO_BSDCOMPAT)
+ return (0);
+
+ level = SOL_SOCKET;
+ } else if (level == LX_IPPROTO_RAW) {
+ /*
+ * Ping sets this option. Currently we just ignore it to make
+ * ping happy.
+ */
+ if (optname == LX_ICMP_FILTER &&
+ strcmp(lx_cmd_name, "ping") == 0)
+ return (0);
+ }
+
+ if (!converted) {
+ int orig_optname = optname;
+
+ /*
+ * Do a table lookup of the Illumos equivalent of the given
+ * option.
+ */
+ optname = proto_opts->proto[optname];
+ if (optname == OPTNOTSUP) {
+ lx_unsupported("unsupported sockopt %d, proto %d",
+ orig_optname, level);
+ return (-ENOPROTOOPT);
+ }
+ }
+
+ r = setsockopt(sockfd, level, optname, optval, optlen);
+
+ return ((r < 0) ? -errno : r);
+}
+
+long
+lx_getsockopt(int sockfd, int level, int optname, void *optval, int *optlenp)
+{
+ int r;
+ int orig_optname;
+ lx_proto_opts_t *proto_opts;
+
+ lx_debug("\tgetsockopt(%d, %d, %d, 0x%p, 0x%p)", sockfd, level, optname,
+ optval, optlenp);
+
+ /*
+ * According to the Linux man page, a NULL optval should indicate
+ * (as in Solaris) that no return value is expected. Instead, it
+ * actually triggers an EFAULT error.
+ */
+ if (optval == NULL)
+ return (-EFAULT);
+
+ if (level > LX_IPPROTO_RAW || level == LX_IPPROTO_UDP)
+ return (-EOPNOTSUPP);
+
+ if ((proto_opts = get_proto_opt_tbl(level)) == NULL)
+ return (-ENOPROTOOPT);
+
+ if (optname <= 0 || optname >= (proto_opts->maxentries)) {
+ lx_unsupported("Unsupported sockopt %d, proto %d", optname,
+ level);
+ return (-ENOPROTOOPT);
+ }
+
+ if ((level == LX_IPPROTO_TCP) && (optname == LX_TCP_CORK)) {
+ /*
+ * We don't support TCP_CORK but some apps rely on it. So,
+ * rather than return an error we just return 0. This
+ * isn't exactly a lie, since this option really isn't set,
+ * but it's not the whole truth either. Fortunately, we
+ * aren't under oath.
+ */
+ r = 0;
+ if (uucopy(&r, optval, sizeof (int)) != 0)
+ return (-errno);
+ r = sizeof (int);
+ if (uucopy(&r, optlenp, sizeof (int)) != 0)
+ return (-errno);
+ return (0);
+ }
+ if ((level == LX_SOL_SOCKET) && (optname == LX_SO_PEERCRED)) {
+ struct lx_ucred lx_ucred;
+ ucred_t *ucp;
+
+ /*
+ * We don't support SO_PEERCRED, but we do have equivalent
+ * functionality in getpeerucred() so invoke that here.
+ */
+
+ /* Verify there's going to be enough room for the results. */
+ if (uucopy(optlenp, &r, sizeof (int)) != 0)
+ return (-errno);
+ if (r < sizeof (struct lx_ucred))
+ return (-EOVERFLOW);
+
+ /*
+ * We allocate a ucred_t ourselves rather than allow
+ * getpeerucred() to do it for us because getpeerucred()
+ * uses malloc(3C) and we'd rather use SAFE_ALLOCA().
+ */
+ if ((ucp = (ucred_t *)SAFE_ALLOCA(ucred_size())) == NULL)
+ return (-ENOMEM);
+
+ /* Get the credential for the remote end of this socket. */
+ if (getpeerucred(sockfd, &ucp) != 0)
+ return (-errno);
+ if (((lx_ucred.lxu_pid = ucred_getpid(ucp)) == -1) ||
+ ((lx_ucred.lxu_uid = ucred_geteuid(ucp)) == (uid_t)-1) ||
+ ((lx_ucred.lxu_gid = ucred_getegid(ucp)) == (gid_t)-1)) {
+ return (-errno);
+ }
+
+ /* Copy out the results. */
+ if ((uucopy(&lx_ucred, optval, sizeof (lx_ucred))) != 0)
+ return (-errno);
+ r = sizeof (lx_ucred);
+ if ((uucopy(&r, optlenp, sizeof (int))) != 0)
+ return (-errno);
+ return (0);
+ }
+
+ orig_optname = optname;
+
+ optname = proto_opts->proto[optname];
+ if (optname == OPTNOTSUP) {
+ lx_unsupported("unsupported sockopt %d, proto %d",
+ orig_optname, level);
+ return (-ENOPROTOOPT);
+ }
+
+ if (level == LX_SOL_SOCKET)
+ level = SOL_SOCKET;
+
+ r = getsockopt(sockfd, level, optname, optval, optlenp);
+
+ if (r == 0 && level == SOL_SOCKET) {
+ switch (optname) {
+ case SO_TYPE:
+ /* translate our type back to Linux */
+ *(int *)optval = stol_socktype[(*(int *)optval)];
+ break;
+
+ case SO_ERROR:
+ *(int *)optval = lx_errno(*(int *)optval);
+ break;
+ }
+ }
+
+ return ((r < 0) ? -errno : r);
+}
+
+/*
+ * libc routines that issue these system calls. We bypass the libsocket
+ * wrappers since they explicitly turn off the MSG_XPG_2 flag we need for
+ * Linux compatibility.
+ */
+extern int _so_sendmsg();
+extern int _so_recvmsg();
+
+long
+lx_sendmsg(int sockfd, void *lmp, int flags)
+{
+ struct lx_msghdr msg;
+ struct cmsghdr *cmsg;
+ void *new_cmsg = NULL;
+ int r;
+
+ int nosigpipe = flags & LX_MSG_NOSIGNAL;
+ struct sigaction newact, oact;
+
+ lx_debug("\tsendmsg(%d, 0x%p, 0x%x)", sockfd, lmp, flags);
+
+ flags = convert_sockflags(flags, "sendmsg");
+
+ if ((uucopy(lmp, &msg, sizeof (msg))) != 0)
+ return (-errno);
+
+ if (msg.msg_name != NULL && msg.msg_namelen < sizeof (struct sockaddr))
+ return (-EINVAL);
+
+ /*
+ * If there are control messages bundled in this message, we need
+ * to convert them from Linux to Solaris.
+ */
+ if (msg.msg_control != NULL) {
+ if (msg.msg_controllen == 0) {
+ cmsg = NULL;
+ } else {
+ cmsg = SAFE_ALLOCA(msg.msg_controllen);
+ if (cmsg == NULL)
+ return (-EINVAL);
+#if defined(_LP64)
+ /*
+ * We don't know in advance how many control msgs
+ * there are, but we do know that the native header is
+ * 4 bytes smaller than the Linux header, so allocating
+ * the same size will over-estimate what we actually
+ * need.
+ */
+ new_cmsg = SAFE_ALLOCA(msg.msg_controllen);
+ if (new_cmsg == NULL)
+ return (-EINVAL);
+#endif
+ }
+ if ((uucopy(msg.msg_control, cmsg,
+ msg.msg_controllen)) != 0)
+ return (-errno);
+ msg.msg_control = cmsg;
+ if ((r = convert_cmsgs(LX_TO_SOL, &msg, new_cmsg,
+ "sendmsg()")) != 0)
+ return (-r);
+ }
+
+ /*
+ * If nosigpipe is set, we want to emulate the Linux action of
+ * not sending a SIGPIPE to the caller if the remote socket has
+ * already been closed.
+ *
+ * As SIGPIPE is a directed signal sent only to the thread that
+ * performed the action, we can emulate this behavior by momentarily
+ * resetting the action for SIGPIPE to SIG_IGN, performing the socket
+ * call, and resetting the action back to its previous value.
+ */
+ if (nosigpipe) {
+ newact.sa_handler = SIG_IGN;
+ newact.sa_flags = 0;
+ (void) sigemptyset(&newact.sa_mask);
+
+ if (sigaction(SIGPIPE, &newact, &oact) < 0)
+ lx_err_fatal("sendmsg(): could not ignore SIGPIPE to "
+ "emulate LX_MSG_NOSIGNAL");
+ }
+
+ r = _so_sendmsg(sockfd, (struct msghdr *)&msg, flags | MSG_XPG4_2);
+
+ if ((nosigpipe) && (sigaction(SIGPIPE, &oact, NULL) < 0))
+ lx_err_fatal("sendmsg(): could not reset SIGPIPE handler to "
+ "emulate LX_MSG_NOSIGNAL");
+
+ if (r < 0) {
+ /*
+ * according to the man page and LTP, the expected error in
+ * this case is EPIPE.
+ */
+ if (errno == ENOTCONN)
+ return (-EPIPE);
+ else
+ return (-errno);
+ }
+
+ return (r);
+}
+
+long
+lx_recvmsg(int sockfd, void *lmp, int flags)
+{
+ struct lx_msghdr msg;
+ struct cmsghdr *cmsg = NULL;
+ void *new_cmsg = NULL;
+ int r, err;
+
+ int nosigpipe = flags & LX_MSG_NOSIGNAL;
+ struct sigaction newact, oact;
+
+ lx_debug("\trecvmsg(%d, 0x%p, 0x%x)", sockfd, lmp, flags);
+
+ /*
+ * LTP sometimes passes -1 for the flags but expects a different
+ * failure result for something else that is wrong.
+ */
+ if (flags != -1 && flags & LX_MSG_ERRQUEUE)
+ return (-EAGAIN);
+ flags = convert_sockflags(flags, "recvmsg");
+
+ if ((uucopy(lmp, &msg, sizeof (msg))) != 0)
+ return (-errno);
+
+ /*
+ * If we are expecting to have to convert any control messages,
+ * then we should receive them into our address space instead of
+ * the app's.
+ */
+ if (msg.msg_control != NULL) {
+ cmsg = msg.msg_control;
+ if (msg.msg_controllen == 0) {
+ msg.msg_control = NULL;
+ } else {
+ msg.msg_control = SAFE_ALLOCA(msg.msg_controllen);
+ if (msg.msg_control == NULL)
+ return (-EINVAL);
+#if defined(_LP64)
+ new_cmsg = SAFE_ALLOCA(msg.msg_controllen +
+ LX_CMSG_EXTRA);
+ if (new_cmsg == NULL)
+ return (-EINVAL);
+#endif
+ }
+ }
+
+ /*
+ * If nosigpipe is set, we want to emulate the Linux action of
+ * not sending a SIGPIPE to the caller if the remote socket has
+ * already been closed.
+ *
+ * As SIGPIPE is a directed signal sent only to the thread that
+ * performed the action, we can emulate this behavior by momentarily
+ * resetting the action for SIGPIPE to SIG_IGN, performing the socket
+ * call, and resetting the action back to its previous value.
+ */
+ if (nosigpipe) {
+ newact.sa_handler = SIG_IGN;
+ newact.sa_flags = 0;
+ (void) sigemptyset(&newact.sa_mask);
+
+ if (sigaction(SIGPIPE, &newact, &oact) < 0)
+ lx_err_fatal("recvmsg(): could not ignore SIGPIPE to "
+ "emulate LX_MSG_NOSIGNAL");
+ }
+
+ r = _so_recvmsg(sockfd, (struct msghdr *)&msg, flags | MSG_XPG4_2);
+
+ if ((nosigpipe) && (sigaction(SIGPIPE, &oact, NULL) < 0))
+ lx_err_fatal("recvmsg(): could not reset SIGPIPE handler to "
+ "emulate LX_MSG_NOSIGNAL");
+
+ if (r >= 0 && msg.msg_controllen >= sizeof (struct cmsghdr)) {
+ /*
+ * If there are control messages bundled in this message,
+ * we need to convert them from Linux to Solaris.
+ */
+ if ((err = convert_cmsgs(SOL_TO_LX, &msg, new_cmsg,
+ "recvmsg()")) != 0)
+ return (-err);
+
+ if ((uucopy(msg.msg_control, cmsg,
+ msg.msg_controllen)) != 0)
+ return (-errno);
+ }
+
+ msg.msg_control = cmsg;
+
+ /*
+ * A handful of the values in the msghdr are set by the recvmsg()
+ * call, so copy their values back to the caller. Rather than iterate,
+ * just copy the whole structure back.
+ */
+ if (uucopy(&msg, lmp, sizeof (msg)) != 0)
+ return (-errno);
+
+ return ((r < 0) ? -errno : r);
+}
+
+/*
+ * Based on the lx_accept code with the addition of the flags handling.
+ * See internal comments in that function for more explanation.
+ */
+long
+lx_accept4(int sockfd, void *name, int *nlp, int lx_flags)
+{
+ socklen_t namelen = 0;
+ int flags = 0;
+ int r;
+
+ lx_debug("\taccept4(%d, 0x%p, 0x%p 0x%x", sockfd, name, nlp, lx_flags);
+
+ if ((name != NULL) &&
+ (uucopy((void *)nlp, &namelen, sizeof (socklen_t)) != 0))
+ return ((errno == EFAULT) ? -EINVAL : -errno);
+
+ lx_debug("\taccept4 namelen = %d", namelen);
+
+ if (lx_flags & LX_SOCK_NONBLOCK)
+ flags |= SOCK_NONBLOCK;
+
+ if (lx_flags & LX_SOCK_CLOEXEC)
+ flags |= SOCK_CLOEXEC;
+
+ if ((r = accept4(sockfd, (struct sockaddr *)name, &namelen, flags)) < 0)
+ return ((errno == EFAULT) ? -EINVAL : -errno);
+
+ lx_debug("\taccept4 namelen returned %d bytes", namelen);
+
+ if ((name != NULL) && (namelen != 0) &&
+ (uucopy(&namelen, (void *)nlp, sizeof (socklen_t)) != 0))
+ return ((errno == EFAULT) ? -EINVAL : -errno);
+
+ return (r);
+}
+
+#ifdef __i386
+
+static int
+lx_socket32(ulong_t *args)
+{
+ return (lx_socket((int)args[0], (int)args[1], (int)args[2]));
+}
+
+static int
+lx_bind32(ulong_t *args)
+{
+ return (lx_bind((int)args[0], (struct sockaddr *)args[1],
+ (int)args[2]));
+}
+
+static int
+lx_connect32(ulong_t *args)
+{
+ return (lx_connect((int)args[0], (struct sockaddr *)args[1],
+ (int)args[2]));
+}
+
+static int
+lx_listen32(ulong_t *args)
+{
+ return (lx_listen((int)args[0], (int)args[1]));
+}
+
+static int
+lx_accept32(ulong_t *args)
+{
+ return (lx_accept((int)args[0], (struct sockaddr *)args[1],
+ (int *)args[2]));
+}
+
+static int
+lx_getsockname32(ulong_t *args)
+{
+ return (lx_getsockname((int)args[0], (struct sockaddr *)args[1],
+ (int *)args[2]));
+}
+
+static int
+lx_getpeername32(ulong_t *args)
+{
+ return (lx_getpeername((int)args[0], (struct sockaddr *)args[1],
+ (int *)args[2]));
+}
+
+static int
+lx_socketpair32(ulong_t *args)
+{
+ return (lx_socketpair((int)args[0], (int)args[1], (int)args[2],
+ (int *)args[3]));
+}
+
+static ssize_t
+lx_send(ulong_t *args)
+{
+ int sockfd = (int)args[0];
+ void *buf = (void *)args[1];
+ size_t len = (size_t)args[2];
+ int flags = (int)args[3];
+ ssize_t r;
+
+ int nosigpipe = flags & LX_MSG_NOSIGNAL;
+ struct sigaction newact, oact;
+
+ lx_debug("\tsend(%d, 0x%p, 0x%d, 0x%x)", sockfd, buf, len, flags);
+
+ flags = convert_sockflags(flags, "send");
+
+ /*
+ * If nosigpipe is set, we want to emulate the Linux action of
+ * not sending a SIGPIPE to the caller if the remote socket has
+ * already been closed.
+ *
+ * As SIGPIPE is a directed signal sent only to the thread that
+ * performed the action, we can emulate this behavior by momentarily
+ * resetting the action for SIGPIPE to SIG_IGN, performing the socket
+ * call, and resetting the action back to its previous value.
+ */
+ if (nosigpipe) {
+ newact.sa_handler = SIG_IGN;
+ newact.sa_flags = 0;
+ (void) sigemptyset(&newact.sa_mask);
+
+ if (sigaction(SIGPIPE, &newact, &oact) < 0)
+ lx_err_fatal("send(): could not ignore SIGPIPE to "
+ "emulate LX_MSG_NOSIGNAL");
+ }
+
+ r = send(sockfd, buf, len, flags);
+
+ if ((nosigpipe) && (sigaction(SIGPIPE, &oact, NULL) < 0))
+ lx_err_fatal("send(): could not reset SIGPIPE handler to "
+ "emulate LX_MSG_NOSIGNAL");
+
+ return ((r < 0) ? -errno : r);
+}
+
+static ssize_t
+lx_recv(ulong_t *args)
+{
+ int sockfd = (int)args[0];
+ void *buf = (void *)args[1];
+ size_t len = (size_t)args[2];
+ int flags = (int)args[3];
+ ssize_t r;
+
+ int nosigpipe = flags & LX_MSG_NOSIGNAL;
+ struct sigaction newact, oact;
+
+ lx_debug("\trecv(%d, 0x%p, 0x%d, 0x%x)", sockfd, buf, len, flags);
+
+ /*
+ * LTP sometimes passes -1 for the flags but expects a different
+ * failure result for something else that is wrong.
+ */
+ if (flags != -1 && flags & LX_MSG_ERRQUEUE)
+ return (-EAGAIN);
+ flags = convert_sockflags(flags, "recv");
+
+ /*
+ * If nosigpipe is set, we want to emulate the Linux action of
+ * not sending a SIGPIPE to the caller if the remote socket has
+ * already been closed.
+ *
+ * As SIGPIPE is a directed signal sent only to the thread that
+ * performed the action, we can emulate this behavior by momentarily
+ * resetting the action for SIGPIPE to SIG_IGN, performing the socket
+ * call, and resetting the action back to its previous value.
+ */
+ if (nosigpipe) {
+ newact.sa_handler = SIG_IGN;
+ newact.sa_flags = 0;
+ (void) sigemptyset(&newact.sa_mask);
+
+ if (sigaction(SIGPIPE, &newact, &oact) < 0)
+ lx_err_fatal("recv(): could not ignore SIGPIPE to "
+ "emulate LX_MSG_NOSIGNAL");
+ }
+
+ r = recv(sockfd, buf, len, flags);
+
+ if ((nosigpipe) && (sigaction(SIGPIPE, &oact, NULL) < 0))
+ lx_err_fatal("recv(): could not reset SIGPIPE handler to "
+ "emulate LX_MSG_NOSIGNAL");
+
+ return ((r < 0) ? -errno : r);
+}
+
+static ssize_t
+lx_sendto32(ulong_t *args)
+{
+ return (lx_sendto((int)args[0], (void *)args[1], (size_t)args[2],
+ (int)args[3], (struct sockaddr *)args[4], (int)args[5]));
+}
+
+static ssize_t
+lx_recvfrom32(ulong_t *args)
+{
+ return (lx_recvfrom((int)args[0], (void *)args[1], (size_t)args[2],
+ (int)args[3], (struct sockaddr *)args[4], (int *)args[5]));
+}
+
+static int
+lx_shutdown32(ulong_t *args)
+{
+ return (lx_shutdown((int)args[0], (int)args[1]));
+}
+
+static int
+lx_setsockopt32(ulong_t *args)
+{
+ return (lx_setsockopt((int)args[0], (int)args[1], (int)args[2],
+ (void *)args[3], (int)args[4]));
+}
+
+static int
+lx_getsockopt32(ulong_t *args)
+{
+ return (lx_getsockopt((int)args[0], (int)args[1], (int)args[2],
+ (void *)args[3], (int *)args[4]));
+}
+
+static int
+lx_sendmsg32(ulong_t *args)
+{
+ return (lx_sendmsg((int)args[0], (void *)args[1], (int)args[2]));
+}
+
+static int
+lx_recvmsg32(ulong_t *args)
+{
+ return (lx_recvmsg((int)args[0], (void *)args[1], (int)args[2]));
+}
+
+static int
+lx_accept4_32(ulong_t *args)
+{
+ return (lx_accept4((int)args[0], (struct sockaddr *)args[1],
+ (int *)args[2], (int)args[3]));
+}
+
+static int
+lx_recvmmsg32(ulong_t *args)
+{
+ lx_unsupported("Unsupported socketcall: recvmmsg\n.");
+ return (-EINVAL);
+}
+
+static int
+lx_sendmmsg32(ulong_t *args)
+{
+ lx_unsupported("Unsupported socketcall: sendmmsg\n.");
+ return (-EINVAL);
+}
+
+long
+lx_socketcall(uintptr_t p1, uintptr_t p2)
+{
+ int subcmd = (int)p1 - 1; /* subcommands start at 1 - not 0 */
+ ulong_t args[6];
+ int r;
+
+ if (subcmd < 0 || subcmd >= LX_SENDMMSG)
+ return (-EINVAL);
+
+ /*
+ * Copy the arguments to the subcommand in from the app's address
+ * space, returning EFAULT if we get a bogus pointer.
+ */
+ if (uucopy((void *)p2, args,
+ sockfns[subcmd].s_nargs * sizeof (ulong_t)))
+ return (-errno);
+
+ r = (sockfns[subcmd].s_fn)(args);
+
+ return (r);
+}
+
+#endif /* __i386 */
diff --git a/usr/src/lib/brand/lx/lx_brand/common/stat.c b/usr/src/lib/brand/lx/lx_brand/common/stat.c
new file mode 100644
index 0000000000..b1c9b18722
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/stat.c
@@ -0,0 +1,551 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * when a stat() is done for a non-device file, the devt returned
+ * via the stat is the devt of the device backing the filesystem which
+ * contains the file the stat was performed on. these devts are currently
+ * untranslated. if this turns out to cause problems in the future then
+ * we might want to add more devt translators to convert sd and cmdk
+ * devts into linux devts that normally represent disks.
+ *
+ * XXX this may not be the best place to have the devt translation code.
+ * devt translation will also be needed for /proc fs support, which will
+ * probably be done in the kernel. we may need to move this code into
+ * the kernel and add a brand syscall to do the translation for us. this
+ * will need to be worked out before putback.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <strings.h>
+#include <unistd.h>
+#include <libintl.h>
+#include <sys/fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/lx_types.h>
+#include <sys/lx_stat.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_ptm.h>
+#include <sys/lx_audio.h>
+#include <sys/lx_fcntl.h>
+#include <sys/lx_syscall.h>
+#include <sys/modctl.h>
+
+/* define _KERNEL to get the devt manipulation macros */
+#define _KERNEL
+#include <sys/sysmacros.h>
+#undef _KERNEL
+
+
+#define LX_PTS_MAJOR_MIN 136
+#define LX_PTS_MAJOR_MAX 143
+#define LX_PTS_MAX \
+ ((LX_PTS_MAJOR_MAX - LX_PTS_MAJOR_MIN + 1) * LX_MINORMASK)
+
+#define LX_PTM_MAJOR 5
+#define LX_PTM_MINOR 2
+
+/* values for dt_type */
+#define DTT_INVALID 0
+#define DTT_LIST 1
+#define DTT_CUSTOM 2
+
+/* convience macros for access the dt_minor union */
+#define dt_list dt_minor.dtm_list
+#define dt_custom dt_minor.dtm_custom
+
+/*
+ * structure used to define devt translators
+ */
+typedef struct minor_translator {
+ char *mt_path; /* solaris minor node path */
+ minor_t mt_minor; /* solaris minor node number */
+ int mt_lx_major; /* linux major node number */
+ int mt_lx_minor; /* linux minor node number */
+} minor_translator_t;
+
+typedef struct devt_translator {
+ char *dt_driver; /* solaris driver name */
+ major_t dt_major; /* solaris driver number */
+
+ /* dt_type dictates how we intrepret dt_minor */
+ int dt_type;
+ union {
+ uintptr_t dtm_foo; /* required to compile */
+ minor_translator_t *dtm_list;
+ int (*dtm_custom)(dev_t, lx_dev_t *, int);
+ } dt_minor;
+} devt_translator_t;
+
+
+/*
+ * forward declerations
+ */
+static devt_translator_t devt_translators[];
+
+/*
+ * called to initialize the devt translation subsystem
+ */
+int
+lx_stat_init()
+{
+ minor_translator_t *mt;
+ struct stat st;
+ major_t major;
+ char *driver;
+ int i, j, ret;
+
+ for (i = 0; devt_translators[i].dt_driver != NULL; i++) {
+
+ assert(devt_translators[i].dt_type != DTT_INVALID);
+
+ /* figure out the major numbers for our devt translators */
+ driver = devt_translators[i].dt_driver;
+ ret = modctl(MODGETMAJBIND,
+ driver, strlen(driver) + 1, &major);
+ if (ret != 0) {
+ lx_err("lx_stat_init(): modctl(MODGETMAJBIND, %s) "
+ "failed: %s\n", driver, strerror(errno));
+ lx_err("lx_stat_init(): devt translator disabled "
+ "for: %s\n", driver);
+ devt_translators[i].dt_major = (major_t)-1;
+ continue;
+ }
+
+ /* save the major node value */
+ devt_translators[i].dt_major = major;
+
+ /* if this translator doesn't use a list mapping we're done. */
+ if (devt_translators[i].dt_type != DTT_LIST)
+ continue;
+
+ /* for each device listed, lookup the minor node number */
+ mt = devt_translators[i].dt_list;
+ for (j = 0; mt[j].mt_path != NULL; j++) {
+
+ /* stat the device */
+ ret = stat(mt[j].mt_path, &st);
+ if (ret != 0) {
+ lx_err("lx_stat_init(): stat(%s) failed: %s\n",
+ mt[j].mt_path, strerror(errno));
+ lx_err("lx_stat_init(): devt translator "
+ "disabled for: %s\n", mt[j].mt_path);
+ st.st_rdev = NODEV;
+ } else {
+ /* make sure the major node matches */
+ assert(getmajor(st.st_rdev) == major);
+ assert(mt[j].mt_minor < LX_MINORMASK);
+ }
+
+ /* save the minor node value */
+ mt[j].mt_minor = getminor(st.st_rdev);
+ }
+ }
+ return (0);
+}
+
+static int
+/*ARGSUSED*/
+pts_devt_translator(dev_t dev, lx_dev_t *jdev, int fd)
+{
+ minor_t min = getminor(dev);
+ int lx_maj;
+ int lx_min;
+
+ /*
+ * linux has a really small minor number name space (8 bits).
+ * so if pts devices are limited to one major number you could
+ * only have 256 of them. linux addresses this issue by using
+ * multiple major numbers for pts devices.
+ */
+ if (min >= LX_PTS_MAX)
+ return (EOVERFLOW);
+
+ lx_maj = LX_PTS_MAJOR_MIN + (min / LX_MINORMASK);
+ lx_min = min % LX_MINORMASK;
+
+ *jdev = LX_MAKEDEVICE(lx_maj, lx_min);
+ return (0);
+}
+
+
+static int
+/*ARGSUSED*/
+ptm_devt_translator(dev_t dev, lx_dev_t *jdev, int fd)
+{
+ *jdev = LX_MAKEDEVICE(LX_PTM_MAJOR, LX_PTM_MINOR);
+ return (0);
+}
+
+static int
+audio_devt_translator(dev_t dev, lx_dev_t *jdev, int fd)
+{
+ int s_minor, l_minor;
+
+ if (fd == -1) {
+ s_minor = getminor(dev);
+ } else {
+ /*
+ * this is a cloning device so we have to ask the driver
+ * what kind of minor node this is
+ */
+ if (ioctl(fd, LXA_IOC_GETMINORNUM, &s_minor) < 0)
+ return (-EINVAL);
+ }
+
+ switch (s_minor) {
+ case LXA_MINORNUM_DSP:
+ l_minor = 3;
+ break;
+ case LXA_MINORNUM_MIXER:
+ l_minor = 0;
+ break;
+ default:
+ return (-EINVAL);
+ }
+
+ *jdev = LX_MAKEDEVICE(14, l_minor);
+ return (0);
+}
+
+static void
+s2l_dev_report(dev_t dev, lx_dev_t jdev)
+{
+ major_t maj;
+ minor_t min;
+ int lx_maj, lx_min;
+
+ if (lx_debug_enabled == 0)
+ return;
+
+ maj = getmajor(dev);
+ min = getminor(dev);
+
+ lx_maj = LX_GETMAJOR(jdev);
+ lx_min = LX_GETMINOR(jdev);
+
+ lx_debug("\ttranslated devt [%d, %d] -> [%d, %d]",
+ maj, min, lx_maj, lx_min);
+}
+
+static int
+s2l_devt(dev_t dev, lx_dev_t *jdev, int fd)
+{
+ minor_translator_t *mt;
+ int i, j, err;
+ major_t maj = getmajor(dev);
+ minor_t min = getminor(dev);
+
+ /* look for a devt translator for this major number */
+ for (i = 0; devt_translators[i].dt_driver != NULL; i++) {
+ if (devt_translators[i].dt_major == maj)
+ break;
+ }
+ if (devt_translators[i].dt_driver != NULL) {
+
+ /* try to translate the solaris devt to a linux devt */
+ switch (devt_translators[i].dt_type) {
+ case DTT_LIST:
+ mt = devt_translators[i].dt_list;
+ for (j = 0; mt[j].mt_path != NULL; j++) {
+ if (mt[j].mt_minor == min) {
+ assert(mt[j].mt_minor < LX_MINORMASK);
+
+ /* found a translation */
+ *jdev = LX_MAKEDEVICE(
+ mt[j].mt_lx_major,
+ mt[j].mt_lx_minor);
+ s2l_dev_report(dev, *jdev);
+ return (0);
+ }
+ }
+ break;
+
+ case DTT_CUSTOM:
+ err = devt_translators[i].dt_custom(dev, jdev, fd);
+ if (err == 0)
+ s2l_dev_report(dev, *jdev);
+ return (err);
+ break;
+ }
+ }
+
+ /* we don't have a translator for this device */
+ *jdev = LX_MAKEDEVICE(maj, min);
+ return (0);
+}
+
+static int
+stat_convert(uintptr_t lx_statp, struct stat *s, int fd)
+{
+ struct lx_stat buf;
+ lx_dev_t st_dev, st_rdev;
+ int err;
+
+ if ((err = s2l_devt(s->st_dev, &st_dev, fd)) != 0)
+ return (err);
+ if ((err = s2l_devt(s->st_rdev, &st_rdev, fd)) != 0)
+ return (err);
+
+ if ((st_dev > USHRT_MAX) || (st_rdev > USHRT_MAX) ||
+ (s->st_nlink > USHRT_MAX) || (s->st_size > LONG_MAX))
+ return (-EOVERFLOW);
+
+ /* Linux seems to report a 0 st_size for all block devices */
+ if ((s->st_mode & S_IFMT) == S_IFBLK)
+ s->st_size = 0;
+
+ bzero(&buf, sizeof (buf));
+ buf.st_dev = st_dev;
+ buf.st_rdev = st_rdev;
+ buf.st_ino = (lx_ino_t)s->st_ino;
+ buf.st_mode = s->st_mode;
+ buf.st_nlink = s->st_nlink;
+ buf.st_uid = LX_UID32_TO_UID16(s->st_uid);
+ buf.st_gid = LX_GID32_TO_GID16(s->st_gid);
+ buf.st_size = (lx_off_t)s->st_size;
+ buf.st_blksize = (lx_blksize_t)s->st_blksize;
+ buf.st_blocks = s->st_blocks;
+ buf.st_atime.ts_sec = s->st_atim.tv_sec;
+ buf.st_atime.ts_nsec = s->st_atim.tv_nsec;
+ buf.st_ctime.ts_sec = s->st_ctim.tv_sec;
+ buf.st_ctime.ts_nsec = s->st_ctim.tv_nsec;
+ buf.st_mtime.ts_sec = s->st_mtim.tv_sec;
+ buf.st_mtime.ts_nsec = s->st_mtim.tv_nsec;
+
+ if (uucopy(&buf, (void *)lx_statp, sizeof (buf)) != 0)
+ return (-errno);
+
+ return (0);
+}
+
+static int
+stat64_convert(uintptr_t lx_statp, struct stat64 *s, int fd)
+{
+ struct lx_stat64 buf;
+ lx_dev_t st_dev, st_rdev;
+ int err;
+
+ if ((err = s2l_devt(s->st_dev, &st_dev, fd)) != 0)
+ return (err);
+ if ((err = s2l_devt(s->st_rdev, &st_rdev, fd)) != 0)
+ return (err);
+
+ /* Linux seems to report a 0 st_size for all block devices */
+ if ((s->st_mode & S_IFMT) == S_IFBLK)
+ s->st_size = 0;
+
+ bzero(&buf, sizeof (buf));
+ buf.st_dev = st_dev;
+ buf.st_rdev = st_rdev;
+#if defined(_ILP32)
+ buf.st_small_ino = (lx_ino_t)(s->st_ino & UINT_MAX);
+#endif
+ buf.st_ino = (lx_ino64_t)s->st_ino;
+ buf.st_mode = s->st_mode;
+ buf.st_nlink = s->st_nlink;
+ buf.st_uid = s->st_uid;
+ buf.st_gid = s->st_gid;
+ buf.st_size = s->st_size;
+ buf.st_blksize = s->st_blksize;
+ buf.st_blocks = s->st_blocks;
+ buf.st_atime.ts_sec = s->st_atim.tv_sec;
+ buf.st_atime.ts_nsec = s->st_atim.tv_nsec;
+ buf.st_ctime.ts_sec = s->st_ctim.tv_sec;
+ buf.st_ctime.ts_nsec = s->st_ctim.tv_nsec;
+ buf.st_mtime.ts_sec = s->st_mtim.tv_sec;
+ buf.st_mtime.ts_nsec = s->st_mtim.tv_nsec;
+
+ if (uucopy(&buf, (void *)lx_statp, sizeof (buf)) != 0)
+ return (-errno);
+
+ return (0);
+}
+
+long
+lx_stat(uintptr_t p1, uintptr_t p2)
+{
+ char *path = (char *)p1;
+ struct stat sbuf;
+
+ lx_debug("\tstat(%s, ...)", path);
+ if (stat(path, &sbuf))
+ return (-errno);
+
+ return (stat_convert(p2, &sbuf, -1));
+}
+
+
+long
+lx_fstat(uintptr_t p1, uintptr_t p2)
+{
+ int fd = (int)p1;
+ struct stat sbuf;
+ char *path, path_buf[MAXPATHLEN];
+
+ if (lx_debug_enabled != 0) {
+ path = lx_fd_to_path(fd, path_buf, sizeof (path_buf));
+ if (path == NULL)
+ path = "?";
+
+ lx_debug("\tfstat(%d - %s, ...)", fd, path);
+ }
+ if (fstat(fd, &sbuf))
+ return (-errno);
+
+ return (stat_convert(p2, &sbuf, fd));
+}
+
+
+long
+lx_lstat(uintptr_t p1, uintptr_t p2)
+{
+ char *path = (char *)p1;
+ struct stat sbuf;
+
+ lx_debug("\tlstat(%s, ...)", path);
+ if (lstat(path, &sbuf))
+ return (-errno);
+
+ return (stat_convert(p2, &sbuf, -1));
+}
+
+long
+lx_stat64(uintptr_t p1, uintptr_t p2)
+{
+ char *path = (char *)p1;
+ struct stat64 sbuf;
+
+ lx_debug("\tstat64(%s, ...)", path);
+ if (stat64(path, &sbuf))
+ return (-errno);
+
+ return (stat64_convert(p2, &sbuf, -1));
+}
+
+
+long
+lx_fstat64(uintptr_t p1, uintptr_t p2)
+{
+ int fd = (int)p1;
+ struct stat64 sbuf;
+ char *path, path_buf[MAXPATHLEN];
+
+ if (lx_debug_enabled != 0) {
+ path = lx_fd_to_path(fd, path_buf, sizeof (path_buf));
+ if (path == NULL)
+ path = "?";
+
+ lx_debug("\tfstat64(%d - %s, ...)", fd, path);
+ }
+ if (fstat64(fd, &sbuf))
+ return (-errno);
+
+ return (stat64_convert(p2, &sbuf, fd));
+}
+
+long
+lx_fstatat64(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
+{
+ int atfd = (int)p1;
+ const char *path = (const char *)p2;
+ int flag;
+ struct stat64 sbuf;
+
+ if (atfd == LX_AT_FDCWD)
+ atfd = AT_FDCWD;
+
+ flag = ltos_at_flag(p4, AT_SYMLINK_NOFOLLOW, B_TRUE);
+ if (flag < 0)
+ return (-EINVAL);
+
+ if (fstatat64(atfd, path, &sbuf, flag))
+ return (-errno);
+
+ return (stat64_convert(p3, &sbuf, -1));
+}
+
+
+long
+lx_lstat64(uintptr_t p1, uintptr_t p2)
+{
+ char *path = (char *)p1;
+ struct stat64 sbuf;
+
+ lx_debug("\tlstat64(%s, ...)", path);
+ if (lstat64(path, &sbuf))
+ return (-errno);
+
+ return (stat64_convert(p2, &sbuf, -1));
+}
+
+/*
+ * devt translator definitions
+ */
+#define MINOR_TRANSLATOR(path, lx_major, lx_minor) \
+ { path, 0, lx_major, lx_minor }
+
+#define MINOR_TRANSLATOR_END \
+ { NULL, 0, 0, 0 }
+
+#define DEVT_TRANSLATOR(drv, flags, i) \
+ { drv, 0, flags, (uintptr_t)i }
+
+/*
+ * translators for devts
+ */
+static minor_translator_t mtranslator_mm[] = {
+ MINOR_TRANSLATOR("/dev/null", 1, 3),
+ MINOR_TRANSLATOR("/dev/zero", 1, 5),
+ MINOR_TRANSLATOR_END
+};
+static minor_translator_t mtranslator_random[] = {
+ MINOR_TRANSLATOR("/dev/random", 1, 8),
+ MINOR_TRANSLATOR("/dev/urandom", 1, 9),
+ MINOR_TRANSLATOR_END
+};
+static minor_translator_t mtranslator_sy[] = {
+ MINOR_TRANSLATOR("/dev/tty", 5, 0),
+ MINOR_TRANSLATOR_END
+};
+static minor_translator_t mtranslator_zcons[] = {
+ MINOR_TRANSLATOR("/dev/console", 5, 1),
+ MINOR_TRANSLATOR_END
+};
+static devt_translator_t devt_translators[] = {
+ DEVT_TRANSLATOR("mm", DTT_LIST, &mtranslator_mm),
+ DEVT_TRANSLATOR("random", DTT_LIST, &mtranslator_random),
+ DEVT_TRANSLATOR("sy", DTT_LIST, &mtranslator_sy),
+ DEVT_TRANSLATOR("zcons", DTT_LIST, &mtranslator_zcons),
+ DEVT_TRANSLATOR(LX_AUDIO_DRV, DTT_CUSTOM, audio_devt_translator),
+ DEVT_TRANSLATOR(LX_PTM_DRV, DTT_CUSTOM, ptm_devt_translator),
+ DEVT_TRANSLATOR("pts", DTT_CUSTOM, pts_devt_translator),
+ DEVT_TRANSLATOR(NULL, 0, 0)
+};
diff --git a/usr/src/lib/brand/lx/lx_brand/common/statfs.c b/usr/src/lib/brand/lx/lx_brand/common/statfs.c
new file mode 100644
index 0000000000..a18001dca7
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/statfs.c
@@ -0,0 +1,343 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <libintl.h>
+#include <string.h>
+#include <strings.h>
+#include <sys/types.h>
+#include <sys/statvfs.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+
+#include <sys/lx_debug.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_statfs.h>
+#include <sys/lx_syscall.h>
+
+/*
+ * these defines must exist before we include regexp.h, see regexp(5)
+ */
+#define RE_SIZE 1024
+#define INIT char *sp = instring;
+#define GETC() (*sp++)
+#define PEEKC() (*sp)
+#define UNGETC(c) (--sp)
+#define RETURN(c) return (NULL);
+#define ERROR(c) return ((char *)c);
+
+/*
+ * for regular expressions we're using regexp(5).
+ *
+ * we'd really prefer to use some other nicer regular expressions
+ * interfaces (like regcmp(3c), regcomp(3c), or re_comp(3c)) but we
+ * can't because all these other interfaces rely on the ability
+ * to allocate memory via libc malloc()/calloc() calls, which
+ * we can't really do here.
+ *
+ * we could optionally use regexpr(3gen) but we don't since the
+ * interfaces there are incredibly similar to the regexp(5)
+ * interfaces we're already using and we'd have the added
+ * requirement of linking against libgen.
+ *
+ * another option that was considered is fnmatch(3c) but the
+ * limited pattern expansion capability of this interface would
+ * force us to include more patterns to check against.
+ */
+#include <regexp.h>
+
+static struct lx_ftype_path {
+ char *lfp_path;
+ char lfp_re[RE_SIZE];
+ int lfp_magic;
+ char *lfp_magic_str;
+} ftype_path_list[] = {
+ { "^/dev/pts$", "",
+ LX_DEVPTS_SUPER_MAGIC, "LX_DEVPTS_SUPER_MAGIC" },
+ { "^/dev/pts/$", "",
+ LX_DEVPTS_SUPER_MAGIC, "LX_DEVPTS_SUPER_MAGIC" },
+ { "^/dev/pts/[0-9][0-9]*$", "",
+ LX_DEVPTS_SUPER_MAGIC, "LX_DEVPTS_SUPER_MAGIC" },
+ { NULL, "",
+ 0, NULL }
+};
+
+/*
+ * For lack of linux equivalents, we present lofs and zfs as being ufs.
+ */
+static struct lx_ftype_name {
+ const char *lfn_name;
+ int lfn_magic;
+ char *lfn_magic_str;
+} ftype_name_list[] = {
+ { "hsfs", LX_ISOFS_SUPER_MAGIC, "LX_ISOFS_SUPER_MAGIC" },
+ { "nfs", LX_NFS_SUPER_MAGIC, "LX_NFS_SUPER_MAGIC" },
+ { "pcfs", LX_MSDOS_SUPER_MAGIC, "LX_MSDOS_SUPER_MAGIC" },
+ { "lx_proc", LX_PROC_SUPER_MAGIC, "LX_PROC_SUPER_MAGIC" },
+ { "tmpfs", LX_TMPFS_SUPER_MAGIC, "LX_TMPFS_SUPER_MAGIC" },
+ { "ufs", LX_UFS_MAGIC, "LX_UFS_MAGIC" },
+ { "lofs", LX_UFS_MAGIC, "LX_UFS_MAGIC" },
+ { "zfs", LX_UFS_MAGIC, "LX_UFS_MAGIC" },
+ { NULL, 0, NULL }
+};
+
+int
+lx_statfs_init()
+{
+ int i;
+ char *rv;
+
+ for (i = 0; ftype_path_list[i].lfp_path != NULL; i++) {
+ rv = compile(
+ ftype_path_list[i].lfp_path,
+ ftype_path_list[i].lfp_re,
+ ftype_path_list[i].lfp_re + RE_SIZE, '\0');
+ if (rv == NULL)
+ continue;
+
+ lx_debug("lx_statfs_init compile(\"%s\") failed",
+ ftype_path_list[i].lfp_path);
+ return (1);
+ }
+ return (0);
+}
+
+static int
+stol_type(const char *path, const char *name)
+{
+ int i;
+ lx_debug("\tstol_type(\"%s\", \"%s\")\n", path == NULL ? "NULL" : path,
+ name == NULL ? "NULL" : name);
+
+ if (path != NULL) {
+ char userpath[MAXPATHLEN];
+
+ if (uucopystr(path, userpath, MAXPATHLEN) == -1)
+ return (-errno);
+
+ for (i = 0; ftype_path_list[i].lfp_path != NULL; i++) {
+ if (step(userpath, ftype_path_list[i].lfp_re) == 0)
+ continue;
+
+ /* got a match on the fs path */
+ lx_debug("\ttranslated f_type to 0x%x - %s",
+ ftype_path_list[i].lfp_magic,
+ ftype_path_list[i].lfp_magic_str);
+ return (ftype_path_list[i].lfp_magic);
+ }
+ }
+
+ assert(name != NULL);
+ for (i = 0; ftype_name_list[i].lfn_name != NULL; i++) {
+ if (strcmp(name, ftype_name_list[i].lfn_name) == 0) {
+
+ /* got a match on the fs name */
+ lx_debug("\ttranslated f_type to 0x%x - %s",
+ ftype_name_list[i].lfn_magic,
+ ftype_name_list[i].lfn_magic_str);
+ return (ftype_name_list[i].lfn_magic);
+ }
+ }
+
+ /* we don't know what the fs type is so just set it to 0 */
+ return (0);
+}
+
+/*
+ * The Linux statfs() is similar to the Solaris statvfs() call, the main
+ * difference being the use of a numeric 'f_type' identifier instead of the
+ * 'f_basetype' string.
+ */
+static int
+stol_statfs(const char *path, struct lx_statfs *l, struct statvfs *s)
+{
+ int type;
+
+ if ((type = stol_type(path, s->f_basetype)) < 0)
+ return (type);
+
+ l->f_type = type;
+ l->f_bsize = s->f_frsize; /* other fields depend on frsize */
+ l->f_blocks = s->f_blocks;
+ l->f_bfree = s->f_bfree;
+ l->f_bavail = s->f_bavail;
+ l->f_files = s->f_files;
+ l->f_ffree = s->f_ffree;
+ l->f_fsid = s->f_fsid;
+ l->f_namelen = s->f_namemax;
+ l->f_frsize = s->f_frsize;
+ bzero(&(l->f_spare), sizeof (l->f_spare));
+
+ return (0);
+}
+
+static int
+stol_statfs64(const char *path, struct lx_statfs64 *l, struct statvfs64 *s)
+{
+ int type;
+
+ if ((type = stol_type(path, s->f_basetype)) < 0)
+ return (type);
+
+ l->f_type = type;
+ l->f_bsize = s->f_frsize; /* other fields depend on frsize */
+ l->f_blocks = s->f_blocks;
+ l->f_bfree = s->f_bfree;
+ l->f_bavail = s->f_bavail;
+ l->f_files = s->f_files;
+ l->f_ffree = s->f_ffree;
+ l->f_fsid = s->f_fsid;
+ l->f_namelen = s->f_namemax;
+ l->f_frsize = s->f_frsize;
+ bzero(&(l->f_spare), sizeof (l->f_spare));
+
+ return (0);
+}
+
+long
+lx_statfs(uintptr_t p1, uintptr_t p2)
+{
+ const char *path = (const char *)p1;
+ struct lx_statfs lxfs, *fs = (struct lx_statfs *)p2;
+ struct statvfs vfs;
+ int err;
+
+ lx_debug("\tstatvfs(%s, 0x%p)", path, fs);
+ if (statvfs(path, &vfs) != 0)
+ return (-errno);
+
+ if ((err = stol_statfs(path, &lxfs, &vfs)) != 0)
+ return (err);
+
+ if (uucopy(&lxfs, fs, sizeof (struct lx_statfs)) != 0)
+ return (-errno);
+
+ return (0);
+}
+
+long
+lx_fstatfs(uintptr_t p1, uintptr_t p2)
+{
+ struct lx_statfs lxfs, *fs = (struct lx_statfs *)p2;
+ struct stat64 sb;
+ struct statvfs vfs;
+ char *path, path_buf[MAXPATHLEN];
+ int fd = (int)p1;
+ int err;
+
+ lx_debug("\tfstatvfs(%d, 0x%p)", fd, fs);
+
+ /*
+ * fstatfs emulation for a pipe.
+ */
+ if (fstat64(fd, &sb) == 0 && S_ISFIFO(sb.st_mode)) {
+ lxfs.f_type = LX_PIPEFS_MAGIC;
+ lxfs.f_bsize = 4096;
+ lxfs.f_blocks = 0;
+ lxfs.f_bfree = 0;
+ lxfs.f_bavail = 0;
+ lxfs.f_files = 0;
+ lxfs.f_ffree = 0;
+ lxfs.f_fsid = 0;
+ lxfs.f_namelen = 255;
+ lxfs.f_frsize = 4096;
+ } else {
+ if (fstatvfs(fd, &vfs) != 0)
+ return (-errno);
+
+ path = lx_fd_to_path(fd, path_buf, sizeof (path_buf));
+
+ if ((err = stol_statfs(path, &lxfs, &vfs)) != 0)
+ return (err);
+ }
+
+ if (uucopy(&lxfs, fs, sizeof (struct lx_statfs)) != 0)
+ return (-errno);
+
+ return (0);
+}
+
+/* ARGSUSED */
+long
+lx_statfs64(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ const char *path = (const char *)p1;
+ struct lx_statfs64 lxfs, *fs = (struct lx_statfs64 *)p3;
+ struct statvfs64 vfs;
+ int err;
+
+ lx_debug("\tstatvfs64(%s, %d, 0x%p)", path, p2, fs);
+ if (statvfs64(path, &vfs) != 0)
+ return (-errno);
+
+ if ((err = stol_statfs64(path, &lxfs, &vfs)) != 0)
+ return (err);
+
+ if (uucopy(&lxfs, fs, sizeof (struct lx_statfs64)) != 0)
+ return (-errno);
+
+ return (0);
+}
+
+/* ARGSUSED */
+long
+lx_fstatfs64(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ struct lx_statfs64 lxfs, *fs = (struct lx_statfs64 *)p3;
+ struct stat64 sb;
+ struct statvfs64 vfs;
+ char *path, path_buf[MAXPATHLEN];
+ int fd = (int)p1;
+ int err;
+
+ lx_debug("\tfstatvfs64(%d, %d, 0x%p)", fd, p2, fs);
+ if (fstat64(fd, &sb) == 0 && S_ISFIFO(sb.st_mode)) {
+ lxfs.f_type = LX_PIPEFS_MAGIC;
+ lxfs.f_bsize = 4096;
+ lxfs.f_blocks = 0;
+ lxfs.f_bfree = 0;
+ lxfs.f_bavail = 0;
+ lxfs.f_files = 0;
+ lxfs.f_ffree = 0;
+ lxfs.f_fsid = 0;
+ lxfs.f_namelen = 255;
+ lxfs.f_frsize = 4096;
+ } else {
+ if (fstatvfs64(fd, &vfs) != 0)
+ return (-errno);
+
+ path = lx_fd_to_path(fd, path_buf, sizeof (path_buf));
+
+ if ((err = stol_statfs64(path, &lxfs, &vfs)) != 0)
+ return (err);
+ }
+
+ if (uucopy(&lxfs, fs, sizeof (struct lx_statfs64)) != 0)
+ return (-errno);
+
+ return (0);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/sysctl.c b/usr/src/lib/brand/lx/lx_brand/common/sysctl.c
new file mode 100644
index 0000000000..262a9c3830
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/sysctl.c
@@ -0,0 +1,137 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <alloca.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_debug.h>
+
+/*
+ * sysctl() implementation. The full set of possible values is incredibly
+ * large; we only implement the bare minimum here, namely basic kernel
+ * information.
+ *
+ * For the moment, we also print out debugging messages if the application
+ * attempts to write or access any other values, so we can tell if we are not
+ * supporting something we should be.
+ */
+
+struct lx_sysctl_args {
+ int *name;
+ int nlen;
+ void *oldval;
+ size_t *oldlenp;
+ void *newval;
+ size_t newlen;
+};
+
+#define LX_CTL_KERN 1
+
+#define LX_KERN_OSTYPE 1
+#define LX_KERN_OSRELEASE 2
+#define LX_KERN_OSREV 3
+#define LX_KERN_VERSION 4
+
+long
+lx_sysctl(uintptr_t raw)
+{
+ struct lx_sysctl_args args;
+ int name[2];
+ size_t oldlen;
+ char *namebuf;
+
+ if (uucopy((void *)raw, &args, sizeof (args)) < 0)
+ return (-EFAULT);
+
+ /*
+ * We only allow [ CTL_KERN, KERN_* ] pairs, so reject anything that
+ * doesn't have exactly two values starting with LX_CTL_KERN.
+ */
+ if (args.nlen != 2)
+ return (-ENOTDIR);
+
+ if (uucopy(args.name, name, sizeof (name)) < 0)
+ return (-EFAULT);
+
+ if (name[0] != LX_CTL_KERN) {
+ lx_debug("sysctl: read of [%d, %d] unsupported",
+ name[0], name[1]);
+ return (-ENOTDIR);
+ }
+
+ /* We don't support writing new sysctl values. */
+ if ((args.newval != NULL) || (args.newlen != 0)) {
+ lx_debug("sysctl: write of [%d, %d] unsupported",
+ name[0], name[1]);
+ return (-EPERM);
+ }
+
+ /*
+ * It may seem silly, but passing in a NULL oldval pointer and not
+ * writing any new values is a perfectly legal thing to do and should
+ * succeed.
+ */
+ if (args.oldval == NULL)
+ return (0);
+
+ /*
+ * Likewise, Linux specifies that setting a non-NULL oldval but a
+ * zero *oldlenp should result in an errno of EFAULT.
+ */
+ if ((uucopy(args.oldlenp, &oldlen, sizeof (oldlen)) < 0) ||
+ (oldlen == 0))
+ return (-EFAULT);
+
+ namebuf = SAFE_ALLOCA(oldlen);
+ if (namebuf == NULL)
+ return (-ENOMEM);
+
+ switch (name[1]) {
+ case LX_KERN_OSTYPE:
+ (void) strlcpy(namebuf, LX_UNAME_SYSNAME, oldlen);
+ break;
+ case LX_KERN_OSRELEASE:
+ (void) strlcpy(namebuf, lx_release, oldlen);
+ break;
+ case LX_KERN_VERSION:
+ (void) strlcpy(namebuf, LX_UNAME_VERSION, oldlen);
+ break;
+ default:
+ lx_debug("sysctl: read of [CTL_KERN, %d] unsupported", name[1]);
+ return (-ENOTDIR);
+ }
+
+ oldlen = strlen(namebuf);
+
+ if ((uucopy(namebuf, args.oldval, oldlen) < 0) ||
+ (uucopy(&oldlen, args.oldlenp, sizeof (oldlen)) < 0))
+ return (-EFAULT);
+
+ return (0);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/sysv_ipc.c b/usr/src/lib/brand/lx/lx_brand/common/sysv_ipc.c
new file mode 100644
index 0000000000..7eb6a6cd12
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/sysv_ipc.c
@@ -0,0 +1,947 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <errno.h>
+#include <unistd.h>
+#include <strings.h>
+#include <rctl.h>
+#include <alloca.h>
+#include <values.h>
+#include <sys/syscall.h>
+#include <sys/msg.h>
+#include <sys/ipc.h>
+#include <sys/sem.h>
+#include <sys/shm.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_types.h>
+#include <sys/lx_sysv_ipc.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_syscall.h>
+
+#define SLOT_SEM 0
+#define SLOT_SHM 1
+#define SLOT_MSG 2
+
+static int
+get_rctlval(rctlblk_t *rblk, char *name)
+{
+ rctl_qty_t r;
+
+ if (getrctl(name, NULL, rblk, RCTL_FIRST) == -1)
+ return (-errno);
+
+ r = rctlblk_get_value(rblk);
+ if (r > MAXINT)
+ return (-EOVERFLOW);
+ return (r);
+}
+
+/*
+ * Given a slot number and a maximum number of ids to extract from the
+ * kernel, return the msgid in the provided slot.
+ */
+static int
+slot_to_id(int type, int slot)
+{
+ uint_t nids, max;
+ int *idbuf = NULL;
+ int r = 0;
+
+ nids = 0;
+ for (;;) {
+ switch (type) {
+ case SLOT_SEM:
+ r = semids(idbuf, nids, &max);
+ break;
+ case SLOT_SHM:
+ r = shmids(idbuf, nids, &max);
+ break;
+ case SLOT_MSG:
+ r = msgids(idbuf, nids, &max);
+ break;
+ }
+
+ if (r < 0)
+ return (-errno);
+
+ if (max == 0)
+ return (-EINVAL);
+
+ if (max <= nids)
+ return (idbuf[slot]);
+
+ nids = max;
+ if ((idbuf = (int *)SAFE_ALLOCA(sizeof (int) * nids)) == NULL)
+ return (-ENOMEM);
+ }
+}
+
+/*
+ * Semaphore operations.
+ */
+long
+lx_semget(key_t key, int nsems, int semflg)
+{
+ int sol_flag;
+ int r;
+
+ lx_debug("\nsemget(%d, %d, %d)\n", key, nsems, semflg);
+ sol_flag = semflg & S_IAMB;
+ if (semflg & LX_IPC_CREAT)
+ sol_flag |= IPC_CREAT;
+ if (semflg & LX_IPC_EXCL)
+ sol_flag |= IPC_EXCL;
+
+ r = semget(key, nsems, sol_flag);
+ return ((r < 0) ? -errno : r);
+}
+
+long
+lx_semop(int semid, void *p1, size_t nsops)
+{
+ int r;
+ struct sembuf *sops = (struct sembuf *)p1;
+
+ lx_debug("\nsemop(%d, 0x%p, %u)\n", semid, sops, nsops);
+ if (nsops == 0)
+ return (-EINVAL);
+
+ r = semop(semid, sops, nsops);
+ return ((r < 0) ? -errno : r);
+}
+
+long
+lx_semtimedop(int semid, void *p1, size_t nsops, struct timespec *timeout)
+{
+ int r;
+ struct sembuf *sops = (struct sembuf *)p1;
+
+ lx_debug("\nsemtimedop(%d, 0x%p, %u, 0x%p)\n", semid, sops, nsops,
+ timeout);
+ if (nsops == 0)
+ return (-EINVAL);
+
+ r = semtimedop(semid, sops, nsops, timeout);
+ return ((r < 0) ? -errno : r);
+}
+
+static int
+lx_semctl_ipcset(int semid, void *buf)
+{
+ struct lx_semid_ds semds;
+ struct semid_ds sol_semds;
+ int r;
+
+ if (uucopy(buf, &semds, sizeof (semds)))
+ return (-errno);
+
+ bzero(&sol_semds, sizeof (sol_semds));
+ sol_semds.sem_perm.uid = semds.sem_perm.uid;
+ sol_semds.sem_perm.gid = semds.sem_perm.gid;
+ sol_semds.sem_perm.mode = semds.sem_perm.mode;
+
+ r = semctl(semid, 0, IPC_SET, &sol_semds);
+ return ((r < 0) ? -errno : r);
+}
+
+static int
+lx_semctl_ipcstat(int semid, void *buf)
+{
+ struct lx_semid_ds semds;
+ struct semid_ds sol_semds;
+
+ if (semctl(semid, 0, IPC_STAT, &sol_semds) != 0)
+ return (-errno);
+
+ bzero(&semds, sizeof (semds));
+ semds.sem_perm.key = sol_semds.sem_perm.key;
+ semds.sem_perm.seq = sol_semds.sem_perm.seq;
+ semds.sem_perm.uid = sol_semds.sem_perm.uid;
+ semds.sem_perm.gid = sol_semds.sem_perm.gid;
+ semds.sem_perm.cuid = sol_semds.sem_perm.cuid;
+ semds.sem_perm.cgid = sol_semds.sem_perm.cgid;
+
+ /* Linux only uses the bottom 9 bits */
+ semds.sem_perm.mode = sol_semds.sem_perm.mode & S_IAMB;
+ semds.sem_otime = sol_semds.sem_otime;
+ semds.sem_ctime = sol_semds.sem_ctime;
+ semds.sem_nsems = sol_semds.sem_nsems;
+
+ if (uucopy(&semds, buf, sizeof (semds)))
+ return (-errno);
+
+ return (0);
+}
+
+static int
+lx_semctl_ipcinfo(void *buf)
+{
+ struct lx_seminfo i;
+ rctlblk_t *rblk;
+ int rblksz;
+ uint_t nids;
+ int idbuf;
+
+ rblksz = rctlblk_size();
+ if ((rblk = (rctlblk_t *)SAFE_ALLOCA(rblksz)) == NULL)
+ return (-ENOMEM);
+
+ bzero(&i, sizeof (i));
+ if ((i.semmni = get_rctlval(rblk, "project.max-sem-ids")) < 0)
+ return (i.semmni);
+ if ((i.semmsl = get_rctlval(rblk, "process.max-sem-nsems")) < 0)
+ return (i.semmsl);
+ if ((i.semopm = get_rctlval(rblk, "process.max-sem-ops")) < 0)
+ return (i.semopm);
+
+ /*
+ * We don't have corresponding rctls for these fields. The values
+ * are taken from the formulas used to derive the defaults listed
+ * in the Linux header file. We're lying, but trying to be
+ * coherent about it.
+ */
+ i.semmap = i.semmni;
+ i.semmns = i.semmni * i.semmsl;
+ i.semmnu = INT_MAX;
+ i.semume = INT_MAX;
+ i.semvmx = LX_SEMVMX;
+ if (semids(&idbuf, 0, &nids) < 0)
+ return (-errno);
+ i.semusz = nids;
+ i.semaem = INT_MAX;
+
+ if (uucopy(&i, buf, sizeof (i)) != 0)
+ return (-errno);
+
+ return (nids);
+}
+
+static int
+lx_semctl_semstat(int slot, void *buf)
+{
+ int r, semid;
+
+ semid = slot_to_id(SLOT_SEM, slot);
+ if (semid < 0)
+ return (semid);
+
+ r = lx_semctl_ipcstat(semid, buf);
+ return (r < 0 ? r : semid);
+}
+
+/*
+ * For the SETALL operation, we have to examine each of the semaphore
+ * values to be sure it is legal.
+ */
+static int
+lx_semctl_setall(int semid, ushort_t *arg)
+{
+ struct semid_ds semds;
+ ushort_t *vals;
+ int i, sz, r;
+
+ /*
+ * Find out how many semaphores are involved, reserve enough
+ * memory for an internal copy of the array, and then copy it in
+ * from the process.
+ */
+ if (semctl(semid, 0, IPC_STAT, &semds) != 0)
+ return (-errno);
+ sz = semds.sem_nsems * sizeof (ushort_t);
+ if ((vals = SAFE_ALLOCA(sz)) == NULL)
+ return (-ENOMEM);
+ if (uucopy(arg, vals, sz))
+ return (-errno);
+
+ /* Validate each of the values. */
+ for (i = 0; i < semds.sem_nsems; i++)
+ if (vals[i] > LX_SEMVMX)
+ return (-ERANGE);
+
+ r = semctl(semid, 0, SETALL, arg);
+
+ return ((r < 0) ? -errno : r);
+}
+
+long
+lx_semctl(int semid, int semnum, int cmd, void *ptr)
+{
+#if defined(_ILP32)
+ union lx_semun arg;
+#endif
+ int rval;
+ int opt = cmd & ~LX_IPC_64;
+ int use_errno = 0;
+ uint_t val;
+
+ lx_debug("\nsemctl(%d, %d, %d, 0x%p)\n", semid, semnum, cmd, ptr);
+
+#if defined(_ILP32)
+ /*
+ * The final arg to semctl() is a pointer to a union. For some
+ * commands we can hand that pointer directly to the kernel. For
+ * these commands, we need to extract an argument from the union
+ * before calling into the kernel.
+ */
+ if (opt == LX_SETVAL || opt == LX_SETALL || opt == LX_GETALL ||
+ opt == LX_IPC_SET || opt == LX_IPC_STAT || opt == LX_SEM_STAT ||
+ opt == LX_IPC_INFO || opt == LX_SEM_INFO)
+ if (uucopy(ptr, &arg, sizeof (arg)))
+ return (-errno);
+#endif
+
+ switch (opt) {
+ case LX_GETVAL:
+ use_errno = 1;
+ rval = semctl(semid, semnum, GETVAL, NULL);
+ break;
+ case LX_SETVAL:
+#if defined(_ILP32)
+ val = arg.val;
+#else
+ val = (uint_t)(uintptr_t)ptr;
+#endif
+ if (val > LX_SEMVMX) {
+ return (-ERANGE);
+ }
+ use_errno = 1;
+ rval = semctl(semid, semnum, SETVAL, val);
+ break;
+ case LX_GETPID:
+ use_errno = 1;
+ rval = semctl(semid, semnum, GETPID, NULL);
+ break;
+ case LX_GETNCNT:
+ use_errno = 1;
+ rval = semctl(semid, semnum, GETNCNT, NULL);
+ break;
+ case LX_GETZCNT:
+ use_errno = 1;
+ rval = semctl(semid, semnum, GETZCNT, NULL);
+ break;
+ case LX_GETALL:
+ use_errno = 1;
+#if defined(_ILP32)
+ rval = semctl(semid, semnum, GETALL, arg.sems);
+#else
+ rval = semctl(semid, semnum, GETALL, ptr);
+#endif
+ break;
+ case LX_SETALL:
+#if defined(_ILP32)
+ rval = lx_semctl_setall(semid, arg.sems);
+#else
+ rval = lx_semctl_setall(semid, ptr);
+#endif
+ break;
+ case LX_IPC_RMID:
+ use_errno = 1;
+ rval = semctl(semid, semnum, IPC_RMID, NULL);
+ break;
+ case LX_SEM_STAT:
+#if defined(_ILP32)
+ rval = lx_semctl_semstat(semid, arg.semds);
+#else
+ rval = lx_semctl_semstat(semid, ptr);
+#endif
+ break;
+ case LX_IPC_STAT:
+#if defined(_ILP32)
+ rval = lx_semctl_ipcstat(semid, arg.semds);
+#else
+ rval = lx_semctl_ipcstat(semid, ptr);
+#endif
+ break;
+
+ case LX_IPC_SET:
+#if defined(_ILP32)
+ rval = lx_semctl_ipcset(semid, arg.semds);
+#else
+ rval = lx_semctl_ipcset(semid, ptr);
+#endif
+ break;
+
+ case LX_IPC_INFO:
+ case LX_SEM_INFO:
+#if defined(_ILP32)
+ rval = lx_semctl_ipcinfo(arg.semds);
+#else
+ rval = lx_semctl_ipcinfo(ptr);
+#endif
+ break;
+
+ default:
+ return (-EINVAL);
+ }
+
+ if (use_errno == 1 && rval < 0)
+ return (-errno);
+ return (rval);
+}
+
+/*
+ * msg operations.
+ */
+long
+lx_msgget(key_t key, int flag)
+{
+ int sol_flag;
+ int r;
+
+ lx_debug("\tlx_msgget(%d, %d)\n", key, flag);
+
+ sol_flag = flag & S_IAMB;
+ if (flag & LX_IPC_CREAT)
+ sol_flag |= IPC_CREAT;
+ if (flag & LX_IPC_EXCL)
+ sol_flag |= IPC_EXCL;
+
+ r = msgget(key, sol_flag);
+ return (r < 0 ? -errno : r);
+}
+
+long
+lx_msgsnd(int id, void *p1, size_t sz, int flag)
+{
+ int sol_flag = 0;
+ int r;
+ struct msgbuf *buf = (struct msgbuf *)p1;
+
+ lx_debug("\tlx_msgsnd(%d, 0x%p, %d, %d)\n", id, buf, sz, flag);
+
+ if (flag & LX_IPC_NOWAIT)
+ sol_flag |= IPC_NOWAIT;
+
+ if (((ssize_t)sz < 0) || (sz > LX_MSGMAX))
+ return (-EINVAL);
+
+ r = msgsnd(id, buf, sz, sol_flag);
+ return (r < 0 ? -errno : r);
+}
+
+long
+lx_msgrcv(int id, void *msgp, size_t sz, long msgtype, int flag)
+{
+ int sol_flag = 0;
+ ssize_t r;
+
+ lx_debug("\tlx_msgrcv(%d, 0x%p, %d, %d, %ld, %d)\n",
+ id, msgp, sz, msgtype, flag);
+
+ /*
+ * Check for a negative sz parameter.
+ *
+ * Unlike msgsnd(2), the Linux man page does not specify that
+ * msgrcv(2) should return EINVAL if (sz > MSGMAX), only if (sz < 0).
+ */
+ if ((ssize_t)sz < 0)
+ return (-EINVAL);
+
+ if (flag & LX_MSG_NOERROR)
+ sol_flag |= MSG_NOERROR;
+ if (flag & LX_IPC_NOWAIT)
+ sol_flag |= IPC_NOWAIT;
+
+ r = msgrcv(id, msgp, sz, msgtype, sol_flag);
+ return (r < 0 ? -errno : r);
+}
+
+static int
+lx_msgctl_ipcstat(int msgid, void *buf)
+{
+ struct lx_msqid_ds msgids;
+ struct msqid_ds sol_msgids;
+ int r;
+
+ r = msgctl(msgid, IPC_STAT, &sol_msgids);
+ if (r < 0)
+ return (-errno);
+
+ bzero(&msgids, sizeof (msgids));
+ msgids.msg_perm.key = sol_msgids.msg_perm.key;
+ msgids.msg_perm.seq = sol_msgids.msg_perm.seq;
+ msgids.msg_perm.uid = sol_msgids.msg_perm.uid;
+ msgids.msg_perm.gid = sol_msgids.msg_perm.gid;
+ msgids.msg_perm.cuid = sol_msgids.msg_perm.cuid;
+ msgids.msg_perm.cgid = sol_msgids.msg_perm.cgid;
+
+ /* Linux only uses the bottom 9 bits */
+ msgids.msg_perm.mode = sol_msgids.msg_perm.mode & S_IAMB;
+
+ msgids.msg_stime = sol_msgids.msg_stime;
+ msgids.msg_rtime = sol_msgids.msg_rtime;
+ msgids.msg_ctime = sol_msgids.msg_ctime;
+ msgids.msg_qbytes = sol_msgids.msg_qbytes;
+ msgids.msg_cbytes = sol_msgids.msg_cbytes;
+ msgids.msg_qnum = sol_msgids.msg_qnum;
+ msgids.msg_lspid = sol_msgids.msg_lspid;
+ msgids.msg_lrpid = sol_msgids.msg_lrpid;
+
+ if (uucopy(&msgids, buf, sizeof (msgids)))
+ return (-errno);
+
+ return (0);
+}
+
+static int
+lx_msgctl_ipcinfo(int cmd, void *buf)
+{
+ struct lx_msginfo m;
+ rctlblk_t *rblk;
+ int idbuf, rblksz, msgseg, maxmsgs;
+ uint_t nids;
+ int rval;
+
+ rblksz = rctlblk_size();
+ if ((rblk = (rctlblk_t *)SAFE_ALLOCA(rblksz)) == NULL)
+ return (-ENOMEM);
+
+ bzero(&m, sizeof (m));
+ if ((m.msgmni = get_rctlval(rblk, "project.max-msg-ids")) < 0)
+ return (m.msgmni);
+ if ((m.msgmnb = get_rctlval(rblk, "process.max-msg-qbytes")) < 0)
+ return (m.msgmnb);
+
+ if (cmd == LX_IPC_INFO) {
+ if ((maxmsgs = get_rctlval(rblk,
+ "process.max-msg-messages")) < 0)
+ return (maxmsgs);
+ m.msgtql = maxmsgs * m.msgmni;
+ m.msgmap = m.msgmnb;
+ m.msgpool = m.msgmax * m.msgmnb;
+ rval = 0;
+ } else {
+ if (msgids(&idbuf, 0, &nids) < 0)
+ return (-errno);
+ m.msgpool = nids;
+
+ /*
+ * For these fields, we can't even come up with a good fake
+ * approximation. These are listed as 'obsolete' or
+ * 'unused' in the header files, so hopefully nobody is
+ * relying on them anyway.
+ */
+ m.msgtql = INT_MAX;
+ m.msgmap = INT_MAX;
+ rval = nids;
+ }
+
+ /*
+ * We don't have corresponding rctls for these fields. The values
+ * are taken from the formulas used to derive the defaults listed
+ * in the Linux header file. We're lying, but trying to be
+ * coherent about it.
+ */
+ m.msgmax = m.msgmnb;
+ m.msgssz = 16;
+ msgseg = (m.msgpool * 1024) / m.msgssz;
+ m.msgseg = (msgseg > 0xffff) ? 0xffff : msgseg;
+
+ if (uucopy(&m, buf, sizeof (m)))
+ return (-errno);
+ return (rval);
+}
+
+static int
+lx_msgctl_ipcset(int msgid, void *buf)
+{
+ struct lx_msqid_ds msgids;
+ struct msqid_ds sol_msgids;
+ int r;
+
+ if (uucopy(buf, &msgids, sizeof (msgids)))
+ return (-errno);
+
+ bzero(&sol_msgids, sizeof (sol_msgids));
+ sol_msgids.msg_perm.uid = LX_UID16_TO_UID32(msgids.msg_perm.uid);
+ sol_msgids.msg_perm.gid = LX_UID16_TO_UID32(msgids.msg_perm.gid);
+
+ /* Linux only uses the bottom 9 bits */
+ sol_msgids.msg_perm.mode = msgids.msg_perm.mode & S_IAMB;
+ sol_msgids.msg_qbytes = msgids.msg_qbytes;
+
+ r = msgctl(msgid, IPC_SET, &sol_msgids);
+ return (r < 0 ? -errno : r);
+}
+
+static int
+lx_msgctl_msgstat(int slot, void *buf)
+{
+ int r, msgid;
+
+ lx_debug("msgstat(%d, 0x%p)\n", slot, buf);
+
+ msgid = slot_to_id(SLOT_MSG, slot);
+
+ if (msgid < 0)
+ return (msgid);
+
+ r = lx_msgctl_ipcstat(msgid, buf);
+ return (r < 0 ? r : msgid);
+}
+
+/*
+ * Split off the various msgctl's here
+ */
+long
+lx_msgctl(int msgid, int cmd, void *buf)
+{
+ int r;
+
+ lx_debug("\tlx_msgctl(%d, %d, 0x%p)\n", msgid, cmd, buf);
+ switch (cmd & ~LX_IPC_64) {
+ case LX_IPC_RMID:
+ r = msgctl(msgid, IPC_RMID, NULL);
+ if (r < 0)
+ r = -errno;
+ break;
+ case LX_IPC_SET:
+ r = lx_msgctl_ipcset(msgid, buf);
+ break;
+ case LX_IPC_STAT:
+ r = lx_msgctl_ipcstat(msgid, buf);
+ break;
+ case LX_MSG_STAT:
+ r = lx_msgctl_msgstat(msgid, buf);
+ break;
+
+ case LX_IPC_INFO:
+ case LX_MSG_INFO:
+ r = lx_msgctl_ipcinfo(cmd, buf);
+ break;
+
+ default:
+ r = -EINVAL;
+ break;
+ }
+
+ return (r);
+}
+
+/*
+ * shm-related operations.
+ */
+long
+lx_shmget(key_t key, size_t size, int flag)
+{
+ int sol_flag;
+ int r;
+
+ lx_debug("\tlx_shmget(%d, %d, %d)\n", key, size, flag);
+
+ sol_flag = flag & S_IAMB;
+ if (flag & LX_IPC_CREAT)
+ sol_flag |= IPC_CREAT;
+ if (flag & LX_IPC_EXCL)
+ sol_flag |= IPC_EXCL;
+
+ r = shmget(key, size, sol_flag);
+ return (r < 0 ? -errno : r);
+}
+
+long
+lx_shmat(int shmid, void *addr, int flags)
+{
+ int sol_flags;
+ void *ptr;
+
+ lx_debug("\tlx_shmat(%d, 0x%p, %d)\n", shmid, addr, flags);
+
+ sol_flags = 0;
+ if (flags & LX_SHM_RDONLY)
+ sol_flags |= SHM_RDONLY;
+ if (flags & LX_SHM_RND)
+ sol_flags |= SHM_RND;
+ if ((flags & LX_SHM_REMAP) && (addr == NULL))
+ return (-EINVAL);
+
+ ptr = shmat(shmid, addr, sol_flags);
+ if (ptr == (void *)-1)
+ return (-errno);
+
+ return ((ssize_t)ptr);
+}
+
+static int
+lx_shmctl_ipcinfo(void *buf)
+{
+ struct lx_shminfo s;
+ rctlblk_t *rblk;
+ int rblksz;
+
+ rblksz = rctlblk_size();
+ if ((rblk = (rctlblk_t *)SAFE_ALLOCA(rblksz)) == NULL)
+ return (-ENOMEM);
+
+ bzero(&s, sizeof (s));
+ if ((s.shmmni = get_rctlval(rblk, "project.max-shm-ids")) < 0)
+ return (s.shmmni);
+ if ((s.shmmax = get_rctlval(rblk, "project.max-shm-memory")) < 0)
+ return (s.shmmax);
+
+ /*
+ * We don't have corresponding rctls for these fields. The values
+ * are taken from the formulas used to derive the defaults listed
+ * in the Linux header file. We're lying, but trying to be
+ * coherent about it.
+ */
+ s.shmmin = 1;
+ s.shmseg = INT_MAX;
+ s.shmall = s.shmmax / getpagesize();
+
+ if (uucopy(&s, buf, sizeof (s)))
+ return (-errno);
+
+ return (0);
+}
+
+static int
+lx_shmctl_ipcstat(int shmid, void *buf)
+{
+ struct lx_shmid_ds shmds;
+ struct shmid_ds sol_shmds;
+
+ if (shmctl(shmid, IPC_STAT, &sol_shmds) != 0)
+ return (-errno);
+
+ bzero(&shmds, sizeof (shmds));
+ shmds.shm_perm.key = sol_shmds.shm_perm.key;
+ shmds.shm_perm.seq = sol_shmds.shm_perm.seq;
+ shmds.shm_perm.uid = sol_shmds.shm_perm.uid;
+ shmds.shm_perm.gid = sol_shmds.shm_perm.gid;
+ shmds.shm_perm.cuid = sol_shmds.shm_perm.cuid;
+ shmds.shm_perm.cgid = sol_shmds.shm_perm.cgid;
+ shmds.shm_perm.mode = sol_shmds.shm_perm.mode & S_IAMB;
+ if (sol_shmds.shm_lkcnt > 0)
+ shmds.shm_perm.mode |= LX_SHM_LOCKED;
+ shmds.shm_segsz = sol_shmds.shm_segsz;
+ shmds.shm_atime = sol_shmds.shm_atime;
+ shmds.shm_dtime = sol_shmds.shm_dtime;
+ shmds.shm_ctime = sol_shmds.shm_ctime;
+ shmds.shm_cpid = sol_shmds.shm_cpid;
+ shmds.shm_lpid = sol_shmds.shm_lpid;
+ shmds.shm_nattch = (ushort_t)sol_shmds.shm_nattch;
+
+ if (uucopy(&shmds, buf, sizeof (shmds)))
+ return (-errno);
+
+ return (0);
+}
+
+static int
+lx_shmctl_ipcset(int shmid, void *buf)
+{
+ struct lx_shmid_ds shmds;
+ struct shmid_ds sol_shmds;
+ int r;
+
+ if (uucopy(buf, &shmds, sizeof (shmds)))
+ return (-errno);
+
+ bzero(&sol_shmds, sizeof (sol_shmds));
+ sol_shmds.shm_perm.uid = shmds.shm_perm.uid;
+ sol_shmds.shm_perm.gid = shmds.shm_perm.gid;
+ sol_shmds.shm_perm.mode = shmds.shm_perm.mode & S_IAMB;
+
+ r = shmctl(shmid, IPC_SET, &sol_shmds);
+ return (r < 0 ? -errno : r);
+}
+
+/*
+ * Build and return a shm_info structure. We only return the bare
+ * essentials required by ipcs. The rest of the info is not readily
+ * available.
+ */
+static int
+lx_shmctl_shminfo(void *buf)
+{
+ struct lx_shm_info shminfo;
+ uint_t nids;
+ int idbuf;
+
+ bzero(&shminfo, sizeof (shminfo));
+
+ if (shmids(&idbuf, 0, &nids) < 0)
+ return (-errno);
+
+ shminfo.used_ids = nids;
+ if (uucopy(&shminfo, buf, sizeof (shminfo)) != 0)
+ return (-errno);
+
+ return (nids);
+}
+
+static int
+lx_shmctl_shmstat(int slot, void *buf)
+{
+ int r, shmid;
+
+ lx_debug("shmctl_shmstat(%d, 0x%p)\n", slot, buf);
+ shmid = slot_to_id(SLOT_SHM, slot);
+ if (shmid < 0)
+ return (shmid);
+
+ r = lx_shmctl_ipcstat(shmid, buf);
+ return (r < 0 ? r : shmid);
+}
+
+long
+lx_shmctl(int shmid, int cmd, void *buf)
+{
+ int r;
+ int use_errno = 0;
+
+ lx_debug("\tlx_shmctl(%d, %d, 0x%p)\n", shmid, cmd, buf);
+ switch (cmd & ~LX_IPC_64) {
+ case LX_IPC_RMID:
+ use_errno = 1;
+ r = shmctl(shmid, IPC_RMID, NULL);
+ break;
+
+ case LX_IPC_SET:
+ r = lx_shmctl_ipcset(shmid, buf);
+ break;
+
+ case LX_IPC_STAT:
+ r = lx_shmctl_ipcstat(shmid, buf);
+ break;
+
+ case LX_IPC_INFO:
+ r = lx_shmctl_ipcinfo(buf);
+ break;
+
+ case LX_SHM_LOCK:
+ use_errno = 1;
+ r = shmctl(shmid, SHM_LOCK, NULL);
+ break;
+
+ case LX_SHM_UNLOCK:
+ use_errno = 1;
+ r = shmctl(shmid, SHM_UNLOCK, NULL);
+ break;
+
+ case LX_SHM_INFO:
+ r = lx_shmctl_shminfo(buf);
+ break;
+
+ case LX_SHM_STAT:
+ r = lx_shmctl_shmstat(shmid, buf);
+ break;
+ default:
+ r = -EINVAL;
+ break;
+ }
+
+ if (use_errno == 1 && r < 0)
+ return (-errno);
+
+ return (r);
+}
+
+/*
+ * Under 32-bit Linux, glibc funnels all of the sysv IPC operations into this
+ * single ipc(2) system call. We need to blow that up and filter the
+ * remnants into the proper Solaris system calls.
+ */
+long
+lx_ipc(uintptr_t cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
+ uintptr_t arg4)
+{
+ int r;
+ void *bufptr = (void *)arg4;
+
+ lx_debug("lx_ipc(%d, %d, %d, %d, 0x%p, %d)\n",
+ cmd, arg1, arg2, arg3, bufptr, arg4);
+
+ switch (cmd) {
+ case LX_MSGGET:
+ r = lx_msgget((key_t)arg1, (int)arg2);
+ break;
+ case LX_MSGSND:
+ r = lx_msgsnd((int)arg1, bufptr, (size_t)arg2, (int)arg3);
+ break;
+ case LX_MSGRCV:
+ {
+ struct {
+ void *msgp;
+ long msgtype;
+ } args;
+
+ /*
+ * Rather than passing 5 args into ipc(2) directly,
+ * glibc passes 4 args and uses the buf argument to
+ * point to a structure containing two args: a pointer
+ * to the message and the message type.
+ */
+ if (uucopy(bufptr, &args, sizeof (args)))
+ return (-errno);
+ r = lx_msgrcv((int)arg1, args.msgp, (size_t)arg2,
+ args.msgtype, (int)arg3);
+ }
+ break;
+ case LX_MSGCTL:
+ r = lx_msgctl((int)arg1, (int)arg2, bufptr);
+ break;
+ case LX_SEMCTL:
+ r = lx_semctl((int)arg1, (size_t)arg2, (int)arg3, bufptr);
+ break;
+ case LX_SEMOP:
+ /*
+ * 'struct sembuf' is the same on Linux and Solaris, so we
+ * pass bufptr straight through.
+ */
+ r = lx_semop((int)arg1, bufptr, (size_t)arg2);
+ break;
+ case LX_SEMGET:
+ r = lx_semget((int)arg1, (size_t)arg2, (int)arg3);
+ break;
+ case LX_SHMAT:
+ r = lx_shmat((int)arg1, bufptr, (size_t)arg2);
+ if (r >= 0 || r <= -4096) {
+ if (uucopy(&r, (void *)arg3, sizeof (r)) != 0)
+ r = -errno;
+ }
+ break;
+ case LX_SHMDT:
+ r = shmdt(bufptr);
+ if (r < 0)
+ r = -errno;
+ break;
+ case LX_SHMGET:
+ r = lx_shmget((int)arg1, (size_t)arg2, (int)arg3);
+ break;
+ case LX_SHMCTL:
+ r = lx_shmctl((int)arg1, (int)arg2, bufptr);
+ break;
+
+ default:
+ r = -EINVAL;
+ }
+
+ return (r);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/time.c b/usr/src/lib/brand/lx/lx_brand/common/time.c
new file mode 100644
index 0000000000..cae5d9e834
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/time.c
@@ -0,0 +1,183 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <errno.h>
+#include <time.h>
+#include <string.h>
+#include <strings.h>
+#include <sys/times.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_misc.h>
+
+/*
+ * time() - This cannot be passthrough because on Linux a bad buffer will
+ * set errno to EFAULT, and on Illumos the failure mode is documented
+ * as "undefined."
+ *
+ * (At present, Illumos' time(2) will segmentation fault, as the call
+ * is simply a libc wrapper atop the time() syscall that will
+ * dereference the passed pointer if it is non-zero.)
+ */
+long
+lx_time(uintptr_t p1)
+{
+ time_t ret = time((time_t *)0);
+
+ if ((ret == (time_t)-1) ||
+ ((p1 != 0) && (uucopy(&ret, (time_t *)p1, sizeof (ret)) != 0)))
+ return (-errno);
+
+ return (ret);
+}
+
+/*
+ * times() - The Linux implementation avoids writing to NULL, while Illumos
+ * returns EFAULT.
+ */
+long
+lx_times(uintptr_t p1)
+{
+ clock_t ret;
+ struct tms buf, *tp = (struct tms *)p1;
+
+ ret = times(&buf);
+
+ if ((ret == -1) ||
+ ((tp != NULL) && uucopy((void *)&buf, tp, sizeof (buf)) != 0))
+ return (-errno);
+
+ return ((ret == -1) ? -errno : ret);
+}
+
+/*
+ * setitimer() - the Linux implementation can handle tv_usec values greater
+ * than 1,000,000 where Illumos would return EINVAL.
+ *
+ * There's still an issue here where Linux can handle a
+ * tv_sec value greater than 100,000,000 but Illumos cannot,
+ * but that would also mean setting an interval timer to fire
+ * over _three years_ in the future so it's unlikely anything
+ * other than Linux test suites will trip over it.
+ */
+long
+lx_setitimer(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ struct itimerval itv;
+ struct itimerval *itp = (struct itimerval *)p2;
+
+ if (itp != NULL) {
+ if (uucopy(itp, &itv, sizeof (itv)) != 0)
+ return (-errno);
+
+ /*
+ * Adjust any tv_usec fields >= 1,000,000 by adding any whole
+ * seconds so indicated to tv_sec and leaving tv_usec as the
+ * remainder.
+ */
+ if (itv.it_interval.tv_usec >= MICROSEC) {
+ itv.it_interval.tv_sec +=
+ itv.it_interval.tv_usec / MICROSEC;
+
+ itv.it_interval.tv_usec %= MICROSEC;
+ }
+ if (itv.it_value.tv_usec >= MICROSEC) {
+ itv.it_value.tv_sec +=
+ itv.it_value.tv_usec / MICROSEC;
+
+ itv.it_value.tv_usec %= MICROSEC;
+ }
+
+ itp = &itv;
+ }
+
+ return ((setitimer((int)p1, itp, (struct itimerval *)p3) != 0) ?
+ -errno : 0);
+}
+
+/*
+ * NOTE: The Linux man pages state this structure is obsolete and is
+ * unsupported, so it is declared here for sizing purposes only.
+ */
+struct lx_timezone {
+ int tz_minuteswest; /* minutes W of Greenwich */
+ int tz_dsttime; /* type of dst correction */
+};
+
+/*
+ * lx_gettimeofday() and lx_settimeofday() are implemented here rather than
+ * as pass-through calls to Solaris' libc due to the need to return EFAULT
+ * for a bad buffer rather than die with a segmentation fault.
+ */
+long
+lx_gettimeofday(uintptr_t p1, uintptr_t p2)
+{
+ struct timeval tv;
+ struct lx_timezone tz;
+
+ bzero(&tz, sizeof (tz));
+ (void) gettimeofday(&tv, NULL);
+
+ if ((p1 != NULL) &&
+ (uucopy(&tv, (struct timeval *)p1, sizeof (tv)) < 0))
+ return (-errno);
+
+ /*
+ * The Linux man page states use of the second parameter is obsolete,
+ * but gettimeofday(2) should still return EFAULT if it is set
+ * to a bad non-NULL pointer (sigh...)
+ */
+ if ((p2 != NULL) &&
+ (uucopy(&tz, (struct lx_timezone *)p2, sizeof (tz)) < 0))
+ return (-errno);
+
+ return (0);
+}
+
+long
+lx_settimeofday(uintptr_t p1, uintptr_t p2)
+{
+ struct timeval tv;
+ struct lx_timezone tz;
+
+ if ((p1 != NULL) &&
+ (uucopy((struct timeval *)p1, &tv, sizeof (tv)) < 0))
+ return (-errno);
+
+ /*
+ * The Linux man page states use of the second parameter is obsolete,
+ * but settimeofday(2) should still return EFAULT if it is set
+ * to a bad non-NULL pointer (sigh...)
+ */
+ if ((p2 != NULL) &&
+ (uucopy((struct lx_timezone *)p2, &tz, sizeof (tz)) < 0))
+ return (-errno);
+
+ if ((p1 != NULL) && (settimeofday(&tv, NULL) < 0))
+ return (-errno);
+
+ return (0);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/truncate.c b/usr/src/lib/brand/lx/lx_brand/common/truncate.c
new file mode 100644
index 0000000000..ba3e452408
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/truncate.c
@@ -0,0 +1,173 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/resource.h>
+#include <sys/lx_types.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_syscall.h>
+
+/*
+ * ZFS does not enforce the process.max-file-size rctl on a file which is
+ * grown in size via truncate/ftruncate, since that is simply metadata which
+ * does not consume any additional space. However, LTP truncate03 depends on
+ * this behavior so we enforce it here.
+ */
+static boolean_t
+p_fsize_excd(const char *path, off_t length)
+{
+ struct stat64 sb;
+ struct rlimit rl;
+
+ if (stat64(path, &sb) == 0 && sb.st_size < length) {
+ /* We are growing the file, check the rlimit */
+ if (getrlimit(RLIMIT_FSIZE, &rl) == 0 && length > rl.rlim_cur)
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
+static boolean_t
+f_fsize_excd(int fd, off_t length)
+{
+ struct stat64 sb;
+ struct rlimit rl;
+
+ if (fstat64(fd, &sb) == 0 && sb.st_size < length) {
+ /* We are growing the file, check the rlimit */
+ if (getrlimit(RLIMIT_FSIZE, &rl) == 0 && length > rl.rlim_cur)
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
+/*
+ * On Illumos, truncate() and ftruncate() are implemented in libc, so these are
+ * layered on those interfaces.
+ */
+
+long
+lx_truncate(uintptr_t path, uintptr_t length)
+{
+#if defined(_ILP32)
+ if ((off_t)length >= 0xffffffffUL)
+ return (-EFBIG);
+#endif
+
+ if (length > 0 && p_fsize_excd((const char *)path, (off_t)length))
+ return (-EFBIG);
+
+ return (truncate((const char *)path, (off_t)length) == 0 ? 0 : -errno);
+}
+
+long
+lx_ftruncate(uintptr_t fd, uintptr_t length)
+{
+ int r;
+
+#if defined(_ILP32)
+ if ((off_t)length >= 0xffffffffUL)
+ return (-EFBIG);
+#endif
+
+ if (length > 0 && f_fsize_excd((int)fd, (off_t)length))
+ return (-EFBIG);
+
+ r = ftruncate((int)fd, (off_t)length);
+ /*
+ * On Linux, truncating a file opened read-only returns EINVAL whereas
+ * Illumos returns EBADF.
+ */
+ if (r != 0) {
+ if (errno == EBADF) {
+ int mode;
+
+ if ((mode = fcntl(fd, F_GETFL, 0)) != -1 &&
+ (mode & O_ACCMODE) == O_RDONLY)
+ r = -EINVAL;
+ else
+ r = -EBADF; /* keep existing errno */
+ } else {
+ r = -errno;
+ }
+ }
+ return (r);
+}
+
+long
+lx_truncate64(uintptr_t path, uintptr_t length_lo, uintptr_t length_hi)
+{
+ uint64_t len = LX_32TO64(length_lo, length_hi);
+
+ if (len >= 0x7fffffffffffffffULL)
+ return (-EFBIG);
+
+ if (len > 0 && p_fsize_excd((const char *)path, (off_t)len))
+ return (-EFBIG);
+
+ return (truncate64((const char *)path, len) == 0 ? 0 : -errno);
+}
+
+long
+lx_ftruncate64(uintptr_t fd, uintptr_t length_lo, uintptr_t length_hi)
+{
+ int r;
+ uint64_t len = LX_32TO64(length_lo, length_hi);
+
+ if (len >= 0x7fffffffffffffffULL)
+ return (-EFBIG);
+
+ if (len > 0 && f_fsize_excd((int)fd, (off_t)len))
+ return (-EFBIG);
+
+ r = ftruncate64((int)fd, len);
+ /*
+ * On Linux, truncating a file opened read-only returns EINVAL whereas
+ * Illumos returns EBADF.
+ */
+ if (r != 0) {
+ if (errno == EBADF) {
+ int mode;
+
+ if ((mode = fcntl(fd, F_GETFL, 0)) != -1 &&
+ (mode & O_ACCMODE) == O_RDONLY)
+ r = -EINVAL;
+ else
+ r = -EBADF; /* keep existing errno */
+ } else {
+ r = -errno;
+ }
+ }
+
+ return (r);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/wait.c b/usr/src/lib/brand/lx/lx_brand/common/wait.c
new file mode 100644
index 0000000000..031eb5e5cd
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/wait.c
@@ -0,0 +1,329 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * wait() family of functions.
+ *
+ * The first minor difference between the Linux and Solaris family of wait()
+ * calls is that the values for WNOHANG and WUNTRACED are different. Thankfully,
+ * the exit status values are identical between the two implementations.
+ *
+ * Things get very different and very complicated when we introduce the Linux
+ * threading model. Under linux, both threads and child processes are
+ * represented as processes. However, the behavior of wait() with respect to
+ * each child varies according to the flags given to clone()
+ *
+ * SIGCHLD The SIGCHLD signal should be sent on termination
+ * CLONE_THREAD The child shares the same thread group as the parent
+ * CLONE_DETACHED The parent receives no notification when the child exits
+ *
+ * The following flags control the Linux behavior w.r.t. the above attributes:
+ *
+ * __WALL Wait on all children, regardless of type
+ * __WCLONE Wait only on non-SIGCHLD children
+ * __WNOTHREAD Don't wait on children of other threads in this group
+ *
+ * The following chart shows whether wait() returns when the child exits:
+ *
+ * default __WCLONE __WALL
+ * no SIGCHLD - X X
+ * SIGCHLD X - X
+ *
+ * The following chart shows whether wait() returns when the grandchild exits:
+ *
+ * default __WNOTHREAD
+ * no CLONE_THREAD - -
+ * CLONE_THREAD X -
+ *
+ * The CLONE_DETACHED flag is universal - when the child exits, no state is
+ * stored and wait() has no effect.
+ *
+ * XXX Support the above combination of options, or some reasonable subset that
+ * covers at least fork() and pthread_create().
+ */
+
+#include <errno.h>
+#include <sys/wait.h>
+#include <sys/lx_types.h>
+#include <sys/lx_signal.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_syscall.h>
+#include <sys/syscall.h>
+#include <sys/times.h>
+#include <strings.h>
+#include <unistd.h>
+#include <assert.h>
+#include <lx_syscall.h>
+
+/*
+ * Convert between Linux options and Solaris options, returning -1 if any
+ * invalid flags are found.
+ */
+#define LX_WNOHANG 0x00000001
+#define LX_WUNTRACED 0x00000002
+#define LX_WSTOPPED LX_WUNTRACED
+#define LX_WEXITED 0x00000004
+#define LX_WCONTINUED 0x00000008
+#define LX_WNOWAIT 0x01000000
+
+#define LX_WNOTHREAD 0x20000000
+#define LX_WALL 0x40000000
+#define LX_WCLONE 0x80000000
+
+#define LX_P_ALL 0x0
+#define LX_P_PID 0x1
+#define LX_P_GID 0x2
+
+extern long max_pid;
+
+static int
+ltos_options(uintptr_t options)
+{
+ int newoptions = 0;
+ int rval;
+ lx_waitid_args_t extra;
+
+ if (((options) & ~(LX_WNOHANG | LX_WUNTRACED | LX_WEXITED |
+ LX_WCONTINUED | LX_WNOWAIT | LX_WNOTHREAD | LX_WALL |
+ LX_WCLONE)) != 0) {
+ return (-1);
+ }
+ /*
+ * We use the B_STORE_ARGS command to store any of LX_WNOTHREAD,
+ * LX_WALL, and LX_WCLONE that have been set as options on this waitid
+ * call. These flags are stored as part of the lwp_brand_data, so that
+ * when there is a later syscall to waitid, the brand code there can
+ * detect that we added extra flags here and use them as appropriate.
+ * We pass them in here rather than the normal channel for flags to
+ * prevent polluting the namespace.
+ */
+ extra.waitid_flags = options & (LX_WNOTHREAD | LX_WALL | LX_WCLONE);
+ rval = syscall(SYS_brand, B_STORE_ARGS, &extra,
+ sizeof (lx_waitid_args_t), NULL, NULL, NULL, NULL);
+ if (rval < 0)
+ return (rval);
+
+ if (options & LX_WNOHANG)
+ newoptions |= WNOHANG;
+ if (options & LX_WUNTRACED)
+ newoptions |= WUNTRACED;
+ if (options & LX_WEXITED)
+ newoptions |= WEXITED;
+ if (options & LX_WCONTINUED)
+ newoptions |= WCONTINUED;
+ if (options & LX_WNOWAIT)
+ newoptions |= WNOWAIT;
+
+ /* The trapped option is implicit on Linux */
+ newoptions |= WTRAPPED;
+
+ return (newoptions);
+}
+
+static int
+lx_wstat(int code, int status)
+{
+ int stat = 0;
+
+ switch (code) {
+ case CLD_EXITED:
+ stat = status << 8;
+ break;
+ case CLD_DUMPED:
+ stat = stol_signo[status];
+ assert(stat != -1);
+ stat |= WCOREFLG;
+ break;
+ case CLD_KILLED:
+ stat = stol_signo[status];
+ assert(stat != -1);
+ break;
+ case CLD_TRAPPED:
+ case CLD_STOPPED:
+ stat = stol_signo[status];
+ assert(stat != -1);
+ stat <<= 8;
+ stat |= WSTOPFLG;
+ break;
+ case CLD_CONTINUED:
+ stat = WCONTFLG;
+ break;
+ }
+
+ return (stat);
+}
+
+/* wrapper to make solaris waitid work properly with ptrace */
+static int
+lx_waitid_helper(idtype_t idtype, id_t id, siginfo_t *info, int options)
+{
+ do {
+ /*
+ * It's possible that we return EINVAL here if the idtype is
+ * P_PID or P_PGID and id is out of bounds for a valid pid or
+ * pgid, but Linux expects to see ECHILD. No good way occurs to
+ * handle this so we'll punt for now.
+ */
+ if (waitid(idtype, id, info, options) < 0)
+ return (-errno);
+
+ /*
+ * If the WNOHANG flag was specified and no child was found
+ * return 0.
+ */
+ if ((options & WNOHANG) && info->si_pid == 0)
+ return (0);
+
+ /*
+ * It's possible that we may have a spurious return for one of
+ * the child processes created by the ptrace subsystem. If
+ * that's the case, we simply try again.
+ */
+ } while (lx_ptrace_wait(info) == -1);
+ return (0);
+}
+
+long
+lx_wait4(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
+{
+ siginfo_t info = { 0 };
+ struct rusage ru = { 0 };
+ idtype_t idtype;
+ id_t id;
+ int options, status = 0;
+ pid_t pid = (pid_t)p1;
+ int rval;
+
+ if ((options = ltos_options(p3)) == -1)
+ return (-EINVAL);
+
+ if (pid > max_pid)
+ return (-ECHILD);
+
+ /*
+ * While not listed as a valid return code, Linux's wait4(2) does,
+ * in fact, get an EFAULT if either the status pointer or rusage
+ * pointer is invalid. Since a failed waitpid should leave child
+ * process in a state where a future wait4(2) will succeed, we
+ * check them by copying out the values their buffers originally
+ * contained. (We need to do this as a failed system call should
+ * never affect the contents of a passed buffer.)
+ *
+ * This will fail if the buffers in question are write-only.
+ */
+ if ((void *)p2 != NULL &&
+ ((uucopy((void *)p2, &status, sizeof (status)) != 0) ||
+ (uucopy(&status, (void *)p2, sizeof (status)) != 0)))
+ return (-EFAULT);
+
+ if ((void *)p4 != NULL) {
+ if ((uucopy((void *)p4, &ru, sizeof (ru)) != 0) ||
+ (uucopy(&ru, (void *)p4, sizeof (ru)) != 0))
+ return (-EFAULT);
+ }
+
+ if (pid < -1) {
+ idtype = P_PGID;
+ id = -pid;
+ } else if (pid == -1) {
+ idtype = P_ALL;
+ id = 0;
+ } else if (pid == 0) {
+ idtype = P_PGID;
+ id = getpgrp();
+ } else {
+ idtype = P_PID;
+ id = pid;
+ }
+
+ options |= WEXITED | WTRAPPED;
+
+ if ((rval = lx_waitid_helper(idtype, id, &info, options)) < 0)
+ return (rval);
+ /*
+ * If the WNOHANG flag was specified and no child was found return 0.
+ */
+ if ((options & WNOHANG) && info.si_pid == 0)
+ return (0);
+
+ status = lx_wstat(info.si_code, info.si_status);
+
+ /*
+ * Unfortunately if this attempt to copy out either the status or the
+ * rusage fails, the process will be in an inconsistent state as
+ * subsequent calls to wait for the same child will fail where they
+ * should succeed on a Linux system. This, however, is rather
+ * unlikely since we tested the validity of both above.
+ */
+ if (p2 != NULL && uucopy(&status, (void *)p2, sizeof (status)) != 0)
+ return (-EFAULT);
+
+ if (p4 != NULL && (rval = lx_getrusage(LX_RUSAGE_CHILDREN, p4)) != 0)
+ return (rval);
+
+ return (info.si_pid);
+}
+
+long
+lx_waitpid(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ return (lx_wait4(p1, p2, p3, NULL));
+}
+
+long
+lx_waitid(uintptr_t idtype, uintptr_t id, uintptr_t infop, uintptr_t opt)
+{
+ int rval, options;
+ siginfo_t s_info = {0};
+ if ((options = ltos_options(opt)) == -1)
+ return (-EINVAL);
+
+ if (((opt) & (LX_WEXITED | LX_WSTOPPED | LX_WCONTINUED)) == 0)
+ return (-EINVAL);
+
+ switch (idtype) {
+ case LX_P_ALL:
+ idtype = P_ALL;
+ break;
+ case LX_P_PID:
+ idtype = P_PID;
+ break;
+ case LX_P_GID:
+ idtype = P_PGID;
+ break;
+ default:
+ return (-EINVAL);
+ }
+ if ((rval = lx_waitid_helper(idtype, (id_t)id, &s_info, options)) < 0)
+ return (rval);
+
+ /* If the WNOHANG flag was specified and no child was found return 0. */
+ if ((options & WNOHANG) && s_info.si_pid == 0)
+ return (0);
+
+ return (stol_siginfo(&s_info, (lx_siginfo_t *)infop));
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/xattr.c b/usr/src/lib/brand/lx/lx_brand/common/xattr.c
new file mode 100644
index 0000000000..39d6d0361b
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/xattr.c
@@ -0,0 +1,47 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * *xattr() family of functions.
+ *
+ * These are currently unimplemented. We return EOPNOTSUPP for now, rather
+ * than using NOSYS_NO_EQUIV to avoid unwanted stderr output from ls(1).
+ */
+
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/lx_types.h>
+#include <sys/lx_syscall.h>
+
+long
+lx_xattr2(uintptr_t p1, uintptr_t p2)
+{
+
+ return (-EOPNOTSUPP);
+}
+
+long
+lx_xattr3(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+
+ return (-EOPNOTSUPP);
+}
+
+long
+lx_xattr4(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
+{
+
+ return (-EOPNOTSUPP);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/i386/Makefile b/usr/src/lib/brand/lx/lx_brand/i386/Makefile
new file mode 100644
index 0000000000..e9546dee4a
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/i386/Makefile
@@ -0,0 +1,57 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+# Copyright 2014 Joyent, Inc. All rights reserved.
+#
+# lib/brand/lx/i386/Makefile
+
+ISASRCDIR=.
+
+ASFLAGS += -P -D_ASM
+
+include ../Makefile.com
+
+DYNFLAGS += -Wl,-I/native/lib/ld.so.1
+
+POFILE= lx_brand.po
+MSGFILES= $(CSRCS)
+
+ASSYMDEP_OBJS = lx_handler.o
+
+$(ASSYMDEP_OBJS:%=pics/%): assym.h
+
+OFFSETS = ../$(MACH)/offsets.in
+
+assym.h: $(OFFSETS)
+ $(OFFSETS_CREATE) $(CTF_FLAGS) < $(OFFSETS) > $@
+
+CLOBBERFILES += assym.h
+
+install: all $(ROOTLIBS)
+
+$(POFILE): $(MSGFILES)
+ $(BUILDPO.msgfiles)
+
+_msg: $(MSGDOMAINPOFILE)
+
+include $(SRC)/Makefile.msg.targ
diff --git a/usr/src/lib/brand/lx/lx_brand/i386/lx_crt.s b/usr/src/lib/brand/lx/lx_brand/i386/lx_crt.s
new file mode 100644
index 0000000000..c457c1c209
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/i386/lx_crt.s
@@ -0,0 +1,65 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/asm_linkage.h>
+
+#if defined(lint)
+
+void
+_start(void)
+{
+}
+
+#else /* lint */
+
+ /*
+ * C language startup routine for the lx brand shared library.
+ */
+ ENTRY_NP(_start)
+ pushl $0 / Build a stack frame. retpc = NULL
+ pushl $0 / fp = NULL
+ movl %esp, %ebp / first stack frame
+
+ /*
+ * Calculate the location of the envp array by adding the size of
+ * the argv array to the start of the argv array.
+ */
+ movl 8(%ebp), %eax / argc in %eax
+ leal 16(%ebp,%eax,4), %edx / envp in %edx
+ andl $-16, %esp
+ pushl %edx / push envp
+ leal 12(%ebp),%edx / compute &argv[0]
+ pushl %edx / push argv
+ pushl %eax / push argc
+ call lx_init
+ /*
+ * lx_init will never return.
+ */
+ SET_SIZE(_start)
+
+#endif /* lint */
diff --git a/usr/src/lib/brand/lx/lx_brand/i386/lx_handler.s b/usr/src/lib/brand/lx/lx_brand/i386/lx_handler.s
new file mode 100644
index 0000000000..2b382c9f76
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/i386/lx_handler.s
@@ -0,0 +1,384 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/asm_linkage.h>
+#include <sys/regset.h>
+#include <sys/segments.h>
+#include <sys/syscall.h>
+#include <sys/lx_brand.h>
+
+#if defined(_ASM)
+#include <sys/lx_signal.h>
+#include <sys/lx_syscall.h>
+#endif /* _ASM */
+
+#include "assym.h"
+
+/* 32-bit syscall numbers */
+#define LX_SYS_sigreturn 119
+#define LX_SYS_rt_sigreturn 173
+
+#define PIC_SETUP(r) \
+ call 9f; \
+9: popl r; \
+ addl $_GLOBAL_OFFSET_TABLE_ + [. - 9b], r
+
+/*
+ * Each JMP must occupy 16 bytes
+ */
+#define JMP \
+ pushl $_CONST(. - lx_handler_table); \
+ jmp lx_handler; \
+ .align 16;
+
+#define JMP4 JMP; JMP; JMP; JMP
+#define JMP16 JMP4; JMP4; JMP4; JMP4
+#define JMP64 JMP16; JMP16; JMP16; JMP16
+#define JMP256 JMP64; JMP64; JMP64; JMP64
+
+/*
+ * Alternate jump table that turns on lx_traceflag before proceeding with
+ * the normal emulation routine.
+ */
+#define TJMP \
+ pushl $_CONST(. - lx_handler_trace_table); \
+ jmp lx_handler_trace; \
+ .align 16;
+
+#define TJMP4 TJMP; TJMP; TJMP; TJMP
+#define TJMP16 TJMP4; TJMP4; TJMP4; TJMP4
+#define TJMP64 TJMP16; TJMP16; TJMP16; TJMP16
+#define TJMP256 TJMP64; TJMP64; TJMP64; TJMP64
+
+
+#if defined(lint)
+
+#include <sys/types.h>
+#include <sys/regset.h>
+#include <sys/signal.h>
+
+void
+lx_handler_table(void)
+{}
+
+void
+lx_handler(void)
+{}
+
+/* ARGSUSED */
+void
+lx_setup_clone(uintptr_t gs, void *retaddr, void *stk)
+{}
+
+/* ARGSUSED */
+void
+lx_sigdeliver(int sig, siginfo_t *sip, void *p, size_t stacksz,
+ void (*stack_frame_builder)(void), void (*lx_sighandler)(void),
+ uintptr_t gs)
+{}
+
+/* ARGSUSED */
+void
+lx_sigacthandler(int sig, siginfo_t *s, void *p)
+{}
+
+void
+lx_sigreturn_tramp(void)
+{}
+
+void
+lx_rt_sigreturn_tramp(void)
+{}
+
+/* ARGSUSED */
+void
+lx_sigreturn_tolibc(uintptr_t sp)
+{}
+
+#else /* lint */
+
+ /*
+ * On entry to this table, %eax will hold the return address. The
+ * location where we enter the table is a function of the system
+ * call number. The table needs the same alignment as the individual
+ * entries.
+ */
+ .align 16
+ ENTRY_NP(lx_handler_trace_table)
+ TJMP256
+ TJMP64
+ TJMP64
+ SET_SIZE(lx_handler_trace_table)
+
+ .align 16
+ ENTRY_NP(lx_handler_table)
+ JMP256
+ JMP64
+ JMP64
+ SET_SIZE(lx_handler_table)
+
+ ENTRY_NP(lx_handler_trace)
+ pushl %esi
+ PIC_SETUP(%esi)
+ movl lx_traceflag@GOT(%esi), %esi
+ movl $1, (%esi)
+ popl %esi
+ /*
+ * While we could just fall through to lx_handler(), we "tail-call" it
+ * instead to make ourselves a little more comprehensible to trace
+ * tools.
+ */
+ jmp lx_handler
+ SET_SIZE(lx_handler_trace)
+
+ ALTENTRY(lx_handler)
+ /*
+ * %ebp isn't always going to be a frame pointer on Linux, but when
+ * it is, saving it here lets us have a coherent stack backtrace.
+ */
+ pushl %ebp
+
+ /*
+ * Fill in a lx_regs_t structure on the stack.
+ */
+ subl $SIZEOF_LX_REGS_T, %esp
+
+ /*
+ * Save %ebp and then fill it with what would be its usual value as
+ * the frame pointer. The value we save for %esp needs to be the
+ * stack pointer at the time of the interrupt so we need to skip the
+ * saved %ebp and (what will be) the return address.
+ */
+ movl %ebp, LXR_EBP(%esp)
+ movl %esp, %ebp
+ addl $_CONST(SIZEOF_LX_REGS_T), %ebp
+ movl %ebp, LXR_ESP(%esp)
+ addl $_CONST(_MUL(CPTRSIZE, 2)), LXR_ESP(%esp)
+
+ movl $0, LXR_GS(%esp)
+ movw %gs, LXR_GS(%esp)
+ movl %edi, LXR_EDI(%esp)
+ movl %esi, LXR_ESI(%esp)
+ movl %ebx, LXR_EBX(%esp)
+ movl %edx, LXR_EDX(%esp)
+ movl %ecx, LXR_ECX(%esp)
+ movl %eax, LXR_EIP(%esp)
+
+ /*
+ * The kernel drops us into the middle of one of the tables above
+ * that then pushes that table offset onto the stack, and calls into
+ * lx_handler. That offset indicates the system call number while
+ * %eax holds the return address for the system call. We replace the
+ * value on the stack with the return address, and use the value to
+ * compute the system call number by dividing by the table entry size.
+ */
+ xchgl CPTRSIZE(%ebp), %eax
+ shrl $4, %eax
+ movl %eax, LXR_EAX(%esp)
+
+ /*
+ * Switch to the Solaris libc's %gs.
+ */
+ movl $LWPGS_SEL, %ebx
+ movw %bx, %gs
+
+ /*
+ * Call lx_emulate() whose only argument is a pointer to the
+ * lx_regs_t structure we've placed on the stack.
+ */
+ pushl %esp
+ call lx_emulate
+
+ /*
+ * We use this global symbol to identify this return site when
+ * walking the stack backtrace. It needs to remain immediately
+ * after the call to lx_emulate().
+ */
+ ALTENTRY(lx_emulate_done)
+
+ /*
+ * Clean up the argument to lx_emulate().
+ */
+ addl $4, %esp
+
+ /*
+ * Restore the saved register state; we get %ebp, %esp and %esp from
+ * the ordinary locations rather than the saved state.
+ */
+ movl LXR_EDI(%esp), %edi
+ movl LXR_ESI(%esp), %esi
+ movl LXR_EBX(%esp), %ebx
+ movl LXR_EDX(%esp), %edx
+ movl LXR_ECX(%esp), %ecx
+ movl LXR_EAX(%esp), %eax
+ movw LXR_GS(%esp), %gs
+
+ movl %ebp, %esp
+ popl %ebp
+ ret
+ SET_SIZE(lx_handler)
+
+ ENTRY_NP(lx_swap_gs)
+ push %eax /* save the current eax value */
+ movl 0xc(%esp),%eax /* 2nd param is a pointer */
+ movw %gs,(%eax) /* use the pointer to save current gs */
+ movl 0x8(%esp),%eax /* first parameter is the new gs value */
+ movw %ax, %gs /* switch to the new gs value */
+ pop %eax /* restore eax */
+ ret
+ SET_SIZE(lx_swap_gs)
+
+ ENTRY_NP(lx_setup_clone)
+ xorl %ebp, %ebp /* terminating stack */
+ popl %edx /* eat the clone_start() return address */
+ popl %gs /* Switch back to the Linux libc's %gs */
+ popl %edx /* Linux clone() return address */
+ popl %esp /* New stack pointer */
+ xorl %eax, %eax /* child returns 0 to SYS_clone() */
+ jmp *%edx /* return to Linux app. */
+ SET_SIZE(lx_setup_clone)
+
+ /*
+ * lx_sigdeliver(sig, siginfo_t *, ucontext_t *, stack_size,
+ * stack_build_routine, signal_handler, glibc_gs)
+ *
+ * This routine allocates stack space for the Linux signal stack,
+ * calls a routine to build the signal stack and then calls the Linux
+ * signal handler. This is written in assembly because of the way
+ * we need to directly manipulate the stack and pass the resulting
+ * stack to the signal handler with the Linux signal stack on top.
+ *
+ * When the Linux signal handler is called, the stack will look
+ * like this:
+ *
+ * =================================================
+ * | | %ebp |
+ * | =================================================
+ * | | LX_SIGRT_MAGIC |
+ * | =================================================
+ * V | Linux signal frame built by lx_stackbuilder() |
+ * =================================================
+ *
+ * The stack frame (%ebp) will be reset to its original value (i.e. the
+ * previous frame) on entry to the Linux signal handler.
+ */
+ ENTRY_NP(lx_sigdeliver)
+ pushl %ebp
+ movl %esp, %ebp
+ movl 16(%ebp), %edx /* pointer to Solaris ucontext_t */
+ pushl %edx /* save ucontext_t ptr for later */
+ pushl $LX_SIGRT_MAGIC /* marker value for lx_(rt)_sigreturn */
+
+ subl 20(%ebp), %esp /* create stack_size stack buffer */
+ pushl %esp /* push stack pointer */
+ pushl %edx /* push pointer to ucontext_t */
+ pushl 12(%ebp) /* push pointer to siginfo_t */
+ pushl 8(%ebp) /* push signal number */
+ call *24(%ebp) /* lx_stackbuilder(sig, sip, ucp, sp) */
+ add $16, %esp /* remove args from stack */
+ movw 32(%ebp), %gs /* only low 16 bits are used */
+
+ mov 4(%ebp),%eax /* fetch old %ebp from stack */
+ mov 28(%ebp), %edx /* get address of Linux handler */
+ mov %eax, %ebp /* restore old %ebp */
+ jmp *%edx /* jmp to the Linux signal handler */
+ SET_SIZE(lx_sigdeliver)
+
+ /*
+ * Due to the nature of signals, we need to be able to force the %gs
+ * value to that used by Solaris by running any Solaris code.
+ *
+ * This routine does that, then calls a C routine that will save the
+ * %gs value at the time of the signal off into a thread-specific data
+ * structure. Finally, we trampoline to the libc code that would
+ * normally interpose itself before calling a signal handler.
+ *
+ * The libc routine that calls user signal handlers ends with a
+ * setcontext, so we would never return here even if we used a call
+ * rather than a jmp.
+ *
+ * %esi is used for the PIC as it is guaranteed by the 386 ABI to
+ * survive the call to lx_sigsavegs. The downside is we must also
+ * preserve its value for our caller.
+ *
+ * Note that because lx_sigsavegs and libc_sigacthandler are externs,
+ * they need to be dereferenced via the GOT.
+ *
+ * IMPORTANT: Because libc apparently gets upset if extra data is
+ * left on its stack, this routine needs to be crafted
+ * in assembly so that the jmp to the libc interposer
+ * doesn't leave any cruft lying around.
+ */
+ ENTRY_NP(lx_sigacthandler)
+ pushl %esi /* save %esi */
+ pushl %gs /* push the Linux %gs */
+ pushl $LWPGS_SEL
+ popl %gs /* install the Solaris %gs */
+
+ PIC_SETUP(%esi)
+ movl lx_sigsavegs@GOT(%esi), %eax
+ call *%eax /* save the Linux %gs */
+ movl libc_sigacthandler@GOT(%esi), %eax
+ add $4, %esp /* clear Linux %gs from stack */
+ popl %esi /* restore %esi */
+ jmp *(%eax) /* jmp to libc's interposer */
+ SET_SIZE(lx_sigacthandler)
+
+ /*
+ * Trampoline code is called by the return at the end of a Linux
+ * signal handler to return control to the interrupted application
+ * via the lx_sigreturn() or lx_rt_sigreturn() syscalls.
+ *
+ * (lx_sigreturn() is called for legacy signal handling, and
+ * lx_rt_sigreturn() is called for "new"-style signals.)
+ *
+ * These two routines must consist of the EXACT code sequences below
+ * as gdb looks at the sequence of instructions a routine will return
+ * to determine whether it is in a signal handler or not.
+ * See the Linux code setup_signal_stack_sc() in arch/x86/um/signal.c.
+ */
+ ENTRY_NP(lx_sigreturn_tramp)
+ popl %eax
+ movl $LX_SYS_sigreturn, %eax
+ int $0x80
+ SET_SIZE(lx_sigreturn_tramp)
+
+ ENTRY_NP(lx_rt_sigreturn_tramp)
+ movl $LX_SYS_rt_sigreturn, %eax
+ int $0x80
+ SET_SIZE(lx_rt_sigreturn_tramp)
+
+ /*
+ * Manipulate the stack in the way necessary for it to appear to libc
+ * that the signal handler it invoked via call_user_handler() is
+ * returning.
+ */
+ ENTRY_NP(lx_sigreturn_tolibc)
+ movl 4(%esp), %esp /* set %esp to passed value */
+ popl %ebp /* restore proper %ebp */
+ ret /* return to lx_call_user_handler */
+ SET_SIZE(lx_sigreturn_tolibc)
+#endif /* lint */
diff --git a/usr/src/lib/brand/lx/lx_brand/i386/lx_runexe.s b/usr/src/lib/brand/lx/lx_brand/i386/lx_runexe.s
new file mode 100644
index 0000000000..a90bc5621b
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/i386/lx_runexe.s
@@ -0,0 +1,60 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/asm_linkage.h>
+
+#if defined(lint)
+
+/*ARGSUSED*/
+void
+lx_runexe(void *argv, void *entry)
+{
+}
+
+#else /* lint */
+
+ /*
+ * Set our stack pointer, clear the general registers,
+ * and jump to the brand linker's entry point.
+ */
+ ENTRY_NP(lx_runexe)
+ movl 4(%esp), %eax / %eax = &argv[0]
+ movl 8(%esp), %ebx / Brand linker's entry point in %ebx
+ subl $4, %eax / Top of stack - must point at argc
+ movl %eax, %esp / Set %esp to what linkers expect
+
+ movl $0, %eax
+ movl $0, %ecx
+ movl $0, %edx
+ movl $0, %esi
+ movl $0, %edi
+ movl $0, %ebp
+
+ jmp *%ebx / And away we go...
+ SET_SIZE(lx_runexe)
+
+#endif /* lint */
diff --git a/usr/src/lib/brand/lx/lx_brand/i386/offsets.in b/usr/src/lib/brand/lx/lx_brand/i386/offsets.in
new file mode 100644
index 0000000000..ac934ee76c
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/i386/offsets.in
@@ -0,0 +1,40 @@
+\
+\ Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+\ Use is subject to license terms.
+\
+\ CDDL HEADER START
+\
+\ The contents of this file are subject to the terms of the
+\ Common Development and Distribution License (the "License").
+\ You may not use this file except in compliance with the License.
+\
+\ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+\ or http://www.opensolaris.org/os/licensing.
+\ See the License for the specific language governing permissions
+\ and limitations under the License.
+\
+\ When distributing Covered Code, include this CDDL HEADER in each
+\ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+\ If applicable, add the following below this CDDL HEADER, with the
+\ fields enclosed by brackets "[]" replaced with your own identifying
+\ information: Portions Copyright [yyyy] [name of copyright owner]
+\
+\ CDDL HEADER END
+\
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/lx_brand.h>
+
+lx_regs_t SIZEOF_LX_REGS_T
+ lxr_gs
+ lxr_edi
+ lxr_esi
+ lxr_ebp
+ lxr_esp
+ lxr_ebx
+ lxr_edx
+ lxr_ecx
+ lxr_eax
+ lxr_eip
+ lxr_orig_eax
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_debug.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_debug.h
new file mode 100644
index 0000000000..80fb579665
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_debug.h
@@ -0,0 +1,48 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LX_DEBUG_H
+#define _LX_DEBUG_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* initialize the debugging subsystem */
+extern void lx_debug_init(void);
+
+/* printf() style debug message functionality */
+extern void lx_debug(const char *, ...);
+
+/* set non-zero if the debugging subsystem is enabled */
+extern int lx_debug_enabled;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_DEBUG_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_fcntl.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_fcntl.h
new file mode 100644
index 0000000000..06e05cbf5c
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_fcntl.h
@@ -0,0 +1,125 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_LX_FCNTL_H
+#define _SYS_LX_FCNTL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Lx open/fcntl flags
+ */
+#define LX_O_RDONLY 00
+#define LX_O_WRONLY 01
+#define LX_O_RDWR 02
+#define LX_O_CREAT 0100
+#define LX_O_EXCL 0200
+#define LX_O_NOCTTY 0400
+#define LX_O_TRUNC 01000
+#define LX_O_APPEND 02000
+#define LX_O_NONBLOCK 04000
+#define LX_O_NDELAY LX_O_NONBLOCK
+#define LX_O_SYNC 010000
+#define LX_O_FSYNC LX_O_SYNC
+#define LX_O_ASYNC 020000
+#define LX_O_DIRECT 040000
+#define LX_O_LARGEFILE 0100000
+#define LX_O_DIRECTORY 0200000
+#define LX_O_NOFOLLOW 0400000
+/* lx flag for pipe2 */
+#define LX_O_CLOEXEC 02000000
+
+#define LX_F_DUPFD 0
+#define LX_F_GETFD 1
+#define LX_F_SETFD 2
+#define LX_F_GETFL 3
+#define LX_F_SETFL 4
+#define LX_F_GETLK 5
+#define LX_F_SETLK 6
+#define LX_F_SETLKW 7
+#define LX_F_SETOWN 8
+#define LX_F_GETOWN 9
+#define LX_F_SETSIG 10
+#define LX_F_GETSIG 11
+
+#define LX_F_GETLK64 12
+#define LX_F_SETLK64 13
+#define LX_F_SETLKW64 14
+
+#define LX_F_SETLEASE 1024
+#define LX_F_GETLEASE 1025
+#define LX_F_NOTIFY 1026
+#define LX_F_DUPFD_CLOEXEC 1030
+
+#define LX_F_RDLCK 0
+#define LX_F_WRLCK 1
+#define LX_F_UNLCK 2
+
+/*
+ * Lx flock codes.
+ */
+#define LX_NAME_MAX 255
+#define LX_LOCK_SH 1 /* shared */
+#define LX_LOCK_EX 2 /* exclusive */
+#define LX_LOCK_NB 4 /* non-blocking */
+#define LX_LOCK_UN 8 /* unlock */
+
+/*
+ * On Linux the constants AT_REMOVEDIR and AT_EACCESS have the same value.
+ * AT_REMOVEDIR is used only by unlinkat and AT_EACCESS is used only by
+ * faccessat.
+ */
+#define LX_AT_FDCWD -100
+#define LX_AT_SYMLINK_NOFOLLOW 0x100
+#define LX_AT_REMOVEDIR 0x200
+#define LX_AT_EACCESS 0x200
+#define LX_AT_SYMLINK_FOLLOW 0x400
+#define LX_AT_NO_AUTOMOUNT 0x800
+#define LX_AT_EMPTY_PATH 0x1000
+
+struct lx_flock {
+ short l_type;
+ short l_whence;
+ long l_start;
+ long l_len;
+ int l_pid;
+};
+
+struct lx_flock64 {
+ short l_type;
+ short l_whence;
+ long long l_start;
+ long long l_len;
+ int l_pid;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_FCNTL_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h
new file mode 100644
index 0000000000..cb0b5b4661
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h
@@ -0,0 +1,197 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_LX_H
+#define _SYS_LX_H
+
+#include <stdio.h>
+#include <alloca.h>
+#include <dirent.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/lwp.h>
+
+#include <sys/lx_brand.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern char lx_release[LX_VERS_MAX];
+extern char lx_cmd_name[MAXNAMLEN];
+extern pid_t zoneinit_pid;
+
+/*
+ * Support for the unfortunate RPM race condition workaround.
+ */
+extern int lx_rpm_delay;
+extern boolean_t lx_is_rpm;
+
+/*
+ * Values Linux expects for init
+ */
+#define LX_INIT_PGID 0
+#define LX_INIT_SID 0
+#define LX_INIT_PID 1
+
+/*
+ * Codes to reboot(2).
+ */
+#define LINUX_REBOOT_MAGIC1 0xfee1dead
+#define LINUX_REBOOT_MAGIC2 672274793
+#define LINUX_REBOOT_MAGIC2A 85072278
+#define LINUX_REBOOT_MAGIC2B 369367448
+#define LINUX_REBOOT_MAGIC2C 537993216
+
+/*
+ * This was observed as coming from Red Hat's init process, but it's not in
+ * their reboot(2) man page.
+ */
+#define LINUX_REBOOT_MAGIC2D 0x28121969
+
+#define LINUX_REBOOT_CMD_RESTART 0x1234567
+#define LINUX_REBOOT_CMD_HALT 0xcdef0123
+#define LINUX_REBOOT_CMD_POWER_OFF 0x4321fedc
+#define LINUX_REBOOT_CMD_RESTART2 0xa1b2c3d4
+#define LINUX_REBOOT_CMD_CAD_ON 0x89abcdef
+#define LINUX_REBOOT_CMD_CAD_OFF 0
+
+/*
+ * the maximum length of messages to be output with lx_msg(), lx_err(),
+ * lx_debug(), or lx_unsupported().
+ */
+#define LX_MSG_MAXLEN (128 + MAXPATHLEN)
+
+/*
+ * Linux scheduler priority ranges.
+ */
+#define LX_SCHED_PRIORITY_MIN_OTHER 0
+#define LX_SCHED_PRIORITY_MAX_OTHER 0
+#define LX_SCHED_PRIORITY_MIN_RRFIFO 1
+#define LX_SCHED_PRIORITY_MAX_RRFIFO 99
+
+/*
+ * Constants to indicate who getrusage() should return information about.
+ */
+#define LX_RUSAGE_SELF 0
+#define LX_RUSAGE_CHILDREN (-1)
+#define LX_RUSAGE_BOTH (-2)
+#define LX_RUSAGE_THREAD 1
+
+/*
+ * Constants for prctl(). We only include the ones here that we actually
+ * support; everything else will be ENOSYS.
+ */
+#define LX_PR_SET_KEEPCAPS 8
+#define LX_PR_SET_NAME 15
+
+#define LX_PR_SET_NAME_NAMELEN 16
+
+/*
+ * Based on code from brand_misc.h, but use of that is incompatible with the
+ * lx brand.
+ *
+ * These macros invoke a brandsys subcommand, B_TRUSS_POINT, which makes it
+ * easy to debug with DTrace.
+ */
+#define B_TRUSS_POINT 6
+
+#define B_TRACE_POINT_5(a0, a1, a2, a3, a4) \
+ (void) syscall(SYS_brand, B_TRUSS_POINT, (a0), (a1), (a2), (a3), (a4))
+
+#define B_TRACE_POINT_4(a0, a1, a2, a3) \
+ B_TRACE_POINT_5((a0), (a1), (a2), (a3), 0)
+
+#define B_TRACE_POINT_3(a0, a1, a2) \
+ B_TRACE_POINT_5((a0), (a1), (a2), 0, 0)
+
+#define B_TRACE_POINT_2(a0, a1) \
+ B_TRACE_POINT_5((a0), (a1), 0, 0, 0)
+
+#define B_TRACE_POINT_1(a0) \
+ B_TRACE_POINT_5((a0), 0, 0, 0, 0)
+
+#define B_TRACE_POINT_0() \
+ B_TRACE_POINT_5(0, 0, 0, 0, 0)
+
+/*
+ * normally we never want to write to stderr or stdout because it's unsafe
+ * to make assumptions about the underlying file descriptors. to protect
+ * against writes to these file descriptors we go ahead and close them
+ * our brand process initalization code. but there are still occasions
+ * where we are willing to make assumptions about our file descriptors
+ * and write to them. at thes times we should use one lx_msg() or
+ * lx_msg_error()
+ */
+extern void lx_msg(char *, ...);
+extern void lx_err(char *, ...);
+extern void lx_err_fatal(char *, ...);
+extern void lx_unsupported(char *, ...);
+
+struct ucontext;
+
+extern void lx_handler_table(void);
+extern void lx_handler_trace_table(void);
+extern void lx_emulate_done(void);
+extern lx_regs_t *lx_syscall_regs(void);
+extern int lx_errno(int);
+
+extern char *lx_fd_to_path(int fd, char *buf, int buf_size);
+extern int lx_lpid_to_spair(pid_t, pid_t *, lwpid_t *);
+extern int lx_lpid_to_spid(pid_t, pid_t *);
+
+extern int lx_ptrace_wait(siginfo_t *);
+extern void lx_ptrace_fork(void);
+extern void lx_ptrace_stop_if_option(int);
+
+extern int lx_check_alloca(size_t);
+#define SAFE_ALLOCA(sz) (lx_check_alloca(sz) ? alloca(sz) : NULL)
+
+extern int ltos_at_flag(int lflag, int allow, boolean_t enforce);
+
+/*
+ * NO_UUCOPY disables calls to the uucopy* system calls to help with
+ * debugging brand library accesses to linux application memory.
+ */
+#ifdef NO_UUCOPY
+
+int uucopy_unsafe(const void *src, void *dst, size_t n);
+int uucopystr_unsafe(const void *src, void *dst, size_t n);
+
+#define uucopy(src, dst, n) uucopy_unsafe((src), (dst), (n))
+#define uucopystr(src, dst, n) uucopystr_unsafe((src), (dst), (n))
+
+#endif /* NO_UUCOPY */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_mount.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_mount.h
new file mode 100644
index 0000000000..67dca16b67
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_mount.h
@@ -0,0 +1,142 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _LX_MOUNT_H
+#define _LX_MOUNT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rpc/rpc.h>
+#include <nfs/nfs.h>
+
+/*
+ * mount() is significantly different between Linux and Solaris. The main
+ * difference is between the set of flags. Some flags on Linux can be
+ * translated to a Solaris equivalent, some are converted to a
+ * filesystem-specific option, while others have no equivalent whatsoever.
+ */
+#define LX_MS_MGC_VAL 0xC0ED0000
+#define LX_MS_RDONLY 0x00000001
+#define LX_MS_NOSUID 0x00000002
+#define LX_MS_NODEV 0x00000004
+#define LX_MS_NOEXEC 0x00000008
+#define LX_MS_SYNCHRONOUS 0x00000010
+#define LX_MS_REMOUNT 0x00000020
+#define LX_MS_MANDLOCK 0x00000040
+#define LX_MS_NOATIME 0x00000400
+#define LX_MS_NODIRATIME 0x00000800
+#define LX_MS_BIND 0x00001000
+#define LX_MS_MOVE 0x00002000
+#define LX_MS_REC 0x00004000
+#define LX_MS_SILENT 0x00008000
+#define LX_MS_SUPPORTED (LX_MS_MGC_VAL | \
+ LX_MS_RDONLY | LX_MS_NOSUID | \
+ LX_MS_NODEV | LX_MS_NOEXEC | \
+ LX_MS_REMOUNT | LX_MS_NOATIME | \
+ LX_MS_BIND | LX_MS_SILENT)
+
+/*
+ * support for nfs mounts
+ */
+#define LX_NMD_MAXHOSTNAMELEN 256
+
+#define LX_NFS_MOUNT_SOFT 0x00000001
+#define LX_NFS_MOUNT_INTR 0x00000002
+#define LX_NFS_MOUNT_SECURE 0x00000004
+#define LX_NFS_MOUNT_POSIX 0x00000008
+#define LX_NFS_MOUNT_NOCTO 0x00000010
+#define LX_NFS_MOUNT_NOAC 0x00000020
+#define LX_NFS_MOUNT_TCP 0x00000040
+#define LX_NFS_MOUNT_VER3 0x00000080
+#define LX_NFS_MOUNT_KERBEROS 0x00000100
+#define LX_NFS_MOUNT_NONLM 0x00000200
+#define LX_NFS_MOUNT_BROKEN_SUID 0x00000400
+#define LX_NFS_MOUNT_SUPPORTED (LX_NFS_MOUNT_SOFT | \
+ LX_NFS_MOUNT_INTR | \
+ LX_NFS_MOUNT_POSIX | \
+ LX_NFS_MOUNT_NOCTO | \
+ LX_NFS_MOUNT_NOAC | \
+ LX_NFS_MOUNT_TCP | \
+ LX_NFS_MOUNT_VER3 | \
+ LX_NFS_MOUNT_NONLM)
+
+#define LX_NMD_DEFAULT_RSIZE 0
+#define LX_NMD_DEFAULT_WSIZE 0
+
+/*
+ * the nfs v3 file handle structure definitions are _almost_ the same
+ * on linux and solaris. the key difference are:
+ *
+ * 1) on linux fh3_length is an unsigned short where as on solaris it's
+ * an int.
+ *
+ * 2) on linux the file handle data doesn't 32 bit members, so the structure
+ * is not 32 bit aligned. (where as on solaris it is.)
+ *
+ * so rather than defining a structure that would allow us to intrepret
+ * all the contents of the nfs v3 file handle here, we decide to treate
+ * the file handle as an array of chars. this works just fine since it
+ * avoids the alignment issues and the actual file handle handle contects
+ * are defined by the nfs specification so they are common across solaris
+ * and linux. we do the same thing for nfs v2 file handles.
+ */
+struct lx_nfs_fh2 {
+ unsigned char lx_fh_data[NFS_FHSIZE];
+} lx_nfs_fh2;
+
+struct lx_nfs_fh3 {
+ unsigned short lx_fh3_length;
+ unsigned char lx_fh3_data[NFS3_FHSIZE];
+} lx_nfs_fh3;
+
+typedef struct lx_nfs_mount_data {
+ int nmd_version;
+ int nmd_fd;
+ struct lx_nfs_fh2 nmd_old_root;
+ int nmd_flags;
+ int nmd_rsize;
+ int nmd_wsize;
+ int nmd_timeo;
+ int nmd_retrans;
+ int nmd_acregmin;
+ int nmd_acregmax;
+ int nmd_acdirmin;
+ int nmd_acdirmax;
+ struct sockaddr_in nmd_addr;
+ char nmd_hostname[LX_NMD_MAXHOSTNAMELEN];
+ int nmd_namlen;
+ uint_t nmd_bsize;
+ struct lx_nfs_fh3 nmd_root;
+} lx_nfs_mount_data_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_MOUNT_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_poll.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_poll.h
new file mode 100644
index 0000000000..99abdbbf46
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_poll.h
@@ -0,0 +1,65 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LX_POLL_H
+#define _SYS_LX_POLL_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * These events are identical between Linux and Solaris
+ */
+#define LX_POLLIN 0x001
+#define LX_POLLPRI 0x002
+#define LX_POLLOUT 0x004
+#define LX_POLLERR 0x008
+#define LX_POLLHUP 0x010
+#define LX_POLLNVAL 0x020
+#define LX_POLLRDNORM 0x040
+#define LX_POLLRDBAND 0x080
+
+#define LX_POLL_COMMON_EVENTS (LX_POLLIN | LX_POLLPRI | LX_POLLOUT | \
+ LX_POLLERR | LX_POLLHUP | LX_POLLNVAL | LX_POLLRDNORM | LX_POLLRDBAND)
+
+/*
+ * These events differ between Linux and Solaris
+ */
+#define LX_POLLWRNORM 0x0100
+#define LX_POLLWRBAND 0x0200
+#define LX_POLLRDHUP 0x2000
+
+
+#define LX_POLL_SUPPORTED_EVENTS \
+ (LX_POLL_COMMON_EVENTS | LX_POLLWRNORM | LX_POLLWRBAND | LX_POLLRDHUP)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_POLL_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h
new file mode 100644
index 0000000000..b4dc47faac
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h
@@ -0,0 +1,406 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_LX_SIGNAL_H
+#define _SYS_LX_SIGNAL_H
+
+#if !defined(_ASM)
+#include <sys/lx_types.h>
+#include <sys/ucontext.h>
+#include <lx_signum.h>
+
+#endif /* !defined(_ASM) */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Linux sigaction flags
+ */
+#define LX_SA_NOCLDSTOP 0x00000001
+#define LX_SA_NOCLDWAIT 0x00000002
+#define LX_SA_SIGINFO 0x00000004
+#define LX_SA_RESTORER 0x04000000
+#define LX_SA_ONSTACK 0x08000000
+#define LX_SA_RESTART 0x10000000
+#define LX_SA_NODEFER 0x40000000
+#define LX_SA_RESETHAND 0x80000000
+#define LX_SA_NOMASK LX_SA_NODEFER
+#define LX_SA_ONESHOT LX_SA_RESETHAND
+
+#define LX_SIG_BLOCK 0
+#define LX_SIG_UNBLOCK 1
+#define LX_SIG_SETMASK 2
+
+#define LX_MINSIGSTKSZ 2048
+#define LX_SS_ONSTACK 1
+#define LX_SS_DISABLE 2
+
+#define LX_SIGRT_MAGIC 0xdeadf00d
+
+#if !defined(_ASM)
+
+/*
+ * NOTE: Linux uses different definitions for sigset_ts and sigaction_ts
+ * depending on whether the definition is for user space or the kernel.
+ *
+ * The definitions below MUST correspond to the Linux kernel versions,
+ * as glibc will do the necessary translation from the Linux user
+ * versions.
+ */
+#if defined(_LP64)
+#define LX_NSIG_WORDS 1
+#define LX_WSHIFT 6
+#else /* is _ILP32 */
+#define LX_NSIG_WORDS 2
+#define LX_WSHIFT 5
+#endif
+
+typedef struct {
+ ulong_t __bits[LX_NSIG_WORDS];
+} lx_sigset_t;
+
+#define LX_NBITS (sizeof (ulong_t) * NBBY)
+#define lx_sigmask(n) (1UL << (((n) - 1) % LX_NBITS))
+#define lx_sigword(n) (((ulong_t)((n) - 1)) >> LX_WSHIFT)
+#define lx_sigismember(s, n) (lx_sigmask(n) & (s)->__bits[lx_sigword(n)])
+#define lx_sigaddset(s, n) ((s)->__bits[lx_sigword(n)] |= lx_sigmask(n))
+
+typedef struct lx_sigaction {
+ void (*lxsa_handler)();
+ int lxsa_flags;
+ void (*lxsa_restorer)(void);
+ lx_sigset_t lxsa_mask;
+} lx_sigaction_t;
+
+#if defined(_ILP32)
+typedef uint32_t lx_osigset_t;
+
+#define OSIGSET_NBITS (sizeof (lx_osigset_t) * NBBY)
+#define OSIGSET_BITSET(sig) (1U << (((sig) - 1) % OSIGSET_NBITS))
+
+typedef struct lx_osigaction {
+ void (*lxsa_handler)();
+ lx_osigset_t lxsa_mask;
+ int lxsa_flags;
+ void (*lxsa_restorer)(void);
+} lx_osigaction_t;
+#endif
+
+/*
+ * Flag settings to determine whether common routines should operate on
+ * lx_sigset_ts or lx_osigset_ts.
+ */
+#define USE_OSIGSET 0
+#define USE_SIGSET 1
+
+#define LX_SI_MAX_SIZE 128
+#if defined(_LP64)
+/*
+ * Because of the odd number (3) of ints before the union, we need to account
+ * for the smaller padding needed on x64 due to the union being offset to an 8
+ * byte boundary.
+ */
+#define LX_SI_PAD_SIZE ((LX_SI_MAX_SIZE/sizeof (int)) - 4)
+
+#else
+#define LX_SI_PAD_SIZE ((LX_SI_MAX_SIZE/sizeof (int)) - 3)
+#endif
+
+typedef struct lx_siginfo {
+ int lsi_signo;
+ int lsi_errno;
+ int lsi_code;
+ union {
+ int _pad[LX_SI_PAD_SIZE];
+
+ struct {
+ pid_t _pid;
+ lx_uid16_t _uid;
+ } _kill;
+
+ struct {
+ uint_t _timer1;
+ uint_t _timer2;
+ } _timer;
+
+ struct {
+ pid_t _pid; /* sender's pid */
+ lx_uid16_t _uid; /* sender's uid */
+ union sigval _sigval;
+ } _rt;
+
+ struct {
+ pid_t _pid; /* which child */
+ lx_uid16_t _uid; /* sender's uid */
+ int _status; /* exit code */
+ clock_t _utime;
+ clock_t _stime;
+ } _sigchld;
+
+ struct {
+ void *_addr; /* faulting insn/memory ref. */
+ } _sigfault;
+
+ struct {
+ int _band; /* POLL_IN,POLL_OUT,POLL_MSG */
+ int _fd;
+ } _sigpoll;
+ } _sifields;
+} lx_siginfo_t;
+
+/*
+ * lx_siginfo_t lsi_code values
+ *
+ * LX_SI_ASYNCNL: Sent by asynch name lookup completion
+ * LX_SI_DETHREAD: Sent by execve() killing subsidiary threads
+ * LX_SI_SIGIO: Sent by queued SIGIO
+ * LX_SI_ASYNCIO: Sent by asynchronous I/O completion
+ * LX_SI_MESGQ: Sent by real time message queue state change
+ * LX_SI_TIMER: Sent by timer expiration
+ * LX_SI_QUEUE: Sent by sigqueue
+ * LX_SI_USER: Sent by kill, sigsend, raise, etc.
+ * LX_SI_KERNEL: Sent by kernel
+ * LX_SI_CODE_NOT_EXIST: Error code. When translating from Linux to
+ * illumos errors, if there is no translation available, this value
+ * should be used. This value should have no meaning as an si_code in
+ * illumos or Linux.
+ *
+ * At present, LX_SI_ASYNCNL, LX_SI_DETHREAD, and LX_SI_SIGIO are unused by
+ * BrandZ.
+ */
+#define LX_SI_CODE_NOT_EXIST (-61)
+#define LX_SI_ASYNCNL (-60)
+#define LX_SI_DETHREAD (-7)
+#define LX_SI_TKILL (-6)
+#define LX_SI_SIGIO (-5)
+#define LX_SI_ASYNCIO (-4)
+#define LX_SI_MESGQ (-3)
+#define LX_SI_TIMER (-2)
+#define LX_SI_QUEUE (-1)
+#define LX_SI_USER (0)
+#define LX_SI_KERNEL (0x80)
+
+typedef struct lx_sighandlers {
+ struct lx_sigaction lx_sa[LX_NSIG + 1];
+} lx_sighandlers_t;
+
+typedef struct lx_sigaltstack {
+ void *ss_sp;
+ int ss_flags;
+ size_t ss_size;
+} lx_stack_t;
+
+/*
+ * _fpreg, _fpxreg, _xmmreg and _fpstate are defined in Linux src in:
+ * arch/x86/include/uapi/asm/sigcontext.h
+ */
+#define LX_X86_FXSR_MAGIC 0x0000
+#define LX_X86_FXSR_NONE 0xffff
+
+#if defined(_LP64)
+
+typedef struct lx_fpstate {
+ ushort_t cwd;
+ ushort_t swd;
+ ushort_t twd; /* Note this is not the same as the 32bit/x87/FSAVE twd */
+ ushort_t fop;
+ uint64_t rip;
+ uint64_t rdp;
+ uint32_t mxcsr;
+ uint32_t mxcsr_mask;
+ uint32_t st_space[32]; /* 8 * 16 bytes for each FP-reg */
+ uint32_t xmm_space[64]; /* 16 * 16 bytes for each XMM-reg */
+ uint32_t reserved2[12];
+ uint32_t reserved3[12];
+} lx_fpstate_t;
+
+/*
+ * The Linux layout is defined in the Linux src tree in:
+ * arch/x86/include/asm/sigcontext.h
+ * and the user-level def (which is what we want) at:
+ * arch/x86/include/uapi/asm/sigcontext.h
+ *
+ * The Illumos offsets of the registers in the context are defined in:
+ * usr/src/uts/intel/sys/regset.h
+ * this is an mcontext_t from uc_mcontext.
+ *
+ * For the 64-bit case the register layout is completely different in the
+ * context.
+ */
+typedef struct lx_sigcontext {
+ ulong_t sc_r8;
+ ulong_t sc_r9;
+ ulong_t sc_r10;
+ ulong_t sc_r11;
+ ulong_t sc_r12;
+ ulong_t sc_r13;
+ ulong_t sc_r14;
+ ulong_t sc_r15;
+ ulong_t sc_rdi;
+ ulong_t sc_rsi;
+ ulong_t sc_rbp;
+ ulong_t sc_rbx;
+ ulong_t sc_rdx;
+ ulong_t sc_rax;
+ ulong_t sc_rcx;
+ ulong_t sc_rsp;
+ ulong_t sc_rip;
+ ulong_t sc_eflags;
+ ushort_t sc_cs;
+ ushort_t sc_gs;
+ ushort_t sc_fs;
+ ushort_t sc_pad0;
+ ulong_t sc_err;
+ ulong_t sc_trapno;
+
+ ulong_t sc_mask;
+ ulong_t sc_cr2;
+ lx_fpstate_t *sc_fpstate;
+
+ ulong_t reserved[8];
+} lx_sigcontext_t;
+
+#else /* is _ILP32 */
+
+struct lx_fpreg {
+ ushort_t significand[4];
+ ushort_t exponent;
+};
+
+struct lx_fpxreg {
+ ushort_t significand[4];
+ ushort_t exponent;
+ ushort_t padding[3];
+};
+
+struct lx_xmmreg {
+ uint32_t element[4];
+};
+
+typedef struct lx_fpstate {
+ /* Regular FPU environment */
+ ulong_t cw;
+ ulong_t sw;
+ ulong_t tag;
+ ulong_t ipoff;
+ ulong_t cssel;
+ ulong_t dataoff;
+ ulong_t datasel;
+ struct lx_fpreg _st[8];
+ ushort_t status;
+ ushort_t magic; /* 0xffff = regular FPU data */
+
+ /* FXSR FPU environment */
+ ulong_t _fxsr_env[6]; /* env is ignored */
+ ulong_t mxcsr;
+ ulong_t reserved;
+ struct lx_fpxreg _fxsr_st[8]; /* reg data is ignored */
+ struct lx_xmmreg _xmm[8];
+ ulong_t padding[56];
+} lx_fpstate_t;
+
+/*
+ * The Linux layout is defined in the Linux src tree in:
+ * arch/x86/include/asm/sigcontext.h
+ * and the user-level def (which is what we want) at:
+ * arch/x86/include/uapi/asm/sigcontext.h
+ *
+ * The Illumos offsets of the registers in the context are defined by the
+ * i386 ABI (see usr/src/uts/intel/sys/regset.h).
+ *
+ * Both Illumos and Linux match up here.
+ */
+typedef struct lx_sigcontext {
+ ulong_t sc_gs;
+ ulong_t sc_fs;
+ ulong_t sc_es;
+ ulong_t sc_ds;
+ ulong_t sc_edi;
+ ulong_t sc_esi;
+ ulong_t sc_ebp;
+ ulong_t sc_esp;
+ ulong_t sc_ebx;
+ ulong_t sc_edx;
+ ulong_t sc_ecx;
+ ulong_t sc_eax;
+ ulong_t sc_trapno;
+ ulong_t sc_err;
+ ulong_t sc_eip;
+ ulong_t sc_cs;
+ ulong_t sc_eflags;
+ ulong_t sc_esp_at_signal;
+ ulong_t sc_ss;
+
+ lx_fpstate_t *sc_fpstate;
+ ulong_t sc_mask;
+ ulong_t sc_cr2;
+} lx_sigcontext_t;
+#endif
+
+typedef struct lx_ucontext {
+ ulong_t uc_flags; /* Linux always sets this to 0 */
+ struct lx_ucontext *uc_link; /* Linux always sets this to NULL */
+ lx_stack_t uc_stack;
+ lx_sigcontext_t uc_sigcontext;
+ lx_sigset_t uc_sigmask;
+} lx_ucontext_t;
+
+#define lsi_pid _sifields._kill._pid
+#define lsi_uid _sifields._kill._uid
+#define lsi_status _sifields._sigchld._status
+#define lsi_utime _sifields._sigchld._utime
+#define lsi_stime _sifields._sigchld._stime
+#define lsi_value _sifields._rt._sigval
+#define lsi_int _sifields._rt._sigval.sivalx_int
+#define lsi_ptr _sifields._rt._sigval.sivalx_ptr
+#define lsi_addr _sifields._sigfault._addr
+#define lsi_band _sifields._sigpoll._band
+#define lsi_fd _sifields._sigpoll._fd
+
+extern const int ltos_signo[];
+extern const int stol_signo[];
+
+extern void setsigacthandler(void (*)(int, siginfo_t *, void *),
+ void (**)(int, siginfo_t *, void *),
+ int (*)(const ucontext_t *));
+
+extern int lx_siginit(void);
+
+extern void lx_sigreturn_tolibc(uintptr_t);
+extern void lx_sigdeliver(int, siginfo_t *, void *, size_t, void (*)(),
+ void (*)(), uintptr_t);
+
+extern int stol_siginfo(siginfo_t *siginfop, lx_siginfo_t *lx_siginfop);
+
+#endif /* !defined(_ASM) */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_SIGNAL_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_sigstack.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_sigstack.h
new file mode 100644
index 0000000000..219bac5f21
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_sigstack.h
@@ -0,0 +1,73 @@
+/*
+* This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_LX_SIGSTACK_H
+#define _SYS_LX_SIGSTACK_H
+
+#if !defined(_ASM)
+#include <sys/lx_types.h>
+#include <sys/ucontext.h>
+#include <sys/lx_signal.h>
+#include <lx_signum.h>
+
+#endif /* !defined(_ASM) */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Two flavors of Linux signal stacks:
+ *
+ * lx_sigstack - used for "modern" signal handlers, in practice those
+ * that have the sigaction(2) flag SA_SIGINFO set
+ *
+ * lx_oldsigstack - used for legacy signal handlers, those that do not have
+ * the sigaction(2) flag SA_SIGINFO set or that were setup via
+ * the signal(2) call.
+ *
+ * NOTE: Since these structures will be placed on the stack and stack math will
+ * be done with their sizes, for the 32-bit code they must be word
+ * aligned in size (4 bytes) so the stack remains word aligned per the
+ * i386 ABI, or, for 64-bit code they must be 16 byte aligned as per the
+ * AMD64 ABI.
+ */
+#if defined(_LP64)
+typedef struct lx_sigstack {
+ void (*retaddr)(); /* address of real lx_rt_sigreturn code */
+ lx_siginfo_t si; /* saved signal information */
+ lx_ucontext_t uc; /* saved user context */
+ lx_fpstate_t fpstate; /* saved FP state */
+ char pad[2]; /* stack alignment */
+} lx_sigstack_t;
+#else
+struct lx_sigstack {
+ void (*retaddr)(); /* address of real lx_rt_sigreturn code */
+ int sig; /* signal number */
+ lx_siginfo_t *sip; /* points to "si" if valid, NULL if not */
+ lx_ucontext_t *ucp; /* points to "uc" */
+ lx_siginfo_t si; /* saved signal information */
+ lx_ucontext_t uc; /* saved user context */
+ lx_fpstate_t fpstate; /* saved FP state */
+ char trampoline[8]; /* code for trampoline to lx_rt_sigreturn() */
+};
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_SIGSTACK_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_socket.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_socket.h
new file mode 100644
index 0000000000..28fcbb2a0a
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_socket.h
@@ -0,0 +1,352 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_LX_SOCKET_H
+#define _SYS_LX_SOCKET_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/lx_types.h>
+
+/*
+ * Linux address family definitions
+ * Some of these are not supported
+ */
+#define LX_AF_UNSPEC 0 /* Unspecified */
+#define LX_AF_UNIX 1 /* local file/pipe name */
+#define LX_AF_INET 2 /* IP protocol family */
+#define LX_AF_AX25 3 /* Amateur Radio AX.25 */
+#define LX_AF_IPX 4 /* Novell Internet Protocol */
+#define LX_AF_APPLETALK 5 /* Appletalk */
+#define LX_AF_NETROM 6 /* Amateur radio */
+#define LX_AF_BRIDGE 7 /* Multiprotocol bridge */
+#define LX_AF_ATMPVC 8 /* ATM PVCs */
+#define LX_AF_X25 9 /* X.25 */
+#define LX_AF_INET6 10 /* IPV 6 */
+#define LX_AF_ROSE 11 /* Amateur Radio X.25 */
+#define LX_AF_DECnet 12 /* DECnet */
+#define LX_AF_NETBEUI 13 /* 802.2LLC */
+#define LX_AF_SECURITY 14 /* Security callback */
+#define LX_AF_KEY 15 /* key management */
+#define LX_AF_ROUTE 16 /* Alias to emulate 4.4BSD */
+#define LX_AF_PACKET 17 /* Packet family */
+#define LX_AF_ASH 18 /* Ash ? */
+#define LX_AF_ECONET 19 /* Acorn Econet */
+#define LX_AF_ATMSVC 20 /* ATM SVCs */
+#define LX_AF_SNA 22 /* Linux SNA */
+#define LX_AF_IRDA 23 /* IRDA sockets */
+#define LX_AF_PPPOX 24 /* PPPoX sockets */
+#define LX_AF_WANPIPE 25 /* Wanpipe API sockets */
+#define LX_AF_BLUETOOTH 31 /* Bluetooth sockets */
+#define LX_AF_MAX 32 /* MAX socket type */
+
+#define AF_NOTSUPPORTED -1
+#define AF_INVAL -2
+
+/*
+ * Linux ARP protocol hardware identifiers
+ */
+#define LX_ARPHRD_ETHER 1 /* Ethernet */
+#define LX_ARPHRD_LOOPBACK 772 /* Loopback */
+#define LX_ARPHRD_VOID 0xffff /* Unknown */
+
+/*
+ * Linux socket type definitions
+ */
+#define LX_SOCK_STREAM 1 /* Connection-based byte streams */
+#define LX_SOCK_DGRAM 2 /* Connectionless, datagram */
+#define LX_SOCK_RAW 3 /* Raw protocol interface */
+#define LX_SOCK_RDM 4 /* Reliably-delivered message */
+#define LX_SOCK_SEQPACKET 5 /* Sequenced packet stream */
+#define LX_SOCK_PACKET 10 /* Linux specific */
+#define LX_SOCK_MAX 11
+
+/*
+ * The Linux socket type can be or-ed with other flags (e.g. SOCK_CLOEXEC).
+ */
+#define LX_SOCK_TYPE_MASK 0xf
+
+/*
+ * Linux flags for socket, socketpair and accept4. These are or-ed into the
+ * socket type value. In the Linux net.h header these come from fcntl.h (note
+ * that they are in octal in the Linux header).
+ */
+#define LX_SOCK_CLOEXEC 0x80000
+#define LX_SOCK_NONBLOCK 0x800
+
+#define SOCK_NOTSUPPORTED -1
+#define SOCK_INVAL -2
+
+/*
+ * IP Protocol levels. Some of these match the Illumos IPPROTO_* values.
+ */
+#define LX_IPPROTO_IP 0
+#define LX_IPPROTO_ICMP 1
+#define LX_IPPROTO_IGMP 2
+#define LX_IPPROTO_TCP 6
+#define LX_IPPROTO_UDP 17
+#define LX_IPPROTO_RAW 255
+
+/*
+ * Options for use with [gs]etsockopt at the IP level.
+ * IPPROTO_IP
+ */
+#define LX_IP_TOS 1
+#define LX_IP_TTL 2
+#define LX_IP_HDRINCL 3
+#define LX_IP_OPTIONS 4
+#define LX_IP_ROUTER_ALERT 5
+#define LX_IP_RECVOPTS 6
+#define LX_IP_RETOPTS 7
+#define LX_IP_PKTINFO 8
+#define LX_IP_PKTOPTIONS 9
+#define LX_IP_MTU_DISCOVER 10
+#define LX_IP_RECVERR 11
+#define LX_IP_RECVTTL 12
+#define LX_IP_RECVTOS 13
+#define LX_IP_MTU 14
+#define LX_IP_FREEBIND 15
+#define LX_IP_IPSEC_POLICY 16
+#define LX_IP_XFRM_POLICY 17
+#define LX_IP_PASSSEC 18
+#define LX_IP_TRANSPARENT 19
+#define LX_IP_ORIGDSTADDR 20
+#define LX_IP_MINTTL 21
+#define LX_IP_NODEFRAG 22
+/* Linux apparently leaves a gap here */
+#define LX_IP_MULTICAST_IF 32
+#define LX_IP_MULTICAST_TTL 33
+#define LX_IP_MULTICAST_LOOP 34
+#define LX_IP_ADD_MEMBERSHIP 35
+#define LX_IP_DROP_MEMBERSHIP 36
+#define LX_IP_UNBLOCK_SOURC 37
+#define LX_IP_BLOCK_SOURCE 38
+#define LX_IP_ADD_SOURCE_MEMBERSHIP 39
+#define LX_IP_DROP_SOURCE_MEMBERSHIP 40
+#define LX_IP_MSFILTER 41
+#define LX_MCAST_JOIN_GROUP 42
+#define LX_MCAST_BLOCK_SOURCE 43
+#define LX_MCAST_UNBLOCK_SOURCE 44
+#define LX_MCAST_LEAVE_GROUP 45
+#define LX_MCAST_JOIN_SOURCE_GROUP 46
+#define LX_MCAST_LEAVE_SOURCE_GROUP 47
+#define LX_MCAST_MSFILTER 48
+#define LX_IP_MULTICAST_ALL 49
+#define LX_IP_UNICAST_IF 50
+
+/*
+ * Options for use with [gs]etsockopt at the TCP level.
+ * IPPROTO_TCP
+ */
+#define LX_TCP_NODELAY 1 /* Don't delay send to coalesce packets */
+#define LX_TCP_MAXSEG 2 /* Set maximum segment size */
+#define LX_TCP_CORK 3 /* Control sending of partial frames */
+#define LX_TCP_KEEPIDLE 4 /* Start keeplives after this period */
+#define LX_TCP_KEEPINTVL 5 /* Interval between keepalives */
+#define LX_TCP_KEEPCNT 6 /* Number of keepalives before death */
+#define LX_TCP_SYNCNT 7 /* Number of SYN retransmits */
+#define LX_TCP_LINGER2 8 /* Life time of orphaned FIN-WAIT-2 state */
+#define LX_TCP_DEFER_ACCEPT 9 /* Wake up listener only when data arrive */
+#define LX_TCP_WINDOW_CLAMP 10 /* Bound advertised window */
+#define LX_TCP_INFO 11 /* Information about this connection. */
+#define LX_TCP_QUICKACK 12 /* Bock/reenable quick ACKs. */
+#define LX_TCP_CONGESTION 13 /* Congestion control algorithm */
+#define LX_TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */
+#define LX_TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts on thin streams */
+#define LX_TCP_THIN_DUPACK 17 /* Fast retrans. after 1 dupack */
+#define LX_TCP_USER_TIMEOUT 18 /* How long for loss retry before timeout */
+#define LX_TCP_REPAIR 19 /* TCP socket under repair */
+#define LX_TCP_REPAIR_QUEUE 20
+#define LX_TCP_QUEUE_SEQ 21
+#define LX_TCP_REPAIR_OPTIONS 22
+#define LX_TCP_FASTOPEN 23 /* Enable FastOpen on listeners */
+#define LX_TCP_TIMESTAMP 24
+#define LX_TCP_NOTSENT_LOWAT 25 /* limit number of unsent bytes */
+
+/*
+ * Options for use with [gs]etsockopt at the IGMP level.
+ * IPPROTO_IGMP
+ */
+#define LX_IGMP_MINLEN 8
+#define LX_IGMP_MAX_HOST_REPORT_DELAY 10
+#define LX_IGMP_HOST_MEMBERSHIP_QUERY 0x11
+#define LX_IGMP_HOST_MEMBERSHIP_REPORT 0x12
+#define LX_IGMP_DVMRP 0x13
+#define LX_IGMP_PIM 0x14
+#define LX_IGMP_TRACE 0x15
+#define LX_IGMP_HOST_NEW_MEMBERSHIP_REPORT 0x16
+#define LX_IGMP_HOST_LEAVE_MESSAGE 0x17
+#define LX_IGMP_MTRACE_RESP 0x1e
+#define LX_IGMP_MTRACE 0x1f
+
+/*
+ * Options for use with [gs]etsockopt at the SOL_SOCKET level.
+ */
+#define LX_SOL_SOCKET 1
+
+#define LX_SCM_RIGHTS 1
+#define LX_SCM_CRED 2
+
+#define LX_SO_DEBUG 1
+#define LX_SO_REUSEADDR 2
+#define LX_SO_TYPE 3
+#define LX_SO_ERROR 4
+#define LX_SO_DONTROUTE 5
+#define LX_SO_BROADCAST 6
+#define LX_SO_SNDBUF 7
+#define LX_SO_RCVBUF 8
+#define LX_SO_KEEPALIVE 9
+#define LX_SO_OOBINLINE 10
+#define LX_SO_NO_CHECK 11
+#define LX_SO_PRIORITY 12
+#define LX_SO_LINGER 13
+#define LX_SO_BSDCOMPAT 14
+#define LX_SO_REUSEPORT 15
+/*
+ * For Linux see unix(7) man page SO_PASSCRED description. For Illumos see
+ * socket.h(3HEAD) man page SO_RECVUCRED description.
+ */
+#define LX_SO_PASSCRED 16
+#define LX_SO_PEERCRED 17
+#define LX_SO_RCVLOWAT 18
+#define LX_SO_SNDLOWAT 19
+#define LX_SO_RCVTIMEO 20
+#define LX_SO_SNDTIMEO 21
+/* Security levels - as per NRL IPv6 - don't actually do anything */
+#define LX_SO_SECURITY_AUTHENTICATION 22
+#define LX_SO_SECURITY_ENCRYPTION_TRANSPORT 23
+#define LX_SO_SECURITY_ENCRYPTION_NETWORK 24
+#define LX_SO_BINDTODEVICE 25
+/* Socket filtering */
+#define LX_SO_ATTACH_FILTER 26
+#define LX_SO_DETACH_FILTER 27
+#define LX_SO_PEERNAME 28
+#define LX_SO_TIMESTAMP 29
+#define LX_SCM_TIMESTAMP LX_SO_TIMESTAMP
+#define LX_SO_ACCEPTCONN 30
+
+#define LX_SO_PEERSEC 31
+#define LX_SO_SNDBUFFORCE 32
+#define LX_SO_RCVBUFFORCE 33
+#define LX_SO_PASSSEC 34
+#define LX_SO_TIMESTAMPNS 35
+#define LX_SCM_TIMESTAMPNS LX_SO_TIMESTAMPNS
+#define LX_SO_MARK 36
+#define LX_SO_TIMESTAMPING 37
+#define LX_SCM_TIMESTAMPING LX_SO_TIMESTAMPING
+#define LX_SO_PROTOCOL 38
+#define LX_SO_DOMAIN 39
+#define LX_SO_RXQ_OVFL 40
+#define LX_SO_WIFI_STATUS 41
+#define LX_SCM_WIFI_STATUS LX_SO_WIFI_STATUS
+#define LX_SO_PEEK_OFF 42
+#define LX_SO_NOFCS 43
+#define LX_SO_LOCK_FILTER 44
+#define LX_SO_SELECT_ERR_QUEUE 45
+#define LX_SO_BUSY_POLL 46
+#define LX_SO_MAX_PACING_RATE 47
+#define LX_SO_BPF_EXTENSIONS 48
+
+/*
+ * Options for use with [gs]etsockopt at the RAW level.
+ * IPPROTO_RAW
+ */
+#define LX_ICMP_FILTER 1
+
+/*
+ * Linux socketcall indices.
+ * These constitute all 17 socket related system calls
+ *
+ * These system calls are called via a single system call socketcall().
+ * The first arg being the endex of the system call type
+ */
+#define LX_SOCKET 1
+#define LX_BIND 2
+#define LX_CONNECT 3
+#define LX_LISTEN 4
+#define LX_ACCEPT 5
+#define LX_GETSOCKNAME 6
+#define LX_GETPEERNAME 7
+#define LX_SOCKETPAIR 8
+#define LX_SEND 9
+#define LX_RECV 10
+#define LX_SENDTO 11
+#define LX_RECVFROM 12
+#define LX_SHUTDOWN 13
+#define LX_SETSOCKOPT 14
+#define LX_GETSOCKOPT 15
+#define LX_SENDMSG 16
+#define LX_RECVMSG 17
+#define LX_ACCEPT4 18
+#define LX_RECVMMSG 19
+#define LX_SENDMMSG 20
+
+/*
+ * Linux socket flags for use with recv(2)/send(2)/recvmsg(2)/sendmsg(2)
+ */
+#define LX_MSG_OOB 1
+#define LX_MSG_PEEK 2
+#define LX_MSG_DONTROUTE 4
+#define LX_MSG_CTRUNC 8
+#define LX_MSG_PROXY 0x10
+#define LX_MSG_TRUNC 0x20
+#define LX_MSG_DONTWAIT 0x40
+#define LX_MSG_EOR 0x80
+#define LX_MSG_WAITALL 0x100
+#define LX_MSG_FIN 0x200
+#define LX_MSG_SYN 0x400
+#define LX_MSG_CONFIRM 0x800
+#define LX_MSG_RST 0x1000
+#define LX_MSG_ERRQUEUE 0x2000
+#define LX_MSG_NOSIGNAL 0x4000
+#define LX_MSG_MORE 0x8000
+#define LX_MSG_WAITFORONE 0x10000
+#define LX_MSG_FASTOPEN 0x20000000
+#define LX_MSG_CMSG_CLOEXEC 0x40000000
+
+struct lx_msghdr {
+ void *msg_name; /* optional address */
+ socklen_t msg_namelen; /* size of address */
+ struct iovec *msg_iov; /* scatter/gather array */
+ int msg_iovlen; /* # elements in msg_iov */
+ void *msg_control; /* ancillary data */
+ socklen_t msg_controllen; /* ancillary data buffer len */
+ int msg_flags; /* flags on received message */
+};
+
+struct lx_ucred {
+ pid_t lxu_pid;
+ lx_uid_t lxu_uid;
+ lx_gid_t lxu_gid;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_SOCKET_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_stat.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_stat.h
new file mode 100644
index 0000000000..731555ec74
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_stat.h
@@ -0,0 +1,116 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_LX_STAT_H
+#define _SYS_LX_STAT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/lx_types.h>
+#include <sys/stat.h>
+
+#define LX_MAJORSHIFT 8
+#define LX_MINORMASK ((1 << LX_MAJORSHIFT) - 1)
+#define LX_MAKEDEVICE(lx_maj, lx_min) \
+ ((lx_dev_t)((lx_maj) << LX_MAJORSHIFT | ((lx_min) & LX_MINORMASK)))
+
+#define LX_GETMAJOR(lx_dev) ((lx_dev) >> LX_MAJORSHIFT)
+#define LX_GETMINOR(lx_dev) ((lx_dev) & LX_MINORMASK)
+
+#undef st_atime
+#undef st_mtime
+#undef st_ctime
+
+struct lx_stat {
+ lx_dev16_t st_dev;
+ uint16_t st_pad1;
+ lx_ino_t st_ino;
+ lx_mode16_t st_mode;
+ uint16_t st_nlink;
+ lx_uid16_t st_uid;
+ lx_gid16_t st_gid;
+ lx_dev16_t st_rdev;
+ uint16_t st_pad2;
+ lx_off_t st_size;
+ lx_blksize_t st_blksize;
+ lx_blkcnt_t st_blocks;
+ struct lx_timespec st_atime;
+ struct lx_timespec st_mtime;
+ struct lx_timespec st_ctime;
+ uint32_t st_pad3;
+ uint32_t st_pad4;
+};
+
+#if defined(_LP64)
+struct lx_stat64 {
+ ulong_t st_dev;
+ ulong_t st_ino;
+ ulong_t st_nlink; /* yes, the order really is */
+ uint_t st_mode; /* different for these two */
+ uint_t st_uid;
+ uint_t st_gid;
+ uint_t st_pad0;
+ ulong_t st_rdev;
+ long st_size;
+ long st_blksize;
+ long st_blocks;
+ struct lx_timespec st_atime;
+ struct lx_timespec st_mtime;
+ struct lx_timespec st_ctime;
+ long st_unused[3];
+};
+
+#else /* is 32-bit */
+
+struct lx_stat64 {
+ lx_dev_t st_dev;
+ uint32_t st_pad1;
+ lx_ino_t st_small_ino;
+ lx_mode_t st_mode;
+ uint_t st_nlink;
+ lx_uid_t st_uid;
+ lx_gid_t st_gid;
+ lx_dev_t st_rdev;
+ uint32_t st_pad2;
+ lx_off64_t st_size;
+ lx_blksize_t st_blksize;
+ lx_blkcnt64_t st_blocks;
+ struct lx_timespec st_atime;
+ struct lx_timespec st_mtime;
+ struct lx_timespec st_ctime;
+ lx_ino64_t st_ino;
+};
+#endif
+
+extern int lx_stat_init(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_STAT_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_statfs.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_statfs.h
new file mode 100644
index 0000000000..b934373f76
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_statfs.h
@@ -0,0 +1,81 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _LX_STATFS_H
+#define _LX_STATFS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int lx_statfs_init(void);
+
+struct lx_statfs {
+ size_t f_type;
+ size_t f_bsize;
+ ulong_t f_blocks;
+ ulong_t f_bfree;
+ ulong_t f_bavail;
+ ulong_t f_files;
+ ulong_t f_ffree;
+ u_longlong_t f_fsid;
+ size_t f_namelen;
+ size_t f_frsize;
+ size_t f_spare[5];
+};
+
+struct lx_statfs64 {
+ int f_type;
+ int f_bsize;
+ u_longlong_t f_blocks;
+ u_longlong_t f_bfree;
+ u_longlong_t f_bavail;
+ u_longlong_t f_files;
+ u_longlong_t f_ffree;
+ u_longlong_t f_fsid;
+ int f_namelen;
+ int f_frsize;
+ int f_spare[5];
+};
+
+/*
+ * These magic values are taken mostly from statfs(2) or magic.h
+ */
+#define LX_DEVFS_SUPER_MAGIC 0x1373
+#define LX_DEVPTS_SUPER_MAGIC 0x1cd1
+#define LX_ISOFS_SUPER_MAGIC 0x9660
+#define LX_MSDOS_SUPER_MAGIC 0x4d44
+#define LX_NFS_SUPER_MAGIC 0x6969
+#define LX_PROC_SUPER_MAGIC 0x9fa0
+#define LX_TMPFS_SUPER_MAGIC 0x01021994
+#define LX_UFS_MAGIC 0x00011954
+#define LX_PIPEFS_MAGIC 0x50495045
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_STATFS_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h
new file mode 100644
index 0000000000..b5b6f6f1a1
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h
@@ -0,0 +1,396 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_LX_SYSCALL_H
+#define _SYS_LX_SYSCALL_H
+
+#include <sys/lx_brand.h>
+
+#if !defined(_ASM)
+
+#include <sys/types.h>
+#include <sys/procset.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int lx_install;
+
+extern long lx_openat(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_mkdirat(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_mknodat(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_fchownat(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_futimesat(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_utimensat(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_fstatat64(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_unlinkat(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_renameat(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_linkat(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_symlinkat(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_readlinkat(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_fchmodat(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_access(uintptr_t, uintptr_t);
+extern long lx_faccessat(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+
+extern long lx_stat(uintptr_t, uintptr_t);
+extern long lx_fstat(uintptr_t, uintptr_t);
+extern long lx_lstat(uintptr_t, uintptr_t);
+extern long lx_stat64(uintptr_t, uintptr_t);
+extern long lx_fstat64(uintptr_t, uintptr_t);
+extern long lx_lstat64(uintptr_t, uintptr_t);
+extern long lx_fcntl(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_fcntl64(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_flock(uintptr_t, uintptr_t);
+extern long lx_open(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_readlink(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_readdir(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_getdents(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_getdents64(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_getpid(void);
+extern long lx_execve(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_dup2(uintptr_t, uintptr_t);
+extern long lx_dup3(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_ioctl(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_vhangup(void);
+extern long lx_fadvise64(uintptr_t, off64_t, uintptr_t, uintptr_t);
+extern long lx_fadvise64_64(uintptr_t, off64_t, off64_t, uintptr_t);
+
+extern long lx_readv(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_writev(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_pread(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_pwrite(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_pread64(uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t);
+extern long lx_pwrite64(uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t);
+
+extern long lx_socketcall(uintptr_t, uintptr_t);
+extern long lx_accept(int, void *, int *);
+extern long lx_accept4(int, void *, int *, int);
+extern long lx_bind(int, void *, int);
+extern long lx_connect(int, void *, int);
+extern long lx_getpeername(int, void *, int *);
+extern long lx_getsockname(int, void *, int *);
+extern long lx_getsockopt(int, int, int, void *, int *);
+extern long lx_listen(int, int);
+extern long lx_recvfrom(int, void *, size_t, int, void *, int *);
+extern long lx_recvmsg(int, void *, int);
+extern long lx_sendmsg(int, void *, int);
+extern long lx_sendto(int, void *, size_t, int, void *, int);
+extern long lx_setsockopt(int, int, int, void *, int);
+extern long lx_shutdown(int, int);
+extern long lx_socket(int, int, int);
+extern long lx_socketpair(int, int, int, int *);
+
+extern long lx_select(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_pselect6(uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t, uintptr_t);
+extern long lx_poll(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_ppoll(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_epoll_ctl(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_oldgetrlimit(uintptr_t, uintptr_t);
+extern long lx_getrlimit(uintptr_t, uintptr_t);
+extern long lx_setrlimit(uintptr_t, uintptr_t);
+extern long lx_prlimit64(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_gettimeofday(uintptr_t, uintptr_t);
+extern long lx_settimeofday(uintptr_t, uintptr_t);
+extern long lx_getrusage(uintptr_t, uintptr_t);
+extern long lx_mknod(uintptr_t, uintptr_t, uintptr_t);
+
+extern long lx_getpgrp(void);
+extern long lx_getpgid(uintptr_t);
+extern long lx_setpgid(uintptr_t, uintptr_t);
+extern long lx_getsid(uintptr_t);
+extern long lx_setsid(void);
+extern long lx_setgroups(uintptr_t, uintptr_t);
+
+
+extern long lx_waitpid(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_waitid(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_wait4(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+
+extern long lx_getuid16(void);
+extern long lx_getgid16(void);
+extern long lx_geteuid16(void);
+extern long lx_getegid16(void);
+extern long lx_geteuid(void);
+extern long lx_getegid(void);
+extern long lx_getresuid16(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_getresgid16(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_getresuid(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_getresgid(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_capget(uintptr_t, uintptr_t);
+extern long lx_capset(uintptr_t, uintptr_t);
+
+extern long lx_setuid16(uintptr_t);
+extern long lx_setreuid16(uintptr_t, uintptr_t);
+extern long lx_setregid16(uintptr_t, uintptr_t);
+extern long lx_setgid16(uintptr_t);
+extern long lx_setfsuid16(uintptr_t);
+extern long lx_setfsgid16(uintptr_t);
+
+extern long lx_setfsuid(uintptr_t);
+extern long lx_setfsgid(uintptr_t);
+
+extern long lx_clock_settime(int, struct timespec *);
+extern long lx_clock_gettime(int, struct timespec *);
+extern long lx_clock_getres(int, struct timespec *);
+extern long lx_clock_nanosleep(int, int flags, struct timespec *,
+ struct timespec *);
+extern long lx_adjtimex(void *);
+extern long lx_timer_create(int, struct sigevent *, timer_t *);
+extern long lx_timer_settime(timer_t, int, struct itimerspec *,
+ struct itimerspec *);
+extern long lx_timer_gettime(timer_t, struct itimerspec *);
+extern long lx_timer_getoverrun(timer_t);
+extern long lx_timer_delete(timer_t);
+
+extern long lx_truncate(uintptr_t, uintptr_t);
+extern long lx_ftruncate(uintptr_t, uintptr_t);
+extern long lx_truncate64(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_ftruncate64(uintptr_t, uintptr_t, uintptr_t);
+
+extern long lx_sysctl(uintptr_t);
+extern long lx_fsync(uintptr_t);
+extern long lx_fdatasync(uintptr_t);
+extern long lx_pipe2(uintptr_t, uintptr_t);
+extern long lx_link(uintptr_t, uintptr_t);
+extern long lx_unlink(uintptr_t);
+extern long lx_rmdir(uintptr_t);
+extern long lx_chown16(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_fchown16(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_lchown16(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_chown(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_fchown(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_chmod(uintptr_t, uintptr_t);
+extern long lx_rename(uintptr_t, uintptr_t);
+extern long lx_utime(uintptr_t, uintptr_t);
+extern long lx_llseek(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_lseek(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_sysfs(uintptr_t, uintptr_t, uintptr_t);
+
+extern long lx_getcpu(unsigned int *, uintptr_t, uintptr_t);
+extern long lx_getcwd(uintptr_t, uintptr_t);
+extern long lx_uname(uintptr_t);
+extern long lx_reboot(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_getgroups16(uintptr_t, uintptr_t);
+extern long lx_setgroups16(uintptr_t, uintptr_t);
+extern long lx_personality(uintptr_t);
+
+extern long lx_query_module(uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t);
+
+extern long lx_time(uintptr_t);
+extern long lx_times(uintptr_t);
+extern long lx_setitimer(uintptr_t, uintptr_t, uintptr_t);
+
+extern long lx_clone(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_exit(uintptr_t);
+extern long lx_group_exit(uintptr_t);
+
+extern long lx_mlock(uintptr_t, uintptr_t);
+extern long lx_mlockall(uintptr_t);
+extern long lx_munlock(uintptr_t, uintptr_t);
+extern long lx_munlockall(void);
+extern long lx_msync(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_madvise(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_mprotect(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_mmap(uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t, uintptr_t);
+extern long lx_mmap2(uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t, uintptr_t);
+extern long lx_remap(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+
+extern long lx_mount(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_umount(uintptr_t);
+extern long lx_umount2(uintptr_t, uintptr_t);
+
+extern long lx_statfs(uintptr_t, uintptr_t);
+extern long lx_fstatfs(uintptr_t, uintptr_t);
+extern long lx_statfs64(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_fstatfs64(uintptr_t, uintptr_t, uintptr_t);
+
+extern long lx_sigreturn(void);
+extern long lx_rt_sigreturn(void);
+extern long lx_signal(uintptr_t, uintptr_t);
+extern long lx_sigaction(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_rt_sigaction(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_sigaltstack(uintptr_t, uintptr_t);
+extern long lx_sigpending(uintptr_t);
+extern long lx_rt_sigpending(uintptr_t, uintptr_t);
+extern long lx_sigprocmask(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_rt_sigprocmask(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_sigsuspend(uintptr_t);
+extern long lx_rt_sigsuspend(uintptr_t, uintptr_t);
+extern long lx_rt_sigwaitinfo(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_rt_sigtimedwait(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_rt_sigqueueinfo(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_rt_tgsigqueueinfo(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+
+extern long lx_sync(void);
+
+extern long lx_futex(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t);
+
+extern long lx_tkill(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t);
+extern long lx_tgkill(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t);
+
+extern long lx_sethostname(uintptr_t, uintptr_t);
+extern long lx_setdomainname(uintptr_t, uintptr_t);
+
+extern long lx_sendfile(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_sendfile64(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+
+extern long lx_fork(void);
+extern long lx_vfork(void);
+extern long lx_exec(uintptr_t, uintptr_t, uintptr_t);
+
+extern long lx_getpriority(uintptr_t, uintptr_t);
+extern long lx_setpriority(uintptr_t, uintptr_t, uintptr_t);
+
+extern long lx_ptrace(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+
+extern long lx_sched_getaffinity(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_sched_setaffinity(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_sched_getparam(uintptr_t, uintptr_t);
+extern long lx_sched_setparam(uintptr_t, uintptr_t);
+extern long lx_sched_rr_get_interval(uintptr_t pid, uintptr_t);
+extern long lx_sched_getscheduler(uintptr_t);
+extern long lx_sched_setscheduler(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_sched_get_priority_min(uintptr_t);
+extern long lx_sched_get_priority_max(uintptr_t);
+
+extern long lx_xattr2(uintptr_t, uintptr_t);
+extern long lx_xattr3(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_xattr4(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+
+extern long lx_keyctl(void);
+
+extern long lx_ipc(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_msgget(key_t, int);
+extern long lx_msgsnd(int, void *, size_t, int);
+extern long lx_msgrcv(int, void *, size_t, long, int);
+extern long lx_msgctl(int, int, void *);
+extern long lx_semget(key_t, int, int);
+extern long lx_semop(int, void *, size_t);
+extern long lx_semtimedop(int, void *, size_t, struct timespec *);
+extern long lx_semctl(int, int, int, void *);
+extern long lx_shmget(key_t, size_t, int);
+extern long lx_shmat(int, void *, int);
+extern long lx_shmctl(int, int, void *);
+
+extern long lx_prctl(int, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+
+extern long lx_alarm(unsigned int);
+extern long lx_close(int);
+extern long lx_chdir(const char *);
+extern long lx_chroot(const char *);
+extern long lx_creat(const char *, mode_t);
+extern long lx_dup(int);
+extern long lx_epoll_pwait(int, void *, int, int, const sigset_t *);
+extern long lx_epoll_create(int);
+extern long lx_epoll_create1(int);
+extern long lx_epoll_wait(int, void *, int, int);
+extern long lx_fchdir(int);
+extern long lx_fchmod(int, mode_t);
+extern long lx_getgid(void);
+extern long lx_getgroups(int, gid_t *);
+extern long lx_getitimer(int, struct itimerval *);
+extern long lx_getuid(void);
+extern long lx_inotify_add_watch(int, const char *, uint32_t);
+extern long lx_inotify_init(void);
+extern long lx_inotify_init1(int);
+extern long lx_inotify_rm_watch(int, int);
+extern long lx_lchown(const char *, uid_t, gid_t);
+extern long lx_mincore(caddr_t, size_t, char *);
+extern long lx_mkdir(const char *, mode_t);
+extern long lx_munmap(void *, size_t);
+extern long lx_nanosleep(const struct timespec *, struct timespec *);
+extern long lx_nice(int);
+extern long lx_pause(void);
+extern long lx_setgid(gid_t);
+extern long lx_setuid(uid_t);
+extern long lx_setregid(gid_t, gid_t);
+extern long lx_setreuid(uid_t, uid_t);
+extern long lx_shmdt(char *);
+extern long lx_stime(const time_t *);
+extern long lx_symlink(const char *, const char *);
+extern long lx_syslog(int, char *, int);
+extern long lx_sysinfo32(uintptr_t);
+extern long lx_umask(mode_t);
+extern long lx_utimes(const char *, const struct timeval *);
+extern long lx_write(int, const void *, size_t);
+extern long lx_yield(void);
+
+#endif /* !defined(_ASM) */
+
+/*
+ * Constants for the In-Kernel Emulation table.
+ */
+#define LX_EMUL_getpid 1
+#define LX_EMUL_kill 2
+#define LX_EMUL_pipe 3
+#define LX_EMUL_brk 4
+#define LX_EMUL_getppid 5
+#define LX_EMUL_sysinfo 6
+#define LX_EMUL_clone 7
+#define LX_EMUL_modify_ldt 8
+#define LX_EMUL_sched_setparam 9
+#define LX_EMUL_sched_getparam 10
+#define LX_EMUL_sched_rr_get_interval 11
+#define LX_EMUL_setresuid16 12
+#define LX_EMUL_setresgid16 13
+#define LX_EMUL_rt_sigqueueinfo 14
+#define LX_EMUL_setgroups 15
+#define LX_EMUL_setresuid 16
+#define LX_EMUL_setresgid 17
+#define LX_EMUL_gettid 18
+#define LX_EMUL_tkill 19
+#define LX_EMUL_futex 20
+#define LX_EMUL_set_thread_area 21
+#define LX_EMUL_get_thread_area 22
+#define LX_EMUL_set_tid_address 23
+#define LX_EMUL_pipe2 24
+#define LX_EMUL_rt_tgsigqueueinfo 25
+#define LX_EMUL_arch_prctl 26
+#define LX_EMUL_tgkill 27
+#define LX_EMUL_read 28
+#define LX_EMUL_ioctl LX_N_IKE_FUNCS
+
+/* Note: adjust LX_N_IKE_FUNCS when adding new in-kernel functions */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_SYSCALL_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_sysv_ipc.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_sysv_ipc.h
new file mode 100644
index 0000000000..9cd9cdedb7
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_sysv_ipc.h
@@ -0,0 +1,222 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _LX_SYSV_IPC_H
+#define _LX_SYSV_IPC_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * msg-related definitions.
+ */
+#define LX_IPC_CREAT 00001000
+#define LX_IPC_EXCL 00002000
+#define LX_IPC_NOWAIT 00004000
+
+#define LX_IPC_RMID 0
+#define LX_IPC_SET 1
+#define LX_IPC_STAT 2
+#define LX_IPC_INFO 3
+
+#define LX_IPC_64 0x0100
+
+#define LX_SEMOP 1
+#define LX_SEMGET 2
+#define LX_SEMCTL 3
+#define LX_MSGSND 11
+#define LX_MSGRCV 12
+#define LX_MSGGET 13
+#define LX_MSGCTL 14
+#define LX_SHMAT 21
+#define LX_SHMDT 22
+#define LX_SHMGET 23
+#define LX_SHMCTL 24
+
+#define LX_MSG_STAT 11
+#define LX_MSG_INFO 12
+
+#define LX_MSG_NOERROR 010000
+
+/*
+ * Linux hard codes the maximum msgbuf length to be 8192 bytes. Really.
+ */
+#define LX_MSGMAX 8192
+
+struct lx_ipc_perm {
+ key_t key;
+ uid_t uid;
+ uid_t gid;
+ uid_t cuid;
+ uid_t cgid;
+ ushort_t mode;
+ ushort_t _pad1;
+ ushort_t seq;
+ ushort_t _pad2;
+ ulong_t _unused1;
+ ulong_t _unused2;
+};
+
+struct lx_msqid_ds {
+ struct lx_ipc_perm msg_perm;
+ time_t msg_stime;
+#if defined(_ILP32)
+ ulong_t _unused1;
+#endif
+ time_t msg_rtime;
+#if defined(_ILP32)
+ ulong_t _unused2;
+#endif
+ time_t msg_ctime;
+#if defined(_ILP32)
+ ulong_t _unused3;
+#endif
+ ulong_t msg_cbytes;
+ ulong_t msg_qnum;
+ ulong_t msg_qbytes;
+ pid_t msg_lspid;
+ pid_t msg_lrpid;
+ ulong_t _unused4;
+ ulong_t _unused5;
+};
+
+struct lx_msginfo {
+ int msgpool;
+ int msgmap;
+ int msgmax;
+ int msgmnb;
+ int msgmni;
+ int msgssz;
+ int msgtql;
+ ushort_t msgseg;
+};
+
+/*
+ * semaphore-related definitions.
+ */
+#define LX_GETPID 11
+#define LX_GETVAL 12
+#define LX_GETALL 13
+#define LX_GETNCNT 14
+#define LX_GETZCNT 15
+#define LX_SETVAL 16
+#define LX_SETALL 17
+#define LX_SEM_STAT 18
+#define LX_SEM_INFO 19
+#define LX_SEM_UNDO 0x1000
+#define LX_SEMVMX 32767
+
+struct lx_semid_ds {
+ struct lx_ipc_perm sem_perm;
+ time_t sem_otime;
+ ulong_t _unused1;
+ time_t sem_ctime;
+ ulong_t _unused2;
+ ulong_t sem_nsems;
+ ulong_t _unused3;
+ ulong_t _unused4;
+};
+
+struct lx_seminfo {
+ int semmap;
+ int semmni;
+ int semmns;
+ int semmnu;
+ int semmsl;
+ int semopm;
+ int semume;
+ int semusz;
+ int semvmx;
+ int semaem;
+};
+
+union lx_semun {
+ int val;
+ struct lx_semid_ds *semds;
+ ushort_t *sems;
+ struct lx_seminfo *info;
+ uintptr_t dummy;
+};
+
+/*
+ * shm-related definitions
+ */
+#define LX_SHM_LOCKED 02000
+#define LX_SHM_RDONLY 010000
+#define LX_SHM_RND 020000
+#define LX_SHM_REMAP 040000
+
+#define LX_SHM_LOCK 11
+#define LX_SHM_UNLOCK 12
+#define LX_SHM_STAT 13
+#define LX_SHM_INFO 14
+
+struct lx_shmid_ds {
+ struct lx_ipc_perm shm_perm;
+ size_t shm_segsz;
+ time_t shm_atime;
+#if defined(_ILP32)
+ ulong_t _unused1;
+#endif
+ time_t shm_dtime;
+#if defined(_ILP32)
+ ulong_t _unused2;
+#endif
+ time_t shm_ctime;
+#if defined(_ILP32)
+ ulong_t _unused3;
+#endif
+ pid_t shm_cpid;
+ pid_t shm_lpid;
+ ushort_t shm_nattch;
+ ulong_t _unused4;
+ ulong_t _unused5;
+};
+
+struct lx_shm_info {
+ int used_ids;
+ ulong_t shm_tot;
+ ulong_t shm_rss;
+ ulong_t shm_swp;
+ ulong_t swap_attempts;
+ ulong_t swap_successes;
+};
+
+struct lx_shminfo {
+ long shmmax;
+ long shmmin;
+ long shmmni;
+ long shmseg;
+ long shmall;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_SYSV_IPC_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h
new file mode 100644
index 0000000000..d1a17f1d89
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h
@@ -0,0 +1,55 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_LX_THREAD_H
+#define _SYS_LX_THREAD_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <thread.h>
+
+typedef struct lx_tsd {
+#if defined(_ILP32)
+ /* 32-bit thread-specific Linux %gs value */
+ uintptr_t lxtsd_gs;
+#endif
+ int lxtsd_exit;
+ int lxtsd_exit_status;
+ ucontext_t lxtsd_exit_context;
+} lx_tsd_t;
+
+extern thread_key_t lx_tsd_key;
+
+extern void lx_swap_gs(long, long *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_THREAD_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_thunk_server.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_thunk_server.h
new file mode 100644
index 0000000000..a56fe8eeb3
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_thunk_server.h
@@ -0,0 +1,143 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LX_THUNK_SERVER_H
+#define _LX_THUNK_SERVER_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <netdb.h>
+#include <procfs.h>
+
+/*
+ * Binary that should be exec'd to start up the thunking server
+ */
+#define LXT_SERVER_BINARY "/native/usr/lib/brand/lx/lx_thunk"
+
+/*
+ * When the thunking server is started it will need to communicate
+ * to the client via two fifos. These fifos will be passed to the
+ * thunking server via the following file descriptors:
+ */
+#define LXT_SERVER_FIFO_RD_FD 3
+#define LXT_SERVER_FIFO_WR_FD 4
+
+/*
+ * Operations supported by the thunking server
+ */
+#define LXT_SERVER_OP_MIN 0
+#define LXT_SERVER_OP_PING 0
+#define LXT_SERVER_OP_NAME2HOST 1
+#define LXT_SERVER_OP_ADDR2HOST 2
+#define LXT_SERVER_OP_NAME2SERV 3
+#define LXT_SERVER_OP_PORT2SERV 4
+#define LXT_SERVER_OP_OPENLOG 5
+#define LXT_SERVER_OP_SYSLOG 6
+#define LXT_SERVER_OP_CLOSELOG 7
+#define LXT_SERVER_OP_MAX 8
+
+/*
+ * Macros used to translate pointer into offsets for when they are
+ * being transmitted between the client and server processes.
+ *
+ * NOTE: We're going to add 1 to every offset value. The reason
+ * for this is that some of the pointers we're converting to offsets are
+ * stored in NULL terminated arrays, and if one of the members of
+ * one of these arrays happened to be at the beginning of the storage
+ * buffer it would have an offset of 0 and when the client tries to
+ * translate the offsets back into pointers it wouldn't be able
+ * to differentiate between the 0 offset from the end of the array.
+ */
+#define LXT_PTR_TO_OFFSET(ptr, base) \
+ ((void *)((uintptr_t)(ptr) - (uintptr_t)(base) + 1))
+#define LXT_OFFSET_TO_PTR(offset, base) \
+ ((void *)((uintptr_t)(offset) + (uintptr_t)(base) - 1))
+
+/*
+ * Structures passed to the thunking server via door calls
+ */
+typedef struct lxt_server_arg {
+ int lxt_sa_op;
+ int lxt_sa_success;
+ int lxt_sa_errno;
+ char lxt_sa_data[1];
+} lxt_server_arg_t;
+
+typedef struct lxt_gethost_arg {
+ struct hostent lxt_gh_result;
+
+ int lxt_gh_h_errno;
+
+ int lxt_gh_type;
+ int lxt_gh_token_len;
+ int lxt_gh_buf_len;
+
+ int lxt_gh_storage_len;
+ char lxt_gh_storage[1];
+} lxt_gethost_arg_t;
+
+typedef struct lxt_getserv_arg {
+ struct servent lxt_gs_result;
+
+ int lxt_gs_token_len;
+ int lxt_gs_buf_len;
+ char lxt_gs_proto[5];
+
+ int lxt_gs_storage_len;
+ char lxt_gs_storage[1];
+} lxt_getserv_arg_t;
+
+typedef struct lxt_openlog_arg {
+ int lxt_ol_logopt;
+ int lxt_ol_facility;
+ char lxt_ol_ident[128];
+} lxt_openlog_arg_t;
+
+typedef struct lxt_syslog_arg {
+ int lxt_sl_priority;
+ pid_t lxt_sl_pid;
+ char lxt_sl_progname[PRFNSZ];
+ char lxt_sl_message[1024];
+} lxt_syslog_arg_t;
+
+
+/*
+ * Functions called by the brand library to manage startup of the
+ * thunk server process.
+ */
+void lxt_server_init(int, char *[]);
+int lxt_server_pid(int *pid);
+void lxt_server_exec_check(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_THUNK_SERVER_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_types.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_types.h
new file mode 100644
index 0000000000..f69ed45820
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_types.h
@@ -0,0 +1,109 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_LX_TYPES_H
+#define _SYS_LX_TYPES_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define SHRT_MIN (-32768) /* min value of a "short int" */
+#define SHRT_MAX 32767 /* max value of a "short int" */
+#define USHRT_MAX 65535 /* max of "unsigned short int" */
+#define INT_MIN (-2147483647-1) /* min value of an "int" */
+#define INT_MAX 2147483647 /* max value of an "int" */
+#define UINT_MAX 4294967295U /* max value of an "unsigned int" */
+
+#if defined(_LP64)
+#define LONG_MAX 9223372036854775807L
+#else
+#define LONG_MAX 2147483647L /* max value of a 32-bit "long int" */
+#endif
+
+#define LX_SYS_UTS_LN 65
+
+struct lx_utsname {
+ char sysname[LX_SYS_UTS_LN];
+ char nodename[LX_SYS_UTS_LN];
+ char release[LX_SYS_UTS_LN];
+ char version[LX_SYS_UTS_LN];
+ char machine[LX_SYS_UTS_LN];
+ char domainname[LX_SYS_UTS_LN];
+};
+
+typedef uint64_t lx_dev_t;
+typedef uint16_t lx_dev16_t;
+typedef uint32_t lx_ino_t;
+typedef uint64_t lx_ino64_t;
+typedef uint32_t lx_uid_t;
+typedef uint16_t lx_uid16_t;
+typedef uint32_t lx_gid_t;
+typedef uint16_t lx_gid16_t;
+typedef uint32_t lx_off_t;
+typedef uint64_t lx_off64_t;
+typedef uint32_t lx_blksize_t;
+typedef uint32_t lx_blkcnt_t;
+typedef uint64_t lx_blkcnt64_t;
+typedef uint32_t lx_mode_t;
+typedef uint16_t lx_mode16_t;
+
+#define LX_UID16_TO_UID32(uid16) \
+ (((uid16) == (lx_uid16_t)-1) ? ((lx_uid_t)-1) : (lx_uid_t)(uid16))
+
+#define LX_GID16_TO_GID32(gid16) \
+ (((gid16) == (lx_gid16_t)-1) ? ((lx_gid_t)-1) : (lx_gid_t)(gid16))
+
+/* Overflow values default to NFS nobody. */
+
+#define UID16_OVERFLOW ((lx_uid16_t)65534)
+#define GID16_OVERFLOW ((lx_gid16_t)65534)
+
+/*
+ * All IDs with high word non-zero are converted to default overflow values to
+ * avoid inadvertent truncation to zero (root) (!).
+ */
+#define LX_UID32_TO_UID16(uid32) \
+ ((((uid32) & 0xffff0000) == 0) ? ((lx_uid16_t)(uid32)) : \
+ (((uid32) == ((lx_uid_t)-1)) ? ((lx_uid16_t)-1) : UID16_OVERFLOW))
+
+#define LX_GID32_TO_GID16(gid32) \
+ ((((gid32) & 0xffff0000) == 0) ? ((lx_gid16_t)(gid32)) : \
+ (((gid32) == ((lx_gid_t)-1)) ? ((lx_gid16_t)-1) : GID16_OVERFLOW))
+
+struct lx_timespec {
+ time_t ts_sec;
+ long ts_nsec;
+};
+
+#define LX_32TO64(lo, hi) \
+ ((uint64_t)((uint64_t)(lo) | ((uint64_t)(hi) << 32)))
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_TYPES_H */
diff --git a/usr/src/lib/brand/lx/lx_nametoaddr/Makefile b/usr/src/lib/brand/lx/lx_nametoaddr/Makefile
new file mode 100644
index 0000000000..f69dcec561
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_nametoaddr/Makefile
@@ -0,0 +1,52 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../../../Makefile.lib
+
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+LINT_SUBDIRS = $(MACH)
+$(BUILD64)LINT_SUBDIRS += $(MACH64)
+
+all := TARGET= all
+clean := TARGET= clean
+clobber := TARGET= clobber
+install := TARGET= install
+lint := TARGET= lint
+
+.KEEP_STATE:
+
+all install clean clobber: $(SUBDIRS)
+
+lint: $(LINT_SUBDIRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/lib/brand/lx/lx_nametoaddr/Makefile.com b/usr/src/lib/brand/lx/lx_nametoaddr/Makefile.com
new file mode 100644
index 0000000000..a0fd9da3fe
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_nametoaddr/Makefile.com
@@ -0,0 +1,68 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+LIBRARY = lx_nametoaddr.a
+VERS = .1
+
+COBJS = lx_nametoaddr.o
+OBJECTS = $(COBJS)
+
+include ../../../../Makefile.lib
+include ../../Makefile.lx
+
+MAPFILES = ../common/mapfile-vers
+MAPOPTS = $(MAPFILES:%=-M%)
+
+CSRCS = $(COBJS:%o=../common/%c)
+SRCS = $(CSRCS)
+
+SRCDIR = ../common
+LX_THUNK = ../../lx_thunk
+
+ASFLAGS += -P -D_ASM
+LDLIBS += -lc -lnsl
+CFLAGS += $(CCVERBOSE)
+CPPFLAGS += -D_REENTRANT -I../ -I$(LX_THUNK)
+DYNFLAGS += $(MAPOPTS) '-R$$ORIGIN'
+
+LIBS = $(DYNLIB)
+
+LINTFLAGS += $(LX_THUNK)/$(MACH)/llib-llx_thunk.ln
+LINTFLAGS64 += $(LX_THUNK)/$(MACH64)/llib-llx_thunk.ln
+
+CLEANFILES = $(DYNLIB)
+ROOTLIBDIR = $(ROOT)/usr/lib/brand/lx
+ROOTLIBDIR64 = $(ROOT)/usr/lib/brand/lx/$(MACH64)
+
+.KEEP_STATE:
+
+all: $(DYNLIB)
+
+lint: lintcheck
+
+include ../../../../Makefile.targ
diff --git a/usr/src/lib/brand/lx/lx_nametoaddr/amd64/Makefile b/usr/src/lib/brand/lx/lx_nametoaddr/amd64/Makefile
new file mode 100644
index 0000000000..a526d34834
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_nametoaddr/amd64/Makefile
@@ -0,0 +1,35 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../Makefile.com
+include $(SRC)/lib/Makefile.lib.64
+
+DYNFLAGS += $(LX_THUNK)/$(MACH64)/lx_thunk.so.1
+CLOBBERFILES = $(ROOTLIBDIR64)/$(DYNLIB) $(ROOTLIBDIR64)/$(LINTLIB)
+
+install: all $(ROOTLIBS64)
diff --git a/usr/src/lib/brand/lx/lx_nametoaddr/common/lx_nametoaddr.c b/usr/src/lib/brand/lx/lx_nametoaddr/common/lx_nametoaddr.c
new file mode 100644
index 0000000000..4b6a0532a9
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_nametoaddr/common/lx_nametoaddr.c
@@ -0,0 +1,479 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * BrandZ lx name services translation library.
+ *
+ * This library is specified as the default name services translation
+ * library in a custom netconfig(4) file that is only used when running
+ * native solaris processes in a Linux branded zone.
+ *
+ * What this means it that when a native solaris process runs in a
+ * Linux branded zone and issues a name service request to libnsl.so
+ * (either directly or indirectly via any libraries the program may
+ * be linked against) libnsl.so will dlopen(3c) this library and call
+ * into it to service these requests.
+ *
+ * This library is in turn linked against lx_thunk.so and will attempt
+ * to call interfaces in lx_thunk.so to resolve these requests. The
+ * functions that are called in lx_thunk.so are designed to have the
+ * same signature and behavior as the existing solaris name service
+ * interfaces. The name services interfaces we call are:
+ *
+ * Native Interface -> lx_thunk.so Interface
+ * ---------------- -> ---------------------
+ * gethostbyname_r -> lxt_gethostbyname_r
+ * gethostbyaddr_r -> lxt_gethostbyaddr_r
+ * getservbyname_r -> lxt_getservbyname_r
+ * getservbyport_r -> lxt_getservbyport_r
+ *
+ * This library also uses one additional interface from lx_thunk.so:
+ * lxt_debug
+ * Information debugging messages are sent to lx_thunk.so via this
+ * interface and that library can decided if it wants to drop the
+ * messages or output them somewhere.
+ */
+
+#include <assert.h>
+#include <dlfcn.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <netdb.h>
+#include <netdir.h>
+#include <nss_dbdefs.h>
+#include <rpc/clnt.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/varargs.h>
+#include <sys/wait.h>
+#include <thread.h>
+#include <tiuser.h>
+#include <unistd.h>
+#include <sys/lx_thunk.h>
+
+
+/*
+ * Private nametoaddr library interfaces.
+ */
+static int
+netconfig_is_ipv4(struct netconfig *config)
+{
+ int i;
+ /*
+ * If we look at the rpc services registered on a Linux system
+ * (this can be done via rpcinfo(1M)) for both on the loopback
+ * interface and on any remote interfaces we only see services
+ * registered for tcp and udp. So here we'll limit our support
+ * to these transports.
+ */
+ char *ipv4_netids[] = {
+ "tcp",
+ "udp",
+ NULL
+ };
+
+ for (i = 0; ipv4_netids[i] != NULL; i++) {
+ if (strcmp(ipv4_netids[i], config->nc_netid) == 0)
+ return (1);
+ }
+ return (0);
+}
+
+/*
+ * Public nametoaddr library interfaces.
+ *
+ * These are the functional entry points that libnsl will lookup (via
+ * the symbol names) when it loads this nametoaddr translation library.
+ */
+
+/*
+ * _netdir_getbyname() returns all of the addresses for
+ * a specified host and service.
+ */
+struct nd_addrlist *
+_netdir_getbyname(struct netconfig *netconfigp,
+ struct nd_hostserv *nd_hostservp)
+{
+ struct nd_addrlist *rp = NULL;
+ struct netbuf *nbp = NULL;
+ struct sockaddr_in *sap = NULL;
+ struct hostent n2h_result;
+ struct servent n2s_result;
+ char *n2h_buf = NULL, *n2s_buf = NULL;
+ int h_errno, i, host_self = 0, r_count;
+ int n2h_count = 0, n2s_count = 0;
+
+ lxt_debug("_netdir_getbyname: request recieved\n");
+
+ /* Make sure this is an ipv4 request. */
+ if (!netconfig_is_ipv4(netconfigp)) {
+ _nderror = ND_BADARG;
+ goto fail;
+ }
+
+ /* Allocate memory for the queries. */
+ if (((n2h_buf = malloc(NSS_BUFLEN_HOSTS)) == NULL) ||
+ ((n2s_buf = malloc(NSS_BUFLEN_SERVICES)) == NULL))
+ goto malloc_fail;
+
+ /* Check if the host name specified is HOST_SELF. */
+ if (strcmp(nd_hostservp->h_host, HOST_SELF) == 0)
+ host_self = 1;
+
+ /*
+ * If the hostname specified is HOST_SELF, the we're just
+ * just doing a service lookup so don't bother with trying
+ * to lookup the host name.
+ */
+ if (!host_self) {
+ /* Resolve the hostname. */
+ lxt_debug("_netdir_getbyname: "
+ "resolving host name: %s\n", nd_hostservp->h_host);
+ if (lxt_gethostbyname_r(nd_hostservp->h_host, &n2h_result,
+ n2h_buf, NSS_BUFLEN_HOSTS, &h_errno) == NULL) {
+ if (errno == ERANGE) {
+ _nderror = ND_SYSTEM;
+ } else if (h_errno == HOST_NOT_FOUND) {
+ _nderror = ND_NOHOST;
+ } else if (h_errno == TRY_AGAIN) {
+ _nderror = ND_TRY_AGAIN;
+ } else if (h_errno == NO_RECOVERY) {
+ _nderror = ND_NO_RECOVERY;
+ } else if (h_errno == NO_DATA) {
+ _nderror = ND_NO_DATA;
+ } else {
+ _nderror = ND_SYSTEM;
+ }
+ goto fail;
+ }
+ while (n2h_result.h_addr_list[n2h_count++] != NULL);
+ n2h_count--;
+ }
+
+ if (nd_hostservp->h_serv != NULL) {
+ /* Resolve the service name */
+ lxt_debug("_netdir_getbyname: "
+ "resolving service name: %s\n", nd_hostservp->h_serv);
+ if (lxt_getservbyname_r(nd_hostservp->h_serv,
+ netconfigp->nc_proto, &n2s_result,
+ n2s_buf, NSS_BUFLEN_SERVICES) == NULL) {
+ _nderror = ND_SYSTEM;
+ goto fail;
+ }
+ n2s_count = 1;
+ }
+
+ /* Make sure we got some results. */
+ if ((n2h_count + n2s_count) == 0) {
+ lxt_debug("_netdir_getbyname: no results!\n");
+ goto exit;
+ }
+ r_count = (n2h_count != 0) ? n2h_count : 1;
+
+ /*
+ * Allocate the return buffers. These buffers will be free'd
+ * by libnsl`netdir_free(), so we need to allocate them in the
+ * way that libnsl`netdir_free() expects.
+ */
+ if (((rp = calloc(1, sizeof (struct nd_addrlist))) == NULL) ||
+ ((nbp = calloc(1, sizeof (struct netbuf) * r_count)) == NULL) ||
+ ((sap = calloc(1, sizeof (struct sockaddr_in) * r_count)) == NULL))
+ goto malloc_fail;
+
+ /* Initialize the structures we're going to return. */
+ rp->n_cnt = r_count;
+ rp->n_addrs = nbp;
+ for (i = 0; i < r_count; i++) {
+
+ /* Initialize the netbuf. */
+ nbp[i].maxlen = nbp[i].len = sizeof (struct sockaddr_in);
+ nbp[i].buf = (char *)&sap[i];
+
+ /* Initialize the sockaddr_in. */
+ sap[i].sin_family = AF_INET;
+
+ /* If we looked up any host address copy them out. */
+ if (!host_self)
+ bcopy(n2h_result.h_addr_list[i], &sap[i].sin_addr,
+ sizeof (sap[i].sin_addr));
+
+ /* If we looked up any service ports copy them out. */
+ if (nd_hostservp->h_serv != NULL)
+ sap[i].sin_port = n2s_result.s_port;
+ }
+
+ /* We're finally done. */
+ lxt_debug("_netdir_getbyname: success\n");
+ return (rp);
+
+malloc_fail:
+ _nderror = ND_NOMEM;
+
+fail:
+ lxt_debug("_netdir_getbyname: failed!\n");
+
+exit:
+ if (n2h_buf == NULL)
+ free(n2h_buf);
+ if (n2s_buf == NULL)
+ free(n2s_buf);
+ if (rp == NULL)
+ free(rp);
+ if (nbp == NULL)
+ free(nbp);
+ if (sap == NULL)
+ free(sap);
+ return (NULL);
+}
+
+/*
+ * _netdir_getbyaddr() takes an address (hopefully obtained from
+ * someone doing a _netdir_getbyname()) and returns all hosts with
+ * that address.
+ */
+struct nd_hostservlist *
+/*ARGSUSED*/
+_netdir_getbyaddr(struct netconfig *netconfigp, struct netbuf *nbp)
+{
+ struct nd_hostservlist *rp = NULL;
+ struct nd_hostserv *hsp = NULL;
+ struct sockaddr_in *sap;
+ struct servent p2s_result;
+ struct hostent a2h_result;
+ char *a2h_buf = NULL, *p2s_buf = NULL;
+ int h_errno, i;
+ int r_count = 0;
+ int a2h_count = 0, p2s_count = 0;
+
+ lxt_debug("_netdir_getbyaddr: request recieved\n");
+
+ /* Make sure this is an ipv4 request. */
+ if (!netconfig_is_ipv4(netconfigp)) {
+ _nderror = ND_BADARG;
+ goto fail;
+ }
+
+ /*
+ * Make sure the netbuf contains one struct sockaddr_in of
+ * type AF_INET.
+ */
+ if ((nbp->len != sizeof (struct sockaddr_in)) ||
+ (nbp->len < nbp->maxlen)) {
+ _nderror = ND_BADARG;
+ goto fail;
+ }
+ /*LINTED*/
+ sap = (struct sockaddr_in *)nbp->buf;
+ if (sap->sin_family != AF_INET) {
+ _nderror = ND_BADARG;
+ goto fail;
+ }
+
+ /* Allocate memory for the queries. */
+ if (((a2h_buf = malloc(NSS_BUFLEN_HOSTS)) == NULL) ||
+ ((p2s_buf = malloc(NSS_BUFLEN_SERVICES)) == NULL))
+ goto malloc_fail;
+
+ if (sap->sin_addr.s_addr != INADDR_ANY) {
+ lxt_debug("_netdir_getbyaddr: "
+ "resolving host address: 0x%x\n", sap->sin_addr.s_addr);
+ if (lxt_gethostbyaddr_r((char *)&sap->sin_addr.s_addr,
+ sizeof (sap->sin_addr.s_addr), AF_INET,
+ &a2h_result, a2h_buf, NSS_BUFLEN_HOSTS,
+ &h_errno) == NULL) {
+ if (errno == ERANGE) {
+ _nderror = ND_SYSTEM;
+ } else if (h_errno == HOST_NOT_FOUND) {
+ _nderror = ND_NOHOST;
+ } else if (h_errno == TRY_AGAIN) {
+ _nderror = ND_TRY_AGAIN;
+ } else if (h_errno == NO_RECOVERY) {
+ _nderror = ND_NO_RECOVERY;
+ } else if (h_errno == NO_DATA) {
+ _nderror = ND_NO_DATA;
+ } else {
+ _nderror = ND_SYSTEM;
+ }
+ goto fail;
+ }
+ while (a2h_result.h_aliases[a2h_count++] != NULL);
+ /*
+ * We need to count a2h_result.h_name as a valid name for
+ * for the address we just looked up. Of course a2h_count
+ * is actually over estimated by one, so instead of
+ * decrementing it here we'll just leave it as it to
+ * account for a2h_result.h_name.
+ */
+ }
+
+ if (sap->sin_port != 0) {
+ lxt_debug("_netdir_getbyaddr: "
+ "resolving service port: 0x%x\n", sap->sin_port);
+ if (lxt_getservbyport_r(sap->sin_port,
+ netconfigp->nc_proto, &p2s_result,
+ p2s_buf, NSS_BUFLEN_SERVICES) == NULL) {
+ _nderror = ND_SYSTEM;
+ goto fail;
+ }
+ p2s_count = 1;
+ }
+
+ /* Make sure we got some results. */
+ if ((a2h_count + p2s_count) == 0) {
+ lxt_debug("_netdir_getbyaddr: no results!\n");
+ goto exit;
+ }
+ r_count = (a2h_count != 0) ? a2h_count : 1;
+
+ /*
+ * Allocate the return buffers. These buffers will be free'd
+ * by libnsl`netdir_free(), so we need to allocate them in the
+ * way that libnsl`netdir_free() expects.
+ */
+ if (((rp = calloc(1, sizeof (struct nd_hostservlist))) == NULL) ||
+ ((hsp = calloc(1, sizeof (struct nd_hostserv) * r_count)) == NULL))
+ goto malloc_fail;
+
+ lxt_debug("_netdir_getbyaddr: hahaha0 - %d\n", r_count);
+ rp->h_cnt = r_count;
+ rp->h_hostservs = hsp;
+ for (i = 0; i < r_count; i++) {
+ /* If we looked up any host names copy them out. */
+ lxt_debug("_netdir_getbyaddr: hahaha1 - %d\n", r_count);
+ if ((a2h_count > 0) && (i == 0) &&
+ ((hsp[i].h_host = strdup(a2h_result.h_name)) == NULL))
+ goto malloc_fail;
+
+ if ((a2h_count > 0) && (i > 0) &&
+ ((hsp[i].h_host =
+ strdup(a2h_result.h_aliases[i - 1])) == NULL))
+ goto malloc_fail;
+
+ lxt_debug("_netdir_getbyaddr: hahaha2 - %d\n", r_count);
+ /* If we looked up any service names copy them out. */
+ if ((p2s_count > 0) &&
+ ((hsp[i].h_serv = strdup(p2s_result.s_name)) == NULL))
+ goto malloc_fail;
+ lxt_debug("_netdir_getbyaddr: hahaha3 - %d\n", r_count);
+ }
+
+ /* We're finally done. */
+ lxt_debug("_netdir_getbyaddr: success\n");
+ return (rp);
+
+malloc_fail:
+ _nderror = ND_NOMEM;
+
+fail:
+ lxt_debug("_netdir_getbyaddr: failed!\n");
+
+exit:
+ if (a2h_buf == NULL)
+ free(a2h_buf);
+ if (p2s_buf == NULL)
+ free(p2s_buf);
+ if (rp == NULL)
+ free(rp);
+ if (hsp != NULL) {
+ for (i = 0; i < r_count; i++) {
+ if (hsp[i].h_host != NULL)
+ free(hsp[i].h_host);
+ if (hsp[i].h_serv != NULL)
+ free(hsp[i].h_serv);
+ }
+ free(hsp);
+ }
+ return (NULL);
+}
+
+char *
+/* ARGSUSED */
+_taddr2uaddr(struct netconfig *netconfigp, struct netbuf *nbp)
+{
+ extern char *inet_ntoa_r();
+
+ struct sockaddr_in *sa;
+ char tmp[RPC_INET6_MAXUADDRSIZE];
+ unsigned short myport;
+
+ if (netconfigp == NULL || nbp == NULL || nbp->buf == NULL) {
+ _nderror = ND_BADARG;
+ return (NULL);
+ }
+
+ if (strcmp(netconfigp->nc_protofmly, NC_INET) != 0) {
+ /* we only support inet address translation */
+ assert(0);
+ _nderror = ND_SYSTEM;
+ return (NULL);
+ }
+
+ /* LINTED pointer cast */
+ sa = (struct sockaddr_in *)(nbp->buf);
+ myport = ntohs(sa->sin_port);
+ (void) inet_ntoa_r(sa->sin_addr, tmp);
+
+ (void) sprintf(tmp + strlen(tmp), ".%d.%d",
+ myport >> 8, myport & 255);
+ return (strdup(tmp)); /* Doesn't return static data ! */
+}
+
+/*
+ * _uaddr2taddr() translates a universal address back into a
+ * netaddr structure. Since the universal address is a string,
+ * put that into the TLI buffer (making sure to change all \ddd
+ * characters back and strip off the trailing \0 character).
+ */
+struct netbuf *
+/* ARGSUSED */
+_uaddr2taddr(struct netconfig *netconfigp, char *uaddr)
+{
+ assert(0);
+ _nderror = ND_SYSTEM;
+ return (NULL);
+}
+
+/*
+ * _netdir_options() is a "catch-all" routine that does
+ * transport specific things. The only thing that these
+ * routines have to worry about is ND_MERGEADDR.
+ */
+int
+/* ARGSUSED */
+_netdir_options(struct netconfig *netconfigp, int option, int fd, void *par)
+{
+ assert(0);
+ _nderror = ND_SYSTEM;
+ return (0);
+}
diff --git a/usr/src/lib/brand/lx/lx_nametoaddr/common/mapfile-vers b/usr/src/lib/brand/lx/lx_nametoaddr/common/mapfile-vers
new file mode 100644
index 0000000000..3ed165195b
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_nametoaddr/common/mapfile-vers
@@ -0,0 +1,51 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+SUNWprivate_1.1 {
+ global:
+ _netdir_getbyname;
+ _netdir_getbyaddr;
+ _taddr2uaddr;
+ _uaddr2taddr;
+ _netdir_options;
+
+ local:
+ *;
+};
diff --git a/usr/src/lib/brand/lx/lx_nametoaddr/i386/Makefile b/usr/src/lib/brand/lx/lx_nametoaddr/i386/Makefile
new file mode 100644
index 0000000000..67545e46cd
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_nametoaddr/i386/Makefile
@@ -0,0 +1,34 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../Makefile.com
+
+DYNFLAGS += $(LX_THUNK)/$(MACH)/lx_thunk.so.1
+CLOBBERFILES = $(ROOTLIBDIR)/$(DYNLIB) $(ROOTLIBDIR)/$(LINTLIB)
+
+install: all $(ROOTLIBS)
diff --git a/usr/src/lib/brand/lx/lx_support/Makefile b/usr/src/lib/brand/lx/lx_support/Makefile
new file mode 100644
index 0000000000..e7c958e13a
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_support/Makefile
@@ -0,0 +1,54 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+PROG = lx_support
+PROGS = $(PROG)
+OBJS = lx_support
+
+all: $(PROG)
+
+include ../Makefile.lx
+include $(SRC)/cmd/Makefile.cmd
+
+# override the install directory
+ROOTBIN = $(ROOTBRANDDIR)
+CLOBBERFILES = $(OBJS) $(ROOTPROGS)
+
+UTSBASE = $(SRC)/uts
+
+CFLAGS += $(CCVERBOSE)
+CPPFLAGS += -D_REENTRANT -I$(UTSBASE)/common/brand/lx
+LDLIBS += -lzonecfg
+
+.KEEP_STATE:
+
+install: all $(ROOTPROGS)
+
+clean:
+ $(RM) $(PROG) $(OBJS)
+
+lint: lint_PROG
+
+include $(SRC)/cmd/Makefile.targ
diff --git a/usr/src/lib/brand/lx/lx_support/lx_support.c b/usr/src/lib/brand/lx/lx_support/lx_support.c
new file mode 100644
index 0000000000..206fa2a9e9
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_support/lx_support.c
@@ -0,0 +1,542 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * lx_support is a small cli utility used to perform some brand-specific
+ * tasks when booting, halting, or verifying a zone. This utility is not
+ * intended to be called by users - it is intended to be invoked by the
+ * zones utilities.
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <stropts.h>
+#include <sys/ioccom.h>
+#include <sys/stat.h>
+#include <sys/systeminfo.h>
+#include <sys/types.h>
+#include <sys/varargs.h>
+#include <unistd.h>
+#include <libintl.h>
+#include <locale.h>
+
+#include <libzonecfg.h>
+#include <sys/lx_audio.h>
+#include <sys/lx_brand.h>
+
+static void lxs_err(char *msg, ...) __NORETURN;
+static void usage(void) __NORETURN;
+
+#define CP_CMD "/usr/bin/cp"
+#define MOUNT_CMD "/sbin/mount"
+
+#define LXA_AUDIO_DEV "/dev/brand/lx/audio_devctl"
+#define INTSTRLEN 32
+#define KVSTRLEN 10
+
+static char *bname = NULL;
+static char *zonename = NULL;
+static char *zoneroot = NULL;
+
+#if !defined(TEXT_DOMAIN) /* should be defined by cc -D */
+#define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
+#endif
+
+static void
+lxs_err(char *msg, ...)
+{
+ char buf[1024];
+ va_list ap;
+
+ va_start(ap, msg);
+ /*LINTED*/
+ (void) vsnprintf(buf, sizeof (buf), msg, ap);
+ va_end(ap);
+
+ (void) printf("%s error: %s\n", bname, buf);
+
+ exit(1);
+ /*NOTREACHED*/
+}
+
+/*
+ * The Linux init(1M) command requires communication over the /dev/initctl
+ * FIFO. Since any attempt to create a file in /dev will fail, we must
+ * create it here.
+ */
+static void
+lxs_make_initctl()
+{
+ char cmdbuf[ARG_MAX];
+ char path[MAXPATHLEN];
+ char special[MAXPATHLEN];
+ struct stat buf;
+ int err;
+
+ if (snprintf(special, sizeof (special), "%s/dev/initctl", zoneroot) >=
+ sizeof (special))
+ lxs_err("%s: %s", gettext("Failed to create /dev/initctl"),
+ gettext("zoneroot is too long"));
+
+ if (snprintf(path, sizeof (path), "%s/root/dev/initctl", zoneroot) >=
+ sizeof (path))
+ lxs_err("%s: %s", gettext("Failed to create /dev/initctl"),
+ gettext("zoneroot is too long"));
+
+ /* create the actual fifo as <zoneroot>/dev/initctl */
+ if (stat(special, &buf) != 0) {
+ err = errno;
+ if (err != ENOENT)
+ lxs_err("%s: %s",
+ gettext("Failed to create /dev/initctl"),
+ strerror(err));
+ if (mkfifo(special, 0644) < 0) {
+ err = errno;
+ lxs_err("%s: %s",
+ gettext("Failed to create /dev/initctl"),
+ strerror(err));
+ }
+ } else {
+ if ((buf.st_mode & S_IFIFO) == 0)
+ lxs_err("%s: %s",
+ gettext("Failed to create /dev/initctl"),
+ gettext("It already exists, and is not a FIFO."));
+ }
+
+ /*
+ * now lofs mount the <zoneroot>/dev/initctl fifo onto
+ * <zoneroot>/root/dev/initctl
+ */
+ if (snprintf(cmdbuf, sizeof (cmdbuf), "%s -F lofs %s %s", MOUNT_CMD,
+ special, path) >= sizeof (cmdbuf))
+ lxs_err("%s: %s", gettext("Failed to lofs mount /dev/initctl"),
+ gettext("zoneroot is too long"));
+
+ if (system(cmdbuf) < 0) {
+ err = errno;
+ lxs_err("%s: %s", gettext("Failed to lofs mount /dev/initctl"),
+ strerror(err));
+ }
+}
+
+/*
+ * fsck gets really confused when run inside a zone. Removing this file
+ * prevents it from running
+ */
+static void
+lxs_remove_autofsck()
+{
+ char path[MAXPATHLEN];
+ int err;
+
+ if (snprintf(path, MAXPATHLEN, "%s/root/.autofsck", zoneroot) >=
+ MAXPATHLEN)
+ lxs_err("%s: %s", gettext("Failed to remove /.autofsck"),
+ gettext("zoneroot is too long"));
+
+ if (unlink(path) < 0) {
+ err = errno;
+ if (err != ENOENT)
+ lxs_err("%s: %s",
+ gettext("Failed to remove /.autofsck"),
+ strerror(err));
+ }
+}
+
+/*
+ * Extract any lx-supported attributes from the zone configuration file.
+ */
+static void
+lxs_getattrs(zone_dochandle_t zdh, boolean_t *restart, boolean_t *audio,
+ char **idev, char **odev, char **kvers)
+{
+ struct zone_attrtab attrtab;
+ int err;
+
+ /* initialize the attribute iterator */
+ if (zonecfg_setattrent(zdh) != Z_OK) {
+ zonecfg_fini_handle(zdh);
+ lxs_err(gettext("error accessing zone configuration"));
+ }
+
+ *idev = (char *)malloc(INTSTRLEN);
+ *odev = (char *)malloc(INTSTRLEN);
+ *kvers = (char *)malloc(KVSTRLEN);
+ if (*idev == NULL || *odev == NULL || *kvers == NULL)
+ lxs_err(gettext("out of memory"));
+
+ *audio = B_FALSE;
+ *restart = B_FALSE;
+ bzero(*idev, INTSTRLEN);
+ bzero(*odev, INTSTRLEN);
+ bzero(*kvers, KVSTRLEN);
+ while ((err = zonecfg_getattrent(zdh, &attrtab)) == Z_OK) {
+ if ((strcmp(attrtab.zone_attr_name, "init-restart") == 0) &&
+ (zonecfg_get_attr_boolean(&attrtab, restart) != Z_OK))
+ lxs_err(gettext("invalid type for zone attribute: %s"),
+ attrtab.zone_attr_name);
+ if ((strcmp(attrtab.zone_attr_name, "audio") == 0) &&
+ (zonecfg_get_attr_boolean(&attrtab, audio) != Z_OK))
+ lxs_err(gettext("invalid type for zone attribute: %s"),
+ attrtab.zone_attr_name);
+ if ((strcmp(attrtab.zone_attr_name, "audio-inputdev") == 0) &&
+ (zonecfg_get_attr_string(&attrtab, *idev,
+ INTSTRLEN) != Z_OK))
+ lxs_err(gettext("invalid type for zone attribute: %s"),
+ attrtab.zone_attr_name);
+ if ((strcmp(attrtab.zone_attr_name, "audio-outputdev") == 0) &&
+ (zonecfg_get_attr_string(&attrtab, *odev,
+ INTSTRLEN) != Z_OK))
+ lxs_err(gettext("invalid type for zone attribute: %s"),
+ attrtab.zone_attr_name);
+ if ((strcmp(attrtab.zone_attr_name, "kernel-version") == 0) &&
+ (zonecfg_get_attr_string(&attrtab, *kvers,
+ KVSTRLEN) != Z_OK))
+ lxs_err(gettext("invalid type for zone attribute: %s"),
+ attrtab.zone_attr_name);
+ }
+
+ if (strlen(*kvers) == 0) {
+ free(*kvers);
+ *kvers = NULL;
+ }
+
+ /* some kind of error while looking up attributes */
+ if (err != Z_NO_ENTRY)
+ lxs_err(gettext("error accessing zone configuration"));
+}
+
+static int
+lxs_iodev_ok(char *dev)
+{
+ int i, j;
+
+ if ((j = strlen(dev)) == 0)
+ return (1);
+ if (strcmp(dev, "default") == 0)
+ return (1);
+ if (strcmp(dev, "none") == 0)
+ return (1);
+ for (i = 0; i < j; i++) {
+ if (!isdigit(dev[i]))
+ return (0);
+ }
+ return (1);
+}
+
+/*
+ * The audio configuration settings are read from the zone configuration
+ * file. Audio configuration is specified via the following attributes
+ * (settable via zonecfg):
+ * attr name: audio
+ * attr type: boolean
+ *
+ * attr name: audio-inputdev
+ * attr type: string
+ * attr values: "none" | [0-9]+
+ *
+ * attr name: audio-outputdev
+ * attr type: string
+ * attr values: "none" | [0-9]+
+ *
+ * The user can enable linux brand audio device (ie /dev/dsp and /dev/mixer)
+ * for a zone by setting the "audio" attribute to true. (The absence of
+ * this attribute leads to an assumed value of false.)
+ *
+ * If the "audio" attribute is set to true and "audio-inputdev" and
+ * "audio-outputdev" are not set, then when a linux applications access
+ * audio devices these access will be mapped to the system default audio
+ * device, ie /dev/audio and/dev/audioctl.
+ *
+ * If "audio-inputdev" is set to none, then audio input will be disabled.
+ * If "audio-inputdev" is set to an integer, then when a Linux application
+ * attempts to access audio devices these access will be mapped to
+ * /dev/sound/<audio-inputdev attribute value>. The same behavior will
+ * apply to the "audio-outputdev" attribute for linux audio output
+ * device accesses.
+ *
+ * If "audio-inputdev" or "audio-outputdev" exist but the audio attribute
+ * is missing (or set to false) audio will not be enabled for the zone.
+ */
+static void
+lxs_init_audio(char *idev, char *odev)
+{
+ int err, fd;
+ lxa_zone_reg_t lxa_zr;
+
+ /* sanity check the input and output device properties */
+ if (!lxs_iodev_ok(idev))
+ lxs_err(gettext("invalid value for zone attribute: %s"),
+ "audio-inputdev");
+
+ if (!lxs_iodev_ok(odev))
+ lxs_err(gettext("invalid value for zone attribute: %s"),
+ "audio-outputdev");
+
+ /* initialize the zone name in the ioctl request */
+ bzero(&lxa_zr, sizeof (lxa_zr));
+ (void) strlcpy(lxa_zr.lxa_zr_zone_name, zonename,
+ sizeof (lxa_zr.lxa_zr_zone_name));
+
+ /* initialize the input device property in the ioctl request */
+ (void) strlcpy(lxa_zr.lxa_zr_inputdev, idev,
+ sizeof (lxa_zr.lxa_zr_inputdev));
+ if (lxa_zr.lxa_zr_inputdev[0] == '\0') {
+ /*
+ * if no input device was specified, set the input device
+ * to "default"
+ */
+ (void) strlcpy(lxa_zr.lxa_zr_inputdev, "default",
+ sizeof (lxa_zr.lxa_zr_inputdev));
+ }
+
+ /* initialize the output device property in the ioctl request */
+ (void) strlcpy(lxa_zr.lxa_zr_outputdev, odev,
+ sizeof (lxa_zr.lxa_zr_outputdev));
+ if (lxa_zr.lxa_zr_outputdev[0] == '\0') {
+ /*
+ * if no output device was specified, set the output device
+ * to "default"
+ */
+ (void) strlcpy(lxa_zr.lxa_zr_outputdev, "default",
+ sizeof (lxa_zr.lxa_zr_outputdev));
+ }
+
+ /* open the audio device control node */
+ if ((fd = open(LXA_AUDIO_DEV, O_RDWR)) < 0)
+ lxs_err(gettext("error accessing lx_audio device"));
+
+ /* enable audio for this zone */
+ err = ioctl(fd, LXA_IOC_ZONE_REG, &lxa_zr);
+ (void) close(fd);
+ if (err != 0)
+ lxs_err(gettext("error configuring lx_audio device"));
+}
+
+static int
+lxs_boot()
+{
+ zoneid_t zoneid;
+ zone_dochandle_t zdh;
+ boolean_t audio, restart;
+ char *idev, *odev, *kvers;
+
+ lxs_make_initctl();
+ lxs_remove_autofsck();
+
+ if ((zdh = zonecfg_init_handle()) == NULL)
+ lxs_err(gettext("unable to initialize zone handle"));
+
+ if (zonecfg_get_handle((char *)zonename, zdh) != Z_OK) {
+ zonecfg_fini_handle(zdh);
+ lxs_err(gettext("unable to load zone configuration"));
+ }
+
+ /* Extract any relevant attributes from the config file. */
+ lxs_getattrs(zdh, &restart, &audio, &idev, &odev, &kvers);
+ zonecfg_fini_handle(zdh);
+
+ /* Configure the zone's audio support (if any). */
+ if (audio == B_TRUE)
+ lxs_init_audio(idev, odev);
+
+ /*
+ * Let the kernel know whether or not this zone's init process
+ * should be automatically restarted on its death.
+ */
+ if ((zoneid = getzoneidbyname(zonename)) < 0)
+ lxs_err(gettext("unable to get zoneid"));
+ if (zone_setattr(zoneid, LX_ATTR_RESTART_INIT, &restart,
+ sizeof (boolean_t)) == -1)
+ lxs_err(gettext("error setting zone's restart_init property"));
+
+ if (kvers != NULL) {
+ /* Backward compatability with incomplete version attr */
+ if (strcmp(kvers, "2.4") == 0) {
+ kvers = "2.4.21";
+ } else if (strcmp(kvers, "2.6") == 0) {
+ kvers = "2.6.18";
+ }
+
+ if (zone_setattr(zoneid, LX_KERN_VERSION_NUM, kvers,
+ strlen(kvers)) < 0)
+ lxs_err(gettext("unable to set kernel version"));
+ }
+
+ return (0);
+}
+
+static int
+lxs_halt()
+{
+ lxa_zone_reg_t lxa_zr;
+ int fd, rv;
+
+ /*
+ * We don't bother to check if audio is configured for this zone
+ * before issuing a request to unconfigure it. There's no real
+ * reason to do this, it would require looking up the xml zone and
+ * brand configuration information (which could have been changed
+ * since the zone was booted), and it would involve more library
+ * calls there by increasing chances for failure.
+ */
+
+ /* initialize the zone name in the ioctl request */
+ bzero(&lxa_zr, sizeof (lxa_zr));
+ (void) strlcpy(lxa_zr.lxa_zr_zone_name, zonename,
+ sizeof (lxa_zr.lxa_zr_zone_name));
+
+ /* open the audio device control node */
+ if ((fd = open(LXA_AUDIO_DEV, O_RDWR)) < 0)
+ lxs_err(gettext("error accessing lx_audio device"));
+
+ /*
+ * disable audio for this zone
+ *
+ * we ignore ENOENT errors here because it's possible that
+ * audio is not configured for this zone. (either it was
+ * already unconfigured or someone could have added the
+ * audio resource to this zone after it was booted.)
+ */
+ rv = ioctl(fd, LXA_IOC_ZONE_UNREG, &lxa_zr);
+ (void) close(fd);
+ if ((rv == 0) || (errno == ENOENT))
+ return (0);
+ lxs_err(gettext("error unconfiguring lx_audio device: %s"),
+ strerror(errno));
+ /*NOTREACHED*/
+ return (0);
+}
+
+static int
+lxs_verify(char *xmlfile)
+{
+ zone_dochandle_t handle;
+ boolean_t audio, restart;
+ char *idev, *odev, *kvers;
+ char hostidp[HW_HOSTID_LEN];
+
+ if ((handle = zonecfg_init_handle()) == NULL)
+ lxs_err(gettext("internal libzonecfg.so.1 error"), 0);
+
+ if (zonecfg_get_xml_handle(xmlfile, handle) != Z_OK) {
+ zonecfg_fini_handle(handle);
+ lxs_err(gettext("zonecfg provided an invalid XML file"));
+ }
+
+ /*
+ * Check to see whether the zone has hostid emulation enabled.
+ */
+ if (zonecfg_get_hostid(handle, hostidp, sizeof (hostidp)) == Z_OK) {
+ zonecfg_fini_handle(handle);
+ lxs_err(gettext("lx zones do not support hostid emulation"));
+ }
+
+ /* Extract any relevant attributes from the config file. */
+ lxs_getattrs(handle, &restart, &audio, &idev, &odev, &kvers);
+ zonecfg_fini_handle(handle);
+
+ if (audio) {
+ /* sanity check the input and output device properties */
+ if (!lxs_iodev_ok(idev))
+ lxs_err(gettext("invalid value for zone attribute: %s"),
+ "audio-inputdev");
+
+ if (!lxs_iodev_ok(odev))
+ lxs_err(gettext("invalid value for zone attribute: %s"),
+ "audio-outputdev");
+ }
+ if (kvers) {
+ if (strlen(kvers) > (LX_VERS_MAX - 1) ||
+ (strncmp(kvers, "2.4", 3) != 0 &&
+ strncmp(kvers, "2.6", 3) != 0 &&
+ strncmp(kvers, "3.", 2) != 0))
+ lxs_err(gettext("invalid value for zone attribute: %s"),
+ "kernel-version");
+ }
+ return (0);
+}
+
+static void
+usage()
+{
+
+ (void) fprintf(stderr,
+ gettext("usage:\t%s boot <zoneroot> <zonename>\n"), bname);
+ (void) fprintf(stderr,
+ gettext(" \t%s halt <zoneroot> <zonename>\n"), bname);
+ (void) fprintf(stderr,
+ gettext(" \t%s verify <xml file>\n\n"), bname);
+ exit(1);
+}
+
+int
+main(int argc, char *argv[])
+{
+ (void) setlocale(LC_ALL, "");
+ (void) textdomain(TEXT_DOMAIN);
+
+ bname = basename(argv[0]);
+
+ if (argc < 3)
+ usage();
+
+ if (strcmp(argv[1], "boot") == 0) {
+ if (argc != 4)
+ lxs_err(gettext("usage: %s %s <zoneroot> <zonename>"),
+ bname, argv[1]);
+ zoneroot = argv[2];
+ zonename = argv[3];
+ return (lxs_boot());
+ }
+
+ if (strcmp(argv[1], "halt") == 0) {
+ if (argc != 4)
+ lxs_err(gettext("usage: %s %s <zoneroot> <zonename>"),
+ bname, argv[1]);
+ zoneroot = argv[2];
+ zonename = argv[3];
+ return (lxs_halt());
+ }
+
+ if (strcmp(argv[1], "verify") == 0) {
+ if (argc != 3)
+ lxs_err(gettext("usage: %s verify <xml file>"),
+ bname);
+ return (lxs_verify(argv[2]));
+ }
+
+ usage();
+ /*NOTREACHED*/
+}
diff --git a/usr/src/lib/brand/lx/lx_thunk/Makefile b/usr/src/lib/brand/lx/lx_thunk/Makefile
new file mode 100644
index 0000000000..f69dcec561
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_thunk/Makefile
@@ -0,0 +1,52 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../../../Makefile.lib
+
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+LINT_SUBDIRS = $(MACH)
+$(BUILD64)LINT_SUBDIRS += $(MACH64)
+
+all := TARGET= all
+clean := TARGET= clean
+clobber := TARGET= clobber
+install := TARGET= install
+lint := TARGET= lint
+
+.KEEP_STATE:
+
+all install clean clobber: $(SUBDIRS)
+
+lint: $(LINT_SUBDIRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/lib/brand/lx/lx_thunk/Makefile.com b/usr/src/lib/brand/lx/lx_thunk/Makefile.com
new file mode 100644
index 0000000000..75629a6d61
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_thunk/Makefile.com
@@ -0,0 +1,74 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+LIBRARY = lx_thunk.a
+VERS = .1
+
+COBJS = lx_thunk.o
+OBJECTS = $(COBJS)
+
+include ../../../../Makefile.lib
+include ../../Makefile.lx
+
+#
+# Since our name doesn't start with "lib", Makefile.lib incorrectly
+# calculates LIBNAME. Therefore, we set it here.
+#
+LIBNAME = lx_thunk
+
+MAPFILES = ../common/mapfile-vers
+MAPOPTS = $(MAPFILES:%=-M%)
+
+CSRCS = $(COBJS:%o=../common/%c)
+SRCS = $(CSRCS)
+
+SRCDIR = ../common
+UTSBASE = ../../../../../uts
+
+ASFLAGS += -P -D_ASM
+LDLIBS += -lc
+CFLAGS += $(CCVERBOSE)
+CPPFLAGS += -D_REENTRANT -I../ -I ../../lx_brand \
+ -I$(UTSBASE)/common/brand/lx
+
+# lx_think.so.1 interposes on a number of libc.so.1 routines.
+DYNFLAGS += $(MAPOPTS) $(ZINTERPOSE)
+
+LIBS = $(DYNLIB)
+
+CLEANFILES = $(DYNLIB)
+ROOTLIBDIR = $(ROOT)/usr/lib/brand/lx
+ROOTLIBDIR64 = $(ROOT)/usr/lib/brand/lx/$(MACH64)
+
+.KEEP_STATE:
+
+all: $(DYNLIB)
+
+lint: $(LINTLIB) lintcheck
+
+include ../../../../Makefile.targ
diff --git a/usr/src/lib/brand/lx/lx_thunk/amd64/Makefile b/usr/src/lib/brand/lx/lx_thunk/amd64/Makefile
new file mode 100644
index 0000000000..dbb283dff1
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_thunk/amd64/Makefile
@@ -0,0 +1,34 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../Makefile.com
+include $(SRC)/lib/Makefile.lib.64
+
+CLOBBERFILES = $(ROOTLIBDIR64)/$(DYNLIB) $(ROOTLIBDIR64)/$(LINTLIB)
+
+install: all $(ROOTLIBS64)
diff --git a/usr/src/lib/brand/lx/lx_thunk/common/lx_thunk.c b/usr/src/lib/brand/lx/lx_thunk/common/lx_thunk.c
new file mode 100644
index 0000000000..c81bae913b
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_thunk/common/lx_thunk.c
@@ -0,0 +1,1115 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * The BrandZ Linux thunking library.
+ *
+ * The interfaces defined in this file form the client side of a bridge
+ * to allow native Solaris process to access Linux services. Currently
+ * the Linux services that is made accessible by these interfaces here
+ * are:
+ * - Linux host <-> address naming services
+ * - Linux service <-> port naming services
+ * - Linux syslog
+ *
+ * Currently, to use this library it must be LD_PRELOADed into the
+ * application that needs to access Linux services. Once loaded
+ * Linux services are accessed by the client application in two
+ * different ways:
+ *
+ * - Direct library calls:
+ * lxt_gethostbyname_r
+ * lxt_gethostbyaddr_r
+ * lxt_getservbyname_r
+ * lxt_getservbyport_r
+ * lxt_debug
+ *
+ * These library functions are used by the BrandZ lx name services
+ * translation library (lx_nametoaddr.so) to handle libnsl.so name
+ * service requests.
+ *
+ * - Intercepted library calls:
+ * openlog(3c)
+ * syslog(3c)
+ * vsyslog(3c)
+ * closelog(3c)
+ *
+ * Via the LD_PRELOAD mechanism this library interposes itself on
+ * these interfaces and when the application calls these interfaces
+ * (either directly or indirectly via any libraries the program may
+ * be linked against) this library intercepts the request and passes
+ * it onto a Linux process to handle the request.
+ *
+ * Once this library receives a request that needs to be serviced by a
+ * Linux process, it packs up that request and attempts to send it
+ * to a doors server. The door server interfaces are defined in
+ * lx_thunk_server.h. If the doors server is not running or not
+ * responding, this library will attempt to spawn a new doors server
+ * by forking and executing the following shell script (which runs as
+ * a native /bin/sh Linux process):
+ * /native/usr/lib/brand/lx/lx_thunk
+ *
+ * Notes:
+ * - This library also intercepts the following system calls:
+ * close(2) - We intercept close(2) to prevent the caller from
+ * accidentally closing any of the file descriptors we
+ * need to do our work.
+ *
+ * setppriv(2) - We intercept setppriv(2) to prevent a process
+ * from dropping any of the privileges we'll need to create
+ * a new lx_thunk server process and to deal with service
+ * requests.
+ *
+ * - To facilitate the running of native Solaris programs and libraries
+ * when this library is preloaded into an application it will chroot()
+ * into /native. This way the Solaris application and libraries can
+ * access files via their expected paths and we can avoid having to
+ * either do path mapping or modifying all libraries to make them
+ * aware of "/native" so that they can pre-pend it to all their
+ * filesystem operations.
+ *
+ * - This library can only be used with processes that are initially
+ * run by root in a zone. The reason is that we use the chroot()
+ * system call and this requires the PRIV_PROC_CHROOT privilege,
+ * which non-root users don't have.
+ */
+
+#include <alloca.h>
+#include <assert.h>
+#include <dlfcn.h>
+#include <door.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <netdb.h>
+#include <netdir.h>
+#include <priv.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <synch.h>
+#include <sys/brand.h>
+#include <sys/fcntl.h>
+#include <sys/lx_thunk_server.h>
+#include <sys/lx_thunk.h>
+#include <sys/mman.h>
+#include <sys/priv_impl.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <thread.h>
+#include <unistd.h>
+#include <sys/varargs.h>
+
+#define LXT_DOOR_DIR "/tmp"
+#define LXT_DOOR_PREFIX "lxt"
+#define LXT_MSG_MAXLEN (128 + MAXPATHLEN)
+
+#pragma init(init)
+
+typedef uintptr_t (*fp1_t)(uintptr_t);
+typedef uintptr_t (*fp3_t)(uintptr_t, uintptr_t, uintptr_t);
+
+static char *lxt_debug_path = NULL; /* debug output file path */
+static char lxt_debug_path_buf[MAXPATHLEN];
+static int debug_fd = -1;
+
+void lxt_debug(const char *msg, ...);
+
+void
+init(void)
+{
+ if (getenv("LX_DEBUG") != NULL) {
+
+ /* check if there's a debug log file specified */
+ lxt_debug_path = getenv("LX_DEBUG_FILE");
+ if (lxt_debug_path == NULL) {
+ /* send all debugging output to /dev/tty */
+ lxt_debug_path = "/dev/tty";
+ }
+
+ (void) strlcpy(lxt_debug_path_buf, lxt_debug_path,
+ sizeof (lxt_debug_path_buf));
+ lxt_debug_path = lxt_debug_path_buf;
+
+ /*
+ * Open the debugging output file. We need to open it
+ * and hold it open because we're going to call chroot()
+ * in just a second, so we won't be able to open it later.
+ */
+ if ((debug_fd = open(lxt_debug_path,
+ O_WRONLY|O_APPEND|O_CREAT|O_NDELAY|O_NOCTTY,
+ 0666)) != -1) {
+ (void) fchmod(debug_fd, 0666);
+ }
+ }
+ lxt_debug("lxt_init: executing native process");
+
+ /*
+ * Now, so that we can avoid having to do path mapping,
+ * just chdir() and chroot() into /native.
+ */
+ if (chdir("/native") != 0) {
+ lxt_debug("lxt_init(): "
+ "failed to chdir to /native: %s", strerror(errno));
+ exit(-1);
+ }
+ if (chroot("/native") != 0) {
+ lxt_debug("lxt_init(): "
+ "failed to chroot to /native: %s", strerror(errno));
+ exit(-1);
+ }
+}
+
+/*
+ * Linux Thunking Interfaces - Client Side
+ */
+static mutex_t lxt_door_lock = DEFAULTMUTEX;
+static int lxt_door_fd = -1;
+
+static void
+lxt_server_exec(int fifo_wr, int fifo_rd)
+{
+ extern const char **environ;
+ char *nullist[] = { NULL };
+
+ lxt_debug("lxt_server_exec: server starting");
+
+ /*
+ * First we need to dup our fifos to the file descriptors
+ * the brand library is expecting them to be at.
+ */
+
+ /* Check if the write fifo needs to be moved aside */
+ if ((fifo_wr == LXT_SERVER_FIFO_RD_FD) &&
+ ((fifo_wr = dup(fifo_wr)) < 0))
+ return;
+
+ /* Check if the read fifo needs to be moved aside */
+ if ((fifo_rd == LXT_SERVER_FIFO_WR_FD) &&
+ ((fifo_rd = dup(fifo_rd)) < 0))
+ return;
+
+ if ((fifo_wr != LXT_SERVER_FIFO_WR_FD) &&
+ (dup2(fifo_wr, LXT_SERVER_FIFO_WR_FD) < 0))
+ return;
+ if ((fifo_rd != LXT_SERVER_FIFO_RD_FD) &&
+ (dup2(fifo_rd, LXT_SERVER_FIFO_RD_FD) < 0))
+ return;
+
+ /*
+ * We're about to execute a branded Linux process.
+ * Since we've been loaded into a native Solaris process with
+ * LD_PRELOAD and LD_LIBRARY_PATH we should clear these
+ * variables from the environment before calling exec.
+ */
+ (void) unsetenv("LD_PRELOAD");
+ (void) unsetenv("LD_LIBRARY_PATH");
+
+ /*
+ * Now we need to exec the thunk server process. This is a
+ * branded Linux process that will act as a doors server and
+ * service our requests to perform native Linux operations.
+ * Since we're currently running as a native Solaris process,
+ * to start up the server we'll use the brand system call to
+ * tell the kernel that the target of the exec will be a branded
+ * process.
+ */
+ lxt_debug("lxt_server_exec: execing as Linux process");
+ (void) syscall(SYS_brand, B_EXEC_BRAND,
+ LXT_SERVER_BINARY, nullist, environ);
+}
+
+
+static void *
+lxt_door_waitpid(void *arg)
+{
+ pid_t child_pid = (pid_t)(uintptr_t)arg;
+ int stat;
+
+ (void) waitpid(child_pid, &stat, 0);
+ return (NULL);
+}
+
+static char *
+lxt_door_mkfifo()
+{
+ char *path;
+
+ for (;;) {
+ path = tempnam(LXT_DOOR_DIR, LXT_DOOR_PREFIX);
+ if (path == NULL)
+ return (NULL);
+ if (mkfifo(path, S_IWUSR | S_IRUSR) != 0) {
+ if (errno != EEXIST) {
+ free(path);
+ return (NULL);
+ }
+ /* This file path exists, pick a new name. */
+ free(path);
+ continue;
+ }
+ /* We successfully created the fifo */
+ break;
+ }
+ return (path);
+}
+
+#define BRK_CHROOT_DIR "tmp/.brkchroot"
+
+static void
+breakout_chroot()
+{
+ int i;
+ struct stat sb;
+
+ if (stat(BRK_CHROOT_DIR, &sb) < 0) {
+ if (errno != ENOENT)
+ exit(-1);
+ if (mkdir(BRK_CHROOT_DIR, 0755) < 0)
+ exit(-1);
+ } else if (!S_ISDIR(sb.st_mode)) {
+ exit(-1);
+ }
+
+ if (chroot(BRK_CHROOT_DIR) < 0)
+ exit(-1);
+
+ for (i = 0; i < 32; i++)
+ chdir("..");
+
+ chroot(".");
+}
+
+static void
+lxt_door_init()
+{
+ char *fifo1_path = NULL, *fifo2_path = NULL;
+ char fifo1_path_native[MAXPATHLEN];
+ int fifo1_rd = -1, fifo1_wr = -1;
+ int fifo2_rd = -1, fifo2_wr = -1;
+ int junk;
+ pid_t child_pid;
+ thread_t tid;
+
+ lxt_debug("lxt_door_init: preparint to start server");
+
+ /* Create two new fifos. */
+ if (((fifo1_path = lxt_door_mkfifo()) == NULL) ||
+ ((fifo2_path = lxt_door_mkfifo()) == NULL))
+ goto fail;
+
+ (void) snprintf(fifo1_path_native, sizeof (fifo1_path_native),
+ "/native%s", fifo1_path);
+
+ /*
+ * Open both fifos for reading and writing. We have to open
+ * the read side of the fifo first (because the write side will
+ * fail to open if there is no reader) and we have to use the
+ * O_NONBLOCK flag (because the read open with hang without it).
+ */
+ if (((fifo1_rd = open(fifo1_path, O_RDONLY | O_NONBLOCK)) < 0) ||
+ ((fifo1_wr = open(fifo1_path, O_WRONLY)) < 0) ||
+ ((fifo2_rd = open(fifo2_path, O_RDONLY | O_NONBLOCK)) < 0) ||
+ ((fifo2_wr = open(fifo2_path, O_WRONLY)) < 0))
+ goto fail;
+
+ /*
+ * Now we have to close the read side of fifo1 and fifo2 and re-open
+ * them without the O_NONBLOCK flag. This is because we're using
+ * the fifos for synchronization and when we actually try to read
+ * from them we want to block.
+ */
+ (void) close(fifo1_rd);
+ if ((fifo1_rd = open(fifo1_path, O_RDONLY)) < 0)
+ goto fail;
+ (void) close(fifo2_rd);
+ if ((fifo2_rd = open(fifo2_path, O_RDONLY)) < 0)
+ goto fail;
+
+ /*
+ * Once fifo2 is opened no one will ever need to open it again
+ * so delete it now.
+ */
+ (void) unlink(fifo2_path);
+ free(fifo2_path);
+ fifo2_path = NULL;
+
+ /* Attempt to fork and start the door server */
+ lxt_debug("lxt_door_init: starting server");
+ switch (child_pid = fork1()) {
+ case -1:
+ /* fork1() failed. */
+ goto fail;
+ case 0:
+ /* Child process - new door server. */
+ (void) close(fifo1_rd);
+ (void) close(fifo2_wr);
+
+ /* Need to chroot back to the real root directory */
+ breakout_chroot();
+
+ /* Start the server */
+ lxt_server_exec(fifo1_wr, fifo2_rd);
+ lxt_debug("lxt_server_exec: server init failed");
+ exit(-1);
+ /*NOTREACHED*/
+ }
+ /* Parent process - door client. */
+
+ /*
+ * fifo2 is used to send the door path to the child.
+ * (We can't simply pass it via the address space since the
+ * child will need to exec.) We'll write the name of the door
+ * file to fifo2 before we close the read end of the fifo2 so
+ * that if the child has exited for some reason we won't get
+ * a SIGPIPE. Note that we're reusing the name of fifo1 as
+ * the door path. Also note that we've pre-pended /native
+ * to the fifo/door path. The reason is that we're chroot'ed
+ * to /native, but when the thunking server executes it will
+ * be chroot'ed back to the real root directory.
+ */
+ (void) write(fifo2_wr,
+ fifo1_path_native, strlen(fifo1_path_native) + 1);
+ (void) close(fifo2_wr);
+ (void) close(fifo2_rd);
+
+ /*
+ * Start up a thread that will perfom a waitpid() on the child
+ * door server process. We do this because if the calling
+ * application that is using our interfaces is forking it's own
+ * children and using wait(), then it won't expect to see our
+ * children. We take advantage of the fact that if there are
+ * wait() and a waitpid() calls in progress at the same time
+ * when a child exists, preference will be given to any
+ * waitpid() calls that are explicity waiting for that child.
+ * There is of course a window of time where the child could
+ * exit after we've forked it but before we've called waitpid()
+ * where another wait() in this process could collect the result.
+ * There's nothing we can really do to prevent this short of
+ * stopping all the other threads in this process.
+ */
+ (void) thr_create(NULL, 0,
+ lxt_door_waitpid, (void *)(uintptr_t)child_pid, THR_DAEMON, &tid);
+
+ /*
+ * fifo1 is used for the child process to signal us that the
+ * door server is ready to take requests.
+ */
+ (void) close(fifo1_wr);
+ (void) read(fifo1_rd, &junk, 1);
+ (void) close(fifo1_rd);
+
+ /* If there was a door that was open, close it now. */
+
+ if (lxt_door_fd >= 0)
+ (void) close(lxt_door_fd);
+ /*
+ * The server should be started up by now and fattach()ed the door
+ * server to the fifo/door path. so if we re-open that path now we
+ * should get a fd to the door server.
+ */
+ lxt_door_fd = open(fifo1_path, O_RDWR);
+
+ lxt_debug("lxt_door_init: new server door = %d", lxt_door_fd);
+
+ /* We don't need the fifo/door anymore so delete it. */
+ (void) unlink(fifo1_path);
+ free(fifo1_path);
+ return;
+
+fail:
+ if (fifo1_path != NULL)
+ (void) unlink(fifo1_path);
+ if (fifo2_path != NULL)
+ (void) unlink(fifo2_path);
+ if (fifo1_rd != -1)
+ (void) close(fifo1_rd);
+ if (fifo1_wr != -1)
+ (void) close(fifo1_wr);
+ if (fifo2_rd != -1)
+ (void) close(fifo2_rd);
+ if (fifo2_wr != -1)
+ (void) close(fifo2_wr);
+}
+
+static int
+lxt_door_call(door_arg_t *door_arg, int lock_held)
+{
+ int fd;
+
+ if (!lock_held)
+ (void) mutex_lock(&lxt_door_lock);
+
+ /* Get a copy of lxt_door_fd */
+ fd = lxt_door_fd;
+
+ if (!lock_held)
+ (void) mutex_unlock(&lxt_door_lock);
+
+ if (fd == -1) {
+ lxt_debug("lxt_door_call: no door available");
+ return (-1);
+ }
+
+ if (door_call(fd, door_arg) != 0) {
+ lxt_debug("lxt_door_call: call failed");
+ return (-1);
+ }
+ if (door_arg->rbuf == NULL) {
+ lxt_debug("lxt_door_call: call returned NULL");
+ return (-1);
+ }
+ return (0);
+}
+
+static int
+lxt_door_request(door_arg_t *door_arg)
+{
+ door_arg_t door_ping;
+ lxt_server_arg_t ping_request, *ping_result;
+ int rv, ping_success = 0;
+
+ /* First just try the door call. */
+ lxt_debug("lxt_door_request: calling server");
+ if (lxt_door_call(door_arg, 0) == 0)
+ return (0);
+
+ /* Prepare a door server ping request. */
+ bzero(&door_ping, sizeof (door_ping));
+ bzero(&ping_request, sizeof (ping_request));
+ door_ping.data_ptr = (char *)&ping_request;
+ door_ping.data_size = sizeof (ping_request);
+ ping_request.lxt_sa_op = LXT_SERVER_OP_PING;
+
+ (void) mutex_lock(&lxt_door_lock);
+
+ /* Ping the doors server. */
+ lxt_debug("lxt_door_request: pinging server");
+ if (lxt_door_call(&door_ping, 1) == 0) {
+ /*LINTED*/
+ ping_result = (lxt_server_arg_t *)door_ping.rbuf;
+ ping_success = ping_result->lxt_sa_success;
+ (void) munmap(door_ping.rbuf, door_ping.rsize);
+ }
+
+ if (!ping_success) {
+ /* The server is not responding so start up a new one. */
+ lxt_door_init();
+ }
+ (void) mutex_unlock(&lxt_door_lock);
+
+ /* Retry the original request */
+ lxt_debug("lxt_door_request: calling server, retry");
+ if ((rv = lxt_door_call(door_arg, 0)) == 0)
+ return (0);
+ return (rv);
+}
+
+static struct hostent *
+lxt_gethost(int op, const char *token, int token_len, int type,
+ struct hostent *result, char *buf, int buf_len, int *h_errnop)
+{
+ door_arg_t door_arg;
+ lxt_gethost_arg_t *data;
+ lxt_server_arg_t *request;
+ int request_size, errno_tmp, i;
+
+ lxt_debug("lxt_gethost: request caught");
+
+ request_size = sizeof (*request) + sizeof (*data) +
+ token_len + buf_len - 1;
+ if ((request = calloc(1, request_size)) == NULL) {
+ lxt_debug("lxt_gethost: calloc() failed");
+ *h_errnop = TRY_AGAIN;
+ return (NULL);
+ }
+ /*LINTED*/
+ data = (lxt_gethost_arg_t *)&request->lxt_sa_data[0];
+
+ /* Initialize the server request. */
+ request->lxt_sa_op = op;
+ data->lxt_gh_type = type;
+ data->lxt_gh_token_len = token_len;
+ data->lxt_gh_buf_len = buf_len;
+ data->lxt_gh_storage_len = token_len + token_len;
+ bcopy(token, &data->lxt_gh_storage[0], token_len);
+
+ /* Initialize door_call() arguments. */
+ bzero(&door_arg, sizeof (door_arg));
+ door_arg.data_ptr = (char *)request;
+ door_arg.data_size = request_size;
+
+ if (lxt_door_request(&door_arg) != 0) {
+ lxt_debug("lxt_gethost: door_call() failed");
+ /* Don't know what caused the error so clear errno. */
+ errno = 0;
+ *h_errnop = ND_SYSTEM;
+ free(request);
+ return (NULL);
+ }
+
+ free(request);
+
+ if (door_arg.rbuf == NULL) {
+ lxt_debug("lxt_gethost: door_call() returned NULL");
+ /* Don't know what caused the error so clear errno. */
+ errno = 0;
+ *h_errnop = ND_SYSTEM;
+ return (NULL);
+ }
+
+ /*LINTED*/
+ request = (lxt_server_arg_t *)door_arg.rbuf;
+ /*LINTED*/
+ data = (lxt_gethost_arg_t *)&request->lxt_sa_data[0];
+
+ /* Check if the remote procedure call failed */
+ if (!request->lxt_sa_success) {
+ lxt_debug("lxt_gethost: remote function call failed");
+ errno_tmp = request->lxt_sa_errno;
+ *h_errnop = data->lxt_gh_h_errno;
+ (void) munmap(door_arg.rbuf, door_arg.rsize);
+ errno = errno_tmp;
+ return (NULL);
+ }
+
+ /* Copy out the results and output buffer. */
+ bcopy(&data->lxt_gh_result, result, sizeof (*result));
+ bcopy(&data->lxt_gh_storage[token_len], buf, buf_len);
+ (void) munmap(door_arg.rbuf, door_arg.rsize);
+
+ /* Now go through the results and convert all offsets to pointers */
+ result->h_name = LXT_OFFSET_TO_PTR(result->h_name, buf);
+ result->h_aliases = LXT_OFFSET_TO_PTR(result->h_aliases, buf);
+ result->h_addr_list = LXT_OFFSET_TO_PTR(result->h_addr_list, buf);
+ for (i = 0; result->h_aliases[i] != NULL; i++) {
+ result->h_aliases[i] =
+ LXT_OFFSET_TO_PTR(result->h_aliases[i], buf);
+ }
+ for (i = 0; result->h_addr_list[i] != NULL; i++) {
+ result->h_addr_list[i] =
+ LXT_OFFSET_TO_PTR(result->h_addr_list[i], buf);
+ }
+
+ return (result);
+}
+
+static struct servent *
+lxt_getserv(int op, const char *token, const int token_len, const char *proto,
+ struct servent *result, char *buf, int buf_len)
+{
+ door_arg_t door_arg;
+ lxt_getserv_arg_t *data;
+ lxt_server_arg_t *request;
+ int request_size, errno_tmp, i;
+
+ lxt_debug("lxt_getserv: request caught");
+
+ request_size = sizeof (*request) + sizeof (*data) +
+ token_len + buf_len - 1;
+ if ((request = calloc(1, request_size)) == NULL) {
+ lxt_debug("lxt_getserv: calloc() failed");
+ return (NULL);
+ }
+ /*LINTED*/
+ data = (lxt_getserv_arg_t *)&request->lxt_sa_data[0];
+
+ /* Initialize the server request. */
+ request->lxt_sa_op = op;
+ data->lxt_gs_token_len = token_len;
+ data->lxt_gs_buf_len = buf_len;
+ data->lxt_gs_storage_len = token_len + token_len;
+ bcopy(token, &data->lxt_gs_storage[0], token_len);
+
+ bzero(data->lxt_gs_proto, sizeof (data->lxt_gs_proto));
+ if (proto != NULL)
+ (void) strncpy(data->lxt_gs_proto, proto,
+ sizeof (data->lxt_gs_proto));
+
+ /* Initialize door_call() arguments. */
+ bzero(&door_arg, sizeof (door_arg));
+ door_arg.data_ptr = (char *)request;
+ door_arg.data_size = request_size;
+
+ /* Call the doors server */
+ if (lxt_door_request(&door_arg) != 0) {
+ lxt_debug("lxt_getserv: door_call() failed");
+ /* Don't know what caused the error so clear errno */
+ errno = 0;
+ free(request);
+ return (NULL);
+ }
+ free(request);
+
+ if (door_arg.rbuf == NULL) {
+ lxt_debug("lxt_getserv: door_call() returned NULL");
+ /* Don't know what caused the error so clear errno */
+ errno = 0;
+ return (NULL);
+ }
+ /*LINTED*/
+ request = (lxt_server_arg_t *)door_arg.rbuf;
+ /*LINTED*/
+ data = (lxt_getserv_arg_t *)&request->lxt_sa_data[0];
+
+ /* Check if the remote procedure call failed */
+ if (!request->lxt_sa_success) {
+ lxt_debug("lxt_getserv: remote function call failed");
+ errno_tmp = request->lxt_sa_errno;
+ (void) munmap(door_arg.rbuf, door_arg.rsize);
+ errno = errno_tmp;
+ return (NULL);
+ }
+
+ /* Copy out the results and output buffer. */
+ bcopy(&data->lxt_gs_result, result, sizeof (*result));
+ bcopy(&data->lxt_gs_storage[token_len], buf, buf_len);
+ (void) munmap(door_arg.rbuf, door_arg.rsize);
+
+ /*
+ * Now go through the results and convert all offsets to pointers.
+ * See the comments in lxt_server_getserv() for why we need
+ * to subtract 1 from each offset.
+ */
+ result->s_name = LXT_OFFSET_TO_PTR(result->s_name, buf);
+ result->s_proto = LXT_OFFSET_TO_PTR(result->s_proto, buf);
+ result->s_aliases = LXT_OFFSET_TO_PTR(result->s_aliases, buf);
+ for (i = 0; result->s_aliases[i] != NULL; i++) {
+ result->s_aliases[i] =
+ LXT_OFFSET_TO_PTR(result->s_aliases[i], buf);
+ }
+
+ return (result);
+}
+
+static void
+lxt_openlog(const char *ident, int logopt, int facility)
+{
+ door_arg_t door_arg;
+ lxt_openlog_arg_t *data;
+ lxt_server_arg_t *request;
+ int request_size;
+
+ request_size = sizeof (*request) + sizeof (*data);
+ if ((request = calloc(1, request_size)) == NULL) {
+ lxt_debug("lxt_openlog: calloc() failed");
+ return;
+ }
+ /*LINTED*/
+ data = (lxt_openlog_arg_t *)&request->lxt_sa_data[0];
+
+ /* Initialize the server request. */
+ request->lxt_sa_op = LXT_SERVER_OP_OPENLOG;
+ data->lxt_ol_facility = facility;
+ data->lxt_ol_logopt = logopt;
+ (void) strlcpy(data->lxt_ol_ident, ident, sizeof (data->lxt_ol_ident));
+
+ /* Initialize door_call() arguments. */
+ bzero(&door_arg, sizeof (door_arg));
+ door_arg.data_ptr = (char *)request;
+ door_arg.data_size = request_size;
+
+ /* Call the doors server */
+ if (lxt_door_request(&door_arg) != 0) {
+ lxt_debug("lxt_openlog: door_call() failed");
+ free(request);
+ return;
+ }
+ free(request);
+
+ if (door_arg.rbuf == NULL) {
+ lxt_debug("lxt_openlog: door_call() returned NULL");
+ return;
+ }
+
+ /*LINTED*/
+ request = (lxt_server_arg_t *)door_arg.rbuf;
+
+ /* Check if the remote procedure call failed */
+ if (!request->lxt_sa_success) {
+ lxt_debug("lxt_openlog: remote function call failed");
+ }
+ (void) munmap(door_arg.rbuf, door_arg.rsize);
+}
+
+static void
+lxt_vsyslog(int priority, const char *message, va_list va)
+{
+ door_arg_t door_arg;
+ lxt_syslog_arg_t *data;
+ lxt_server_arg_t *request;
+ psinfo_t p;
+ char procfile[PRFNSZ], *buf = NULL, *estr;
+ int buf_len, buf_i, estr_len, request_size, procfd;
+ int i, key, err_count = 0, tok_count = 0;
+ int errno_backup = errno;
+
+ /*
+ * Here we're going to use vsnprintf() to expand the message
+ * string passed in before we hand it off to a Linux process.
+ * Before we can call vsnprintf() we'll need to do modify the
+ * string to deal with certain special tokens.
+ *
+ * syslog() supports a special '%m' format token that expands to
+ * the error message string associated with the current value
+ * of errno. Unfortunatly if we pass this token to vsnprintf()
+ * it will choke so we need to expand that token manually here.
+ *
+ * We also need to expand any "%%" characters into "%%%%".
+ * The reason is that we'll be calling vsnprintf() which will
+ * translate "%%%%" back to "%%", which is safe to pass to the
+ * Linux version if syslog. If we didn't do this then vsnprintf()
+ * would translate "%%" to "%" and then the Linux syslog would
+ * attempt to intrepret "%" and whatever character follows it
+ * as a printf format style token.
+ */
+ for (key = i = 0; message[i] != '\0'; i++) {
+ if (!key && message[i] == '%') {
+ key = 1;
+ continue;
+ }
+ if (key && message[i] == '%')
+ tok_count++;
+ if (key && message[i] == 'm')
+ err_count++;
+ key = 0;
+ }
+
+ /* We found some tokens that we need to expand. */
+ if (err_count || tok_count) {
+ estr = strerror(errno_backup);
+ estr_len = strlen(estr);
+ assert(estr_len >= 2);
+
+ /* Allocate a buffer to hold the expanded string. */
+ buf_len = i + 1 +
+ (tok_count * 2) + (err_count * (estr_len - 2));
+ if ((buf = calloc(1, buf_len)) == NULL) {
+ lxt_debug("lxt_vsyslog: calloc() failed");
+ return;
+ }
+
+ /* Finally, expand %% and %m. */
+ for (key = buf_i = i = 0; message[i] != '\0'; i++) {
+ assert(buf_i < buf_len);
+ if (!key && message[i] == '%') {
+ buf[buf_i++] = '%';
+ key = 1;
+ continue;
+ }
+ if (key && message[i] == 'm') {
+ (void) bcopy(estr, &buf[buf_i - 1], estr_len);
+ buf_i += estr_len - 1;
+ } else if (key && message[i] == '%') {
+ (void) bcopy("%%%%", &buf[buf_i - 1], 4);
+ buf_i += 4 - 1;
+ } else {
+ buf[buf_i++] = message[i];
+ }
+ key = 0;
+ }
+ assert(buf[buf_i] == '\0');
+ assert(buf_i == (buf_len - 1));
+
+ /* Use the expanded buffer as our format string. */
+ message = buf;
+ }
+
+ /* Allocate the request we're going to send to the server */
+ request_size = sizeof (*request) + sizeof (*data);
+ if ((request = calloc(1, request_size)) == NULL) {
+ lxt_debug("lxt_vsyslog: calloc() failed");
+ return;
+ }
+
+ /*LINTED*/
+ data = (lxt_syslog_arg_t *)&request->lxt_sa_data[0];
+
+ /* Initialize the server request. */
+ request->lxt_sa_op = LXT_SERVER_OP_SYSLOG;
+ data->lxt_sl_priority = priority;
+ data->lxt_sl_pid = getpid();
+ (void) vsnprintf(data->lxt_sl_message, sizeof (data->lxt_sl_message),
+ message, va);
+
+ /* If we did token expansion then free the intermediate buffer. */
+ if (err_count || tok_count)
+ free(buf);
+
+ /* Add the current program name into the request */
+ (void) sprintf(procfile, "/proc/%u/psinfo", (int)getpid());
+ /* (void) sprintf(procfile, "/native/proc/%u/psinfo", (int)getpid()); */
+ if ((procfd = open(procfile, O_RDONLY)) >= 0) {
+ if (read(procfd, &p, sizeof (psinfo_t)) >= 0) {
+ (void) strncpy(data->lxt_sl_progname, p.pr_fname,
+ sizeof (data->lxt_sl_progname));
+ }
+ (void) close(procfd);
+ }
+
+ /* Initialize door_call() arguments. */
+ bzero(&door_arg, sizeof (door_arg));
+ door_arg.data_ptr = (char *)request;
+ door_arg.data_size = request_size;
+
+ /* Call the doors server */
+ if (lxt_door_request(&door_arg) != 0) {
+ lxt_debug("lxt_vsyslog: door_call() failed");
+ free(request);
+ return;
+ }
+ free(request);
+
+ if (door_arg.rbuf == NULL) {
+ lxt_debug("lxt_vsyslog: door_call() returned NULL");
+ return;
+ }
+
+ /*LINTED*/
+ request = (lxt_server_arg_t *)door_arg.rbuf;
+
+ /* Check if the remote procedure call failed */
+ if (!request->lxt_sa_success) {
+ lxt_debug("lxt_vsyslog: remote function call failed");
+ }
+ (void) munmap(door_arg.rbuf, door_arg.rsize);
+}
+
+static void
+lxt_closelog(void)
+{
+ door_arg_t door_arg;
+ lxt_server_arg_t *request;
+ int request_size;
+
+ request_size = sizeof (*request);
+ if ((request = calloc(1, request_size)) == NULL) {
+ lxt_debug("lxt_closelog: calloc() failed");
+ return;
+ }
+
+ /* Initialize the server request. */
+ request->lxt_sa_op = LXT_SERVER_OP_CLOSELOG;
+
+ /* Initialize door_call() arguments. */
+ bzero(&door_arg, sizeof (door_arg));
+ door_arg.data_ptr = (char *)request;
+ door_arg.data_size = request_size;
+
+ /* Call the doors server */
+ if (lxt_door_request(&door_arg) != 0) {
+ lxt_debug("lxt_closelog: door_call() failed");
+ free(request);
+ return;
+ }
+ free(request);
+
+ if (door_arg.rbuf == NULL) {
+ lxt_debug("lxt_closelog: door_call() returned NULL");
+ return;
+ }
+
+ /*LINTED*/
+ request = (lxt_server_arg_t *)door_arg.rbuf;
+
+ /* Check if the remote procedure call failed */
+ if (!request->lxt_sa_success) {
+ lxt_debug("lxt_closelog: remote function call failed");
+ }
+ (void) munmap(door_arg.rbuf, door_arg.rsize);
+}
+
+static void
+lxt_pset_keep(priv_op_t op, priv_ptype_t type, priv_set_t *pset,
+ const char *priv)
+{
+ if (priv_ismember(pset, priv) == B_TRUE) {
+ if (op == PRIV_OFF) {
+ (void) priv_delset(pset, priv);
+ lxt_debug("lxt_pset_keep: "
+ "preventing drop of \"%s\" from \"%s\" set",
+ priv, type);
+ }
+ } else {
+ if (op == PRIV_SET) {
+ (void) priv_addset(pset, priv);
+ lxt_debug("lxt_pset_keep: "
+ "preventing drop of \"%s\" from \"%s\" set",
+ priv, type);
+ }
+ }
+}
+
+/*
+ * Public interfaces - used by lx_nametoaddr
+ */
+void
+lxt_vdebug(const char *msg, va_list va)
+{
+ char buf[LXT_MSG_MAXLEN + 1];
+ int rv, n;
+
+ if (debug_fd == -1)
+ return;
+
+ /* Prefix the message with pid/tid. */
+ if ((n = snprintf(buf, sizeof (buf), "%u/%u: ",
+ getpid(), thr_self())) == -1)
+ return;
+
+ /* Format the message. */
+ if (vsnprintf(&buf[n], sizeof (buf) - n, msg, va) == -1)
+ return;
+
+ /* Add a carrige return if there isn't one already. */
+ if ((buf[strlen(buf) - 1] != '\n') &&
+ (strlcat(buf, "\n", sizeof (buf)) >= sizeof (buf)))
+ return;
+
+ /* We retry in case of EINTR */
+ do {
+ rv = write(debug_fd, buf, strlen(buf));
+ } while ((rv == -1) && (errno == EINTR));
+}
+
+void
+lxt_debug(const char *msg, ...)
+{
+ va_list va;
+ int errno_backup;
+
+ if (debug_fd == -1)
+ return;
+
+ errno_backup = errno;
+ va_start(va, msg);
+ lxt_vdebug(msg, va);
+ va_end(va);
+ errno = errno_backup;
+}
+
+struct hostent *
+lxt_gethostbyaddr_r(const char *addr, int addr_len, int type,
+ struct hostent *result, char *buf, int buf_len, int *h_errnop)
+{
+ lxt_debug("lxt_gethostbyaddr_r: request recieved");
+ return (lxt_gethost(LXT_SERVER_OP_ADDR2HOST,
+ addr, addr_len, type, result, buf, buf_len, h_errnop));
+}
+
+struct hostent *
+lxt_gethostbyname_r(const char *name,
+ struct hostent *result, char *buf, int buf_len, int *h_errnop)
+{
+ lxt_debug("lxt_gethostbyname_r: request recieved");
+ return (lxt_gethost(LXT_SERVER_OP_NAME2HOST,
+ name, strlen(name) + 1, 0, result, buf, buf_len, h_errnop));
+}
+
+struct servent *
+lxt_getservbyport_r(int port, const char *proto,
+ struct servent *result, char *buf, int buf_len)
+{
+ lxt_debug("lxt_getservbyport_r: request recieved");
+ return (lxt_getserv(LXT_SERVER_OP_PORT2SERV,
+ (const char *)&port, sizeof (int), proto, result, buf, buf_len));
+}
+
+struct servent *
+lxt_getservbyname_r(const char *name, const char *proto,
+ struct servent *result, char *buf, int buf_len)
+{
+ lxt_debug("lxt_getservbyname_r: request recieved");
+ return (lxt_getserv(LXT_SERVER_OP_NAME2SERV,
+ name, strlen(name) + 1, proto, result, buf, buf_len));
+}
+
+/*
+ * "Public" interfaces - used to override public existing interfaces
+ */
+int
+_setppriv(priv_op_t op, priv_ptype_t type, const priv_set_t *pset)
+{
+ static fp3_t fp = NULL;
+ priv_set_t *pset_new;
+ int rv;
+
+ lxt_debug("_setppriv: request caught");
+
+ if (fp == NULL)
+ fp = (fp3_t)dlsym(RTLD_NEXT, "_setppriv");
+
+ while ((pset_new = priv_allocset()) == NULL)
+ (void) sleep(1);
+
+ priv_copyset(pset, pset_new);
+ lxt_pset_keep(op, type, pset_new, PRIV_PROC_EXEC);
+ lxt_pset_keep(op, type, pset_new, PRIV_PROC_FORK);
+ lxt_pset_keep(op, type, pset_new, PRIV_PROC_CHROOT);
+ lxt_pset_keep(op, type, pset_new, PRIV_FILE_DAC_READ);
+ lxt_pset_keep(op, type, pset_new, PRIV_FILE_DAC_WRITE);
+ lxt_pset_keep(op, type, pset_new, PRIV_FILE_DAC_SEARCH);
+
+ rv = fp(op, (uintptr_t)type, (uintptr_t)pset_new);
+ priv_freeset(pset_new);
+ return (rv);
+}
+
+void
+openlog(const char *ident, int logopt, int facility)
+{
+ lxt_debug("openlog: request caught");
+ lxt_openlog(ident, logopt, facility);
+}
+
+void
+syslog(int priority, const char *message, ...)
+{
+ va_list va;
+
+ lxt_debug("syslog: request caught");
+ va_start(va, message);
+ lxt_vsyslog(priority, message, va);
+ va_end(va);
+}
+
+void
+vsyslog(int priority, const char *message, va_list va)
+{
+ lxt_debug("vsyslog: request caught");
+ lxt_vsyslog(priority, message, va);
+}
+
+void
+closelog(void)
+{
+ lxt_debug("closelog: request caught");
+ lxt_closelog();
+}
diff --git a/usr/src/lib/brand/lx/lx_thunk/common/mapfile-vers b/usr/src/lib/brand/lx/lx_thunk/common/mapfile-vers
new file mode 100644
index 0000000000..fc245c3360
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_thunk/common/mapfile-vers
@@ -0,0 +1,57 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+SUNWprivate_1.1 {
+ global:
+ lxt_vdebug;
+ lxt_debug;
+ lxt_gethostbyaddr_r;
+ lxt_gethostbyname_r;
+ lxt_getservbyport_r;
+ lxt_getservbyname_r;
+ _setppriv;
+ openlog;
+ syslog;
+ vsyslog;
+ closelog;
+
+ local:
+ *;
+};
diff --git a/usr/src/lib/brand/lx/lx_thunk/i386/Makefile b/usr/src/lib/brand/lx/lx_thunk/i386/Makefile
new file mode 100644
index 0000000000..c4b6c71027
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_thunk/i386/Makefile
@@ -0,0 +1,33 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../Makefile.com
+
+CLOBBERFILES = $(ROOTLIBDIR)/$(DYNLIB) $(ROOTLIBDIR)/$(LINTLIB)
+
+install: all $(ROOTLIBS)
diff --git a/usr/src/lib/brand/lx/lx_thunk/sys/lx_thunk.h b/usr/src/lib/brand/lx/lx_thunk/sys/lx_thunk.h
new file mode 100644
index 0000000000..b19c91873a
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_thunk/sys/lx_thunk.h
@@ -0,0 +1,56 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LX_THUNK_H
+#define _LX_THUNK_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct hostent *lxt_gethostbyaddr_r(const char *addr, int addr_len, int type,
+ struct hostent *result, char *buf, int buf_len, int *h_errnop);
+struct hostent *lxt_gethostbyname_r(const char *name,
+ struct hostent *result, char *buf, int buf_len, int *h_errnop);
+struct servent *lxt_getservbyport_r(int port, const char *proto,
+ struct servent *result, char *buf, int buf_len);
+struct servent *lxt_getservbyname_r(const char *name, const char *proto,
+ struct servent *result, char *buf, int buf_len);
+
+void openlog(const char *ident, int logopt, int facility);
+void syslog(int priority, const char *message, ...);
+void closelog(void);
+
+void lxt_debug(const char *msg, ...);
+void lxt_vdebug(const char *msg, va_list va);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_THUNK_H */
diff --git a/usr/src/lib/brand/lx/lx_vdso/Makefile b/usr/src/lib/brand/lx/lx_vdso/Makefile
new file mode 100644
index 0000000000..50383bf164
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_vdso/Makefile
@@ -0,0 +1,39 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../../../Makefile.lib
+
+SUBDIRS =
+$(BUILD64)SUBDIRS += $(MACH64)
+
+LINT_SUBDIRS =
+$(BUILD64)LINT_SUBDIRS += $(MACH64)
+
+all := TARGET= all
+clean := TARGET= clean
+clobber := TARGET= clobber
+install := TARGET= install
+lint := TARGET= lint
+
+.KEEP_STATE:
+
+all install clean clobber: $(SUBDIRS)
+
+lint: $(LINT_SUBDIRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/lib/brand/lx/lx_vdso/Makefile.com b/usr/src/lib/brand/lx/lx_vdso/Makefile.com
new file mode 100644
index 0000000000..cac7093219
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_vdso/Makefile.com
@@ -0,0 +1,71 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2014 Joyent, Inc. All rights reserved.
+#
+
+LIBRARY = lx_vdso.a
+VERS = .1
+
+COBJS = lx_vdso.o
+OBJECTS = $(COBJS)
+
+include ../../../../Makefile.lib
+include ../../Makefile.lx
+
+#
+# Since our name doesn't start with "lib", Makefile.lib incorrectly
+# calculates LIBNAME. Therefore, we set it here.
+#
+LIBNAME = lx_vdso
+
+MAPFILES = ../common/mapfile-vers
+MAPOPTS = $(MAPFILES:%=-M%)
+
+ASOBJS = lx_vdso.o
+OBJECTS = $(ASOBJS)
+
+ASSRCS = $(ASOBJS:%o=$(ISASRCDIR)/%s)
+SRCS = $(ASSRCS)
+
+SRCDIR = ../common
+UTSBASE = ../../../../../uts
+
+LIBS = $(DYNLIB)
+DYNFLAGS += $(DYNFLAGS_$(CLASS))
+DYNFLAGS += $(MAPOPTS)
+LDLIBS +=
+ASFLAGS = -P $(ASFLAGS_$(CURTYPE)) -D_ASM
+
+
+LIBS = $(DYNLIB)
+
+CLEANFILES = $(DYNLIB)
+ROOTLIBDIR = $(ROOT)/usr/lib/brand/lx
+ROOTLIBDIR64 = $(ROOT)/usr/lib/brand/lx/$(MACH64)
+
+.KEEP_STATE:
+
+all: $(LIBS)
+ $(ELFEDIT) -e "dyn:value -add VERSYM $$(elfedit \
+ -e 'shdr:dump .SUNW_versym' $(DYNLIB) | \
+ $(AWK) '{ if ($$1 == "sh_addr:") { print $$2 } }')" $(DYNLIB)
+ $(ELFEDIT) -e 'ehdr:ei_osabi ELFOSABI_NONE' $(DYNLIB)
+ $(ELFEDIT) -e 'ehdr:ei_abiversion 0' $(DYNLIB)
+
+lint: $(LINTLIB) lintcheck
+
+include ../../../../Makefile.targ
+
+pics/%.o: $(ISASRCDIR)/%.s
+ $(COMPILE.s) -o $@ $<
+ $(POST_PROCESS_O)
diff --git a/usr/src/lib/brand/lx/lx_vdso/amd64/Makefile b/usr/src/lib/brand/lx/lx_vdso/amd64/Makefile
new file mode 100644
index 0000000000..cdf6eaa62d
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_vdso/amd64/Makefile
@@ -0,0 +1,34 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2014 Joyent, Inc. All rights reserved.
+#
+
+ISASRCDIR=.
+
+include ../Makefile.com
+include $(SRC)/lib/Makefile.lib.64
+
+#
+# You might ask, why aren't we overriding BUILD.SO in Makefile.com.
+# That's a sad story. The answer is that Makefile.lib.64 includes
+# Makefile.master.64 which redefines BUILD.SO, leaving us in an
+# unfortunate jumble. Therefore we have to redefine it in the
+# lower-level Makefile.
+#
+BUILD.SO = $(LD) -o $@ $(GSHARED) $(DYNFLAGS) $(PICS) $(LDLIBS)
+
+ASSYMDEP_OBJS = lx_vdso.o
+
+CLOBBERFILES = $(ROOTLIBDIR64)/$(DYNLIB) $(ROOTLIBDIR64)/$(LINTLIB)
+
+install: all $(ROOTLIBS64)
diff --git a/usr/src/lib/brand/lx/lx_vdso/amd64/lx_vdso.s b/usr/src/lib/brand/lx/lx_vdso/amd64/lx_vdso.s
new file mode 100644
index 0000000000..6d9e36a1c5
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_vdso/amd64/lx_vdso.s
@@ -0,0 +1,78 @@
+/*
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ */
+
+/*
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/asm_linkage.h>
+
+/*
+ * lx vDSO emulation library
+ *
+ * This so needs to look like the correct Linux vDSO elf library. We cannot
+ * use any native symbols or link with any native libraries, particularly libc.
+ */
+
+#define LX_SYS_gettimeofday 96
+#define LX_SYS_time 201
+#define LX_SYS_clock_gettime 228
+#define LX_SYS_getcpu 309
+
+#if defined(lint)
+int
+__vdso_gettimeofday(void *tp, void *tz)
+{}
+
+time_t
+__vdso_time(void *tp)
+{}
+
+time_t
+__vdso_clock_gettime(uintptr_t id, void *tp)
+{}
+
+int
+__vdso_getcpu(void *cpu, void *np, void *cp)
+{}
+
+#else /* lint */
+
+ /*
+ * We know the arguments are already in the correct registers (e.g. arg0
+ * already in %rdi, arg1 already in %rsi, etc.). %rax has result of call.
+ */
+ ENTRY_NP(__vdso_gettimeofday)
+ movq $LX_SYS_gettimeofday, %rax
+ syscall
+ ret
+ SET_SIZE(__vdso_gettimeofday)
+
+ ENTRY_NP(__vdso_time)
+ movq $LX_SYS_time, %rax
+ syscall
+ ret
+ SET_SIZE(__vdso_time)
+
+ ENTRY_NP(__vdso_clock_gettime)
+ movq $LX_SYS_clock_gettime, %rax
+ syscall
+ ret
+ SET_SIZE(__vdso_clock_gettime)
+
+ ENTRY_NP(__vdso_getcpu)
+ movq $LX_SYS_getcpu, %rax
+ syscall
+ ret
+ SET_SIZE(__vdso_getcpu)
+#endif
diff --git a/usr/src/lib/brand/lx/lx_vdso/common/mapfile-vers b/usr/src/lib/brand/lx/lx_vdso/common/mapfile-vers
new file mode 100644
index 0000000000..11690ce7d6
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_vdso/common/mapfile-vers
@@ -0,0 +1,59 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_VERSION LINUX_2.6 {
+ global:
+ __vdso_gettimeofday;
+ __vdso_clock_gettime;
+ __vdso_getcpu;
+ __vdso_time;
+ local:
+ *;
+};
+
+#
+# The vDSO module in GNU/Linux must only have a single PT_LOAD section.
+# Further, we should not have any data sections at all. Therefore, we go
+# through and explicitly disable several of the writeable sections that
+# might commonly show up.
+#
+LOAD_SEGMENT data {
+ disable;
+};
+
+LOAD_SEGMENT ldata {
+ disable;
+};
+
+LOAD_SEGMENT bss {
+ disable;
+};
+
diff --git a/usr/src/lib/brand/lx/netfiles/Makefile b/usr/src/lib/brand/lx/netfiles/Makefile
new file mode 100644
index 0000000000..47be18db0f
--- /dev/null
+++ b/usr/src/lib/brand/lx/netfiles/Makefile
@@ -0,0 +1,48 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+TXTS = etc_netconfig etc_default_nfs
+NFS_DFL = ../../../../cmd/fs.d/nfs/etc/nfs.dfl
+
+all: $(TXTS)
+
+include ../Makefile.lx
+
+lint:
+
+install: $(ROOTTXTS)
+
+clean:
+ -$(RM) etc_default_nfs
+
+clobber: clean
+ -$(RM) $(ROOTXMLDOCS) $(ROOTTXTS)
+
+etc_default_nfs: $(NFS_DFL)
+ $(RM) $@
+ $(CP) $(NFS_DFL) $@
diff --git a/usr/src/lib/brand/lx/netfiles/etc_netconfig b/usr/src/lib/brand/lx/netfiles/etc_netconfig
new file mode 100644
index 0000000000..56222abf56
--- /dev/null
+++ b/usr/src/lib/brand/lx/netfiles/etc_netconfig
@@ -0,0 +1,38 @@
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# The "Network Configuration" File.
+#
+# Each entry is of the form:
+#
+# <network_id> <semantics> <flags> <protofamily> <protoname> \
+# <device> <nametoaddr_libs>
+#
+# For running solaris daemons in a linux zone we use this non-default
+# /etc/netconfig. The reason is that all name resolution has to be
+# done linux name service interfaces. To do this we specify a custom
+# nametoaddr library that libnsl will invoke to do name service lookups.
+#
+udp tpi_clts v inet udp /dev/udp lx_nametoaddr.so.1
+tcp tpi_cots_ord v inet tcp /dev/tcp lx_nametoaddr.so.1
diff --git a/usr/src/lib/brand/lx/testing/Makefile b/usr/src/lib/brand/lx/testing/Makefile
new file mode 100644
index 0000000000..cce870eeb5
--- /dev/null
+++ b/usr/src/lib/brand/lx/testing/Makefile
@@ -0,0 +1,36 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2014 Joyent, Inc. All rights reserved.
+#
+
+TXTS = ltp_skiplist
+
+all:
+
+include $(SRC)/cmd/Makefile.cmd
+include ../Makefile.lx
+
+all := TARGET= all
+install := TARGET= install
+clobber := TARGET= clobber
+
+install: $(ROOTTXTS)
+
+_msg:
+
+lint:
+
+clean:
+
+clobber:
+ -$(RM) $(ROOTTXTS)
diff --git a/usr/src/lib/brand/lx/testing/Readme_ltp b/usr/src/lib/brand/lx/testing/Readme_ltp
new file mode 100644
index 0000000000..2242ae1db2
--- /dev/null
+++ b/usr/src/lib/brand/lx/testing/Readme_ltp
@@ -0,0 +1,314 @@
+The Linux Test Project (LTP) provides the basis for testing the lx API
+implementation. The project webpage is at http://linux-test-project.github.io/
+and the source is available on GitHub at
+https://github.com/linux-test-project/ltp.git.
+
+LTP should be built and run from within an lx zone.
+
+To build
+--------
+As root first make sure you have the tools installed:
+ apt-get install build-essential autoconf automake git quota
+
+As a normal user:
+ git clone https://github.com/linux-test-project/ltp.git
+ cd ltp
+ make autotools
+ ./configure
+ make all
+
+As root:
+ make install
+
+The tests can be built in both a zone that has been installed with a 32-bit
+version of Linux and another zone that has been installed with a 64-bit version
+of the same release. When testing the 64-bit zone, a copy of the 32-bit build
+can be run in the 64-bit zone to ensure that 32-bit applications work
+correctly on a 64-bit install.
+
+Running the tests
+-----------------
+The LTP source tree provides detailed documentation on using the test suite, so
+this readme only give a short summary.
+
+Because many of the tests are targetted at kernel functionality which does not
+apply to Illumos, or test capabilities which are not available from within a
+zone, or test system call functionality which has not yet been completed, a
+skip list is used during the test run to bypass tests which are known to fail.
+
+The skip list lives in this directory and is delivered on the system. It is
+available from within the lx zone as /native/usr/lib/brand/lx/ltp_skiplist. As
+new functionality is completed, the skip list should be updated to remove tests
+which now work.
+
+As root:
+ cd /opt/ltp
+ /opt/ltp/runltp -f syscalls,nptl,ipc,pipes,pty,math,crashme \
+ -S /native/usr/lib/brand/lx/ltp_skiplist -p >~/test.log
+
+When the test run has finished, the results will be logged in a date/time
+stamped file under /opt/ltp/results. The summary at the end of the log file
+should show "Total Failures: 0". If not, something is wrong.
+
+Running tests for development
+-----------------------------
+The source for the tests can be found under the testcases directory. The
+largest and most useful set for lx live under testcases/kernel/syscalls.
+
+For development purposes, an individual test (or tests) can be run by listing
+them in a command file, one per line. For example, with a command file named
+~/tcmds, to run the read01 test you setup the file so it looks like this:
+ read01 read01
+
+You can run that specific test as follows:
+ /opt/ltp/runltp -f ~/tcmds -p -l ~/read.log
+
+Test status
+-----------
+This section provides a short summary of the rationale for why tests are being
+skipped.
+
+LTP groups tests into command files (i.e. syscalls, nptl, etc. provided with
+the -f option in the runltp command shown above). A complete list of the groups
+can be seen in LTP source tree under the runtest directory. Some of these
+groups are obviously not applicable when running in an lx zone. The remaining
+groups still need work before they can be run. The groups shown in the runltp
+command above are expected to work when the skip list is used. The 'syscalls'
+command file runs the majority of the actual system call tests which we are
+interested in.
+
+The following table indicates why specific subtests are being skipped. Also
+note that the following tests pass in a 64-bit lx zone, but fail in a zone
+installed with a 32-bit Linux build: mmap15, open12, openat02 and sendfile09.
+
+ Legend:
+ x = never in a zone
+ * = fails on kvm and bare metal too
+ # = emulation not implemented yet
+ - = could enable with a test zone config change
+
+- access06 wants a block device
+x acct01 enables bsd process accounting
+# add_key01
+# add_key02
+x adjtimex01
+x adjtimex02
+x bdflush01
+x cacheflush01
+x chmod03 need PRIV_SYS_CONFIG to set sticky bit on reg file
+- chmod06 needs dev
+x chmod07 need PRIV_SYS_CONFIG to set sticky bit on reg file
+- chown04 needs dev
+- chown04_16 needs dev
+# clone02
+# clone08
+- creat06 wants to mount a ro fs
+x creat07 we don't behave this way for ETXTBSY
+x creat08 sets euid to 'nobody', loses PRIV_FILE_SETID to set sgid
+# eventfd01
+# eventfd2_01
+# eventfd2_02
+# eventfd2_03
+x execve04 we don't behave this way for ETXTBSY
+# fallocate01
+# fallocate02
+# fallocate03
+x fchmod02 need PRIV_SYS_CONFIG to set sticky bit on reg file
+x fchmod03 need PRIV_SYS_CONFIG to set sticky bit on reg file
+- fchmod06 needs dev
+# fchown04 mounts
+# fchown04_16
+# fcntl06 not supported on linux
+# fcntl06_64
+# fcntl23 leases not implemented
+# fcntl23_64 "
+# fcntl24 "
+# fcntl24_64 "
+# fcntl25 "
+# fcntl25_64 "
+# fcntl26 "
+# fcntl26_64 "
+# fcntl30
+# fcntl30_64
+# fcntl31 setown/getown not impl
+# fcntl31_64
+# flock01 OS-2868
+# flock03 "
+# flock04 "
+# flock06 "
+# fork05 asm into %fs reg
+- fork09 needs a swap cap of ~9GB
+# fork13 decided not to support this
+# fork14 "
+# ftruncate04 need a mnt with mandatory locking
+# ftruncate04_64
+x getcpu01
+# getdents02 wrong errno on test 4 - perf. impact too high
+# getdents02_64
+# get_mempolicy01
+# get_robust_list01 OS-3224
+x getrusage03 we don't fill in the ru_maxrss field
+- getxattr01 need attr/xattr.h at build time
+- getxattr02
+- getxattr03
+# ioctl03 needs /dev/net/tun
+# io_cancel01 libaio stuff not done
+# io_destroy01
+# io_getevents01
+# io_setup01
+# io_submit01
+# inotify_init1_01
+# inotify_init1_02
+# inotify01
+# inotify02
+# inotify03
+# inotify04
+# inotify05
+# fanotify01 don't have fanotify
+# fanotify02
+# fanotify03
+# fanotify04
+# fanotify05
+# keyctl01 no kernel keyring support
+- lchown03 needs to mount ro fs
+- lchown03_16
+- linkat02 needs dev
+- link08 needs dev
+x mem01 crashme test which expects OOM killer to save the day
+- mkdir03 needs dev
+- mkdirat02 needs dev
+x mknod01 makes block and chr devs
+- mknod07 needs dev
+- mknodat02 needs dev
+# mmap13 expects "invalid access" to SIGBUS
+- mount01 needs dev
+- mount02 needs dev
+x mount03 mounts ext2
+- mount04 needs dev
+x mount05 mounts ext2
+x mount06 mounts ext2
+# mq_notify01
+# mq_notify02
+# mq_open01
+# mq_timedreceive01
+# mq_timedsend01
+# mq_unlink01
+x mremap01
+x mremap02
+x mremap03
+x mremap04
+x mremap05
+# msgctl12 uses MSG_STAT
+# msgrcv07 MSG_EXCEPT subtest - not avail.
+x open01 need PRIV_SYS_CONFIG to set sticky bit on reg file
+# open02 expects NOATIME to cause err for unpriv user
+# open10 setgid on sgid subdir behavior
+x open11 makes device
+# ppoll01
+# prctl01 get/set deathsig
+# prctl02 more deathsig
+# process_vm_readv01
+# process_vm_readv02
+# process_vm_readv03
+# process_vm_writev01
+# process_vm_writev02
+# prot_hsymlinks /proc/sys/fs/protected_hardlinks
+x ptrace04 not supp on our arch
+# ptrace05 OS-3307
+# read02 checks errno for O_DIRECT
+# readahead01
+# readahead02
+# readdir21 dir perf. issue
+- rename11 needs dev
+- renameat01 needs dev
+- rmdir02 needs dev
+x sched_getparam01 assumes Linux SCHED_OTHER return value
+x sched_getparam02 assumes Linux SCHED_OTHER return value
+# sched_rr_get_interval01
+# sched_rr_get_interval02
+# sched_rr_get_interval03
+x sched_setparam02 tries to set Linux policies
+x sched_setparam03 tries to set Linux policies
+# sched_getscheduler01
+# sched_getscheduler02
+x sched_setscheduler01
+x sched_setaffinity01
+x sched_getaffinity01
+# semctl01 all pass but SEM_STAT - linux specific
+# semop02 last test fails errno - expensive
+# sendfile02 OS-3296
+# sendfile02_64 "
+# sendfile04 "
+# sendfile04_64 "
+# sendfile06 "
+# sendfile06_64 "
+# sendfile07 "
+# sendfile07_64 "
+sendmsg01 OS-3295 - tests actually pass
+x setfsuid04 no real equiv. and only for NFS server
+x setfsuid04_16
+# sgetmask01 obsolete
+# setgroups04_16 expects sig11 for certain err
+# setns01
+# setns02
+# setpgid02 all pass but one, expects pid 0 to be there
+* setregid02 fails on bare metal, expects to lookup group "nobody" which
+* setregid02_16 doesn't exist. it is "nogroup" on ubuntu at least
+# setrlimit01 all pass but one, expects to set proc limit
+# set_robust_list01 OS-3224
+x settimeofday01
+# setxattr01
+# setxattr02
+# setxattr03
+# shmget05 OS-3326
+# sighold02 fails on sig 63 OS-3307
+# signalfd01
+# signalfd4_01
+# signalfd4_02
+# sigrelse01 fails on sig 63 OS-3307
+# splice01
+# splice02
+# splice03
+# tee01
+# tee02
+# ssetmask01 obsolete
+x stime01
+x switch01
+# sync_file_range01
+# sysconf01 most pass but see OS-3305
+# sysctl01 the build compiled this out,
+# sysctl03 this syscall is basically obsolete
+# sysctl04 obsolete
+# sysctl05
+# syslog01
+# syslog02
+# syslog03
+# syslog04
+# syslog05
+# syslog06
+# syslog07
+# syslog08
+# syslog09
+# syslog10
+# syslog11
+# syslog12
+# timerfd01
+# timerfd02
+# timerfd03
+# timerfd_create01
+# timerfd_gettime01
+# timerfd_settime01
+# unshare01
+# unshare02
+- umount01 needs dev
+- umount02 needs dev
+- umount03 needs dev
+x ustat01 obsolete call to stat FS
+x ustat02 obsolete call to stat FS
+- utime06 needs dev
+- utimes01 needs dev
+# utimensat01 many subtests pass but see OS-3328 for the rest
+# vmsplice01
+# vmsplice02
+# perf_event_open01
+# perf_event_open02
diff --git a/usr/src/lib/brand/lx/testing/ltp_skiplist b/usr/src/lib/brand/lx/testing/ltp_skiplist
new file mode 100644
index 0000000000..d4003547e7
--- /dev/null
+++ b/usr/src/lib/brand/lx/testing/ltp_skiplist
@@ -0,0 +1,256 @@
+# tests functionality not allowed in a zone
+accept4_01
+acct01
+adjtimex01
+adjtimex02
+bdflush01
+cacheflush01
+chmod03
+chmod07
+creat07
+creat08
+execve04
+fchmod02
+fchmod03
+getrusage03
+getcpu01
+ioperm01
+ioperm02
+iopl01
+iopl02
+isofs
+mbind01
+mem01
+migrate_pages01
+migrate_pages02
+mknod01
+mlockall02
+mlockall03
+mlock02
+mmap12
+mount03
+mount05
+mount06
+modify_ldt01
+modify_ldt02
+move_pages01
+move_pages02
+move_pages03
+move_pages04
+move_pages05
+move_pages06
+move_pages07
+move_pages08
+move_pages09
+move_pages10
+move_pages11
+mremap01
+mremap02
+mremap03
+mremap04
+mremap05
+open01
+open11
+ptrace04
+quotactl01
+quotactl02
+remap_file_pages01
+remap_file_pages02
+sched_getparam01
+sched_getparam02
+sched_setscheduler01
+sched_setaffinity01
+sched_getaffinity01
+sched_setparam02
+sched_setparam03
+setfsuid04
+setfsuid04_16
+settimeofday01
+stime01
+swapoff01
+swapoff02
+swapon01
+swapon02
+swapon03
+switch01
+ustat01
+ustat02
+
+# needs a config with a dev or mounting
+chmod06
+chown04
+chown04_16
+creat06
+fchmod06
+fchown04
+fchown04_16
+inotify03
+lchown03
+lchown03_16
+linkat02
+link08
+mkdir03
+mkdirat02
+mknod07
+mknodat02
+mount01
+mount02
+mount04
+rename11
+renameat01
+rmdir02
+umount01
+umount02
+umount03
+utime06
+utimes01
+
+# tests functionality not implemented yet
+access06
+add_key01
+add_key02
+clone02
+clone08
+eventfd01
+eventfd2_01
+eventfd2_02
+eventfd2_03
+fallocate01
+fallocate01
+fallocate01
+fanotify01
+fanotify02
+fanotify03
+fanotify04
+fanotify05
+fcntl06
+fcntl23
+fcntl23_64
+fcntl24
+fcntl24_64
+fcntl25
+fcntl25_64
+fcntl26
+fcntl26_64
+fcntl30
+fcntl30_64
+fcntl31
+fcntl31_64
+flock01
+flock03
+flock04
+flock06
+fork05
+fork09
+fork13
+fork14
+ftruncate04
+ftruncate04_64
+getdents02
+getdents02_64
+get_mempolicy01
+get_robust_list01
+ioctl03
+io_cancel01
+io_destroy01
+io_getevents01
+io_setup01
+io_submit01
+keyctl01
+mmap13
+mq_notify01
+mq_notify02
+mq_open01
+mq_timedreceive01
+mq_timedsend01
+mq_unlink01
+msgctl12
+msgrcv07
+open02
+open10
+perf_event_open01
+perf_event_open02
+ppoll01
+prctl01
+prctl02
+process_vm_readv01
+process_vm_readv02
+process_vm_readv03
+process_vm_writev01
+process_vm_writev02
+prot_hsymlinks
+ptrace05
+read02
+readahead01
+readahead02
+readdir2
+readdir21
+sched_rr_get_interval01
+sched_rr_get_interval02
+sched_rr_get_interval03
+sched_getscheduler01
+sched_getscheduler02
+semctl01
+semop02
+sendfile02
+sendfile02_64
+sendfile04
+sendfile04_64
+sendfile06
+sendfile06_64
+sendfile07
+sendfile07_64
+sendmsg01
+setgroups04_16
+setns01
+setns02
+setpgid02
+setregid02
+setregid02_16
+setrlimit01
+set_robust_list01
+setxattr01
+setxattr02
+setxattr03
+sgetmask01
+shmget05
+sighold02
+signalfd01
+signalfd4_01
+signalfd4_02
+sigrelse01
+splice01
+splice02
+splice03
+ssetmask01
+sync_file_range01
+sysconf01
+sysctl01
+sysctl03
+sysctl04
+sysctl05
+syslog01
+syslog02
+syslog03
+syslog04
+syslog05
+syslog06
+syslog07
+syslog08
+syslog09
+syslog10
+syslog11
+syslog12
+tee01
+tee02
+timerfd01
+timerfd02
+timerfd03
+timerfd_create01
+timerfd_gettime01
+timerfd_settime01
+unshare01
+utimensat01
+unshare02
+vmsplice01
+vmsplice02
diff --git a/usr/src/lib/brand/lx/zone/Makefile b/usr/src/lib/brand/lx/zone/Makefile
new file mode 100644
index 0000000000..d72586f9e1
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/Makefile
@@ -0,0 +1,75 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2014 Joyent, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+PROGS = lx_install lx_distro_install lx_init_zone lx_boot
+PROGS += lx_init_zone_debian lx_init_zone_redhat lx_init_zone_ubuntu
+PROGS += lx_networking lx_boot_zone_ubuntu
+SUBDIRS = distros
+XMLDOCS = config.xml platform.xml
+TEMPLATES = SUNWlx.xml SUNWlx26.xml
+
+all: $(PROGS)
+
+include $(SRC)/cmd/Makefile.cmd
+include ../Makefile.lx
+
+all := TARGET= all
+install := TARGET= install
+clobber := TARGET= clobber
+
+POFILES= $(PROGS:%=%.po)
+POFILE= lx_zone.po
+
+$(POFILE): $(POFILES)
+ $(RM) $@
+ $(BUILDPO.pofiles)
+
+_msg: $(MSGDOMAINPOFILE)
+
+install: $(PROGS) $(ROOTXMLDOCS) $(ROOTTEMPLATES) $(ROOTPROGS) $(SUBDIRS)
+ mkdir -p $(ROOT)/usr/lib/brand/lx/ld/64
+ crle -c $(ROOT)/usr/lib/brand/lx/ld/ld.config \
+ -l /native/lib:/native/usr/lib \
+ -s /native/lib/secure:/native/usr/lib/secure
+ crle -64 -c $(ROOT)/usr/lib/brand/lx/ld/64/ld.config \
+ -l /native/lib/64:/native/usr/lib/64 \
+ -s /native/lib/secure/64:/native/usr/lib/secure/64
+
+lint:
+
+clean:
+ -$(RM) $(PROGS)
+
+clobber: clean $(SUBDIRS)
+ -$(RM) $(ROOTXMLDOCS) $(ROOTPROGS) $(ROOTTEMPLATES)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include $(SRC)/Makefile.msg.targ
diff --git a/usr/src/lib/brand/lx/zone/SUNWlx.xml b/usr/src/lib/brand/lx/zone/SUNWlx.xml
new file mode 100644
index 0000000000..04c38873de
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/SUNWlx.xml
@@ -0,0 +1,34 @@
+<?xml version="1.0"?>
+
+<!--
+ Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ Use is subject to license terms.
+
+ CDDL HEADER START
+
+ The contents of this file are subject to the terms of the
+ Common Development and Distribution License (the "License").
+ You may not use this file except in compliance with the License.
+
+ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ or http://www.opensolaris.org/os/licensing.
+ See the License for the specific language governing permissions
+ and limitations under the License.
+
+ When distributing Covered Code, include this CDDL HEADER in each
+ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ If applicable, add the following below this CDDL HEADER, with the
+ fields enclosed by brackets "[]" replaced with your own identifying
+ information: Portions Copyright [yyyy] [name of copyright owner]
+
+ CDDL HEADER END
+
+ ident "%Z%%M% %I% %E% SMI"
+
+ DO NOT EDIT THIS FILE. Use zonecfg(1M) instead.
+-->
+
+<!DOCTYPE zone PUBLIC "-//Sun Microsystems Inc//DTD Zones//EN" "file:///usr/share/lib/xml/dtd/zonecfg.dtd.1">
+
+<zone name="default" zonepath="" autoboot="false" brand="lx">
+</zone>
diff --git a/usr/src/lib/brand/lx/zone/SUNWlx26.xml b/usr/src/lib/brand/lx/zone/SUNWlx26.xml
new file mode 100644
index 0000000000..9bd8af4d92
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/SUNWlx26.xml
@@ -0,0 +1,35 @@
+<?xml version="1.0"?>
+
+<!--
+ Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ Use is subject to license terms.
+
+ CDDL HEADER START
+
+ The contents of this file are subject to the terms of the
+ Common Development and Distribution License (the "License").
+ You may not use this file except in compliance with the License.
+
+ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ or http://www.opensolaris.org/os/licensing.
+ See the License for the specific language governing permissions
+ and limitations under the License.
+
+ When distributing Covered Code, include this CDDL HEADER in each
+ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ If applicable, add the following below this CDDL HEADER, with the
+ fields enclosed by brackets "[]" replaced with your own identifying
+ information: Portions Copyright [yyyy] [name of copyright owner]
+
+ CDDL HEADER END
+
+ ident "%Z%%M% %I% %E% SMI"
+
+ DO NOT EDIT THIS FILE. Use zonecfg(1M) instead.
+-->
+
+<!DOCTYPE zone PUBLIC "-//Sun Microsystems Inc//DTD Zones//EN" "file:///usr/share/lib/xml/dtd/zonecfg.dtd.1">
+
+<zone name="default" zonepath="" autoboot="false" brand="lx">
+ <attr name="kernel-version" type="string" value="2.6"/>
+</zone>
diff --git a/usr/src/lib/brand/lx/zone/config.xml b/usr/src/lib/brand/lx/zone/config.xml
new file mode 100644
index 0000000000..c8ad559870
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/config.xml
@@ -0,0 +1,104 @@
+<?xml version="1.0"?>
+
+<!--
+ CDDL HEADER START
+
+ The contents of this file are subject to the terms of the
+ Common Development and Distribution License (the "License").
+ You may not use this file except in compliance with the License.
+
+ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ or http://www.opensolaris.org/os/licensing.
+ See the License for the specific language governing permissions
+ and limitations under the License.
+
+ When distributing Covered Code, include this CDDL HEADER in each
+ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ If applicable, add the following below this CDDL HEADER, with the
+ fields enclosed by brackets "[]" replaced with your own identifying
+ information: Portions Copyright [yyyy] [name of copyright owner]
+
+ CDDL HEADER END
+
+ Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ Copyright 2014, Joyent, Inc. All rights reserved.
+
+ DO NOT EDIT THIS FILE.
+-->
+
+<!DOCTYPE brand PUBLIC "-//Sun Microsystems Inc//DTD Brands//EN"
+ "file:///usr/share/lib/xml/dtd/brand.dtd.1">
+
+<brand name="lx">
+ <modname>lx_brand</modname>
+
+ <initname>/sbin/init</initname>
+ <login_cmd>/bin/login -h zone:%Z %u</login_cmd>
+ <forcedlogin_cmd>/bin/login -h zone:%Z -f %u</forcedlogin_cmd>
+ <user_cmd>/usr/bin/getent passwd %u</user_cmd>
+
+ <install>/usr/lib/brand/lx/lx_install %z %R</install>
+ <installopts>d:hsvX</installopts>
+ <boot>/usr/lib/brand/lx/lx_boot %z %R</boot>
+ <halt>/usr/lib/brand/lx/lx_support halt %R %z</halt>
+ <verify_cfg>/usr/lib/brand/lx/lx_support verify</verify_cfg>
+ <verify_adm></verify_adm>
+ <postclone></postclone>
+ <postinstall></postinstall>
+
+ <privilege set="default" name="contract_event" />
+ <privilege set="default" name="contract_identity" />
+ <privilege set="default" name="contract_observer" />
+ <privilege set="default" name="dtrace_proc" />
+ <privilege set="default" name="dtrace_user" />
+ <privilege set="default" name="file_chown" />
+ <privilege set="default" name="file_chown_self" />
+ <privilege set="default" name="file_dac_execute" />
+ <privilege set="default" name="file_dac_read" />
+ <privilege set="default" name="file_dac_search" />
+ <privilege set="default" name="file_dac_write" />
+ <privilege set="default" name="file_owner" />
+ <privilege set="default" name="file_setid" />
+ <privilege set="default" name="ipc_dac_read" />
+ <privilege set="default" name="ipc_dac_write" />
+ <privilege set="default" name="ipc_owner" />
+ <privilege set="default" name="net_bindmlp" />
+ <privilege set="default" name="net_icmpaccess" />
+ <privilege set="default" name="net_mac_aware" />
+ <privilege set="default" name="net_privaddr" />
+ <privilege set="default" name="net_rawaccess" ip-type="exclusive" />
+ <privilege set="default" name="proc_chroot" />
+ <privilege set="default" name="sys_audit" />
+ <privilege set="default" name="proc_audit" />
+ <privilege set="default" name="proc_lock_memory" />
+ <privilege set="default" name="proc_owner" />
+ <privilege set="default" name="proc_setid" />
+ <privilege set="default" name="proc_prioup" />
+ <privilege set="default" name="proc_taskid" />
+ <privilege set="default" name="sys_acct" />
+ <privilege set="default" name="sys_admin" />
+ <privilege set="default" name="sys_ip_config" ip-type="exclusive" />
+ <privilege set="default" name="sys_iptun_config" ip-type="exclusive" />
+ <privilege set="default" name="sys_mount" />
+ <privilege set="default" name="sys_nfs" />
+ <privilege set="default" name="sys_resource" />
+ <privilege set="default" name="sys_ppp_config" ip-type="exclusive" />
+
+ <privilege set="prohibited" name="dtrace_kernel" />
+ <privilege set="prohibited" name="proc_zone" />
+ <privilege set="prohibited" name="sys_config" />
+ <privilege set="prohibited" name="sys_devices" />
+ <privilege set="prohibited" name="sys_ip_config" ip-type="shared" />
+ <privilege set="prohibited" name="sys_linkdir" />
+ <privilege set="prohibited" name="sys_net_config" />
+ <privilege set="prohibited" name="sys_ppp_config" ip-type="shared" />
+ <privilege set="prohibited" name="sys_res_config" />
+ <privilege set="prohibited" name="sys_suser_compat" />
+ <privilege set="prohibited" name="xvm_control" />
+ <privilege set="prohibited" name="virt_manage" />
+
+ <privilege set="required" name="proc_exec" />
+ <privilege set="required" name="proc_fork" />
+ <privilege set="required" name="sys_ip_config" ip-type="exclusive" />
+ <privilege set="required" name="sys_mount" />
+</brand>
diff --git a/usr/src/lib/brand/lx/zone/distros/Makefile b/usr/src/lib/brand/lx/zone/distros/Makefile
new file mode 100644
index 0000000000..7b5a600c94
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/distros/Makefile
@@ -0,0 +1,50 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+include ../../Makefile.lx
+
+DISTROS = centos35.distro centos36.distro centos37.distro \
+ centos38.distro rhel35.distro rhel36.distro rhel37.distro \
+ rhel38.distro rhel_centos_common
+
+ROOTDISTRODIR= $(ROOTBRANDDIR)/distros
+ROOTDISTROS= $(DISTROS:%=$(ROOTDISTRODIR)/%)
+
+$(ROOTDISTROS) := FILEMODE = 444
+
+$(ROOTDISTRODIR):
+ $(INS.dir)
+
+$(ROOTDISTRODIR)/%: %
+ $(INS.file)
+
+install: $(ROOTDISTROS)
+
+lint clean all:
+
+clobber:
+ -$(RM) $(ROOTDISTROS)
+
diff --git a/usr/src/lib/brand/lx/zone/distros/centos35.distro b/usr/src/lib/brand/lx/zone/distros/centos35.distro
new file mode 100644
index 0000000000..cb5c2add9f
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/distros/centos35.distro
@@ -0,0 +1,66 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+#
+# Installation information for the CentOS 3.5 distribution disc set:
+#
+# + Serial number (as found in the disc set's .discinfo file)
+# + Version Name
+# + Order CDs holding the distribution must be installed in
+# + MB of disk space required to hold a full install of the distribution
+#
+distro_serial=1118161135.08
+distro_version="3.5"
+set -A distro_cdorder 1 2 3
+
+distro_mb_required=500
+
+# Include the common_<cluster>_* definitions.
+. ${distro_dir}/rhel_centos_common
+
+# Define the CentOS 3.5 deltas from the common cluster lists
+delta_miniroot_rpms=centos-release
+delta_core_rpms="centos-yumconf centos-yumcache yum"
+delta_server_rpms=$delta_core_rpms
+delta_desktop_rpms="$delta_server_rpms \
+ mozilla \
+ mozilla-chat \
+ mozilla-dom-inspector \
+ mozilla-js-debugger \
+ mozilla-mail \
+ mozilla-nspr \
+ mozilla-nss \
+ openoffice.org-style-gnome"
+delta_developer_rpms=$delta_desktop_rpms
+delta_all_rpms=$delta_developer_rpms
+
+# Define the final cluster lists for the installer
+distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms"
+distro_core_rpms="$common_core_rpms $delta_core_rpms"
+distro_server_rpms="$common_server_rpms $delta_server_rpms"
+distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms"
+distro_developer_rpms="$common_developer_rpms $delta_developer_rpms"
+distro_all_rpms="$common_all_rpms $delta_all_rpms"
diff --git a/usr/src/lib/brand/lx/zone/distros/centos36.distro b/usr/src/lib/brand/lx/zone/distros/centos36.distro
new file mode 100644
index 0000000000..8dbc4307ac
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/distros/centos36.distro
@@ -0,0 +1,66 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+#
+# Installation information for the CentOS 3.6 distribution disc set:
+#
+# + Serial number (as found in the disc set's .discinfo file)
+# + Version Name
+# + Order CDs holding the distribution must be installed in
+# + MB of disk space required to hold a full install of the distribution
+#
+distro_serial=1130453594.8
+distro_version="3.6"
+set -A distro_cdorder 1 2 3
+
+distro_mb_required=500
+
+# Include the common_<cluster>_* definitions.
+. ${distro_dir}/rhel_centos_common
+
+# Define the CentOS 3.6 deltas from the common cluster lists
+delta_miniroot_rpms=centos-release
+delta_core_rpms="centos-yumconf centos-yumcache yum"
+delta_server_rpms=$delta_core_rpms
+delta_desktop_rpms="$delta_server_rpms \
+ mozilla \
+ mozilla-chat \
+ mozilla-dom-inspector \
+ mozilla-js-debugger \
+ mozilla-mail \
+ mozilla-nspr \
+ mozilla-nss \
+ openoffice.org-style-gnome"
+delta_developer_rpms="$delta_desktop_rpms gd-progs"
+delta_all_rpms="$delta_developer_rpms emacs-nox"
+
+# Define the final cluster lists for the installer
+distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms"
+distro_core_rpms="$common_core_rpms $delta_core_rpms"
+distro_server_rpms="$common_server_rpms $delta_server_rpms"
+distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms"
+distro_developer_rpms="$common_developer_rpms $delta_developer_rpms"
+distro_all_rpms="$common_all_rpms $delta_all_rpms"
diff --git a/usr/src/lib/brand/lx/zone/distros/centos37.distro b/usr/src/lib/brand/lx/zone/distros/centos37.distro
new file mode 100644
index 0000000000..f8ac5e0fb1
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/distros/centos37.distro
@@ -0,0 +1,65 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+#
+# Installation information for the CentOS 3.7 distribution disc set:
+#
+# + Serial number (as found in the disc set's .discinfo file)
+# + Version Name
+# + Order CDs holding the distribution must be installed in
+# + MB of disk space required to hold a full install of the distribution
+#
+distro_serial=1144177644.47
+distro_version="3.7"
+set -A distro_cdorder 1 2 3
+
+distro_mb_required=500
+
+# Include the common_<cluster>_* definitions.
+. ${distro_dir}/rhel_centos_common
+
+# Define the CentOS 3.7 deltas from the common cluster lists
+delta_miniroot_rpms=centos-release
+delta_core_rpms="centos-yumconf centos-yumcache yum"
+delta_server_rpms="$delta_core_rpms nss_db-compat sendmail-doc qt-config"
+delta_desktop_rpms="$delta_server_rpms \
+ mozilla \
+ mozilla-chat \
+ mozilla-dom-inspector \
+ mozilla-js-debugger \
+ mozilla-mail \
+ mozilla-nspr \
+ mozilla-nss"
+delta_developer_rpms="$delta_desktop_rpms gd-progs ruby-docs irb ruby-tcltk"
+delta_all_rpms="$delta_developer_rpms emacs-nox"
+
+# Define the final cluster lists for the installer
+distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms"
+distro_core_rpms="$common_core_rpms $delta_core_rpms"
+distro_server_rpms="$common_server_rpms $delta_server_rpms"
+distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms"
+distro_developer_rpms="$common_developer_rpms $delta_developer_rpms"
+distro_all_rpms="$common_all_rpms $delta_all_rpms"
diff --git a/usr/src/lib/brand/lx/zone/distros/centos38.distro b/usr/src/lib/brand/lx/zone/distros/centos38.distro
new file mode 100644
index 0000000000..22ae2e43b2
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/distros/centos38.distro
@@ -0,0 +1,79 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+#
+# Installation information for the CentOS 3.8 distribution disc set:
+#
+# + Serial number (as found in the disc set's .discinfo file)
+# + Version Name
+# + Order CDs holding the distribution must be installed in
+# + MB of disk space required to hold a full install of the distribution
+#
+distro_serial=1155307611.42
+distro_version="3.8"
+set -A distro_cdorder 1 2 3
+
+distro_mb_required=500
+
+# Include the common_<cluster>_* definitions.
+. ${distro_dir}/rhel_centos_common
+
+# Define the CentOS 3.8 deltas from the common cluster lists
+delta_miniroot_rpms=centos-release
+delta_core_rpms="centos-yumconf centos-yumcache yum"
+delta_server_rpms="$delta_core_rpms nss_db-compat sendmail-doc qt-config"
+delta_desktop_rpms="$delta_server_rpms \
+ expectk \
+ seamonkey \
+ seamonkey-chat \
+ seamonkey-mail \
+ seamonkey-nspr \
+ seamonkey-nss \
+ tcl-html \
+ tcllib"
+delta_developer_rpms="$delta_desktop_rpms \
+ gd-progs \
+ freetype-demos \
+ freetype-utils \
+ glibc-debug \
+ irb \
+ python-docs \
+ ruby-docs \
+ ruby-tcltk \
+ seamonkey-dom-inspector \
+ seamonkey-js-debugger \
+ seamonkey-devel \
+ seamonkey-nspr-devel \
+ seamonkey-nss-devel"
+delta_all_rpms="$delta_developer_rpms emacs-nox"
+
+# Define the final cluster lists for the installer
+distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms"
+distro_core_rpms="$common_core_rpms $delta_core_rpms"
+distro_server_rpms="$common_server_rpms $delta_server_rpms"
+distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms"
+distro_developer_rpms="$common_developer_rpms $delta_developer_rpms"
+distro_all_rpms="$common_all_rpms $delta_all_rpms"
diff --git a/usr/src/lib/brand/lx/zone/distros/rhel35.distro b/usr/src/lib/brand/lx/zone/distros/rhel35.distro
new file mode 100644
index 0000000000..0b6b23ae52
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/distros/rhel35.distro
@@ -0,0 +1,98 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+#
+# Installation information for the RHEL 3 Update 5 distribution disc set:
+#
+# + Serial number (as found in the disc set's .discinfo file)
+# + Version Name
+# + Order CDs holding the distribution must be installed in
+# + MB of disk space required to hold a full install of the distribution
+#
+distro_serial=1115874580.003298
+distro_version="Update 5"
+set -A distro_cdorder 2 3 4 1
+
+distro_mb_required=500
+
+# Include the common_<cluster>_* definitions.
+. ${distro_dir}/rhel_centos_common
+
+# Define the RHEL 3.5 deltas from the common cluster lists
+delta_miniroot_rpms=redhat-release
+delta_core_rpms=""
+delta_server_rpms=$delta_core_rpms
+delta_desktop_rpms="$delta_server_rpms \
+ mozilla \
+ mozilla-chat \
+ mozilla-dom-inspector \
+ mozilla-js-debugger \
+ mozilla-mail \
+ mozilla-nspr \
+ mozilla-nss \
+ openoffice.org-style-gnome"
+delta_developer_rpms=$delta_desktop_rpms
+delta_all_rpms="$delta_developer_rpms comps"
+
+# Define the final cluster lists for the installer
+distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms"
+distro_core_rpms="$common_core_rpms $delta_core_rpms"
+distro_server_rpms="$common_server_rpms $delta_server_rpms"
+distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms"
+distro_developer_rpms="$common_developer_rpms $delta_developer_rpms"
+distro_all_rpms="$common_all_rpms $delta_all_rpms"
+
+#
+# List of packages missing from the "WS" personality of this distribution
+# as compared to the "AS" personality.
+#
+distro_WS_missing="amanda-server \
+ caching-nameserver \
+ finger-server \
+ freeradius \
+ inews \
+ inn \
+ krb5-server \
+ netdump-server \
+ openldap-servers \
+ pxe \
+ quagga \
+ radvd \
+ redhat-config-bind \
+ samba-swat \
+ tftp-server \
+ tux \
+ vsftpd \
+ ypserv \
+ arptables_jf \
+ mtx \
+ redhat-config-netboot"
+
+#
+# No packages are missing from the "ES" personality as compared to the "AS"
+# personality.
+#
+unset distro_ES_missing
diff --git a/usr/src/lib/brand/lx/zone/distros/rhel36.distro b/usr/src/lib/brand/lx/zone/distros/rhel36.distro
new file mode 100644
index 0000000000..51c80832ff
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/distros/rhel36.distro
@@ -0,0 +1,97 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+#
+# Installation information for the RHEL 3 Update 6 distribution disc set:
+#
+# + Serial number (as found in the disc set's .discinfo file)
+# + Version Name
+# + Order CDs holding the distribution must be installed in
+# + MB of disk space required to hold a full install of the distribution
+#
+distro_serial=1127323691.616555
+distro_version="Update 6"
+set -A distro_cdorder 2 3 4 1
+
+distro_mb_required=500
+
+# Include the common_<cluster>_* definitions.
+. ${distro_dir}/rhel_centos_common
+
+# Define the RHEL 3.6 deltas from the common cluster lists
+delta_miniroot_rpms=redhat-release
+delta_core_rpms=""
+delta_server_rpms=$delta_core_rpms
+delta_desktop_rpms="$delta_server_rpms \
+ mozilla \
+ mozilla-chat \
+ mozilla-dom-inspector \
+ mozilla-js-debugger \
+ mozilla-mail \
+ mozilla-nspr \
+ mozilla-nss \
+ openoffice.org-style-gnome"
+delta_developer_rpms="$delta_desktop_rpms gd-progs"
+delta_all_rpms="$delta_developer_rpms emacs-nox comps"
+
+# Define the final cluster lists for the installer
+distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms"
+distro_core_rpms="$common_core_rpms $delta_core_rpms"
+distro_server_rpms="$common_server_rpms $delta_server_rpms"
+distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms"
+distro_developer_rpms="$common_developer_rpms $delta_developer_rpms"
+distro_all_rpms="$common_all_rpms $delta_all_rpms"
+
+#
+# List of packages missing from the "WS" personality of this distribution
+# as compared to the "AS" personality.
+#
+distro_WS_missing="amanda-server \
+ caching-nameserver \
+ finger-server \
+ freeradius \
+ inews \
+ inn \
+ netdump-server \
+ openldap-servers \
+ pxe \
+ quagga \
+ radvd \
+ redhat-config-bind \
+ samba-swat \
+ tftp-server \
+ tux \
+ vsftpd \
+ ypserv \
+ arptables_jf \
+ mtx \
+ redhat-config-netboot"
+
+#
+# No packages are missing from the "ES" personality as compared to the "AS"
+# personality.
+#
+unset distro_ES_missing
diff --git a/usr/src/lib/brand/lx/zone/distros/rhel37.distro b/usr/src/lib/brand/lx/zone/distros/rhel37.distro
new file mode 100644
index 0000000000..2c3b81d82b
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/distros/rhel37.distro
@@ -0,0 +1,96 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+#
+# Installation information for the RHEL 3 Update 7 distribution disc set:
+#
+# + Serial number (as found in the disc set's .discinfo file)
+# + Version Name
+# + Order CDs holding the distribution must be installed in
+# + MB of disk space required to hold a full install of the distribution
+#
+distro_serial=1141679045.364586
+distro_version="Update 7"
+set -A distro_cdorder 2 3 4 1
+
+distro_mb_required=500
+
+# Include the common_<cluster>_* definitions.
+. ${distro_dir}/rhel_centos_common
+
+# Define the RHEL 3.7 deltas from the common cluster lists
+delta_miniroot_rpms=redhat-release
+delta_core_rpms=""
+delta_server_rpms="$delta_core_rpms nss_db-compat sendmail-doc qt-config"
+delta_desktop_rpms="$delta_server_rpms \
+ mozilla \
+ mozilla-chat \
+ mozilla-dom-inspector \
+ mozilla-js-debugger \
+ mozilla-mail \
+ mozilla-nspr \
+ mozilla-nss"
+delta_developer_rpms="$delta_desktop_rpms gd-progs ruby-docs irb ruby-tcltk"
+delta_all_rpms="$delta_developer_rpms emacs-nox comps"
+
+# Define the final cluster lists for the installer
+distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms"
+distro_core_rpms="$common_core_rpms $delta_core_rpms"
+distro_server_rpms="$common_server_rpms $delta_server_rpms"
+distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms"
+distro_developer_rpms="$common_developer_rpms $delta_developer_rpms"
+distro_all_rpms="$common_all_rpms $delta_all_rpms"
+
+#
+# List of packages missing from the "WS" personality of this distribution
+# as compared to the "AS" personality.
+#
+distro_WS_missing="amanda-server \
+ caching-nameserver \
+ finger-server \
+ freeradius \
+ inews \
+ inn \
+ netdump-server \
+ openldap-servers \
+ pxe \
+ quagga \
+ radvd \
+ redhat-config-bind \
+ samba-swat \
+ tftp-server \
+ tux \
+ vsftpd \
+ ypserv \
+ arptables_jf \
+ mtx \
+ redhat-config-netboot"
+
+#
+# No packages are missing from the "ES" personality as compared to the "AS"
+# personality.
+#
+unset distro_ES_missing
diff --git a/usr/src/lib/brand/lx/zone/distros/rhel38.distro b/usr/src/lib/brand/lx/zone/distros/rhel38.distro
new file mode 100644
index 0000000000..5255ac206a
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/distros/rhel38.distro
@@ -0,0 +1,109 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+#
+# Installation information for the RHEL 3 Update 8 distribution disc set:
+#
+# + Serial number (as found in the disc set's .discinfo file)
+# + Version Name
+# + Order CDs holding the distribution must be installed in
+# + MB of disk space required to hold a full install of the distribution
+#
+distro_serial=1152738297.776178
+distro_version="Update 8"
+set -A distro_cdorder 2 3 4 1
+
+distro_mb_required=500
+
+# Include the common_<cluster>_* definitions.
+. ${distro_dir}/rhel_centos_common
+
+# Define the RHEL 3.8 deltas from the common cluster lists
+delta_miniroot_rpms=redhat-release
+delta_core_rpms=""
+delta_server_rpms="$delta_core_rpms nss_db-compat sendmail-doc qt-config"
+delta_desktop_rpms="$delta_server_rpms \
+ seamonkey \
+ seamonkey-chat \
+ seamonkey-mail \
+ seamonkey-nspr \
+ seamonkey-nss"
+delta_developer_rpms="$delta_desktop_rpms \
+ gd-progs \
+ irb \
+ ruby-docs \
+ ruby-tcltk \
+ seamonkey-dom-inspector \
+ seamonkey-js-debugger \
+ seamonkey-devel \
+ seamonkey-nspr-devel \
+ seamonkey-nss-devel"
+delta_all_rpms="$delta_developer_rpms emacs-nox comps"
+
+# Define the final cluster lists for the installer
+distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms"
+distro_core_rpms="$common_core_rpms $delta_core_rpms"
+distro_server_rpms="$common_server_rpms $delta_server_rpms"
+distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms"
+distro_developer_rpms="$common_developer_rpms $delta_developer_rpms"
+distro_all_rpms="$common_all_rpms $delta_all_rpms"
+
+#
+# List of packages missing from the "WS" personality of this distribution
+# as compared to the "AS" personality.
+#
+distro_WS_missing="amanda-server \
+ caching-nameserver \
+ finger-server \
+ freeradius \
+ inews \
+ inn \
+ netdump-server \
+ openldap-servers \
+ pxe \
+ quagga \
+ radvd \
+ redhat-config-bind \
+ samba-swat \
+ tftp-server \
+ tux \
+ vsftpd \
+ ypserv \
+ arptables_jf \
+ mtx \
+ redhat-config-netboot"
+
+#
+# No packages are missing from the "ES" personality as compared to the "AS"
+# personality.
+#
+unset distro_ES_missing
+
+#
+# Identify the packages that need to be set aside for installation after
+# all the other packages are installed.
+#
+deferred_rpms="openoffice.org openoffice.org-i18n openoffice.org-libs"
diff --git a/usr/src/lib/brand/lx/zone/distros/rhel_centos_common b/usr/src/lib/brand/lx/zone/distros/rhel_centos_common
new file mode 100644
index 0000000000..583264b723
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/distros/rhel_centos_common
@@ -0,0 +1,1016 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+#
+# This file contains the basic cluster contents shared by all of the
+# Linux distros we support. Each distro has its own .distro file that
+# expands on the basic cluster lists provided here.
+#
+
+#
+# Required packages for the install miniroot, these are the minimum packages a
+# system must have installed in order to run rpm (which is then used from
+# within the zone to perform the balance of the installation.)
+#
+common_miniroot_rpms="SysVinit \
+ basesystem \
+ bash \
+ beecrypt \
+ bzip2-libs \
+ coreutils \
+ elfutils \
+ elfutils-libelf \
+ filesystem \
+ glibc \
+ glibc-common \
+ gpm \
+ initscripts \
+ iptables \
+ iptables-ipv6 \
+ kernel-utils \
+ laus-libs \
+ libacl \
+ libattr \
+ libgcc \
+ libtermcap \
+ ncurses \
+ pam \
+ popt \
+ rpm \
+ rpm-libs \
+ setup \
+ termcap \
+ zlib"
+
+#
+# This starts a listing of RPMs comprising a variety of install package options
+# for a distribution.
+#
+# The supported package clusters are:
+#
+# + core
+# + server
+# + desktop
+# + developer
+# + system
+#
+# The RPMs needed to install each cluster are listed in the shell variable
+#
+# distro_<level>_rpms
+#
+# This file provides "common_<level>_rpms", which are lists of the packages
+# in each cluster that are common to all distros.
+#
+# The package names are listed alphabetically for readability. rpm will
+# reorder the list to ensure that each package's dependencies are installed
+# before it is.
+#
+# Note: Since the distro_install script uses a regular expression to expand
+# RPM package names to filenames, there may be some tweaking required to
+# guarantee a unique match between a package name and a corresponding RPM
+# file on the install media.
+#
+# One such example below is the package "XFree86-4." The official name of
+# the package is "XFree86," but the regular expression in the script
+# matches that package name to the XFree86-100dpi-fonts and
+# XFree86-75dpi-fonts package RPMs in addition to the proper XFree86 RPM.
+# Therefore the "XFree86" package name was modified to be "XFree86-4",
+# which does result in a unique package name to RPM file match.
+#
+common_core_rpms="GConf2 \
+ Glide3 \
+ ORBit \
+ ORBit2 \
+ XFree86-Mesa-libGL \
+ XFree86-Mesa-libGLU \
+ XFree86-libs \
+ XFree86-libs-data \
+ Xaw3d \
+ ash \
+ at \
+ atk \
+ audiofile \
+ autofs \
+ bc \
+ binutils \
+ bonobo-activation \
+ bzip2 \
+ chkconfig \
+ compat-pwdb \
+ cpio \
+ cpp \
+ cracklib \
+ cracklib-dicts \
+ crontabs \
+ cups-libs \
+ cyrus-sasl \
+ cyrus-sasl-md5 \
+ db4 \
+ desktop-file-utils \
+ dev \
+ diffutils \
+ diskdumputils \
+ e2fsprogs \
+ ed \
+ ethtool \
+ expat \
+ file \
+ findutils \
+ finger \
+ fontconfig \
+ freetype \
+ ftp \
+ gail \
+ gawk \
+ gdbm \
+ gdk-pixbuf \
+ gettext \
+ glib \
+ glib2 \
+ glibc-headers \
+ glibc-kernheaders \
+ gmp \
+ gnupg \
+ grep \
+ groff \
+ gtk+ \
+ gtk2 \
+ gzip \
+ hesiod \
+ hwdata \
+ indexhtml \
+ info \
+ iproute \
+ iputils \
+ kernel \
+ kernel-BOOT \
+ krb5-libs \
+ krb5-workstation \
+ kudzu \
+ laus \
+ less \
+ libaio \
+ libart_lgpl \
+ libbonobo \
+ libcap \
+ libgcj \
+ libgcj-ssa \
+ libglade2 \
+ libgnomecanvas \
+ libjpeg \
+ libmng \
+ libogg \
+ libpng \
+ libpng10 \
+ libstdc++ \
+ libtiff \
+ libtool-libs \
+ libungif \
+ libusb \
+ libuser \
+ libvorbis \
+ libwnck \
+ libxml \
+ libxml2 \
+ libxml2-python \
+ libxslt \
+ linc \
+ lockdev \
+ logrotate \
+ losetup \
+ lsof \
+ lvm \
+ lynx \
+ m4 \
+ mailcap \
+ make \
+ man \
+ man-pages \
+ mingetty \
+ mkinitrd \
+ mkisofs \
+ mktemp \
+ modutils \
+ mount \
+ mtools \
+ nc \
+ net-snmp \
+ net-snmp-libs \
+ net-tools \
+ netdump \
+ newt \
+ nfs-utils \
+ nscd \
+ nss_db \
+ nss_ldap \
+ ntp \
+ ntsysv \
+ openldap \
+ openssh \
+ openssh-clients \
+ openssh-server \
+ openssl \
+ pango \
+ passwd \
+ patch \
+ pax \
+ pcre \
+ pdksh \
+ perl \
+ perl-CGI \
+ perl-DateManip \
+ perl-Filter \
+ perl-HTML-Parser \
+ perl-HTML-Tagset \
+ perl-Parse-Yapp \
+ perl-URI \
+ perl-XML-Dumper \
+ perl-XML-Encoding \
+ perl-XML-Grove \
+ perl-XML-Parser \
+ perl-XML-Twig \
+ perl-libwww-perl \
+ perl-libxml-enno \
+ perl-libxml-perl \
+ portmap \
+ procmail \
+ procps \
+ psacct \
+ psmisc \
+ pspell \
+ pygtk2 \
+ pygtk2-libglade \
+ python \
+ pyxf86config \
+ readline \
+ redhat-logos \
+ redhat-menus \
+ rhpl \
+ rpm-python \
+ rpmdb-redhat \
+ rsh \
+ rsync \
+ rusers \
+ rwho \
+ sed \
+ setarch \
+ sgml-common \
+ shadow-utils \
+ slang \
+ startup-notification \
+ sudo \
+ sysklogd \
+ syslinux \
+ tar \
+ tcl \
+ tcp_wrappers \
+ tcsh \
+ telnet \
+ time \
+ traceroute \
+ ttmkfdir \
+ tzdata \
+ units \
+ unix2dos \
+ unzip \
+ usermode \
+ utempter \
+ util-linux \
+ vim-common \
+ vim-minimal \
+ vixie-cron \
+ wget \
+ which \
+ words \
+ xinetd \
+ xml-common \
+ yp-tools \
+ ypbind \
+ zip"
+
+common_server_rpms="$common_core_rpms \
+ 4Suite \
+ MyODBC \
+ MySQL-python \
+ Omni \
+ Omni-foomatic \
+ PyXML \
+ VFlib2 \
+ XFree86-4 \
+ XFree86-base-fonts \
+ XFree86-font-utils \
+ XFree86-truetype-fonts \
+ XFree86-xauth \
+ XFree86-xdm \
+ XFree86-xfs \
+ acl \
+ alchemist \
+ amanda \
+ amanda-server \
+ arts \
+ aspell \
+ aspell-config \
+ at-spi \
+ authd \
+ bcel \
+ bind \
+ bind-chroot \
+ bind-libs \
+ bind-utils \
+ bitmap-fonts \
+ caching-nameserver \
+ chkfontpath \
+ commons-beanutils \
+ commons-collections \
+ commons-digester \
+ commons-logging \
+ commons-modeler \
+ compat-db \
+ compat-libstdc++ \
+ crypto-utils \
+ cup-v10k \
+ cups \
+ curl \
+ cyrus-sasl-gssapi \
+ cyrus-sasl-plain \
+ dhcp \
+ distcache \
+ distcache-devel \
+ esound \
+ expect \
+ fam \
+ finger-server \
+ foomatic \
+ freeradius \
+ gd \
+ ghostscript \
+ ghostscript-fonts \
+ gimp-print \
+ gnome-libs \
+ gnome-mime-data \
+ gnome-python2 \
+ gnome-python2-bonobo \
+ gnome-python2-canvas \
+ gnome-python2-gtkhtml2 \
+ gnome-vfs2 \
+ gnuplot \
+ gtkhtml2 \
+ htmlview \
+ httpd \
+ hwcrypto \
+ imap \
+ imap-utils \
+ imlib \
+ inews \
+ inn \
+ jakarta-regexp \
+ krb5-server \
+ krbafs \
+ libIDL \
+ libbonoboui \
+ libdbi \
+ libdbi-dbd-mysql \
+ libgnome \
+ libgnomeprint22 \
+ libgnomeprintui22 \
+ libgnomeui \
+ libgsf \
+ libole2 \
+ logwatch \
+ mailman \
+ mailx \
+ mod_auth_mysql \
+ mod_auth_pgsql \
+ mod_authz_ldap \
+ mod_perl \
+ mod_python \
+ mod_ssl \
+ mpage \
+ mtr \
+ mx \
+ mx4j \
+ mysql \
+ mysql-bench \
+ mysql-devel \
+ net-snmp-utils \
+ netdump-server \
+ newt-perl \
+ openldap-servers \
+ openssl-perl \
+ pam_krb5 \
+ perl-DBD-MySQL \
+ perl-DBD-Pg \
+ perl-DBI \
+ perl-DB_File \
+ perl-Digest-HMAC \
+ perl-Digest-SHA1 \
+ perl-Net-DNS \
+ perl-Time-HiRes \
+ php \
+ php-imap \
+ php-ldap \
+ php-mysql \
+ php-odbc \
+ php-pgsql \
+ pnm2ppa \
+ postfix \
+ postgresql-odbc \
+ pxe \
+ pyorbit \
+ qt \
+ qt-MySQL \
+ qt-ODBC \
+ quagga \
+ radvd \
+ rdist \
+ redhat-config-bind \
+ redhat-config-httpd \
+ redhat-config-printer \
+ redhat-config-printer-gui \
+ redhat-config-samba \
+ redhat-config-securitylevel \
+ redhat-config-securitylevel-tui \
+ redhat-config-services \
+ redhat-java-rpm-scripts \
+ redhat-switch-mail \
+ redhat-switch-mail-gnome \
+ rh-postgresql \
+ rh-postgresql-contrib \
+ rh-postgresql-docs \
+ rh-postgresql-jdbc \
+ rh-postgresql-libs \
+ rh-postgresql-python \
+ rh-postgresql-server \
+ rh-postgresql-tcl \
+ rh-postgresql-test \
+ rhdb-utils \
+ rsh-server \
+ rusers-server \
+ samba \
+ samba-client \
+ samba-common \
+ samba-swat \
+ sendmail \
+ sendmail-cf \
+ slocate \
+ spamassassin \
+ squid \
+ squirrelmail \
+ switchdesk \
+ sysreport \
+ telnet-server \
+ tftp-server \
+ tmpwatch \
+ tux \
+ unixODBC \
+ unixODBC-kde \
+ urw-fonts \
+ usermode-gtk \
+ vsftpd \
+ webalizer \
+ xalan-j \
+ xerces-j \
+ xinitrc \
+ ypserv"
+
+common_desktop_rpms="$common_server_rpms \
+ Canna-libs \
+ FreeWnn-libs \
+ Gtk-Perl \
+ ImageMagick \
+ ImageMagick-perl \
+ SDL \
+ XFree86-100dpi-fonts \
+ XFree86-75dpi-fonts \
+ XFree86-Xnest \
+ XFree86-Xvfb \
+ XFree86-doc \
+ XFree86-tools \
+ XFree86-twm \
+ a2ps \
+ am-utils \
+ amanda-client \
+ anacron \
+ apel-xemacs \
+ aumix \
+ authconfig \
+ authconfig-gtk \
+ autorun \
+ cdparanoia-alpha9.8 \
+ cdparanoia-libs-alpha9.8 \
+ cdrecord \
+ cipe \
+ ckermit \
+ comps-extras \
+ control-center \
+ ctags \
+ desktop-backgrounds-basic \
+ desktop-printing \
+ dialog \
+ docbook-dtds \
+ docbook-style-dsssl \
+ docbook-style-xsl \
+ docbook-utils \
+ docbook-utils-pdf \
+ dtach \
+ dvd+rw-tools \
+ dvdrecord \
+ eel2 \
+ elinks \
+ enscript \
+ eog \
+ evolution \
+ evolution-connector \
+ fetchmail \
+ file-roller \
+ firstboot \
+ fontilus \
+ gaim \
+ gconf-editor \
+ gdm \
+ gedit \
+ gftp \
+ ggv \
+ gimp \
+ gimp-data-extras \
+ gimp-perl \
+ gimp-print-cups \
+ gimp-print-plugin \
+ gimp-print-utils \
+ gnome-applets \
+ gnome-audio \
+ gnome-desktop \
+ gnome-games \
+ gnome-icon-theme \
+ gnome-media \
+ gnome-panel \
+ gnome-pilot \
+ gnome-python2-applet \
+ gnome-session \
+ gnome-spell \
+ gnome-system-monitor \
+ gnome-terminal \
+ gnome-themes \
+ gnome-user-docs \
+ gnome-utils \
+ gnome-vfs2-extras \
+ gnomemeeting \
+ gphoto2 \
+ gsl \
+ gstreamer \
+ gstreamer-plugins \
+ gstreamer-tools \
+ gtk-engines \
+ gtk2-engines \
+ gtkam \
+ gtkam-gimp \
+ gtkglarea \
+ gtkhtml3 \
+ guile \
+ hotplug \
+ hpijs \
+ hpoj \
+ htdig \
+ hwbrowser \
+ intltool \
+ itcl \
+ jadetex \
+ kdeaddons \
+ kdeartwork \
+ kdebase \
+ kdegames \
+ kdegraphics \
+ kdelibs \
+ kdemultimedia \
+ kdenetwork \
+ kdepim \
+ kdeutils \
+ lftp \
+ libao \
+ libf2c \
+ libgail-gnome \
+ libgal2 \
+ libghttp \
+ libglade \
+ libgtop2 \
+ libmrproject \
+ libpcap \
+ libraw1394 \
+ librsvg2 \
+ libsoup \
+ linuxdoc-tools \
+ lm_sensors \
+ magicdev \
+ metacity \
+ mikmod \
+ mrproject \
+ mrtg \
+ mutt \
+ nautilus \
+ nautilus-cd-burner \
+ nautilus-media \
+ netpbm \
+ netpbm-progs \
+ open \
+ openh323 \
+ openjade \
+ openldap-clients \
+ openmotif \
+ openmotif21 \
+ openoffice.org \
+ openoffice.org-i18n \
+ openoffice.org-libs \
+ openssh-askpass \
+ openssh-askpass-gnome \
+ parted \
+ passivetex \
+ perl-PDL \
+ perl-SGMLSpm \
+ perl-suidperl \
+ pilot-link \
+ printman \
+ psutils \
+ pwlib \
+ pyOpenSSL \
+ python-optik \
+ redhat-artwork \
+ redhat-config-date \
+ redhat-config-keyboard \
+ redhat-config-kickstart \
+ redhat-config-language \
+ redhat-config-mouse \
+ redhat-config-network \
+ redhat-config-network-tui \
+ redhat-config-nfs \
+ redhat-config-packages \
+ redhat-config-proc \
+ redhat-config-rootpassword \
+ redhat-config-soundcard \
+ redhat-config-users \
+ redhat-config-xfree86 \
+ redhat-logviewer \
+ rhn-applet \
+ rhnlib \
+ sane-backends \
+ sane-frontends \
+ screen \
+ scrollkeeper \
+ shapecfg \
+ sharutils \
+ sox \
+ star \
+ switchdesk-gnome \
+ switchdesk-kde \
+ sysstat \
+ talk \
+ tclx \
+ tetex \
+ tetex-afm \
+ tetex-dvips \
+ tetex-fonts \
+ tetex-latex \
+ tetex-xdvi \
+ tix \
+ tk \
+ tkinter \
+ transfig \
+ ttfprint \
+ umb-scheme \
+ up2date \
+ up2date-gnome \
+ usbutils \
+ uucp \
+ vim-enhanced \
+ vlock \
+ vnc \
+ vnc-server \
+ vorbis-tools \
+ vte \
+ w3c-libwww \
+ xchat \
+ xdelta \
+ xemacs \
+ xemacs-el \
+ xemacs-info \
+ xfig \
+ xhtml1-dtds \
+ xloadimage \
+ xmltex \
+ xmlto \
+ xmms \
+ xpdf \
+ xsane \
+ xsane-gimp \
+ xscreensaver \
+ xsri \
+ xterm \
+ yelp \
+ zsh"
+
+common_developer_rpms="$common_desktop_rpms \
+ ElectricFence \
+ GConf2-devel \
+ ORBit-devel \
+ ORBit2-devel \
+ SDL-devel \
+ XFree86-devel \
+ ant \
+ ant-libs \
+ arts-devel \
+ at-spi-devel \
+ atk-devel \
+ audiofile-devel \
+ autoconf \
+ autoconf213 \
+ automake \
+ automake14 \
+ automake15 \
+ bison \
+ blas \
+ bonobo-activation-devel \
+ bug-buddy \
+ byacc \
+ cdecl \
+ cproto \
+ crash \
+ cscope \
+ cups-devel \
+ cvs \
+ ddd \
+ dejagnu \
+ dev86 \
+ diffstat \
+ doxygen \
+ eel2-devel \
+ emacs \
+ emacs-el \
+ emacs-leim \
+ esound-devel \
+ flex \
+ fontconfig-devel \
+ freetype-devel \
+ gail-devel \
+ gcc \
+ gcc-c++ \
+ gcc-c++-ssa \
+ gcc-g77 \
+ gcc-g77-ssa \
+ gcc-gnat \
+ gcc-java \
+ gcc-java-ssa \
+ gcc-objc \
+ gcc-objc-ssa \
+ gcc-ssa \
+ gd-devel \
+ gdb \
+ gdk-pixbuf-devel \
+ gdk-pixbuf-gnome \
+ glade2 \
+ glib-devel \
+ glib2-devel \
+ glibc-devel \
+ glibc-profile \
+ glibc-utils \
+ gnome-desktop-devel \
+ gnome-libs-devel \
+ gnome-vfs2-devel \
+ gperf \
+ gtk+-devel \
+ gtk-doc \
+ gtk2-devel \
+ gtkhtml2-devel \
+ httpd-devel \
+ im-sdk \
+ imlib-devel \
+ indent \
+ jaf \
+ javamail \
+ joe \
+ jpackage-utils \
+ junit \
+ kdebase-devel \
+ kdegraphics-devel \
+ kdelibs-devel \
+ kdenetwork-devel \
+ kdepim-devel \
+ kdesdk \
+ kdesdk-devel \
+ kdeutils-devel \
+ kdevelop \
+ kdoc \
+ kernel-doc \
+ kernel-source \
+ lam \
+ lapack \
+ lha \
+ libIDL-devel \
+ libacl-devel \
+ libart_lgpl-devel \
+ libattr-devel \
+ libbonobo-devel \
+ libbonoboui-devel \
+ libgcc-ssa \
+ libgcj-devel \
+ libgcj-ssa-devel \
+ libglade2-devel \
+ libgnat \
+ libgnome-devel \
+ libgnomecanvas-devel \
+ libgnomeprint22-devel \
+ libgnomeprintui22-devel \
+ libgnomeui-devel \
+ libjpeg-devel \
+ libmng-devel \
+ libmudflap \
+ libmudflap-devel \
+ libobjc \
+ libole2-devel \
+ libpng-devel \
+ librsvg2-devel \
+ libstdc++-devel \
+ libstdc++-ssa \
+ libstdc++-ssa-devel \
+ libtiff-devel \
+ libtool \
+ libungif-devel \
+ libxml2-devel \
+ libxslt-devel \
+ linc-devel \
+ ltrace \
+ memprof \
+ nasm \
+ ncurses-devel \
+ nedit \
+ netpbm-devel \
+ openmotif-devel \
+ oprofile \
+ pango-devel \
+ patchutils \
+ pcre-devel \
+ perl-CPAN \
+ perl-Crypt-SSLeay \
+ pilot-link-devel \
+ pkgconfig \
+ pstack \
+ pygtk2-devel \
+ python-devel \
+ python-tools \
+ qt-designer \
+ qt-devel \
+ rcs \
+ redhat-rpm-config \
+ rpm-build \
+ ruby \
+ ruby-libs \
+ ruby-mode \
+ sane-backends-devel \
+ sip \
+ sip-devel \
+ splint \
+ startup-notification-devel \
+ strace \
+ swig \
+ texinfo \
+ tora \
+ vim-X11 \
+ vte-devel \
+ zlib-devel"
+
+common_all_rpms="$common_developer_rpms \
+ Canna
+ FreeWnn \
+ ImageMagick-c++-5.5.6 \
+ Wnn6-SDK \
+ ami \
+ amtu \
+ anaconda \
+ anaconda-help \
+ anaconda-images \
+ anaconda-product \
+ anaconda-runtime \
+ apmd \
+ arptables_jf \
+ attr \
+ bg5ps \
+ bitmap-fonts-cjk \
+ bogl \
+ bogl-bterm \
+ bootparamd \
+ booty \
+ bridge-utils \
+ busybox \
+ busybox-anaconda \
+ compat-gcc \
+ compat-gcc-c++ \
+ compat-glibc-7.x \
+ compat-libstdc++-devel \
+ compat-slang \
+ db4-java \
+ db4-utils \
+ dbskkd-cdb \
+ desktop-backgrounds-extra \
+ devlabel \
+ dhclient \
+ dietlibc \
+ dos2unix \
+ dosfstools \
+ dump \
+ eject \
+ emacspeak \
+ ethereal \
+ ethereal-gnome \
+ fbset \
+ festival \
+ grub \
+ h2ps \
+ hdparm \
+ ipsec-tools \
+ irda-utils \
+ iscsi-initiator-utils \
+ isdn4k-utils \
+ jfsutils \
+ jisksp14 \
+ jisksp16 \
+ jwhois \
+ kappa20 \
+ kbd \
+ kernel-pcmcia-cs \
+ knm_new \
+ kon2 \
+ kon2-fonts \
+ libtabe \
+ libwvstreams \
+ lilo \
+ linuxwacom \
+ lslk \
+ mdadm \
+ mgetty \
+ minicom \
+ mkbootdisk \
+ mt-st \
+ mtx \
+ nano \
+ ncompress \
+ net-snmp-perl \
+ netconfig \
+ nhpf \
+ nmap \
+ octave \
+ openssl096b \
+ pam_passwdqc \
+ pam_smb \
+ pinfo \
+ ppp \
+ prelink \
+ psgml \
+ pvm \
+ quota \
+ rdate \
+ rdesktop \
+ redhat-config-netboot \
+ rhgb \
+ rmt \
+ rootfiles \
+ rp-pppoe \
+ schedutils \
+ setserial \
+ setuptool \
+ sg3_utils \
+ skkdic \
+ skkinput
+ specspo \
+ stunnel \
+ tcpdump \
+ tftp \
+ tn5250 \
+ tsclient \
+ vconfig \
+ wireless-tools \
+ wvdial \
+ x3270 \
+ x3270-text \
+ x3270-x11 \
+ xcin"
diff --git a/usr/src/lib/brand/lx/zone/lx_boot.ksh b/usr/src/lib/brand/lx/zone/lx_boot.ksh
new file mode 100644
index 0000000000..3aff4c6b16
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/lx_boot.ksh
@@ -0,0 +1,212 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2014, Joyent, Inc. All rights reserved.
+#
+# lx boot script.
+#
+# The arguments to this script are the zone name and the zonepath.
+#
+
+. /usr/lib/brand/shared/common.ksh
+
+ZONENAME=$1
+ZONEPATH=$2
+ZONEROOT=$ZONEPATH/root
+
+w_missing=$(gettext "Warning: \"%s\" is not installed in the global zone")
+
+arch=`uname -p`
+if [ "$arch" = "i386" ]; then
+ ARCH32=i86
+ ARCH64=amd64
+else
+ echo "Unsupported architecture: $arch"
+ exit 2
+fi
+
+#
+# Run the lx_support boot hook.
+#
+/usr/lib/brand/lx/lx_support boot $ZONEPATH $ZONENAME
+if (( $? != 0 )) ; then
+ exit 1
+fi
+
+BRANDDIR=/native/usr/lib/brand/lx;
+EXIT_CODE=1
+
+# All native executables must be run using the native linker. By default, the
+# kernel runs the linker at /lib/ld.so.1, which doesn't exist in an lx zone.
+# In lx, the linker is ld-linux.so.N. Hence when we run the native executable
+# from the wrappers, we explicitly specify /native/lib/ld.so.1 as our 32-bit
+# linker (or /native/lib/64/ld.so.1 as our 64-bit linker).
+
+# Setup a native command which uses the thunk library to access the Linux
+# nameservices within the zone.
+#
+# $1 is lx cmd, $2 is native cmd, $3 is an optional inclusion in the script
+# the lx cmd path must have already be verified with safe_dir
+#
+setup_native_thunk_cmd() {
+ cmd_name=$ZONEROOT/$1
+
+ if [ -h $cmd_name -o \( -e $cmd_name -a ! -f $cmd_name \) ]; then
+ logger -p daemon.err "dangerous zone cmd: $ZONENAME, $1"
+ return
+ fi
+
+ cat <<-DONE >$ZONEROOT/$1
+ #!/bin/sh
+
+ $3
+ exec /native/usr/lib/brand/lx/lx_native \
+ /native/lib/ld.so.1 -e LD_NOENVIRON=1 -e LD_NOCONFIG=1 \
+ -e LD_PRELOAD_32=/native/usr/lib/brand/lx/lx_thunk.so.1 \
+ -e LD_LIBRARY_PATH_32="/native/lib:/native/usr/lib" $2 "\$@"
+ DONE
+
+ chmod 755 $ZONEROOT/$1
+}
+
+# $1 is lx cmd, $2 is native cmd
+# the lx cmd path must have already be verified with safe_dir
+setup_native_cmd() {
+ cmd_name=$ZONEROOT/$1
+
+ if [ -h $cmd_name -o \( -e $cmd_name -a ! -f $cmd_name \) ]; then
+ logger -p daemon.err "dangerous zone cmd: $ZONENAME, $1"
+ return
+ fi
+
+ cat <<-DONE >$ZONEROOT/$1
+ #!/bin/sh
+
+ exec /native/usr/lib/brand/lx/lx_native \
+ /native/lib/ld.so.1 -e LD_NOENVIRON=1 -e LD_NOCONFIG=1 \
+ -e LD_LIBRARY_PATH_32="/native/lib:/native/usr/lib" $2 "\$@"
+ DONE
+
+ chmod 755 $ZONEROOT/$1
+}
+
+#
+# Before we boot we validate and fix, if necessary, the required files within
+# the zone. These modifications can be lost if a patch or upgrade is applied
+# within the zone, so we validate and fix the zone every time it boots.
+#
+
+#
+# Determine the distro.
+#
+distro=""
+if [[ $(zonecfg -z $ZONENAME info attr name=docker) =~ "value: true" ]]; then
+ distro="docker"
+elif [[ -f $ZONEROOT/etc/redhat-release ]]; then
+ distro="redhat"
+elif [[ -f $ZONEROOT/etc/lsb-release ]]; then
+ if egrep -s Ubuntu $ZONEROOT/etc/lsb-release; then
+ distro="ubuntu"
+ elif [[ -f $ZONEROOT/etc/debian_version ]]; then
+ distro="debian"
+ fi
+elif [[ -f $ZONEROOT/etc/debian_version ]]; then
+ distro="debian"
+fi
+
+[[ -z $distro ]] && fatal "Unsupported distribution!"
+
+#
+# BINARY REPLACEMENT
+#
+# This section of the boot script is responsible for replacing Linux
+# binaries within the booting zone with native binaries. This is a two-step
+# process: First, the directory structure of the zone is validated to ensure
+# that binary replacement will proceed safely. Second, the Linux binaries
+# are replaced with native binaries.
+#
+# Here's an example. Suppose that you want to replace /usr/bin/zcat with the
+# native /usr/bin/zcat binary. Then you should do the following:
+#
+# 1. Go to the section below labeled "STEP ONE" and add the following
+# two lines:
+#
+# safe_dir /usr
+# safe_dir /usr/bin
+#
+# These lines ensure that both /usr and /usr/bin are directories
+# within the booting zone that can be safely accessed by the global
+# zone.
+# 2. Go to the section below labeled "STEP TWO" and add the following
+# line:
+#
+# setup_native_cmd /usr/bin/zcat /native/usr/bin/zcat
+#
+
+#
+# STEP ONE
+#
+# Validate that the zone filesystem looks like we expect it to.
+#
+safe_dir /lib
+safe_dir /bin
+safe_dir /sbin
+safe_dir /etc
+safe_opt_dir /etc/init
+safe_opt_dir /etc/update-motd.d
+
+#
+# STEP TWO
+#
+# Replace Linux binaries with native binaries.
+#
+# XXX The lx_thunk code will perform a chroot, so these commands will not work
+# if they are run by a non-privileged user.
+#
+setup_native_thunk_cmd /sbin/ipmgmtd /native/lib/inet/ipmgmtd \
+ "export SMF_FMRI=\"svc:/network/ip-interface-management:default\""
+setup_native_thunk_cmd /sbin/ifconfig-native /native/sbin/ifconfig
+setup_native_thunk_cmd /sbin/dladm /native/usr/sbin/dladm
+
+setup_native_cmd /sbin/route /native/usr/sbin/route
+setup_native_cmd /bin/netstat /native/usr/bin/netstat
+
+#
+# When plumbing and configuring the NICs the native ifconfig needs to use
+# the thunk library, so we setup for that as ifconfig-native. However, the
+# thunk library does a chroot so that means a non-root user can't use the
+# native ifconfig to look at the NICs. Thus, we provide a basic shell script
+# which can display NICs in the Linux style.
+#
+if [ ! -h $ZONEROOT/sbin/ifconfig ]; then
+ cp /usr/lib/brand/lx/ifconfig $ZONEROOT/sbin/ifconfig
+fi
+
+#
+# STEP THREE
+#
+# Perform distro-specific customization.
+#
+. $(dirname $0)/lx_boot_zone_${distro}
+
+exit 0
diff --git a/usr/src/lib/brand/lx/zone/lx_boot_zone_ubuntu.ksh b/usr/src/lib/brand/lx/zone/lx_boot_zone_ubuntu.ksh
new file mode 100644
index 0000000000..e10a53449d
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/lx_boot_zone_ubuntu.ksh
@@ -0,0 +1,222 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Customisation for Ubuntu-based distributions. Assumes to have been
+# sourced from lx_boot.
+#
+
+# Generate the networking.conf upstart script
+setup_net()
+{
+ [ -f /etc/defaultrouter ] && defroute=`cat /etc/defaultrouter`
+
+ zonecfg -z $ZONENAME info net | awk -v defroute=$defroute '
+ BEGIN {
+ printf("description\t\"configure virtual network devices\"\n\n")
+ printf("emits static-network-up\n")
+ printf("emits net-device-up\n\n")
+
+ printf("start on local-filesystems\n\n")
+
+ printf("task\n\n")
+
+ printf("pre-start exec mkdir -p /run/network\n\n")
+
+ printf("script\n")
+ printf(" /sbin/ipmgmtd || true\n")
+ printf(" /sbin/ifconfig-native lo0 plumb\n")
+ printf(" /sbin/ifconfig-native lo0 up\n")
+ printf(" /sbin/initctl emit --no-wait net-device-up IFACE=lo LOGICAL=lo ADDRFAM=inet METHOD=loopback || true\n")
+
+ } {
+ if ($1 == "physical:") {
+ phys = $2
+ } else if ($1 == "property:") {
+ split($2, a, ",")
+ split(a[1], k, "=")
+ split(a[2], v, "=")
+
+ val = substr(v[2], 2)
+ val = substr(val, 1, length(val) - 2)
+
+ if (k[2] == "ip")
+ ip = val
+ else if (k[2] == "netmask")
+ mask = val
+ else if (k[2] == "primary")
+ prim = val
+ }
+
+ if ($1 == "net:" && phys != "") {
+ printf(" /sbin/ifconfig-native %s plumb || true\n", phys)
+ printf(" /sbin/ifconfig-native %s %s netmask %s up || true\n",
+ phys, ip, mask)
+ printf(" /sbin/initctl emit --no-wait net-device-up IFACE=%s\n",
+ phys)
+
+ phys = ""
+ prim = ""
+ }
+ }
+ END {
+ printf(" /sbin/ifconfig-native %s plumb || true\n", phys)
+ printf(" /sbin/ifconfig-native %s %s netmask %s up || true\n",
+ phys, ip, mask)
+ printf(" /sbin/initctl emit --no-wait net-device-up IFACE=%s\n",
+ phys)
+
+ printf(" /sbin/initctl emit --no-wait static-network-up\n")
+ if (length(defroute) > 0)
+ printf(" /sbin/route add default %s\n", defroute)
+ printf("end script\n")
+ }' > $fnm
+}
+
+RMSVCS="acpid.conf
+ control-alt-delete.conf
+ console-setup.conf
+ dmesg.conf
+ hwclock.conf
+ hwclock-save.conf
+ irqbalance.conf
+ module-init-tools.conf
+ mounted-dev.conf
+ mounted-debugfs.conf
+ network-interface-security.conf
+ plymouth.conf
+ plymouth-log.conf
+ plymouth-splash.conf
+ plymouth-stop.conf
+ plymouth-upstart-bridge.conf
+ setvtrgb.conf
+ tty1.conf
+ tty2.conf
+ tty3.conf
+ tty4.conf
+ tty5.conf
+ tty6.conf
+ upstart-udev-bridge.conf
+ udev.conf
+ udevmonitor.conf
+ udevtrigger.conf
+ udev-fallback-graphics.conf
+ udev-finish.conf
+ ureadahead-other.conf
+ ureadahead.conf
+ whoopsie.conf"
+
+
+#
+# Now customize upstart
+#
+
+for f in $RMSVCS
+do
+ fnm=$ZONEROOT/etc/init/$f
+ [[ ! -h $fnm && -f $fnm ]] && rm -f $fnm
+done
+
+fnm=$ZONEROOT/etc/init/console.conf
+if [[ ! -h $fnm && -f $fnm ]] then
+ sed -e 's/lxc/zones/' $fnm > /tmp/console.conf.$$
+ mv /tmp/console.conf.$$ $fnm
+fi
+
+fnm=$ZONEROOT/etc/init/container-detect.conf
+if [[ ! -h $fnm && -f $fnm ]] then
+ cat <<'DONE' > $fnm
+description "Track if upstart is running in a container"
+
+start on mounted MOUNTPOINT=/run
+
+env container
+env LIBVIRT_LXC_UUID
+
+emits container
+
+pre-start script
+ container=zones
+ echo "$container" > /run/container_type || true
+ initctl emit --no-wait container CONTAINER=$container
+ exit 0
+end script
+DONE
+fi
+
+# XXX need to add real mounting into this svc definition
+
+fnm=$ZONEROOT/etc/init/mountall.conf
+if [[ ! -h $fnm && -f $fnm ]] then
+ cat <<'DONE' > $fnm
+description "Mount filesystems on boot"
+
+start on startup
+
+task
+
+emits virtual-filesystems
+emits local-filesystems
+emits remote-filesystems
+emits all-swaps
+emits filesystem
+emits mounted
+
+script
+ echo "/ / zfs rw 0 0" > /etc/mtab
+ echo "proc /proc proc rw,noexec,nosuid,nodev 0 0" >> /etc/mtab
+
+ /sbin/initctl emit --no-wait virtual-filesystems
+ /bin/mount -t tmpfs tmpfs /run || true
+ /sbin/initctl emit --no-wait mounted MOUNTPOINT=/run TYPE=tmpfs
+ /sbin/initctl emit --no-wait local-filesystems
+ /sbin/initctl emit --no-wait all-swaps
+ /sbin/initctl emit --no-wait filesystem
+end script
+DONE
+fi
+
+iptype=`/usr/sbin/zonecfg -z $ZONENAME info ip-type | cut -f2 -d' '`
+
+if [[ "$iptype" == "exclusive" ]]; then
+ fnm=$ZONEROOT/etc/init/networking.conf
+ if [[ ! -h $fnm && -f $fnm ]] then
+ setup_net
+ fi
+fi
+
+fnm=$ZONEROOT/etc/init/plymouth-ready.conf
+if [[ ! -h $fnm && -f $fnm ]] then
+ cat <<'DONE' > $fnm
+description "Send an event to indicate plymouth is up"
+
+task
+start on startup
+instance $UPSTART_EVENTS
+
+emits plymouth-ready
+
+script
+ initctl emit --no-wait plymouth-ready
+end script
+DONE
+fi
+
+#
+# upstart modifications are complete
+#
+
+# Hand control back to lx_boot
diff --git a/usr/src/lib/brand/lx/zone/lx_distro_install.ksh b/usr/src/lib/brand/lx/zone/lx_distro_install.ksh
new file mode 100644
index 0000000000..44c954dda8
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/lx_distro_install.ksh
@@ -0,0 +1,2772 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# This script is called from /usr/lib/brand/lx/lx_install.
+#
+# options passed down from lx_install:
+# -z $ZONENAME
+# -r $LINUX_ROOT
+#
+# options passed down from zoneadm -z <zone-name> install
+# -d <Linux-archives-dir>
+# [core | server | desktop | development | all]
+#
+# The desktop cluster will be installed by default.
+#
+
+# Restrict executables to /bin, /usr/bin and /usr/sbin
+PATH=/bin:/usr/bin:/usr/sbin
+export PATH
+
+
+# Setup i18n output
+TEXTDOMAIN="SUNW_OST_OSCMD"
+export TEXTDOMAIN
+
+# Log passed arguments to file descriptor 2
+log()
+{
+ [[ -n $logfile ]] && echo "$@" >&2
+}
+
+#
+# Send the provided printf()-style arguments to the screen and to the
+# logfile.
+#
+screenlog()
+{
+ typeset fmt="$1"
+ shift
+
+ printf "$fmt\n" "$@"
+ [[ -n $logfile ]] && printf "$fmt\n" "$@" >&2
+}
+
+# Print and log provided text if the shell variable "verbose_mode" is set
+verbose()
+{
+ [[ -n $verbose_mode ]] && echo "$@"
+ [[ -n $logfile ]] && [[ -n $verbose_mode ]] && echo "$@" >&2
+}
+
+#
+# Print to the screen if the shell variable "verbose_mode" is set, but always
+# send the output to the log.
+#
+verboselog()
+{
+ [[ -n $verbose_mode ]] && echo "$@"
+ [[ -n $logfile ]] && echo "$@" >&2
+}
+
+bad_rpmdir=$(gettext "'%s' is not a valid RPM directory!")
+
+mb_req=$(gettext "(%s MB required, %s MB available)")
+no_space=$(gettext "Not enough free space available in '%s'")
+
+inst_clust=$(gettext "Installing cluster '%s'")
+unknown_clust=$(gettext "ERROR: Unknown cluster name: '%s'")
+
+unknown_media=$(gettext "Unknown or unreadable media loaded in %s")
+
+eject_fail=$(gettext "Attempt to eject '%s' failed.")
+
+lofi_failed=$(gettext "Attempt to add '%s' as lofi device FAILED.")
+lofs_failed=$(gettext "Attempt to lofs mount '%s' on '%s' FAILED.")
+
+media_spec=$(gettext "the provided media (%s)")
+
+distro_mediafail=\
+$(gettext "Attempt to determine Linux distribution from\n %s FAILED.")
+
+mini_bootfail=$(gettext "Attempt to boot miniroot for zone '%s' FAILED.")
+mini_copyfail=$(gettext "Attempt to copy miniroot for zone '%s' FAILED.")
+mini_initfail=$(gettext "Attempt to initialize miniroot for zone '%s' FAILED.")
+mini_instfail=$(gettext "Attempt to install RPM '%s' to miniroot FAILED.")
+mini_mediafail=$(gettext "Install of zone '%s' miniroot from\n %s FAILED.")
+mini_setfail=$(gettext "Attempt to setup miniroot for zone '%s' FAILED.")
+
+mini_mntfsfail=\
+$(gettext "Attempt to mount miniroot filesystems for zone '%s' FAILED.")
+
+rpm_initfail=\
+$(gettext "Attempt to initialize RPM database for zone '%s' FAILED.")
+
+symlink_failed=$(gettext "Attempt to symbolically link '%s' to '%s' FAILED.")
+
+discinfo_nofile=$(gettext "ERROR: Discinfo file '%s' not found!")
+discinfo_notreadable=$(gettext "ERROR: Discinfo file '%s': not readable!")
+discinfo_wrongarch=\
+$(gettext "ERROR: '%s': disc architecture is '%s'; install requires 'i386'!")
+
+wrong_serial=$(gettext "Incorrect serial number found on provided %s.")
+wrong_ser_expect=$(gettext " (found #%s, expected #%s)")
+
+wrong_cd=$(gettext "Incorrect CD inserted (found %s, wanted %s)")
+
+zone_initrootfail=\
+$(gettext "Attempt to initialize root filesystem for zone '%s' FAILED.")
+
+zone_haltfail=$(gettext "Unable to halt zone '%s'!")
+zone_instfail=$(gettext "Install of zone '%s' from '%s' FAILED '%s'.")
+zone_mediafail=$(gettext "Install of zone '%s' from\n %s FAILED.")
+
+zone_rootfail=\
+$(gettext "ERROR: The specified zone root directory '%s' could not be created.")
+zone_rootsub=\
+$(gettext "ERROR: The specified zone root subdirectory '%s' does not exist.")
+
+mk_mntfail=$(gettext "Could not create the mount directory '%s'")
+mountfail=$(gettext "Mount of '%s' on '%s' FAILED.")
+
+insert_discmsg=\
+$(gettext "Please insert %s, or a\n %s DVD in the removable media")
+
+mount_proper_iso1=$(gettext "Please mount the ISO for %s or a")
+mount_proper_iso2=$(gettext "%s DVD on device '%s'")
+
+silent_nodisc=$(gettext "ERROR: Cannot install from CDs in silent mode.")
+silent_nolofi=\
+$(gettext "ERROR: Cannot install from lofi-based CD ISOs in silent mode.")
+
+install_msg=$(gettext "Installing zone '%s' from\n %s.")
+install_ndiscs=\
+$(gettext "You will need CDs 1 - %s (or the equivalent DVD) to")
+install_nisos=\
+$(gettext "You will need ISO images representing CDs 1 - %s (or the equivalent")
+
+locate_npkgs=$(gettext "Attempting to locate %s packages...")
+
+install_one_rpm=$(gettext "Installing 1 %spackage.")
+install_nrpms_few=\
+$(gettext "Installing %s %spackages; this may take a few minutes...")
+install_nrpms_several=\
+$(gettext "Installing %s %spackages; this may take several minutes...")
+
+install_longwait=\
+$(gettext "NOTE: There may be a long delay before you see further output.")
+
+install_defmkfail=$(gettext "Could not create the temporary directory '%s'")
+install_defcpfail=$(gettext "Could not make a local copy of deferred RPM '%s'")
+install_dist=$(gettext "Installing distribution '%s'...")
+install_zonefail=$(gettext "Attempt to install zone '%s' FAILED.")
+
+no_distropath=$(gettext "ERROR: Distribution path '%s' doesn't exist.")
+
+install_done=$(gettext "Installation of %s to zone\n '%s' completed %s.")
+install_failed=$(gettext "Installation of %s to zone\n '%s' FAILED %s.")
+
+eject_final_msg=\
+$(gettext "Would you like the system to eject the %sinstall %s when")
+eject_final_prompt=$(gettext "installation of '%s' is complete? (%s)")
+eject_final_status=$(gettext "The %sinstall %s %s be ejected.")
+
+#
+# Get the device underlying a specified mounted file system and return it in
+# the shell variable "mount_dev"
+#
+# Returns 0 on success, 1 on failure.
+#
+get_mountdev()
+{
+ typeset mount_dir="$1"
+ typeset device
+ unset mount_dev
+
+ #
+ # Obtain information on the specified mounted device.
+ #
+ device=`{ df -k "$mount_dir" | egrep "^/" ; } 2>/dev/null` || return 1
+ mount_dev=$(echo $device | awk -e '{print $1}' 2>/dev/null)
+
+ [[ "`echo $mount_dev | cut -c 1`" = "/" ]] && return 0
+
+ unset mount_dev
+ return 1
+}
+
+#
+# Get the directory name a specified device is mounted as and return it in
+# the shell variable "mount_dir"
+#
+# Returns 0 on success, 1 on failre.
+#
+get_mountdir()
+{
+ typeset mount_dev="$1"
+ typeset dir
+ unset mount_dir
+
+ [[ -b "$mount_dev" ]] || return 1
+
+ #
+ # Obtain information on the specified mounted device.
+ #
+ dir=`{ df -k "$mount_dev" | egrep "^/" ; } 2>/dev/null` || return 1
+ mount_dir=$(echo $dir | awk -e '{print $6}' 2>/dev/null)
+
+ [[ "`echo $mount_dir | cut -c 1`" = "/" ]] && return 0
+
+ unset mount_dir
+ return 1
+}
+
+#
+# Check the free disk space of the passed filesystem against the passed
+# argument.
+#
+# Returns 0 on success, 1 on failure.
+#
+check_mbfree()
+{
+ typeset dir="$1"
+ typeset mb_required=$2
+
+ #
+ # Return free space in partition containing passed argument in MB
+ #
+ typeset mbfree=`{ LC_ALL=C df -k "$dir" | \
+ egrep -v Filesystem ; } 2>/dev/null` || return 1
+ mbfree=$(echo $mbfree | awk -e '{print $4}' 2>/dev/null)
+
+ ((mbfree /= 1024))
+ if ((mbfree < mb_required)); then
+ screenlog "$no_space" "$zoneroot"
+ screenlog "$mb_req" "$mb_required" "$mb_free"
+ return 1
+ fi
+ return 0
+}
+
+#
+# Find packages by attempting to expand passed RPM names to their full filenames
+# in the passed RPM directory.
+#
+# Arguments:
+#
+# Argument 1: Path to mounted install media
+# Arguments [2 - n]: RPM names to process
+#
+# The expanded filenames are returned in the shell array "rpm_names."
+#
+# For example:
+#
+# find_packages /mnt/iso dev kernel tetex redhat-menus
+#
+# would return something like:
+#
+# rpms_found[0]: dev-3.3.12.3-1.centos.0.i386.rpm
+# rpms_found[1]: kernel-2.4.21-32.EL.i586.rpm
+# rpms_found[2]: tetex-1.0.7-67.7.i386.rpm
+# rpms_found[3]: redhat-menus-0.39-1.noarch.rpm
+#
+# The routine returns 0 on success, 1 on an error.
+#
+find_packages()
+{
+ typeset found=0
+ typeset left=0
+
+ typeset rpmdir="$1/$rd_rpmdir"
+ typeset curdir=${PWD:=$(pwd)}
+
+ typeset arch
+ typeset procinfo
+ typeset rpmglob
+ typeset rpmfile
+
+ unset rpms_found
+ unset rpms_left
+
+ shift
+ cd "$rpmdir"
+
+ typeset rpmcheck="$(echo *.rpm)"
+
+ if [[ "$rpmcheck" = "*.rpm" ]]; then
+ screenlog "$bad_rpmdir" "$rpmdir"
+ cd "$curdir"
+ return 1
+ fi
+
+ #
+ # If the miniroot is booted, and the archs list isn't already set,
+ # ask the zone's rpm command for the list of compatible architectures.
+ #
+ if [[ -n $miniroot_booted && -z $archs ]]; then
+ procinfo=$(zlogin "$zonename" /bin/rpm --showrc | \
+ grep "^compatible archs")
+
+ [[ $? -eq 0 ]] &&
+ archs=$(echo $procinfo | sed 's/^compatible archs : //')
+
+ [[ -n $archs ]] &&
+ log "RPM-reported compatible architectures: $archs"
+ fi
+
+ #
+ # Either the miniroot isn't booted or asking rpm for the information
+ # failed for some reason, so make some reasonable assumptions.
+ #
+ if [[ -z $archs ]]; then
+ procinfo=$(LC_ALL=C psrinfo -vp | grep family)
+
+ #
+ # Check for additional processor capabilities
+ #
+ if [[ "$procinfo" = *" family 6 "* ||
+ "$procinfo" = *" family 15 "* ||
+ "$procinfo" = *" family 16 "* ||
+ "$procinfo" = *" family 17 "* ]]; then
+ if [[ "$procinfo" = *AuthenticAMD* ]]; then
+ #
+ # Linux gives "athlon" packages precedence
+ # over "i686" packages, so duplicate that
+ # here.
+ #
+ archs="athlon i686"
+ else
+ archs="i686"
+ fi
+ fi
+
+ archs="$archs i586 i486 i386 noarch"
+
+ log "Derived compatible architectures: $archs"
+ fi
+
+ verboselog "RPM source directory:\n \"$rpmdir\"\n"
+
+ if [[ $# -eq 1 ]]; then
+ msg=$(gettext "Attempting to locate 1 package...")
+ screenlog "$msg"
+ else
+ screenlog "$locate_npkgs" "$#"
+ fi
+
+ for rpm in "$@"; do
+ #
+ # Search for the appropriate RPM, using the compatible
+ # architecture list contained in "archs" to look for the best
+ # match.
+ #
+ # For example, if the processor is an i686, and the rpm is
+ # "glibc", the script will look for the files (in order):
+ #
+ # glibc[.-][0-9]*.i686.rpm
+ # glibc[.-][0-9]*.i586.rpm
+ # glibc[.-][0-9]*.i486.rpm
+ # glibc[.-][0-9]*.i386.rpm
+ # glibc[.-][0-9]*.noarch.rpm
+ # glibc[.-][0-9]*.fat.rpm
+ #
+ # and will stop when it finds the first match.
+ #
+ # TODO: Once the miniroot is booted, we should verify that
+ # the rpm name has been expanded to "$rpmfile" properly
+ # by comparing "$rpm" and the output of:
+ #
+ # zlogin -z <zone> /bin/rpm --qf '%{NAME}' -qp $rpmfile
+ #
+ for arch in $archs; do
+ #
+ # Use the filename globbing functionality of ksh's
+ # echo command to search for the file we want.
+ #
+ # If no matching file is found, echo will simply
+ # return the passed string.
+ #
+ rpmglob="$rpm[.-][0-9]*.$arch.rpm"
+ rpmfile="$(echo $rpmglob)"
+
+ [[ "$rpmfile" != "$rpmglob" ]] && break
+
+ unset rpmfile
+ done
+
+ if [[ -z $rpmfile ]]; then
+ rpms_left[$left]="$rpm"
+ ((left += 1))
+ else
+ rpms_found[$found]="$rpmfile"
+ ((found += 1))
+ fi
+ done
+
+ cd "$curdir"
+ log "\"$rpmdir\": matched $found of $# packages."
+ log "\"$rpmdir\": $left RPMs remaining."
+ return 0
+}
+
+#
+# Build the rpm lists used to install a machine.
+#
+# The first argument is the number of discs in the distribution. The
+# second, optional, argument is the metacluster to install.
+#
+# The array "distro_rpm[]" is built from the individual package RPM arrays
+# read in from an individual distribution definition file.
+#
+build_rpm_list()
+{
+ # Default to a desktop installation
+ typeset cluster=desktop
+ typeset cnt=0
+ typeset pkgs
+
+ for clust in "$@"; do
+ ((cnt += 1))
+ case $clust in
+ core) cluster=core ;;
+ desk*) cluster=desktop ;;
+ serv*) cluster=server ;;
+ dev*) cluster=developer ;;
+ all) cluster=all
+ break;;
+ *) screenlog "$unknown_clust" "$clust"
+ exit $ZONE_SUBPROC_USAGE ;;
+ esac
+ done
+
+ if [ $cnt -gt 1 ]; then
+ msg=$(gettext "Too many install clusters specified")
+ screenlog "$msg"
+ exit $ZONE_SUBPROC_USAGE
+ fi
+
+ screenlog "$inst_clust" $cluster
+
+ case $cluster in
+ core) distro_rpms=$distro_core_rpms ;;
+ desktop) distro_rpms=$distro_desktop_rpms ;;
+ server) distro_rpms=$distro_server_rpms ;;
+ developer) distro_rpms=$distro_developer_rpms ;;
+ all) distro_rpms=$distro_all_rpms ;;
+ esac
+
+ # The RPMs in the miniroot must all be installed properly as well
+ distro_rpms="$distro_miniroot_rpms $distro_rpms"
+}
+
+#
+# Install the "miniroot" minimal Linux environment that is booted single-user
+# to complete the install.
+#
+# This works by doing feeding the RPM list needed for the installation one
+# by one to rpm2cpio(1).
+#
+# Usage:
+# install_miniroot <mounted media dir> <names of RPMS to install>
+#
+#
+install_miniroot()
+{
+ typeset mediadir="$1"
+ typeset rpm
+
+ shift
+
+ #
+ # There's a quirk in our version of ksh that sometimes resets the
+ # trap handler for the shell. Since RPM operations will be the
+ # longest part of any given install, make sure that an interrupt while
+ # the command is running will bring the miniroot down and clean up
+ # the interrupted install.
+ #
+ trap trap_cleanup INT
+
+ if [[ $# -eq 1 ]]; then
+ msg=$(gettext "Installing %s miniroot package...")
+ else
+ msg=$(gettext "Installing %s miniroot packages...")
+ fi
+
+ screenlog "\n$msg" "$#"
+
+ for rpm in "$@"; do
+ verboselog "\nInstalling \"$rpm\" to miniroot at\n" \
+ " \"$zoneroot\"..."
+
+ rpm2cpio "$mediadir/$rd_rpmdir/$rpm" | \
+ ( cd "$rootdir" && cpio -idu ) 1>&2
+
+ if [[ $? -ne 0 ]]; then
+ screenlog "$mini_instfail" "$rpm"
+ return 1
+ fi
+ done
+
+ screenlog ""
+ return 0
+}
+
+#
+# Install the zone from the mounted disc image by feeding a list of RPMs to
+# install from this image to RPM running on the zone via zlogin(1).
+#
+# Usage:
+# install_zone <path to mounted install media> [<names of RPMS to install>]
+#
+# If the caller doesn't supply a list of RPMs to install, we install any
+# we previously stashed away in the deferred RPMs directory.
+#
+install_zone()
+{
+ #
+ # Convert the passed install media pathname to a zone-relative path
+ # by stripping $rootpath from the head of the path.
+ #
+ typeset zonerpmdir="${1##$rootdir}/$rd_rpmdir"
+
+ typeset defdir="$rootdir/var/lx_install/deferred_rpms"
+ typeset mounted_root="$1"
+ typeset rpmopts="-i"
+
+ typeset defer
+ typeset deferred_found
+ typeset install_rpms
+ typeset nrpms
+ typeset rpm
+ typeset rpmerr
+
+ shift
+
+ #
+ # If the caller provided a list of RPMs, determine which of them
+ # should be installed now, and which should be deferred until
+ # later.
+ #
+ if [[ $# -gt 0 ]]; then
+ if [[ -n $deferred_rpms ]]; then
+ [[ -d $defdir ]] || if ! mkdir -p $defdir; then
+ screenlog "$install_defmkfail" "$mntdir"
+ return 1
+ fi
+
+ msg=$(gettext "Checking for deferred packages...")
+ screenlog "$msg"
+
+ find_packages "$mounted_root" $deferred_rpms
+ deferred_found="${rpms_found[@]}"
+ numdeferred=${#rpms_found[@]}
+ else
+ deferred_found=""
+ fi
+
+ install_rpms="$@"
+ nrpms=$#
+
+ #
+ # If this distro has any deferred RPMs, we want to simply
+ # copy them into the zone instead of installing them. We
+ # then remove them from the list of RPMs to be installed on
+ # this pass.
+ #
+ for rpm in $deferred_found; do
+ if echo "$install_rpms" | egrep -s "$rpm"; then
+ verboselog "Deferring installation of \"$rpm\""
+
+ #
+ # Remove the RPM from the install_rpms list
+ # and append it to the deferred_saved array
+ #
+ install_rpms=$(echo "$install_rpms " |
+ sed "s/ $rpm / /g")
+
+ # remove trailing spaces, if any
+ install_rpms=${install_rpms%%+( )}
+
+ deferred_saved[${#deferred_saved[@]}]="$rpm"
+
+ if ! cp "$mounted_root/$rd_rpmdir/$rpm" \
+ "$defdir"; then
+ screenlog "$install_defcpfail" "$rpm"
+ return 1
+ fi
+ fi
+
+ #
+ # If we've deferred the installation of EVERYTHING,
+ # simply return success
+ #
+ [[ -z $install_rpms ]] && return 0
+ done
+
+ [[ -n $deferred_found ]] & verbose ""
+ elif [[ -z $deferred_saved ]]; then
+ # There are no deferred RPMs to install, so we're done.
+ return 0
+ else
+ # Install the RPMs listed in the deferred_saved array
+ install_rpms=${deferred_saved[@]}
+ nrpms=${#deferred_saved[@]}
+ zonerpmdir=/var/lx_install/deferred_rpms
+ defer="deferred "
+ fi
+
+ #
+ # There's a quirk in our version of ksh that sometimes resets the
+ # trap handler for the shell. Since RPM operations will be the
+ # longest part of any given install, make sure that an interrupt while
+ # the command is running will bring the miniroot down and clean up
+ # the interrupted install.
+ #
+ trap trap_cleanup INT
+
+ #
+ # Print a message depending on how many RPMS we have to install.
+ #
+ # 25 RPMS seems like a reasonable boundary between when an install may
+ # take a "few" or "several" minutes; this may be tuned if needed.
+ #
+ screenlog ""
+
+ if [[ $nrpms -eq 1 ]]; then
+ screenlog "$install_one_rpm" "$defer"
+ elif [[ $nrpms -lt 25 ]]; then
+ screenlog "$install_nrpms_few" "$nrpms" "$defer"
+ else
+ screenlog "$install_nrpms_several" "$nrpms" "$defer"
+
+ #
+ # For installs of over 600 packages or so, it can take rpm a
+ # really, REALLY long time to output anything, even when
+ # running in verbose mode.
+ #
+ # For example, when doing an "all" install from a DVD or DVD
+ # ISO, depending on the speed of the optical drive and the
+ # speed of the machine's CPU(s), it may be up to TEN MINUTES or
+ # MORE before rpm prints out its "Processing..." message even
+ # though it is, in fact, processing the entire package list,
+ # checking for dependencies (something it is unfortunately
+ # entirely silent about.)
+ #
+ # Since the user might otherwise think the install was hung
+ # when running in verbose mode, warn them that it could be
+ # quite a while before they see any further output from the
+ # installer.
+ #
+ #
+ [[ $nrpms -gt 600 ]] && verbose "$install_longwait"
+ fi
+
+ log ""
+ log "Installing: $install_rpms"
+ log ""
+ log "NOTE: Any messages appearing below prefixed with \"warning:\""
+ log " and/or that do not cause the installer to abort the"
+ log " installation process may safely be ignored."
+ log ""
+
+ echo
+
+ # If verbose mode is selected, run rpm in verbose mode as well.
+ [[ -n $verbose_mode ]] && rpmopts="-ivh"
+
+ #
+ # LX_INSTALL must be defined when running this command in order to
+ # enable switches built into various emulated system calls to allow
+ # the dev package (which may not actually write to /dev) to function.
+ #
+ zlogin "$zonename" "( cd "$zonerpmdir" ; LX_INSTALL=1 \
+ /bin/rpm $rpmopts --force --aid --nosignature --root /a \
+ $install_rpms )"
+
+ rpmerr=$?
+
+ if [[ $rpmerr -ne 0 ]]; then
+ log ""
+ log "Zone rpm install command exited abnormally, code $rpmerr"
+ log ""
+
+ screenlog "$zone_instfail" "$zonename" "$zonerpmdir" "$rpmerr"
+ return 1
+ fi
+
+ log ""
+ log "$nrpms package(s) installed."
+
+ return 0
+}
+
+#
+# Attempt to unmount all file systems passed on the command line
+#
+# Returns 0 if all umounts succeeded, otherwise the number of umount failures
+#
+umount_list()
+{
+ typeset failures=0
+ typeset mounted
+
+ unset umount_failures
+
+ for mounted in "$@"; do
+ if ! umount "$mounted"; then
+ umount_failures="$umount_failures $mounted"
+ ((failures += 1))
+ fi
+ done
+
+ return $failures
+}
+
+#
+#
+# Set up lofi mounts required for chroot(1M) to work on a new root directory
+# located in /a within a zone.
+#
+newroot_lofimnt()
+{
+ typeset dev
+ typeset mounted
+ typeset target
+
+ unset newroot_mounted
+
+ #
+ # /usr and /lib get lofs mounted in the zone on /native read-only
+ #
+ # $zoneroot/dev gets lofs mounted on /native/dev read/write to allow
+ # the use of native devices.
+ #
+ mount -F lofs -r /lib "$rootdir/a/native/lib" || return 1
+ newroot_mounted="$rootdir/a/native/lib"
+
+ if ! mount -F lofs -r /usr "$rootdir/a/native/usr"; then
+ umount "$rootdir/a/native/lib"
+ unset newroot_mounted
+ return 1
+ fi
+
+ newroot_mounted="$newroot_mounted $rootdir/a/native/usr"
+
+ if ! mount -F lofs "$zoneroot/root/native/dev" \
+ "$rootdir/a/native/dev"; then
+ umount_list $newroot_mounted
+ unset newroot_mounted
+ return 1
+ fi
+
+ newroot_mounted="$newroot_mounted $rootdir/a/native/dev"
+
+ #
+ # This is a bit ugly; to provide device access within the chrooted
+ # environment RPM will use for its install, we will create the same
+ # symlinks "$rootdir/dev" contains in the new dev directory, and will
+ # lofs mount the balance of "$rootdir/dev" into the same locations in
+ # /dev in the new filesystem we're installing to.
+ #
+ for dev in "$zoneroot"/root/dev/*
+ do
+ if [[ "$dev" = "$zoneroot/root/dev/*" ]]; then
+ log "ERROR: No files found in $zoneroot/root/dev"
+ umount_list $newroot_mounted
+ return 1
+ fi
+
+ target="$rootdir/a/dev/$(basename $dev)"
+
+ #
+ # If the device file is a symbolic link, create a new link
+ # in the target directory with the same source.
+ #
+ # If the device file is any other file or directory, lofs
+ # mount it from the device directory into the target directory.
+ #
+ if [[ -h $dev ]]; then
+ typeset source=$(LC_ALL=C file -h "$dev")
+
+ #
+ # Remove extraneous text from the output of file(1) so
+ # we're left only with the target path of the symbolic
+ # link.
+ #
+ source="${source##*link to }"
+
+ [[ -a "$target" ]] && /bin/rm -f "$target"
+
+ if ! ln -s "$source" "$target"; then
+ screenlog "$symlink_failed" "$source" "$target"
+ umount_list $newroot_mounted
+ unset newroot_mounted
+ return 1
+ fi
+ else
+ [[ ! -a "$target" ]] && touch "$target"
+
+ if ! mount -F lofs "$dev" "$target"; then
+ screenlog "$lofs_failed" "$dev" "$target"
+ umount_list $newroot_mounted
+ unset newroot_mounted
+ return 1
+ fi
+
+ newroot_mounted="$newroot_mounted $target"
+ fi
+
+ done
+
+ return 0
+}
+
+#
+# Replace the root directory of a zone with the duplicate previously created
+# in the zone's /a directory.
+#
+replace_miniroot()
+{
+ #
+ # The zoneadm halt will automatically unmount any file systems
+ # mounted via lofs in the zone, so that saves us from having to
+ # methodically unmount each one.
+ #
+ if ! zoneadm -z "$zonename" halt; then
+ screenlog "$zone_haltfail" "$zonename"
+ return 1
+ fi
+
+ unset miniroot_booted
+ unset newroot_mounted
+
+ [[ -d "$zoneroot/a" ]] && rm -rf "$zoneroot/a"
+ [[ -d "$zoneroot/oldroot" ]] && rm -rf "$zoneroot/oldroot"
+
+ #
+ # Copy the logfile or we'll lose all details of the install into the
+ # new root directory, so strip "$zoneroot" off the pathname of the
+ # current logfile and use it to generate the pathname of the log file
+ # in the new root directory.
+ #
+ [[ -n $logfile && -f "$logfile" ]] &&
+ cp "$logfile" "$rootdir/a${logfile##$rootdir}"
+
+ mv -f "$rootdir/a" "$zoneroot/a" || return 1
+ mv -f "$rootdir" "$zoneroot/oldroot" || return 1
+ mv -f "$zoneroot/a" "$rootdir" || return 1
+
+ #
+ # After the directory munging above, we've moved the new copy of the
+ # logfile atop the logfile we WERE writing to, so if we don't reopen
+ # the logfile here the shell will continue writing to the old logfile's
+ # inode, meaning we would lose all log information from this point on.
+ #
+ [[ -n $logfile ]] && exec 2>>"$logfile"
+
+ rm -rf "$zoneroot/oldroot"
+
+ #
+ # Remove the contents of the /dev directory created by the install.
+ #
+ # We don't technically need to do this, but the zone infrastructure
+ # will mount $zoneroot/dev atop $rootdir/dev anyway, hiding its
+ # contents so we may as well clean up after ourselves.
+ #
+ # The extra checks are some basic paranoia due to the potentially
+ # dangerous nature of this command but are not intended to catch all
+ # malicious cases
+ #
+ [[ "$rootdir" != "" && "$rootdir" != "/" ]] && rm -rf "$rootdir"/dev/*
+
+ return 0
+}
+
+setup_miniroot()
+{
+ unset miniroot_booted
+
+ if ! "$cwd/lx_init_zone" "$zonename" "$rootdir" mini; then
+ screenlog "$mini_initfail" "$zonename"
+ return 1
+ fi
+
+ if ! copy_miniroot; then
+ screenlog "$mini_copyfail" "$zonename"
+ return 1
+ fi
+
+ #
+ # zoneadm gets upset if the zone root directory is group or world
+ # readable or executable, so make sure it isn't before proceeding.
+ #
+ chmod 0700 "$zoneroot"
+
+ msg=$(gettext "Booting zone miniroot...")
+ screenlog "$msg"
+
+ if ! zoneadm -z "$zonename" boot -f; then
+ screenlog "$mini_bootfail" "$zonename"
+ return 1
+ fi
+
+ miniroot_booted=1
+
+ #
+ # Now that the miniroot is booted, unset the compatible architecture
+ # list that find_packages was using for the miniroot so that it will
+ # get the list from rpm for the full install.
+ #
+ unset archs
+
+ #
+ # Mount all the filesystems needed to install the new root
+ # directory.
+ #
+ if ! newroot_lofimnt; then
+ screenlog "$mini_mntfsfail" "$zonename"
+
+ if [[ -n $newroot_mounted ]]; then
+ umount_list $newroot_mounted
+ unset newroot_mounted
+ fi
+ return 1
+ fi
+
+ #
+ # Attempt to initialize the RPM database for the new zone
+ #
+ if ! zlogin "$zonename" /bin/rpm --initdb --root /a; then
+ screenlog "$rpm_initfail" "$zonename"
+ return 1
+ fi
+
+ msg=$(gettext "Miniroot zone setup complete.")
+ screenlog "$msg"
+ return 0
+}
+
+finish_install()
+{
+ #
+ # Perform some last cleanup tasks on the newly installed zone.
+ #
+ # Note that the zlogin commands aren't checked for errors, as the
+ # newly installed zone will still boot even if the commands fail.
+ #
+ typeset file
+
+ typeset defdir=$rootdir/var/lx_install/deferred_rpms
+
+ msg=$(gettext "Completing installation; this may take a few minutes.")
+ screenlog "$msg"
+
+ if [[ -d $defdir ]]; then
+ rm -f $defdir/*.rpm
+ rmdir $defdir
+ fi
+
+ # Run ldconfig in the new root
+ zlogin "$zonename" /usr/sbin/chroot /a \
+ /sbin/ldconfig -f /etc/ld.so.conf
+
+ #
+ # Create the /etc/shadow and /etc/gshadow files if they don't already
+ # exist
+ #
+ [[ -a "$rootdir/a/etc/shadow" ]] ||
+ zlogin "$zonename" /usr/sbin/chroot /a /usr/sbin/pwconv
+
+ [[ -a "$rootdir/a/etc/gshadow" ]] ||
+ zlogin "$zonename" /usr/sbin/chroot /a /usr/sbin/grpconv
+
+ #
+ # Make sure all init.d and rc[0-6].d links are set up properly.
+ #
+ for file in `ls "$rootdir/a/etc/init.d"`; do
+ zlogin "$zonename" /usr/sbin/chroot /a \
+ /sbin/chkconfig --del $file > /dev/null 2>&1
+
+ zlogin "$zonename" /usr/sbin/chroot /a \
+ /sbin/chkconfig --add $file > /dev/null 2>&1
+ done
+
+ replace_miniroot
+
+ rmdir -ps "$media_mntdir"
+
+ if ! "$cwd/lx_init_zone" "$zonename" "$rootdir"; then
+ screenlog "$zone_initrootfail" "$zonename"
+ return 1
+ fi
+
+ return 0
+}
+
+#
+# Duplicate the installed "miniroot" image in a subdirectory of the base
+# directory of the zone.
+#
+# This is done so that a new root directory can be created that will be used
+# as the root of a chrooted directory that RPM running on the zone will install
+# into.
+#
+copy_miniroot()
+{
+ #
+ # Create the directory $zoneroot/a if it doesn't already exist
+ #
+ [[ -d "$zoneroot/a" ]] ||
+ { mkdir -p "$zoneroot/a" || return 1 ; }
+
+ msg=$(gettext "Duplicating miniroot; this may take a few minutes...")
+ screenlog "$msg"
+
+ #
+ # Duplicate the miniroot to /a, but don't copy over any /etc/rc.d or
+ # lxsave_ files.
+ #
+ ( cd "$rootdir"; find . -print | egrep -v "/etc/rc\.d|lxsave_" | \
+ cpio -pdm ../a )
+
+ [[ -d "$rootdir/a" ]] && rm -rf "$rootdir/a" 2>/dev/null
+ mv -f "$zoneroot/a" "$rootdir/a" || return 1
+
+ return 0
+}
+
+#
+# Read the first six lines of the .discinfo file from the root of the passed
+# disc directory (which should either be a mounted disc or ISO file.)
+#
+# The read lines will be used to set appropriate shell variables on success:
+#
+# rd_line[0]: Disc Set Serial Number (sets rd_serial)
+# rd_line[1]: Distribution Release Name (sets rd_release)
+# rd_line[2]: Distribution Architecture (sets rd_arch)
+# rd_line[3]: Disc Number$[s] in Distribution (sets rd_cdnum)
+# rd_line[4]: "base" directory for disc (currently unused)
+# rd_line[5]: RPM directory for disc (sets rd_rpmdir)
+#
+# Returns 0 on success, 1 on failure.
+#
+read_discinfo()
+{
+ typeset rd_file="$1/.discinfo"
+
+ unset rd_arch
+ unset rd_cdnum
+ unset rd_disctype
+ unset rd_pers
+ unset rd_release
+ unset rd_rpmdir
+ unset rd_serial
+
+ #
+ # If more than one argument was passed to read_discinfo, the second
+ # is a flag meaning that we should NOT print a warning message if
+ # we don't find a .discinfo file, as this is just a test to see if
+ # a distribution ISO is already mounted on the passed mount point.
+ #
+ if [[ ! -f "$rd_file" ]]; then
+ [[ $# -eq 1 ]] &&
+ screenlog "$discinfo_nofile" "$rd_file"
+ return 1
+ fi
+
+ verbose "Attempting to read \"$rd_file\"..."
+
+ if [[ ! -r "$rd_file" ]]; then
+ screenlog "$discinfo_notreadable" "$rd_file"
+ return 1
+ fi
+
+ typeset rd_line
+ typeset linenum=0
+
+ while read -r rd_line[$linenum]; do
+ #
+ # If .discinfo architecture isn't "i386," fail here as
+ # we only support i386 distros at this time.
+ #
+ if [[ $linenum = 2 && "${rd_line[2]}" != "i386" ]]; then
+ screenlog "$discinfo_wrongarch" "$rd_file" \
+ "${rd_line[2]}"
+ return 1
+ fi
+
+ #
+ # We've successfully read the first six lines of .discinfo
+ # into $rd_line, so do the appropriate shell variable munging.
+ #
+ if ((linenum == 5)); then
+ rd_serial=${rd_line[0]}
+ rd_release=${rd_line[1]}
+
+ # CentOS names their releases "final"
+ [[ "$rd_release" = "final" ]] && rd_release="CentOS"
+
+ #
+ # Line four of the .discinfo file contains either a
+ # single disc number for a CD or a comma delimited list
+ # representing the CDs contained on a particular DVD.
+ #
+ rd_cdnum=${rd_line[3]}
+
+ if [[ "$rd_cdnum" = *,* ]]; then
+ rd_disctype="DVD"
+ else
+ rd_disctype="CD"
+ fi
+
+ rd_rpmdir=${rd_line[5]}
+
+ #
+ # If the specified RPM directory doesn't exist, this is
+ # not a valid binary RPM disc (it's most likely a
+ # source RPM disc), so don't add it to the list of
+ # valid ISO files.
+ #
+ [[ ! -d "$1/$rd_rpmdir" ]] && return 1
+
+ if [[ "$rd_cdnum" = "1" &&
+ "$rd_release" = "Red Hat"* ]]; then
+ typeset rh_glob
+
+ #
+ # If this is a Red Hat release, get its
+ # personality name from the name of the
+ # redhat-release RPM package.
+ #
+ # Start by looking for the file
+ # "redhat-release-*.rpm" in the directory
+ # RedHat/RPMS of the ISO we're examining by
+ # using ksh's "echo" command to handle
+ # filename globbing.
+ #
+ # If no matching file is found, echo will
+ # simply return the passed string.
+ #
+ rh_glob="$1/RedHat/RPMS/redhat-release-*.rpm"
+ rd_pers="$(echo $rh_glob)"
+
+ if [[ "$rd_pers" != "$rh_glob" ]]; then
+ #
+ # An appropriate file was found, so
+ # extract the personality type from the
+ # filename.
+ #
+ # For example, the presence of the file:
+ #
+ # redhat-release-3WS-13.5.1.i386.rpm
+ #
+ # would indicate the ISO either
+ # represents a "WS" personality CD or
+ # a "WS" installation DVD.
+ #
+ # Start the extraction by deleting the
+ # pathname up to the personality type.
+ #
+ rh_glob="*/redhat-release-[0-9]"
+ rd_pers="${rd_pers##$rh_glob}"
+
+ #
+ # Now remove the trailing portion of the
+ # pathname to leave only the personality
+ # type, such as "WS" or "ES."
+ #
+ rd_pers="${rd_pers%%-*\.rpm}"
+ else
+ unset rd_pers
+ fi
+ fi
+
+ return 0
+ fi
+
+ ((linenum += 1))
+ done < "$rd_file"
+
+ #
+ # The file didn't have at least six lines, so indicate that parsing
+ # failed.
+ #
+ return 1
+}
+
+#
+# Mount install media within the zone.
+#
+# The media will be mounted at $zoneroot/root/media, either via a loopback
+# mount (if it's a managed removable disc) or directly (if the media is an ISO
+# file or if the specified filename is a block device.)
+#
+# Returns 0 on success, 1 on failure, 2 if no disc was available
+#
+mount_install_media()
+{
+ typeset device="$1"
+ typeset mount_err
+
+ unset removable
+ unset zone_mounted
+
+ [[ -z $mntdir ]] && return 1
+
+ [[ -d $mntdir ]] || if ! mkdir -p $mntdir; then
+ screenlog "$mk_mntfail" "$mntdir"
+ unset mntdir
+ return 1
+ fi
+
+ if [[ "$install_media" = "disc" && "$managed_removable" = "1" ]]; then
+ #
+ # The removable disc device is an automatically managed one,
+ # so just wait for the device mounter to notice a disc has been
+ # inserted into the drive and for the disc to appear at the
+ # mount point.
+ #
+ typeset mount_interval=2
+ typeset mount_timeout=10
+ typeset mount_timer=0
+
+ typeset nickname=$(basename $device)
+
+ eject -q "$nickname" > /dev/null 2>&1 || return 2
+ removable="$nickname"
+
+ #
+ # Double check that the device was mounted. If it wasn't, that
+ # usually means the disc in the drive isn't in a format we can
+ # read or the physical disc is unreadable in some way.
+ #
+ # The mount_timer loop is needed because the "eject -q" above
+ # may report a disc is available before the mounter associated
+ # with the drive actually gets around to mounting the device,
+ # so we need to give it a chance to do so. The mount_interval
+ # allows us to short-circuit the timer loop as soon as the
+ # device is mounted.
+ #
+ while ((mount_timer < mount_timeout)); do
+ [[ -d "$device" ]] && break
+
+ sleep $mount_interval
+ ((mount_timer += mount_interval))
+ done
+
+ if [[ ! -d "$device" ]]; then
+ screenlog "\n$unknown_media" "$device"
+ return 2
+ fi
+
+ mount -F lofs -r "$device" "$mntdir"
+ mount_err=$?
+ else
+ #
+ # Attempt to mount the media manually.
+ #
+ # First, make sure the passed device name really IS a device.
+ #
+ [[ -b "$device" ]] || return 2
+
+ #
+ # Now check to see if the device is already mounted and lofi
+ # mount the existing mount point into the zone if it is.
+ #
+ if get_mountdir "$device"; then
+ mount -F lofs -r "$mount_dir" "$mntdir"
+ mount_err=$?
+ else
+ [[ "$install_media" = "disc" ]] && removable="$device"
+
+ # It wasn't mounted, so go ahead and try to do so.
+ mount -F hsfs -r "$device" "$mntdir"
+ mount_err=$?
+ fi
+
+ # A mount_err of 33 means no suitable media was found
+ ((mount_err == 33)) && return 2
+ fi
+
+ if ((mount_err != 0)); then
+ screenlog "$mountfail" "$device" "$mntdir"
+ unset mntdir
+ return 1
+ fi
+
+ zone_mounted="$mntdir"
+ verbose "Mount of \"$device\" on \"$mntdir\" succeeded."
+ return 0
+}
+
+# Eject the disc mounted on the passed directory name
+eject_removable_disc()
+{
+ screenlog ""
+ verbose " (Attempting to eject '$removable'... \c"
+
+ if [[ -n $zone_mounted ]]; then
+ umount "$zone_mounted"
+ unset zone_mounted
+ fi
+
+ if ! eject "$removable"; then
+ verbose "failed.)\n"
+ screenlog "$eject_fail" "$removable"
+
+ msg=$(gettext "Please eject the disc manually.")
+ screenlog "$msg"
+ else
+ verbose "done.)\n"
+ fi
+
+ unset removable
+}
+
+#
+# Ask for the user to provide a disc or ISO.
+#
+# Returns 0 on success, 1 on failure.
+#
+prompt_for_media()
+{
+ # No prompting is allowed in silent mode.
+ if [[ -n $silent_mode ]]; then
+ log "$silent_err_msg"
+ return 1
+ fi
+
+ if [[ "$1" != "" ]]; then
+ msg="$release_name, CD $1"
+ else
+ typeset disc=$(gettext "disc")
+
+ msg=$(gettext "any")
+ msg="$msg $release_name $disc"
+ fi
+
+ if [[ "$install_media" = "disc" ]]; then
+ screenlog "$insert_discmsg" "$msg" "$release_name"
+
+ msg=$(gettext "drive and press <RETURN>.")
+ screenlog " $msg"
+
+ [[ -n $removable ]] && eject_removable_disc
+ else
+ if [[ -n $zone_mounted ]]; then
+ umount "$mntdir"
+ unset zone_mounted
+ fi
+
+ #
+ # This is only be printed in the case of a user
+ # specifying a device name as an install medium.
+ # This is handy for testing the installer or if the user
+ # has ISOs stored in some strange way that somehow
+ # breaks the "install from ISO" mechanism, as ISOs
+ # can be manually added using lofiadm(1M) command and
+ # the resulting lofi device name passed to the
+ # installer.
+ #
+ screenlog "$mount_proper_iso1" "$msg"
+ screenlog " $mount_proper_iso2" "$release_name" "$mntdev"
+
+ msg=$(gettext "and press <RETURN>.")
+ screenlog " $msg"
+ fi
+
+ read && return 0
+
+ return 1
+}
+
+#
+# Get a particular CD of a multi-disc set.
+#
+# This basically works by doing the following:
+#
+# 1) Mount the disc
+# 2) Read the disc's .discinfo file to see which CD it is or represents
+# 3) If it doesn't contain the desired CD, ask the user for a disc
+# containing the CD we wanted.
+#
+# Returns 0 on success, 1 on failure.
+#
+get_cd()
+{
+ typeset mntdev="$1"
+
+ typeset cdnum
+ typeset discname
+ typeset enter
+ typeset mount_err
+ typeset prompted
+
+
+ if [[ $# -eq 2 ]]; then
+ # Caller specified a particular CD to look for
+ cdnum="$2"
+ discname="$release_name, CD $cdnum"
+ else
+ # Caller wanted any disc
+ discname="a $release_name disc"
+ fi
+
+ verboselog "\nChecking for $discname on device"
+ verboselog " \"$mntdev\"\n"
+
+ while :; do
+ # Check to see if a distro disc is already mounted
+ mntdir="$media_mntdir"
+
+ unset rd_disctype
+ if ! read_discinfo "$mntdir" "test"; then
+ mount_install_media "$mntdev"
+ mount_err=$?
+
+ #
+ # If the mount succeeded, continue on in the main
+ # script
+ #
+ if ((mount_err == 0)); then
+ read_discinfo "$mntdir"
+ elif ((mount_err == 2)); then
+ # No medium was found, so prompt for one.
+ prompt_for_media "$cdnum" && prompted=1 continue
+
+ unset mntdir
+ return 1
+ else
+ # mount failed
+ unset mntdir
+ return 1
+ fi
+ fi
+
+ if [[ -n $distro_serial &&
+ "$rd_serial" != "$distro_serial" ]]; then
+ screenlog "$wrong_serial" "$install_disctype"
+ screenlog " $wrong_ser_expect" "$rd_serial" \
+ "$distro_serial"
+
+ #
+ # If we're installing from ISOs, don't prompt the user
+ # if the wrong serial number is present, as there's
+ # nothing they can do about it.
+ #
+ [[ "$install_media" = "ISO" ]] && return 1
+
+ prompt_for_media "$cdnum" && continue
+
+ umount "$mntdir"
+ unset zone_mountdir
+ return 1
+ fi
+
+ #
+ # Make sure that the mounted media is CD $cdnum.
+ #
+ # If it is, return to the caller, otherwise eject the
+ # disc and try again.
+ #
+ if [[ "$rd_disctype" = "CD" ]]; then
+ verboselog "Found CD #$rd_cdnum," \
+ "Serial #$rd_serial"
+ verboselog "Release Name \"$rd_release\""
+
+ [[ -n $rd_pers ]] &&
+ verboselog "Detected RedHat Personality" \
+ "\"$rd_pers\""
+
+ verboselog ""
+
+ # If we didn't care which CD it was, return success
+ [[ "$cdnum" = "" ]] && return 0
+
+ # Return if the CD number read is a match
+ [[ "$rd_cdnum" = "$cdnum" ]] && return 0
+ else
+ verboselog "\nFound DVD (representing CDs" \
+ "$rd_cdnum), Serial #$rd_serial"
+ verboselog "Release Name \"$rd_release\"\n"
+
+ [[ -n $rd_pers ]] &&
+ verboselog "Detected RedHat Personality" \
+ "\"$rd_pers\""
+
+ verboselog ""
+
+ # If we didn't care which CD it was, return success
+ [[ "$cdnum" = "" ]] && return 0
+
+ #
+ # Since a DVD represents multiple CDs, make sure the
+ # DVD inserted represents the CD we want.
+ #
+ { echo "$rd_cdnum," | egrep -s "$cdnum," ; } &&
+ return 0
+ fi
+
+ if [[ -n $prompted ]]; then
+ if [[ "$rd_disctype" = "CD" ]]; then
+ screenlog "$wrong_cd" "$rd_cdnum" "$cdnum"
+ else
+ msg=$(gettext "Incorrect DVD inserted.")
+ screenlog "$msg"
+
+ log "(DVD represented CDs $rd_cdnum," \
+ " wanted CD $cdnum)"
+ fi
+ fi
+
+ #
+ # If we're installing from ISOs, don't prompt the user if the
+ # wrong CD is mounted, as there's nothing they can do about it.
+ #
+ [[ "$install_media" = "ISO" ]] && return 1
+
+ prompt_for_media "$cdnum" && prompted=1 && continue
+
+ umount "$mntdir"
+ unset zone_mountdir
+ return 1
+ done
+}
+
+#
+# Find out which distro the mounted disc belongs to by comparing the
+# mounted disc's serial number against those contained in the various
+# distro files.
+#
+# When a match is found, the shell variable "distro_file" will be set to
+# the name of the matching file. Since that will have been the last file
+# sourced by the shell, there's no need for the caller to do it again; the
+# variable is only set in case it's of some use later.
+#
+# Returns 0 on success, 1 on failure.
+#
+get_disc_distro()
+{
+ typeset distro
+ typeset distro_files="$(echo $distro_dir/*.distro)"
+
+ unset distro_file
+
+ [[ "$distro_files" = "$distro_dir/*.distro" ]] && return 1
+
+ for distro in $distro_files; do
+ [[ ! -f "$distro" ]] && continue
+
+ verbose "Checking for disc distro \"$distro\"..."
+
+ . "$distro" > /dev/null
+
+ [[ "$rd_serial" != "$distro_serial" ]] && continue
+
+ distro_file="$distro"
+ release_name="$rd_release $distro_version"
+ distro_ncds=${#distro_cdorder[@]}
+
+ return 0
+ done
+
+ return 1
+}
+
+#
+# Iterate through the install media to install the miniroot and full zone
+#
+# The install media may be physical discs, a lofi mounted ISO file, or
+# iso files located in a directory specified by the user.
+#
+# All installations, regardless of media type, use a CD as their basic media
+# unit. DVDs or ISOs representing DVDs actually contain multiple "CDs" of
+# installation packages.
+#
+# The variable "distro_ncds," as set elsewhere, represents the number
+# of CDs required to install the distribution. Whether the installation
+# actually requires multiple physical discs or ISOs depends upon their content.
+#
+# Returns 0 on success, 1 on failure.
+#
+iterate_media()
+{
+ typeset cdnum=1
+ typeset cds
+ typeset disc_rpms
+ typeset err_media
+ typeset err_msg
+ typeset install_type="$1"
+ typeset ldevs
+ typeset mountdev
+ typeset rh_pers
+
+ shift
+
+ if [[ "$install_type" = "miniroot" ]]; then
+ typeset i
+
+ disc_rpms=$distro_miniroot_rpms
+ err_msg="$mini_mediafail"
+
+ # For miniroot installs, ask for CDs in numerical order
+ cds[0]="zero_pad"
+
+ for i in ${distro_cdorder[@]}; do
+ cds[$cdnum]=$cdnum
+ ((cdnum += 1))
+ done
+
+ cdnum=1
+ else
+ disc_rpms=$distro_rpms
+ err_msg="$zone_mediafail"
+
+ #
+ # For full zone installs, ask for CDs in the order RPM needs
+ # to find the packages.
+ #
+ set -A cds "zero_pad" ${distro_cdorder[@]}
+ fi
+
+ if [[ "$install_media" = "ISO" ]]; then
+ set -A ldevs "zero_pad" "$@"
+ else
+ mountdev="$1"
+ err_media="$release_name, CD ${cds[$cdnum]} (or DVD)"
+ fi
+
+ unset rpms_left_save
+
+ while ((cdnum <= distro_ncds)); do
+ [[ -z ${cds[$cdnum]} ]] && ((cdnum += 1)) && continue
+
+ if [[ "$install_media" = "ISO" ]]; then
+ typeset isonum="${cds[$cdnum]}"
+
+ #
+ # If this routine was called with a single ISO device
+ # name, it must be a DVD, so refer to that one lofi
+ # device (and associated ISO pathname)
+ #
+ [[ $# -eq 1 ]] && isonum=1
+
+ err_media="ISO \"${iso_pathnames[$isonum]}\""
+ mountdev="${ldevs[$isonum]}"
+ fi
+
+ #
+ # If the disc needed in the install order isn't the one in
+ # the drive, ask for the correct one.
+ #
+ if ! get_cd "$mountdev" "${cds[$cdnum]}"; then
+ screenlog "$err_msg" "$zonename" "$err_media"
+ return 1
+ fi
+
+ # set the RedHat personality type, if applicable
+ [[ -n $rd_pers && -z $rh_pers ]] && rh_pers=$rd_pers
+
+ #
+ # We now know the actual type of media being used, so
+ # modify the "err_media" string accordingly.
+ #
+ if [[ "$install_media" = "disc" ]]; then
+ if [[ "$rd_disctype" = "DVD" ]]; then
+ err_media="$release_name DVD"
+ else
+ err_media="$release_name, CD ${cds[$cdnum]}"
+ fi
+ fi
+
+ find_packages "$mntdir" $disc_rpms
+
+ #
+ # Save a copy of $rpms_left. Other functions clobber it.
+ #
+ rpms_left_save="${rpms_left[@]}"
+
+ if [[ -n $rpms_found ]]; then
+ if [[ "$install_type" = "miniroot" ]]; then
+ verboselog "\nInstalling miniroot from"
+ verboselog " $err_media...\n"
+
+ if ! install_miniroot "$mntdir" \
+ "${rpms_found[@]}"; then
+ screenlog "$err_msg" "$zonename" \
+ "$err_media"
+ return 1
+ fi
+ else
+ screenlog "\n$install_msg\n" "$zonename" \
+ "$err_media"
+
+ if ! install_zone "$mntdir" \
+ ${rpms_found[@]}; then
+ screenlog "$err_msg" "$zonename" \
+ "$err_media"
+ return 1
+ fi
+ fi
+
+ #
+ # Mark installation from this CD (or ISO representing
+ # this CD) as completed.
+ #
+ if [[ "$rd_disctype" = "CD" ]]; then
+ unset cds[$cdnum]
+ fi
+ fi
+
+ # A DVD install takes a single disc, so stop iterating
+ [[ "$rd_disctype" = "DVD" ]] && break
+
+ # If there are no RPMs left, we're done.
+ [[ -z $rpms_left_save ]] && break
+
+ disc_rpms="$rpms_left_save"
+ ((cdnum += 1))
+
+ if [[ "$install_media" != "ISO" ]]; then
+ #
+ # modify the err_media variable to reflect the next
+ # CD in the sequence
+ #
+ err_media="$release_name, CD ${cds[$cdnum]}"
+ else
+ # Unmount the last used ISO if appropriate
+ if [[ -n $zone_mounted ]]; then
+ umount "$zone_mounted"
+ unset zone_mounted
+ fi
+ fi
+ done
+
+ if [[ -n $zone_mounted ]]; then
+ umount "$zone_mounted"
+ unset zone_mounted
+ fi
+
+ if [[ -n $rpms_left_save ]]; then
+ #
+ # Uh oh - there were RPMS we couldn't locate. This COULD
+ # indicate a failed installation, but we need to check for
+ # a RedHat personality "missing" list first.
+ #
+ if [[ -n $rh_pers && "$rh_pers" != "AS" ]]; then
+ typeset missing
+
+ if [[ $rh_pers = "WS" ]]; then
+ missing="$distro_WS_missing"
+ elif [[ $rh_pers = "ES" ]]; then
+ missing="$distro_ES_missing"
+ fi
+
+ #
+ # If any packages left in "rpm_left_save" appear in the
+ # list of packages expected to be missing from this
+ # personality, remove them from the "rpm_left_save"
+ # list.
+ #
+ if [[ -n $missing ]]; then
+ typeset pkg
+
+ for pkg in $missing
+ do
+ rpm_left_save=$(echo "$rpm_left_save " |
+ sed "s/$pkg //g")
+
+ #
+ # If all of the packages in
+ # "rpm_left_save" appeared in this
+ # personality's list of "expected
+ # missing" packages, then the
+ # installation completed successfully.
+ #
+ [[ -z ${rpm_left_save%%+( )} ]] &&
+ return 0
+ done
+ fi
+ fi
+
+ log "\nERROR: Unable to locate some needed packages:\n" \
+ " ${rpms_left_save%%+( )}\n"
+ screenlog "$err_msg" "$zonename"
+ return 1
+ fi
+
+ return 0
+}
+
+#
+# Install a zone from installation media
+#
+# Returns 0 on success, 1 on failure
+#
+install_from_media()
+{
+ msg=$(gettext "Installing miniroot for zone '%s'.")
+ screenlog "$msg" "$zonename"
+
+ iterate_media "miniroot" $@ || return 1
+
+ if ! setup_miniroot; then
+ screenlog "$mini_setfail" "$zonename"
+ return 1
+ fi
+
+ msg=$(gettext "Performing full install for zone '%s'.")
+
+ screenlog "\n$msg" "$zonename"
+
+ iterate_media "full" $@ || return 1
+
+ #
+ # Attempt to install deferred RPMS, if any
+ #
+ if [[ -n $deferred_rpms ]]; then
+ if ! install_zone ""; then
+ return 1
+ fi
+ fi
+
+ finish_install
+ return $?
+}
+
+#
+# Add an entry to the valid distro list.
+#
+# The passed argument is the ISO type ("CD Set" or "DVD")
+#
+add_to_distro_list()
+{
+ typeset name
+
+ distro_file[${#distro_file[@]}]="$distro"
+
+ name="$release_name"
+ [[ -n $redhat_pers ]] && name="$name $redhat_pers"
+
+ select_name[${#select_name[@]}]="$name ($1)"
+ release[${#release[@]}]="$release_name"
+ iso_set[${#iso_set[@]}]="${iso_names[@]}"
+ verboselog "Distro \"$name\" ($1) found."
+}
+
+#
+# Find out which distros we have ISO files to support
+#
+# Do this by cycling through the distro directory and reading each distro
+# file in turn looking for:
+#
+# 1) The number of discs in a distribution
+# 2) The serial number of the distribution
+# 3) The name of the distribution
+#
+# Based on this, we can determine based on the ISO files available which
+# distributions, if any, we have a complete set of files to support.
+#
+# The function returns the supported isos in the array "iso_set."
+#
+validate_iso_distros()
+{
+ typeset cd
+ typeset disctype
+ typeset index
+ typeset iso
+ typeset ncds
+ typeset pers
+ typeset pers_cd
+ typeset pers_index
+ typeset serial
+
+ typeset distro_files="$(echo $distro_dir/*.distro)"
+ typeset nisos=${#iso_filename[@]}
+
+ unset distro_file
+ unset iso_set
+ unset release
+ unset select_name
+
+ if [[ "$distro_files" = "$distro_dir/*.distro" ]]; then
+ msg=$(gettext "Unable to find any distro files!")
+ screenlog "$msg"
+ return
+ fi
+
+ for distro in $distro_files; do
+ #
+ # We're done if we've already processed all available ISO files
+ # or if there were none in the first place.
+ #
+ ((${#iso_filename[@]} == 0)) && break
+
+ [[ ! -f $distro ]] && continue
+
+ . "$distro" > /dev/null
+ ncds=${#distro_cdorder[@]}
+
+ unset iso_names
+ unset pers
+ unset pers_cd
+
+ verbose "\nChecking ISOs against distro file \"$distro\"..."
+
+ index=0
+
+ while ((index < nisos)); do
+ #
+ # If the filename has been nulled out, it's already
+ # been found as part of a distro, so continue to the
+ # next one.
+ #
+ if [[ -z ${iso_filename[$index]} ]]; then
+ ((index += 1))
+ continue
+ fi
+
+ iso="${iso_filename[$index]}"
+ serial="${iso_serial[$index]}"
+ release_name="${iso_release[$index]}"
+ redhat_pers="${iso_pers[$index]}"
+
+ verbose " ISO \"$iso\":"
+
+ #
+ # If the serial number doesn't match that for
+ # this distro, check other ISOs
+ #
+ if [[ "$serial" != "$distro_serial" ]]; then
+ ((index += 1))
+ continue
+ fi
+
+ verbose " Serial #$serial"
+ verbose " Release Name \"$release_name\""
+
+ [[ -n ${iso_pers[$index]} ]] &&
+ verbose " RedHat Personality \"$redhat_pers\""
+
+ if [[ "${iso_disctype[$index]}" = "CD" ]]; then
+ disctype="CD #"
+ cd="${iso_cdnum[$index]}"
+ else
+ disctype="DVD, representing CDs #"
+ cd=0
+ fi
+
+ verbose " ${disctype}${iso_cdnum[$index]}\n"
+
+ #
+ # Once we've matched a particular distro, don't check
+ # this ISO to see if it's part of any other.
+ #
+ unset iso_filename[$index]
+
+ iso_names[$cd]="$iso"
+
+ #
+ # A DVD-based distro consists of one and ONLY one disc,
+ # so process it now.
+ #
+ if [[ "${iso_disctype[$index]}" = "DVD" ]]; then
+ typeset dvd_discs=",${iso_cdnum[$index]}"
+
+ cd=1
+ while ((cd <= ncds)); do
+ dvd_discs=$(echo "$dvd_discs" |
+ sed "s/,$cd//")
+ ((cd += 1))
+ done
+
+ #
+ # If no CDs are left in $dvd_discs, the DVD
+ # was a complete distribution, so add it to
+ # the valid distro list.
+ #
+ if [[ -z $dvd_discs ]]; then
+ add_to_distro_list "DVD"
+ unset iso_names[$cd]
+ fi
+ elif [[ -n ${iso_pers[$index]} ]]; then
+ #
+ # If this is a RedHat personality CD, save off
+ # some extra information about it so we can
+ # discern between mutiple personality discs
+ # later, if needed.
+ #
+ pers[${#pers[@]}]=${iso_pers[$index]}
+ pers_cd[${#pers_cd[@]}]="$iso"
+ fi
+
+ ((index += 1))
+ done
+
+ #
+ # Check to see if we have ISOs representing a full CD set.
+ # If we don't, don't mark this as an available distro.
+ #
+ (( ${#iso_names[@]} != $ncds )) && continue
+
+ relase_name="$release_name $distro_version"
+
+ if [[ -z ${pers[@]} ]]; then
+ #
+ # If there were no personality discs, just add this
+ # ISO set to the distro list.
+ #
+ unset redhat_pers
+ add_to_distro_list "CD Set"
+ else
+ #
+ # If a valid CD-based distro was found and there are
+ # RedHat personality discs for that distro present,
+ # create entries for each personality in the available
+ # distro list.
+ #
+ pers_index=0
+
+ while ((pers_index < ${#pers[@]})); do
+ redhat_pers=${pers[$pers_index]}
+
+ if [[ -n ${pers_cd[$pers_index]} ]]; then
+ #
+ # RedHat personality discs are always
+ # disc 1 of a CD set, so if we found a
+ # valid personality disc for this set,
+ # set the disc 1 entry for this distro
+ # to the ISO for the proper personality
+ # disc.
+ #
+ iso_names[1]="${pers_cd[$pers_index]}"
+ add_to_distro_list "CD Set"
+ fi
+
+ ((pers_index += 1))
+ done
+ fi
+ done
+}
+
+#
+# Do a lofi add for the passed filename and set lofi_dev to the lofi
+# device name lofiadm created for it (e.g. "/dev/lofi/1".)
+#
+# If the passed filename already has a lofi device name, simply set lofi_dir
+# to the existing device name.
+#
+# Returns 0 on success, 1 on failure.
+#
+lofi_add()
+{
+ typeset filename="$1"
+
+ lofi_dev=$(lofiadm "$filename" 2>/dev/null) && return 0
+ lofi_dev=$(lofiadm -a "$filename") && return 0
+
+ screenlog "$lofi_failed" "$filename"
+ return 1
+}
+
+#
+# Delete the lofi device name passed in.
+#
+# Returns 0 on success, 1 on failure.
+#
+lofi_del()
+{
+ typeset dev="$1"
+
+ [[ "$dev" != /dev/lofi/* ]] && return 1
+
+ if lofiadm -d "$dev" 2>/dev/null; then
+ [[ -n $lofi_dev ]] && unset lofi_dev
+ return 0
+ fi
+
+ return 1
+}
+
+#
+# Mount the lofi device name passed in.
+#
+# Set the variable mntdir to the directory on which the lofi device is
+# mounted.
+#
+# Returns 0 on success, 1 on failure.
+#
+lofi_mount()
+{
+ typeset lofidev="$1"
+ typeset mntpoint="$2"
+
+ #
+ # Check to see if the lofi device is already mounted and return
+ # the existing mount point if it is.
+ #
+ get_mountdir "$lofidev" && { mntdir="$mount_dir" ; return 0 ; }
+
+ unset mntdir
+ if [[ ! -d "$mntpoint" ]]; then
+ if ! mkdir -p "$mntpoint"; then
+ log "Could not create mountpoint \"$mntpoint\"!\n"
+ return 1
+ fi
+ lofi_created="$mntpoint"
+ fi
+
+ verbose "Attempting mount of device \"$lofidev\""
+ verbose " on directory \"$mntpoint\"... \c"
+
+ if ! mount -F hsfs -r "$lofidev" "$mntpoint" 2>/dev/null; then
+ verbose "FAILED."
+ [[ -n $lofi_created ]] && rmdir -ps "$lofi_created" &&
+ unset lofi_created
+ return 1
+ fi
+
+ mntdir="$mntpoint"
+ verbose "succeeded."
+ return 0
+}
+
+#
+# Unmount the lofi device name passed in, and remove the device mount point
+# after unmounting the device.
+#
+# Returns 0 on success, 1 on failure.
+#
+lofi_umount()
+{
+ typeset mntdev="$1"
+
+ #
+ # If the directory name passed wasn't mounted to begin with,
+ # just return success.
+ #
+ get_mountdir "$mntdev" || return 0
+
+ verbose "Unmounting device \"$mntdev\"... \c"
+
+ if ! umount "$mntdev" ; then
+ verbose "FAILED."
+ return 1
+ fi
+
+ verbose "succeeded."
+ return 0
+}
+
+# Scan the passed list of ISOs.
+scan_isos()
+{
+ typeset iso
+ typeset index=0
+
+ unset iso_serial
+ unset iso_release
+ unset iso_cdnum
+ unset iso_disctype
+ unset iso_filename
+ unset iso_pers
+
+ for iso in "$@"; do
+ verbose "Checking possible ISO\n \"$iso\"..."
+
+ if lofi_add "$iso"; then
+ verbose " added as lofi device \"$lofi_dev\""
+ if lofi_mount "$lofi_dev" "/tmp/lxiso"; then
+ if read_discinfo "$mntdir"; then
+ iso_release[$index]="$rd_release"
+ iso_serial[$index]="$rd_serial"
+ iso_cdnum[$index]="$rd_cdnum"
+ iso_disctype[$index]="$rd_disctype"
+
+ [[ -n $rd_pers ]] &&
+ iso_pers[$index]="$rd_pers"
+
+ iso_filename[$index]="$iso"
+ ((index += 1))
+ fi
+ lofi_umount "$lofi_dev"
+ else
+ verbose " not a usable ISO image."
+ log "Unable to mount \"$lofi_dev\" (\"$iso\")"
+ fi
+
+ lofi_del "$lofi_dev"
+ else
+ verbose " not a valid ISO image."
+ fi
+ done
+}
+
+#
+# Prompt the user with the first argument, then make a menu selection
+# from the balance.
+#
+# This is effectively similar to the ksh "select" function, except it
+# outputs to stdout.
+#
+# Shell variables set:
+# choice - set to the menu number selected
+# selection - set to the menu text selected
+#
+pick_one()
+{
+ typeset menu_items
+ typeset menu_index
+ typeset reply
+
+ typeset prompt="$1"
+ shift
+
+ unset choice
+
+ set -A menu_items "$@"
+
+ until [[ -n $choice ]]; do
+ menu_index=1
+
+ echo "\n$prompt\n"
+
+ for f in "${menu_items[@]}"; do
+ echo "$menu_index) $f"
+ ((menu_index += 1))
+ done
+
+ echo "\n$(gettext "Please select") (1-$#): " "\c"
+ read reply
+ echo
+
+ [[ -z $reply ]] && echo && continue
+
+ #
+ # Reprint menu selections if the answer was not a number in
+ # range of the menu items available
+ #
+ [[ $reply != +([0-9]) ]] && continue
+ ((reply < 1)) || ((reply > $#)) && continue
+
+ choice=$reply
+ selection=${menu_items[((choice - 1))]}
+ done
+}
+
+#
+# Select a distribution to install from the arguments passed and set
+# "ndsitro" to the value chosen - 1 (so it may be used as an array index.)
+#
+# The routine will automatically return with ndisto set to 0 if only one
+# argument is passed.
+#
+select_distro()
+{
+ unset choice
+ unset ndistro
+
+ if (($# > 1)); then
+ if [[ -n $silent_mode ]]; then
+ typeset dist
+
+ log "ERROR: multiple distrubutions present in ISO" \
+ "directory but silent install"
+ log " mode specified. Distros available:"
+ for dist in "$@"; do
+ log " \"$dist\""
+ done
+ return 1
+ fi
+
+ pick_one \
+ "$(gettext "Which distro would you like to install?")" \
+ "$@"
+ fi
+
+ #
+ # Covers both the cases of when only one distro name is passed
+ # to the routine as well as when an EOF is sent to the distribution
+ # selection prompt.
+ #
+ if [[ -z $choice ]]; then
+ screenlog "$install_dist" "$1"
+ ndistro=0
+ else
+ screenlog "$install_dist" "$selection"
+ ndistro=$((choice - 1))
+ fi
+
+ return 0
+}
+
+#
+# Install a zone from discs or manually lofi-mounted ISOs.
+#
+# Return 0 on success, 1 on failure
+#
+do_disc_install()
+{
+ typeset path="$1"
+
+ typeset eject_final="N"
+ typeset install_status
+
+ #
+ # Get a disc, it doesn't matter which one.
+ #
+ # We don't know which distro this may be yet, so we can't yet
+ # ask for the first disc in the install order.
+ #
+ if ! get_cd "$path"; then
+ if [[ -z $silent_mode ]]; then
+ typeset distro_disc=\
+ $(gettext "a supported Linux distribution disc")
+
+ screenlog "\n$distro_mediafail" "$distro_disc ($path)"
+ fi
+ return 1
+ fi
+
+ if [[ -n $silent_mode && "$rd_disctype" = "CD" ]]; then
+ log "$silent_err_msg"
+ return 1
+ fi
+
+ if ! get_disc_distro "$mntdir"; then
+ msg=$(gettext "Unable to find a supported Linux release on")
+ screenlog "$msg"
+ screenlog " $media_spec" "$path"
+ umount "$mntdir" > /dev/null 2>&1
+ return 1
+ fi
+
+ check_mbfree $zoneroot $distro_mb_required || return 1
+ build_rpm_list $install_packages
+
+ echo
+
+ if [[ "$install_media" = "disc" ]]; then
+ #
+ # If we're in interactive mode, ask the user if they want the
+ # disc ejected when the installation is complete.
+ #
+ # Silent mode installs will require the user to manually run
+ # eject(1).
+ #
+ if [[ -n $removable && -z $silent_mode ]]; then
+ typeset ans
+ typeset disc
+ typeset status
+ typeset which=""
+
+ disc="$rd_disctype"
+ [[ "$disc" = "CD" ]] && which=$(gettext "final ")
+
+ #
+ # Ask the user if they want the install disc ejected
+ # when the installation is complete. Any answer but
+ # "n" or "N" is taken to mean yes, eject it.
+ #
+ eject_final="Y"
+ status=$(gettext "WILL")
+
+ screenlog "$eject_final_msg" "$which" "$disc"
+ screenlog " $eject_final_prompt" "$zonename" "[y]/n"
+
+ read ans && [[ "$ans" = [Nn]* ]] && eject_final="N" &&
+ status=$(gettext "will NOT")
+
+ screenlog "\n$eject_final_status\n" "$which" "$disc" \
+ "$status"
+ fi
+
+ screenlog "$install_ndiscs" "$distro_ncds"
+
+ msg=$(gettext "install %s.")
+ screenlog "$msg" "$release_name"
+ else
+ screenlog "$install_nisos" "$distro_ncds"
+
+ msg=$(gettext "DVD) to install %s.")
+ screenlog "$msg" "$release_name"
+ fi
+
+ install_from_media "$path"
+ install_status=$?
+
+ [[ "$eject_final" = "Y" ]] && eject_removable_disc
+
+ return $install_status
+}
+
+#
+# Install a zone using the list of ISO files passed as arguments to this
+# function.
+#
+# Return 0 on success, 1 on failure.
+#
+do_iso_install()
+{
+ typeset install_status
+ typeset iso_path
+ typeset ldev
+
+ msg=$(gettext "Checking for valid Linux distribution ISO images...")
+ screenlog "\n$msg"
+
+ scan_isos "$@"
+
+ if [[ -z ${iso_filename[@]} ]]; then
+ msg=$(gettext "No valid ISO images available or mountable.")
+ screenlog "\n$msg"
+ return 1
+ fi
+
+ validate_iso_distros
+
+ if [[ -z ${release[@]} ]]; then
+ msg=$(gettext "No supported Linux distributions found.")
+ screenlog "\n$msg"
+ return 1
+ fi
+
+ select_distro "${select_name[@]}" || return 1
+ unset select_name
+
+ . ${distro_file[$ndistro]} > /dev/null
+ distro_ncds=${#distro_cdorder[@]}
+
+ check_mbfree $zoneroot $distro_mb_required || return 1
+ build_rpm_list $install_packages
+
+ unset lofi_devs
+
+ verboselog ""
+ for iso_path in ${iso_set[$ndistro]}; do
+ if ! lofi_add "$iso_path"; then
+ for ldev in $lofi_devs; do
+ lofi_del "$ldev"
+ done
+ return 1
+ fi
+
+ verboselog "Added \"$iso_path\""
+ verboselog " as \"$lofi_dev\""
+ lofi_devs="$lofi_devs $lofi_dev"
+ done
+
+ release_name="${release[$ndistro]}"
+
+ set -A iso_pathnames "zero_pad" ${iso_set[$ndistro]}
+ install_from_media $lofi_devs
+ install_status=$?
+
+ for ldev in $lofi_devs; do
+ lofi_del "$ldev"
+ done
+
+ unset lofi_devs
+ return $install_status
+}
+
+# Clean up on interrupt
+trap_cleanup()
+{
+ cd "$cwd"
+
+ msg=$(gettext "Interrupt received, cleaning up partial install...")
+ screenlog "$msg"
+
+ [[ -n $miniroot_booted ]] && zoneadm -z "$zonename" halt &&
+ unset miniroot_booted && unset newroot_mounted
+
+ #
+ # OK, why a sync here? Because certain commands may have written data
+ # to mounted file systems before the interrupt, and given just the right
+ # timing there may be buffered data not yet sent to the disk or the
+ # system may still be writing data to the disk. Either way, the umount
+ # will then fail because the system will still see the mounted
+ # filesystems as busy.
+ #
+ sync
+
+ if [[ -n $newroot_mounted ]]; then
+ umount_list $newroot_mounted
+ unset newroot_mounted
+ fi
+
+ if [[ -n $zone_mounted ]]; then
+ umount "$zone_mounted"
+ unset zone_mounted
+ fi
+
+ #
+ # Normally, this isn't needed but there is a window where mntdir is set
+ # before zone_mounted, so account for that case.
+ #
+ if [[ -n $mntdir ]]; then
+ umount "$mntdir"
+ unset mntdir
+ fi
+
+ [[ -n $lofi_dev ]] && lofi_del "$lofi_dev"
+
+ if [[ -n $lofi_devs ]]; then
+ typeset ldev
+
+ for ldev in $lofi_devs
+ do
+ lofi_del "$ldev"
+ done
+
+ unset lofi_devs
+ fi
+
+ [[ -n $lofi_created ]] && rmdir -ps "$lofi_created" &&
+ unset lofi_created
+
+ msg=$(gettext "Installation aborted.")
+ screenlog "$msg"
+ exit $ZONE_SUBPROC_FATAL
+}
+
+#
+# Start of main script
+#
+cwd=$(dirname "$0")
+distro_dir="$cwd/distros"
+
+unset deferred_saved
+unset distro_path
+unset logfile
+unset msg
+unset newroot_mounted
+unset silent_err_msg
+unset silent_mode
+unset verbose_mode
+unset zone_mounted
+unset zoneroot
+unset zonename
+
+#
+# Exit values used by the script, as #defined in <sys/zone.h>
+#
+# ZONE_SUBPROC_OK
+# ===============
+# Installation was successful
+#
+# ZONE_SUBPROC_USAGE
+# ==================
+# Improper arguments were passed, so print a usage message before exiting
+#
+# ZONE_SUBPROC_NOTCOMPLETE
+# ========================
+# Installation did not complete, but another installation attempt can be
+# made without an uninstall
+#
+# ZONE_SUBPROC_FATAL
+# ==================
+# Installation failed and an uninstall will be required before another
+# install can be attempted
+#
+ZONE_SUBPROC_OK=0
+ZONE_SUBPROC_USAGE=253
+ZONE_SUBPROC_NOTCOMPLETE=254
+ZONE_SUBPROC_FATAL=255
+
+#
+# Process and set up various global option variables:
+#
+# distro_path - Path containing files that make up the distribution
+# (e.g. a directory containing ISO files or a disc device)
+# logfile - Name (if any) of the install log file
+# zoneroot - Root directory for the zone to install
+# zonename - Name of the zone to install
+#
+while getopts 'svxd:l:r:z:' opt; do
+ case $opt in
+ s) silent_mode=1; unset verbose_mode;;
+ v) verbose_mode=1; unset silent_mode;;
+ x) set -x;;
+ d) distro_path="$OPTARG";;
+ l) logfile="$OPTARG";;
+ r) zoneroot="$OPTARG";;
+ z) zonename="$OPTARG";;
+ esac
+done
+shift OPTIND-1
+
+distro_path=${distro_path:=/cdrom/cdrom0}
+
+install_packages="$@"
+
+[[ -n $silent_mode ]] && exec 1>/dev/null
+
+if [[ -z $zonename ]]; then
+ msg=$(gettext "ERROR: Cannot install - no zone name was specified")
+ screenlog "$msg"
+ echo
+ exit $ZONE_SUBPROC_NOTCOMPLETE
+fi
+
+if [[ -z $zoneroot ]]; then
+ msg=$(gettext "ERROR: Cannot install - no zone root directory was")
+ screenlog "$msg"
+
+ msg=$(gettext "specified.")
+ screenlog " $msg"
+ echo
+ exit $ZONE_SUBPROC_NOTCOMPLETE
+fi
+
+# Make sure the specified zone root directory exists
+[[ -d "$zoneroot" ]] || mkdir -m 0700 -p "$zoneroot"
+
+if [[ ! -d "$zoneroot" ]]; then
+ screenlog "$zone_rootfail" "$zoneroot"
+ echo
+ exit $ZONE_SUBPROC_NOTCOMPLETE
+fi
+
+rootdir="$zoneroot/root"
+
+# Make sure the specified zone root subdirectory exists
+[[ -d "$rootdir" ]] || mkdir -p "$rootdir"
+
+if [[ ! -d "$rootdir" ]]; then
+ screenlog "$zone_rootsub" "$rootdir"
+ echo
+ exit $ZONE_SUBPROC_NOTCOMPLETE
+fi
+
+media_mntdir="$rootdir/media"
+
+if [[ -n $logfile ]]; then
+ # If a log file was specified, log information regarding the install
+ log "\nInstallation started `date`"
+ log "Installing from path \"$distro_path\""
+else
+ # Redirect stderr to /dev/null if silent mode is specified.
+ [[ -n $silent_mode ]] && exec 2>/dev/null
+fi
+
+distro_path=${distro_path:=$default_distro_path}
+
+# From this point on, call trap_cleanup() on interrupt (^C)
+trap trap_cleanup INT
+
+verbose "Installing zone \"$zonename\" at root \"$zoneroot\""
+release_name="supported Linux distribution"
+
+#
+# Based on the pathname, attempt to determine whether this will be a disc or
+# lofi-based install or one using ISOs.
+#
+if [[ "$distro_path" = /cdrom/* || "$distro_path" = /media/* ||
+ "$distro_path" = /dev/dsk/* || "$distro_path" = /dev/lofi/* ]]; then
+ if [[ "$distro_path" = /dev/lofi/* ]]; then
+ silent_err_msg="$silent_nolofi"
+ install_media="lofi"
+ else
+ silent_err_msg="$silent_nodisc"
+ install_media="disc"
+ fi
+
+ if [[ "$distro_path" = /cdrom/* || "$distro_path" = /media/* ]]; then
+ managed_removable=1
+ else
+ managed_removable=0
+ fi
+
+ log "Installing zone \"$zonename\" at root \"$zoneroot\""
+ verboselog " Attempting ${install_media}-based install via:"
+ verboselog " \"$distro_path\""
+
+ do_disc_install "$distro_path"
+else
+ typeset dir_start
+ typeset dir_file
+
+ dir_start=$(dirname "$distro_path" | cut -c 1)
+
+ [[ "$dir_start" != "/" ]] && distro_path="${PWD:=$(pwd)}/$distro_path"
+
+ if [[ ! -d "$distro_path" ]]; then
+ screenlog "$no_distropath" "$distro_path"
+ echo
+ exit $ZONE_SUBPROC_NOTCOMPLETE
+ fi
+
+ log "Installing zone \"$zonename\" at root \"$zoneroot\""
+ verboselog " Attempting ISO-based install from directory:"
+ verboselog " \"$distro_path\""
+
+ unset iso_files
+
+ for dir_file in $distro_path/*; do
+ #
+ # Skip this file if it's not a regular file or isn't readable
+ #
+ [[ ! -f $dir_file || ! -r $dir_file ]] && continue
+
+ #
+ # If it's an hsfs file, it's an ISO, so add it to the possible
+ # distro ISO list
+ #
+ filetype=$(LC_ALL=C fstyp $dir_file 2>/dev/null) &&
+ [[ "$filetype" = "hsfs" ]] &&
+ iso_files="$iso_files $dir_file"
+ done
+
+ install_media="ISO"
+ do_iso_install $iso_files
+fi
+
+if [[ $? -ne 0 ]]; then
+ cd "$cwd"
+
+ [[ -n $miniroot_booted ]] && zoneadm -z "$zonename" halt &&
+ unset miniroot_booted && unset newroot_mounted
+
+ if [[ -n $zone_mounted ]]; then
+ umount "$zone_mounted"
+ unset zone_mounted
+ fi
+
+ if [[ -n $newroot_mounted ]]; then
+ umount_list $newroot_mounted
+ unset newroot_mounted
+ fi
+
+ screenlog "\n$install_failed\n" "$release_name" "$zonename" "`date`"
+
+ msg=$(gettext "Cleaning up after failed install...")
+ screenlog "$msg"
+
+ #
+ # The extra checks are some basic paranoia due to the potentially
+ # dangerous nature of these commands but are not intended to catch all
+ # malicious cases.
+ #
+ [[ -d "$zoneroot/a" ]] && rm -rf "$zoneroot/a"
+
+ exit $ZONE_SUBPROC_FATAL
+fi
+
+screenlog "$install_done" "$release_name" "$zonename" "`date`"
+
+exit $ZONE_SUBPROC_OK
diff --git a/usr/src/lib/brand/lx/zone/lx_init_zone.ksh b/usr/src/lib/brand/lx/zone/lx_init_zone.ksh
new file mode 100644
index 0000000000..bd377d67a5
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/lx_init_zone.ksh
@@ -0,0 +1,325 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2014 Joyent, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# This script contains various routines used to post-process a zone for use
+# with BrandZ after it has been installed from RPM media or a tar image.
+#
+# Briefly, there are three main jobs we need to do:
+#
+# 1) Create any needed directories and symlinks BrandZ needs but that the
+# Linux install may not create
+#
+# 2) Modify rc scripts to shut off services that don't apply to a zone
+# or that wish to access hardware directly
+#
+# 3) Modify various Linux system files for use within a zone environment
+#
+
+#
+# Restrict executables to /bin and /usr/bin
+#
+PATH=/bin:/usr/bin
+export PATH
+
+#
+# Sends output to a log file via redirection of stderr.
+#
+# This script assumes its caller has already performed the redirection to the
+# logfile.
+#
+log()
+{
+ echo "$@" >&2
+}
+
+#
+# Setup i18n output
+#
+TEXTDOMAIN="SUNW_OST_OSCMD"
+export TEXTDOMAIN
+
+cmd_failed=$(gettext "%s failed! Aborting installation...")
+cmd2_failed=$(gettext "%s of '%s' to '%s' failed!")
+create_failed=$(gettext "Could not create new file '%s'!")
+disable_failed=$(gettext "Attempt to disable entries in '%s' failed!")
+install_aborted=$(gettext "Aborting installation...")
+install_noroot=$(gettext "Installation root directory '%s' does not exist.")
+ln_fail=$(gettext "Unable to symlink '%s' to '%s'!")
+mkdir_fail=$(gettext "Unable to create the directory '%s'")
+mod_failed=$(gettext -n "Attempt to modify entries in '%s' failed!")
+
+usage=$(gettext "usage: %s <zonename> <install_root> [mini]")
+
+#
+# Output an internationalized string followed by a carriage return
+#
+i18n_echo()
+{
+ typeset fmt="$1"
+ shift
+
+ printf "$fmt\n" "$@"
+}
+
+#
+# Routine to make a full path out of a supplied path
+#
+fullpath()
+{
+ typeset path="$1"
+
+ echo $path | egrep -s "^/" || path="${PWD:=$(pwd)}/$path"
+ echo $path
+}
+
+#
+# Routine to create directories and handle errors
+#
+makedir()
+{
+ typeset dirname=$(fullpath "$1")
+ typeset mode=""
+
+ [[ $# -eq 2 ]] && mode="-m $2"
+
+ [[ -d "$dirname" ]] && return
+
+ if ! mkdir $mode -p "$dirname"; then
+ log "Unable to create the directory \"$dirname\"!"
+ i18n_echo "$mkdir_fail" "$dirname"
+ echo $(gettext "Aborting installation...")
+ exit 1
+ fi
+}
+
+#
+# Routine to create initial symlinks and handle errors
+#
+symlink()
+{
+ typeset src="$1"
+ typeset dst=$(fullpath "$2")
+
+ [[ -e "$dst" || -h "$dst" ]] && rm -f "$dst"
+
+ if ! ln -s "$src" "$dst"; then
+ log "Unable to symlink \"$src\" to \"$dst\"!"
+ i18n_echo "$ln_fail" "$src" "$dst"
+ echo $(gettext "Aborting installation...")
+ exit 1
+ fi
+}
+
+#
+# Install a file using "ln -s"
+#
+# Returns 0 on success, 1 on failure.
+#
+install_ln()
+{
+ typeset source="$1"
+ typeset target=$(fullpath "$2")
+
+ log " Installing \"$target\""
+
+ mv -f "$target" "$target.$tag" 2>/dev/null
+
+ if ! ln -s "$source" "$target"; then
+ log ""
+ log "Attempt to install $target FAILED."
+ return 1
+ fi
+
+ return 0
+}
+
+#
+# The main script starts here.
+#
+# The syntax is:
+#
+# lx_init_zone <zonename> <rootdir> [mini]
+#
+# Where:
+# <zonename> is the name of the zone to be modified
+# <rootdir> is the root of the zone directory to be modified
+#
+# [mini] is an optional third argument that signifies whether this is
+# to be a miniroot install; if it is, NFS services are not enabled
+# in the processed zone
+#
+unset is_miniroot
+unset install_root
+
+zonename="$1"
+install_root="$2"
+
+tag="lxsave_$(date +%m.%d.%Y@%T)"
+
+if (($# < 2 || $# > 3)); then
+ i18n_echo "$usage" "$0"
+ exit 1
+fi
+
+(($# == 3)) && is_miniroot=1
+
+if [[ ! -d "$install_root" ]]; then
+ i18n_echo "$install_noroot" "$install_root"
+ echo $(gettext "** Installation aborted **")
+ exit 1
+fi
+
+cd "$install_root"
+
+log ""
+log "Initial lx_brand environment modification started `date`"
+log "Making needed directories in \"$install_root\"."
+echo $(gettext "Setting up the initial lx brand environment.")
+
+#
+# Make various directories in /native that are needed to boot an lx branded
+# zone.
+#
+makedir native/dev
+makedir native/etc/default
+makedir native/etc/svc/volatile
+makedir native/lib
+makedir native/proc
+makedir native/tmp 1777
+makedir native/usr
+makedir native/var
+
+#
+# Make various other directories needed for the lx brand
+#
+makedir mnt
+makedir opt
+makedir usr/local/bin
+makedir usr/local/include
+makedir usr/local/lib
+makedir usr/local/sbin
+makedir usr/local/share
+makedir usr/local/src
+
+makedir dev 0755
+makedir tmp 1777
+makedir proc 0555
+makedir boot 0755
+
+#
+# zlogin requires that these utilities live in places other than their
+# Linux defaults, so create appropriate links for them here.
+#
+# XX - The need for these links may go away in the future if zlogin is
+# appropriately modified
+#
+symlink /bin/sh sbin/sh
+symlink /bin/su usr/bin/su
+symlink /native/usr/lib/ld.so.1 usr/lib/ld.so.1
+
+libpam_so="$(echo lib/libpam.so.0.*)"
+libpam_misc="$(echo lib/libpam_misc.so.0.*)"
+libpamc_so="$(echo lib/libpamc.so.0.*)"
+
+symlink "/$libpam_so" lib/libpam.so.0
+symlink "/$libpam_misc" lib/libpam_misc.so.0
+symlink "/$libpamc_so" lib/libpamc.so.0
+
+log ""
+log "Modifying system configuration in \"$install_root\""
+
+#
+# Create a /var/ld/ld.config that will point to /native/lib for our Solaris
+# libraries.
+#
+log "Creating \"$install_root/var/ld/ld.config\"..."
+
+makedir var/ld
+
+if ! crle -c var/ld/ld.config -l /native/lib:/native/usr/lib \
+ -s /native/lib/secure:/native/usr/lib/secure; then
+ log "\tCreation of \"$install_root/var/ld/ld.config\" failed!"
+ i18n_echo "$cmd_failed" "crle"
+ exit 1
+fi
+
+log ""
+log "Modifying \"$install_root/etc/fstab\"..."
+
+mv -f etc/fstab etc/fstab.$tag 2>/dev/null
+
+cat > etc/fstab <<- EOF
+ none / zfs defaults 1 1
+ proc /proc proc defaults 0 0
+EOF
+
+if [[ $? -ne 0 ]]; then
+ log "Could not create new \"$install_root/etc/fstab\"!"
+ i18n_echo "$create_failed" "$install_root/etc/fstab"
+ exit 1
+fi
+
+if [[ ! -e "$install_root/etc/hosts" ]]; then
+ log ""
+ log "Creating: \"$install_root/etc/hosts\"..."
+
+ cat > "$install_root/etc/hosts" <<-_EOF_
+ 127.0.0.1 localhost
+ _EOF_
+fi
+
+#
+# Perform distribution-specific changes.
+#
+distro=""
+if [[ -f etc/redhat-release ]]; then
+ distro="redhat"
+elif [[ -f etc/lsb-release ]]; then
+ if egrep -s Ubuntu etc/lsb-release; then
+ distro="ubuntu"
+ elif [[ -f etc/debian_version ]]; then
+ distro="debian"
+ fi
+elif [[ -f etc/debian_version ]]; then
+ distro="debian"
+fi
+
+if [[ -z $distro ]]; then
+ log ""
+ log "NOTE: Unsupported distribution!"
+ i18n_echo "NOTE: Unsupported distribution!"
+ exit 1
+fi
+
+i18n_echo "Customizing for $distro"
+. $(dirname $0)/lx_init_zone_${distro}
+
+log ""
+log "System configuration modifications complete `date`"
+log ""
+i18n_echo "System configuration modifications complete."
+exit 0
diff --git a/usr/src/lib/brand/lx/zone/lx_init_zone_debian.ksh b/usr/src/lib/brand/lx/zone/lx_init_zone_debian.ksh
new file mode 100644
index 0000000000..57dd4adf54
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/lx_init_zone_debian.ksh
@@ -0,0 +1,242 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Customisation for Debian-based distributions. Assumes to have been
+# sourced from lx_init_zone.
+#
+
+
+#
+# The default /etc/inittab spawns getty on each of the virtual consoles
+# as well as xdm on the X console. Since we don't have virtual consoles nor
+# an X console, spawn a single getty on /dev/console instead.
+#
+# Don't bother changing the file if it looks like we already did.
+#
+if [[ -f etc/inittab ]] && ! egrep -s "Disabled by lx brand" etc/inittab; then
+ log "Modifying: \"$install_root/etc/inittab\"..."
+
+ tmpfile=/tmp/inittab.$$
+
+ sed 's/^[1-6]:/# Disabled by lx brand: &/
+ s/^id:5:initdefault:/id:3:initdefault: # Modified by lx brand: &/' \
+ etc/inittab > $tmpfile
+
+ #
+ # Don't bother with further alterations if the sed above failed...
+ #
+ if [[ $? -eq 0 ]]; then
+ egrep -s "console login for lx brand" etc/inittab
+ if [[ $? -ne 0 ]]; then
+ cat >> $tmpfile <<- EOF
+
+ #
+ # console login for lx brand
+ #
+ 1:2345:respawn:/sbin/getty 115200 console
+ EOF
+
+ #
+ # Only install the new inittab if the append
+ # above succeeded.
+ #
+ if [[ $? -eq 0 ]]; then
+ #
+ # Attempt to save off the original inittab
+ # before moving over the modified version.
+ #
+ mv -f etc/inittab etc/inittab.$tag 2>/dev/null
+
+ mv -f $tmpfile etc/inittab
+
+ if [[ $? -ne 0 ]]; then
+ log "mv of \"$tmpfile\" to" \
+ "\"$installroot/etc/inittab\"" \
+ "failed!"
+ i18n_echo "$cmd2_failed" "mv" \
+ "$tmpfile" \
+ "$installroot/etc/inittab"
+ i18n_echo "$install_aborted"
+ exit 1
+ else
+ chmod 644 etc/inittab
+ fi
+ fi
+ fi
+
+ else
+ log "Attempt to disable entries in" \
+ "\"$install_root/etc/inittab\" failed!"
+ i18n_echo "$disable_failed" "$install_root/etc/inittab"
+ i18n_echo "$install_aborted"
+ exit 1
+ fi
+fi
+
+#
+# Stop rsyslogd logging to /dev/xconsole, we cannot create it and it is
+# unlikely to be useful in our environment.
+#
+if sed -n 115p etc/rsyslog.conf | egrep -s "^ .*\|/dev/xconsole"; then
+ log ""
+ log "Modifying: \"$install_root/etc/rsyslog.conf\"..."
+
+ tmpfile=/tmp/lx_rsyslog.conf.$$
+
+ sed -e '111,115s/^/# Disabled by lx brand/' \
+ etc/rsyslog.conf > $tmpfile
+
+ if [[ $? -eq 0 ]]; then
+ mv -f etc/rsyslog.conf etc/rsyslog.conf.$tag 2>/dev/null
+
+ if ! mv -f $tmpfile etc/rsyslog.conf; then
+ log "mv of \"$tmpfile\" to" \
+ "\"$installroot/etc/rsyslog.conf\" failed!"
+ i18n_echo "$cmd2_failed" "mv" "$tmpfile" \
+ "$installroot/etc/rsyslog.conf"
+ i18n_echo "$install_aborted"
+ exit 1
+ fi
+ else
+ log "Attempt to modify entries in" \
+ "\"$install_root/sysconfig/syslog\" failed!"
+ i18n_echo "$mod_failed" "$install_root/sysconfig/syslog"
+ i18n_echo "$install_aborted"
+ exit 1
+ fi
+fi
+if grep "^ create_xconsole" etc/init.d/rsyslog >/dev/null 2>&1; then
+ log ""
+ log "Modifying: \"$install_root/etc/init.d/rsyslog\"..."
+
+ tmpfile=/tmp/lx_init_rsyslog.$$
+
+ sed -e '/^ create_xconsol/s/^/# Disabled by lx brand/' \
+ etc/init.d/rsyslog > $tmpfile
+
+ if [[ $? -eq 0 ]]; then
+ mv -f etc/init.d/rsyslog etc/init.d/rsyslog.$tag 2>/dev/null
+
+ if ! mv -f $tmpfile etc/init.d/rsyslog; then
+ log "mv of \"$tmpfile\" to" \
+ "\"$installroot/etc/init.d/rsyslog\" failed!"
+ i18n_echo "$cmd2_failed" "mv" "$tmpfile" \
+ "$installroot/etc/init.d/rsyslog"
+ i18n_echo "$install_aborted"
+ exit 1
+ else
+ chmod 755 etc/init.d/rsyslog
+ fi
+ else
+ log "Attempt to modify entries in" \
+ "\"$install_root/sysconfig/syslog\" failed!"
+ i18n_echo "$mod_failed" "$install_root/sysconfig/syslog"
+ i18n_echo "$install_aborted"
+ exit 1
+ fi
+fi
+
+#
+# The following scripts attempt to start services or otherwise configure
+# the system in ways incompatible with zones, so don't execute them at boot
+# time.
+#
+log ""
+log "Modifying \"$install_root/etc/init.d\" to disable any"
+log " services not supported by BrandZ:"
+unsupported_services="
+ bootmisc.sh
+ checkroot.sh
+ hwclock.sh
+ hwclockfirst.sh
+ mountdevsubfs.sh
+ mountkernfs.sh
+ mtab.sh
+ networking
+ umountfs
+"
+
+for file in $unsupported_services; do
+ if [[ -e "etc/init.d/$file" ]]; then
+
+ if mv -f "etc/init.d/$file" "etc/init.d/$file.$tag"; then
+ log " + Moved script \"etc/init.d/$file\" to"
+ log " \"etc/init.d/$file.$tag\""
+ fi
+ fi
+
+ rc_files="$(echo etc/rc[S0-6].d/[SK]+([0-9])$file)"
+
+ if [[ "$rc_files" != "etc/rc[0-6].d/[SK]+([0-9])$file" ]]; then
+ for file in $rc_files; do
+ if [[ -h "$file" ]]; then
+ rm -f "$file" &&
+ log " + Removed symbolic link \"$file\""
+ else
+ rm -f "$file" &&
+ log " + Removed script \"$file\""
+ fi
+ done
+ fi
+done
+
+#
+# udev isn't usable within the zone
+#
+if [[ -e "sbin/udevd" ]]; then
+ if mv -f "sbin/udevd" "sbin/udevd.$tag"; then
+ log " + Moved \"sbin/udevd\" to \"sbin/udevd.$tag\""
+ fi
+fi
+
+#
+# Fix mountall
+#
+if [ -f etc/init.d/mountall.sh ]; then
+ tmpfile=etc/init.d/mountall.$$
+ sed 's/mount_run/# disabled for lx brand: &/
+ s/mount_shm/# disabled for lx brand: &/
+ s/mount_tmp/# disabled for lx brand: &/
+ s/pidof \/sbin\/init/false/
+ ' etc/init.d/mountall.sh > $tmpfile
+ mv -f $tmpfile etc/init.d/mountall.sh
+ chmod +x etc/init.d/mountall.sh
+fi
+
+ip_stack_type=`/usr/sbin/zonecfg -z $zonename info ip-type | cut -d' ' -f2`
+if [[ "$ip_stack_type" == "exclusive" ]]; then
+ # We already moved aside the 'networking' service in the code
+ # above. Setup our own service which will configure the net.
+ cp /usr/lib/brand/lx/lx_networking etc/init.d/networking
+fi
+
+if [[ $distro == "debian" ]]; then
+ # No upstart, setup rc link
+ ln -s ../init.d/networking etc/rcS.d/S10networking
+
+# else must be Ubuntu, so upstart.
+
+fi
+
+if [[ -f etc/mtab ]]; then
+ log "Modifying: \"$install_root/etc/mtab\"..."
+
+ echo "/ / zfs rw 0 0" > etc/mtab
+ echo "proc /proc proc rw,noexec,nosuid,nodev 0 0" >> etc/mtab
+fi
+
+# Hand control back to lx_init_zone
diff --git a/usr/src/lib/brand/lx/zone/lx_init_zone_redhat.ksh b/usr/src/lib/brand/lx/zone/lx_init_zone_redhat.ksh
new file mode 100644
index 0000000000..a6fe483532
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/lx_init_zone_redhat.ksh
@@ -0,0 +1,453 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2014 Joyent, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Customisation for RedHat-based distributions. Assumes to have been
+# sourced from lx_init_zone.
+#
+
+enable_nfs_services()
+{
+ log "Non-miniroot install; enabing NFS servers and NFS lock daemon"
+
+ #
+ # Setup files required for NFS:
+ #
+ # /native/etc/netconfig
+ # /native/etc/default/nfs
+ #
+ # These two files are treated as read-only in lx branded zones.
+ # To enfore this restriction we will read-only lofs mount them
+ # into the zone from the global zone. For these lofs mounts to
+ # work we'll need to create empty directories now that will serve
+ # as mount points later.
+ #
+ # /sbin/rpc.statd
+ # /sbin/rpc.lockd
+ #
+ # These files are symlinks to scripts supplied by the lx brand
+ # that will start up the solaris nfs daemons.
+ #
+ if { ! makedir native/etc/netconfig ||
+ ! makedir native/etc/default/nfs ; }; then
+ log "Aborting NFS setup..."
+ log ""
+ return
+ fi
+
+ if { ! install_ln ../native/usr/lib/brand/lx/lx_lockd sbin/rpc.lockd ||
+ ! install_ln ../native/usr/lib/brand/lx/lx_statd \
+ sbin/rpc.statd ; }; then
+ log "Aborting NFS setup..."
+ log ""
+ return
+ fi
+
+ #
+ # update /etc/services for NFS
+ #
+ log ""
+ log "Adding lockd entry to \"$install_root/etc/services\"..."
+
+ cp -p $install_root/etc/services $install_root/etc/services.$tag
+
+ #
+ # Brackets in the sed script below contain a space followed by a tab
+ #
+ cat $install_root/etc/services.$tag |
+ sed 's:\(111\/..p[ ][ ]*\):\1rpcbind :' |
+ cat > $install_root/etc/services
+
+ cat >> $install_root/etc/services <<-EOF
+ lockd 4045/udp # NFS lock daemon/manager
+ lockd 4045/tcp # NFS lock daemon/manager
+ EOF
+
+ #
+ # Modify /etc/init.d/nfslock to enable the USERLAND_LOCKD option and to
+ # find some commands in alternate locations.
+ #
+ log ""
+ log "Modifying \"$install_root/etc/init.d/nfslock\"..."
+ cp -p etc/init.d/nfslock etc/init.d/nfslock.$tag
+ cat etc/init.d/nfslock.$tag |
+ sed '
+ s/USERLAND_LOCKD=$/USERLAND_LOCKD="yes"/
+ s/killproc rpc.statd/killproc statd/
+ s/status rpc.statd/status statd/
+ s/pidof rpc.statd/pidof statd/
+ ' |
+ cat > etc/init.d/nfslock
+}
+
+#
+# The default /etc/inittab spawns mingetty on each of the virtual consoles
+# as well as xdm on the X console. Since we don't have virtual consoles nor
+# an X console, spawn a single mingetty on /dev/console instead.
+#
+# Don't bother changing the file if it looks like we already did.
+#
+if ! egrep -s "Disabled by lx brand" etc/inittab; then
+ log "Modifying: \"$install_root/etc/inittab\"..."
+
+ tmpfile=/tmp/inittab.$$
+
+ sed 's/^[1-6]:/# Disabled by lx brand: &/
+ s/^id:5:initdefault:/id:3:initdefault: # Modified by lx brand: &/' \
+ etc/inittab > $tmpfile
+
+ #
+ # Don't bother with further alterations if the sed above failed...
+ #
+ if [[ $? -eq 0 ]]; then
+ egrep -s "console login for lx brand" etc/inittab
+ if [[ $? -ne 0 ]]; then
+ cat >> $tmpfile <<- EOF
+
+ #
+ # console login for lx brand
+ #
+ 1:2345:respawn:/sbin/mingetty console
+ EOF
+
+ #
+ # Only install the new inittab if the append
+ # above succeeded.
+ #
+ if [[ $? -eq 0 ]]; then
+ #
+ # Attempt to save off the original inittab
+ # before moving over the modified version.
+ #
+ mv -f etc/inittab etc/inittab.$tag 2>/dev/null
+
+ mv -f $tmpfile etc/inittab
+
+ if [[ $? -ne 0 ]]; then
+ log "mv of \"$tmpfile\" to" \
+ "\"$installroot/etc/inittab\"" \
+ "failed!"
+ i18n_echo "$cmd2_failed" "mv" \
+ "$tmpfile" \
+ "$installroot/etc/inittab"
+ i18n_echo "$install_aborted"
+ exit 1
+ else
+ chmod 644 etc/inittab
+ fi
+ fi
+ fi
+
+ else
+ log "Attempt to disable entries in" \
+ "\"$install_root/etc/inittab\" failed!"
+ i18n_echo "$disable_failed" "$install_root/etc/inittab"
+ i18n_echo "$install_aborted"
+ exit 1
+ fi
+fi
+
+#
+# User must configure various brand-specific items to enable networking, so
+# boot the system non-networked.
+#
+log ""
+log "Modifying: \"$install_root/etc/sysconfig/network\"..."
+
+mv -f etc/sysconfig/network etc/sysconfig/network.$tag 2>/dev/null
+
+cat > etc/sysconfig/network <<- EOF
+ NETWORKING="no"
+ #
+ # To enable networking, change the "no" above to "yes" and
+ # uncomment and fill in the following parameters.
+ #
+ # If you are specifying a hostname by name rather than by IP address,
+ # be sure the system can resolve the name properly via the use of a
+ # name service and/or the proper name files, as specified by
+ # nsswitch.conf. See nsswitch.conf(5) for further details.
+ #
+ # HOSTNAME=your_hostname_here
+ #
+EOF
+
+if [[ $? -ne 0 ]]; then
+ log "Could not create new \"$install_root/etc/sysconfig/network\"!"
+ i18n_echo "$create_failed" "$install_root/etc/sysconfig/network"
+ i18n_echo "$install_aborted"
+ exit 1
+fi
+
+#
+# SELinux must be disabled otherwise we won't get past init.
+#
+if egrep -s "^SELINUX=enforcing" etc/selinux/config; then
+ log "Modifying: \"$install_root/etc/selinux/config\"..."
+
+ tmpfile=/tmp/selinux_config.$$
+
+ sed 's/^SELINUX=enforcing/SELINUX=disabled/' \
+ etc/selinux/config > $tmpfile
+
+ if [[ $? -eq 0 ]]; then
+ mv -f etc/selinux/config etc/selinux/config.$tag 2>/dev/null
+
+ mv -f $tmpfile etc/selinux/config
+
+ if [[ $? -ne 0 ]]; then
+ log "mv of \"$tmpfile\" to" \
+ "\"$installroot/etc/selinux/config\"" \
+ "failed!"
+ i18n_echo "$cmd2_failed" "mv" \
+ "$tmpfile" \
+ "$installroot/etc/selinux/config"
+ i18n_echo "$install_aborted"
+ exit 1
+ else
+ chmod 644 etc/selinux/config
+ fi
+ else
+ log "Attempt to disable entries in" \
+ "\"$install_root/etc/selinux/config\" failed!"
+ i18n_echo "$disable_failed" "$install_root/etc/selinux/config"
+ i18n_echo "$install_aborted"
+ exit 1
+ fi
+fi
+
+if [[ -a etc/sysconfig/syslog ]]; then
+ #
+ # By default, syslogd will attempt to create a socket in /dev/log, but
+ # /dev is not be writable. Instead, modify /etc/sysconfig/syslog to
+ # tell it to use /var/run/syslog instead, and make /dev/log a symlink
+ # to /var/run/syslog.
+ #
+ log ""
+ log "Modifying: \"$install_root/etc/sysconfig/syslog\"..."
+
+ tmpfile=/tmp/lx_sc.syslog.$$
+
+ sed 's@\(SYSLOGD_OPTIONS="-m 0\)"@\1 -p /var/run/syslog"@' \
+ etc/sysconfig/syslog > $tmpfile
+
+ #
+ # Only install the new sysconfig/syslog if the edit above succeeded.
+ #
+ if [[ $? -eq 0 ]]; then
+ #
+ # Attempt to save off the original syslog before moving over
+ # the modified version.
+ #
+ mv -f etc/sysconfig/syslog etc/sysconfig/syslog.$tag 2>/dev/null
+
+ if ! mv -f $tmpfile etc/sysconfig/syslog; then
+ log "mv of \"$tmpfile\" to" \
+ "\"$installroot/etc/sysconfig/syslog\" failed!"
+ i18n_echo "$cmd2_failed" "mv" "$tmpfile" \
+ "$installroot/etc/sysconfig/syslog"
+ i18n_echo "$install_aborted"
+ exit 1
+ else
+ chmod 755 etc/sysconfig/syslog
+ fi
+ else
+ log "Attempt to modify entries in" \
+ "\"$install_root/sysconfig/syslog\" failed!"
+ i18n_echo "$mod_failed" "$install_root/sysconfig/syslog"
+ i18n_echo "$install_aborted"
+ exit 1
+ fi
+fi
+
+if [[ $? -ne 0 ]]; then
+ log "Could not create new \"$install_root/etc/sysconfig/syslog\"!"
+ i18n_echo "$create_failed" "$install_root/etc/sysconfig/syslog"
+ i18n_echo "$install_aborted"
+ exit 1
+fi
+
+#
+# /etc/rc.d/init.d/keytable tries to load a physical keyboard map, which won't
+# work in a zone. If we remove etc/sysconfig/keyboard, it won't try this at all.
+#
+mv -f etc/sysconfig/keyboard etc/sysconfig/keyboard.$tag 2>/dev/null
+
+#
+# /etc/rc.d/init.d/gpm tries to configure the console mouse for cut-and-paste
+# text operations, which we don't support. Removing this file disables the
+# mouse configuration.
+#
+mv -f etc/sysconfig/mouse etc/sysconfig/mouse.$tag 2>/dev/null
+
+#
+# The following scripts attempt to start services or otherwise configure
+# the system in ways incompatible with zones, so don't execute them at boot
+# time.
+#
+log ""
+log "Modifying \"$install_root/etc/rc.d/init.d\" to disable any"
+log " services not supported by BrandZ:"
+unsupported_services="
+ kudzu
+ microcode_ctl
+ network
+ random
+ pcmcia
+ isdn
+ iptables
+ ip6tables
+ iscsi
+ psacct
+ gpm
+ irda
+ smartd
+ rawdevices
+ netdump
+ hpoj
+ mdmonitor
+ mdmpd
+ irqbalance
+"
+
+for file in $unsupported_services; do
+ if [[ -a "etc/rc.d/init.d/$file" ]]; then
+
+ if mv -f "etc/rc.d/init.d/$file" "etc/rc.d/init.d/$file.$tag"; then
+ log " + Moved script \"etc/rc.d/init.d/$file\" to"
+ log " \"etc/rc.d/init.d/$file.$tag\""
+ fi
+ fi
+
+ rc_files="$(echo etc/rc.d/rc[0-6].d/[SK]+([0-9])$file)"
+
+ if [[ "$rc_files" != "etc/rc.d/rc[0-6].d/[SK]+([0-9])$file" ]]; then
+ for file in $rc_files; do
+ if [[ -h "$file" ]]; then
+ rm -f "$file" &&
+ log " + Removed symbolic link \"$file\""
+ else
+ rm -f "$file" &&
+ log " + Removed script \"$file\""
+ fi
+ done
+ fi
+done
+
+#
+# There is a lot of stuff in the standard halt and reboot scripts that we
+# have no business running in a zone. Fortunately, the stuff we want to
+# skip is all in one contiguous chunk.
+#
+# Don't bother to modify the file if it looks like we already did.
+#
+if ! egrep -s "Disabled by lx brand" etc/rc.d/init.d/halt; then
+ log ""
+ log "Modifying \"$install_root/etc/rc.d/init.d/halt\" for operation"
+ log " within a zone..."
+ awk 'BEGIN {skip = ""}
+ /^# Save mixer/ {skip = "# Disabled by lx brand: "}
+ /halt.local/ {skip = ""}
+ /./ {print skip $0}' etc/rc.d/init.d/halt > /tmp/halt.$$
+
+ if [[ $? -eq 0 ]]; then
+ mv -f etc/rc.d/init.d/halt etc/rc.d/init.d/halt.$tag 2>/dev/null
+ mv -f /tmp/halt.$$ etc/rc.d/init.d/halt
+ chmod 755 etc/rc.d/init.d/halt
+ else
+ log "Attempt to modify \"$install_root/etc/rc.d/init.d/halt\"" \
+ "FAILED"
+ log "Continuing with balance of zone setup..."
+ fi
+fi
+
+#
+# Fix up /etc/rc.d/rc.sysinit:
+#
+# 1) /sbin/hwclock requires the iopl() system call, which BrandZ won't support.
+# Since the hardware clock cannot be set from within a zone, we comment out
+# the line.
+#
+# 2) Disable dmesg commands, since we don't implement klogctl
+#
+# 3) Disable initlog and the mount of /dev/pts
+#
+# 4) Don't touch /dev/tty* in order to start virtual terminals, as that won't
+# work from within a zone.
+#
+# 5) Don't try to check the root filesystem (/) as there is no associated
+# physical device, and any attempt to run fsck will fail.
+#
+# Don't modify the rc.sysinit file if it looks like we already did.
+#
+if ! egrep -s "Disabled by lx brand" etc/rc.d/rc.sysinit; then
+ log ""
+ log "Modifying: \"$install_root/etc/rc.d/rc.sysinit\"..."
+ log ""
+
+ tmpfile=/tmp/lx_rc.sysinit.$$
+
+ sed 's@^/sbin/hwclock@# Disabled by lx brand: &@
+ s@^HOSTTYPE=@HOSTTYPE=\"s390\" # Spoofed for lx brand: &@
+ s@/bin/dmesg -n@: # Disabled by lx brand: &@
+ s@^dmesg -s@# Disabled by lx brand: &@
+ s@initlog -c \"fsck@: # Disabled by lx brand: &@
+ s@^.*mount .* /dev/pts$@# Disabled by lx brand: &@' \
+ etc/rc.d/rc.sysinit > $tmpfile
+
+ #
+ # Only install the new rc.sysinit if the edit above succeeded.
+ #
+ if [[ $? -eq 0 ]]; then
+ #
+ # Attempt to save off the original rc.sysinit
+ # before moving over the modified version.
+ #
+ mv -f etc/rc.d/rc.sysinit etc/rc.d/rc.sysinit.$tag 2>/dev/null
+
+ if ! mv -f $tmpfile etc/rc.d/rc.sysinit; then
+ log "mv of \"$tmpfile\" to" \
+ "\"$installroot/etc/rc.d/rc.sysinit\" failed!"
+ i18n_echo "$cmd2_failed" "mv" "$tmpfile" \
+ "$installroot/etc/rc.d/rc.sysinit"
+ i18n_echo "$install_aborted"
+ exit 1
+ else
+ chmod 755 etc/rc.d/rc.sysinit
+ fi
+ else
+ log "Attempt to modify entries in" \
+ "\"$install_root/rc.d/rc.sysinit\" failed!"
+ i18n_echo "$mod_failed" "$install_root/rc.d/rc.sysinit"
+ i18n_echo "$install_aborted"
+ exit 1
+ fi
+fi
+
+if [[ -z $is_miniroot ]]; then
+ enable_nfs_services || log "NFS services were not properly enabled."
+fi
+
+# Hand control back to lx_init_zone
diff --git a/usr/src/lib/brand/lx/zone/lx_init_zone_ubuntu.ksh b/usr/src/lib/brand/lx/zone/lx_init_zone_ubuntu.ksh
new file mode 100644
index 0000000000..e46c536ea3
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/lx_init_zone_ubuntu.ksh
@@ -0,0 +1,25 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Customisation for Ubuntu-based distributions. Assumes to have been
+# sourced from lx_init_zone.
+#
+
+# Use the debian script to begin with
+. $(dirname $0)/lx_init_zone_debian
+
+# Hand control back to lx_init_zone
diff --git a/usr/src/lib/brand/lx/zone/lx_install.ksh b/usr/src/lib/brand/lx/zone/lx_install.ksh
new file mode 100644
index 0000000000..9767000801
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/lx_install.ksh
@@ -0,0 +1,579 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+# Copyright 2014 Joyent, Inc. All rights reserved.
+#
+
+# Restrict executables to /bin, /usr/bin, /usr/sbin and /usr/sfw/bin
+PATH=/bin:/usr/bin:/usr/sbin:/usr/sfw/bin
+
+export PATH
+
+# Setup i18n output
+TEXTDOMAIN="SUNW_OST_OSCMD"
+export TEXTDOMAIN
+
+# Log passed arguments to file descriptor 2
+log()
+{
+ [[ -n $logfile ]] && echo "$@" >&2
+}
+
+#
+# Send the provided printf()-style arguments to the screen and to the
+# logfile.
+#
+screenlog()
+{
+ typeset fmt="$1"
+ shift
+
+ printf "$fmt\n" "$@"
+ [[ -n $logfile ]] && printf "$fmt\n" "$@" >&2
+}
+
+# Print and log provided text if the shell variable "verbose_mode" is set
+verbose()
+{
+ [[ -n $verbose_mode ]] && echo "$@"
+ [[ -n $logfile ]] && [[ -n $verbose_mode ]] && echo "$@" >&2
+}
+
+unsupported_cpu=\
+$(gettext "ERROR: Cannot install branded zone: processor must be %s-compatible")
+
+cmd_not_found=$(gettext "Required command '%s' cannot be found!")
+cmd_not_exec=$(gettext "Required command '%s' not executable!")
+zone_initfail=$(gettext "Attempt to initialize zone '%s' FAILED.")
+path_abs=$(gettext "Pathname specified to -d '%s' must be absolute.")
+
+cmd_h=$(gettext "%s -z <zone name> %s -h")
+cmd_full=\
+$(gettext "%s -z <zone name> %s [-v | -s] [-d <dir>|<device>] [<cluster> ... ]")
+
+both_modes=$(gettext "%s: error: cannot select both silent and verbose modes")
+
+not_found=$(gettext "%s: error: file or directory not found.")
+
+wrong_type=\
+$(gettext "%s: error: must be a gzip, bzip2, .Z or uncompressed tar archive.")
+
+not_readable=$(gettext "Cannot read file '%s'")
+
+no_install=$(gettext "Could not create install directory '%s'")
+no_log=$(gettext "Could not create log directory '%s'")
+no_logfile=$(gettext "Could not create log file '%s'")
+
+root_full=$(gettext "Zonepath root %s exists and contains data; remove or move aside prior to install.")
+
+install_zone=$(gettext "Installing zone '%s' at root directory '%s'")
+install_from=$(gettext "from archive '%s'")
+
+install_fail=$(gettext "Installation of zone '%s' FAILED.")
+see_log=$(gettext "See the log file:\n '%s'\nfor details.")
+
+install_abort=$(gettext "Installation of zone '%s' aborted.")
+install_good=$(gettext "Installation of zone '%s' completed successfully.")
+
+# Check if commands passed in exist and are executable.
+check_cmd()
+{
+ for cmd in "$@"; do
+ if [[ ! -f $cmd ]]; then
+ screenlog "$cmd_not_found" "$cmd"
+ screenlog "$install_abort" "$zonename"
+ exit $ZONE_SUBPROC_NOTCOMPLETE
+ fi
+
+ if [[ ! -x $cmd ]]; then
+ screenlog "$cmd_not_exec" "$cmd"
+ screenlog "$install_abort" "$zonename"
+ exit $ZONE_SUBPROC_NOTCOMPLETE
+ fi
+ done
+}
+
+# Post process as tarball-installed zone for use by BrandZ.
+init_tarzone()
+{
+ typeset rootdir="$1"
+
+ if ! $branddir/lx_init_zone "$zonename" "$rootdir"; then
+ screenlog "$zone_initfail" "$zonename"
+ return 1
+ fi
+}
+
+# Clean up on interrupt
+trap_cleanup()
+{
+ msg=$(gettext "Installation cancelled due to interrupt.")
+
+ screenlog "$msg"
+ exit $int_code
+}
+
+#
+# Output the usage message.
+#
+# This is done this way due to limitations in the way gettext strings are
+# extracted from shell scripts and processed. Use of this somewhat awkward
+# syntax allows us to produce longer lines of text than otherwise would be
+# possible without wrapping lines across more than one line of code.
+#
+usage()
+{
+ int_code=$ZONE_SUBPROC_USAGE
+
+ echo $(gettext "Usage:")
+ printf " $cmd_h\n" "zoneadm" "install"
+ printf " $cmd_full\n" "zoneadm" "install"
+
+ echo
+
+ echo $(gettext "The installer will attempt to use the default system") \
+ $(gettext "removable disc device if <archive dir> is not") \
+ $(gettext "specified.") | fmt -80
+
+ echo
+
+ echo $(gettext "<cluster> specifies which package cluster you wish") \
+ $(gettext "to install.") | fmt -80
+
+ echo
+ echo $(gettext "The 'desktop' cluster will be installed by default.")
+ echo
+ echo $(gettext "The available clusters are:")
+ echo " + core"
+ echo " + server"
+ echo " + desktop"
+ echo " + development"
+ echo " + all"
+ echo
+
+ echo $(gettext "Each cluster includes all of the clusters preceding") \
+ $(gettext "it, so the 'server' cluster includes the 'core'") \
+ $(gettext "cluster, the 'desktop' cluster includes the 'core'") \
+ $(gettext "and 'server' clusters, and so on.") | fmt -80
+
+ echo
+ echo $(gettext "Examples")
+ echo "========"
+
+ echo $(gettext "Example 1: Install a base Linux system from CDs or a") \
+ $(gettext "DVD using the system default removable disc device:") |
+ fmt -80
+
+ echo
+ echo " # zoneadm -z myzone install"
+ echo
+
+ echo $(gettext "Example 2: Install the 'server' cluster from CDs or") \
+ $(gettext "a DVD via an alternative removable disc device:") |
+ fmt -80
+
+ echo
+ echo " # zoneadm -z myzone install -d /cdrom/cdrom1 server"
+ echo
+
+ echo $(gettext "Example 3: Install the desktop Linux environment") \
+ $(gettext "from an ISO image made available as '/dev/lofi/1' by") \
+ $(gettext "use of lofiadm(1M):") | fmt -80
+
+ echo
+ echo " # zoneadm -z myzone install -d /dev/lofi/1 desktop"
+ echo
+
+ echo $(gettext "Example 4: Install the entire Linux environment from") \
+ $(gettext "ISO images located in the directory") \
+ "'/export/centos_3.8/isos':" | fmt -80
+
+ echo
+ echo " # zoneadm -z myzone install -d /export/centos_3.8/isos all"
+ echo
+
+ echo $(gettext "Example 5: Install from a compressed tar archive of") \
+ $(gettext "an existing Linux installation (a tar ball) with") \
+ $(gettext "verbose output regarding the progress of the") \
+ $(gettext "installation:") | fmt -80
+
+ echo
+ echo " # zoneadm -z myzone install -v -d /tmp/linux_full.tar.gz"
+ echo
+
+ echo $(gettext "Example 6: Install from a compressed tar archive of") \
+ $(gettext "an existing Linux installation (a tar ball) with NO") \
+ $(gettext "output regarding the progress of the installation") \
+ $(gettext "(silent mode.)") | fmt -80
+
+ echo
+
+ echo $(gettext "NOTE: Silent mode is only recommended for use by") \
+ $(gettext "shell scripts and other non-interactive programs:") |
+ fmt -80
+
+ echo
+ echo " # zoneadm -z myzone install -d /tmp/linux_full.tar.gz -s"
+ echo
+
+ exit $int_code
+}
+
+#
+# The main body of the script starts here.
+#
+# This script should never be called directly by a user but rather should
+# only be called by zoneadm to install a BrandZ Linux zone.
+#
+
+#
+# Exit values used by the script, as #defined in <sys/zone.h>
+#
+# ZONE_SUBPROC_OK
+# ===============
+# Installation was successful
+#
+# ZONE_SUBPROC_USAGE
+# ==================
+# Improper arguments were passed, so print a usage message before exiting
+#
+# ZONE_SUBPROC_NOTCOMPLETE
+# ========================
+# Installation did not complete, but another installation attempt can be
+# made without an uninstall
+#
+# ZONE_SUBPROC_FATAL
+# ==================
+# Installation failed and an uninstall will be required before another
+# install can be attempted
+#
+ZONE_SUBPROC_OK=0
+ZONE_SUBPROC_USAGE=253
+ZONE_SUBPROC_NOTCOMPLETE=254
+ZONE_SUBPROC_FATAL=255
+
+#
+# An unspecified exit or interrupt should exit with ZONE_SUBPROC_NOTCOMPLETE,
+# meaning a user will not need to do an uninstall before attempting another
+# install.
+#
+int_code=$ZONE_SUBPROC_NOTCOMPLETE
+
+trap trap_cleanup INT
+
+# If we weren't passed at least two arguments, exit now.
+[[ $# -lt 2 ]] && usage
+
+#
+# This script is always started with a full path so we can extract the
+# brand directory name here.
+#
+branddir=$(dirname "$0")
+zonename="$1"
+zoneroot="$2"
+
+install_root="$zoneroot/root"
+logdir="$install_root/var/log"
+
+shift; shift # remove zonename and zoneroot from arguments array
+
+unset gtaropts
+unset install_opts
+unset install_src
+unset msg
+unset silent_mode
+unset verbose_mode
+
+while getopts "d:hsvX" opt
+do
+ case "$opt" in
+ h) usage;;
+ s) silent_mode=1;;
+ v) verbose_mode=1;;
+ d) install_src="$OPTARG" ;;
+ X) install_opts="$install_opts -x" ;;
+ *) usage;;
+ esac
+done
+shift OPTIND-1
+
+# Providing more than one passed argument generates a usage message
+if [[ $# -gt 1 ]]; then
+ msg=$(gettext "ERROR: Too many arguments provided:")
+
+ screenlog "$msg"
+ screenlog " \"%s\"" "$@"
+ screenlog ""
+ usage
+fi
+
+# Validate any free-form arguments
+if [[ $# -eq 1 && "$1" != "core" && "$1" != "server" && "$1" != "desktop" &&
+ "$1" != "development" && "$1" != "all" ]]; then
+ msg=$(gettext "ERROR: Unknown cluster name specified: %s")
+
+ screenlog "$msg" "\"$1\""
+ screenlog ""
+ usage
+fi
+
+# The install can't be both verbose AND silent...
+if [[ -n $silent_mode && -n $verbose_mode ]]; then
+ screenlog "$both_modes" "zoneadm install"
+ screenlog ""
+ usage
+fi
+
+#
+# Validate that we're running on a i686-compatible CPU; abort the zone
+# installation now if we're not.
+#
+procinfo=$(LC_ALL=C psrinfo -vp | grep family)
+
+#
+# All x86 processors in CPUID families 6, 15, 16 or 17 should be
+# i686-compatible, assuming third party processor vendors follow AMD and
+# Intel's lead.
+#
+if [[ "$procinfo" != *" x86 "* ]] ||
+ [[ "$procinfo" != *" family 6 "* && "$procinfo" != *" family 15 "* &&
+ "$procinfo" != *" family 16 "* && "$procinfo" != *" family 17 "* ]] ; then
+ screenlog "$unsupported_cpu" "i686"
+ exit $int_code
+fi
+
+if [[ -n $install_src ]]; then
+ #
+ # Validate $install_src.
+ #
+ # If install_src is a directory, assume it contains ISO images to
+ # install from, otherwise treat the argument as if it points to a tar
+ # ball file.
+ #
+ if [[ "`echo $install_src | cut -c 1`" != "/" ]]; then
+ screenlog "$path_abs" "$install_src"
+ exit $int_code
+ fi
+
+ if [[ ! -a "$install_src" ]]; then
+ screenlog "$not_found" "$install_src"
+ screenlog "$install_abort" "$zonename"
+ exit $int_code
+ fi
+
+ if [[ ! -r "$install_src" ]]; then
+ screenlog "$not_readable" "$install_src"
+ screenlog "$install_abort" "$zonename"
+ exit $int_code
+ fi
+
+ #
+ # If install_src is a block device, a directory, a possible device
+ # created via lofiadm(1M), or the directory used by a standard volume
+ # management daemon, pass it on to the secondary install script.
+ #
+ # Otherwise, validate the passed filename to prepare for a tar ball
+ # install.
+ #
+ if [[ ! -b "$install_src" && ! -d "$install_src" &&
+ "$install_src" != /dev/lofi/* && "$install_src" != /cdrom/* &&
+ "$install_src" != /media/* ]]; then
+ if [[ ! -f "$install_src" ]]; then
+ screenlog "$wrong_type" "$install_src"
+ screenlog "$install_abort" "$zonename"
+ exit $int_code
+ fi
+
+ filetype=`{ LC_ALL=C file $install_src |
+ awk '{print $2}' ; } 2>/dev/null`
+
+ if [[ "$filetype" = "gzip" ]]; then
+ verbose "\"$install_src\": \"gzip\" archive"
+ gtaropts="-xz"
+ elif [[ "$filetype" = "bzip2" ]]; then
+ verbose "\"$install_src\": \"bzip2\" archive"
+ gtaropts="-xj"
+ elif [[ "$filetype" = "compressed" ]]; then
+ verbose "\"$install_src\": Lempel-Ziv" \
+ "compressed (\".Z\") archive."
+ gtaropts="-xZ"
+ elif [[ "$filetype" = "USTAR" ]]; then
+ verbose "\"$install_src\":" \
+ "uncompressed (\"tar\") archive."
+ gtaropts="-x"
+ else
+ screenlog "$wrong_type" "$install_src"
+ screenlog "$install_abort" "$zonename"
+ exit $int_code
+ fi
+ fi
+fi
+
+#
+# Start silent operation and pass the flag to prepare pass the flag to
+# the ISO installer, if needed.
+#
+if [[ -n $silent_mode ]]
+then
+ exec 1>/dev/null
+ install_opts="$install_opts -s"
+fi
+
+#
+# If verbose mode was specified, pass the verbose flag to lx_distro_install
+# for ISO or disc installations and to gtar for tarball-based installs.
+#
+if [[ -n $verbose_mode ]]
+then
+ echo $(gettext "Verbose output mode enabled.")
+ install_opts="$install_opts -v"
+ [[ -n $gtaropts ]] && gtaropts="${gtaropts}v"
+fi
+
+[[ -n $gtaropts ]] && gtaropts="${gtaropts}f"
+
+if [[ ! -d "$install_root" ]]
+then
+ if ! mkdir -p "$install_root" 2>/dev/null; then
+ screenlog "$no_install" "$install_root"
+ exit $int_code
+ fi
+fi
+
+#
+# Check for a non-empty root.
+#
+cnt=`ls $install_root | wc -l`
+if [ $cnt -ne 0 ]; then
+ screenlog "$root_full" "$install_root"
+ exit $int_code
+fi
+
+if [[ ! -d "$logdir" ]]
+then
+ if ! mkdir -p "$logdir" 2>/dev/null; then
+ screenlog "$no_log" "$logdir"
+ exit $int_code
+ fi
+fi
+
+logfile="${logdir}/$zonename.install.$$.log"
+
+if ! > $logfile; then
+ screenlog "$no_logfile" "$logfile"
+ exit $int_code
+fi
+
+# Redirect stderr to the log file to automatically log any error messages
+exec 2>>"$logfile"
+
+#
+# From here on out, an unspecified exit or interrupt should exit with
+# ZONE_SUBPROC_FATAL, meaning a user will need to do an uninstall before
+# attempting another install, as we've modified the directories we were going
+# to install to in some way.
+#
+int_code=$ZONE_SUBPROC_FATAL
+
+log "Installation started for zone \"$zonename\" `/usr/bin/date`"
+
+if [[ -n $gtaropts ]]; then
+
+ screenlog "$install_zone" "$zonename" "$zoneroot"
+ screenlog "$install_from" "$install_src"
+ echo
+ echo $(gettext "This process may take several minutes.")
+ echo
+
+ if ! ( cd "$install_root" && gtar "$gtaropts" "$install_src" ) ; then
+ log "Error: extraction from tar archive failed."
+ else
+ if ! [[ -d "${install_root}/bin" &&
+ -d "${install_root}/sbin" ]]; then
+ log "Error: improper or incomplete tar archive."
+ else
+ $branddir/lx_init_zone "$zonename" "$install_root" &&
+ init_tarzone "$install_root"
+
+ #
+ # Emit the same code from here whether we're
+ # interrupted or exiting normally.
+ #
+ int_code=$?
+ fi
+ fi
+
+ if [[ $int_code -eq ZONE_SUBPROC_OK ]]; then
+ log "Tar install completed for zone '$zonename' `date`."
+ else
+ log "Tar install failed for zone \"$zonename\" `date`."
+
+ fi
+else
+ check_cmd $branddir/lx_distro_install
+
+ $branddir/lx_distro_install -z "$zonename" -r "$zoneroot" \
+ -d "$install_src" -l "$logfile" $install_opts "$@"
+
+ #
+ # Emit the same code from here whether we're interrupted or exiting
+ # normally.
+ #
+ int_code=$?
+
+ [[ $int_code -eq $ZONE_SUBPROC_USAGE ]] && usage
+fi
+
+if [[ $int_code -ne $ZONE_SUBPROC_OK ]]; then
+ screenlog ""
+ screenlog "$install_fail" "$zonename"
+ screenlog ""
+
+ #
+ # Only make a reference to the log file if one will exist after
+ # zoneadm exits.
+ #
+ [[ $int_code -ne $ZONE_SUBPROC_NOTCOMPLETE ]] &&
+ screenlog "$see_log" "$logfile"
+
+ exit $int_code
+fi
+
+#
+# After the install completes, we've likely moved a new copy of the logfile into
+# place atop the logfile we WERE writing to, so if we don't reopen the logfile
+# here the shell will continue writing to the old logfile's inode, meaning we
+# would lose all log information from this point on.
+#
+exec 2>>"$logfile"
+
+screenlog ""
+screenlog "$install_good" "$zonename"
+screenlog ""
+
+echo $(gettext "Details saved to log file:")
+echo " \"$logfile\""
+echo
+
+exit $ZONE_SUBPROC_OK
diff --git a/usr/src/lib/brand/lx/zone/lx_networking.ksh b/usr/src/lib/brand/lx/zone/lx_networking.ksh
new file mode 100644
index 0000000000..7c1451b2fa
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/lx_networking.ksh
@@ -0,0 +1,74 @@
+#!/bin/sh -e
+### BEGIN INIT INFO
+# Provides: networking
+# Required-Start:
+# Required-Stop: $local_fs
+# Should-Start: ifupdown
+# Should-Stop: ifupdown
+# Default-Start:
+# Default-Stop: 0 6
+# Short-Description: Raise network interfaces.
+### END INIT INFO
+
+PATH="/sbin:/bin:/usr/sbin:/usr/bin"
+
+[ -x /sbin/ipmgmtd ] || exit 0
+[ -x /sbin/ifconfig ] || exit 0
+
+. /lib/lsb/init-functions
+
+[ -f /etc/default/networking ] && . /etc/default/networking
+
+config_ifs() {
+ if [ ! -f /etc/network/interfaces ]; then
+ return
+ fi
+
+ while read cmd nm typ how; do
+ if [ "$cmd" = "iface" ]; then
+ ifconfig $nm plumb
+ if [ "$nm" != "lo0" ]; then
+ if [ "$how" = "dhcp" ]; then
+ ifconfig $nm dhcp
+ elif [ "$how" = "static" ]; then
+ read nxt addr remain
+ read nxt mask remain
+ ifconfig $nm inet $addr netmask $mask up
+ fi
+ fi
+ fi
+ done < /etc/network/interfaces
+}
+
+case "$1" in
+start)
+ /sbin/ipmgmtd
+
+ log_action_begin_msg "Configuring network interfaces"
+ config_ifs
+ log_action_end_msg $?
+ ;;
+
+stop)
+ log_action_begin_msg "Deconfiguring network interfaces"
+ log_action_end_msg $?
+ ;;
+
+reload)
+ log_action_begin_msg "Reloading network interfaces configuration"
+ log_action_end_msg $?
+ ;;
+
+force-reload|restart)
+ log_warning_msg "Running $0 $1 is deprecated because it may not re-enable some interfaces"
+ log_action_begin_msg "Reconfiguring network interfaces"
+ log_action_end_msg $?
+ ;;
+
+*)
+ echo "Usage: /etc/init.d/networking {start|stop|reload|restart|force-reload}"
+ exit 1
+ ;;
+esac
+
+exit 0
diff --git a/usr/src/lib/brand/lx/zone/platform.xml b/usr/src/lib/brand/lx/zone/platform.xml
new file mode 100644
index 0000000000..a586d7ae8f
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/platform.xml
@@ -0,0 +1,141 @@
+<?xml version="1.0"?>
+
+<!--
+ CDDL HEADER START
+
+ The contents of this file are subject to the terms of the
+ Common Development and Distribution License (the "License").
+ You may not use this file except in compliance with the License.
+
+ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ or http://www.opensolaris.org/os/licensing.
+ See the License for the specific language governing permissions
+ and limitations under the License.
+
+ When distributing Covered Code, include this CDDL HEADER in each
+ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ If applicable, add the following below this CDDL HEADER, with the
+ fields enclosed by brackets "[]" replaced with your own identifying
+ information: Portions Copyright [yyyy] [name of copyright owner]
+
+ CDDL HEADER END
+
+ Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ Use is subject to license terms.
+ Copyright 2014 Sun Microsystems, Inc. All rights reserved.
+
+ DO NOT EDIT THIS FILE.
+-->
+
+<!DOCTYPE platform PUBLIC "-//Sun Microsystems Inc//Zones Platform//EN"
+ "file:///usr/share/lib/xml/dtd/zone_platform.dtd.1">
+
+<platform name="lx" allow-exclusive-ip="true">
+ <!-- Global filesystems to mount when booting the zone -->
+ <global_mount special="/dev" directory="/native/dev" type="dev"
+ opt="attrdir=%R/dev" />
+
+ <!--
+ Local filesystems to mount when booting the zone. The /native/sbin
+ entry is needed so that we can replace ifconfig with its native
+ counterpart (this is required for exclusive-stack zones to work).
+ We also need dladm from /native/sbin.
+ -->
+ <global_mount special="/lib" directory="/native/lib"
+ opt="ro" type="lofs" />
+ <global_mount special="/usr" directory="/native/usr"
+ opt="ro" type="lofs" />
+ <global_mount special="/usr/lib/brand/lx/etc_default_nfs"
+ directory="/native/etc/default/nfs" type="lofs" opt="ro" />
+ <global_mount special="/usr/lib/brand/lx/etc_netconfig"
+ directory="/native/etc/netconfig" type="lofs" opt="ro" />
+ <global_mount special="/sbin"
+ directory="/native/sbin" opt="ro,nodevices" type="lofs" />
+ <global_mount special="/usr/lib/brand/lx/ld" directory="/var/ld"
+ opt="ro" type="lofs" />
+
+ <!-- Local filesystems to mount when booting the zone -->
+ <mount special="/native/dev" directory="/dev" type="lofs" />
+ <mount special="proc" directory="/native/proc" type="proc" />
+ <mount special="swap" directory="/native/etc/svc/volatile"
+ type="tmpfs" />
+ <mount special="swap" directory="/native/tmp" type="tmpfs" />
+ <mount special="objfs" directory="/system/object" type="objfs" />
+ <mount special="ctfs" directory="/system/contract" type="ctfs" />
+ <mount special="mnttab" directory="/etc/mnttab" type="mntfs" />
+
+ <!-- Devices to create under /dev -->
+ <device match="arp" />
+ <device match="dtrace/*" />
+ <device match="dtrace/provider/*" />
+ <device match="inotify" />
+ <device match="ipnet" />
+ <device match="kstat" />
+ <device match="lo0" />
+ <device match="null" />
+ <device match="poll" />
+ <device match="pts/*" />
+ <device match="random" />
+ <device match="tcp" />
+ <device match="tcp6" />
+ <device match="tty" />
+ <device match="udp" />
+ <device match="udp6" />
+ <device match="urandom" />
+ <device match="zero" />
+ <device match="zfs" />
+ <device match="zvol/dsk/%P/%z/*" />
+ <device match="zvol/rdsk/%P/%z/*" />
+
+ <!-- Devices to create in exclusive IP zone only -->
+ <device match="dld" ip-type="exclusive" />
+ <device match="icmp" ip-type="exclusive" />
+ <device match="icmp6" ip-type="exclusive" />
+ <device match="ip" ip-type="exclusive" />
+ <device match="ip6" ip-type="exclusive" />
+ <device match="ipauth" ip-type="exclusive" />
+ <device match="ipf" ip-type="exclusive" />
+ <device match="ipl" ip-type="exclusive" />
+ <device match="iplookup" ip-type="exclusive" />
+ <device match="ipmpstub" ip-type="exclusive" />
+ <device match="ipnat" ip-type="exclusive" />
+ <device match="ipscan" ip-type="exclusive" />
+ <device match="ipsecah" ip-type="exclusive" />
+ <device match="ipsecesp" ip-type="exclusive" />
+ <device match="ipstate" ip-type="exclusive" />
+ <device match="ipsync" ip-type="exclusive" />
+ <device match="keysock" ip-type="exclusive" />
+ <device match="net/*" ip-type="exclusive" />
+ <device match="rawip" ip-type="exclusive" />
+ <device match="rawip6" ip-type="exclusive" />
+ <device match="rts" ip-type="exclusive" />
+ <device match="sad/admin" ip-type="exclusive" />
+ <device match="sctp" ip-type="exclusive" />
+ <device match="sctp6" ip-type="exclusive" />
+ <device match="spdsock" ip-type="exclusive" />
+ <device match="sppp" ip-type="exclusive" />
+ <device match="sppptun" ip-type="exclusive" />
+ <device match="vni" ip-type="exclusive" />
+
+ <!-- Renamed devices to create under /dev -->
+ <device match="brand/lx/ptmx" name="ptmx" />
+ <device match="zcons/%z/zoneconsole" name="console" />
+
+ <!-- Audio devices to create under /dev -->
+ <device match="brand/lx/dsp" name="dsp" />
+ <device match="brand/lx/mixer" name="mixer" />
+
+ <!-- Symlinks to create under /dev -->
+ <symlink source="shm" target="/run/shm" />
+ <symlink source="fd" target="../proc/self/fd" />
+ <symlink source="stderr" target="../proc/self/fd/2" />
+ <symlink source="stdin" target="../proc/self/fd/0" />
+ <symlink source="stdout" target="../proc/self/fd/1" />
+ <symlink source="systty" target="console" />
+
+ <!-- Create a mount point for for the /dev/initctl fifo -->
+ <device match="null" name="initctl" />
+ <!-- Create a mount point for for the /dev/log socket -->
+ <device match="null" name="log" />
+
+</platform>
diff --git a/usr/src/lib/brand/shared/zone/common.ksh b/usr/src/lib/brand/shared/zone/common.ksh
index 52441d814a..0f87686414 100644
--- a/usr/src/lib/brand/shared/zone/common.ksh
+++ b/usr/src/lib/brand/shared/zone/common.ksh
@@ -19,6 +19,7 @@
# CDDL HEADER END
#
# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2014, Joyent, Inc. All rights reserved.
#
#
@@ -96,9 +97,27 @@ vlog()
safe_dir()
{
typeset dir="$1"
+ typeset pwd_dir=""
- if [[ -h $ZONEROOT/$dir || ! -d $ZONEROOT/$dir ]]; then
- fatal "$e_baddir" "$dir"
+ if [[ -d $ZONEROOT/$dir ]]; then
+ if [[ -h $ZONEROOT/$dir ]]; then
+ #
+ # When dir is a symlink to a directory, we 'cd' to that
+ # directory to ensure that's under $ZONEROOT. We use pwd
+ # from /usr/bin instead of built-in because they give
+ # different results.
+ #
+ pwd_dir=$(cd $ZONEROOT/$dir && /usr/bin/pwd)
+ if [[ $pwd_dir =~ "^$ZONEROOT" ]]; then
+ return;
+ else
+ fatal \
+ "$e_baddir: symlink out of zoneroot" "$dir"
+ fi
+ else
+ # it's a dir and not a symlink, so that's ok.
+ return
+ fi
fi
}
@@ -109,9 +128,7 @@ safe_opt_dir()
[[ ! -e $ZONEROOT/$dir ]] && return
- if [[ -h $ZONEROOT/$dir || ! -d $ZONEROOT/$dir ]]; then
- fatal "$e_baddir" "$dir"
- fi
+ safe_dir $dir
}
# Only make a copy if we haven't already done so.
@@ -187,7 +204,7 @@ safe_replace()
fi
cat <<-END >$filename || exit 1
- #!/bin/sh -p
+ #!/bin/sh
#
# Solaris Brand Replacement
#
@@ -333,10 +350,12 @@ post_unpack()
#
# Check if the image was created with a valid libc.so.1.
#
- hwcap=`moe -v -32 $ZONEROOT/lib/libc.so.1 2>&1`
- if (( $? != 0 )); then
- vlog "$f_hwcap_info" "$hwcap"
- fail_fatal "$f_sanity_hwcap"
+ if [[ -f $ZONEROOT/lib/libc.so.1 ]]; then
+ hwcap=`moe -v -32 $ZONEROOT/lib/libc.so.1 2>&1`
+ if (( $? != 0 )); then
+ vlog "$f_hwcap_info" "$hwcap"
+ fail_fatal "$f_sanity_hwcap"
+ fi
fi
( cd "$ZONEROOT" && \
@@ -1003,41 +1022,36 @@ install_image()
return 0
}
-# Setup i18n output
-TEXTDOMAIN="SUNW_OST_OSCMD"
-export TEXTDOMAIN
-
-e_cannot_wrap=$(gettext "%s: error: wrapper file already exists")
-e_baddir=$(gettext "Invalid '%s' directory within the zone")
-e_badfile=$(gettext "Invalid '%s' file within the zone")
-e_path_abs=$(gettext "Pathname specified to -a '%s' must be absolute.")
-e_not_found=$(gettext "%s: error: file or directory not found.")
-e_install_abort=$(gettext "Installation aborted.")
-e_not_readable=$(gettext "Cannot read directory '%s'")
-e_not_dir=$(gettext "Error: must be a directory")
-e_unknown_archive=$(gettext "Error: Unknown archive format. Must be a flash archive, a cpio archive (can also be gzipped or bzipped), a pax XUSTAR archive, or a level 0 ufsdump archive.")
-e_absolute_archive=$(gettext "Error: archive contains absolute paths instead of relative paths.")
-e_mismatch_archive=$(gettext "Error: the archive top-level directory (%s) does not match the zonepath (%s).")
-e_tmpfile=$(gettext "Unable to create temporary file")
-e_root_full=$(gettext "Zonepath root %s exists and contains data; remove or move aside prior to install.")
-f_mkdir=$(gettext "Unable to create directory %s.")
-f_chmod=$(gettext "Unable to chmod directory %s.")
-f_chown=$(gettext "Unable to chown directory %s.")
-f_hwcap_info=$(gettext "HWCAP: %s\n")
-f_sanity_hwcap=$(gettext \
-"The image was created with an incompatible libc.so.1 hwcap lofs mount.\n"\
+e_cannot_wrap="%s: error: wrapper file already exists"
+e_baddir="Invalid '%s' directory within the zone"
+e_badfile="Invalid '%s' file within the zone"
+e_path_abs="Pathname specified to -a '%s' must be absolute."
+e_not_found="%s: error: file or directory not found."
+e_install_abort="Installation aborted."
+e_not_readable="Cannot read directory '%s'"
+e_not_dir="Error: must be a directory"
+e_unknown_archive="Error: Unknown archive format. Must be a flash archive, a cpio archive (can also be gzipped or bzipped), a pax XUSTAR archive, or a level 0 ufsdump archive."
+e_absolute_archive="Error: archive contains absolute paths instead of relative paths."
+e_mismatch_archive="Error: the archive top-level directory (%s) does not match the zonepath (%s)."
+e_tmpfile="Unable to create temporary file"
+e_root_full="Zonepath root %s exists and contains data; remove or move aside prior to install."
+f_mkdir="Unable to create directory %s."
+f_chmod="Unable to chmod directory %s."
+f_chown="Unable to chown directory %s."
+f_hwcap_info="HWCAP: %s\n"
+f_sanity_hwcap="The image was created with an incompatible libc.so.1 hwcap lofs mount.\n"\
" The zone will not boot on this platform. See the zone's\n"\
-" documentation for the recommended way to create the archive.")
+" documentation for the recommended way to create the archive."
-m_analyse_archive=$(gettext "Analysing the archive")
+m_analyse_archive="Analysing the archive"
-not_readable=$(gettext "Cannot read file '%s'")
-not_flar=$(gettext "Input is not a flash archive")
-bad_flar=$(gettext "Flash archive is a corrupt")
-bad_zfs_flar=$(gettext "Flash archive contains a ZFS send stream.\n\tRecreate the flar using the -L option with cpio or pax.")
-f_unpack_failed=$(gettext "Unpacking the archive failed")
-unknown_archiver=$(gettext "Archiver %s is not supported")
-cmd_not_exec=$(gettext "Required command '%s' not executable!")
+not_readable="Cannot read file '%s'"
+not_flar="Input is not a flash archive"
+bad_flar="Flash archive is a corrupt"
+bad_zfs_flar="Flash archive contains a ZFS send stream.\n\tRecreate the flar using the -L option with cpio or pax."
+f_unpack_failed="Unpacking the archive failed"
+unknown_archiver="Archiver %s is not supported"
+cmd_not_exec="Required command '%s' not executable!"
#
# Exit values used by the script, as #defined in <sys/zone.h>
diff --git a/usr/src/lib/brand/shared/zone/uninstall.ksh b/usr/src/lib/brand/shared/zone/uninstall.ksh
index 468a7ed92f..923363c864 100644
--- a/usr/src/lib/brand/shared/zone/uninstall.ksh
+++ b/usr/src/lib/brand/shared/zone/uninstall.ksh
@@ -35,31 +35,31 @@ bname=`basename $0`
#
# error messages
#
-m_usage=$(gettext "Usage: %s: [-hFn]")
-
-m_1_zfs_promote=$(gettext "promoting '%s'.")
-m_1_zfs_destroy=$(gettext "destroying '%s'.")
-m_2_zfs_rename=$(gettext "renaming '%s' to '%s'.")
-m_3_zfs_set=$(gettext "setting property %s='%s' for '%s'.")
-m_rm_r=$(gettext "recursively deleting '%s'.")
-m_rm=$(gettext "deleting '%s'.")
-
-w_no_ds=$(gettext "Warning: no zonepath dataset found.")
-
-f_usage_err=$(gettext "Error: invalid usage")
-f_abort=$(gettext "Error: internal error detected, aborting.")
-f_1_zfs_promote=$(gettext "Error: promoting ZFS dataset '%s'.")
-f_2_zfs_rename=$(gettext "Error: renaming ZFS dataset '%s' to '%s'.")
-f_3_zfs_set=$(gettext "Error: setting ZFS propery %s='%s' for '%s'.")
-f_1_zfs_destroy=$(gettext "Error: destroying ZFS dataset.")
-f_2_zfs_get=$(gettext "Error: reading ZFS dataset property '%s' from '%s'.")
-f_user_snap=$(gettext "Error: user snapshot(s) detected.")
-f_stray_snap=$(gettext "Error: uncloned snapshot(s) detected.")
-f_stray_clone=$(gettext "Error: cloned zone datasets found outsize of zone.")
-f_rm_snap=$(gettext "Error: please delete snapshot(s) and retry uninstall.")
-f_rm_clone=$(gettext "Error: please delete clone(s) and retry uninstall.")
-f_iu_clone=$(gettext "Error: cloned zone dataset(s) in use.")
-f_dis_clone=$(gettext "Error: please stop using clone(s) and retry uninstall.")
+m_usage="Usage: %s: [-hFn]"
+
+m_1_zfs_promote="promoting '%s'."
+m_1_zfs_destroy="destroying '%s'."
+m_2_zfs_rename="renaming '%s' to '%s'."
+m_3_zfs_set="setting property %s='%s' for '%s'."
+m_rm_r="recursively deleting '%s'."
+m_rm="deleting '%s'."
+
+w_no_ds="Warning: no zonepath dataset found."
+
+f_usage_err="Error: invalid usage"
+f_abort="Error: internal error detected, aborting."
+f_1_zfs_promote="Error: promoting ZFS dataset '%s'."
+f_2_zfs_rename="Error: renaming ZFS dataset '%s' to '%s'."
+f_3_zfs_set="Error: setting ZFS propery %s='%s' for '%s'."
+f_1_zfs_destroy="Error: destroying ZFS dataset."
+f_2_zfs_get="Error: reading ZFS dataset property '%s' from '%s'."
+f_user_snap="Error: user snapshot(s) detected."
+f_stray_snap="Error: uncloned snapshot(s) detected."
+f_stray_clone="Error: cloned zone datasets found outsize of zone."
+f_rm_snap="Error: please delete snapshot(s) and retry uninstall."
+f_rm_clone="Error: please delete clone(s) and retry uninstall."
+f_iu_clone="Error: cloned zone dataset(s) in use."
+f_dis_clone="Error: please stop using clone(s) and retry uninstall."
#
# functions
diff --git a/usr/src/lib/brand/sngl/Makefile b/usr/src/lib/brand/sngl/Makefile
new file mode 100644
index 0000000000..780acc21e1
--- /dev/null
+++ b/usr/src/lib/brand/sngl/Makefile
@@ -0,0 +1,51 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2012, Joyent, Inc. All rights reserved.
+#
+
+default: all
+
+include Makefile.sngl
+
+# Build everything in parallel; use .WAIT for dependencies
+.PARALLEL:
+
+SUBDIRS = sngl_brand zone
+MSGSUBDIRS = zone
+
+all := TARGET= all
+install := TARGET= install
+clean := TARGET= clean
+clobber := TARGET= clobber
+lint := TARGET= lint
+_msg := TARGET= _msg
+
+.KEEP_STATE:
+
+all install clean clobber lint: $(SUBDIRS)
+
+_msg: $(MSGSUBDIRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/lib/brand/sngl/Makefile.sngl b/usr/src/lib/brand/sngl/Makefile.sngl
new file mode 100644
index 0000000000..0c7a280696
--- /dev/null
+++ b/usr/src/lib/brand/sngl/Makefile.sngl
@@ -0,0 +1,28 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2012 Joyent, Inc. All rights reserved.
+#
+
+BRAND = sngl
+
+include $(SRC)/lib/brand/Makefile.brand
+
diff --git a/usr/src/lib/brand/sngl/sngl_brand/Makefile b/usr/src/lib/brand/sngl/sngl_brand/Makefile
new file mode 100644
index 0000000000..4b667e1fa8
--- /dev/null
+++ b/usr/src/lib/brand/sngl/sngl_brand/Makefile
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+include $(SRC)/lib/Makefile.lib
+
+default: all
+
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+all := TARGET= all
+clean := TARGET= clean
+clobber := TARGET= clobber
+install := TARGET= install
+lint := TARGET= lint
+_msg := TARGET= _msg
+
+.KEEP_STATE:
+
+all install clean clobber lint _msg: $(SUBDIRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/lib/brand/sngl/sngl_brand/Makefile.com b/usr/src/lib/brand/sngl/sngl_brand/Makefile.com
new file mode 100644
index 0000000000..070a03dcb5
--- /dev/null
+++ b/usr/src/lib/brand/sngl/sngl_brand/Makefile.com
@@ -0,0 +1,76 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2012, Joyent, Inc. All rights reserved.
+#
+
+LIBRARY = sngl_brand.a
+VERS = .1
+COBJS = sngl_brand.o
+ASOBJS = crt.o handler.o runexe.o brand_util.o
+OBJECTS = $(COBJS)
+
+include ../../Makefile.sngl
+include $(SRC)/lib/Makefile.lib
+
+SRCDIR = ../common
+UTSBASE = $(SRC)/uts
+
+LIBS = $(DYNLIB)
+CSRCS = $(COBJS:%o=../common/%c)
+SHAREDOBJS = $(ASOBJS:%o=$(ISAOBJDIR)/%o)
+SRCS = $(CSRCS)
+
+#
+# Note that the architecture specific makefiles MUST update DYNFLAGS to
+# explicitly specify an interpreter for the brand emulation library so that we
+# use /lib/ld.so.1 or /lib/64/ld.so.1, which in a sngl zone is the system
+# linker.
+#
+# Note that since the linker is used to setup processes before libc is loaded,
+# it makes system calls directly (ie avoiding libc), and it makes these system
+# calls before our library has been initialized. Since our library hasn't been
+# initialized yet, there's no way for us to intercept and emulate any of those
+# system calls. Luckily we don't have to.
+#
+# Note that we make sure to link our brand emulation library to libmapmalloc.
+# This is required because in most cases there will be two copies of libc in
+# the same process and we don't want them to fight over the heap. So for our
+# brand library we link against libmapmalloc so that if we (our or copy of
+# libc) try to allocate any memory it will be done via mmap() instead of brk().
+#
+CPPFLAGS += -D_REENTRANT -U_ASM \
+ -I. -I$(BRAND_SHARED)/brand/sys -I$(UTSBASE)/common/brand/sngl
+CFLAGS += $(CCVERBOSE)
+DYNFLAGS += $(DYNFLAGS_$(CLASS))
+DYNFLAGS += $(BLOCAL) $(ZNOVERSION) -Wl,-e_start
+LDLIBS += -lc -lmapmalloc
+
+$(LIBS):= PICS += $(SHAREDOBJS)
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+include $(SRC)/lib/Makefile.targ
diff --git a/usr/src/lib/brand/sngl/sngl_brand/amd64/Makefile b/usr/src/lib/brand/sngl/sngl_brand/amd64/Makefile
new file mode 100644
index 0000000000..d019eaf841
--- /dev/null
+++ b/usr/src/lib/brand/sngl/sngl_brand/amd64/Makefile
@@ -0,0 +1,48 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2012, Joyent, Inc. All rights reserved.
+#
+
+ISAOBJDIR = $(BRAND_SHARED)/brand/amd64/pics/
+
+include ../Makefile.com
+include $(SRC)/lib/Makefile.lib.64
+
+CPPFLAGS += -I$(BRAND_SHARED)/brand/amd64
+
+#
+# see ../Makefile.com for why we MUST explicity make ld.so.1 our interpreter
+#
+DYNFLAGS += -Wl,-I/lib/64/ld.so.1
+CPPFLAGS += -D_SYSCALL32
+
+# Note that we also set the runtime path for the emulation library to point
+# into /system. This is an attempt to ensure that our brand library get's the
+# native version of libmapmallc.
+#
+DYNFLAGS += -R/lib/64 -R/system/usr/lib/64
+
+CLEANFILES += $(DYNLIB)
+CLOBBERFILES += $(ROOTLIBS64)
+
+install: all $(ROOTLIBS64)
diff --git a/usr/src/lib/brand/sngl/sngl_brand/common/mapfile-vers b/usr/src/lib/brand/sngl/sngl_brand/common/mapfile-vers
new file mode 100644
index 0000000000..29800204f8
--- /dev/null
+++ b/usr/src/lib/brand/sngl/sngl_brand/common/mapfile-vers
@@ -0,0 +1,48 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+#
+# Scope everything local -- our .init section is our only public interface.
+#
+SYMBOL_SCOPE {
+ local:
+ *;
+};
diff --git a/usr/src/lib/brand/sngl/sngl_brand/common/sngl_brand.c b/usr/src/lib/brand/sngl/sngl_brand/common/sngl_brand.c
new file mode 100644
index 0000000000..d2b8d8702d
--- /dev/null
+++ b/usr/src/lib/brand/sngl/sngl_brand/common/sngl_brand.c
@@ -0,0 +1,429 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2012, Joyent, Inc. All rights reserved.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <sys/brand.h>
+#include <sys/syscall.h>
+#include <sys/systm.h>
+#include <sys/stat.h>
+#include <libgen.h>
+#include <sys/auxv.h>
+
+#include <sngl_brand.h>
+#include <brand_misc.h>
+
+/*
+ * See usr/src/lib/brand/shared/brand/common/brand_util.c for general
+ * emulation notes.
+ */
+
+#define CONF32_PATH "/system/usr/lib/brand/sngl/ld.sys.config"
+#define CONF64_PATH "/system/usr/lib/brand/sngl/ld.sys64.config"
+
+brand_sysent_table_t brand_sysent_table[];
+
+static boolean_t is_sys = B_FALSE;
+static boolean_t is_crle = B_FALSE;
+
+typedef struct {
+ char *mnt_name;
+ dev_t mnt_id;
+} sys_mnt_dev_t;
+
+/*
+ * The brand platform mounts several GZ file systems into the zone. We know
+ * which ones can actually be on seperate file systems, so we only stat those
+ * when checking for system commands. This reduces the number of stats needed
+ * when we start up.
+ */
+static sys_mnt_dev_t sys_mounts[] = {
+ {"/system/usr", 0},
+ {"/lib", 0},
+ {NULL, 0}
+};
+
+/*
+ * If this is a /sytem binary and ld.so is opening the default config file,
+ * then redirect so it opens the /system config file instead. We need to do it
+ * this way, instead of setting one of the LD_CONFIG env vars, since those
+ * are ignored for secure binaries.
+ *
+ * We don't redirect if we're crle so that it can still be used on the default
+ * config files.
+ */
+int
+sngl_open(sysret_t *rval, char *path, int oflag, mode_t mode)
+{
+ char tstr[MAXPATHLEN];
+
+ if (is_sys && !is_crle) {
+ /* Get a copy of the path we're trying to open */
+ bzero(tstr, sizeof (tstr));
+ (void) brand_uucopystr(path, tstr, sizeof (tstr));
+
+ if (strcmp(tstr, "/var/ld/ld.config") == 0)
+ return (__systemcall(rval, SYS_open + 1024,
+ CONF32_PATH, oflag, mode));
+
+ if (strcmp(tstr, "/var/ld/64/ld.config") == 0)
+ return (__systemcall(rval, SYS_open + 1024,
+ CONF64_PATH, oflag, mode));
+ }
+
+ return (__systemcall(rval, SYS_open + 1024, path, oflag, mode));
+}
+
+/*ARGSUSED*/
+int
+brand_init(int argc, char *argv[], char *envp[])
+{
+ ulong_t ldentry;
+ int i;
+ uintptr_t *p;
+ auxv_t *ap;
+ struct stat64 buf;
+ char *bname;
+
+ brand_pre_init();
+
+ /*
+ * Check if we're trying to run a system binary.
+ *
+ * We haven't installed our emulation table yet, so its safe to make
+ * system calls directly.
+ *
+ * First, get the /system devices, then stat the executable to see if
+ * its on one of the /system devs.
+ */
+ for (i = 0; sys_mounts[i].mnt_name != NULL; i++) {
+ if (stat64(sys_mounts[i].mnt_name, &buf) != -1)
+ sys_mounts[i].mnt_id = buf.st_dev;
+ }
+
+ /* Find the aux vector on the stack. */
+ p = (uintptr_t *)envp;
+ while (*p != NULL)
+ p++;
+ p++;
+
+ /* Find AT_SUN_EXECNAME */
+ for (ap = (auxv_t *)p; ap->a_type != AT_NULL; ap++) {
+ if (ap->a_type != AT_SUN_EXECNAME)
+ continue;
+ if (stat64(ap->a_un.a_ptr, &buf) != -1) {
+ for (i = 0; sys_mounts[i].mnt_name != NULL; i++) {
+ if (sys_mounts[i].mnt_id == buf.st_dev) {
+ is_sys = B_TRUE;
+ bname = basename(ap->a_un.a_ptr);
+ if (strcmp("crle", bname) == 0)
+ is_crle = B_TRUE;
+ break;
+ }
+ }
+ }
+ break;
+ }
+
+ ldentry = brand_post_init(SNGL_VERSION, argc, argv, envp);
+
+ brand_runexe(argv, ldentry);
+ /*NOTREACHED*/
+ brand_abort(0, "brand_runexe() returned");
+ return (-1);
+}
+
+/*
+ * This table must have at least NSYSCALL entries in it.
+ *
+ * The second parameter of each entry in the brand_sysent_table
+ * contains the number of parameters and flags that describe the
+ * syscall return value encoding. See the block comments at the
+ * top of this file for more information about the syscall return
+ * value flags and when they should be used.
+ */
+brand_sysent_table_t brand_sysent_table[] = {
+ NOSYS, /* 0 */
+ NOSYS, /* 1 */
+ NOSYS, /* 2 */
+ NOSYS, /* 3 */
+ NOSYS, /* 4 */
+ EMULATE(sngl_open, 3 | RV_DEFAULT), /* 5 */
+ NOSYS, /* 6 */
+ NOSYS, /* 7 */
+ NOSYS, /* 8 */
+ NOSYS, /* 9 */
+ NOSYS, /* 10 */
+ NOSYS, /* 11 */
+ NOSYS, /* 12 */
+ NOSYS, /* 13 */
+ NOSYS, /* 14 */
+ NOSYS, /* 15 */
+ NOSYS, /* 16 */
+ NOSYS, /* 17 */
+ NOSYS, /* 18 */
+ NOSYS, /* 19 */
+ NOSYS, /* 20 */
+ NOSYS, /* 21 */
+ NOSYS, /* 22 */
+ NOSYS, /* 23 */
+ NOSYS, /* 24 */
+ NOSYS, /* 25 */
+ NOSYS, /* 26 */
+ NOSYS, /* 27 */
+ NOSYS, /* 28 */
+ NOSYS, /* 29 */
+ NOSYS, /* 30 */
+ NOSYS, /* 31 */
+ NOSYS, /* 32 */
+ NOSYS, /* 33 */
+ NOSYS, /* 34 */
+ NOSYS, /* 35 */
+ NOSYS, /* 36 */
+ NOSYS, /* 37 */
+ NOSYS, /* 38 */
+ NOSYS, /* 39 */
+ NOSYS, /* 40 */
+ NOSYS, /* 41 */
+ NOSYS, /* 42 */
+ NOSYS, /* 43 */
+ NOSYS, /* 44 */
+ NOSYS, /* 45 */
+ NOSYS, /* 46 */
+ NOSYS, /* 47 */
+ NOSYS, /* 48 */
+ NOSYS, /* 49 */
+ NOSYS, /* 50 */
+ NOSYS, /* 51 */
+ NOSYS, /* 52 */
+ NOSYS, /* 53 */
+ NOSYS, /* 54 */
+ NOSYS, /* 55 */
+ NOSYS, /* 56 */
+ NOSYS, /* 57 */
+ NOSYS, /* 58 */
+ NOSYS, /* 59 */
+ NOSYS, /* 60 */
+ NOSYS, /* 61 */
+ NOSYS, /* 62 */
+ NOSYS, /* 63 */
+ NOSYS, /* 64 */
+ NOSYS, /* 65 */
+ NOSYS, /* 66 */
+ NOSYS, /* 67 */
+ NOSYS, /* 68 */
+ NOSYS, /* 69 */
+ NOSYS, /* 70 */
+ NOSYS, /* 71 */
+ NOSYS, /* 72 */
+ NOSYS, /* 73 */
+ NOSYS, /* 74 */
+ NOSYS, /* 75 */
+ NOSYS, /* 76 */
+ NOSYS, /* 77 */
+ NOSYS, /* 78 */
+ NOSYS, /* 79 */
+ NOSYS, /* 80 */
+ NOSYS, /* 81 */
+ NOSYS, /* 82 */
+ NOSYS, /* 83 */
+ NOSYS, /* 84 */
+ NOSYS, /* 85 */
+ NOSYS, /* 86 */
+ NOSYS, /* 87 */
+ NOSYS, /* 88 */
+ NOSYS, /* 89 */
+ NOSYS, /* 90 */
+ NOSYS, /* 91 */
+ NOSYS, /* 92 */
+ NOSYS, /* 93 */
+ NOSYS, /* 94 */
+ NOSYS, /* 95 */
+ NOSYS, /* 96 */
+ NOSYS, /* 97 */
+ NOSYS, /* 98 */
+ NOSYS, /* 99 */
+ NOSYS, /* 100 */
+ NOSYS, /* 101 */
+ NOSYS, /* 102 */
+ NOSYS, /* 103 */
+ NOSYS, /* 104 */
+ NOSYS, /* 105 */
+ NOSYS, /* 106 */
+ NOSYS, /* 107 */
+ NOSYS, /* 108 */
+ NOSYS, /* 109 */
+ NOSYS, /* 110 */
+ NOSYS, /* 111 */
+ NOSYS, /* 112 */
+ NOSYS, /* 113 */
+ NOSYS, /* 114 */
+ NOSYS, /* 115 */
+ NOSYS, /* 116 */
+ NOSYS, /* 117 */
+ NOSYS, /* 118 */
+ NOSYS, /* 119 */
+ NOSYS, /* 120 */
+ NOSYS, /* 121 */
+ NOSYS, /* 122 */
+ NOSYS, /* 123 */
+ NOSYS, /* 124 */
+ NOSYS, /* 125 */
+ NOSYS, /* 126 */
+ NOSYS, /* 127 */
+ NOSYS, /* 128 */
+ NOSYS, /* 129 */
+ NOSYS, /* 130 */
+ NOSYS, /* 131 */
+ NOSYS, /* 132 */
+ NOSYS, /* 133 */
+ NOSYS, /* 134 */
+ NOSYS, /* 135 */
+ NOSYS, /* 136 */
+ NOSYS, /* 137 */
+ NOSYS, /* 138 */
+ NOSYS, /* 139 */
+ NOSYS, /* 140 */
+ NOSYS, /* 141 */
+ NOSYS, /* 142 */
+ NOSYS, /* 143 */
+ NOSYS, /* 144 */
+ NOSYS, /* 145 */
+ NOSYS, /* 146 */
+ NOSYS, /* 147 */
+ NOSYS, /* 148 */
+ NOSYS, /* 149 */
+ NOSYS, /* 150 */
+ NOSYS, /* 151 */
+ NOSYS, /* 152 */
+ NOSYS, /* 153 */
+ NOSYS, /* 154 */
+ NOSYS, /* 155 */
+ NOSYS, /* 156 */
+ NOSYS, /* 157 */
+ NOSYS, /* 158 */
+ NOSYS, /* 159 */
+ NOSYS, /* 160 */
+ NOSYS, /* 161 */
+ NOSYS, /* 162 */
+ NOSYS, /* 163 */
+ NOSYS, /* 164 */
+ NOSYS, /* 165 */
+ NOSYS, /* 166 */
+ NOSYS, /* 167 */
+ NOSYS, /* 168 */
+ NOSYS, /* 169 */
+ NOSYS, /* 170 */
+ NOSYS, /* 171 */
+ NOSYS, /* 172 */
+ NOSYS, /* 173 */
+ NOSYS, /* 174 */
+ NOSYS, /* 175 */
+ NOSYS, /* 176 */
+ NOSYS, /* 177 */
+ NOSYS, /* 178 */
+ NOSYS, /* 179 */
+ NOSYS, /* 180 */
+ NOSYS, /* 181 */
+ NOSYS, /* 182 */
+ NOSYS, /* 183 */
+ NOSYS, /* 184 */
+ NOSYS, /* 185 */
+ NOSYS, /* 186 */
+ NOSYS, /* 187 */
+ NOSYS, /* 188 */
+ NOSYS, /* 189 */
+ NOSYS, /* 190 */
+ NOSYS, /* 191 */
+ NOSYS, /* 192 */
+ NOSYS, /* 193 */
+ NOSYS, /* 194 */
+ NOSYS, /* 195 */
+ NOSYS, /* 196 */
+ NOSYS, /* 197 */
+ NOSYS, /* 198 */
+ NOSYS, /* 199 */
+ NOSYS, /* 200 */
+ NOSYS, /* 201 */
+ NOSYS, /* 202 */
+ NOSYS, /* 203 */
+ NOSYS, /* 204 */
+ NOSYS, /* 205 */
+ NOSYS, /* 206 */
+ NOSYS, /* 207 */
+ NOSYS, /* 208 */
+ NOSYS, /* 209 */
+ NOSYS, /* 210 */
+ NOSYS, /* 211 */
+ NOSYS, /* 212 */
+ NOSYS, /* 213 */
+ NOSYS, /* 214 */
+ NOSYS, /* 215 */
+ NOSYS, /* 216 */
+ NOSYS, /* 217 */
+ NOSYS, /* 218 */
+ NOSYS, /* 219 */
+ NOSYS, /* 220 */
+ NOSYS, /* 221 */
+ NOSYS, /* 222 */
+ NOSYS, /* 223 */
+ NOSYS, /* 224 */
+ NOSYS, /* 225 */
+ NOSYS, /* 226 */
+ NOSYS, /* 227 */
+ NOSYS, /* 228 */
+ NOSYS, /* 229 */
+ NOSYS, /* 230 */
+ NOSYS, /* 231 */
+ NOSYS, /* 232 */
+ NOSYS, /* 233 */
+ NOSYS, /* 234 */
+ NOSYS, /* 235 */
+ NOSYS, /* 236 */
+ NOSYS, /* 237 */
+ NOSYS, /* 238 */
+ NOSYS, /* 239 */
+ NOSYS, /* 240 */
+ NOSYS, /* 241 */
+ NOSYS, /* 242 */
+ NOSYS, /* 243 */
+ NOSYS, /* 244 */
+ NOSYS, /* 245 */
+ NOSYS, /* 246 */
+ NOSYS, /* 247 */
+ NOSYS, /* 248 */
+ NOSYS, /* 249 */
+ NOSYS, /* 250 */
+ NOSYS, /* 251 */
+ NOSYS, /* 252 */
+ NOSYS, /* 253 */
+ NOSYS, /* 254 */
+ NOSYS /* 255 */
+};
diff --git a/usr/src/lib/brand/sngl/sngl_brand/i386/Makefile b/usr/src/lib/brand/sngl/sngl_brand/i386/Makefile
new file mode 100644
index 0000000000..a0c4927252
--- /dev/null
+++ b/usr/src/lib/brand/sngl/sngl_brand/i386/Makefile
@@ -0,0 +1,46 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2012, Joyent, Inc. All rights reserved.
+#
+
+ISAOBJDIR = $(BRAND_SHARED)/brand/i386/pics
+
+include ../Makefile.com
+
+CPPFLAGS += -I$(BRAND_SHARED)/brand/i386
+
+#
+# see ../Makefile.com for why we explicity make ld.so.1 our interpreter
+#
+DYNFLAGS += -Wl,-I/lib/ld.so.1
+
+# Note that we also set the runtime path for the emulation library to point
+# into /system. This is an attempt to ensure that our brand library get's the
+# native version of libmapmallc.
+#
+DYNFLAGS += -R/lib -R/system/usr/lib
+
+CLEANFILES += $(DYNLIB)
+CLOBBERFILES += $(ROOTLIBS)
+
+install: all $(ROOTLIBS)
diff --git a/usr/src/lib/brand/sngl/zone/Makefile b/usr/src/lib/brand/sngl/zone/Makefile
new file mode 100644
index 0000000000..42785deab3
--- /dev/null
+++ b/usr/src/lib/brand/sngl/zone/Makefile
@@ -0,0 +1,53 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2012 Joyent, Inc. All rights reserved.
+#
+
+PROGS = sinstall
+XMLDOCS= config.xml platform.xml
+CLOBBERFILES= $(ROOTPROGS) $(ROOTXMLDOCS)
+
+include $(SRC)/cmd/Makefile.cmd
+include ../Makefile.sngl
+
+.KEEP_STATE:
+
+all: $(PROGS)
+
+POFILES =
+POFILE =
+
+install: $(PROGS) $(ROOTPROGS) $(ROOTXMLDOCS)
+ crle -c $(ROOTBRANDDIR)/ld.sys.config \
+ -l /lib:/system/usr/lib \
+ -s /lib/secure:/system/usr/lib/secure
+ crle -64 -c $(ROOTBRANDDIR)/ld.sys64.config \
+ -l /lib/64:/system/usr/lib/64 \
+ -s /lib/secure/64:/system/usr/lib/secure/64
+
+lint:
+
+clean:
+ -$(RM) $(PROGS)
+
+include $(SRC)/cmd/Makefile.targ
diff --git a/usr/src/lib/brand/sngl/zone/config.xml b/usr/src/lib/brand/sngl/zone/config.xml
new file mode 100644
index 0000000000..421400e82a
--- /dev/null
+++ b/usr/src/lib/brand/sngl/zone/config.xml
@@ -0,0 +1,112 @@
+<?xml version="1.0"?>
+
+<!--
+ CDDL HEADER START
+
+ The contents of this file are subject to the terms of the
+ Common Development and Distribution License (the "License").
+ You may not use this file except in compliance with the License.
+
+ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ or http://www.opensolaris.org/os/licensing.
+ See the License for the specific language governing permissions
+ and limitations under the License.
+
+ When distributing Covered Code, include this CDDL HEADER in each
+ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ If applicable, add the following below this CDDL HEADER, with the
+ fields enclosed by brackets "[]" replaced with your own identifying
+ information: Portions Copyright [yyyy] [name of copyright owner]
+
+ CDDL HEADER END
+
+ Copyright 2012 Joyent, Inc. All rights reserved.
+ Use is subject to license terms.
+
+ DO NOT EDIT THIS FILE.
+-->
+
+<!DOCTYPE brand PUBLIC "-//Joyent Inc//DTD Brands//EN"
+ "file:///usr/share/lib/xml/dtd/brand.dtd.1">
+
+<brand name="sngl">
+ <modname>sngl_brand</modname>
+
+ <initname>/system/sbin/init</initname>
+ <login_cmd>/system/usr/bin/login -z %Z %u</login_cmd>
+ <forcedlogin_cmd>/system/usr/bin/login -z %Z -f %u</forcedlogin_cmd>
+ <user_cmd>/system/usr/bin/getent passwd %u</user_cmd>
+
+ <install>/usr/lib/brand/sngl/sinstall -z %z -R %R</install>
+ <installopts>R:t:U:q:z:</installopts>
+ <boot></boot>
+ <halt></halt>
+ <verify_cfg></verify_cfg>
+ <verify_adm></verify_adm>
+ <postclone></postclone>
+ <postinstall></postinstall>
+ <attach>/bin/true</attach>
+ <detach>/bin/true</detach>
+ <clone></clone>
+ <uninstall>/usr/lib/brand/joyent/juninstall -z %z -R %R</uninstall>
+ <prestatechange>/usr/lib/brand/joyent/prestate %z %R</prestatechange>
+ <poststatechange>/usr/lib/brand/joyent/poststate %z %R</poststatechange>
+ <query>/usr/lib/brand/joyent/query %z %R</query>
+
+ <privilege set="default" name="contract_event" />
+ <privilege set="default" name="contract_identity" />
+ <privilege set="default" name="contract_observer" />
+ <privilege set="default" name="dtrace_proc" />
+ <privilege set="default" name="dtrace_user" />
+ <privilege set="default" name="file_chown" />
+ <privilege set="default" name="file_chown_self" />
+ <privilege set="default" name="file_dac_execute" />
+ <privilege set="default" name="file_dac_read" />
+ <privilege set="default" name="file_dac_search" />
+ <privilege set="default" name="file_dac_write" />
+ <privilege set="default" name="file_owner" />
+ <privilege set="default" name="file_setid" />
+ <privilege set="default" name="ipc_dac_read" />
+ <privilege set="default" name="ipc_dac_write" />
+ <privilege set="default" name="ipc_owner" />
+ <privilege set="default" name="net_bindmlp" />
+ <privilege set="default" name="net_icmpaccess" />
+ <privilege set="default" name="net_mac_aware" />
+ <privilege set="default" name="net_observability" />
+ <privilege set="default" name="net_privaddr" />
+ <privilege set="default" name="net_rawaccess" ip-type="exclusive" />
+ <privilege set="default" name="proc_chroot" />
+ <privilege set="default" name="sys_audit" />
+ <privilege set="default" name="proc_audit" />
+ <privilege set="default" name="proc_lock_memory" />
+ <privilege set="default" name="proc_owner" />
+ <privilege set="default" name="proc_prioup" />
+ <privilege set="default" name="proc_setid" />
+ <privilege set="default" name="proc_taskid" />
+ <privilege set="default" name="sys_acct" />
+ <privilege set="default" name="sys_admin" />
+ <privilege set="default" name="sys_ip_config" ip-type="exclusive" />
+ <privilege set="default" name="sys_iptun_config" ip-type="exclusive" />
+ <privilege set="default" name="sys_mount" />
+ <privilege set="default" name="sys_nfs" />
+ <privilege set="default" name="sys_resource" />
+ <privilege set="default" name="sys_ppp_config" ip-type="exclusive" />
+
+ <privilege set="prohibited" name="dtrace_kernel" />
+ <privilege set="prohibited" name="proc_zone" />
+ <privilege set="prohibited" name="sys_config" />
+ <privilege set="prohibited" name="sys_devices" />
+ <privilege set="prohibited" name="sys_ip_config" ip-type="shared" />
+ <privilege set="prohibited" name="sys_linkdir" />
+ <privilege set="prohibited" name="sys_net_config" />
+ <privilege set="prohibited" name="sys_res_config" />
+ <privilege set="prohibited" name="sys_suser_compat" />
+ <privilege set="prohibited" name="xvm_control" />
+ <privilege set="prohibited" name="virt_manage" />
+ <privilege set="prohibited" name="sys_ppp_config" ip-type="shared" />
+
+ <privilege set="required" name="proc_exec" />
+ <privilege set="required" name="proc_fork" />
+ <privilege set="required" name="sys_ip_config" ip-type="exclusive" />
+ <privilege set="required" name="sys_mount" />
+</brand>
diff --git a/usr/src/lib/brand/sngl/zone/platform.xml b/usr/src/lib/brand/sngl/zone/platform.xml
new file mode 100644
index 0000000000..94ba5ca59c
--- /dev/null
+++ b/usr/src/lib/brand/sngl/zone/platform.xml
@@ -0,0 +1,161 @@
+<?xml version="1.0"?>
+
+<!--
+ CDDL HEADER START
+
+ The contents of this file are subject to the terms of the
+ Common Development and Distribution License (the "License").
+ You may not use this file except in compliance with the License.
+
+ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ or http://www.opensolaris.org/os/licensing.
+ See the License for the specific language governing permissions
+ and limitations under the License.
+
+ When distributing Covered Code, include this CDDL HEADER in each
+ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ If applicable, add the following below this CDDL HEADER, with the
+ fields enclosed by brackets "[]" replaced with your own identifying
+ information: Portions Copyright [yyyy] [name of copyright owner]
+
+ CDDL HEADER END
+
+ Copyright (c) 2012 Joyent, Inc. All rights reserved.
+ Use is subject to license terms.
+
+ DO NOT EDIT THIS FILE.
+-->
+
+<!DOCTYPE platform PUBLIC "-//Joyent Inc//Zones Platform//EN"
+ "file:///usr/share/lib/xml/dtd/zone_platform.dtd.1">
+
+<platform name="sngl" allow-exclusive-ip="true">
+
+ <!-- Global filesystems to mount when booting the zone -->
+ <global_mount special="/dev" directory="/dev" type="dev"
+ opt="attrdir=%R/root/dev"/>
+
+ <global_mount special="/lib" directory="/lib"
+ opt="ro,nodevices" type="lofs" />
+ <global_mount special="%P/manifests/joyent"
+ directory="/lib/svc/manifest"
+ opt="ro,nodevices" type="lofs" />
+ <global_mount special="%R/site" directory="/lib/svc/manifest/site"
+ opt="nodevices" type="lofs" />
+ <global_mount special="/sbin" directory="/sbin"
+ opt="ro,nodevices" type="lofs" />
+ <global_mount special="/lib" directory="/system/lib"
+ opt="ro,nodevices" type="lofs" />
+ <global_mount special="/sbin" directory="/system/sbin"
+ opt="ro,nodevices" type="lofs" />
+ <global_mount special="/usr" directory="/system/usr"
+ opt="ro,nodevices" type="lofs" />
+
+ <!-- Local filesystems to mount when booting the zone -->
+ <mount special="/proc" directory="/proc" type="proc" />
+ <mount special="ctfs" directory="/system/contract" type="ctfs" />
+ <mount special="mnttab" directory="/etc/mnttab" type="mntfs" />
+ <mount special="objfs" directory="/system/object" type="objfs" />
+ <mount special="lxproc" directory="/system/lxproc" type="lxproc" />
+ <mount special="swap" directory="/etc/svc/volatile" type="tmpfs" />
+
+ <!-- Devices to create under /dev -->
+ <device match="arp" />
+ <device match="bpf" />
+ <device match="conslog" />
+ <device match="cpu/self/cpuid" />
+ <device match="crypto" />
+ <device match="cryptoadm" />
+ <device match="dsk" />
+ <device match="dtrace/*" />
+ <device match="dtrace/provider/*" />
+ <device match="fd" />
+ <device match="ipnet" />
+ <device match="kstat" />
+ <device match="lo0" />
+ <device match="lofictl" />
+ <device match="lofi" />
+ <device match="log" />
+ <device match="logindmux" />
+ <device match="nsmb" />
+ <device match="net/*" />
+ <device match="null" />
+ <device match="openprom" arch="sparc" />
+ <device match="poll" />
+ <device match="pool" />
+ <device match="ptmx" />
+ <device match="pts/*" />
+ <device match="random" />
+ <device match="rdsk" />
+ <device match="rlofi" />
+ <device match="rmt" />
+ <device match="sad/user" />
+ <device match="svvslo0" />
+ <device match="svvslo1" />
+ <device match="svvslo2" />
+ <device match="svvslo3" />
+ <device match="swap" />
+ <device match="sysevent" />
+ <device match="tap" />
+ <device match="tcp" />
+ <device match="tcp6" />
+ <device match="term" />
+ <device match="ticlts" />
+ <device match="ticots" />
+ <device match="ticotsord" />
+ <device match="tty" />
+ <device match="tun" />
+ <device match="udp" />
+ <device match="udp6" />
+ <device match="urandom" />
+ <device match="vnd/*" />
+ <device match="zero" />
+ <device match="zfs" />
+ <device match="zvol/dsk/%P/%z/*" />
+ <device match="zvol/rdsk/%P/%z/*" />
+
+ <!-- Devices to create in exclusive IP zone only -->
+ <device match="dld" ip-type="exclusive" />
+ <device match="icmp" ip-type="exclusive" />
+ <device match="icmp6" ip-type="exclusive" />
+ <device match="ip" ip-type="exclusive" />
+ <device match="ip6" ip-type="exclusive" />
+ <device match="ipauth" ip-type="exclusive" />
+ <device match="ipd" ip-type="exclusive" />
+ <device match="ipf" ip-type="exclusive" />
+ <device match="ipl" ip-type="exclusive" />
+ <device match="iplookup" ip-type="exclusive" />
+ <device match="ipmpstub" ip-type="exclusive" />
+ <device match="ipnat" ip-type="exclusive" />
+ <device match="ipscan" ip-type="exclusive" />
+ <device match="ipsecah" ip-type="exclusive" />
+ <device match="ipsecesp" ip-type="exclusive" />
+ <device match="ipstate" ip-type="exclusive" />
+ <device match="ipsync" ip-type="exclusive" />
+ <device match="keysock" ip-type="exclusive" />
+ <device match="rawip" ip-type="exclusive" />
+ <device match="rawip6" ip-type="exclusive" />
+ <device match="rts" ip-type="exclusive" />
+ <device match="sad/admin" ip-type="exclusive" />
+ <device match="sctp" ip-type="exclusive" />
+ <device match="sctp6" ip-type="exclusive" />
+ <device match="spdsock" ip-type="exclusive" />
+ <device match="sppp" ip-type="exclusive" />
+ <device match="sppptun" ip-type="exclusive" />
+ <device match="vni" ip-type="exclusive" />
+
+ <!-- Renamed devices to create under /dev -->
+ <device match="zcons/%z/zoneconsole" name="zconsole" />
+
+ <!-- Symlinks to create under /dev -->
+ <symlink source="console" target="zconsole" />
+ <symlink source="dtremote" target="/dev/null" />
+ <symlink source="msglog" target="zconsole" />
+ <symlink source="stderr" target="./fd/2" />
+ <symlink source="stdin" target="./fd/0" />
+ <symlink source="stdout" target="./fd/1" />
+ <symlink source="syscon" target="zconsole" />
+ <symlink source="sysmsg" target="zconsole" />
+ <symlink source="systty" target="zconsole" />
+
+</platform>
diff --git a/usr/src/lib/brand/sngl/zone/sinstall.sh b/usr/src/lib/brand/sngl/zone/sinstall.sh
new file mode 100644
index 0000000000..4928c36304
--- /dev/null
+++ b/usr/src/lib/brand/sngl/zone/sinstall.sh
@@ -0,0 +1,31 @@
+#!/bin/ksh -p
+#
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+# Does this brand support reprovisioning?
+# jst_reprovision="yes"
+
+# Is a template image optional?
+# jst_tmplopt="yes"
+
+. /usr/lib/brand/jcommon/libhooks.ksh
+
+function jcommon_attach_hook
+{
+ jattach_zone_final_setup
+}
+
+. /usr/lib/brand/jcommon/cinstall
diff --git a/usr/src/lib/common/i386/crtn.s b/usr/src/lib/common/i386/crtn.s
index 6e37e25a8f..413c102006 100644
--- a/usr/src/lib/common/i386/crtn.s
+++ b/usr/src/lib/common/i386/crtn.s
@@ -33,7 +33,6 @@
*
* For further details - see bug#4433015
*/
- .ident "%Z%%M% %I% %E% SMI"
.file "crtn.s"
/*
diff --git a/usr/src/lib/fm/topo/libtopo/common/libtopo.h b/usr/src/lib/fm/topo/libtopo/common/libtopo.h
index 3ea35cdddd..e0adb6e0ab 100644
--- a/usr/src/lib/fm/topo/libtopo/common/libtopo.h
+++ b/usr/src/lib/fm/topo/libtopo/common/libtopo.h
@@ -22,6 +22,9 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
*/
+/*
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ */
#ifndef _LIBTOPO_H
#define _LIBTOPO_H
@@ -389,6 +392,8 @@ extern void topo_hdl_free(topo_hdl_t *, void *, size_t);
extern int topo_hdl_nvalloc(topo_hdl_t *, nvlist_t **, uint_t);
extern int topo_hdl_nvdup(topo_hdl_t *, nvlist_t *, nvlist_t **);
extern char *topo_hdl_strdup(topo_hdl_t *, const char *);
+extern char *topo_hdl_strsplit(topo_hdl_t *, const char *, const char *,
+ char **);
/*
* Interfaces for converting sensor/indicator types, units, states, etc to
diff --git a/usr/src/lib/fm/topo/libtopo/common/mapfile-vers b/usr/src/lib/fm/topo/libtopo/common/mapfile-vers
index b81f4fd7c6..c6ff800951 100644
--- a/usr/src/lib/fm/topo/libtopo/common/mapfile-vers
+++ b/usr/src/lib/fm/topo/libtopo/common/mapfile-vers
@@ -76,6 +76,7 @@ SYMBOL_VERSION SUNWprivate {
topo_hdl_prominfo;
topo_hdl_strdup;
topo_hdl_strfree;
+ topo_hdl_strsplit;
topo_hdl_zalloc;
topo_led_state_name;
topo_led_type_name;
@@ -118,6 +119,7 @@ SYMBOL_VERSION SUNWprivate {
topo_mod_str2nvl;
topo_mod_strdup;
topo_mod_strfree;
+ topo_mod_strsplit;
topo_mod_unload;
topo_mod_unregister;
topo_mod_walk_init;
diff --git a/usr/src/lib/fm/topo/libtopo/common/topo_mod.h b/usr/src/lib/fm/topo/libtopo/common/topo_mod.h
index 2a137ad388..e6dda440a0 100644
--- a/usr/src/lib/fm/topo/libtopo/common/topo_mod.h
+++ b/usr/src/lib/fm/topo/libtopo/common/topo_mod.h
@@ -22,6 +22,9 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
*/
+/*
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ */
#ifndef _TOPO_MOD_H
#define _TOPO_MOD_H
@@ -215,6 +218,8 @@ extern void *topo_mod_zalloc(topo_mod_t *, size_t);
extern void topo_mod_free(topo_mod_t *, void *, size_t);
extern char *topo_mod_strdup(topo_mod_t *, const char *);
extern void topo_mod_strfree(topo_mod_t *, char *);
+extern char *topo_mod_strsplit(topo_mod_t *, const char *, const char *,
+ char **);
extern int topo_mod_nvalloc(topo_mod_t *, nvlist_t **, uint_t);
extern int topo_mod_nvdup(topo_mod_t *, nvlist_t *, nvlist_t **);
diff --git a/usr/src/lib/fm/topo/libtopo/common/topo_string.c b/usr/src/lib/fm/topo/libtopo/common/topo_string.c
index 2c1f451770..2f59013e9e 100644
--- a/usr/src/lib/fm/topo/libtopo/common/topo_string.c
+++ b/usr/src/lib/fm/topo/libtopo/common/topo_string.c
@@ -23,8 +23,9 @@
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ */
#include <strings.h>
#include <ctype.h>
@@ -56,6 +57,58 @@ topo_hdl_strfree(topo_hdl_t *thp, char *s)
}
char *
+topo_hdl_strsplit(topo_hdl_t *hdl, const char *input, const char *sep,
+ char **lastp)
+{
+ size_t seplen = strlen(sep);
+ const char *scanstart;
+ char *token;
+ char *ret;
+
+ if (input != NULL) {
+ /*
+ * Start scanning at beginning of input:
+ */
+ scanstart = input;
+ } else if (*lastp == NULL) {
+ /*
+ * If we have already finished scanning, return NULL.
+ */
+ return (NULL);
+ } else {
+ /*
+ * Otherwise, start scanning where we left off:
+ */
+ scanstart = *lastp;
+ }
+
+ token = strstr(scanstart, sep);
+ if (token != NULL) {
+ /*
+ * We still have a separator, so advance the next-start
+ * pointer past it:
+ */
+ *lastp = token + seplen;
+ /*
+ * Copy out this element. The buffer must fit the string
+ * exactly, so that topo_hdl_strfree() can determine its
+ * size with strlen().
+ */
+ ret = topo_hdl_alloc(hdl, token - scanstart + 1);
+ (void) strncpy(ret, scanstart, token - scanstart);
+ ret[token - scanstart] = '\0';
+ } else {
+ /*
+ * We have no separator, so this is the last element:
+ */
+ *lastp = NULL;
+ ret = topo_hdl_strdup(hdl, scanstart);
+ }
+
+ return (ret);
+}
+
+char *
topo_mod_strdup(topo_mod_t *mod, const char *s)
{
return (topo_hdl_strdup(mod->tm_hdl, s));
@@ -67,6 +120,13 @@ topo_mod_strfree(topo_mod_t *mod, char *s)
topo_hdl_strfree(mod->tm_hdl, s);
}
+char *
+topo_mod_strsplit(topo_mod_t *mod, const char *input, const char *sep,
+ char **lastp)
+{
+ return (topo_hdl_strsplit(mod->tm_hdl, input, sep, lastp));
+}
+
const char *
topo_strbasename(const char *s)
{
diff --git a/usr/src/lib/fm/topo/maps/Joyent,Joyent-Compute-Platform-1101/Joyent-Compute-Platform-1101-disk-hc-topology.xmlgenksh b/usr/src/lib/fm/topo/maps/Joyent,Joyent-Compute-Platform-1101/Joyent-Compute-Platform-1101-disk-hc-topology.xmlgenksh
index 000742db71..874bad142f 100644..100755
--- a/usr/src/lib/fm/topo/maps/Joyent,Joyent-Compute-Platform-1101/Joyent-Compute-Platform-1101-disk-hc-topology.xmlgenksh
+++ b/usr/src/lib/fm/topo/maps/Joyent,Joyent-Compute-Platform-1101/Joyent-Compute-Platform-1101-disk-hc-topology.xmlgenksh
@@ -69,17 +69,19 @@ EOF
enclosure=1
bay=0
slot=0
-devctl='/devices/pci@0,0/pci8086,3c02@1/pci15d9,691@0:devctl'
+devctl0='/devices/pci@0,0/pci8086,3c02@1/pci15d9,691@0:devctl'
+devctl1='/devices/pci@0,0/pci8086,e02@1/pci15d9,691@0:devctl'
while (( slot <= 7 )); do
- do_node $bay "Front Disk $bay" "$devctl" $enclosure $slot
+ do_node $bay "Front Disk $bay" "$devctl0|$devctl1" $enclosure $slot
(( bay = bay + 1 ))
(( slot = slot + 1 ))
done
slot=0
-devctl='/devices/pci@0,0/pci8086,3c06@2,2/pci15d9,691@0:devctl'
+devctl0='/devices/pci@0,0/pci8086,3c06@2,2/pci15d9,691@0:devctl'
+devctl1='/devices/pci@0,0/pci8086,e06@2,2/pci15d9,691@0:devctl'
while (( slot <= 7 )); do
- do_node $bay "Front Disk $bay" "$devctl" $enclosure $slot
+ do_node $bay "Front Disk $bay" "$devctl0|$devctl1" $enclosure $slot
(( bay = bay + 1 ))
(( slot = slot + 1 ))
done
diff --git a/usr/src/lib/fm/topo/maps/i86pc/i86pc-legacy-hc-topology.xml b/usr/src/lib/fm/topo/maps/i86pc/i86pc-legacy-hc-topology.xml
index fda43bca6c..7d8f85fd88 100644
--- a/usr/src/lib/fm/topo/maps/i86pc/i86pc-legacy-hc-topology.xml
+++ b/usr/src/lib/fm/topo/maps/i86pc/i86pc-legacy-hc-topology.xml
@@ -2,7 +2,7 @@
<!DOCTYPE topology SYSTEM "/usr/share/lib/xml/dtd/topology.dtd.1">
<!--
Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
-Copyright (c) 2013, Joyent, Inc. All rights reserved.
+Copyright (c) 2014, Joyent, Inc. All rights reserved.
CDDL HEADER START
@@ -152,7 +152,7 @@ Copyright (c) 2013, Joyent, Inc. All rights reserved.
</range>
</set>
- <set type='product' setlist='Joyent-Compute-Platform-1101|Joyent-Compute-Platform-1102'>
+ <set type='product' setlist='Joyent-Compute-Platform-1101|Joyent-Compute-Platform-1102|Joyent-Compute-Platform-2101|Joyent-Compute-Platform-2102'>
<range name='psu' min='0' max='1'>
<enum-method name='ipmi' version='1' />
</range>
diff --git a/usr/src/lib/fm/topo/modules/common/disk/disk_mptsas.c b/usr/src/lib/fm/topo/modules/common/disk/disk_mptsas.c
index db853c6695..beae89f9f1 100644
--- a/usr/src/lib/fm/topo/modules/common/disk/disk_mptsas.c
+++ b/usr/src/lib/fm/topo/modules/common/disk/disk_mptsas.c
@@ -32,11 +32,18 @@
#include "disk.h"
#include "disk_drivers.h"
+/*
+ * Request the SAS address of the disk (if any) attached to this mpt_sas
+ * instance at (Enclosure Number, Slot Number). The function returns
+ * -1 on error and sets errno to ENOENT _only_ if the /devices node
+ * (*devctl) does not exist.
+ */
static int
get_sas_address(topo_mod_t *mod, char *devctl, uint32_t enclosure,
uint32_t slot, char **sas_address)
{
- int fd, err, i;
+ int ret = -1, en = ENXIO;
+ int fd, i;
mptsas_get_disk_info_t gdi;
mptsas_disk_info_t *di;
size_t disz;
@@ -44,16 +51,19 @@ get_sas_address(topo_mod_t *mod, char *devctl, uint32_t enclosure,
bzero(&gdi, sizeof (gdi));
if ((fd = open(devctl, O_RDWR)) == -1) {
+ en = errno;
topo_mod_dprintf(mod, "could not open '%s' for ioctl: %s\n",
devctl, strerror(errno));
+ errno = en;
return (-1);
}
if (ioctl(fd, MPTIOCTL_GET_DISK_INFO, &gdi) == -1) {
+ if (errno != ENOENT)
+ en = errno;
topo_mod_dprintf(mod, "ioctl 1 on '%s' failed: %s\n", devctl,
strerror(errno));
- (void) close(fd);
- return (-1);
+ goto out;
}
gdi.DiskInfoArraySize = disz = sizeof (mptsas_disk_info_t) *
@@ -61,19 +71,19 @@ get_sas_address(topo_mod_t *mod, char *devctl, uint32_t enclosure,
gdi.PtrDiskInfoArray = di = topo_mod_alloc(mod, disz);
if (di == NULL) {
topo_mod_dprintf(mod, "memory allocation failed\n");
- (void) close(fd);
- return (-1);
+ en = ENOMEM;
+ goto out;
}
if (ioctl(fd, MPTIOCTL_GET_DISK_INFO, &gdi) == -1) {
+ if (errno != ENOENT)
+ en = errno;
topo_mod_dprintf(mod, "ioctl 2 on '%s' failed: %s\n", devctl,
strerror(errno));
topo_mod_free(mod, di, disz);
- (void) close(fd);
- return (-1);
+ goto out;
}
- err = -1;
for (i = 0; i < gdi.DiskCount; i++) {
if (di[i].Enclosure == enclosure && di[i].Slot == slot) {
char sas[17]; /* 16 hex digits and NUL */
@@ -81,14 +91,16 @@ get_sas_address(topo_mod_t *mod, char *devctl, uint32_t enclosure,
topo_mod_dprintf(mod, "found mpt_sas disk (%d/%d) "
"with adddress %s\n", enclosure, slot, sas);
*sas_address = topo_mod_strdup(mod, sas);
- err = 0;
+ en = ret = 0;
break;
}
}
topo_mod_free(mod, di, disz);
+out:
(void) close(fd);
- return (err);
+ errno = en;
+ return (ret);
}
int
@@ -97,6 +109,8 @@ disk_mptsas_find_disk(topo_mod_t *mod, tnode_t *baynode, char **sas_address)
char *devctl = NULL;
uint32_t enclosure, slot;
int err;
+ char *elem, *lastp;
+ int ret = -1;
/*
* Get the required properties from the node. These come from
@@ -115,6 +129,35 @@ disk_mptsas_find_disk(topo_mod_t *mod, tnode_t *baynode, char **sas_address)
return (-1);
}
- return (get_sas_address(mod, devctl, enclosure, slot, sas_address));
+ /*
+ * devctl is a (potentially) pipe-separated list of different device
+ * paths to try.
+ */
+ if ((elem = topo_mod_strsplit(mod, devctl, "|", &lastp)) != NULL) {
+ boolean_t done = B_FALSE;
+ do {
+ topo_mod_dprintf(mod, "trying mpt_sas instance at %s\n",
+ elem);
+
+ ret = get_sas_address(mod, elem, enclosure,
+ slot, sas_address);
+
+ /*
+ * Only try further devctl paths from the list if this
+ * one was not found:
+ */
+ if (ret == 0 || errno != ENOENT) {
+ done = B_TRUE;
+ } else {
+ topo_mod_dprintf(mod, "instance not found\n");
+ }
+
+ topo_mod_strfree(mod, elem);
+
+ } while (!done && (elem = topo_mod_strsplit(mod, NULL, "|",
+ &lastp)) != NULL);
+ }
+ topo_mod_strfree(mod, devctl);
+ return (ret);
}
diff --git a/usr/src/lib/fm/topo/modules/common/fac_prov_mptsas/fac_prov_mptsas.c b/usr/src/lib/fm/topo/modules/common/fac_prov_mptsas/fac_prov_mptsas.c
index a49a131811..115e3b801d 100644
--- a/usr/src/lib/fm/topo/modules/common/fac_prov_mptsas/fac_prov_mptsas.c
+++ b/usr/src/lib/fm/topo/modules/common/fac_prov_mptsas/fac_prov_mptsas.c
@@ -72,6 +72,12 @@ _topo_fini(topo_mod_t *mod)
topo_mod_unregister(mod);
}
+/*
+ * Get or set LED state for a particular target attached to an mpt_sas
+ * instance at (Enclosure Number, Slot Number). The function returns
+ * -1 on error and sets errno to ENOENT _only_ if the /devices node
+ * (*devctl) does not exist.
+ */
static int
do_led_control(topo_mod_t *mod, char *devctl, uint16_t enclosure,
uint16_t slot, uint8_t led, uint32_t *ledmode, boolean_t set)
@@ -88,8 +94,10 @@ do_led_control(topo_mod_t *mod, char *devctl, uint16_t enclosure,
lc.LedStatus = *ledmode;
if ((fd = open(devctl, (set ? O_RDWR : O_RDONLY))) == -1) {
+ int en = errno;
topo_mod_dprintf(mod, "devctl open failed: %s",
strerror(errno));
+ errno = en;
return (-1);
}
@@ -103,9 +111,11 @@ do_led_control(topo_mod_t *mod, char *devctl, uint16_t enclosure,
*/
lc.LedStatus = 0;
} else {
+ int en = errno;
topo_mod_dprintf(mod, "led control ioctl failed: %s",
strerror(errno));
(void) close(fd);
+ errno = en;
return (-1);
}
}
@@ -113,6 +123,7 @@ do_led_control(topo_mod_t *mod, char *devctl, uint16_t enclosure,
*ledmode = lc.LedStatus ? TOPO_LED_STATE_ON : TOPO_LED_STATE_OFF;
(void) close(fd);
+ errno = 0;
return (0);
}
@@ -127,7 +138,8 @@ mptsas_led_mode(topo_mod_t *mod, tnode_t *node, topo_version_t vers,
char *driver = NULL, *devctl = NULL;
uint32_t enclosure, slot;
uint8_t mptsas_led;
- boolean_t set;
+ boolean_t set, done;
+ char *elem, *lastp;
if (vers > TOPO_METH_MPTSAS_LED_MODE_VERSION)
return (topo_mod_seterrno(mod, ETOPO_METHOD_VERNEW));
@@ -197,8 +209,41 @@ mptsas_led_mode(topo_mod_t *mod, tnode_t *node, topo_version_t vers,
topo_mod_dprintf(mod, "%s: Getting LED mode\n", __func__);
}
- if (do_led_control(mod, devctl, enclosure, slot, mptsas_led, &ledmode,
- set) != 0) {
+ /*
+ * devctl is a (potentially) pipe-separated list of different device
+ * paths to try.
+ */
+ if ((elem = topo_mod_strsplit(mod, devctl, "|", &lastp)) == NULL) {
+ topo_mod_dprintf(mod, "%s: could not parse devctl list",
+ __func__);
+ ret = topo_mod_seterrno(mod, EMOD_UNKNOWN);
+ goto out;
+ }
+ done = B_FALSE;
+ do {
+ topo_mod_dprintf(mod, "%s: trying mpt_sas instance at %s\n",
+ __func__, elem);
+
+ ret = do_led_control(mod, elem, enclosure, slot,
+ mptsas_led, &ledmode, set);
+
+ /*
+ * Only try further devctl paths from the list if this one
+ * was not found:
+ */
+ if (ret == 0 || errno != ENOENT) {
+ done = B_TRUE;
+ } else {
+ topo_mod_dprintf(mod, "%s: instance not found\n",
+ __func__);
+ }
+
+ topo_mod_strfree(mod, elem);
+
+ } while (!done && (elem = topo_mod_strsplit(mod, NULL, "|",
+ &lastp)) != NULL);
+
+ if (ret != 0) {
topo_mod_dprintf(mod, "%s: do_led_control failed", __func__);
ret = topo_mod_seterrno(mod, EMOD_UNKNOWN);
goto out;
diff --git a/usr/src/lib/krb5/plugins/preauth/pkinit/Makefile.com b/usr/src/lib/krb5/plugins/preauth/pkinit/Makefile.com
index 8449d22e24..07fcb2bbf7 100644
--- a/usr/src/lib/krb5/plugins/preauth/pkinit/Makefile.com
+++ b/usr/src/lib/krb5/plugins/preauth/pkinit/Makefile.com
@@ -70,7 +70,7 @@ CERRWARN += -_gcc=-Wno-unused-function
CFLAGS += $(CCVERBOSE) -I..
DYNFLAGS += $(KRUNPATH) $(KMECHLIB) -znodelete
-LDLIBS += -L $(ROOTLIBDIR) -lcrypto -lc
+LDLIBS += -L $(ROOTLIBDIR) -lsunw_crypto -lc
ROOTLIBDIR= $(ROOT)/usr/lib/krb5/plugins/preauth
diff --git a/usr/src/lib/libbe/common/be_list.c b/usr/src/lib/libbe/common/be_list.c
index 1733f6dc43..535e35a2ca 100644
--- a/usr/src/lib/libbe/common/be_list.c
+++ b/usr/src/lib/libbe/common/be_list.c
@@ -25,6 +25,7 @@
/*
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2011 Joyent, Inc. All rights reserved.
*/
#include <assert.h>
diff --git a/usr/src/lib/libbrand/common/libbrand.c b/usr/src/lib/libbrand/common/libbrand.c
index 7d34df9e58..b865a43ca2 100644
--- a/usr/src/lib/libbrand/common/libbrand.c
+++ b/usr/src/lib/libbrand/common/libbrand.c
@@ -323,6 +323,7 @@ i_substitute_tokens(const char *sbuf, char *dbuf, int dbuf_size,
const char *curr_zone)
{
int dst, src;
+ static char *env_pool = NULL;
/*
* Walk through the characters, substituting values as needed.
@@ -339,6 +340,13 @@ i_substitute_tokens(const char *sbuf, char *dbuf, int dbuf_size,
case '%':
dst += strlcpy(dbuf + dst, "%", dbuf_size - dst);
break;
+ case 'P':
+ if (env_pool == NULL)
+ env_pool = getenv("_ZONEADMD_ZPOOL");
+ if (env_pool == NULL)
+ break;
+ dst += strlcpy(dbuf + dst, env_pool, dbuf_size - dst);
+ break;
case 'R':
if (zonepath == NULL)
break;
diff --git a/usr/src/lib/libbsm/common/adt.c b/usr/src/lib/libbsm/common/adt.c
index 8c7b299e32..4cf0dd7566 100644
--- a/usr/src/lib/libbsm/common/adt.c
+++ b/usr/src/lib/libbsm/common/adt.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2013, Joyent, Inc. All rights reserved.
*/
#include <bsm/adt.h>
@@ -702,7 +703,37 @@ adt_get_hostIP(const char *hostname, au_tid_addr_t *p_term)
int tries = 3;
char msg[512];
int eai_err;
+ struct ifaddrlist al;
+ int family;
+ boolean_t found = B_FALSE;
+ /*
+ * getaddrinfo can take a long time to timeout if it can't map the
+ * hostname to an IP address so try to get an IP address from a local
+ * interface first.
+ */
+ family = AF_INET6;
+ if (adt_get_local_address(family, &al) == 0) {
+ found = B_TRUE;
+ } else {
+ family = AF_INET;
+ if (adt_get_local_address(family, &al) == 0)
+ found = B_TRUE;
+ }
+
+ if (found) {
+ if (family == AF_INET) {
+ p_term->at_type = AU_IPv4;
+ (void) memcpy(p_term->at_addr, &al.addr.addr, AU_IPv4);
+ } else {
+ p_term->at_type = AU_IPv6;
+ (void) memcpy(p_term->at_addr, &al.addr.addr6, AU_IPv6);
+ }
+
+ return (0);
+ }
+
+ /* Now try getaddrinfo */
while ((tries-- > 0) &&
((eai_err = getaddrinfo(hostname, NULL, NULL, &ai)) != 0)) {
/*
@@ -739,7 +770,9 @@ adt_get_hostIP(const char *hostname, au_tid_addr_t *p_term)
}
freeaddrinfo(ai);
return (0);
- } else if (auditstate & (AUC_AUDITING | AUC_NOSPACE)) {
+ }
+
+ if (auditstate & (AUC_AUDITING | AUC_NOSPACE)) {
auditinfo_addr_t audit_info;
/*
@@ -747,58 +780,23 @@ adt_get_hostIP(const char *hostname, au_tid_addr_t *p_term)
* kernel audit context
*/
if (auditon(A_GETKAUDIT, (caddr_t)&audit_info,
- sizeof (audit_info)) < 0) {
- adt_write_syslog("unable to get kernel audit context",
- errno);
- goto try_interface;
+ sizeof (audit_info)) >= 0) {
+ adt_write_syslog("setting Audit IP address to kernel",
+ 0);
+ *p_term = audit_info.ai_termid;
+ return (0);
}
- adt_write_syslog("setting Audit IP address to kernel", 0);
- *p_term = audit_info.ai_termid;
- return (0);
+ adt_write_syslog("unable to get kernel audit context", errno);
}
-try_interface:
- {
- struct ifaddrlist al;
- int family;
- char ntop[INET6_ADDRSTRLEN];
-
- /*
- * getaddrinfo has failed to map the hostname
- * to an IP address, try to get an IP address
- * from a local interface. If none up, default
- * to loopback.
- */
- family = AF_INET6;
- if (adt_get_local_address(family, &al) != 0) {
- family = AF_INET;
-
- if (adt_get_local_address(family, &al) != 0) {
- adt_write_syslog("adt_get_local_address "
- "failed, no Audit IP address available, "
- "faking loopback and error",
- errno);
- IN_SET_LOOPBACK_ADDR(
- (struct sockaddr_in *)&(al.addr.addr));
- (void) memcpy(p_term->at_addr, &al.addr.addr,
- AU_IPv4);
- p_term->at_type = AU_IPv4;
- return (-1);
- }
- }
- if (family == AF_INET) {
- p_term->at_type = AU_IPv4;
- (void) memcpy(p_term->at_addr, &al.addr.addr, AU_IPv4);
- } else {
- p_term->at_type = AU_IPv6;
- (void) memcpy(p_term->at_addr, &al.addr.addr6, AU_IPv6);
- }
- (void) snprintf(msg, sizeof (msg), "mapping %s to %s",
- hostname, inet_ntop(family, &(al.addr), ntop,
- sizeof (ntop)));
- adt_write_syslog(msg, 0);
- return (0);
- }
+ /* No mapping, default to loopback. */
+ errno = ENETDOWN;
+ adt_write_syslog("adt_get_local_address failed, no Audit IP address "
+ "available, faking loopback and error", errno);
+ IN_SET_LOOPBACK_ADDR((struct sockaddr_in *)&(al.addr.addr));
+ (void) memcpy(p_term->at_addr, &al.addr.addr, AU_IPv4);
+ p_term->at_type = AU_IPv4;
+ return (-1);
}
/*
@@ -2093,8 +2091,8 @@ adt_selected(struct adt_event_state *event, au_event_t actual_id, int status)
}
/*
- * Can't map the host name to an IP address in
- * adt_get_hostIP. Get something off an interface
+ * Before trying to map the host name to an IP address in
+ * adt_get_hostIP, get something off an interface
* to act as the hosts IP address for auditing.
*/
diff --git a/usr/src/lib/libc/amd64/Makefile b/usr/src/lib/libc/amd64/Makefile
index 5b378065c5..9a496b0d7e 100644
--- a/usr/src/lib/libc/amd64/Makefile
+++ b/usr/src/lib/libc/amd64/Makefile
@@ -20,7 +20,7 @@
#
#
# Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
-# Copyright (c) 2012, Joyent, Inc. All rights reserved.
+# Copyright (c) 2014, Joyent, Inc. All rights reserved.
#
# Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
# Copyright 2013 Garrett D'Amore <garrett@damore.org>
@@ -853,6 +853,7 @@ PORTSYS= \
chmod.o \
chown.o \
corectl.o \
+ epoll.o \
exacctsys.o \
execl.o \
execle.o \
@@ -860,6 +861,7 @@ PORTSYS= \
fcntl.o \
getpagesizes.o \
getpeerucred.o \
+ inotify.o \
inst_sync.o \
issetugid.o \
label.o \
diff --git a/usr/src/lib/libc/i386/Makefile.com b/usr/src/lib/libc/i386/Makefile.com
index 6f6ebf0ed1..01f71c334c 100644
--- a/usr/src/lib/libc/i386/Makefile.com
+++ b/usr/src/lib/libc/i386/Makefile.com
@@ -20,7 +20,7 @@
#
#
# Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
-# Copyright (c) 2013, Joyent, Inc. All rights reserved.
+# Copyright (c) 2014, Joyent, Inc. All rights reserved.
# Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
# Copyright 2013 Garrett D'Amore <garrett@damore.org>
#
@@ -891,6 +891,7 @@ PORTSYS= \
chmod.o \
chown.o \
corectl.o \
+ epoll.o \
exacctsys.o \
execl.o \
execle.o \
@@ -898,6 +899,7 @@ PORTSYS= \
fcntl.o \
getpagesizes.o \
getpeerucred.o \
+ inotify.o \
inst_sync.o \
issetugid.o \
label.o \
diff --git a/usr/src/lib/libc/inc/thr_inlines.h b/usr/src/lib/libc/inc/thr_inlines.h
index f7cdc6a6bd..66d811f25b 100644
--- a/usr/src/lib/libc/inc/thr_inlines.h
+++ b/usr/src/lib/libc/inc/thr_inlines.h
@@ -47,17 +47,19 @@
extern __GNU_INLINE ulwp_t *
_curthread(void)
{
-#if defined(__amd64)
ulwp_t *__value;
- __asm__ __volatile__("movq %%fs:0, %0" : "=r" (__value));
+ __asm__ __volatile__(
+#if defined(__amd64)
+ "movq %%fs:0, %0\n\t"
#elif defined(__i386)
- ulwp_t *__value;
- __asm__ __volatile__("movl %%gs:0, %0" : "=r" (__value));
+ "movl %%gs:0, %0\n\t"
#elif defined(__sparc)
- register ulwp_t *__value __asm__("g7");
+ ".register %%g7, #scratch\n\t"
+ "mov %%g7, %0\n\t"
#else
#error "port me"
#endif
+ : "=r" (__value));
return (__value);
}
diff --git a/usr/src/lib/libc/inc/thr_uberdata.h b/usr/src/lib/libc/inc/thr_uberdata.h
index de0d4a6b05..a58eaf915f 100644
--- a/usr/src/lib/libc/inc/thr_uberdata.h
+++ b/usr/src/lib/libc/inc/thr_uberdata.h
@@ -932,6 +932,7 @@ typedef struct uberdata {
int ndaemons; /* total number of THR_DAEMON threads/lwps */
pid_t pid; /* the current process's pid */
void (*sigacthandler)(int, siginfo_t *, void *);
+ int (*setctxt)(const ucontext_t *);
ulwp_t *lwp_stacks;
ulwp_t *lwp_laststack;
int nfreestack;
@@ -944,6 +945,7 @@ typedef struct uberdata {
robust_t **robustlocks; /* table of registered robust locks */
robust_t *robustlist; /* list of registered robust locks */
char *progname; /* the basename of the program, from argv[0] */
+ char *ub_broot; /* the root of the native code in the brand */
struct uberdata **tdb_bootstrap;
tdb_t tdb; /* thread debug interfaces (for libc_db) */
} uberdata_t;
@@ -1144,6 +1146,7 @@ typedef struct uberdata32 {
int ndaemons;
int pid;
caddr32_t sigacthandler;
+ caddr32_t setctxt;
caddr32_t lwp_stacks;
caddr32_t lwp_laststack;
int nfreestack;
@@ -1243,6 +1246,7 @@ extern void rwl_free(ulwp_t *);
extern void heldlock_exit(void);
extern void heldlock_free(ulwp_t *);
extern void sigacthandler(int, siginfo_t *, void *);
+extern int setctxt(const ucontext_t *);
extern void signal_init(void);
extern int sigequalset(const sigset_t *, const sigset_t *);
extern void mutex_setup(void);
diff --git a/usr/src/lib/libc/port/gen/getauxv.c b/usr/src/lib/libc/port/gen/getauxv.c
index 500675719c..4356a01392 100644
--- a/usr/src/lib/libc/port/gen/getauxv.c
+++ b/usr/src/lib/libc/port/gen/getauxv.c
@@ -24,9 +24,8 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include "lint.h"
+#include "thr_uberdata.h"
#include <libc.h>
#include <fcntl.h>
#include <stdlib.h>
@@ -38,6 +37,7 @@
#include <thread.h>
#include <synch.h>
#include <atomic.h>
+#include <limits.h>
static mutex_t auxlock = DEFAULTMUTEX;
@@ -59,11 +59,20 @@ _getaux(int type)
if (auxb == NULL) {
lmutex_lock(&auxlock);
if (auxb == NULL) {
+ uberdata_t *udp = curthread->ul_uberdata;
struct stat statb;
auxv_t *buf = NULL;
+ char *path = "/proc/self/auxv";
+ char pbuf[PATH_MAX];
int fd;
- if ((fd = open("/proc/self/auxv", O_RDONLY)) != -1 &&
+ if (udp->ub_broot != NULL) {
+ (void) snprintf(pbuf, sizeof (pbuf),
+ "%s/proc/self/auxv", udp->ub_broot);
+ path = pbuf;
+ }
+
+ if ((fd = open(path, O_RDONLY)) != -1 &&
fstat(fd, &statb) != -1)
buf = libc_malloc(
statb.st_size + sizeof (auxv_t));
diff --git a/usr/src/lib/libc/port/gen/getlogin.c b/usr/src/lib/libc/port/gen/getlogin.c
index fadf3e055b..e4b2f62dc3 100644
--- a/usr/src/lib/libc/port/gen/getlogin.c
+++ b/usr/src/lib/libc/port/gen/getlogin.c
@@ -35,6 +35,7 @@
#include "lint.h"
#include <sys/types.h>
#include <sys/stat.h>
+#include <sys/sysmacros.h>
#include <fcntl.h>
#include <string.h>
#include <stdlib.h>
diff --git a/usr/src/lib/libc/port/gen/psiginfo.c b/usr/src/lib/libc/port/gen/psiginfo.c
index 1675db36ee..6775fef969 100644
--- a/usr/src/lib/libc/port/gen/psiginfo.c
+++ b/usr/src/lib/libc/port/gen/psiginfo.c
@@ -27,8 +27,6 @@
/* Copyright (c) 1988 AT&T */
/* All Rights Reserved */
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Print the name of the siginfo indicated by "sig", along with the
* supplied message
@@ -50,16 +48,23 @@ psiginfo(siginfo_t *sip, char *s)
{
char buf[256];
char *c;
+ size_t l = 0;
const struct siginfolist *listp;
- if (sip == 0)
+ if (sip == NULL)
return;
+ if (s != NULL && *s != '\0') {
+ l = snprintf(buf, sizeof (buf), _libc_gettext("%s : "), s);
+ if (l > sizeof (buf))
+ l = sizeof (buf);
+ }
+
if (sip->si_code <= 0) {
- (void) snprintf(buf, sizeof (buf),
- _libc_gettext("%s : %s ( from process %d )\n"),
- s, strsignal(sip->si_signo), sip->si_pid);
+ (void) snprintf(buf + l, sizeof (buf) - l,
+ _libc_gettext("%s ( from process %d )\n"),
+ strsignal(sip->si_signo), sip->si_pid);
} else if (((listp = &_sys_siginfolist[sip->si_signo-1]) != NULL) &&
sip->si_code <= listp->nsiginfo) {
c = _libc_gettext(listp->vsiginfo[sip->si_code-1]);
@@ -68,21 +73,20 @@ psiginfo(siginfo_t *sip, char *s)
case SIGBUS:
case SIGILL:
case SIGFPE:
- (void) snprintf(buf, sizeof (buf),
- _libc_gettext("%s : %s ( [%p] %s)\n"),
- s, strsignal(sip->si_signo),
+ (void) snprintf(buf + l, sizeof (buf) - l,
+ _libc_gettext("%s ( [%p] %s)\n"),
+ strsignal(sip->si_signo),
sip->si_addr, c);
break;
default:
- (void) snprintf(buf, sizeof (buf),
- _libc_gettext("%s : %s (%s)\n"),
- s, strsignal(sip->si_signo), c);
+ (void) snprintf(buf + l, sizeof (buf) - l,
+ _libc_gettext("%s (%s)\n"),
+ strsignal(sip->si_signo), c);
break;
}
} else {
- (void) snprintf(buf, sizeof (buf),
- _libc_gettext("%s : %s\n"),
- s, strsignal(sip->si_signo));
+ (void) snprintf(buf + l, sizeof (buf) - l,
+ _libc_gettext("%s\n"), strsignal(sip->si_signo));
}
(void) write(2, buf, strlen(buf));
}
diff --git a/usr/src/lib/libc/port/gen/psignal.c b/usr/src/lib/libc/port/gen/psignal.c
index 201beaacd7..2a61cd9610 100644
--- a/usr/src/lib/libc/port/gen/psignal.c
+++ b/usr/src/lib/libc/port/gen/psignal.c
@@ -37,8 +37,6 @@
* contributors.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Print the name of the signal indicated by "sig", along with the
* supplied message
@@ -61,14 +59,15 @@ void
psignal(int sig, const char *s)
{
char *c;
- size_t n;
+ size_t n = 0;
char buf[256];
if (sig < 0 || sig >= NSIG)
sig = 0;
c = strsignal(sig);
- n = strlen(s);
- if (n) {
+ if (s != NULL)
+ n = strlen(s);
+ if (n != 0) {
(void) snprintf(buf, sizeof (buf), "%s: %s\n", s, c);
} else {
(void) snprintf(buf, sizeof (buf), "%s\n", c);
diff --git a/usr/src/lib/libc/port/mapfile-vers b/usr/src/lib/libc/port/mapfile-vers
index 6509cd1aad..70d37d04be 100644
--- a/usr/src/lib/libc/port/mapfile-vers
+++ b/usr/src/lib/libc/port/mapfile-vers
@@ -24,8 +24,8 @@
# Copyright 2010 Nexenta Systems, Inc. All rights reserved.
# Use is subject to license terms.
#
+# Copyright (c) 2014, Joyent, Inc. All rights reserved.
# Copyright (c) 2012 by Delphix. All rights reserved.
-# Copyright (c) 2012, Joyent, Inc. All rights reserved.
# Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
# Copyright (c) 2013 Gary Mills
# Copyright 2014 Garrett D'Amore <garrett@damore.org>
@@ -2808,6 +2808,11 @@ $endif
_dgettext;
_doprnt;
_doscan;
+ epoll_create;
+ epoll_create1;
+ epoll_ctl;
+ epoll_wait;
+ epoll_pwait;
_errfp;
_errxfp;
exportfs;
@@ -2853,6 +2858,10 @@ $endif
__idmap_unreg;
__init_daemon_priv;
__init_suid_priv;
+ inotify_init;
+ inotify_init1;
+ inotify_add_watch;
+ inotify_rm_watch;
_insert;
inst_sync;
_iswctype;
@@ -3046,6 +3055,7 @@ $endif
zone_list;
zone_list_datalink;
zonept;
+ zone_get_nroot;
zone_remove_datalink;
zone_setattr;
zone_shutdown;
diff --git a/usr/src/lib/libc/port/sys/epoll.c b/usr/src/lib/libc/port/sys/epoll.c
new file mode 100644
index 0000000000..93379b583e
--- /dev/null
+++ b/usr/src/lib/libc/port/sys/epoll.c
@@ -0,0 +1,207 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/epoll.h>
+#include <sys/devpoll.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+
+/*
+ * Events that match their epoll(7) equivalents.
+ */
+#if EPOLLIN != POLLIN
+#error value of EPOLLIN does not match value of POLLIN
+#endif
+
+#if EPOLLPRI != POLLPRI
+#error value of EPOLLPRI does not match value of POLLPRI
+#endif
+
+#if EPOLLOUT != POLLOUT
+#error value of EPOLLOUT does not match value of POLLOUT
+#endif
+
+#if EPOLLRDNORM != POLLRDNORM
+#error value of EPOLLRDNORM does not match value of POLLRDNORM
+#endif
+
+#if EPOLLRDBAND != POLLRDBAND
+#error value of EPOLLRDBAND does not match value of POLLRDBAND
+#endif
+
+#if EPOLLERR != POLLERR
+#error value of EPOLLERR does not match value of POLLERR
+#endif
+
+#if EPOLLHUP != POLLHUP
+#error value of EPOLLHUP does not match value of POLLHUP
+#endif
+
+/*
+ * Events that we ignore entirely. They can be set in events, but they will
+ * never be returned.
+ */
+#define EPOLLIGNORED (EPOLLMSG | EPOLLWAKEUP)
+
+/*
+ * Events that we swizzle into other bit positions.
+ */
+#define EPOLLSWIZZLED \
+ (EPOLLRDHUP | EPOLLONESHOT | EPOLLET | EPOLLWRBAND | EPOLLWRNORM)
+
+int
+epoll_create(int size)
+{
+ int fd;
+
+ /*
+ * From the epoll_create() man page: "Since Linux 2.6.8, the size
+ * argument is ignored, but must be greater than zero." You keep using
+ * that word "ignored"...
+ */
+ if (size <= 0) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ if ((fd = open("/dev/poll", O_RDWR)) == -1)
+ return (-1);
+
+ if (ioctl(fd, DP_EPOLLCOMPAT, 0) == -1) {
+ (void) close(fd);
+ return (-1);
+ }
+
+ return (fd);
+}
+
+int
+epoll_create1(int flags)
+{
+ int fd, oflags = O_RDWR;
+
+ if (flags & EPOLL_CLOEXEC)
+ oflags |= O_CLOEXEC;
+
+ if ((fd = open("/dev/poll", oflags)) == -1)
+ return (-1);
+
+ if (ioctl(fd, DP_EPOLLCOMPAT, 0) == -1) {
+ (void) close(fd);
+ return (-1);
+ }
+
+ return (fd);
+}
+
+int
+epoll_ctl(int epfd, int op, int fd, struct epoll_event *event)
+{
+ dvpoll_epollfd_t epoll[2];
+ uint32_t events, ev = 0;
+ int i = 0;
+
+ epoll[i].dpep_pollfd.fd = fd;
+
+ switch (op) {
+ case EPOLL_CTL_DEL:
+ ev = POLLREMOVE;
+ break;
+
+ case EPOLL_CTL_MOD:
+ /*
+ * In the modify case, we pass down two events: one to
+ * remove the event and another to add it back.
+ */
+ epoll[i++].dpep_pollfd.events = POLLREMOVE;
+ epoll[i].dpep_pollfd.fd = fd;
+ /* FALLTHROUGH */
+
+ case EPOLL_CTL_ADD:
+ /*
+ * Mask off the events that we ignore, and then swizzle the
+ * events for which our values differ from their epoll(7)
+ * equivalents.
+ */
+ events = event->events;
+ ev = events & ~(EPOLLIGNORED | EPOLLSWIZZLED);
+
+ if (events & EPOLLRDHUP)
+ ev |= POLLRDHUP;
+
+ if (events & EPOLLET)
+ ev |= POLLET;
+
+ if (events & EPOLLONESHOT)
+ ev |= POLLONESHOT;
+
+ if (events & EPOLLWRNORM)
+ ev |= POLLWRNORM;
+
+ if (events & EPOLLWRBAND)
+ ev |= POLLWRBAND;
+
+ epoll[i].dpep_data = event->data.u64;
+ break;
+
+ default:
+ errno = EOPNOTSUPP;
+ return (-1);
+ }
+
+ epoll[i].dpep_pollfd.events = ev;
+
+ return (write(epfd, epoll, sizeof (epoll[0]) * (i + 1)) == -1 ? -1 : 0);
+}
+
+int
+epoll_wait(int epfd, struct epoll_event *events,
+ int maxevents, int timeout)
+{
+ struct dvpoll arg;
+
+ if (maxevents <= 0) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ arg.dp_nfds = maxevents;
+ arg.dp_timeout = timeout;
+ arg.dp_fds = (pollfd_t *)events;
+
+ return (ioctl(epfd, DP_POLL, &arg));
+}
+
+int
+epoll_pwait(int epfd, struct epoll_event *events,
+ int maxevents, int timeout, const sigset_t *sigmask)
+{
+ struct dvpoll arg;
+
+ if (maxevents <= 0) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ arg.dp_nfds = maxevents;
+ arg.dp_timeout = timeout;
+ arg.dp_fds = (pollfd_t *)events;
+ arg.dp_setp = (sigset_t *)sigmask;
+
+ return (ioctl(epfd, DP_PPOLL, &arg));
+}
diff --git a/usr/src/lib/libc/port/sys/inotify.c b/usr/src/lib/libc/port/sys/inotify.c
new file mode 100644
index 0000000000..5d637eaee1
--- /dev/null
+++ b/usr/src/lib/libc/port/sys/inotify.c
@@ -0,0 +1,142 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/inotify.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <strings.h>
+#include <dirent.h>
+
+int
+inotify_init()
+{
+ return (open("/dev/inotify", O_RDWR));
+}
+
+int
+inotify_init1(int flags)
+{
+ int oflags = O_RDWR;
+
+ if (flags & IN_NONBLOCK)
+ oflags |= O_NONBLOCK;
+
+ if (flags & IN_CLOEXEC)
+ oflags |= O_CLOEXEC;
+
+ return (open("/dev/inotify", oflags));
+}
+
+int
+inotify_add_watch(int fd, const char *pathname, uint32_t mask)
+{
+ inotify_addwatch_t ioc;
+ inotify_addchild_t cioc;
+ struct stat buf;
+ int dirfd, wd;
+ DIR *dir;
+ struct dirent *dp;
+ int oflags = O_RDONLY;
+
+ if (mask & IN_DONT_FOLLOW)
+ oflags |= O_NOFOLLOW;
+
+ if ((dirfd = open(pathname, oflags)) < 0)
+ return (-1);
+
+ if (fstat(dirfd, &buf) != 0) {
+ (void) close(dirfd);
+ return (-1);
+ }
+
+ if ((mask & IN_ONLYDIR) && !(buf.st_mode & S_IFDIR)) {
+ (void) close(dirfd);
+ errno = ENOTDIR;
+ return (-1);
+ }
+
+ bzero(&ioc, sizeof (ioc));
+ ioc.inaw_fd = dirfd;
+ ioc.inaw_mask = mask;
+
+ if ((wd = ioctl(fd, INOTIFYIOC_ADD_WATCH, &ioc)) < 0) {
+ (void) close(dirfd);
+ return (-1);
+ }
+
+ if (!(buf.st_mode & S_IFDIR) || !(mask & IN_CHILD_EVENTS)) {
+ (void) close(dirfd);
+ (void) ioctl(fd, INOTIFYIOC_ACTIVATE, wd);
+ return (wd);
+ }
+
+ /*
+ * If we have a directory and we have a mask that denotes child events,
+ * we need to manually add a child watch to every directory entry.
+ * (Because our watch is in place, it will automatically be added to
+ * files that are newly created after this point.)
+ */
+ if ((dir = fdopendir(dirfd)) == NULL) {
+ (void) inotify_rm_watch(fd, wd);
+ (void) close(dirfd);
+ return (-1);
+ }
+
+ bzero(&cioc, sizeof (cioc));
+ cioc.inac_fd = dirfd;
+
+ while ((dp = readdir(dir)) != NULL) {
+ if (strcmp(dp->d_name, ".") == 0)
+ continue;
+
+ if (strcmp(dp->d_name, "..") == 0)
+ continue;
+
+ cioc.inac_name = dp->d_name;
+
+ if (ioctl(fd, INOTIFYIOC_ADD_CHILD, &cioc) != 0) {
+ /*
+ * If we get an error that indicates clear internal
+ * malfunctioning, we propagate the error. Otherwise
+ * we eat it: this could be a file that no longer
+ * exists or a symlink or something else that we
+ * can't lookup.
+ */
+ switch (errno) {
+ case ENXIO:
+ case EFAULT:
+ case EBADF:
+ (void) closedir(dir);
+ inotify_rm_watch(fd, wd);
+ return (-1);
+ default:
+ break;
+ }
+ }
+ }
+
+ (void) closedir(dir);
+ (void) ioctl(fd, INOTIFYIOC_ACTIVATE, wd);
+
+ return (wd);
+}
+
+int
+inotify_rm_watch(int fd, int wd)
+{
+ return (ioctl(fd, INOTIFYIOC_RM_WATCH, wd));
+}
diff --git a/usr/src/lib/libc/port/sys/zone.c b/usr/src/lib/libc/port/sys/zone.c
index 4a4c70043d..8cf28c3ccf 100644
--- a/usr/src/lib/libc/port/sys/zone.c
+++ b/usr/src/lib/libc/port/sys/zone.c
@@ -22,9 +22,11 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011 Joyent Inc. All rights reserved.
*/
#include "lint.h"
+#include "thr_uberdata.h"
#include <sys/types.h>
#include <sys/syscall.h>
#include <sys/zone.h>
@@ -39,7 +41,8 @@
zoneid_t
zone_create(const char *name, const char *root, const struct priv_set *privs,
const char *rctls, size_t rctlsz, const char *zfs, size_t zfssz,
- int *extended_error, int match, int doi, const bslabel_t *label, int flags)
+ int *extended_error, int match, int doi, const bslabel_t *label, int flags,
+ zoneid_t req_zoneid)
{
zone_def zd;
priv_data_t *d;
@@ -59,6 +62,7 @@ zone_create(const char *name, const char *root, const struct priv_set *privs,
zd.doi = doi;
zd.label = label;
zd.flags = flags;
+ zd.zoneid = req_zoneid;
return ((zoneid_t)syscall(SYS_zone, ZONE_CREATE, &zd));
}
@@ -241,3 +245,10 @@ zone_list_datalink(zoneid_t zoneid, int *dlnump, datalink_id_t *linkids)
{
return (syscall(SYS_zone, ZONE_LIST_DATALINK, zoneid, dlnump, linkids));
}
+
+const char *
+zone_get_nroot()
+{
+ uberdata_t *udp = curthread->ul_uberdata;
+ return (udp->ub_broot);
+}
diff --git a/usr/src/lib/libc/port/threads/sigaction.c b/usr/src/lib/libc/port/threads/sigaction.c
index 8e9df7ab60..dd7e6159fb 100644
--- a/usr/src/lib/libc/port/threads/sigaction.c
+++ b/usr/src/lib/libc/port/threads/sigaction.c
@@ -22,6 +22,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
*/
#include "lint.h"
@@ -458,11 +459,12 @@ sigaction(int sig, const struct sigaction *nact, struct sigaction *oact)
}
/*
- * This is a private interface for the linux brand interface.
+ * This is a private interface for the lx brand.
*/
void
setsigacthandler(void (*nsigacthandler)(int, siginfo_t *, void *),
- void (**osigacthandler)(int, siginfo_t *, void *))
+ void (**osigacthandler)(int, siginfo_t *, void *),
+ int (*brsetctxt)(const ucontext_t *))
{
ulwp_t *self = curthread;
uberdata_t *udp = self->ul_uberdata;
@@ -471,6 +473,9 @@ setsigacthandler(void (*nsigacthandler)(int, siginfo_t *, void *),
*osigacthandler = udp->sigacthandler;
udp->sigacthandler = nsigacthandler;
+
+ if (brsetctxt != NULL)
+ udp->setctxt = brsetctxt;
}
/*
@@ -522,6 +527,7 @@ int
setcontext(const ucontext_t *ucp)
{
ulwp_t *self = curthread;
+ uberdata_t *udp = self->ul_uberdata;
int ret;
ucontext_t uc;
@@ -578,7 +584,7 @@ setcontext(const ucontext_t *ucp)
*/
set_parking_flag(self, 0);
self->ul_sp = 0;
- ret = __setcontext(&uc);
+ ret = udp->setctxt(&uc);
/*
* It is OK for setcontext() to return if the user has not specified
diff --git a/usr/src/lib/libc/port/threads/thr.c b/usr/src/lib/libc/port/threads/thr.c
index b5d848449d..485d7f8edf 100644
--- a/usr/src/lib/libc/port/threads/thr.c
+++ b/usr/src/lib/libc/port/threads/thr.c
@@ -23,7 +23,7 @@
* Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
*/
#include "lint.h"
@@ -124,6 +124,7 @@ uberdata_t __uberdata = {
0, /* ndaemons */
0, /* pid */
sigacthandler, /* sigacthandler */
+ __setcontext, /* setctxt */
NULL, /* lwp_stacks */
NULL, /* lwp_laststack */
0, /* nfreestack */
@@ -136,6 +137,7 @@ uberdata_t __uberdata = {
NULL, /* robustlocks */
NULL, /* robustlist */
NULL, /* progname */
+ NULL, /* ub_broot */
NULL, /* __tdb_bootstrap */
{ /* tdb */
NULL, /* tdb_sync_addr_hash */
@@ -1219,6 +1221,24 @@ extern void atfork_init(void);
extern void __proc64id(void);
#endif
+static void
+init_brandroot(uberdata_t *udp)
+{
+ Dl_argsinfo_t args;
+
+ udp->ub_broot = NULL;
+ if (dlinfo(RTLD_SELF, RTLD_DI_ARGSINFO, &args) < 0)
+ return;
+
+ while (args.dla_auxv->a_type != AT_NULL) {
+ if (args.dla_auxv->a_type == AT_SUN_BRAND_NROOT) {
+ udp->ub_broot = args.dla_auxv->a_un.a_ptr;
+ return;
+ }
+ args.dla_auxv++;
+ }
+}
+
/*
* libc_init() is called by ld.so.1 for library initialization.
* We perform minimal initialization; enough to work with the main thread.
@@ -1255,6 +1275,13 @@ libc_init(void)
(void) _atexit(__cleanup);
/*
+ * Every libc, regardless of link map, needs to go through and check its
+ * aux vectors so as to indicate whether or not this has been given a
+ * brand root with which we use to qualify various other data.
+ */
+ init_brandroot(udp);
+
+ /*
* We keep our uberdata on one of (a) the first alternate link map
* or (b) the primary link map. We switch to the primary link map
* and stay there once we see it. All intermediate link maps are
diff --git a/usr/src/lib/libc/sparc/Makefile.com b/usr/src/lib/libc/sparc/Makefile.com
index 15c5c2b092..4c5bbcfc96 100644
--- a/usr/src/lib/libc/sparc/Makefile.com
+++ b/usr/src/lib/libc/sparc/Makefile.com
@@ -20,9 +20,9 @@
#
#
# Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
-# Copyright (c) 2012, Joyent, Inc. All rights reserved.
# Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
# Copyright 2013 Garrett D'Amore <garrett@damore.org>
+# Copyright (c) 2014, Joyent, Inc. All rights reserved.
#
# Copyright 2011 Nexenta Systems, Inc. All rights reserved.
# Use is subject to license terms.
@@ -925,6 +925,7 @@ PORTSYS= \
chmod.o \
chown.o \
corectl.o \
+ epoll.o \
exacctsys.o \
execl.o \
execle.o \
@@ -932,6 +933,7 @@ PORTSYS= \
fcntl.o \
getpagesizes.o \
getpeerucred.o \
+ inotify.o \
inst_sync.o \
issetugid.o \
label.o \
diff --git a/usr/src/lib/libc/sparc/crt/_rtld.c b/usr/src/lib/libc/sparc/crt/_rtld.c
index a9e9c6d98a..843cfe03a5 100644
--- a/usr/src/lib/libc/sparc/crt/_rtld.c
+++ b/usr/src/lib/libc/sparc/crt/_rtld.c
@@ -62,6 +62,15 @@
#define SYSCONFIG (*(funcs[SYSCONFIG_F]))
/*
+ * GCC will not emit unused static functions unless specifically told it must
+ */
+#ifdef __GNUC__
+#define __USED __attribute__((used))
+#else
+#define __USED
+#endif
+
+/*
* Alias ld.so entry point -- receives a bootstrap structure and a vector
* of strings. The vector is "well-known" to us, and consists of pointers
* to string constants. This aliasing bootstrap requires no relocation in
diff --git a/usr/src/lib/libc/sparcv9/Makefile.com b/usr/src/lib/libc/sparcv9/Makefile.com
index 0c3f0a2e32..c67d0734d1 100644
--- a/usr/src/lib/libc/sparcv9/Makefile.com
+++ b/usr/src/lib/libc/sparcv9/Makefile.com
@@ -20,9 +20,9 @@
#
#
# Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
-# Copyright (c) 2012, Joyent, Inc. All rights reserved.
# Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
# Copyright 2013 Garrett D'Amore <garrett@damore.org>
+# Copyright (c) 2014, Joyent, Inc. All rights reserved.
#
# Copyright 2011 Nexenta Systems, Inc. All rights reserved.
# Use is subject to license terms.
@@ -871,6 +871,7 @@ PORTSYS= \
chmod.o \
chown.o \
corectl.o \
+ epoll.o \
exacctsys.o \
execl.o \
execle.o \
diff --git a/usr/src/lib/libc_db/common/thread_db.c b/usr/src/lib/libc_db/common/thread_db.c
index 77c44b2782..b58971dfec 100644
--- a/usr/src/lib/libc_db/common/thread_db.c
+++ b/usr/src/lib/libc_db/common/thread_db.c
@@ -24,6 +24,10 @@
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
@@ -146,6 +150,7 @@ static td_err_e
td_read_uberdata(td_thragent_t *ta_p)
{
struct ps_prochandle *ph_p = ta_p->ph_p;
+ int i;
if (ta_p->model == PR_MODEL_NATIVE) {
uberdata_t uberdata;
@@ -163,12 +168,10 @@ td_read_uberdata(td_thragent_t *ta_p)
if (ps_pdread(ph_p, (psaddr_t)uberdata.tdb.tdb_events,
ta_p->tdb_events, sizeof (ta_p->tdb_events)) != PS_OK)
return (TD_DBERR);
-
} else {
#if defined(_LP64) && defined(_SYSCALL32)
uberdata32_t uberdata;
caddr32_t tdb_events[TD_MAX_EVENT_NUM - TD_MIN_EVENT_NUM + 1];
- int i;
if (ps_pdread(ph_p, ta_p->uberdata_addr,
&uberdata, sizeof (uberdata)) != PS_OK)
@@ -189,6 +192,29 @@ td_read_uberdata(td_thragent_t *ta_p)
return (TD_DBERR);
#endif
}
+
+ /*
+ * Unfortunately, we are (implicitly) assuming that our uberdata
+ * definition precisely matches that of our target. If this is not
+ * true (that is, if we're examining a core file from a foreign
+ * system that has a different definition of uberdata), the failure
+ * modes can be frustratingly non-explicit. In an effort to catch
+ * this upon initialization (when the debugger may still be able to
+ * opt for another thread model or may be able to fail explicitly), we
+ * check that each of our tdb_events points to valid memory (these are
+ * putatively text upon which a breakpoint can be issued), with the
+ * hope that this is enough of a self-consistency check to lead to
+ * explicit failure on a mismatch.
+ */
+ for (i = 0; i < TD_MAX_EVENT_NUM - TD_MIN_EVENT_NUM + 1; i++) {
+ uint8_t check;
+
+ if (ps_pdread(ph_p, (psaddr_t)ta_p->tdb_events[i],
+ &check, sizeof (check)) != PS_OK) {
+ return (TD_DBERR);
+ }
+ }
+
if (ta_p->hash_size != 1) { /* multi-threaded */
ta_p->initialized = 2;
ta_p->single_lwpid = 0;
diff --git a/usr/src/lib/libctf/Makefile.com b/usr/src/lib/libctf/Makefile.com
index 4d1e01d4eb..0169c2a367 100644
--- a/usr/src/lib/libctf/Makefile.com
+++ b/usr/src/lib/libctf/Makefile.com
@@ -23,43 +23,9 @@
# Use is subject to license terms.
#
-LIBRARY = libctf.a
-VERS = .1
-
-COMMON_OBJS = \
- ctf_create.o \
- ctf_decl.o \
- ctf_error.o \
- ctf_hash.o \
- ctf_labels.o \
- ctf_lookup.o \
- ctf_open.o \
- ctf_types.o \
- ctf_util.o
-
-LIB_OBJS = \
- ctf_lib.o \
- ctf_subr.o
-
-OBJECTS = $(COMMON_OBJS) $(LIB_OBJS)
-
-include ../../Makefile.lib
+include ../Makefile.shared.com
include ../../Makefile.rootfs
-SRCS = $(COMMON_OBJS:%.o=../../../common/ctf/%.c) $(LIB_OBJS:%.o=../common/%.c)
-LIBS = $(DYNLIB) $(LINTLIB)
-
-SRCDIR = ../common
-
-CPPFLAGS += -I../common -I../../../common/ctf -DCTF_OLD_VERSIONS
-CFLAGS += $(CCVERBOSE)
-
-CERRWARN += -_gcc=-Wno-uninitialized
-
-LDLIBS += -lc
-
-$(LINTLIB) := SRCS = $(SRCDIR)/$(LINTSRC)
-
.KEEP_STATE:
all: $(LIBS)
@@ -67,7 +33,4 @@ all: $(LIBS)
lint: lintcheck
include ../../Makefile.targ
-
-objs/%.o pics/%.o: ../../../common/ctf/%.c
- $(COMPILE.c) -o $@ $<
- $(POST_PROCESS_O)
+include ../Makefile.shared.targ
diff --git a/usr/src/lib/libctf/Makefile.shared.com b/usr/src/lib/libctf/Makefile.shared.com
new file mode 100644
index 0000000000..92f33a93b6
--- /dev/null
+++ b/usr/src/lib/libctf/Makefile.shared.com
@@ -0,0 +1,67 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# Copyright (c) 2014, Joyent, Inc. All rights reserved.
+#
+
+#
+# This Makefile is shared between the libctf native build in tools and
+# the libctf build here for the system.
+#
+LIBRARY = libctf.a
+VERS = .1
+
+COMMON_OBJS = \
+ ctf_create.o \
+ ctf_decl.o \
+ ctf_error.o \
+ ctf_hash.o \
+ ctf_labels.o \
+ ctf_lookup.o \
+ ctf_open.o \
+ ctf_types.o \
+ ctf_util.o
+
+LIB_OBJS = \
+ ctf_diff.o \
+ ctf_lib.o \
+ ctf_subr.o
+
+OBJECTS = $(COMMON_OBJS) $(LIB_OBJS)
+MAPFILEDIR = $(SRC)/lib/libctf
+
+include $(SRC)/lib/Makefile.lib
+
+SRCS = $(COMMON_OBJS:%.o=$(SRC)/common/ctf/%.c) $(LIB_OBJS:%.o=$(SRC)/lib/libctf/common/%.c)
+LIBS = $(DYNLIB) $(LINTLIB)
+LDLIBS = -lc
+
+SRCDIR = $(SRC)/lib/libctf/common
+
+CPPFLAGS += -I$(SRC)/lib/libctf/common -I$(SRC)/common/ctf -DCTF_OLD_VERSIONS
+CFLAGS += $(CCVERBOSE)
+
+CERRWARN += -_gcc=-Wno-uninitialized
+
+$(LINTLIB) := SRCS = $(SRCDIR)/$(LINTSRC)
diff --git a/usr/src/lib/libctf/Makefile.shared.targ b/usr/src/lib/libctf/Makefile.shared.targ
new file mode 100644
index 0000000000..7fc28aefcd
--- /dev/null
+++ b/usr/src/lib/libctf/Makefile.shared.targ
@@ -0,0 +1,22 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014, Joyent, Inc. All rights reserved.
+#
+
+#
+# This Makefile is shared between both the tools and the normal library build.
+#
+
+pics/%.o: $(SRC)/common/ctf/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
diff --git a/usr/src/lib/libctf/common/ctf_diff.c b/usr/src/lib/libctf/common/ctf_diff.c
new file mode 100644
index 0000000000..aabfd38fcf
--- /dev/null
+++ b/usr/src/lib/libctf/common/ctf_diff.c
@@ -0,0 +1,783 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <libctf.h>
+#include <ctf_impl.h>
+#include <sys/debug.h>
+
+/* typedef in libctf.h */
+struct ctf_diff {
+ uint_t cds_flags;
+ ctf_file_t *cds_ifp;
+ ctf_file_t *cds_ofp;
+ ctf_idhash_t cds_forward;
+ ctf_idhash_t cds_reverse;
+ ctf_idhash_t cds_fneg;
+ ctf_idhash_t cds_f_visited;
+ ctf_idhash_t cds_r_visited;
+ ctf_diff_type_f cds_func;
+ int cds_visitid;
+ void *cds_arg;
+};
+
+/*
+ * Team Diff
+ */
+static int ctf_diff_type(ctf_diff_t *, ctf_file_t *, ctf_id_t, ctf_file_t *,
+ ctf_id_t);
+
+static int
+ctf_diff_name(ctf_file_t *ifp, ctf_id_t iid, ctf_file_t *ofp, ctf_id_t oid)
+{
+ const char *iname, *oname;
+ const ctf_type_t *itp, *otp;
+
+ if ((itp = ctf_lookup_by_id(&ifp, iid)) == NULL)
+ return (CTF_ERR);
+
+ if ((otp = ctf_lookup_by_id(&ofp, oid)) == NULL)
+ return (ctf_set_errno(ifp, iid));
+
+ iname = ctf_strptr(ifp, itp->ctt_name);
+ oname = ctf_strptr(ofp, otp->ctt_name);
+
+ if ((iname == NULL || oname == NULL) && (iname != oname))
+ return (B_TRUE);
+
+ /* Two anonymous names are the same */
+ if (iname == NULL && oname == NULL)
+ return (B_FALSE);
+
+ return (strcmp(iname, oname) == 0 ? B_FALSE: B_TRUE);
+}
+
+/*
+ * For floats and ints
+ */
+static int
+ctf_diff_number(ctf_file_t *ifp, ctf_id_t iid, ctf_file_t *ofp, ctf_id_t oid)
+{
+ ctf_encoding_t ien, den;
+
+ if (ctf_type_encoding(ifp, iid, &ien) != 0)
+ return (CTF_ERR);
+
+ if (ctf_type_encoding(ofp, oid, &den) != 0)
+ return (ctf_set_errno(ifp, iid));
+
+ if (bcmp(&ien, &den, sizeof (ctf_encoding_t)) != 0)
+ return (B_TRUE);
+
+ return (B_FALSE);
+}
+
+/*
+ * Two qualifiers are equivalent iff they point to two equivalent types.
+ */
+static int
+ctf_diff_qualifier(ctf_diff_t *cds, ctf_file_t *ifp, ctf_id_t iid,
+ ctf_file_t *ofp, ctf_id_t oid)
+{
+ ctf_id_t iref, oref;
+
+ iref = ctf_type_reference(ifp, iid);
+ if (iref == CTF_ERR)
+ return (CTF_ERR);
+
+ oref = ctf_type_reference(ofp, oid);
+ if (oref == CTF_ERR)
+ return (ctf_set_errno(ifp, ctf_errno(ofp)));
+
+ return (ctf_diff_type(cds, ifp, iref, ofp, oref));
+}
+
+/*
+ * Two arrays are the same iff they have the same type for contents, the same
+ * type for the index, and the same number of elements.
+ */
+static int
+ctf_diff_array(ctf_diff_t *cds, ctf_file_t *ifp, ctf_id_t iid, ctf_file_t *ofp,
+ ctf_id_t oid)
+{
+ int ret;
+ ctf_arinfo_t iar, oar;
+
+ if (ctf_array_info(ifp, iid, &iar) == CTF_ERR)
+ return (CTF_ERR);
+
+ if (ctf_array_info(ofp, oid, &oar) == CTF_ERR)
+ return (ctf_set_errno(ifp, ctf_errno(ofp)));
+
+ ret = ctf_diff_type(cds, ifp, iar.ctr_contents, ofp, oar.ctr_contents);
+ if (ret != B_FALSE)
+ return (ret);
+
+ if (iar.ctr_nelems == oar.ctr_nelems)
+ return (B_FALSE);
+
+ ret = ctf_diff_type(cds, ifp, iar.ctr_index, ofp, oar.ctr_index);
+ if (ret != B_FALSE)
+ return (ret);
+
+ return (B_TRUE);
+}
+
+/*
+ * Two function pointers are the same if the following is all true:
+ *
+ * o They have the same return type
+ * o They have the same number of arguments
+ * o The arguments are of the same type
+ * o They have the same flags
+ */
+static int
+ctf_diff_func(ctf_diff_t *cds, ctf_file_t *ifp, ctf_id_t iid, ctf_file_t *ofp,
+ ctf_id_t oid)
+{
+ int ret, i;
+ ctf_funcinfo_t ifunc, ofunc;
+ ctf_id_t *iids, *oids;
+
+ if (ctf_func_info_by_id(ifp, iid, &ifunc) == CTF_ERR)
+ return (CTF_ERR);
+
+ if (ctf_func_info_by_id(ofp, oid, &ofunc) == CTF_ERR)
+ return (ctf_set_errno(ifp, ctf_errno(ofp)));
+
+ if (ifunc.ctc_argc != ofunc.ctc_argc)
+ return (B_TRUE);
+
+ if (ifunc.ctc_flags != ofunc.ctc_flags)
+ return (B_TRUE);
+
+ ret = ctf_diff_type(cds, ifp, ifunc.ctc_return, ofp, ofunc.ctc_return);
+ if (ret != B_FALSE)
+ return (ret);
+
+ iids = ctf_alloc(sizeof (ctf_id_t) * ifunc.ctc_argc);
+ if (iids == NULL)
+ return (ctf_set_errno(ifp, ENOMEM));
+
+ oids = ctf_alloc(sizeof (ctf_id_t) * ifunc.ctc_argc);
+ if (oids == NULL) {
+ ctf_free(iids, sizeof (ctf_id_t) * ifunc.ctc_argc);
+ return (ctf_set_errno(ifp, ENOMEM));
+ }
+
+ if (ctf_func_args_by_id(ifp, iid, ifunc.ctc_argc, iids) == CTF_ERR) {
+ ret = CTF_ERR;
+ goto out;
+ }
+
+ if (ctf_func_args_by_id(ofp, oid, ofunc.ctc_argc, oids) == CTF_ERR) {
+ ret = ctf_set_errno(ifp, ctf_errno(ofp));
+ goto out;
+ }
+
+ ret = B_TRUE;
+ for (i = 0; i < ifunc.ctc_argc; i++) {
+ ret = ctf_diff_type(cds, ifp, iids[i], ofp, oids[i]);
+ if (ret != B_FALSE)
+ goto out;
+ }
+ ret = B_FALSE;
+
+out:
+ ctf_free(iids, sizeof (ctf_id_t) * ifunc.ctc_argc);
+ ctf_free(oids, sizeof (ctf_id_t) * ofunc.ctc_argc);
+ return (ret);
+}
+
+/*
+ * Two structures are the same if every member is identical to its corresponding
+ * type, at the same offset, and has the same name.
+ */
+static int
+ctf_diff_struct(ctf_diff_t *cds, ctf_file_t *ifp, ctf_id_t iid, ctf_file_t *ofp,
+ ctf_id_t oid)
+{
+ ctf_file_t *oifp;
+ const ctf_type_t *itp, *otp;
+ ssize_t isize, iincr, osize, oincr;
+ const ctf_member_t *imp, *omp;
+ const ctf_lmember_t *ilmp, *olmp;
+ int n;
+
+ oifp = ifp;
+
+ if ((itp = ctf_lookup_by_id(&ifp, iid)) == NULL)
+ return (CTF_ERR);
+
+ if ((otp = ctf_lookup_by_id(&ofp, oid)) == NULL)
+ return (ctf_set_errno(oifp, ctf_errno(ofp)));
+
+ if (LCTF_INFO_VLEN(ifp, itp->ctt_info) !=
+ LCTF_INFO_VLEN(ofp, otp->ctt_info))
+ return (B_TRUE);
+
+ (void) ctf_get_ctt_size(ifp, itp, &isize, &iincr);
+ (void) ctf_get_ctt_size(ofp, otp, &osize, &oincr);
+
+ if (ifp->ctf_version == CTF_VERSION_1 || isize < CTF_LSTRUCT_THRESH) {
+ imp = (const ctf_member_t *)((uintptr_t)itp + iincr);
+ ilmp = NULL;
+ } else {
+ imp = NULL;
+ ilmp = (const ctf_lmember_t *)((uintptr_t)itp + iincr);
+ }
+
+ if (ofp->ctf_version == CTF_VERSION_1 || osize < CTF_LSTRUCT_THRESH) {
+ omp = (const ctf_member_t *)((uintptr_t)otp + oincr);
+ olmp = NULL;
+ } else {
+ omp = NULL;
+ olmp = (const ctf_lmember_t *)((uintptr_t)otp + oincr);
+ }
+
+ for (n = LCTF_INFO_VLEN(ifp, itp->ctt_info); n != 0; n--) {
+ const char *iname, *oname;
+ ulong_t ioff, ooff;
+ ctf_id_t itype, otype;
+ int ret;
+
+ if (imp != NULL) {
+ iname = ctf_strptr(ifp, imp->ctm_name);
+ ioff = imp->ctm_offset;
+ itype = imp->ctm_type;
+ } else {
+ iname = ctf_strptr(ifp, ilmp->ctlm_name);
+ ioff = CTF_LMEM_OFFSET(ilmp);
+ itype = ilmp->ctlm_type;
+ }
+
+ if (omp != NULL) {
+ oname = ctf_strptr(ofp, omp->ctm_name);
+ ooff = omp->ctm_offset;
+ otype = omp->ctm_type;
+ } else {
+ oname = ctf_strptr(ofp, olmp->ctlm_name);
+ ooff = CTF_LMEM_OFFSET(olmp);
+ otype = olmp->ctlm_type;
+ }
+
+ if (ioff != ooff) {
+ return (B_TRUE);
+ }
+ if (strcmp(iname, oname) != 0) {
+ return (B_TRUE);
+ }
+ ret = ctf_diff_type(cds, ifp, itype, ofp, otype);
+ if (ret != B_FALSE)
+ return (ret);
+
+ /* Advance our pointers */
+ if (imp != NULL)
+ imp++;
+ if (ilmp != NULL)
+ ilmp++;
+ if (omp != NULL)
+ omp++;
+ if (olmp != NULL)
+ olmp++;
+ }
+
+ return (B_FALSE);
+}
+
+/*
+ * Two unions are the same if they have the same set of members. This is similar
+ * to, but slightly different from a struct. The offsets of members don't
+ * matter. However, their is no guarantee of ordering so we have to fall back to
+ * doing an O(N^2) scan.
+ */
+typedef struct ctf_diff_union_member {
+ ctf_diff_t *cdum_cds;
+ ctf_file_t *cdum_fp;
+ ctf_file_t *cdum_iterfp;
+ const char *cdum_name;
+ ctf_id_t cdum_type;
+ int cdum_ret;
+} ctf_diff_union_member_t;
+
+typedef struct ctf_diff_union_fp {
+ ctf_diff_t *cduf_cds;
+ ctf_file_t *cduf_curfp;
+ ctf_file_t *cduf_altfp;
+ ctf_id_t cduf_type;
+ int cduf_ret;
+} ctf_diff_union_fp_t;
+
+/* ARGSUSED */
+static int
+ctf_diff_union_check_member(const char *name, ctf_id_t id, ulong_t off,
+ void *arg)
+{
+ int ret;
+ ctf_diff_union_member_t *cdump = arg;
+
+ if (strcmp(name, cdump->cdum_name) != 0)
+ return (0);
+
+ ret = ctf_diff_type(cdump->cdum_cds, cdump->cdum_fp, cdump->cdum_type,
+ cdump->cdum_iterfp, id);
+ if (ret == CTF_ERR) {
+ cdump->cdum_ret = CTF_ERR;
+ return (1);
+ }
+
+ if (ret == B_FALSE) {
+ cdump->cdum_ret = B_FALSE;
+ /* Return non-zero to stop iteration as we have a match */
+ return (1);
+ }
+
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+ctf_diff_union_check_fp(const char *name, ctf_id_t id, ulong_t off, void *arg)
+{
+ int ret;
+ ctf_diff_union_member_t cdum;
+ ctf_diff_union_fp_t *cdufp = arg;
+
+ cdum.cdum_cds = cdufp->cduf_cds;
+ cdum.cdum_fp = cdufp->cduf_curfp;
+ cdum.cdum_iterfp = cdufp->cduf_altfp;
+ cdum.cdum_name = name;
+ cdum.cdum_type = id;
+ cdum.cdum_ret = B_TRUE;
+
+ ret = ctf_member_iter(cdum.cdum_iterfp, cdufp->cduf_type,
+ ctf_diff_union_check_member, &cdum);
+ if (ret == 0 || cdum.cdum_ret == CTF_ERR) {
+ /* No match found or error, terminate now */
+ cdufp->cduf_ret = cdum.cdum_ret;
+ return (1);
+ } else if (ret == CTF_ERR) {
+ (void) ctf_set_errno(cdum.cdum_fp, ctf_errno(cdum.cdum_iterfp));
+ cdufp->cduf_ret = CTF_ERR;
+ return (1);
+ } else {
+ ASSERT(cdum.cdum_ret == B_FALSE);
+ cdufp->cduf_ret = cdum.cdum_ret;
+ return (0);
+ }
+}
+
+static int
+ctf_diff_union(ctf_diff_t *cds, ctf_file_t *ifp, ctf_id_t iid, ctf_file_t *ofp,
+ ctf_id_t oid)
+{
+ ctf_file_t *oifp;
+ const ctf_type_t *itp, *otp;
+ ctf_diff_union_fp_t cduf;
+ int ret;
+
+ oifp = ifp;
+ if ((itp = ctf_lookup_by_id(&ifp, iid)) == NULL)
+ return (CTF_ERR);
+ if ((otp = ctf_lookup_by_id(&ofp, oid)) == NULL)
+ return (ctf_set_errno(oifp, ctf_errno(ofp)));
+
+ if (LCTF_INFO_VLEN(ifp, itp->ctt_info) !=
+ LCTF_INFO_VLEN(ofp, otp->ctt_info))
+ return (B_TRUE);
+
+ cduf.cduf_cds = cds;
+ cduf.cduf_curfp = ifp;
+ cduf.cduf_altfp = ofp;
+ cduf.cduf_type = oid;
+ cduf.cduf_ret = B_TRUE;
+ ret = ctf_member_iter(ifp, iid, ctf_diff_union_check_fp, &cduf);
+ if (ret != CTF_ERR)
+ ret = cduf.cduf_ret;
+
+ return (ret);
+}
+
+/*
+ * Two enums are equivalent if they share the same underlying type and they have
+ * the same set of members.
+ */
+static int
+ctf_diff_enum(ctf_file_t *ifp, ctf_id_t iid, ctf_file_t *ofp, ctf_id_t oid)
+{
+ ctf_file_t *oifp;
+ const ctf_type_t *itp, *otp;
+ ssize_t iincr, oincr;
+ const ctf_enum_t *iep, *oep;
+ int n;
+
+ oifp = ifp;
+ if ((itp = ctf_lookup_by_id(&ifp, iid)) == NULL)
+ return (CTF_ERR);
+ if ((otp = ctf_lookup_by_id(&ofp, oid)) == NULL)
+ return (ctf_set_errno(oifp, ctf_errno(ofp)));
+
+ if (LCTF_INFO_VLEN(ifp, itp->ctt_info) !=
+ LCTF_INFO_VLEN(ofp, otp->ctt_info))
+ return (B_TRUE);
+
+ (void) ctf_get_ctt_size(ifp, itp, NULL, &iincr);
+ (void) ctf_get_ctt_size(ofp, otp, NULL, &oincr);
+ iep = (const ctf_enum_t *)((uintptr_t)itp + iincr);
+ oep = (const ctf_enum_t *)((uintptr_t)otp + oincr);
+
+ for (n = LCTF_INFO_VLEN(ifp, itp->ctt_info); n != 0;
+ n--, iep++, oep++) {
+ if (strcmp(ctf_strptr(ifp, iep->cte_name),
+ ctf_strptr(ofp, oep->cte_name)) != 0)
+ return (B_TRUE);
+
+ if (iep->cte_value != oep->cte_value)
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
+/*
+ * Two forwards are equivalent in one of two cases. If both are forwards, than
+ * they are the same. Otherwise, they're equivalent if one is a struct or union
+ * and the other is a forward.
+ */
+static int
+ctf_diff_forward(ctf_file_t *ifp, ctf_id_t iid, ctf_file_t *ofp, ctf_id_t oid)
+{
+ int ikind, okind;
+
+ ikind = ctf_type_kind(ifp, iid);
+ okind = ctf_type_kind(ofp, oid);
+
+ if (ikind == okind) {
+ ASSERT(ikind == CTF_K_FORWARD);
+ return (B_FALSE);
+ } else if (ikind == CTF_K_FORWARD) {
+ return (okind != CTF_K_UNION && okind != CTF_K_STRUCT);
+ } else {
+ return (ikind != CTF_K_UNION && ikind != CTF_K_STRUCT);
+ }
+}
+
+/*
+ * Are two types equivalent?
+ */
+int
+ctf_diff_type(ctf_diff_t *cds, ctf_file_t *ifp, ctf_id_t iid, ctf_file_t *ofp,
+ ctf_id_t oid)
+{
+ int ret, ikind, okind;
+ ctf_ihelem_t *lookup, *fv, *rv;
+
+ /* Do a quick short circuit */
+ if (ifp == ofp && iid == oid)
+ return (B_FALSE);
+
+ /*
+ * Check if it's something we've already encountered in a forward
+ * reference or forward negative table.
+ */
+ if ((lookup = ctf_idhash_lookup(&cds->cds_forward, iid)) != NULL) {
+ if (lookup->ih_value == oid)
+ return (B_FALSE);
+ else
+ return (B_TRUE);
+ }
+
+ if (ctf_idhash_lookup(&cds->cds_forward, iid) != NULL) {
+ return (B_TRUE);
+ }
+
+ fv = ctf_idhash_lookup(&cds->cds_f_visited, iid);
+ rv = ctf_idhash_lookup(&cds->cds_r_visited, oid);
+ if (fv != NULL && rv != NULL)
+ return (fv->ih_value != rv->ih_value);
+ else if (fv != NULL || rv != NULL)
+ return (B_TRUE);
+
+ ikind = ctf_type_kind(ifp, iid);
+ okind = ctf_type_kind(ofp, oid);
+
+ /* Check names */
+ if ((ret = ctf_diff_name(ifp, iid, ofp, oid)) != B_FALSE) {
+ if (ikind != okind || ikind != CTF_K_INTEGER ||
+ (cds->cds_flags & CTF_DIFF_F_IGNORE_INTNAMES) == 0)
+ return (ret);
+ }
+
+ if (ikind != okind) {
+ if (ikind == CTF_K_FORWARD || okind == CTF_K_FORWARD)
+ return (ctf_diff_forward(ifp, iid, ofp, oid));
+ else
+ return (B_TRUE);
+ }
+
+ switch (ikind) {
+ case CTF_K_INTEGER:
+ case CTF_K_FLOAT:
+ ret = ctf_diff_number(ifp, iid, ofp, oid);
+ break;
+ case CTF_K_ARRAY:
+ ret = ctf_diff_array(cds, ifp, iid, ofp, oid);
+ break;
+ case CTF_K_FUNCTION:
+ ret = ctf_diff_func(cds, ifp, iid, ofp, oid);
+ break;
+ case CTF_K_STRUCT:
+ VERIFY(ctf_idhash_insert(&cds->cds_f_visited, iid,
+ cds->cds_visitid) == 0);
+ VERIFY(ctf_idhash_insert(&cds->cds_r_visited, oid,
+ cds->cds_visitid) == 0);
+ cds->cds_visitid++;
+ ret = ctf_diff_struct(cds, ifp, iid, ofp, oid);
+ break;
+ case CTF_K_UNION:
+ VERIFY(ctf_idhash_insert(&cds->cds_f_visited, iid,
+ cds->cds_visitid) == 0);
+ VERIFY(ctf_idhash_insert(&cds->cds_r_visited, oid,
+ cds->cds_visitid) == 0);
+ cds->cds_visitid++;
+ ret = ctf_diff_union(cds, ifp, iid, ofp, oid);
+ break;
+ case CTF_K_ENUM:
+ ret = ctf_diff_enum(ifp, iid, ofp, oid);
+ break;
+ case CTF_K_FORWARD:
+ ret = ctf_diff_forward(ifp, iid, ofp, oid);
+ break;
+ case CTF_K_POINTER:
+ case CTF_K_TYPEDEF:
+ case CTF_K_VOLATILE:
+ case CTF_K_CONST:
+ case CTF_K_RESTRICT:
+ ret = ctf_diff_qualifier(cds, ifp, iid, ofp, oid);
+ break;
+ case CTF_K_UNKNOWN:
+ /*
+ * The current CTF tools use CTF_K_UNKNOWN as a padding type. We
+ * always declare two instances of CTF_K_UNKNOWN as different,
+ * even though this leads to additional diff noise.
+ */
+ ret = B_TRUE;
+ break;
+ default:
+ abort();
+ }
+
+ return (ret);
+}
+
+/*
+ * Walk every type in the first container and try to find a match in the second.
+ * If there is a match, then update both the forward and reverse mapping tables.
+ */
+static int
+ctf_diff_pass1(ctf_diff_t *cds)
+{
+ int i, j, diff;
+ int istart, iend, jstart, jend;
+
+ if (cds->cds_ifp->ctf_flags & LCTF_CHILD) {
+ istart = 0x8001;
+ iend = cds->cds_ifp->ctf_typemax + 0x8000;
+ } else {
+ istart = 1;
+ iend = cds->cds_ifp->ctf_typemax;
+ }
+
+ if (cds->cds_ofp->ctf_flags & LCTF_CHILD) {
+ jstart = 0x8001;
+ jend = cds->cds_ofp->ctf_typemax + 0x8000;
+ } else {
+ jstart = 1;
+ jend = cds->cds_ofp->ctf_typemax;
+ }
+
+ for (i = istart; i <= iend; i++) {
+ diff = B_TRUE;
+ for (j = jstart; j <= jend; j++) {
+ cds->cds_visitid = 1;
+ ctf_idhash_clear(&cds->cds_f_visited);
+ ctf_idhash_clear(&cds->cds_r_visited);
+
+ diff = ctf_diff_type(cds, cds->cds_ifp, i,
+ cds->cds_ofp, j);
+ if (diff == CTF_ERR)
+ return (CTF_ERR);
+
+ /* Found a hit, update the tables */
+ if (diff == B_FALSE) {
+ VERIFY(ctf_idhash_lookup(&cds->cds_forward,
+ i) == NULL);
+ VERIFY(ctf_idhash_insert(&cds->cds_forward,
+ i, j) == 0);
+ if (ctf_idhash_lookup(&cds->cds_reverse, j) ==
+ NULL) {
+ VERIFY(ctf_idhash_lookup(
+ &cds->cds_reverse, j) == NULL);
+ VERIFY(ctf_idhash_insert(
+ &cds->cds_reverse, j, i) == 0);
+ }
+ break;
+ }
+ }
+
+ /* Call the callback at this point */
+ if (diff == B_TRUE) {
+ VERIFY(ctf_idhash_insert(&cds->cds_fneg, i, 1) == 0);
+ cds->cds_func(cds->cds_ifp, i, B_FALSE, NULL, CTF_ERR,
+ cds->cds_arg);
+ } else {
+ cds->cds_func(cds->cds_ifp, i, B_TRUE, cds->cds_ofp, j,
+ cds->cds_arg);
+ }
+ }
+
+ return (0);
+}
+
+/*
+ * Now we need to walk the second container and emit anything that we didn't
+ * find as common in the first pass.
+ */
+static int
+ctf_diff_pass2(ctf_diff_t *cds)
+{
+ int i;
+
+ for (i = 1; i <= cds->cds_ofp->ctf_typemax; i++) {
+ if (ctf_idhash_lookup(&cds->cds_reverse, i) != NULL)
+ continue;
+ cds->cds_func(cds->cds_ofp, i, B_FALSE, NULL, CTF_ERR,
+ cds->cds_arg);
+ }
+
+ return (0);
+}
+
+int
+ctf_diff_init(ctf_file_t *ifp, ctf_file_t *ofp, ctf_diff_t **cdsp)
+{
+ int ret;
+ ctf_diff_t *cds;
+
+ cds = ctf_alloc(sizeof (ctf_diff_t));
+ if (cds == NULL)
+ return (ctf_set_errno(ifp, ENOMEM));
+
+ bzero(cds, sizeof (ctf_diff_t));
+ cds->cds_ifp = ifp;
+ cds->cds_ofp = ofp;
+ ret = ctf_idhash_create(&cds->cds_forward, ifp->ctf_typemax);
+ if (ret != 0) {
+ ctf_free(cds, sizeof (ctf_diff_t));
+ return (ctf_set_errno(ifp, ret));
+ }
+ ret = ctf_idhash_create(&cds->cds_reverse, ofp->ctf_typemax);
+ if (ret != 0) {
+ ctf_idhash_destroy(&cds->cds_forward);
+ ctf_free(cds, sizeof (ctf_diff_t));
+ return (ctf_set_errno(ifp, ret));
+ }
+ ret = ctf_idhash_create(&cds->cds_f_visited, ifp->ctf_typemax);
+ if (ret != 0) {
+ ctf_idhash_destroy(&cds->cds_reverse);
+ ctf_idhash_destroy(&cds->cds_forward);
+ ctf_free(cds, sizeof (ctf_diff_t));
+ return (ctf_set_errno(ifp, ret));
+ }
+ ret = ctf_idhash_create(&cds->cds_r_visited, ofp->ctf_typemax);
+ if (ret != 0) {
+ ctf_idhash_destroy(&cds->cds_f_visited);
+ ctf_idhash_destroy(&cds->cds_reverse);
+ ctf_idhash_destroy(&cds->cds_forward);
+ ctf_free(cds, sizeof (ctf_diff_t));
+ return (ctf_set_errno(ifp, ret));
+ }
+ ret = ctf_idhash_create(&cds->cds_fneg, ifp->ctf_typemax);
+ if (ret != 0) {
+ ctf_idhash_destroy(&cds->cds_r_visited);
+ ctf_idhash_destroy(&cds->cds_f_visited);
+ ctf_idhash_destroy(&cds->cds_reverse);
+ ctf_idhash_destroy(&cds->cds_forward);
+ ctf_free(cds, sizeof (ctf_diff_t));
+ return (ctf_set_errno(ifp, ret));
+ }
+
+ cds->cds_ifp->ctf_refcnt++;
+ cds->cds_ofp->ctf_refcnt++;
+ *cdsp = cds;
+ return (0);
+}
+
+int
+ctf_diff_types(ctf_diff_t *cds, ctf_diff_type_f cb, void *arg)
+{
+ int ret;
+
+ cds->cds_func = cb;
+ cds->cds_arg = arg;
+
+ /*
+ * For the moment clear all idhashes and rerun this phase. Ideally we
+ * should reuse this, but we can save that for when we add things like
+ * taking the diff of the objects and the like.
+ */
+ ctf_idhash_clear(&cds->cds_forward);
+ ctf_idhash_clear(&cds->cds_reverse);
+ ctf_idhash_clear(&cds->cds_fneg);
+ ctf_idhash_clear(&cds->cds_f_visited);
+ ctf_idhash_clear(&cds->cds_r_visited);
+
+ ret = ctf_diff_pass1(cds);
+ if (ret == 0)
+ ret = ctf_diff_pass2(cds);
+
+ cds->cds_func = NULL;
+ cds->cds_arg = NULL;
+ return (ret);
+}
+
+void
+ctf_diff_fini(ctf_diff_t *cds)
+{
+ cds->cds_ifp->ctf_refcnt--;
+ cds->cds_ofp->ctf_refcnt--;
+
+ ctf_idhash_destroy(&cds->cds_fneg);
+ ctf_idhash_destroy(&cds->cds_r_visited);
+ ctf_idhash_destroy(&cds->cds_f_visited);
+ ctf_idhash_destroy(&cds->cds_reverse);
+ ctf_idhash_destroy(&cds->cds_forward);
+ ctf_free(cds, sizeof (ctf_diff_t));
+}
+
+uint_t
+ctf_diff_getflags(ctf_diff_t *cds)
+{
+ return (cds->cds_flags);
+}
+
+int
+ctf_diff_setflags(ctf_diff_t *cds, uint_t flags)
+{
+ if ((flags & ~CTF_DIFF_F_MASK) != 0)
+ return (ctf_set_errno(cds->cds_ifp, EINVAL));
+
+ cds->cds_flags = flags;
+ return (0);
+}
diff --git a/usr/src/lib/libctf/common/libctf.h b/usr/src/lib/libctf/common/libctf.h
index 3fd69318de..07601b4ee4 100644
--- a/usr/src/lib/libctf/common/libctf.h
+++ b/usr/src/lib/libctf/common/libctf.h
@@ -23,6 +23,9 @@
* Copyright 2001-2003 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
/*
* This header file defines the interfaces available from the CTF debugger
@@ -40,8 +43,6 @@
#ifndef _LIBCTF_H
#define _LIBCTF_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/ctf_api.h>
#ifdef __cplusplus
@@ -53,6 +54,21 @@ extern "C" {
*/
extern int _libctf_debug;
+typedef enum ctf_diff_flag {
+ CTF_DIFF_F_IGNORE_INTNAMES = 0x01,
+ CTF_DIFF_F_MASK = 0x01
+} ctf_diff_flag_t;
+
+typedef struct ctf_diff ctf_diff_t;
+typedef void (*ctf_diff_type_f)(ctf_file_t *, ctf_id_t, boolean_t, ctf_file_t *,
+ ctf_id_t, void *);
+
+extern int ctf_diff_init(ctf_file_t *, ctf_file_t *, ctf_diff_t **);
+extern uint_t ctf_diff_getflags(ctf_diff_t *);
+extern int ctf_diff_setflags(ctf_diff_t *, uint_t);
+extern int ctf_diff_types(ctf_diff_t *, ctf_diff_type_f, void *);
+extern void ctf_diff_fini(ctf_diff_t *);
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/lib/libctf/common/mapfile-vers b/usr/src/lib/libctf/common/mapfile-vers
index 5573e8db25..d09b88fd01 100644
--- a/usr/src/lib/libctf/common/mapfile-vers
+++ b/usr/src/lib/libctf/common/mapfile-vers
@@ -65,9 +65,26 @@ SYMBOL_VERSION SUNWprivate_1.2 {
ctf_add_volatile;
ctf_create;
ctf_delete_type;
+ ctf_diff_init;
+ ctf_diff_fini;
+ ctf_diff_getflags;
+ ctf_diff_setflags;
+ ctf_diff_types;
ctf_discard;
ctf_dup;
ctf_enum_value;
+ ctf_func_args_by_id;
+ ctf_func_info_by_id;
+ ctf_idhash_clear;
+ ctf_idhash_create;
+ ctf_idhash_define;
+ ctf_idhash_insert;
+ ctf_idhash_iter;
+ ctf_idhash_iter_init;
+ ctf_idhash_iter_fini;
+ ctf_idhash_lookup;
+ ctf_idhash_size;
+ ctf_idhash_destroy;
ctf_label_info;
ctf_label_iter;
ctf_label_topmost;
diff --git a/usr/src/lib/libcurses/screen/setupterm.c b/usr/src/lib/libcurses/screen/setupterm.c
index c1f4bdeb04..f67e8751b7 100644
--- a/usr/src/lib/libcurses/screen/setupterm.c
+++ b/usr/src/lib/libcurses/screen/setupterm.c
@@ -37,8 +37,6 @@
* contributors.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*LINTLIBRARY*/
#include <stdio.h>
@@ -48,6 +46,7 @@
#include <string.h>
#include <unistd.h>
#include <errno.h>
+#include <zone.h>
#include "curses_inc.h"
#define TERMPATH "/usr/share/lib/terminfo/"
@@ -272,9 +271,11 @@ setupterm(char *term, int filenum, int *errret)
}
if (tfd < 0) {
+ const char *zroot = zone_get_nroot();
/* /usr/share/lib/terminfo/?/$TERM */
if (snprintf(fname, sizeof (fname),
- "%s/%c/%s", TERMPATH, *term, term) >= sizeof (fname)) {
+ "%s/%s/%c/%s", zroot == NULL ? "" : zroot, TERMPATH,
+ *term, term) >= sizeof (fname)) {
term_errno = TERMINFO_TOO_LONG;
goto out_err;
}
diff --git a/usr/src/lib/libdladm/common/libdladm.h b/usr/src/lib/libdladm/common/libdladm.h
index c2fceb25ab..f3347a0ede 100644
--- a/usr/src/lib/libdladm/common/libdladm.h
+++ b/usr/src/lib/libdladm/common/libdladm.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent, Inc. All rights reserved.
*/
#ifndef _LIBDLADM_H
@@ -71,6 +72,10 @@ extern "C" {
* - DLADM_OPT_BOOT:
* Bypass check functions during boot (used by pool property since pools
* can come up after link properties are set)
+ *
+ * - DLADM_OPT_TRANSIENT:
+ * Indicates that the link assigned to a zone is transient and will be
+ * removed when the zone shuts down.
*/
#define DLADM_OPT_ACTIVE 0x00000001
#define DLADM_OPT_PERSIST 0x00000002
@@ -81,6 +86,7 @@ extern "C" {
#define DLADM_OPT_VLAN 0x00000040
#define DLADM_OPT_NOREFRESH 0x00000080
#define DLADM_OPT_BOOT 0x00000100
+#define DLADM_OPT_TRANSIENT 0x00000200
#define DLADM_WALK_TERMINATE 0
#define DLADM_WALK_CONTINUE -1
diff --git a/usr/src/lib/libdladm/common/libdllink.c b/usr/src/lib/libdladm/common/libdllink.c
index 8a3c5759ee..303885e929 100644
--- a/usr/src/lib/libdladm/common/libdllink.c
+++ b/usr/src/lib/libdladm/common/libdllink.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -386,10 +387,14 @@ dladm_linkduplex2str(link_duplex_t duplex, char *buf)
/*
* Case 1: rename an existing link1 to a link2 that does not exist.
* Result: <linkid1, link2>
+ * The zonename parameter is used to allow us to create a VNIC in the global
+ * zone which is assigned to a non-global zone. Since there is a race condition
+ * in the create process if two VNICs have the same name, we need to rename it
+ * after it has been assigned to the zone.
*/
static dladm_status_t
i_dladm_rename_link_c1(dladm_handle_t handle, datalink_id_t linkid1,
- const char *link1, const char *link2, uint32_t flags)
+ const char *link1, const char *link2, uint32_t flags, const char *zonename)
{
dld_ioc_rename_t dir;
dladm_status_t status = DLADM_STATUS_OK;
@@ -402,6 +407,10 @@ i_dladm_rename_link_c1(dladm_handle_t handle, datalink_id_t linkid1,
dir.dir_linkid1 = linkid1;
dir.dir_linkid2 = DATALINK_INVALID_LINKID;
(void) strlcpy(dir.dir_link, link2, MAXLINKNAMELEN);
+ if (zonename != NULL)
+ dir.dir_zoneinit = B_TRUE;
+ else
+ dir.dir_zoneinit = B_FALSE;
if (ioctl(dladm_dld_fd(handle), DLDIOC_RENAME, &dir) < 0) {
status = dladm_errno2status(errno);
@@ -412,6 +421,7 @@ i_dladm_rename_link_c1(dladm_handle_t handle, datalink_id_t linkid1,
status = dladm_remap_datalink_id(handle, linkid1, link2);
if (status != DLADM_STATUS_OK && (flags & DLADM_OPT_ACTIVE)) {
(void) strlcpy(dir.dir_link, link1, MAXLINKNAMELEN);
+ dir.dir_zoneinit = B_FALSE;
(void) ioctl(dladm_dld_fd(handle), DLDIOC_RENAME, &dir);
}
return (status);
@@ -508,6 +518,7 @@ i_dladm_rename_link_c2(dladm_handle_t handle, datalink_id_t linkid1,
*/
dir.dir_linkid1 = linkid1;
dir.dir_linkid2 = linkid2;
+ dir.dir_zoneinit = B_FALSE;
if (ioctl(dladm_dld_fd(handle), DLDIOC_RENAME, &dir) < 0)
status = dladm_errno2status(errno);
@@ -617,7 +628,8 @@ done:
}
dladm_status_t
-dladm_rename_link(dladm_handle_t handle, const char *link1, const char *link2)
+dladm_rename_link(dladm_handle_t handle, const char *zonename,
+ const char *link1, const char *link2)
{
datalink_id_t linkid1 = DATALINK_INVALID_LINKID;
datalink_id_t linkid2 = DATALINK_INVALID_LINKID;
@@ -627,11 +639,11 @@ dladm_rename_link(dladm_handle_t handle, const char *link1, const char *link2)
boolean_t remphy2 = B_FALSE;
dladm_status_t status;
- (void) dladm_name2info(handle, link1, &linkid1, &flags1, &class1,
- &media1);
- if ((dladm_name2info(handle, link2, &linkid2, &flags2, &class2,
- &media2) == DLADM_STATUS_OK) && (class2 == DATALINK_CLASS_PHYS) &&
- (flags2 == DLADM_OPT_PERSIST)) {
+ (void) dladm_zname2info(handle, zonename, link1, &linkid1, &flags1,
+ &class1, &media1);
+ if ((dladm_zname2info(handle, zonename, link2, &linkid2, &flags2,
+ &class2, &media2) == DLADM_STATUS_OK) &&
+ (class2 == DATALINK_CLASS_PHYS) && (flags2 == DLADM_OPT_PERSIST)) {
/*
* see whether link2 is a removed physical link.
*/
@@ -645,7 +657,7 @@ dladm_rename_link(dladm_handle_t handle, const char *link1, const char *link2)
* does not exist.
*/
status = i_dladm_rename_link_c1(handle, linkid1, link1,
- link2, flags1);
+ link2, flags1, zonename);
} else if (remphy2) {
/*
* case 2: rename an available link to a REMOVED
diff --git a/usr/src/lib/libdladm/common/libdllink.h b/usr/src/lib/libdladm/common/libdllink.h
index a2830b5e37..a858e78aa3 100644
--- a/usr/src/lib/libdladm/common/libdllink.h
+++ b/usr/src/lib/libdladm/common/libdllink.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent Inc. All rights reserved.
*/
#ifndef _LIBDLLINK_H
@@ -121,7 +122,7 @@ extern dladm_status_t dladm_info(dladm_handle_t, datalink_id_t,
dladm_attr_t *);
extern dladm_status_t dladm_rename_link(dladm_handle_t, const char *,
- const char *);
+ const char *, const char *);
extern dladm_status_t dladm_set_linkprop(dladm_handle_t, datalink_id_t,
const char *, char **, uint_t, uint_t);
@@ -170,6 +171,9 @@ extern dladm_status_t dladm_up_datalink_id(dladm_handle_t, datalink_id_t);
extern dladm_status_t dladm_name2info(dladm_handle_t, const char *,
datalink_id_t *, uint32_t *, datalink_class_t *,
uint32_t *);
+extern dladm_status_t dladm_zname2info(dladm_handle_t, const char *,
+ const char *, datalink_id_t *, uint32_t *,
+ datalink_class_t *, uint32_t *);
extern dladm_status_t dladm_datalink_id2info(dladm_handle_t, datalink_id_t,
uint32_t *, datalink_class_t *, uint32_t *, char *,
size_t);
diff --git a/usr/src/lib/libdladm/common/libdlmgmt.c b/usr/src/lib/libdladm/common/libdlmgmt.c
index 4b0753417c..c9c7906934 100644
--- a/usr/src/lib/libdladm/common/libdlmgmt.c
+++ b/usr/src/lib/libdladm/common/libdlmgmt.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent Inc. All rights reserved.
*/
#include <door.h>
@@ -528,12 +529,24 @@ dladm_getnext_conf_linkprop(dladm_handle_t handle, dladm_conf_t conf,
}
/*
- * Get the link ID that is associated with the given name.
+ * Get the link ID that is associated with the given name in the current zone.
*/
dladm_status_t
dladm_name2info(dladm_handle_t handle, const char *link, datalink_id_t *linkidp,
uint32_t *flagp, datalink_class_t *classp, uint32_t *mediap)
{
+ return (dladm_zname2info(handle, NULL, link, linkidp, flagp, classp,
+ mediap));
+}
+
+/*
+ * Get the link ID that is associated with the given zone/name pair.
+ */
+dladm_status_t
+dladm_zname2info(dladm_handle_t handle, const char *zonename, const char *link,
+ datalink_id_t *linkidp, uint32_t *flagp, datalink_class_t *classp,
+ uint32_t *mediap)
+{
dlmgmt_door_getlinkid_t getlinkid;
dlmgmt_getlinkid_retval_t retval;
datalink_id_t linkid;
@@ -542,6 +555,10 @@ dladm_name2info(dladm_handle_t handle, const char *link, datalink_id_t *linkidp,
getlinkid.ld_cmd = DLMGMT_CMD_GETLINKID;
(void) strlcpy(getlinkid.ld_link, link, MAXLINKNAMELEN);
+ if (zonename != NULL)
+ getlinkid.ld_zoneid = getzoneidbyname(zonename);
+ else
+ getlinkid.ld_zoneid = -1;
if ((status = dladm_door_call(handle, &getlinkid, sizeof (getlinkid),
&retval, &sz)) != DLADM_STATUS_OK) {
diff --git a/usr/src/lib/libdladm/common/libdlvnic.c b/usr/src/lib/libdladm/common/libdlvnic.c
index 44f8bb2726..4cd5d8e56d 100644
--- a/usr/src/lib/libdladm/common/libdlvnic.c
+++ b/usr/src/lib/libdladm/common/libdlvnic.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent Inc. All rights reserved.
*/
#include <stdio.h>
@@ -539,27 +540,35 @@ dladm_vnic_create(dladm_handle_t handle, const char *vnic, datalink_id_t linkid,
vnic_created = B_TRUE;
/* Save vnic configuration and its properties */
- if (!(flags & DLADM_OPT_PERSIST))
- goto done;
+ if (flags & DLADM_OPT_PERSIST) {
+ status = dladm_vnic_persist_conf(handle, name, &attr, class);
+ if (status == DLADM_STATUS_OK)
+ conf_set = B_TRUE;
+ }
- status = dladm_vnic_persist_conf(handle, name, &attr, class);
- if (status != DLADM_STATUS_OK)
- goto done;
- conf_set = B_TRUE;
+done:
+ if (status == DLADM_STATUS_OK && proplist != NULL) {
+ uint32_t flg;
+
+ flg = (flags & DLADM_OPT_PERSIST) ?
+ DLADM_OPT_PERSIST : DLADM_OPT_ACTIVE;
- if (proplist != NULL) {
for (i = 0; i < proplist->al_count; i++) {
dladm_arg_info_t *aip = &proplist->al_info[i];
+ if (strcmp(aip->ai_name, "zone") == 0 &&
+ flags & DLADM_OPT_TRANSIENT)
+ flg |= DLADM_OPT_TRANSIENT;
+ else
+ flg &= ~DLADM_OPT_TRANSIENT;
+
status = dladm_set_linkprop(handle, vnic_id,
- aip->ai_name, aip->ai_val, aip->ai_count,
- DLADM_OPT_PERSIST);
+ aip->ai_name, aip->ai_val, aip->ai_count, flg);
if (status != DLADM_STATUS_OK)
break;
}
}
-done:
if (status != DLADM_STATUS_OK) {
if (conf_set)
(void) dladm_remove_conf(handle, vnic_id);
diff --git a/usr/src/lib/libdladm/common/linkprop.c b/usr/src/lib/libdladm/common/linkprop.c
index c3eea9a7f1..8d43e2d6b9 100644
--- a/usr/src/lib/libdladm/common/linkprop.c
+++ b/usr/src/lib/libdladm/common/linkprop.c
@@ -153,11 +153,13 @@ static pd_getf_t get_zone, get_autopush, get_rate_mod, get_rate,
get_bridge_pvid, get_protection, get_rxrings,
get_txrings, get_cntavail, get_secondary_macs,
get_allowedips, get_allowedcids, get_pool,
- get_rings_range, get_linkmode_prop;
+ get_rings_range, get_linkmode_prop,
+ get_promisc_filtered;
static pd_setf_t set_zone, set_rate, set_powermode, set_radio,
set_public_prop, set_resource, set_stp_prop,
- set_bridge_forward, set_bridge_pvid, set_secondary_macs;
+ set_bridge_forward, set_bridge_pvid, set_secondary_macs,
+ set_promisc_filtered;
static pd_checkf_t check_zone, check_autopush, check_rate, check_hoplimit,
check_encaplim, check_uint32, check_maxbw, check_cpus,
@@ -365,6 +367,8 @@ static link_attr_t link_attr[] = {
{ MAC_PROP_IB_LINKMODE, sizeof (uint32_t), "linkmode"},
+ { MAC_PROP_VN_PROMISC_FILTERED, sizeof (boolean_t), "promisc-filtered"},
+
{ MAC_PROP_SECONDARY_ADDRS, sizeof (mac_secondary_addr_t),
"secondary-macs"},
@@ -422,6 +426,11 @@ static val_desc_t link_protect_vals[] = {
{ "dhcp-nospoof", MPT_DHCPNOSPOOF },
};
+static val_desc_t link_promisc_filtered_vals[] = {
+ { "off", B_FALSE },
+ { "on", B_TRUE },
+};
+
static val_desc_t dladm_wlan_radio_vals[] = {
{ "on", DLADM_WLAN_RADIO_ON },
{ "off", DLADM_WLAN_RADIO_OFF }
@@ -698,6 +707,12 @@ static prop_desc_t prop_table[] = {
set_resource, NULL, get_protection, check_prop, 0,
DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE },
+ { "promisc-filtered", { "on", 1 },
+ link_promisc_filtered_vals, VALCNT(link_promisc_filtered_vals),
+ set_promisc_filtered, NULL, get_promisc_filtered, check_prop, 0,
+ DATALINK_CLASS_VNIC, DATALINK_ANY_MEDIATYPE },
+
+
{ "allowed-ips", { "--", 0 },
NULL, 0, set_resource, NULL,
get_allowedips, check_allowedips, PD_CHECK_ALLOC,
@@ -1529,6 +1544,9 @@ set_zone(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid,
if (zid_new == zid_old)
return (DLADM_STATUS_OK);
+ if (flags & DLADM_OPT_TRANSIENT)
+ dzp->diz_transient = B_TRUE;
+
if ((status = set_public_prop(handle, pdp, linkid, vdp, val_cnt,
flags, media)) != DLADM_STATUS_OK)
return (status);
@@ -4770,3 +4788,50 @@ get_linkmode_prop(dladm_handle_t handle, prop_desc_t *pdp,
*val_cnt = 1;
return (DLADM_STATUS_OK);
}
+
+/*ARGSUSED*/
+static dladm_status_t
+get_promisc_filtered(dladm_handle_t handle, prop_desc_t *pdp,
+ datalink_id_t linkid, char **prop_val, uint_t *val_cnt,
+ datalink_media_t media, uint_t flags, uint_t *perm_flags)
+{
+ char *s;
+ dladm_status_t status;
+ boolean_t filt;
+
+ status = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags,
+ perm_flags, &filt, sizeof (filt));
+ if (status != DLADM_STATUS_OK)
+ return (status);
+
+ if (filt != 0)
+ s = link_promisc_filtered_vals[1].vd_name;
+ else
+ s = link_promisc_filtered_vals[0].vd_name;
+ (void) snprintf(prop_val[0], DLADM_STRSIZE, "%s", s);
+
+ *val_cnt = 1;
+ return (DLADM_STATUS_OK);
+}
+
+/* ARGSUSED */
+static dladm_status_t
+set_promisc_filtered(dladm_handle_t handle, prop_desc_t *pdp,
+ datalink_id_t linkid, val_desc_t *vdp, uint_t val_cnt, uint_t flags,
+ datalink_media_t media)
+{
+ dld_ioc_macprop_t *dip;
+ dladm_status_t status = DLADM_STATUS_OK;
+
+ dip = i_dladm_buf_alloc_by_name(0, linkid, pdp->pd_name,
+ 0, &status);
+
+ if (dip == NULL)
+ return (status);
+
+ (void) memcpy(dip->pr_val, &vdp->vd_val, dip->pr_valsize);
+ status = i_dladm_macprop(handle, dip, B_TRUE);
+
+ free(dip);
+ return (status);
+}
diff --git a/usr/src/lib/libdladm/common/mapfile-vers b/usr/src/lib/libdladm/common/mapfile-vers
index b781c93aff..3eaeea656e 100644
--- a/usr/src/lib/libdladm/common/mapfile-vers
+++ b/usr/src/lib/libdladm/common/mapfile-vers
@@ -20,6 +20,7 @@
#
#
# Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, Joyent Inc. All rights reserved.
#
#
@@ -134,6 +135,7 @@ SYMBOL_VERSION SUNWprivate_1.1 {
dladm_remap_datalink_id;
dladm_up_datalink_id;
dladm_name2info;
+ dladm_zname2info;
dladm_datalink_id2info;
dladm_walk_datalink_id;
dladm_create_conf;
diff --git a/usr/src/lib/libdlpi/common/libdlpi.c b/usr/src/lib/libdlpi/common/libdlpi.c
index 1a639a5db9..aa97afee04 100644
--- a/usr/src/lib/libdlpi/common/libdlpi.c
+++ b/usr/src/lib/libdlpi/common/libdlpi.c
@@ -22,6 +22,9 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2014, Joyent, Inc.
+ */
/*
* Data-Link Provider Interface (Version 2)
@@ -51,7 +54,7 @@
#include "libdlpi_impl.h"
-static int i_dlpi_open(const char *, int *, uint_t, boolean_t);
+static int i_dlpi_open(const char *, const char *, int *, uint_t, boolean_t);
static int i_dlpi_style1_open(dlpi_impl_t *);
static int i_dlpi_style2_open(dlpi_impl_t *);
static int i_dlpi_checkstyle(dlpi_impl_t *, t_uscalar_t);
@@ -130,7 +133,8 @@ dlpi_walk(dlpi_walkfunc_t *fn, void *arg, uint_t flags)
}
int
-dlpi_open(const char *linkname, dlpi_handle_t *dhp, uint_t flags)
+dlpi_open_zone(const char *linkname, const char *zonename, dlpi_handle_t *dhp,
+ uint_t flags)
{
int retval, on = 1;
ifspec_t ifsp;
@@ -164,6 +168,16 @@ dlpi_open(const char *linkname, dlpi_handle_t *dhp, uint_t flags)
if (getenv("DLPI_DEVONLY") != NULL)
dip->dli_oflags |= DLPI_DEVONLY;
+ if (zonename == NULL) {
+ dip->dli_zonename[0] = '\0';
+ } else {
+ if (strlcpy(dip->dli_zonename, zonename,
+ sizeof (dip->dli_zonename)) >= sizeof (dip->dli_zonename)) {
+ free(dip);
+ return (DLPI_EZONENAMEINVAL);
+ }
+ }
+
/* Copy linkname provided to the function. */
if (strlcpy(dip->dli_linkname, linkname, sizeof (dip->dli_linkname)) >=
sizeof (dip->dli_linkname)) {
@@ -237,6 +251,12 @@ dlpi_open(const char *linkname, dlpi_handle_t *dhp, uint_t flags)
return (DLPI_SUCCESS);
}
+int
+dlpi_open(const char *linkname, dlpi_handle_t *dhp, uint_t flags)
+{
+ return (dlpi_open_zone(linkname, NULL, dhp, flags));
+}
+
void
dlpi_close(dlpi_handle_t dh)
{
@@ -1013,6 +1033,15 @@ dlpi_iftype(uint_t dlpitype)
* /dev - if DLPI_DEVONLY is specified, or if there is no
* data-link with the specified name (could be /dev/ip)
*
+ * If a zone's name has been specified, eg. via dlpi_open_zone, then we instead
+ * will check in:
+ *
+ * /dev/ipnet/zone/%z/ - if DLPI_DEVIPNET is specified
+ * /dev/net/zone/%z/ - if a data-link with the specified name exists.
+ *
+ * When a zone name is specified, all of the fallback procedures that we opt for
+ * in the normal case are not used.
+ *
* In particular, if DLPI_DEVIPNET is not specified, this function is used to
* open a data-link node, or "/dev/ip" node. It is usually be called firstly
* with style1 being B_TRUE, and if that fails and the return value is not
@@ -1040,7 +1069,8 @@ dlpi_iftype(uint_t dlpitype)
* the second style-2 open attempt.
*/
static int
-i_dlpi_open(const char *provider, int *fd, uint_t flags, boolean_t style1)
+i_dlpi_open(const char *provider, const char *zonename, int *fd, uint_t flags,
+ boolean_t style1)
{
char path[MAXPATHLEN];
int oflags;
@@ -1051,7 +1081,13 @@ i_dlpi_open(const char *provider, int *fd, uint_t flags, boolean_t style1)
oflags |= O_EXCL;
if (flags & DLPI_DEVIPNET) {
- (void) snprintf(path, sizeof (path), "/dev/ipnet/%s", provider);
+ if (*zonename != '\0') {
+ (void) snprintf(path, sizeof (path),
+ "/dev/ipnet/zone/%s/%s", zonename, provider);
+ } else {
+ (void) snprintf(path, sizeof (path), "/dev/ipnet/%s",
+ provider);
+ }
if ((*fd = open(path, oflags)) != -1)
return (DLPI_SUCCESS);
else
@@ -1070,7 +1106,13 @@ i_dlpi_open(const char *provider, int *fd, uint_t flags, boolean_t style1)
if (dlpi_parselink(provider, driver, &ppa) != DLPI_SUCCESS)
goto fallback;
- (void) snprintf(path, sizeof (path), "/dev/net/%s", provider);
+ if (*zonename != '\0') {
+ (void) snprintf(path, sizeof (path),
+ "/dev/net/zone/%s/%s", zonename, provider);
+ } else {
+ (void) snprintf(path, sizeof (path), "/dev/net/%s",
+ provider);
+ }
if ((*fd = open(path, oflags)) != -1)
return (DLPI_SUCCESS);
@@ -1130,7 +1172,8 @@ i_dlpi_style1_open(dlpi_impl_t *dip)
int retval, save_errno;
int fd;
- retval = i_dlpi_open(dip->dli_linkname, &fd, dip->dli_oflags, B_TRUE);
+ retval = i_dlpi_open(dip->dli_linkname, dip->dli_zonename, &fd,
+ dip->dli_oflags, B_TRUE);
if (retval != DLPI_SUCCESS)
return (retval);
dip->dli_fd = fd;
@@ -1153,7 +1196,8 @@ i_dlpi_style2_open(dlpi_impl_t *dip)
int fd;
int retval, save_errno;
- retval = i_dlpi_open(dip->dli_provider, &fd, dip->dli_oflags, B_FALSE);
+ retval = i_dlpi_open(dip->dli_provider, dip->dli_zonename, &fd,
+ dip->dli_oflags, B_FALSE);
if (retval != DLPI_SUCCESS)
return (retval);
dip->dli_fd = fd;
@@ -1571,7 +1615,8 @@ static const char *libdlpi_errlist[] = {
/* DLPI_ENOTENOTSUP */
"invalid DLPI notification type", /* DLPI_ENOTEINVAL */
"invalid DLPI notification id", /* DLPI_ENOTEIDINVAL */
- "DLPI_IPNETINFO not supported" /* DLPI_EIPNETINFONOTSUP */
+ "DLPI_IPNETINFO not supported", /* DLPI_EIPNETINFONOTSUP */
+ "invalid zone name" /* DLPI_EZONENAMEINVAL */
};
const char *
diff --git a/usr/src/lib/libdlpi/common/libdlpi.h b/usr/src/lib/libdlpi/common/libdlpi.h
index 993ac1b7a4..364413ee3a 100644
--- a/usr/src/lib/libdlpi/common/libdlpi.h
+++ b/usr/src/lib/libdlpi/common/libdlpi.h
@@ -93,6 +93,7 @@ enum {
DLPI_ENOTENOTSUP, /* DLPI notification not supported by link */
DLPI_ENOTEIDINVAL, /* invalid DLPI notification id */
DLPI_EIPNETINFONOTSUP, /* DLPI_IPNETINFO not supported */
+ DLPI_EZONENAMEINVAL, /* invalid zone name */
DLPI_ERRMAX /* Highest + 1 libdlpi error code */
};
@@ -184,6 +185,7 @@ typedef boolean_t dlpi_walkfunc_t(const char *, void *);
extern void dlpi_walk(dlpi_walkfunc_t *, void *, uint_t);
extern int dlpi_open(const char *, dlpi_handle_t *, uint_t);
+extern int dlpi_open_zone(const char *, const char *, dlpi_handle_t *, uint_t);
extern void dlpi_close(dlpi_handle_t);
extern int dlpi_info(dlpi_handle_t, dlpi_info_t *, uint_t);
extern int dlpi_bind(dlpi_handle_t, uint_t, uint_t *);
diff --git a/usr/src/lib/libdlpi/common/libdlpi_impl.h b/usr/src/lib/libdlpi/common/libdlpi_impl.h
index 70708ff5af..8969cce7cb 100644
--- a/usr/src/lib/libdlpi/common/libdlpi_impl.h
+++ b/usr/src/lib/libdlpi/common/libdlpi_impl.h
@@ -28,6 +28,7 @@
#include <libdlpi.h>
#include <sys/sysmacros.h>
+#include <sys/zone.h>
#ifdef __cplusplus
extern "C" {
@@ -112,6 +113,8 @@ typedef struct dlpi_impl_s {
/* full linkname including PPA */
char dli_provider[DLPI_LINKNAME_MAX];
/* only provider name */
+ char dli_zonename[ZONENAME_MAX];
+ /* optionally specified zone */
t_uscalar_t dli_style; /* style 1 or 2 */
uint_t dli_saplen; /* bound SAP length */
uint_t dli_sap; /* bound SAP value */
diff --git a/usr/src/lib/libdlpi/common/mapfile-vers b/usr/src/lib/libdlpi/common/mapfile-vers
index ed3231dc92..c818e5e660 100644
--- a/usr/src/lib/libdlpi/common/mapfile-vers
+++ b/usr/src/lib/libdlpi/common/mapfile-vers
@@ -67,6 +67,11 @@ SYMBOL_VERSION SUNW_1.1 { # first release of libdlpi, Solaris 11
SYMBOL_VERSION SUNWprivate {
global:
+ #
+ # dlpi_open_zone should be moved to a new public section once it is
+ # upstreamed into illumos-gate .
+ #
+ dlpi_open_zone;
dlpi_parselink;
dlpi_makelink;
dlpi_style;
diff --git a/usr/src/lib/libdns_sd/java/com/apple/dnssd/DNSSDRecordRegistrar.java b/usr/src/lib/libdns_sd/java/com/apple/dnssd/DNSSDRecordRegistrar.java
index 366d83476b..6983e279fa 100644
--- a/usr/src/lib/libdns_sd/java/com/apple/dnssd/DNSSDRecordRegistrar.java
+++ b/usr/src/lib/libdns_sd/java/com/apple/dnssd/DNSSDRecordRegistrar.java
@@ -5,9 +5,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -25,9 +25,6 @@ Re-licensed mDNSResponder daemon source code under Apache License, Version 2.0
Revision 1.1 2006/06/20 23:00:12 rpantos
<rdar://problem/3839132> Java needs to implement DNSServiceRegisterRecord equivalent
-
-ident "%Z%%M% %I% %E% SMI"
-
*/
@@ -38,7 +35,7 @@ package com.apple.dnssd;
public interface DNSSDRecordRegistrar extends DNSSDService
{
- /** Register an independent {@link DNSRecord}.<P>
+ /** Register an independent {@link DNSRecord}.<P>
@param flags
Possible values are SHARED or UNIQUE (see flag type definitions for details).
<P>
@@ -55,25 +52,22 @@ public interface DNSSDRecordRegistrar extends DNSSDService
as defined in nameser.h.
<P>
@param rrclass
- The class of the resource record, as defined in nameser.h
+ The class of the resource record, as defined in nameser.h
(usually 1 for the Internet class).
<P>
- @param rData
+ @param rdata
The new rdata as it is to appear in the DNS record.
<P>
@param ttl
The time to live of the resource record, in seconds. Pass 0 to use a default value.
<P>
- @param listener
- This object will get called when the service is registered.
- <P>
@return A {@link DNSSDService} that can be used to abort the record registration.
@throws SecurityException If a security manager is present and denies <tt>RuntimePermission("getDNSSDInstance")</tt>.
@see RuntimePermission
*/
- public DNSRecord registerRecord( int flags, int ifIndex, String fullname, int rrtype,
+ public DNSRecord registerRecord( int flags, int ifIndex, String fullname, int rrtype,
int rrclass, byte[] rdata, int ttl)
throws DNSSDException;
-}
+}
diff --git a/usr/src/lib/libdtrace/Makefile.com b/usr/src/lib/libdtrace/Makefile.com
index 071e9f22b8..42ab0e4b78 100644
--- a/usr/src/lib/libdtrace/Makefile.com
+++ b/usr/src/lib/libdtrace/Makefile.com
@@ -86,6 +86,7 @@ DLIBSRCS += \
io.d \
ip.d \
iscsit.d \
+ mac.d \
net.d \
nfs.d \
nfssrv.d \
@@ -98,7 +99,8 @@ DLIBSRCS += \
sysevent.d \
tcp.d \
udp.d \
- unistd.d
+ unistd.d \
+ vnd.d
include ../../Makefile.lib
@@ -111,6 +113,7 @@ CLEANFILES += dt_lex.c dt_grammar.c dt_grammar.h y.output
CLEANFILES += ../common/procfs.sed ../common/procfs.d
CLEANFILES += ../common/io.sed ../common/io.d
CLEANFILES += ../common/ip.sed ../common/ip.d
+CLEANFILES += ../common/mac.sed ../common/mac.d
CLEANFILES += ../common/net.sed ../common/net.d
CLEANFILES += ../common/errno.d ../common/signal.d
CLEANFILES += ../common/dt_errtags.c ../common/dt_names.c
@@ -132,7 +135,7 @@ CERRWARN += -_gcc=-Wno-uninitialized
CERRWARN += -_gcc=-Wno-switch
YYCFLAGS =
-LDLIBS += -lgen -lproc -lrtld_db -lnsl -lsocket -lctf -lelf -lc
+LDLIBS += -lgen -lproc -lrtld_db -lnsl -lsocket -lctf -lelf -lc -lzonecfg
DRTILDLIBS = $(LDLIBS.lib) -lc
yydebug := YYCFLAGS += -DYYDEBUG
@@ -203,6 +206,9 @@ pics/dt_lex.o pics/dt_grammar.o := CCVERBOSE =
../common/ip.d: ../common/ip.sed ../common/ip.d.in
sed -f ../common/ip.sed < ../common/ip.d.in > $@
+../common/mac.d: ../common/mac.sed ../common/mac.d.in
+ sed -f ../common/mac.sed < ../common/mac.d.in > $@
+
../common/net.d: ../common/net.sed ../common/net.d.in
sed -f ../common/net.sed < ../common/net.d.in > $@
diff --git a/usr/src/lib/libdtrace/common/dt_impl.h b/usr/src/lib/libdtrace/common/dt_impl.h
index c6ed8ceaac..2d824ba640 100644
--- a/usr/src/lib/libdtrace/common/dt_impl.h
+++ b/usr/src/lib/libdtrace/common/dt_impl.h
@@ -267,6 +267,7 @@ struct dtrace_hdl {
uint_t dt_droptags; /* boolean: set via -xdroptags */
uint_t dt_active; /* boolean: set once tracing is active */
uint_t dt_stopped; /* boolean: set once tracing is stopped */
+ uint_t dt_optset; /* boolean: set once options have been set */
processorid_t dt_beganon; /* CPU that executed BEGIN probe (if any) */
processorid_t dt_endedon; /* CPU that executed END probe (if any) */
uint_t dt_oflags; /* dtrace open-time options (see dtrace.h) */
diff --git a/usr/src/lib/libdtrace/common/dt_open.c b/usr/src/lib/libdtrace/common/dt_open.c
index ffb2fe9a8f..7edeac8f10 100644
--- a/usr/src/lib/libdtrace/common/dt_open.c
+++ b/usr/src/lib/libdtrace/common/dt_open.c
@@ -40,6 +40,7 @@
#include <fcntl.h>
#include <errno.h>
#include <assert.h>
+#include <zone.h>
#define _POSIX_PTHREAD_SEMANTICS
#include <dirent.h>
@@ -683,8 +684,8 @@ const dtrace_pattr_t _dtrace_prvdesc = {
{ DTRACE_STABILITY_UNSTABLE, DTRACE_STABILITY_UNSTABLE, DTRACE_CLASS_COMMON },
};
-const char *_dtrace_defcpp = "/usr/ccs/lib/cpp"; /* default cpp(1) to invoke */
-const char *_dtrace_defld = "/usr/ccs/bin/ld"; /* default ld(1) to invoke */
+const char *_dtrace_defcpp = "/usr/lib/cpp"; /* default cpp(1) to invoke */
+const char *_dtrace_defld = "/usr/bin/ld"; /* default ld(1) to invoke */
const char *_dtrace_libdir = "/usr/lib/dtrace"; /* default library directory */
const char *_dtrace_provdir = "/dev/dtrace/provider"; /* provider directory */
@@ -824,6 +825,8 @@ dt_vopen(int version, int flags, int *errp,
dt_provmod_t *provmod = NULL;
int i, err;
struct rlimit rl;
+ const char *zroot;
+ char *libpath = NULL;
const dt_intrinsic_t *dinp;
const dt_typedef_t *dtyp;
@@ -956,11 +959,19 @@ alloc:
dtp->dt_provs = calloc(dtp->dt_provbuckets, sizeof (dt_provider_t *));
dt_proc_init(dtp);
dtp->dt_vmax = DT_VERS_LATEST;
- dtp->dt_cpp_path = strdup(_dtrace_defcpp);
+ zroot = zone_get_nroot();
+ if (zroot != NULL) {
+ (void) asprintf(&dtp->dt_ld_path, "%s/%s", zroot,
+ _dtrace_defld);
+ (void) asprintf(&dtp->dt_cpp_path, "%s/%s", zroot,
+ _dtrace_defcpp);
+ } else {
+ dtp->dt_ld_path = strdup(_dtrace_defld);
+ dtp->dt_cpp_path = strdup(_dtrace_defcpp);
+ }
dtp->dt_cpp_argv = malloc(sizeof (char *));
dtp->dt_cpp_argc = 1;
dtp->dt_cpp_args = 1;
- dtp->dt_ld_path = strdup(_dtrace_defld);
dtp->dt_provmod = provmod;
dtp->dt_vector = vector;
dtp->dt_varg = arg;
@@ -1305,9 +1316,15 @@ alloc:
* compile, and to provide better error reporting (because the full
* reporting of compiler errors requires dtrace_open() to succeed).
*/
- if (dtrace_setopt(dtp, "libdir", _dtrace_libdir) != 0)
+ if (zroot != NULL)
+ (void) asprintf(&libpath, "%s/%s", zroot, _dtrace_libdir);
+ if (dtrace_setopt(dtp, "libdir",
+ libpath != NULL ? libpath : _dtrace_libdir) != 0)
return (set_open_errno(dtp, errp, dtp->dt_errno));
+ if (libpath != NULL)
+ free(libpath);
+
return (dtp);
}
diff --git a/usr/src/lib/libdtrace/common/dt_options.c b/usr/src/lib/libdtrace/common/dt_options.c
index 201b50a177..be985d6dab 100644
--- a/usr/src/lib/libdtrace/common/dt_options.c
+++ b/usr/src/lib/libdtrace/common/dt_options.c
@@ -41,6 +41,8 @@
#include <alloca.h>
#include <errno.h>
#include <fcntl.h>
+#include <zone.h>
+#include <libzonecfg.h>
#include <dt_impl.h>
#include <dt_string.h>
@@ -854,6 +856,44 @@ dt_opt_bufresize(dtrace_hdl_t *dtp, const char *arg, uintptr_t option)
return (0);
}
+/*ARGSUSED*/
+static int
+dt_opt_zone(dtrace_hdl_t *dtp, const char *arg, uintptr_t option)
+{
+ zoneid_t z, did;
+
+ if (arg == NULL)
+ return (dt_set_errno(dtp, EDT_BADOPTVAL));
+
+ /*
+ * If the specified zone is currently running, we'll query the kernel
+ * for its debugger ID. If it doesn't appear to be running, we'll look
+ * for it for among all installed zones (thereby allowing a zdefs
+ * enabling against a halted zone).
+ */
+ if ((z = getzoneidbyname(arg)) != -1) {
+ if (zone_getattr(z, ZONE_ATTR_DID, &did, sizeof (did)) < 0)
+ return (dt_set_errno(dtp, EDT_BADOPTVAL));
+ } else {
+ zone_dochandle_t handle;
+
+ if ((handle = zonecfg_init_handle()) == NULL)
+ return (dt_set_errno(dtp, errno));
+
+ if (zonecfg_get_handle(arg, handle) != Z_OK) {
+ zonecfg_fini_handle(handle);
+ return (dt_set_errno(dtp, EDT_BADOPTVAL));
+ }
+
+ did = zonecfg_get_did(handle);
+ zonecfg_fini_handle(handle);
+ }
+
+ dtp->dt_options[DTRACEOPT_ZONE] = did;
+
+ return (0);
+}
+
int
dt_options_load(dtrace_hdl_t *dtp)
{
@@ -988,6 +1028,7 @@ static const dt_option_t _dtrace_rtoptions[] = {
{ "statusrate", dt_opt_rate, DTRACEOPT_STATUSRATE },
{ "strsize", dt_opt_strsize, DTRACEOPT_STRSIZE },
{ "ustackframes", dt_opt_runtime, DTRACEOPT_USTACKFRAMES },
+ { "zone", dt_opt_zone, DTRACEOPT_ZONE },
{ "temporal", dt_opt_runtime, DTRACEOPT_TEMPORAL },
{ NULL }
};
@@ -1068,9 +1109,41 @@ dtrace_setopt(dtrace_hdl_t *dtp, const char *opt, const char *val)
if (dtp->dt_active)
return (dt_set_errno(dtp, EDT_ACTIVE));
+ /*
+ * If our options had been previously ioctl'd down,
+ * clear dt_optset to indicate that a run-time option
+ * has since been set.
+ */
+ dtp->dt_optset = B_FALSE;
+
return (op->o_func(dtp, val, op->o_option));
}
}
return (dt_set_errno(dtp, EDT_BADOPTNAME));
}
+
+int
+dtrace_setopts(dtrace_hdl_t *dtp)
+{
+ void *dof;
+ int err;
+
+ if (dtp->dt_optset)
+ return (0);
+
+ if ((dof = dtrace_getopt_dof(dtp)) == NULL)
+ return (-1); /* dt_errno has been set for us */
+
+ if ((err = dt_ioctl(dtp, DTRACEIOC_ENABLE, dof)) == -1)
+ (void) dt_set_errno(dtp, errno);
+
+ dtrace_dof_destroy(dtp, dof);
+
+ if (err == -1)
+ return (-1);
+
+ dtp->dt_optset = B_TRUE;
+
+ return (0);
+}
diff --git a/usr/src/lib/libdtrace/common/dt_program.c b/usr/src/lib/libdtrace/common/dt_program.c
index 7d725bd0af..e4f9d8dd1c 100644
--- a/usr/src/lib/libdtrace/common/dt_program.c
+++ b/usr/src/lib/libdtrace/common/dt_program.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent, Inc. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
*/
@@ -154,6 +155,14 @@ dtrace_program_exec(dtrace_hdl_t *dtp, dtrace_prog_t *pgp,
void *dof;
int n, err;
+ /*
+ * If we have not yet ioctl'd down our options DOF, we'll do that
+ * before enabling any probes (some options will affect which probes
+ * we match).
+ */
+ if (dtrace_setopts(dtp) != 0)
+ return (-1);
+
dtrace_program_info(dtp, pgp, pip);
if ((dof = dtrace_dof_create(dtp, pgp, DTRACE_D_STRIP)) == NULL)
diff --git a/usr/src/lib/libdtrace/common/dt_work.c b/usr/src/lib/libdtrace/common/dt_work.c
index 97a7f62d69..c330394027 100644
--- a/usr/src/lib/libdtrace/common/dt_work.c
+++ b/usr/src/lib/libdtrace/common/dt_work.c
@@ -25,7 +25,9 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2011, Joyent, Inc. All rights reserved.
+ */
#include <dt_impl.h>
#include <stddef.h>
@@ -164,13 +166,22 @@ dtrace_status(dtrace_hdl_t *dtp)
int
dtrace_go(dtrace_hdl_t *dtp)
{
- void *dof;
- int err;
-
if (dtp->dt_active)
return (dt_set_errno(dtp, EINVAL));
/*
+ * In most cases, we will have already ioctl'd down our options DOF
+ * by this point -- but if a libdtrace does a dtrace_setopt() after
+ * calling dtrace_program_exec() but before calling dtrace_go(),
+ * dt_optset will be cleared and we need to ioctl down the options
+ * DOF now.
+ */
+ if (dtrace_setopts(dtp) != 0 &&
+ (dtp->dt_errno != ENOTTY || dtp->dt_vector == NULL)) {
+ return (-1);
+ }
+
+ /*
* If a dtrace:::ERROR program and callback are registered, enable the
* program before we start tracing. If this fails for a vector open
* with ENOTTY, we permit dtrace_go() to succeed so that vector clients
@@ -178,19 +189,10 @@ dtrace_go(dtrace_hdl_t *dtp)
* though they do not provide support for the DTRACEIOC_ENABLE ioctl.
*/
if (dtp->dt_errprog != NULL &&
- dtrace_program_exec(dtp, dtp->dt_errprog, NULL) == -1 && (
- dtp->dt_errno != ENOTTY || dtp->dt_vector == NULL))
- return (-1); /* dt_errno has been set for us */
-
- if ((dof = dtrace_getopt_dof(dtp)) == NULL)
+ dtrace_program_exec(dtp, dtp->dt_errprog, NULL) == -1 &&
+ (dtp->dt_errno != ENOTTY || dtp->dt_vector == NULL))
return (-1); /* dt_errno has been set for us */
- err = dt_ioctl(dtp, DTRACEIOC_ENABLE, dof);
- dtrace_dof_destroy(dtp, dof);
-
- if (err == -1 && (errno != ENOTTY || dtp->dt_vector == NULL))
- return (dt_set_errno(dtp, errno));
-
if (dt_ioctl(dtp, DTRACEIOC_GO, &dtp->dt_beganon) == -1) {
if (errno == EACCES)
return (dt_set_errno(dtp, EDT_DESTRUCTIVE));
diff --git a/usr/src/lib/libdtrace/common/dtrace.h b/usr/src/lib/libdtrace/common/dtrace.h
index 065d99f300..8c65ff8bec 100644
--- a/usr/src/lib/libdtrace/common/dtrace.h
+++ b/usr/src/lib/libdtrace/common/dtrace.h
@@ -82,6 +82,7 @@ extern const char *dtrace_subrstr(dtrace_hdl_t *, int);
extern int dtrace_setopt(dtrace_hdl_t *, const char *, const char *);
extern int dtrace_getopt(dtrace_hdl_t *, const char *, dtrace_optval_t *);
+extern int dtrace_setopts(dtrace_hdl_t *);
extern void dtrace_update(dtrace_hdl_t *);
extern int dtrace_ctlfd(dtrace_hdl_t *);
diff --git a/usr/src/lib/libdtrace/common/mac.d.in b/usr/src/lib/libdtrace/common/mac.d.in
new file mode 100644
index 0000000000..6263d51bdd
--- /dev/null
+++ b/usr/src/lib/libdtrace/common/mac.d.in
@@ -0,0 +1,66 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#pragma D depends_on library ip.d
+
+inline int ETHERTYPE_PUP = @ETHERTYPE_PUP@;
+inline int ETHERTYPE_802_MIN = @ETHERTYPE_802_MIN@;
+inline int ETHERTYPE_IP = @ETHERTYPE_IP@;
+inline int ETHERTYPE_ARP = @ETHERTYPE_ARP@;
+inline int ETHERTYPE_REVARP = @ETHERTYPE_REVARP@;
+inline int ETHERTYPE_AT = @ETHERTYPE_AT@;
+inline int ETHERTYPE_AARP = @ETHERTYPE_AARP@;
+inline int ETHERTYPE_VLAN = @ETHERTYPE_VLAN@;
+inline int ETHERTYPE_IPV6 = @ETHERTYPE_IPV6@;
+inline int ETHERTYPE_SLOW = @ETHERTYPE_SLOW@;
+inline int ETHERTYPE_PPPOED = @ETHERTYPE_PPPOED@;
+inline int ETHERTYPE_PPPOES = @ETHERTYPE_PPPOES@;
+inline int ETHERTYPE_EAPOL = @ETHERTYPE_EAPOL@;
+inline int ETHERTYPE_RSN_PREAUTH = @ETHERTYPE_RSN_PREAUTH@;
+inline int ETHERTYPE_TRILL = @ETHERTYPE_TRILL@;
+inline int ETHERTYPE_FCOE = @ETHERTYPE_FCOE@;
+inline int ETHERTYPE_MAX = @ETHERTYPE_MAX@;
+
+
+typedef struct etherinfo {
+ uint8_t eth_dst[6]; /* Destination MAC addr */
+ uint8_t eth_src[6]; /* Source MAC addr */
+ uint16_t eth_type; /* Ethertype */
+ boolean_t eth_istagged; /* Is the VLAN tag present */
+ uint8_t eth_priority; /* Priority tag */
+ uint8_t eth_dei; /* drop eligible indicator */
+ uint16_t eth_vlanid; /* VLAN ID */
+ uintptr_t eth_header; /* Pointer to start of header */
+ uintptr_t eth_mblk; /* Pointer to the mblk containing header */
+} etherinfo_t;
+
+#pragma D binding "1.12.1" translator
+translator etherinfo_t < mblk_t *mp > {
+ eth_dst = mp->b_rptr;
+ eth_src = mp->b_rptr + 6;
+ eth_type = ntohs(*(uint16_t *)(mp->b_rptr + 12)) == ETHERTYPE_VLAN ?
+ ntohs(*(uint16_t *)(mp->b_rptr + 16)) :
+ ntohs(*(uint16_t *)(mp->b_rptr + 12));
+ eth_istagged = ntohs(*(uint16_t *)(mp->b_rptr + 12)) == ETHERTYPE_VLAN ?
+ 1 : 0;
+ eth_priority = ntohs(*(uint16_t *)(mp->b_rptr + 12)) == ETHERTYPE_VLAN ?
+ ntohs(*(uint16_t *)(mp->b_rptr + 14)) & 0xe000: 0;
+ eth_dei = ntohs(*(uint16_t *)(mp->b_rptr + 12)) == ETHERTYPE_VLAN ?
+ ntohs(*(uint16_t *)(mp->b_rptr + 14)) & 0x1000: 0;
+ eth_vlanid = ntohs(*(uint16_t *)(mp->b_rptr + 12)) == ETHERTYPE_VLAN ?
+ ntohs(*(uint16_t *)(mp->b_rptr + 14)) & 0x0fff: 0;
+ eth_header = (uintptr_t)mp->b_rptr;
+ eth_mblk = (uintptr_t)mp;
+};
diff --git a/usr/src/lib/libdtrace/common/mac.sed.in b/usr/src/lib/libdtrace/common/mac.sed.in
new file mode 100644
index 0000000000..00e149d000
--- /dev/null
+++ b/usr/src/lib/libdtrace/common/mac.sed.in
@@ -0,0 +1,45 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+
+/*
+ * This file is a sed script which is first preprocessed by cpp or cc -E to
+ * define a set of sed directives which replace #define tokens with their
+ * values. After preprocessing, the sed script is run over vnd.d.in to
+ * replace the #define tokens listed below to create the finished vnd.d.
+ * Refer to the rules in libdtrace/Makefile.com for more information.
+ */
+
+#include <sys/ethernet.h>
+
+#define SED_REPLACE(x) s/#x/x/g
+
+SED_REPLACE(ETHERTYPE_PUP)
+SED_REPLACE(ETHERTYPE_802_MIN)
+SED_REPLACE(ETHERTYPE_IP)
+SED_REPLACE(ETHERTYPE_ARP)
+SED_REPLACE(ETHERTYPE_REVARP)
+SED_REPLACE(ETHERTYPE_AT)
+SED_REPLACE(ETHERTYPE_AARP)
+SED_REPLACE(ETHERTYPE_VLAN)
+SED_REPLACE(ETHERTYPE_IPV6)
+SED_REPLACE(ETHERTYPE_SLOW)
+SED_REPLACE(ETHERTYPE_PPPOED)
+SED_REPLACE(ETHERTYPE_PPPOES)
+SED_REPLACE(ETHERTYPE_EAPOL)
+SED_REPLACE(ETHERTYPE_RSN_PREAUTH)
+SED_REPLACE(ETHERTYPE_TRILL)
+SED_REPLACE(ETHERTYPE_FCOE)
+SED_REPLACE(ETHERTYPE_MAX)
diff --git a/usr/src/lib/libdtrace/common/vnd.d b/usr/src/lib/libdtrace/common/vnd.d
new file mode 100644
index 0000000000..356c412150
--- /dev/null
+++ b/usr/src/lib/libdtrace/common/vnd.d
@@ -0,0 +1,28 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+
+#pragma D depends_on module vnd
+#pragma D depends_on provider vnd
+#pragma D depends_on library ip.d
+#pragma D depends_on library mac.d
+
+#pragma D binding "1.6.3" translator
+translator ifinfo_t < vnd_str_t *vsp > {
+ if_name = vsp != NULL ? stringof(vsp->vns_dev->vdd_lname) : "<null>";
+ if_local = 0;
+ if_ipstack = vsp != NULL ? vsp->vns_nsd->vpnd_nsid : 0;
+ if_addr = (uintptr_t)vsp;
+};
diff --git a/usr/src/lib/libgrubmgmt/common/libgrub_fs.c b/usr/src/lib/libgrubmgmt/common/libgrub_fs.c
index d53b89761a..ff2884b6b3 100644
--- a/usr/src/lib/libgrubmgmt/common/libgrub_fs.c
+++ b/usr/src/lib/libgrubmgmt/common/libgrub_fs.c
@@ -21,6 +21,8 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright 2011 Joyent, Inc. All rights reserved.
*/
/*
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
diff --git a/usr/src/lib/libipadm/common/libipadm.c b/usr/src/lib/libipadm/common/libipadm.c
index 21aeab72ba..e897771556 100644
--- a/usr/src/lib/libipadm/common/libipadm.c
+++ b/usr/src/lib/libipadm/common/libipadm.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2014, Joyent, Inc. All rights reserved.
*/
#include <stdio.h>
@@ -283,11 +284,19 @@ ipadm_close(ipadm_handle_t iph)
boolean_t
ipadm_check_auth(void)
{
+ int uid;
struct passwd pwd;
char buf[NSS_BUFLEN_PASSWD];
+ /*
+ * Branded zones may have different kinds of auth, but root always
+ * allowed.
+ */
+ if ((uid = getuid()) == 0)
+ return (B_TRUE);
+
/* get the password entry for the given user ID */
- if (getpwuid_r(getuid(), &pwd, buf, sizeof (buf)) == NULL)
+ if (getpwuid_r(uid, &pwd, buf, sizeof (buf)) == NULL)
return (B_FALSE);
/* check for presence of given authorization */
diff --git a/usr/src/lib/libkmf/plugins/kmf_openssl/Makefile.com b/usr/src/lib/libkmf/plugins/kmf_openssl/Makefile.com
index 76ecd38479..42e2d976fd 100644
--- a/usr/src/lib/libkmf/plugins/kmf_openssl/Makefile.com
+++ b/usr/src/lib/libkmf/plugins/kmf_openssl/Makefile.com
@@ -37,8 +37,8 @@ KMFINC= -I../../../include -I../../../ber_der/inc
BERLIB= -lkmf -lkmfberder
BERLIB64= $(BERLIB)
-OPENSSLLIBS= $(BERLIB) -lcrypto -lcryptoutil -lc
-OPENSSLLIBS64= $(BERLIB64) -lcrypto -lcryptoutil -lc
+OPENSSLLIBS= $(BERLIB) -lsunw_crypto -lcryptoutil -lc
+OPENSSLLIBS64= $(BERLIB64) -lsunw_crypto -lcryptoutil -lc
LINTSSLLIBS = $(BERLIB) -lcrypto -lcryptoutil -lc
LINTSSLLIBS64 = $(BERLIB64) -lcrypto -lcryptoutil -lc
diff --git a/usr/src/lib/libnisdb/db_mindex3.cc b/usr/src/lib/libnisdb/db_mindex3.cc
index 886487f8c5..28182544a4 100644
--- a/usr/src/lib/libnisdb/db_mindex3.cc
+++ b/usr/src/lib/libnisdb/db_mindex3.cc
@@ -282,7 +282,7 @@ entriesFromLDAPthread(void *voidarg) {
/* Lock to prevent removal */
(void) __nis_lock_db_table(arg->tableName, 1, 0,
- "entriesFromLDAPthread");
+ (char *)"entriesFromLDAPthread");
/*
* It's possible that the db_mindex for the table has changed,
@@ -314,7 +314,7 @@ entriesFromLDAPthread(void *voidarg) {
stat = entriesFromLDAPreal(arg);
(void) __nis_ulock_db_table(arg->tableName, 1, 0,
- "entriesFromLDAPthread");
+ (char *)"entriesFromLDAPthread");
freeQuery(arg->q);
if (arg->dirObj != 0)
diff --git a/usr/src/lib/libnisdb/db_table.cc b/usr/src/lib/libnisdb/db_table.cc
index 2995f1a9c9..8044148355 100644
--- a/usr/src/lib/libnisdb/db_table.cc
+++ b/usr/src/lib/libnisdb/db_table.cc
@@ -599,7 +599,7 @@ db_table::setEntryExp(entryp where, entry_obj *obj, int initialLoad) {
if (o != 0) {
__nis_buffer_t b = {0, 0};
- bp2buf(myself, &b, "%s.%s",
+ bp2buf(myself, &b, (char *)"%s.%s",
o->zo_name, o->zo_domain);
t = getObjMapping(b.buf, 0, 1, 0, 0);
sfree(b.buf);
@@ -967,7 +967,7 @@ db_table::setEnumMode(long enumNum) {
if (stat != DB_SUCCESS) {
enumMode.flag = 0;
enumCount.flag = 0;
- logmsg(MSG_NOTIMECHECK, LOG_ERR,
+ logmsg(MSG_NOTIMECHECK, LOG_ERR, (char *)
"%s: No memory for enum check array; entry removal disabled",
myself);
}
diff --git a/usr/src/lib/libnisdb/nis_db.cc b/usr/src/lib/libnisdb/nis_db.cc
index 2908cc99b0..b27889b147 100644
--- a/usr/src/lib/libnisdb/nis_db.cc
+++ b/usr/src/lib/libnisdb/nis_db.cc
@@ -527,7 +527,7 @@ dbFindObject(char *objName, db_status *statP) {
/* If not the root dir, find the directory where the entry lives */
sfree(table);
- name = entryName(myself, objName, &table);
+ name = entryName((char *)myself, objName, &table);
if (name == 0 || table == 0) {
sfree(name);
RETSTAT(0, DB_MEMORY_LIMIT);
@@ -737,7 +737,7 @@ dbDeleteObj(char *objName) {
nod->objType = o->zo_data.zo_type;
nis_destroy_object(o);
- nod->objName = sdup(myself, T, objName);
+ nod->objName = sdup((char *)myself, T, objName);
if (nod->objName == 0) {
sfree(nod);
return (DB_MEMORY_LIMIT);
@@ -789,7 +789,7 @@ dbTouchObj(char *objName) {
sfree(table);
table = 0;
- ent = entryName(myself, objName, &table);
+ ent = entryName((char *)myself, objName, &table);
if (ent == 0 || table == 0) {
sfree(ent);
return (DB_MEMORY_LIMIT);
@@ -989,7 +989,7 @@ dbRefreshObj(char *name, nis_object *o) {
int lstat;
/* Find parent */
- ent = entryName(myself, objName, &table);
+ ent = entryName((char *)myself, objName, &table);
if (ent == 0 || table == 0) {
sfree(b.buf);
sfree(objTable);
diff --git a/usr/src/lib/libnsl/nss/getipnodeby.c b/usr/src/lib/libnsl/nss/getipnodeby.c
index 112b54ec5a..f84b641157 100644
--- a/usr/src/lib/libnsl/nss/getipnodeby.c
+++ b/usr/src/lib/libnsl/nss/getipnodeby.c
@@ -73,6 +73,8 @@
* IPNODE_LOOKUPIPNODES getipnodebyname() needs to lookup the name in ipnodes.
* IPNODE_LOOKUPHOSTS getipnodebyname() needs to lookup the name in hosts.
* IPNODE_ISLITERAL The name supplied is a literal address string.
+ * IPNODE_UNMAP The user doesn't want v4 mapped addresses if no IPv6
+ * interfaces are plumbed on the system.
*/
#define IPNODE_WANTIPV6 0x00000001u
#define IPNODE_WANTIPV4 0x00000002u
@@ -80,9 +82,16 @@
#define IPNODE_LOOKUPIPNODES 0x00000008u
#define IPNODE_LOOKUPHOSTS 0x00000010u
#define IPNODE_LITERAL 0x00000020u
+#define IPNODE_UNMAP 0x00000040u
#define IPNODE_IPV4 (IPNODE_WANTIPV4 | IPNODE_IPV4IFNOIPV6)
/*
+ * The private flag between libsocket and libnsl. See
+ * lib/libsocket/inet/getaddrinfo.c for more information.
+ */
+#define AI_ADDRINFO 0x8000
+
+/*
* The default set of bits corresponding to a getipnodebyname() flags
* argument of AI_DEFAULT.
*/
@@ -162,6 +171,10 @@ getipnodebyname_processflags(const char *name, int af, int flags)
} else if (flags & AI_ALL) {
ipnode_bits &= ~IPNODE_IPV4IFNOIPV6;
}
+ if ((flags & AI_ADDRCONFIG) && !ipv6configured &&
+ (flags & AI_ADDRINFO)) {
+ ipnode_bits |= IPNODE_UNMAP;
+ }
} else {
ipnode_bits &= ~IPNODE_IPV4;
}
@@ -343,7 +356,18 @@ getipnodebyname(const char *name, int af, int flags, int *error_num)
else if (!(ipnode_bits & IPNODE_WANTIPV6))
hp = __filter_addresses(AF_INET6, buf6->result);
- if (hp == NULL)
+ /*
+ * We've been asked to unmap v4 addresses. This
+ * situation implies IPNODE_WANTIPV4 and
+ * !IPNODE_WANTIPV6.
+ */
+ if (hp != NULL && (ipnode_bits & IPNODE_UNMAP)) {
+ /*
+ * Just set hp to a new value, cleanup: will
+ * free the old one
+ */
+ hp = __mappedtov4(hp, error_num);
+ } else if (hp == NULL)
*error_num = NO_ADDRESS;
}
diff --git a/usr/src/lib/libpkg/Makefile.com b/usr/src/lib/libpkg/Makefile.com
index 8f56a61423..0106d8288f 100644
--- a/usr/src/lib/libpkg/Makefile.com
+++ b/usr/src/lib/libpkg/Makefile.com
@@ -64,7 +64,7 @@ $(LINTLIB):= SRCS = $(SRCDIR)/$(LINTSRC)
LIBS = $(DYNLIB) $(LINTLIB)
-LDLIBS += -lc -lssl -lwanboot -lcrypto -lscf -ladm
+LDLIBS += -lc -lsunw_ssl -lwanboot -lsunw_crypto -lscf -ladm
CFLAGS += $(CCVERBOSE)
CERRWARN += -_gcc=-Wno-unused-label
diff --git a/usr/src/lib/libproc/common/Pcontrol.c b/usr/src/lib/libproc/common/Pcontrol.c
index 751c0c3f8a..bde48d1416 100644
--- a/usr/src/lib/libproc/common/Pcontrol.c
+++ b/usr/src/lib/libproc/common/Pcontrol.c
@@ -337,10 +337,16 @@ static const ps_ops_t P_live_ops = {
void
_libproc_init(void)
{
+ const char *root;
+
_libproc_debug = getenv("LIBPROC_DEBUG") != NULL;
_libproc_no_qsort = getenv("LIBPROC_NO_QSORT") != NULL;
_libproc_incore_elf = getenv("LIBPROC_INCORE_ELF") != NULL;
+ if ((root = zone_get_nroot()) != NULL)
+ (void) snprintf(procfs_path, sizeof (procfs_path), "%s/proc",
+ root);
+
(void) sigfillset(&blockable_sigs);
(void) sigdelset(&blockable_sigs, SIGKILL);
(void) sigdelset(&blockable_sigs, SIGSTOP);
diff --git a/usr/src/lib/libproc/common/Pzone.c b/usr/src/lib/libproc/common/Pzone.c
index 3c4999bbe4..c8e2bfcd02 100644
--- a/usr/src/lib/libproc/common/Pzone.c
+++ b/usr/src/lib/libproc/common/Pzone.c
@@ -28,6 +28,9 @@
* Copyright (c) 2013 by Delphix. All rights reserved.
*/
+/*
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ */
#include <assert.h>
#include <dlfcn.h>
#include <errno.h>
diff --git a/usr/src/lib/libresolv2_joy/Makefile b/usr/src/lib/libresolv2_joy/Makefile
new file mode 100644
index 0000000000..052dfdd092
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/Makefile
@@ -0,0 +1,76 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved.
+#
+
+
+include ../../Makefile.master
+include ../Makefile.lib
+
+$(ROOTSVCMETHOD) := FILEMODE = 0555
+
+SUBDIRS= include $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+all := TARGET= all
+clean := TARGET= clean
+clobber := TARGET= clobber
+install := TARGET= install
+lint := TARGET= lint
+_msg := TARGET= _msg
+
+LIBRARY= libresolv_joy.a
+TEXT_DOMAIN= SUNW_OST_OSLIB
+XGETFLAGS= -a
+POFILE= $(LIBRARY:.a=.po)
+POFILES= generic.po
+
+SED= sed
+GREP= grep
+
+.KEEP_STATE:
+
+all clean clobber lint: $(SUBDIRS)
+
+install: $(SUBDIRS)
+
+check: $(CHECKHDRS) $(CHKMANIFEST)
+
+_msg: $(MSGDOMAIN) $(POFILE)
+ $(RM) $(MSGDOMAIN)/$(POFILE)
+ $(CP) $(POFILE) $(MSGDOMAIN)
+
+$(POFILE): $(POFILES)
+ $(RM) $@
+ $(CAT) $(POFILES) > $@
+
+$(POFILES):
+ $(RM) messages.po
+ $(XGETTEXT) $(XGETFLAGS) *.[ch]* */*.[ch]*
+ $(SED) -e '/^# msg/d' -e '/^domain/d' messages.po > $@
+ $(RM) messages.po
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET); echo
+
+FRC:
+
diff --git a/usr/src/lib/libresolv2_joy/Makefile.com b/usr/src/lib/libresolv2_joy/Makefile.com
new file mode 100644
index 0000000000..bbabcbcede
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/Makefile.com
@@ -0,0 +1,162 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#
+
+LIBRARY= libresolv_joy.a
+VERS= .2
+
+BSDOBJS= daemon.o putenv.o strcasecmp.o strsep.o \
+ ftruncate.o readv.o strdup.o strtoul.o \
+ gettimeofday.o setenv.o strerror.o utimes.o \
+ mktemp.o setitimer.o strpbrk.o writev.o
+
+DSTOBJS= dst_api.o support.o hmac_link.o
+
+# inet_addr, inet_pton, inet_ntop, and inet_ntoa removed due to overlap with
+# libnsl
+INETOBJS= inet_net_pton.o inet_neta.o inet_lnaof.o \
+ inet_netof.o nsap_addr.o inet_makeaddr.o \
+ inet_network.o inet_net_ntop.o inet_cidr_ntop.o \
+ inet_cidr_pton.o inet_data.o
+
+# build only the IRS objects that the ISC libbind's make would
+IRSTHROBJS= gethostent_r.o getnetent_r.o getnetgrent_r.o \
+ getprotoent_r.o getservent_r.o
+IRSOBJS= ${IRSTHROBJS} \
+ dns.o dns_ho.o dns_nw.o dns_pr.o \
+ dns_sv.o gai_strerror.o gen.o gen_ho.o \
+ gen_ng.o gen_nw.o gen_pr.o gen_sv.o \
+ getaddrinfo.o gethostent.o getnameinfo.o getnetent.o \
+ getnetgrent.o getprotoent.o getservent.o hesiod.o \
+ irp.o irp_ho.o irp_ng.o irp_nw.o \
+ irp_pr.o irp_sv.o irpmarshall.o irs_data.o \
+ lcl.o lcl_ho.o lcl_ng.o lcl_nw.o \
+ lcl_pr.o lcl_sv.o nis.o nul_ng.o \
+ util.o
+
+ISCOBJS= assertions.o base64.o bitncmp.o ctl_clnt.o \
+ ctl_p.o ctl_srvr.o ev_connects.o ev_files.o \
+ ev_streams.o ev_timers.o ev_waits.o eventlib.o \
+ heap.o hex.o logging.o memcluster.o \
+ movefile.o tree.o
+
+NAMESEROBJS= ns_date.o ns_name.o ns_netint.o ns_parse.o \
+ ns_print.o ns_samedomain.o ns_sign.o ns_ttl.o \
+ ns_verify.o ns_rdata.o ns_newmsg.o
+
+RESOLVOBJS= herror.o mtctxres.o res_comp.o res_data.o \
+ res_debug.o res_findzonecut.o res_init.o \
+ res_mkquery.o res_mkupdate.o res_query.o res_send.o \
+ res_sendsigned.o res_update.o
+
+SUNWOBJS= sunw_mtctxres.o sunw_updrec.o sunw_wrappers.o
+
+OBJECTS= $(BSDOBJS) $(DSTOBJS) $(INETOBJS) $(IRSOBJS) $(ISCOBJS) \
+ $(NAMESEROBJS) $(RESOLVOBJS) $(SUNWOBJS)
+
+# include library definitions
+include ../../Makefile.lib
+
+# install this library in the root filesystem
+include ../../Makefile.rootfs
+
+# CC -v complains about things we aren't going to change in the ISC code
+CCVERBOSE=
+
+SRCDIR = ../common
+SRCS= $(BSDOBJS:%.o=../common/bsd/%.c) \
+ $(DSTOBJS:%.o=../common/dst/%.c) \
+ $(INETOBJS:%.o=../common/inet/%.c) \
+ $(IRSOBJS:%.o=../common/irs/%.c) \
+ $(ISCOBJS:%.o=../common/isc/%.c) \
+ $(NAMESEROBJS:%.o=../common/nameser/%.c) \
+ $(RESOLVOBJS:%.o=../common/resolv/%.c) \
+ $(SUNWOBJS:%.o=../common/sunw/%.c)
+
+LIBS = $(DYNLIB) $(LINTLIB)
+
+$(LINTLIB):= SRCS = ../common/llib-lresolv_joy
+
+# Local Libresolv definitions
+
+SOLCOMPAT = -Dsocket=_socket
+CRYPTFLAGS= -DHMAC_MD5 -DUSE_MD5
+
+LOCFLAGS += $(CRYPTFLAGS)
+LOCFLAGS += -D_SYS_STREAM_H -D_REENTRANT -DSVR4 -DSUNW_OPTIONS \
+ $(SOLCOMPAT) -I../include -I../../common/inc
+
+CPPFLAGS += $(LOCFLAGS)
+
+CERRWARN += -_gcc=-Wno-implicit-function-declaration
+
+DYNFLAGS += $(ZNODELETE)
+
+LDLIBS += -lsocket -lnsl -lc -lmd
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+# include library targets
+include ../../Makefile.targ
+
+pics/%.o: ../common/bsd/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
+
+pics/%.o: ../common/dst/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
+
+pics/%.o: ../common/inet/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
+
+pics/%.o: ../common/irs/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
+
+pics/%.o: ../common/isc/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
+
+pics/%.o: ../common/nameser/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
+
+pics/%.o: ../common/resolv/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
+
+pics/%.o: ../common/sunw/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
+
+# install rule for lint library target
+$(ROOTLINTDIR)/%: ../common/%
+ $(INS.file)
diff --git a/usr/src/lib/libresolv2_joy/THIRDPARTYLICENSE b/usr/src/lib/libresolv2_joy/THIRDPARTYLICENSE
new file mode 100644
index 0000000000..719d77fea2
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/THIRDPARTYLICENSE
@@ -0,0 +1,185 @@
+ * Copyright (c) 1995-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
+ * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
+ * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+
+ * Portions Copyright (c) 1995 by International Business Machines, Inc.
+ *
+ * International Business Machines, Inc. (hereinafter called IBM) grants
+ * permission under its copyrights to use, copy, modify, and distribute this
+ * Software with or without fee, provided that the above copyright notice and
+ * all paragraphs of this notice appear in all copies, and that the name of IBM
+ * not be used in connection with the marketing of any product incorporating
+ * the Software or modifications thereof, without specific, written prior
+ * permission.
+ *
+ * To the extent it has a right to do so, IBM grants an immunity from suit
+ * under its patents, if any, for the use, sale or manufacture of products to
+ * the extent that such products are used for performing Domain Name System
+ * dynamic updates in TCP/IP networks by means of the Software. No immunity is
+ * granted for any product per se or for any other function of any product.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", AND IBM DISCLAIMS ALL WARRANTIES,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE. IN NO EVENT SHALL IBM BE LIABLE FOR ANY SPECIAL,
+ * DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE, EVEN
+ * IF IBM IS APPRISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+ * Portions Copyright (c) 1993 by Digital Equipment Corporation.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies, and that
+ * the name of Digital Equipment Corporation not be used in advertising or
+ * publicity pertaining to distribution of the document or software without
+ * specific, written prior permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
+ * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+
+ * Copyright (c) 1983, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+
+/* Copyright (C) RSA Data Security, Inc. created 1986-1987, 1990,
+ 1992-1994, 1996. This is an
+ unpublished work protected as such under copyright law. This work
+ contains proprietary, confidential, and trade secret information of
+ RSA Data Security, Inc. Use, disclosure or reproduction without the
+ express written authorization of RSA Data Security, Inc. is
+ prohibited.
+ */
+
+ DNSSAFE LICENSE TERMS
+
+ This BIND software includes the DNSsafe software from RSA Data
+ Security, Inc., which is copyrighted software that can only be
+ distributed under the terms of this license agreement.
+
+ The DNSsafe software cannot be used or distributed separately from the
+ BIND software. You only have the right to use it or distribute it as
+ a bundled, integrated product.
+
+ The DNSsafe software can ONLY be used to provide authentication for
+ resource records in the Domain Name System, as specified in RFC 2065
+ and successors. You cannot modify the BIND software to use the
+ DNSsafe software for other purposes, or to make its cryptographic
+ functions available to end-users for other uses.
+
+ If you modify the DNSsafe software itself, you cannot modify its
+ documented API, and you must grant RSA Data Security the right to use,
+ modify, and distribute your modifications, including the right to use
+ any patents or other intellectual property that your modifications
+ depend upon.
+
+ You must not remove, alter, or destroy any of RSA's copyright notices
+ or license information. When distributing the software to the Federal
+ Government, it must be licensed to them as "commercial computer
+ software" protected under 48 CFR 12.212 of the FAR, or 48 CFR
+ 227.7202.1 of the DFARS.
+
+ You must not violate United States export control laws by distributing
+ the DNSsafe software or information about it, when such distribution
+ is prohibited by law.
+
+ THE DNSSAFE SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY
+ WHATSOEVER. RSA HAS NO OBLIGATION TO SUPPORT, CORRECT, UPDATE OR
+ MAINTAIN THE RSA SOFTWARE. RSA DISCLAIMS ALL WARRANTIES, EXPRESS,
+ IMPLIED OR STATUTORY, AS TO ANY MATTER WHATSOEVER, INCLUDING ALL
+ IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+ PURPOSE AND NON-INFRINGEMENT OF THIRD PARTY RIGHTS.
+
+ If you desire to use DNSsafe in ways that these terms do not permit,
+ please contact RSA Data Security, Inc., 100 Marine Parkway, Redwood
+ City, California 94065, USA, to discuss alternate licensing
+ arrangements.
+
+ * Portions Copyright (c) 1995-1998 by Trusted Information Systems, Inc.
+ *
+ * Permission to use, copy modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND TRUSTED INFORMATION SYSTEMS
+ * DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
+ * TRUSTED INFORMATION SYSTEMS BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THE SOFTWARE.
+
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by WIDE Project and
+ * its contributors.
+ * 4. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
diff --git a/usr/src/lib/libresolv2_joy/THIRDPARTYLICENSE.descrip b/usr/src/lib/libresolv2_joy/THIRDPARTYLICENSE.descrip
new file mode 100644
index 0000000000..67315d8f33
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/THIRDPARTYLICENSE.descrip
@@ -0,0 +1 @@
+BIND SOFTWARE
diff --git a/usr/src/lib/libresolv2_joy/amd64/Makefile b/usr/src/lib/libresolv2_joy/amd64/Makefile
new file mode 100644
index 0000000000..2e8cdecf75
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/amd64/Makefile
@@ -0,0 +1,31 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../Makefile.com
+include ../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64)
diff --git a/usr/src/lib/libresolv2_joy/common/bsd/daemon.c b/usr/src/lib/libresolv2_joy/common/bsd/daemon.c
new file mode 100644
index 0000000000..54ff83b753
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/bsd/daemon.c
@@ -0,0 +1,81 @@
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)daemon.c 8.1 (Berkeley) 6/4/93";
+static const char rcsid[] = "$Id: daemon.c,v 1.2 2005/04/27 04:56:10 sra Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/*
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "port_before.h"
+
+#include <fcntl.h>
+#include <paths.h>
+#include <unistd.h>
+
+#include "port_after.h"
+
+#ifndef NEED_DAEMON
+int __bind_daemon__;
+#else
+
+int
+daemon(int nochdir, int noclose) {
+ int fd;
+
+ switch (fork()) {
+ case -1:
+ return (-1);
+ case 0:
+ break;
+ default:
+ _exit(0);
+ }
+
+ if (setsid() == -1)
+ return (-1);
+
+ if (!nochdir)
+ (void)chdir("/");
+
+ if (!noclose && (fd = open(_PATH_DEVNULL, O_RDWR, 0)) != -1) {
+ (void)dup2(fd, STDIN_FILENO);
+ (void)dup2(fd, STDOUT_FILENO);
+ (void)dup2(fd, STDERR_FILENO);
+ if (fd > 2)
+ (void)close (fd);
+ }
+ return (0);
+}
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/bsd/ftruncate.c b/usr/src/lib/libresolv2_joy/common/bsd/ftruncate.c
new file mode 100644
index 0000000000..5ac4ebac9b
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/bsd/ftruncate.c
@@ -0,0 +1,64 @@
+#ifndef LINT
+static const char rcsid[] = "$Id: ftruncate.c,v 1.3 2005/04/27 18:16:45 sra Exp $";
+#endif
+
+/*! \file
+ * \brief
+ * ftruncate - set file size, BSD Style
+ *
+ * shortens or enlarges the file as neeeded
+ * uses some undocumented locking call. It is known to work on SCO unix,
+ * other vendors should try.
+ * The #error directive prevents unsupported OSes
+ */
+
+#include "port_before.h"
+
+#if defined(M_UNIX)
+#define OWN_FTRUNCATE
+#include <stdio.h>
+#ifdef _XOPEN_SOURCE
+#undef _XOPEN_SOURCE
+#endif
+#ifdef _POSIX_SOURCE
+#undef _POSIX_SOURCE
+#endif
+
+#include <fcntl.h>
+
+#include "port_after.h"
+
+int
+__ftruncate(int fd, long wantsize) {
+ long cursize;
+
+ /* determine current file size */
+ if ((cursize = lseek(fd, 0L, 2)) == -1)
+ return (-1);
+
+ /* maybe lengthen... */
+ if (cursize < wantsize) {
+ if (lseek(fd, wantsize - 1, 0) == -1 ||
+ write(fd, "", 1) == -1) {
+ return (-1);
+ }
+ return (0);
+ }
+
+ /* maybe shorten... */
+ if (wantsize < cursize) {
+ struct flock fl;
+
+ fl.l_whence = 0;
+ fl.l_len = 0;
+ fl.l_start = wantsize;
+ fl.l_type = F_WRLCK;
+ return (fcntl(fd, F_FREESP, &fl));
+ }
+ return (0);
+}
+#endif
+
+#ifndef OWN_FTRUNCATE
+int __bindcompat_ftruncate;
+#endif
diff --git a/usr/src/lib/libresolv2_joy/common/bsd/gettimeofday.c b/usr/src/lib/libresolv2_joy/common/bsd/gettimeofday.c
new file mode 100644
index 0000000000..2926a3575e
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/bsd/gettimeofday.c
@@ -0,0 +1,62 @@
+#ifndef LINT
+static const char rcsid[] = "$Id: gettimeofday.c,v 1.4 2005/04/27 04:56:11 sra Exp $";
+#endif
+
+#include "port_before.h"
+#include <stdio.h>
+#include <syslog.h>
+#include <sys/time.h>
+#include "port_after.h"
+
+#if !defined(NEED_GETTIMEOFDAY)
+/*%
+ * gettimeofday() occasionally returns invalid tv_usec on some platforms.
+ */
+#define MILLION 1000000
+#undef gettimeofday
+
+int
+isc__gettimeofday(struct timeval *tp, struct timezone *tzp) {
+ int res;
+
+ res = gettimeofday(tp, tzp);
+ if (res < 0)
+ return (res);
+ if (tp == NULL)
+ return (res);
+ if (tp->tv_usec < 0) {
+ do {
+ tp->tv_usec += MILLION;
+ tp->tv_sec--;
+ } while (tp->tv_usec < 0);
+ goto log;
+ } else if (tp->tv_usec > MILLION) {
+ do {
+ tp->tv_usec -= MILLION;
+ tp->tv_sec++;
+ } while (tp->tv_usec > MILLION);
+ goto log;
+ }
+ return (res);
+ log:
+ syslog(LOG_ERR, "gettimeofday: tv_usec out of range\n");
+ return (res);
+}
+#else
+int
+gettimeofday(struct timeval *tvp, struct _TIMEZONE *tzp) {
+ time_t clock, time(time_t *);
+
+ if (time(&clock) == (time_t) -1)
+ return (-1);
+ if (tvp) {
+ tvp->tv_sec = clock;
+ tvp->tv_usec = 0;
+ }
+ if (tzp) {
+ tzp->tz_minuteswest = 0;
+ tzp->tz_dsttime = 0;
+ }
+ return (0);
+}
+#endif /*NEED_GETTIMEOFDAY*/
diff --git a/usr/src/lib/libresolv2_joy/common/bsd/mktemp.c b/usr/src/lib/libresolv2_joy/common/bsd/mktemp.c
new file mode 100644
index 0000000000..001b24b58f
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/bsd/mktemp.c
@@ -0,0 +1,156 @@
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)mktemp.c 8.1 (Berkeley) 6/4/93";
+static const char rcsid[] = "$Id: mktemp.c,v 1.2 2005/04/27 04:56:11 sra Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/*
+ * Copyright (c) 1987, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Portions Copyright (c) 1993 by Digital Equipment Corporation.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies, and that
+ * the name of Digital Equipment Corporation not be used in advertising or
+ * publicity pertaining to distribution of the document or software without
+ * specific, written prior permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
+ * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+
+#include "port_after.h"
+
+#if (!defined(NEED_MKTEMP)) && (!defined(NEED_MKSTEMP))
+int __mktemp_unneeded__;
+#else
+
+static int gettemp(char *path, int *doopen);
+
+#ifdef NEED_MKSTEMP
+mkstemp(char *path) {
+ int fd;
+
+ return (gettemp(path, &fd) ? fd : -1);
+}
+#endif
+
+#ifdef NEED_MKTEMP
+char *
+mktemp(char *path) {
+ return(gettemp(path, (int *)NULL) ? path : (char *)NULL);
+}
+#endif
+
+static int
+gettemp(char *path, int *doopen) {
+ char *start, *trv;
+ struct stat sbuf;
+ u_int pid;
+
+ pid = getpid();
+ for (trv = path; *trv; ++trv); /*%< extra X's get set to 0's */
+ while (*--trv == 'X') {
+ *trv = (pid % 10) + '0';
+ pid /= 10;
+ }
+
+ /*
+ * check the target directory; if you have six X's and it
+ * doesn't exist this runs for a *very* long time.
+ */
+ for (start = trv + 1;; --trv) {
+ if (trv <= path)
+ break;
+ if (*trv == '/') {
+ *trv = '\0';
+ if (stat(path, &sbuf))
+ return(0);
+ if (!S_ISDIR(sbuf.st_mode)) {
+ errno = ENOTDIR;
+ return(0);
+ }
+ *trv = '/';
+ break;
+ }
+ }
+
+ for (;;) {
+ if (doopen) {
+ if ((*doopen =
+ open(path, O_CREAT|O_EXCL|O_RDWR, 0600)) >= 0)
+ return(1);
+ if (errno != EEXIST)
+ return(0);
+ }
+ else if (stat(path, &sbuf))
+ return(errno == ENOENT ? 1 : 0);
+
+ /* tricky little algorithm for backward compatibility */
+ for (trv = start;;) {
+ if (!*trv)
+ return(0);
+ if (*trv == 'z')
+ *trv++ = 'a';
+ else {
+ if (isdigit(*trv))
+ *trv = 'a';
+ else
+ ++*trv;
+ break;
+ }
+ }
+ }
+ /*NOTREACHED*/
+}
+
+#endif /*NEED_MKTEMP*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/bsd/putenv.c b/usr/src/lib/libresolv2_joy/common/bsd/putenv.c
new file mode 100644
index 0000000000..2dcbc57e6c
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/bsd/putenv.c
@@ -0,0 +1,27 @@
+#ifndef LINT
+static const char rcsid[] = "$Id: putenv.c,v 1.2 2005/04/27 04:56:11 sra Exp $";
+#endif
+
+#include "port_before.h"
+#include "port_after.h"
+
+/*%
+ * To give a little credit to Sun, SGI,
+ * and many vendors in the SysV world.
+ */
+
+#if !defined(NEED_PUTENV)
+int __bindcompat_putenv;
+#else
+int
+putenv(char *str) {
+ char *tmp;
+
+ for (tmp = str; *tmp && (*tmp != '='); tmp++)
+ ;
+
+ return (setenv(str, tmp, 1));
+}
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/bsd/readv.c b/usr/src/lib/libresolv2_joy/common/bsd/readv.c
new file mode 100644
index 0000000000..5fa691a92f
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/bsd/readv.c
@@ -0,0 +1,39 @@
+#ifndef LINT
+static const char rcsid[] = "$Id: readv.c,v 1.2 2005/04/27 04:56:11 sra Exp $";
+#endif
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+
+#include "port_after.h"
+
+#ifndef NEED_READV
+int __bindcompat_readv;
+#else
+
+int
+__readv(fd, vp, vpcount)
+ int fd;
+ const struct iovec *vp;
+ int vpcount;
+{
+ int count = 0;
+
+ while (vpcount-- > 0) {
+ int bytes = read(fd, vp->iov_base, vp->iov_len);
+
+ if (bytes < 0)
+ return (-1);
+ count += bytes;
+ if (bytes != vp->iov_len)
+ break;
+ vp++;
+ }
+ return (count);
+}
+#endif /* NEED_READV */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/bsd/setenv.c b/usr/src/lib/libresolv2_joy/common/bsd/setenv.c
new file mode 100644
index 0000000000..baf00f6ff2
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/bsd/setenv.c
@@ -0,0 +1,151 @@
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)setenv.c 8.1 (Berkeley) 6/4/93";
+static const char rcsid[] = "$Id: setenv.c,v 1.2 2005/04/27 04:56:11 sra Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/*
+ * Copyright (c) 1987, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "port_before.h"
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "port_after.h"
+
+#if !defined(NEED_SETENV)
+int __bindcompat_setenv;
+#else
+
+extern char **environ;
+
+static char *findenv(const char *name, int *offset);
+
+/*%
+ * setenv --
+ * Set the value of the environmental variable "name" to be
+ * "value". If rewrite is set, replace any current value.
+ */
+setenv(const char *name, const char *value, int rewrite) {
+ extern char **environ;
+ static int alloced; /*%< if allocated space before */
+ char *c;
+ int l_value, offset;
+
+ if (*value == '=') /*%< no `=' in value */
+ ++value;
+ l_value = strlen(value);
+ if ((c = findenv(name, &offset))) { /*%< find if already exists */
+ if (!rewrite)
+ return (0);
+ if (strlen(c) >= l_value) { /*%< old larger; copy over */
+ while (*c++ = *value++);
+ return (0);
+ }
+ } else { /*%< create new slot */
+ int cnt;
+ char **p;
+
+ for (p = environ, cnt = 0; *p; ++p, ++cnt);
+ if (alloced) { /*%< just increase size */
+ environ = (char **)realloc((char *)environ,
+ (size_t)(sizeof(char *) * (cnt + 2)));
+ if (!environ)
+ return (-1);
+ }
+ else { /*%< get new space */
+ alloced = 1; /*%< copy old entries into it */
+ p = malloc((size_t)(sizeof(char *) * (cnt + 2)));
+ if (!p)
+ return (-1);
+ memcpy(p, environ, cnt * sizeof(char *));
+ environ = p;
+ }
+ environ[cnt + 1] = NULL;
+ offset = cnt;
+ }
+ for (c = (char *)name; *c && *c != '='; ++c); /*%< no `=' in name */
+ if (!(environ[offset] = /*%< name + `=' + value */
+ malloc((size_t)((int)(c - name) + l_value + 2))))
+ return (-1);
+ for (c = environ[offset]; (*c = *name++) && *c != '='; ++c);
+ for (*c++ = '='; *c++ = *value++;);
+ return (0);
+}
+
+/*%
+ * unsetenv(name) --
+ * Delete environmental variable "name".
+ */
+void
+unsetenv(const char *name) {
+ char **p;
+ int offset;
+
+ while (findenv(name, &offset)) /*%< if set multiple times */
+ for (p = &environ[offset];; ++p)
+ if (!(*p = *(p + 1)))
+ break;
+}
+
+/*%
+ * findenv --
+ * Returns pointer to value associated with name, if any, else NULL.
+ * Sets offset to be the offset of the name/value combination in the
+ * environmental array, for use by setenv(3) and unsetenv(3).
+ * Explicitly removes '=' in argument name.
+ *
+ * This routine *should* be a static; don't use it.
+ */
+static char *
+findenv(const char *name, int *offset) {
+ const char *np;
+ char **p, *c;
+ int len;
+
+ if (name == NULL || environ == NULL)
+ return (NULL);
+ for (np = name; *np && *np != '='; ++np)
+ continue;
+ len = np - name;
+ for (p = environ; (c = *p) != NULL; ++p)
+ if (strncmp(c, name, len) == 0 && c[len] == '=') {
+ *offset = p - environ;
+ return (c + len + 1);
+ }
+ return (NULL);
+}
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/bsd/setitimer.c b/usr/src/lib/libresolv2_joy/common/bsd/setitimer.c
new file mode 100644
index 0000000000..67881d7ca8
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/bsd/setitimer.c
@@ -0,0 +1,29 @@
+#ifndef LINT
+static const char rcsid[] = "$Id: setitimer.c,v 1.2 2005/04/27 04:56:12 sra Exp $";
+#endif
+
+#include "port_before.h"
+
+#include <sys/time.h>
+
+#include "port_after.h"
+
+/*%
+ * Setitimer emulation routine.
+ */
+#ifndef NEED_SETITIMER
+int __bindcompat_setitimer;
+#else
+
+int
+__setitimer(int which, const struct itimerval *value,
+ struct itimerval *ovalue)
+{
+ if (alarm(value->it_value.tv_sec) >= 0)
+ return (0);
+ else
+ return (-1);
+}
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/bsd/strcasecmp.c b/usr/src/lib/libresolv2_joy/common/bsd/strcasecmp.c
new file mode 100644
index 0000000000..0c9f0dccf0
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/bsd/strcasecmp.c
@@ -0,0 +1,124 @@
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)strcasecmp.c 8.1 (Berkeley) 6/4/93";
+static const char rcsid[] = "$Id: strcasecmp.c,v 1.2 2005/04/27 04:56:12 sra Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/*
+ * Copyright (c) 1987, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "port_before.h"
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/cdefs.h>
+
+#include <string.h>
+
+#include "port_after.h"
+
+#ifndef NEED_STRCASECMP
+int __strcasecmp_unneeded__;
+#else
+
+/*%
+ * This array is designed for mapping upper and lower case letter
+ * together for a case independent comparison. The mappings are
+ * based upon ascii character sequences.
+ */
+static const u_char charmap[] = {
+ 0000, 0001, 0002, 0003, 0004, 0005, 0006, 0007,
+ 0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
+ 0020, 0021, 0022, 0023, 0024, 0025, 0026, 0027,
+ 0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037,
+ 0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
+ 0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
+ 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
+ 0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
+ 0100, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
+ 0150, 0151, 0152, 0153, 0154, 0155, 0156, 0157,
+ 0160, 0161, 0162, 0163, 0164, 0165, 0166, 0167,
+ 0170, 0171, 0172, 0133, 0134, 0135, 0136, 0137,
+ 0140, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
+ 0150, 0151, 0152, 0153, 0154, 0155, 0156, 0157,
+ 0160, 0161, 0162, 0163, 0164, 0165, 0166, 0167,
+ 0170, 0171, 0172, 0173, 0174, 0175, 0176, 0177,
+ 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
+ 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
+ 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
+ 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
+ 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
+ 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
+ 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
+ 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
+ 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
+ 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
+ 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
+ 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
+ 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
+ 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
+ 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
+ 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
+};
+
+int
+strcasecmp(const char *s1, const char *s2) {
+ const u_char *cm = charmap,
+ *us1 = (const u_char *)s1,
+ *us2 = (const u_char *)s2;
+
+ while (cm[*us1] == cm[*us2++])
+ if (*us1++ == '\0')
+ return (0);
+ return (cm[*us1] - cm[*--us2]);
+}
+
+int
+strncasecmp(const char *s1, const char *s2, size_t n) {
+ if (n != 0) {
+ const u_char *cm = charmap,
+ *us1 = (const u_char *)s1,
+ *us2 = (const u_char *)s2;
+
+ do {
+ if (cm[*us1] != cm[*us2++])
+ return (cm[*us1] - cm[*--us2]);
+ if (*us1++ == '\0')
+ break;
+ } while (--n != 0);
+ }
+ return (0);
+}
+
+#endif /*NEED_STRCASECMP*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/bsd/strdup.c b/usr/src/lib/libresolv2_joy/common/bsd/strdup.c
new file mode 100644
index 0000000000..a8d31e9587
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/bsd/strdup.c
@@ -0,0 +1,20 @@
+#include "port_before.h"
+
+#include <stdlib.h>
+
+#include "port_after.h"
+
+#ifndef NEED_STRDUP
+int __bind_strdup_unneeded;
+#else
+char *
+strdup(const char *src) {
+ char *dst = malloc(strlen(src) + 1);
+
+ if (dst)
+ strcpy(dst, src);
+ return (dst);
+}
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/bsd/strerror.c b/usr/src/lib/libresolv2_joy/common/bsd/strerror.c
new file mode 100644
index 0000000000..5973e63d5b
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/bsd/strerror.c
@@ -0,0 +1,94 @@
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)strerror.c 8.1 (Berkeley) 6/4/93";
+static const char rcsid[] = "$Id: strerror.c,v 1.6 2008/02/18 03:49:08 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/*
+ * Copyright (c) 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "port_before.h"
+
+#include <sys/param.h>
+#include <sys/types.h>
+
+#include <string.h>
+
+#include "port_after.h"
+
+#ifndef NEED_STRERROR
+int __strerror_unneeded__;
+#else
+
+#ifdef USE_SYSERROR_LIST
+extern int sys_nerr;
+extern char *sys_errlist[];
+#endif
+
+const char *
+isc_strerror(int num) {
+#define UPREFIX "Unknown error: "
+ static char ebuf[40] = UPREFIX; /*%< 64-bit number + slop */
+ u_int errnum;
+ char *p, *t;
+#ifndef USE_SYSERROR_LIST
+ const char *ret;
+#endif
+ char tmp[40];
+
+ errnum = num; /*%< convert to unsigned */
+#ifdef USE_SYSERROR_LIST
+ if (errnum < (u_int)sys_nerr)
+ return (sys_errlist[errnum]);
+#else
+#undef strerror
+ ret = strerror(num); /*%< call strerror() in libc */
+ if (ret != NULL)
+ return(ret);
+#endif
+
+ /* Do this by hand, so we don't include stdio(3). */
+ t = tmp;
+ do {
+ *t++ = "0123456789"[errnum % 10];
+ } while (errnum /= 10);
+ for (p = ebuf + sizeof(UPREFIX) - 1;;) {
+ *p++ = *--t;
+ if (t <= tmp)
+ break;
+ }
+ return (ebuf);
+}
+
+#endif /*NEED_STRERROR*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/bsd/strpbrk.c b/usr/src/lib/libresolv2_joy/common/bsd/strpbrk.c
new file mode 100644
index 0000000000..4c12d88e1c
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/bsd/strpbrk.c
@@ -0,0 +1,70 @@
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)strpbrk.c 8.1 (Berkeley) 6/4/93";
+static const char rcsid[] = "$Id: strpbrk.c,v 1.2 2005/04/27 04:56:12 sra Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/*
+ * Copyright (c) 1985, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "port_before.h"
+
+#include <sys/param.h>
+#include <sys/cdefs.h>
+
+#include <string.h>
+
+#include "port_after.h"
+
+#ifndef NEED_STRPBRK
+int __strpbrk_unneeded__;
+#else
+
+/*%
+ * Find the first occurrence in s1 of a character in s2 (excluding NUL).
+ */
+char *
+strpbrk(const char *s1, const char *s2) {
+ const char *scanp;
+ int c, sc;
+
+ while ((c = *s1++) != 0) {
+ for (scanp = s2; (sc = *scanp++) != 0;)
+ if (sc == c)
+ return ((char *)(s1 - 1));
+ }
+ return (NULL);
+}
+
+#endif /*NEED_STRPBRK*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/bsd/strsep.c b/usr/src/lib/libresolv2_joy/common/bsd/strsep.c
new file mode 100644
index 0000000000..c7969f0028
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/bsd/strsep.c
@@ -0,0 +1,88 @@
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "strsep.c 8.1 (Berkeley) 6/4/93";
+static const char rcsid[] = "$Id: strsep.c,v 1.2 2005/04/27 04:56:12 sra Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/*
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "port_before.h"
+#include <sys/cdefs.h>
+#include <string.h>
+#include <stdio.h>
+#include "port_after.h"
+
+#ifndef NEED_STRSEP
+int __strsep_unneeded__;
+#else
+
+/*%
+ * Get next token from string *stringp, where tokens are possibly-empty
+ * strings separated by characters from delim.
+ *
+ * Writes NULs into the string at *stringp to end tokens.
+ * delim need not remain constant from call to call.
+ * On return, *stringp points past the last NUL written (if there might
+ * be further tokens), or is NULL (if there are definitely no more tokens).
+ *
+ * If *stringp is NULL, strsep returns NULL.
+ */
+char *
+strsep(char **stringp, const char *delim) {
+ char *s;
+ const char *spanp;
+ int c, sc;
+ char *tok;
+
+ if ((s = *stringp) == NULL)
+ return (NULL);
+ for (tok = s;;) {
+ c = *s++;
+ spanp = delim;
+ do {
+ if ((sc = *spanp++) == c) {
+ if (c == 0)
+ s = NULL;
+ else
+ s[-1] = 0;
+ *stringp = s;
+ return (tok);
+ }
+ } while (sc != 0);
+ }
+ /* NOTREACHED */
+}
+
+#endif /*NEED_STRSEP*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/bsd/strtoul.c b/usr/src/lib/libresolv2_joy/common/bsd/strtoul.c
new file mode 100644
index 0000000000..b37ff72729
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/bsd/strtoul.c
@@ -0,0 +1,119 @@
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)strtoul.c 8.1 (Berkeley) 6/4/93";
+static const char rcsid[] = "$Id: strtoul.c,v 1.4 2008/02/18 03:49:08 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/*
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdlib.h>
+
+#include "port_after.h"
+
+#ifndef NEED_STRTOUL
+int __strtoul_unneeded__;
+#else
+
+/*%
+ * Convert a string to an unsigned long integer.
+ *
+ * Ignores `locale' stuff. Assumes that the upper and lower case
+ * alphabets and digits are each contiguous.
+ */
+u_long
+strtoul(const char *nptr, char **endptr, int base) {
+ const char *s = nptr;
+ u_long acc, cutoff;
+ int neg, c, any, cutlim;
+
+ neg = 0;
+
+ /*
+ * See strtol for comments as to the logic used.
+ */
+ do {
+ c = *(const unsigned char *)s++;
+ } while (isspace(c));
+ if (c == '-') {
+ neg = 1;
+ c = *s++;
+ } else if (c == '+')
+ c = *s++;
+ if ((base == 0 || base == 16) &&
+ c == '0' && (*s == 'x' || *s == 'X')) {
+ c = s[1];
+ s += 2;
+ base = 16;
+ }
+ if (base == 0)
+ base = c == '0' ? 8 : 10;
+ cutoff = (u_long)ULONG_MAX / (u_long)base;
+ cutlim = (u_long)ULONG_MAX % (u_long)base;
+ for (acc = 0, any = 0;; c = *(const unsigned char*)s++) {
+ if (isdigit(c))
+ c -= '0';
+ else if (isalpha(c))
+ c -= isupper(c) ? 'A' - 10 : 'a' - 10;
+ else
+ break;
+ if (c >= base)
+ break;
+ if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
+ any = -1;
+ else {
+ any = 1;
+ acc *= base;
+ acc += c;
+ }
+ }
+ if (any < 0) {
+ acc = ULONG_MAX;
+ errno = ERANGE;
+ } else if (neg)
+ acc = -acc;
+ if (endptr != 0)
+ DE_CONST((any ? s - 1 : nptr), *endptr);
+ return (acc);
+}
+
+#endif /*NEED_STRTOUL*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/bsd/utimes.c b/usr/src/lib/libresolv2_joy/common/bsd/utimes.c
new file mode 100644
index 0000000000..2f65cffe25
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/bsd/utimes.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1997,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <utime.h>
+
+#include "port_after.h"
+
+#ifndef NEED_UTIMES
+int __bind_utimes_unneeded;
+#else
+
+int
+__utimes(char *filename, struct timeval *tvp) {
+ struct utimbuf utb;
+
+ utb.actime = (time_t)tvp[0].tv_sec;
+ utb.modtime = (time_t)tvp[1].tv_sec;
+ return (utime(filename, &utb));
+}
+
+#endif /* NEED_UTIMES */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/bsd/writev.c b/usr/src/lib/libresolv2_joy/common/bsd/writev.c
new file mode 100644
index 0000000000..65baa71cfc
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/bsd/writev.c
@@ -0,0 +1,89 @@
+#ifndef LINT
+static const char rcsid[] = "$Id: writev.c,v 1.3 2005/04/27 04:56:13 sra Exp $";
+#endif
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+
+#include "port_after.h"
+
+#ifndef NEED_WRITEV
+int __bindcompat_writev;
+#else
+
+#ifdef _CRAY
+#define OWN_WRITEV
+int
+__writev(int fd, struct iovec *iov, int iovlen)
+{
+ struct stat statbuf;
+
+ if (fstat(fd, &statbuf) < 0)
+ return (-1);
+
+ /*
+ * Allow for atomic writes to network.
+ */
+ if (statbuf.st_mode & S_IFSOCK) {
+ struct msghdr mesg;
+
+ memset(&mesg, 0, sizeof(mesg));
+ mesg.msg_name = 0;
+ mesg.msg_namelen = 0;
+ mesg.msg_iov = iov;
+ mesg.msg_iovlen = iovlen;
+ mesg.msg_accrights = 0;
+ mesg.msg_accrightslen = 0;
+ return (sendmsg(fd, &mesg, 0));
+ } else {
+ struct iovec *tv;
+ int i, rcode = 0, count = 0;
+
+ for (i = 0, tv = iov; i <= iovlen; tv++) {
+ rcode = write(fd, tv->iov_base, tv->iov_len);
+
+ if (rcode < 0)
+ break;
+
+ count += rcode;
+ }
+
+ if (count == 0)
+ return (rcode);
+ else
+ return (count);
+ }
+}
+
+#else /*_CRAY*/
+
+int
+__writev(fd, vp, vpcount)
+ int fd;
+ const struct iovec *vp;
+ int vpcount;
+{
+ int count = 0;
+
+ while (vpcount-- > 0) {
+ int written = write(fd, vp->iov_base, vp->iov_len);
+
+ if (written < 0)
+ return (-1);
+ count += written;
+ if (written != vp->iov_len)
+ break;
+ vp++;
+ }
+ return (count);
+}
+
+#endif /*_CRAY*/
+
+#endif /*NEED_WRITEV*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/dst/dst_api.c b/usr/src/lib/libresolv2_joy/common/dst/dst_api.c
new file mode 100644
index 0000000000..a3e48077ad
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/dst/dst_api.c
@@ -0,0 +1,1048 @@
+#ifndef LINT
+static const char rcsid[] = "$Header: /proj/cvs/prod/libbind/dst/dst_api.c,v 1.17 2007/09/24 17:18:25 each Exp $";
+#endif
+
+/*
+ * Portions Copyright (c) 1995-1998 by Trusted Information Systems, Inc.
+ *
+ * Permission to use, copy modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND TRUSTED INFORMATION SYSTEMS
+ * DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
+ * TRUSTED INFORMATION SYSTEMS BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THE SOFTWARE.
+ */
+/*
+ * This file contains the interface between the DST API and the crypto API.
+ * This is the only file that needs to be changed if the crypto system is
+ * changed. Exported functions are:
+ * void dst_init() Initialize the toolkit
+ * int dst_check_algorithm() Function to determines if alg is suppored.
+ * int dst_compare_keys() Function to compare two keys for equality.
+ * int dst_sign_data() Incremental signing routine.
+ * int dst_verify_data() Incremental verify routine.
+ * int dst_generate_key() Function to generate new KEY
+ * DST_KEY *dst_read_key() Function to retrieve private/public KEY.
+ * void dst_write_key() Function to write out a key.
+ * DST_KEY *dst_dnskey_to_key() Function to convert DNS KEY RR to a DST
+ * KEY structure.
+ * int dst_key_to_dnskey() Function to return a public key in DNS
+ * format binary
+ * DST_KEY *dst_buffer_to_key() Converst a data in buffer to KEY
+ * int *dst_key_to_buffer() Writes out DST_KEY key matterial in buffer
+ * void dst_free_key() Releases all memory referenced by key structure
+ */
+
+#include "port_before.h"
+#include <stdio.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <memory.h>
+#include <ctype.h>
+#include <time.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#include "dst_internal.h"
+#include "port_after.h"
+
+/* static variables */
+static int done_init = 0;
+dst_func *dst_t_func[DST_MAX_ALGS];
+const char *key_file_fmt_str = "Private-key-format: v%s\nAlgorithm: %d (%s)\n";
+const char *dst_path = "";
+
+/* internal I/O functions */
+static DST_KEY *dst_s_read_public_key(const char *in_name,
+ const u_int16_t in_id, int in_alg);
+static int dst_s_read_private_key_file(char *name, DST_KEY *pk_key,
+ u_int16_t in_id, int in_alg);
+static int dst_s_write_public_key(const DST_KEY *key);
+static int dst_s_write_private_key(const DST_KEY *key);
+
+/* internal function to set up data structure */
+static DST_KEY *dst_s_get_key_struct(const char *name, const int alg,
+ const int flags, const int protocol,
+ const int bits);
+
+/*%
+ * dst_init
+ * This function initializes the Digital Signature Toolkit.
+ * Right now, it just checks the DSTKEYPATH environment variable.
+ * Parameters
+ * none
+ * Returns
+ * none
+ */
+void
+dst_init()
+{
+ char *s;
+ int len;
+
+ if (done_init != 0)
+ return;
+ done_init = 1;
+
+ s = getenv("DSTKEYPATH");
+ len = 0;
+ if (s) {
+ struct stat statbuf;
+
+ len = strlen(s);
+ if (len > PATH_MAX) {
+ EREPORT(("%s is longer than %d characters, ignoring\n",
+ s, PATH_MAX));
+ } else if (stat(s, &statbuf) != 0 || !S_ISDIR(statbuf.st_mode)) {
+ EREPORT(("%s is not a valid directory\n", s));
+ } else {
+ char *tmp;
+ tmp = (char *) malloc(len + 2);
+ memcpy(tmp, s, len + 1);
+ if (tmp[strlen(tmp) - 1] != '/') {
+ tmp[strlen(tmp) + 1] = 0;
+ tmp[strlen(tmp)] = '/';
+ }
+ dst_path = tmp;
+ }
+ }
+ memset(dst_t_func, 0, sizeof(dst_t_func));
+ /* first one is selected */
+ dst_hmac_md5_init();
+}
+
+/*%
+ * dst_check_algorithm
+ * This function determines if the crypto system for the specified
+ * algorithm is present.
+ * Parameters
+ * alg 1 KEY_RSA
+ * 3 KEY_DSA
+ * 157 KEY_HMAC_MD5
+ * future algorithms TBD and registered with IANA.
+ * Returns
+ * 1 - The algorithm is available.
+ * 0 - The algorithm is not available.
+ */
+int
+dst_check_algorithm(const int alg)
+{
+ return (dst_t_func[alg] != NULL);
+}
+
+/*%
+ * dst_s_get_key_struct
+ * This function allocates key structure and fills in some of the
+ * fields of the structure.
+ * Parameters:
+ * name: the name of the key
+ * alg: the algorithm number
+ * flags: the dns flags of the key
+ * protocol: the dns protocol of the key
+ * bits: the size of the key
+ * Returns:
+ * NULL if error
+ * valid pointer otherwise
+ */
+static DST_KEY *
+dst_s_get_key_struct(const char *name, const int alg, const int flags,
+ const int protocol, const int bits)
+{
+ DST_KEY *new_key = NULL;
+
+ if (dst_check_algorithm(alg)) /*%< make sure alg is available */
+ new_key = (DST_KEY *) malloc(sizeof(*new_key));
+ if (new_key == NULL)
+ return (NULL);
+
+ memset(new_key, 0, sizeof(*new_key));
+ new_key->dk_key_name = strdup(name);
+ if (new_key->dk_key_name == NULL) {
+ free(new_key);
+ return (NULL);
+ }
+ new_key->dk_alg = alg;
+ new_key->dk_flags = flags;
+ new_key->dk_proto = protocol;
+ new_key->dk_KEY_struct = NULL;
+ new_key->dk_key_size = bits;
+ new_key->dk_func = dst_t_func[alg];
+ return (new_key);
+}
+
+/*%
+ * dst_compare_keys
+ * Compares two keys for equality.
+ * Parameters
+ * key1, key2 Two keys to be compared.
+ * Returns
+ * 0 The keys are equal.
+ * non-zero The keys are not equal.
+ */
+
+int
+dst_compare_keys(const DST_KEY *key1, const DST_KEY *key2)
+{
+ if (key1 == key2)
+ return (0);
+ if (key1 == NULL || key2 == NULL)
+ return (4);
+ if (key1->dk_alg != key2->dk_alg)
+ return (1);
+ if (key1->dk_key_size != key2->dk_key_size)
+ return (2);
+ if (key1->dk_id != key2->dk_id)
+ return (3);
+ return (key1->dk_func->compare(key1, key2));
+}
+
+/*%
+ * dst_sign_data
+ * An incremental signing function. Data is signed in steps.
+ * First the context must be initialized (SIG_MODE_INIT).
+ * Then data is hashed (SIG_MODE_UPDATE). Finally the signature
+ * itself is created (SIG_MODE_FINAL). This function can be called
+ * once with INIT, UPDATE and FINAL modes all set, or it can be
+ * called separately with a different mode set for each step. The
+ * UPDATE step can be repeated.
+ * Parameters
+ * mode A bit mask used to specify operation(s) to be performed.
+ * SIG_MODE_INIT 1 Initialize digest
+ * SIG_MODE_UPDATE 2 Add data to digest
+ * SIG_MODE_FINAL 4 Generate signature
+ * from signature
+ * SIG_MODE_ALL (SIG_MODE_INIT,SIG_MODE_UPDATE,SIG_MODE_FINAL
+ * data Data to be signed.
+ * len The length in bytes of data to be signed.
+ * in_key Contains a private key to sign with.
+ * KEY structures should be handled (created, converted,
+ * compared, stored, freed) by the DST.
+ * signature
+ * The location to which the signature will be written.
+ * sig_len Length of the signature field in bytes.
+ * Return
+ * 0 Successfull INIT or Update operation
+ * &gt;0 success FINAL (sign) operation
+ * &lt;0 failure
+ */
+
+int
+dst_sign_data(const int mode, DST_KEY *in_key, void **context,
+ const u_char *data, const int len,
+ u_char *signature, const int sig_len)
+{
+ DUMP(data, mode, len, "dst_sign_data()");
+
+ if (mode & SIG_MODE_FINAL &&
+ (in_key->dk_KEY_struct == NULL || signature == NULL))
+ return (MISSING_KEY_OR_SIGNATURE);
+
+ if (in_key->dk_func && in_key->dk_func->sign)
+ return (in_key->dk_func->sign(mode, in_key, context, data, len,
+ signature, sig_len));
+ return (UNKNOWN_KEYALG);
+}
+
+/*%
+ * dst_verify_data
+ * An incremental verify function. Data is verified in steps.
+ * First the context must be initialized (SIG_MODE_INIT).
+ * Then data is hashed (SIG_MODE_UPDATE). Finally the signature
+ * is verified (SIG_MODE_FINAL). This function can be called
+ * once with INIT, UPDATE and FINAL modes all set, or it can be
+ * called separately with a different mode set for each step. The
+ * UPDATE step can be repeated.
+ * Parameters
+ * mode Operations to perform this time.
+ * SIG_MODE_INIT 1 Initialize digest
+ * SIG_MODE_UPDATE 2 add data to digest
+ * SIG_MODE_FINAL 4 verify signature
+ * SIG_MODE_ALL
+ * (SIG_MODE_INIT,SIG_MODE_UPDATE,SIG_MODE_FINAL)
+ * data Data to pass through the hash function.
+ * len Length of the data in bytes.
+ * in_key Key for verification.
+ * signature Location of signature.
+ * sig_len Length of the signature in bytes.
+ * Returns
+ * 0 Verify success
+ * Non-Zero Verify Failure
+ */
+
+int
+dst_verify_data(const int mode, DST_KEY *in_key, void **context,
+ const u_char *data, const int len,
+ const u_char *signature, const int sig_len)
+{
+ DUMP(data, mode, len, "dst_verify_data()");
+ if (mode & SIG_MODE_FINAL &&
+ (in_key->dk_KEY_struct == NULL || signature == NULL))
+ return (MISSING_KEY_OR_SIGNATURE);
+
+ if (in_key->dk_func == NULL || in_key->dk_func->verify == NULL)
+ return (UNSUPPORTED_KEYALG);
+ return (in_key->dk_func->verify(mode, in_key, context, data, len,
+ signature, sig_len));
+}
+
+/*%
+ * dst_read_private_key
+ * Access a private key. First the list of private keys that have
+ * already been read in is searched, then the key accessed on disk.
+ * If the private key can be found, it is returned. If the key cannot
+ * be found, a null pointer is returned. The options specify required
+ * key characteristics. If the private key requested does not have
+ * these characteristics, it will not be read.
+ * Parameters
+ * in_keyname The private key name.
+ * in_id The id of the private key.
+ * options DST_FORCE_READ Read from disk - don't use a previously
+ * read key.
+ * DST_CAN_SIGN The key must be useable for signing.
+ * DST_NO_AUTHEN The key must be useable for authentication.
+ * DST_STANDARD Return any key
+ * Returns
+ * NULL If there is no key found in the current directory or
+ * this key has not been loaded before.
+ * !NULL Success - KEY structure returned.
+ */
+
+DST_KEY *
+dst_read_key(const char *in_keyname, const u_int16_t in_id,
+ const int in_alg, const int type)
+{
+ char keyname[PATH_MAX];
+ DST_KEY *dg_key = NULL, *pubkey = NULL;
+
+ if (!dst_check_algorithm(in_alg)) { /*%< make sure alg is available */
+ EREPORT(("dst_read_private_key(): Algorithm %d not suppored\n",
+ in_alg));
+ return (NULL);
+ }
+ if ((type & (DST_PUBLIC | DST_PRIVATE)) == 0)
+ return (NULL);
+ if (in_keyname == NULL) {
+ EREPORT(("dst_read_private_key(): Null key name passed in\n"));
+ return (NULL);
+ } else if (strlen(in_keyname) >= sizeof(keyname)) {
+ EREPORT(("dst_read_private_key(): keyname too big\n"));
+ return (NULL);
+ } else
+ strcpy(keyname, in_keyname);
+
+ /* before I read in the public key, check if it is allowed to sign */
+ if ((pubkey = dst_s_read_public_key(keyname, in_id, in_alg)) == NULL)
+ return (NULL);
+
+ if (type == DST_PUBLIC)
+ return pubkey;
+
+ if (!(dg_key = dst_s_get_key_struct(keyname, pubkey->dk_alg,
+ pubkey->dk_flags, pubkey->dk_proto,
+ 0)))
+ return (dg_key);
+ /* Fill in private key and some fields in the general key structure */
+ if (dst_s_read_private_key_file(keyname, dg_key, pubkey->dk_id,
+ pubkey->dk_alg) == 0)
+ dg_key = dst_free_key(dg_key);
+
+ (void)dst_free_key(pubkey);
+ return (dg_key);
+}
+
+int
+dst_write_key(const DST_KEY *key, const int type)
+{
+ int pub = 0, priv = 0;
+
+ if (key == NULL)
+ return (0);
+ if (!dst_check_algorithm(key->dk_alg)) { /*%< make sure alg is available */
+ EREPORT(("dst_write_key(): Algorithm %d not suppored\n",
+ key->dk_alg));
+ return (UNSUPPORTED_KEYALG);
+ }
+ if ((type & (DST_PRIVATE|DST_PUBLIC)) == 0)
+ return (0);
+
+ if (type & DST_PUBLIC)
+ if ((pub = dst_s_write_public_key(key)) < 0)
+ return (pub);
+ if (type & DST_PRIVATE)
+ if ((priv = dst_s_write_private_key(key)) < 0)
+ return (priv);
+ return (priv+pub);
+}
+
+/*%
+ * dst_write_private_key
+ * Write a private key to disk. The filename will be of the form:
+ * K&lt;key-&gt;dk_name&gt;+&lt;key-&gt;dk_alg+&gt;&lt;key-d&gt;k_id.&gt;&lt;private key suffix&gt;.
+ * If there is already a file with this name, an error is returned.
+ *
+ * Parameters
+ * key A DST managed key structure that contains
+ * all information needed about a key.
+ * Return
+ * &gt;= 0 Correct behavior. Returns length of encoded key value
+ * written to disk.
+ * &lt; 0 error.
+ */
+
+static int
+dst_s_write_private_key(const DST_KEY *key)
+{
+ u_char encoded_block[RAW_KEY_SIZE];
+ char file[PATH_MAX];
+ int len;
+ FILE *fp;
+
+ /* First encode the key into the portable key format */
+ if (key == NULL)
+ return (-1);
+ if (key->dk_KEY_struct == NULL)
+ return (0); /*%< null key has no private key */
+ if (key->dk_func == NULL || key->dk_func->to_file_fmt == NULL) {
+ EREPORT(("dst_write_private_key(): Unsupported operation %d\n",
+ key->dk_alg));
+ return (-5);
+ } else if ((len = key->dk_func->to_file_fmt(key, (char *)encoded_block,
+ sizeof(encoded_block))) <= 0) {
+ EREPORT(("dst_write_private_key(): Failed encoding private RSA bsafe key %d\n", len));
+ return (-8);
+ }
+ /* Now I can create the file I want to use */
+ dst_s_build_filename(file, key->dk_key_name, key->dk_id, key->dk_alg,
+ PRIVATE_KEY, PATH_MAX);
+
+ /* Do not overwrite an existing file */
+ if ((fp = dst_s_fopen(file, "w", 0600)) != NULL) {
+ int nn;
+ if ((nn = fwrite(encoded_block, 1, len, fp)) != len) {
+ EREPORT(("dst_write_private_key(): Write failure on %s %d != %d errno=%d\n",
+ file, len, nn, errno));
+ fclose(fp);
+ return (-5);
+ }
+ fclose(fp);
+ } else {
+ EREPORT(("dst_write_private_key(): Can not create file %s\n"
+ ,file));
+ return (-6);
+ }
+ memset(encoded_block, 0, len);
+ return (len);
+}
+
+/*%
+*
+ * dst_read_public_key
+ * Read a public key from disk and store in a DST key structure.
+ * Parameters
+ * in_name K&lt;in_name&gt;&lt;in_id&gt;.&lt;public key suffix&gt; is the
+ * filename of the key file to be read.
+ * Returns
+ * NULL If the key does not exist or no name is supplied.
+ * NON-NULL Initialized key structure if the key exists.
+ */
+
+static DST_KEY *
+dst_s_read_public_key(const char *in_name, const u_int16_t in_id, int in_alg)
+{
+ int flags, proto, alg, len, dlen;
+ int c;
+ char name[PATH_MAX], enckey[RAW_KEY_SIZE], *notspace;
+ u_char deckey[RAW_KEY_SIZE];
+ FILE *fp;
+
+ if (in_name == NULL) {
+ EREPORT(("dst_read_public_key(): No key name given\n"));
+ return (NULL);
+ }
+ if (dst_s_build_filename(name, in_name, in_id, in_alg, PUBLIC_KEY,
+ PATH_MAX) == -1) {
+ EREPORT(("dst_read_public_key(): Cannot make filename from %s, %d, and %s\n",
+ in_name, in_id, PUBLIC_KEY));
+ return (NULL);
+ }
+ /*
+ * Open the file and read it's formatted contents up to key
+ * File format:
+ * domain.name [ttl] [IN] KEY &lt;flags&gt; &lt;protocol&gt; &lt;algorithm&gt; &lt;key&gt;
+ * flags, proto, alg stored as decimal (or hex numbers FIXME).
+ * (FIXME: handle parentheses for line continuation.)
+ */
+ if ((fp = dst_s_fopen(name, "r", 0)) == NULL) {
+ EREPORT(("dst_read_public_key(): Public Key not found %s\n",
+ name));
+ return (NULL);
+ }
+ /* Skip domain name, which ends at first blank */
+ while ((c = getc(fp)) != EOF)
+ if (isspace(c))
+ break;
+ /* Skip blank to get to next field */
+ while ((c = getc(fp)) != EOF)
+ if (!isspace(c))
+ break;
+
+ /* Skip optional TTL -- if initial digit, skip whole word. */
+ if (isdigit(c)) {
+ while ((c = getc(fp)) != EOF)
+ if (isspace(c))
+ break;
+ while ((c = getc(fp)) != EOF)
+ if (!isspace(c))
+ break;
+ }
+ /* Skip optional "IN" */
+ if (c == 'I' || c == 'i') {
+ while ((c = getc(fp)) != EOF)
+ if (isspace(c))
+ break;
+ while ((c = getc(fp)) != EOF)
+ if (!isspace(c))
+ break;
+ }
+ /* Locate and skip "KEY" */
+ if (c != 'K' && c != 'k') {
+ EREPORT(("\"KEY\" doesn't appear in file: %s", name));
+ return NULL;
+ }
+ while ((c = getc(fp)) != EOF)
+ if (isspace(c))
+ break;
+ while ((c = getc(fp)) != EOF)
+ if (!isspace(c))
+ break;
+ ungetc(c, fp); /*%< return the charcter to the input field */
+ /* Handle hex!! FIXME. */
+
+ if (fscanf(fp, "%d %d %d", &flags, &proto, &alg) != 3) {
+ EREPORT(("dst_read_public_key(): Can not read flag/proto/alg field from %s\n"
+ ,name));
+ return (NULL);
+ }
+ /* read in the key string */
+ fgets(enckey, sizeof(enckey), fp);
+
+ /* If we aren't at end-of-file, something is wrong. */
+ while ((c = getc(fp)) != EOF)
+ if (!isspace(c))
+ break;
+ if (!feof(fp)) {
+ EREPORT(("Key too long in file: %s", name));
+ return NULL;
+ }
+ fclose(fp);
+
+ if ((len = strlen(enckey)) <= 0)
+ return (NULL);
+
+ /* discard \n */
+ enckey[--len] = '\0';
+
+ /* remove leading spaces */
+ for (notspace = (char *) enckey; isspace((*notspace)&0xff); len--)
+ notspace++;
+
+ dlen = b64_pton(notspace, deckey, sizeof(deckey));
+ if (dlen < 0) {
+ EREPORT(("dst_read_public_key: bad return from b64_pton = %d",
+ dlen));
+ return (NULL);
+ }
+ /* store key and info in a key structure that is returned */
+/* return dst_store_public_key(in_name, alg, proto, 666, flags, deckey,
+ dlen);*/
+ return dst_buffer_to_key(in_name, alg, flags, proto, deckey, dlen);
+}
+
+/*%
+ * dst_write_public_key
+ * Write a key to disk in DNS format.
+ * Parameters
+ * key Pointer to a DST key structure.
+ * Returns
+ * 0 Failure
+ * 1 Success
+ */
+
+static int
+dst_s_write_public_key(const DST_KEY *key)
+{
+ FILE *fp;
+ char filename[PATH_MAX];
+ u_char out_key[RAW_KEY_SIZE];
+ char enc_key[RAW_KEY_SIZE];
+ int len = 0;
+ int mode;
+
+ memset(out_key, 0, sizeof(out_key));
+ if (key == NULL) {
+ EREPORT(("dst_write_public_key(): No key specified \n"));
+ return (0);
+ } else if ((len = dst_key_to_dnskey(key, out_key, sizeof(out_key)))< 0)
+ return (0);
+
+ /* Make the filename */
+ if (dst_s_build_filename(filename, key->dk_key_name, key->dk_id,
+ key->dk_alg, PUBLIC_KEY, PATH_MAX) == -1) {
+ EREPORT(("dst_write_public_key(): Cannot make filename from %s, %d, and %s\n",
+ key->dk_key_name, key->dk_id, PUBLIC_KEY));
+ return (0);
+ }
+ /* XXX in general this should be a check for symmetric keys */
+ mode = (key->dk_alg == KEY_HMAC_MD5) ? 0600 : 0644;
+ /* create public key file */
+ if ((fp = dst_s_fopen(filename, "w+", mode)) == NULL) {
+ EREPORT(("DST_write_public_key: open of file:%s failed (errno=%d)\n",
+ filename, errno));
+ return (0);
+ }
+ /*write out key first base64 the key data */
+ if (key->dk_flags & DST_EXTEND_FLAG)
+ b64_ntop(&out_key[6], len - 6, enc_key, sizeof(enc_key));
+ else
+ b64_ntop(&out_key[4], len - 4, enc_key, sizeof(enc_key));
+ fprintf(fp, "%s IN KEY %d %d %d %s\n",
+ key->dk_key_name,
+ key->dk_flags, key->dk_proto, key->dk_alg, enc_key);
+ fclose(fp);
+ return (1);
+}
+
+/*%
+ * dst_dnskey_to_public_key
+ * This function converts the contents of a DNS KEY RR into a DST
+ * key structure.
+ * Paramters
+ * len Length of the RDATA of the KEY RR RDATA
+ * rdata A pointer to the the KEY RR RDATA.
+ * in_name Key name to be stored in key structure.
+ * Returns
+ * NULL Failure
+ * NON-NULL Success. Pointer to key structure.
+ * Caller's responsibility to free() it.
+ */
+
+DST_KEY *
+dst_dnskey_to_key(const char *in_name, const u_char *rdata, const int len)
+{
+ DST_KEY *key_st;
+ int alg ;
+ int start = DST_KEY_START;
+
+ if (rdata == NULL || len <= DST_KEY_ALG) /*%< no data */
+ return (NULL);
+ alg = (u_int8_t) rdata[DST_KEY_ALG];
+ if (!dst_check_algorithm(alg)) { /*%< make sure alg is available */
+ EREPORT(("dst_dnskey_to_key(): Algorithm %d not suppored\n",
+ alg));
+ return (NULL);
+ }
+
+ if (in_name == NULL)
+ return (NULL);
+
+ if ((key_st = dst_s_get_key_struct(in_name, alg, 0, 0, 0)) == NULL)
+ return (NULL);
+
+ key_st->dk_id = dst_s_dns_key_id(rdata, len);
+ key_st->dk_flags = dst_s_get_int16(rdata);
+ key_st->dk_proto = (u_int16_t) rdata[DST_KEY_PROT];
+ if (key_st->dk_flags & DST_EXTEND_FLAG) {
+ u_int32_t ext_flags;
+ ext_flags = (u_int32_t) dst_s_get_int16(&rdata[DST_EXT_FLAG]);
+ key_st->dk_flags = key_st->dk_flags | (ext_flags << 16);
+ start += 2;
+ }
+ /*
+ * now point to the begining of the data representing the encoding
+ * of the key
+ */
+ if (key_st->dk_func && key_st->dk_func->from_dns_key) {
+ if (key_st->dk_func->from_dns_key(key_st, &rdata[start],
+ len - start) > 0)
+ return (key_st);
+ } else
+ EREPORT(("dst_dnskey_to_public_key(): unsuppored alg %d\n",
+ alg));
+
+ SAFE_FREE(key_st);
+ return (key_st);
+}
+
+/*%
+ * dst_public_key_to_dnskey
+ * Function to encode a public key into DNS KEY wire format
+ * Parameters
+ * key Key structure to encode.
+ * out_storage Location to write the encoded key to.
+ * out_len Size of the output array.
+ * Returns
+ * <0 Failure
+ * >=0 Number of bytes written to out_storage
+ */
+
+int
+dst_key_to_dnskey(const DST_KEY *key, u_char *out_storage,
+ const int out_len)
+{
+ u_int16_t val;
+ int loc = 0;
+ int enc_len = 0;
+ if (key == NULL)
+ return (-1);
+
+ if (!dst_check_algorithm(key->dk_alg)) { /*%< make sure alg is available */
+ EREPORT(("dst_key_to_dnskey(): Algorithm %d not suppored\n",
+ key->dk_alg));
+ return (UNSUPPORTED_KEYALG);
+ }
+ memset(out_storage, 0, out_len);
+ val = (u_int16_t)(key->dk_flags & 0xffff);
+ dst_s_put_int16(out_storage, val);
+ loc += 2;
+
+ out_storage[loc++] = (u_char) key->dk_proto;
+ out_storage[loc++] = (u_char) key->dk_alg;
+
+ if (key->dk_flags > 0xffff) { /*%< Extended flags */
+ val = (u_int16_t)((key->dk_flags >> 16) & 0xffff);
+ dst_s_put_int16(&out_storage[loc], val);
+ loc += 2;
+ }
+ if (key->dk_KEY_struct == NULL)
+ return (loc);
+ if (key->dk_func && key->dk_func->to_dns_key) {
+ enc_len = key->dk_func->to_dns_key(key,
+ (u_char *) &out_storage[loc],
+ out_len - loc);
+ if (enc_len > 0)
+ return (enc_len + loc);
+ else
+ return (-1);
+ } else
+ EREPORT(("dst_key_to_dnskey(): Unsupported ALG %d\n",
+ key->dk_alg));
+ return (-1);
+}
+
+/*%
+ * dst_buffer_to_key
+ * Function to encode a string of raw data into a DST key
+ * Parameters
+ * alg The algorithm (HMAC only)
+ * key A pointer to the data
+ * keylen The length of the data
+ * Returns
+ * NULL an error occurred
+ * NON-NULL the DST key
+ */
+DST_KEY *
+dst_buffer_to_key(const char *key_name, /*!< name of the key */
+ const int alg, /*!< algorithm */
+ const int flags, /*!< dns flags */
+ const int protocol, /*!< dns protocol */
+ const u_char *key_buf, /*!< key in dns wire fmt */
+ const int key_len) /*!< size of key */
+{
+
+ DST_KEY *dkey = NULL;
+ int dnslen;
+ u_char dns[2048];
+
+ if (!dst_check_algorithm(alg)) { /*%< make sure alg is available */
+ EREPORT(("dst_buffer_to_key(): Algorithm %d not suppored\n", alg));
+ return (NULL);
+ }
+
+ dkey = dst_s_get_key_struct(key_name, alg, flags, protocol, -1);
+
+ if (dkey == NULL || dkey->dk_func == NULL ||
+ dkey->dk_func->from_dns_key == NULL)
+ return (dst_free_key(dkey));
+
+ if (dkey->dk_func->from_dns_key(dkey, key_buf, key_len) < 0) {
+ EREPORT(("dst_buffer_to_key(): dst_buffer_to_hmac failed\n"));
+ return (dst_free_key(dkey));
+ }
+
+ dnslen = dst_key_to_dnskey(dkey, dns, sizeof(dns));
+ dkey->dk_id = dst_s_dns_key_id(dns, dnslen);
+ return (dkey);
+}
+
+int
+dst_key_to_buffer(DST_KEY *key, u_char *out_buff, int buf_len)
+{
+ int len;
+ /* this function will extrac the secret of HMAC into a buffer */
+ if (key == NULL)
+ return (0);
+ if (key->dk_func != NULL && key->dk_func->to_dns_key != NULL) {
+ len = key->dk_func->to_dns_key(key, out_buff, buf_len);
+ if (len < 0)
+ return (0);
+ return (len);
+ }
+ return (0);
+}
+
+/*%
+ * dst_s_read_private_key_file
+ * Function reads in private key from a file.
+ * Fills out the KEY structure.
+ * Parameters
+ * name Name of the key to be read.
+ * pk_key Structure that the key is returned in.
+ * in_id Key identifier (tag)
+ * Return
+ * 1 if everthing works
+ * 0 if there is any problem
+ */
+
+static int
+dst_s_read_private_key_file(char *name, DST_KEY *pk_key, u_int16_t in_id,
+ int in_alg)
+{
+ int cnt, alg, len, major, minor, file_major, file_minor;
+ int ret, id;
+ char filename[PATH_MAX];
+ u_char in_buff[RAW_KEY_SIZE], *p;
+ FILE *fp;
+ int dnslen;
+ u_char dns[2048];
+
+ if (name == NULL || pk_key == NULL) {
+ EREPORT(("dst_read_private_key_file(): No key name given\n"));
+ return (0);
+ }
+ /* Make the filename */
+ if (dst_s_build_filename(filename, name, in_id, in_alg, PRIVATE_KEY,
+ PATH_MAX) == -1) {
+ EREPORT(("dst_read_private_key(): Cannot make filename from %s, %d, and %s\n",
+ name, in_id, PRIVATE_KEY));
+ return (0);
+ }
+ /* first check if we can find the key file */
+ if ((fp = dst_s_fopen(filename, "r", 0)) == NULL) {
+ EREPORT(("dst_s_read_private_key_file: Could not open file %s in directory %s\n",
+ filename, dst_path[0] ? dst_path :
+ (char *) getcwd(NULL, PATH_MAX - 1)));
+ return (0);
+ }
+ /* now read the header info from the file */
+ if ((cnt = fread(in_buff, 1, sizeof(in_buff), fp)) < 5) {
+ fclose(fp);
+ EREPORT(("dst_s_read_private_key_file: error reading file %s (empty file)\n",
+ filename));
+ return (0);
+ }
+ /* decrypt key */
+ fclose(fp);
+ if (memcmp(in_buff, "Private-key-format: v", 20) != 0)
+ goto fail;
+ len = cnt;
+ p = in_buff;
+
+ if (!dst_s_verify_str((const char **) (void *)&p,
+ "Private-key-format: v")) {
+ EREPORT(("dst_s_read_private_key_file(): Not a Key file/Decrypt failed %s\n", name));
+ goto fail;
+ }
+ /* read in file format */
+ sscanf((char *)p, "%d.%d", &file_major, &file_minor);
+ sscanf(KEY_FILE_FORMAT, "%d.%d", &major, &minor);
+ if (file_major < 1) {
+ EREPORT(("dst_s_read_private_key_file(): Unknown keyfile %d.%d version for %s\n",
+ file_major, file_minor, name));
+ goto fail;
+ } else if (file_major > major || file_minor > minor)
+ EREPORT((
+ "dst_s_read_private_key_file(): Keyfile %s version higher than mine %d.%d MAY FAIL\n",
+ name, file_major, file_minor));
+
+ while (*p++ != '\n') ; /*%< skip to end of line */
+
+ if (!dst_s_verify_str((const char **) (void *)&p, "Algorithm: "))
+ goto fail;
+
+ if (sscanf((char *)p, "%d", &alg) != 1)
+ goto fail;
+ while (*p++ != '\n') ; /*%< skip to end of line */
+
+ if (pk_key->dk_key_name && !strcmp(pk_key->dk_key_name, name))
+ SAFE_FREE2(pk_key->dk_key_name, strlen(pk_key->dk_key_name));
+ pk_key->dk_key_name = (char *) strdup(name);
+
+ /* allocate and fill in key structure */
+ if (pk_key->dk_func == NULL || pk_key->dk_func->from_file_fmt == NULL)
+ goto fail;
+
+ ret = pk_key->dk_func->from_file_fmt(pk_key, (char *)p, &in_buff[len] - p);
+ if (ret < 0)
+ goto fail;
+
+ dnslen = dst_key_to_dnskey(pk_key, dns, sizeof(dns));
+ id = dst_s_dns_key_id(dns, dnslen);
+
+ /* Make sure the actual key tag matches the input tag used in the filename
+ */
+ if (id != in_id) {
+ EREPORT(("dst_s_read_private_key_file(): actual tag of key read %d != input tag used to build filename %d.\n", id, in_id));
+ goto fail;
+ }
+ pk_key->dk_id = (u_int16_t) id;
+ pk_key->dk_alg = alg;
+ memset(in_buff, 0, cnt);
+ return (1);
+
+ fail:
+ memset(in_buff, 0, cnt);
+ return (0);
+}
+
+/*%
+ * Generate and store a public/private keypair.
+ * Keys will be stored in formatted files.
+ *
+ * Parameters
+ &
+ *\par name Name of the new key. Used to create key files
+ *\li K&lt;name&gt;+&lt;alg&gt;+&lt;id&gt;.public and K&lt;name&gt;+&lt;alg&gt;+&lt;id&gt;.private.
+ *\par bits Size of the new key in bits.
+ *\par exp What exponent to use:
+ *\li 0 use exponent 3
+ *\li non-zero use Fermant4
+ *\par flags The default value of the DNS Key flags.
+ *\li The DNS Key RR Flag field is defined in RFC2065,
+ * section 3.3. The field has 16 bits.
+ *\par protocol
+ *\li Default value of the DNS Key protocol field.
+ *\li The DNS Key protocol field is defined in RFC2065,
+ * section 3.4. The field has 8 bits.
+ *\par alg What algorithm to use. Currently defined:
+ *\li KEY_RSA 1
+ *\li KEY_DSA 3
+ *\li KEY_HMAC 157
+ *\par out_id The key tag is returned.
+ *
+ * Return
+ *\li NULL Failure
+ *\li non-NULL the generated key pair
+ * Caller frees the result, and its dk_name pointer.
+ */
+DST_KEY *
+dst_generate_key(const char *name, const int bits, const int exp,
+ const int flags, const int protocol, const int alg)
+{
+ DST_KEY *new_key = NULL;
+ int dnslen;
+ u_char dns[2048];
+
+ if (name == NULL)
+ return (NULL);
+
+ if (!dst_check_algorithm(alg)) { /*%< make sure alg is available */
+ EREPORT(("dst_generate_key(): Algorithm %d not suppored\n", alg));
+ return (NULL);
+ }
+
+ new_key = dst_s_get_key_struct(name, alg, flags, protocol, bits);
+ if (new_key == NULL)
+ return (NULL);
+ if (bits == 0) /*%< null key we are done */
+ return (new_key);
+ if (new_key->dk_func == NULL || new_key->dk_func->generate == NULL) {
+ EREPORT(("dst_generate_key_pair():Unsupported algorithm %d\n",
+ alg));
+ return (dst_free_key(new_key));
+ }
+ if (new_key->dk_func->generate(new_key, exp) <= 0) {
+ EREPORT(("dst_generate_key_pair(): Key generation failure %s %d %d %d\n",
+ new_key->dk_key_name, new_key->dk_alg,
+ new_key->dk_key_size, exp));
+ return (dst_free_key(new_key));
+ }
+
+ dnslen = dst_key_to_dnskey(new_key, dns, sizeof(dns));
+ if (dnslen != UNSUPPORTED_KEYALG)
+ new_key->dk_id = dst_s_dns_key_id(dns, dnslen);
+ else
+ new_key->dk_id = 0;
+
+ return (new_key);
+}
+
+/*%
+ * Release all data structures pointed to by a key structure.
+ *
+ * Parameters
+ *\li f_key Key structure to be freed.
+ */
+
+DST_KEY *
+dst_free_key(DST_KEY *f_key)
+{
+
+ if (f_key == NULL)
+ return (f_key);
+ if (f_key->dk_func && f_key->dk_func->destroy)
+ f_key->dk_KEY_struct =
+ f_key->dk_func->destroy(f_key->dk_KEY_struct);
+ else {
+ EREPORT(("dst_free_key(): Unknown key alg %d\n",
+ f_key->dk_alg));
+ }
+ if (f_key->dk_KEY_struct) {
+ free(f_key->dk_KEY_struct);
+ f_key->dk_KEY_struct = NULL;
+ }
+ if (f_key->dk_key_name)
+ SAFE_FREE(f_key->dk_key_name);
+ SAFE_FREE(f_key);
+ return (NULL);
+}
+
+/*%
+ * Return the maximim size of signature from the key specified in bytes
+ *
+ * Parameters
+ *\li key
+ *
+ * Returns
+ * \li bytes
+ */
+int
+dst_sig_size(DST_KEY *key) {
+ switch (key->dk_alg) {
+ case KEY_HMAC_MD5:
+ return (16);
+ case KEY_HMAC_SHA1:
+ return (20);
+ case KEY_RSA:
+ return (key->dk_key_size + 7) / 8;
+ case KEY_DSA:
+ return (40);
+ default:
+ EREPORT(("dst_sig_size(): Unknown key alg %d\n", key->dk_alg));
+ return -1;
+ }
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/dst/dst_internal.h b/usr/src/lib/libresolv2_joy/common/dst/dst_internal.h
new file mode 100644
index 0000000000..e9bc6fc08d
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/dst/dst_internal.h
@@ -0,0 +1,155 @@
+#ifndef DST_INTERNAL_H
+#define DST_INTERNAL_H
+
+/*
+ * Portions Copyright (c) 1995-1998 by Trusted Information Systems, Inc.
+ *
+ * Permission to use, copy modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND TRUSTED INFORMATION SYSTEMS
+ * DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
+ * TRUSTED INFORMATION SYSTEMS BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THE SOFTWARE.
+ */
+#include <limits.h>
+#include <sys/param.h>
+#if (!defined(BSD)) || (BSD < 199306)
+# include <sys/bitypes.h>
+#else
+# include <sys/types.h>
+#endif
+
+#ifndef PATH_MAX
+# ifdef POSIX_PATH_MAX
+# define PATH_MAX POSIX_PATH_MAX
+# else
+# define PATH_MAX 255 /*%< this is the value of POSIX_PATH_MAX */
+# endif
+#endif
+
+typedef struct dst_key {
+ char *dk_key_name; /*%< name of the key */
+ int dk_key_size; /*%< this is the size of the key in bits */
+ int dk_proto; /*%< what protocols this key can be used for */
+ int dk_alg; /*%< algorithm number from key record */
+ u_int32_t dk_flags; /*%< and the flags of the public key */
+ u_int16_t dk_id; /*%< identifier of the key */
+ void *dk_KEY_struct; /*%< pointer to key in crypto pkg fmt */
+ struct dst_func *dk_func; /*%< point to cryptto pgk specific function table */
+} DST_KEY;
+#define HAS_DST_KEY
+
+#include <isc/dst.h>
+/*
+ * define what crypto systems are supported for RSA,
+ * BSAFE is prefered over RSAREF; only one can be set at any time
+ */
+#if defined(BSAFE) && defined(RSAREF)
+# error "Cannot have both BSAFE and RSAREF defined"
+#endif
+
+/* Declare dst_lib specific constants */
+#define KEY_FILE_FORMAT "1.2"
+
+/* suffixes for key file names */
+#define PRIVATE_KEY "private"
+#define PUBLIC_KEY "key"
+
+/* error handling */
+#ifdef REPORT_ERRORS
+#define EREPORT(str) printf str
+#else
+#define EREPORT(str) (void)0
+#endif
+
+/* use our own special macro to FRRE memory */
+
+#ifndef SAFE_FREE
+#define SAFE_FREE(a) \
+do{if(a != NULL){memset(a,0, sizeof(*a)); free(a); a=NULL;}} while (0)
+#define SAFE_FREE2(a,s) if (a != NULL && (long)s > 0){memset(a,0, s);free(a); a=NULL;}
+#endif
+
+typedef struct dst_func {
+ int (*sign)(const int mode, DST_KEY *key, void **context,
+ const u_int8_t *data, const int len,
+ u_int8_t *signature, const int sig_len);
+ int (*verify)(const int mode, DST_KEY *key, void **context,
+ const u_int8_t *data, const int len,
+ const u_int8_t *signature, const int sig_len);
+ int (*compare)(const DST_KEY *key1, const DST_KEY *key2);
+ int (*generate)(DST_KEY *key, int parms);
+ void *(*destroy)(void *key);
+ /* conversion functions */
+ int (*to_dns_key)(const DST_KEY *key, u_int8_t *out,
+ const int out_len);
+ int (*from_dns_key)(DST_KEY *key, const u_int8_t *str,
+ const int str_len);
+ int (*to_file_fmt)(const DST_KEY *key, char *out,
+ const int out_len);
+ int (*from_file_fmt)(DST_KEY *key, const char *out,
+ const int out_len);
+
+} dst_func;
+
+extern dst_func *dst_t_func[DST_MAX_ALGS];
+extern const char *key_file_fmt_str;
+extern const char *dst_path;
+
+#ifndef DST_HASH_SIZE
+#define DST_HASH_SIZE 20 /*%< RIPEMD160 and SHA-1 are 20 bytes MD5 is 16 */
+#endif
+
+int dst_bsafe_init(void);
+
+int dst_rsaref_init(void);
+
+int dst_hmac_md5_init(void);
+
+int dst_cylink_init(void);
+
+int dst_eay_dss_init(void);
+
+/* from higher level support routines */
+int dst_s_calculate_bits( const u_int8_t *str, const int max_bits);
+int dst_s_verify_str( const char **buf, const char *str);
+
+
+/* conversion between dns names and key file names */
+size_t dst_s_filename_length( const char *name, const char *suffix);
+int dst_s_build_filename( char *filename, const char *name,
+ u_int16_t id, int alg, const char *suffix,
+ size_t filename_length);
+
+FILE *dst_s_fopen (const char *filename, const char *mode, int perm);
+
+/*%
+ * read and write network byte order into u_int?_t
+ * all of these should be retired
+ */
+u_int16_t dst_s_get_int16( const u_int8_t *buf);
+void dst_s_put_int16( u_int8_t *buf, const u_int16_t val);
+
+u_int32_t dst_s_get_int32( const u_int8_t *buf);
+void dst_s_put_int32( u_int8_t *buf, const u_int32_t val);
+
+#ifdef DUMP
+# undef DUMP
+# define DUMP(a,b,c,d) dst_s_dump(a,b,c,d)
+#else
+# define DUMP(a,b,c,d)
+#endif
+void
+dst_s_dump(const int mode, const u_char *data, const int size,
+ const char *msg);
+
+
+
+#endif /* DST_INTERNAL_H */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/dst/hmac_link.c b/usr/src/lib/libresolv2_joy/common/dst/hmac_link.c
new file mode 100644
index 0000000000..23925e4269
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/dst/hmac_link.c
@@ -0,0 +1,491 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#ifdef HMAC_MD5
+#ifndef LINT
+static const char rcsid[] = "$Header: /proj/cvs/prod/libbind/dst/hmac_link.c,v 1.8 2007/09/24 17:18:25 each Exp $";
+#endif
+/*
+ * Portions Copyright (c) 1995-1998 by Trusted Information Systems, Inc.
+ *
+ * Permission to use, copy modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND TRUSTED INFORMATION SYSTEMS
+ * DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
+ * TRUSTED INFORMATION SYSTEMS BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THE SOFTWARE.
+ */
+
+/*%
+ * This file contains an implementation of the HMAC-MD5 algorithm.
+ */
+#include "port_before.h"
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <memory.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#include "dst_internal.h"
+
+#ifdef USE_MD5
+# ifndef HAVE_MD5
+# include "md5.h"
+# else
+# ifdef SOLARIS2
+# include <sys/md5.h>
+# endif
+# endif
+# ifndef _MD5_H_
+# define _MD5_H_ 1 /*%< make sure we do not include rsaref md5.h file */
+# endif
+#endif
+
+#include "port_after.h"
+
+
+#define HMAC_LEN 64
+#define HMAC_IPAD 0x36
+#define HMAC_OPAD 0x5c
+#define MD5_LEN 16
+
+
+typedef struct hmackey {
+ u_char hk_ipad[64], hk_opad[64];
+} HMAC_Key;
+
+
+/**************************************************************************
+ * dst_hmac_md5_sign
+ * Call HMAC signing functions to sign a block of data.
+ * There are three steps to signing, INIT (initialize structures),
+ * UPDATE (hash (more) data), FINAL (generate a signature). This
+ * routine performs one or more of these steps.
+ * Parameters
+ * mode SIG_MODE_INIT, SIG_MODE_UPDATE and/or SIG_MODE_FINAL.
+ * priv_key key to use for signing.
+ * context the context to be used in this digest
+ * data data to be signed.
+ * len length in bytes of data.
+ * signature location to store signature.
+ * sig_len size of the signature location
+ * returns
+ * N Success on SIG_MODE_FINAL = returns signature length in bytes
+ * 0 Success on SIG_MODE_INIT and UPDATE
+ * <0 Failure
+ */
+
+static int
+dst_hmac_md5_sign(const int mode, DST_KEY *d_key, void **context,
+ const u_char *data, const int len,
+ u_char *signature, const int sig_len)
+{
+ HMAC_Key *key;
+ int sign_len = 0;
+ MD5_CTX *ctx = NULL;
+
+ if (d_key == NULL || d_key->dk_KEY_struct == NULL)
+ return (-1);
+
+ if (mode & SIG_MODE_INIT)
+ ctx = (MD5_CTX *) malloc(sizeof(*ctx));
+ else if (context)
+ ctx = (MD5_CTX *) *context;
+ if (ctx == NULL)
+ return (-1);
+
+ key = (HMAC_Key *) d_key->dk_KEY_struct;
+
+ if (mode & SIG_MODE_INIT) {
+ MD5Init(ctx);
+ MD5Update(ctx, key->hk_ipad, HMAC_LEN);
+ }
+
+ if ((mode & SIG_MODE_UPDATE) && (data && len > 0))
+ MD5Update(ctx, data, len);
+
+ if (mode & SIG_MODE_FINAL) {
+ if (signature == NULL || sig_len < MD5_LEN)
+ return (SIGN_FINAL_FAILURE);
+ MD5Final(signature, ctx);
+
+ /* perform outer MD5 */
+ MD5Init(ctx);
+ MD5Update(ctx, key->hk_opad, HMAC_LEN);
+ MD5Update(ctx, signature, MD5_LEN);
+ MD5Final(signature, ctx);
+ sign_len = MD5_LEN;
+ SAFE_FREE(ctx);
+ }
+ else {
+ if (context == NULL)
+ return (-1);
+ *context = (void *) ctx;
+ }
+ return (sign_len);
+}
+
+
+/**************************************************************************
+ * dst_hmac_md5_verify()
+ * Calls HMAC verification routines. There are three steps to
+ * verification, INIT (initialize structures), UPDATE (hash (more) data),
+ * FINAL (generate a signature). This routine performs one or more of
+ * these steps.
+ * Parameters
+ * mode SIG_MODE_INIT, SIG_MODE_UPDATE and/or SIG_MODE_FINAL.
+ * dkey key to use for verify.
+ * data data signed.
+ * len length in bytes of data.
+ * signature signature.
+ * sig_len length in bytes of signature.
+ * returns
+ * 0 Success
+ * <0 Failure
+ */
+
+static int
+dst_hmac_md5_verify(const int mode, DST_KEY *d_key, void **context,
+ const u_char *data, const int len,
+ const u_char *signature, const int sig_len)
+{
+ HMAC_Key *key;
+ MD5_CTX *ctx = NULL;
+
+ if (d_key == NULL || d_key->dk_KEY_struct == NULL)
+ return (-1);
+
+ if (mode & SIG_MODE_INIT)
+ ctx = (MD5_CTX *) malloc(sizeof(*ctx));
+ else if (context)
+ ctx = (MD5_CTX *) *context;
+ if (ctx == NULL)
+ return (-1);
+
+ key = (HMAC_Key *) d_key->dk_KEY_struct;
+ if (mode & SIG_MODE_INIT) {
+ MD5Init(ctx);
+ MD5Update(ctx, key->hk_ipad, HMAC_LEN);
+ }
+ if ((mode & SIG_MODE_UPDATE) && (data && len > 0))
+ MD5Update(ctx, data, len);
+
+ if (mode & SIG_MODE_FINAL) {
+ u_char digest[MD5_LEN];
+ if (signature == NULL || key == NULL || sig_len != MD5_LEN)
+ return (VERIFY_FINAL_FAILURE);
+ MD5Final(digest, ctx);
+
+ /* perform outer MD5 */
+ MD5Init(ctx);
+ MD5Update(ctx, key->hk_opad, HMAC_LEN);
+ MD5Update(ctx, digest, MD5_LEN);
+ MD5Final(digest, ctx);
+
+ SAFE_FREE(ctx);
+ if (memcmp(digest, signature, MD5_LEN) != 0)
+ return (VERIFY_FINAL_FAILURE);
+ }
+ else {
+ if (context == NULL)
+ return (-1);
+ *context = (void *) ctx;
+ }
+ return (0);
+}
+
+
+/**************************************************************************
+ * dst_buffer_to_hmac_md5
+ * Converts key from raw data to an HMAC Key
+ * This function gets in a pointer to the data
+ * Parameters
+ * hkey the HMAC key to be filled in
+ * key the key in raw format
+ * keylen the length of the key
+ * Return
+ * 0 Success
+ * <0 Failure
+ */
+static int
+dst_buffer_to_hmac_md5(DST_KEY *dkey, const u_char *key, const int keylen)
+{
+ int i;
+ HMAC_Key *hkey = NULL;
+ MD5_CTX ctx;
+ int local_keylen = keylen;
+ u_char tk[MD5_LEN];
+
+ if (dkey == NULL || key == NULL || keylen < 0)
+ return (-1);
+
+ if ((hkey = (HMAC_Key *) malloc(sizeof(HMAC_Key))) == NULL)
+ return (-2);
+
+ memset(hkey->hk_ipad, 0, sizeof(hkey->hk_ipad));
+ memset(hkey->hk_opad, 0, sizeof(hkey->hk_opad));
+
+ /* if key is longer than HMAC_LEN bytes reset it to key=MD5(key) */
+ if (keylen > HMAC_LEN) {
+ MD5Init(&ctx);
+ MD5Update(&ctx, key, keylen);
+ MD5Final(tk, &ctx);
+ memset((void *) &ctx, 0, sizeof(ctx));
+ key = tk;
+ local_keylen = MD5_LEN;
+ }
+ /* start out by storing key in pads */
+ memcpy(hkey->hk_ipad, key, local_keylen);
+ memcpy(hkey->hk_opad, key, local_keylen);
+
+ /* XOR key with hk_ipad and opad values */
+ for (i = 0; i < HMAC_LEN; i++) {
+ hkey->hk_ipad[i] ^= HMAC_IPAD;
+ hkey->hk_opad[i] ^= HMAC_OPAD;
+ }
+ dkey->dk_key_size = local_keylen;
+ dkey->dk_KEY_struct = (void *) hkey;
+ return (1);
+}
+
+
+/**************************************************************************
+ * dst_hmac_md5_key_to_file_format
+ * Encodes an HMAC Key into the portable file format.
+ * Parameters
+ * hkey HMAC KEY structure
+ * buff output buffer
+ * buff_len size of output buffer
+ * Return
+ * 0 Failure - null input hkey
+ * -1 Failure - not enough space in output area
+ * N Success - Length of data returned in buff
+ */
+
+static int
+dst_hmac_md5_key_to_file_format(const DST_KEY *dkey, char *buff,
+ const int buff_len)
+{
+ char *bp;
+ int len, i, key_len;
+ u_char key[HMAC_LEN];
+ HMAC_Key *hkey;
+
+ if (dkey == NULL || dkey->dk_KEY_struct == NULL)
+ return (0);
+ /*
+ * Using snprintf() would be so much simpler here.
+ */
+ if (buff == NULL ||
+ buff_len <= (int)(strlen(key_file_fmt_str) +
+ strlen(KEY_FILE_FORMAT) + 4))
+ return (-1); /*%< no OR not enough space in output area */
+ hkey = (HMAC_Key *) dkey->dk_KEY_struct;
+ memset(buff, 0, buff_len); /*%< just in case */
+ /* write file header */
+ sprintf(buff, key_file_fmt_str, KEY_FILE_FORMAT, KEY_HMAC_MD5, "HMAC");
+
+ bp = buff + strlen(buff);
+
+ memset(key, 0, HMAC_LEN);
+ for (i = 0; i < HMAC_LEN; i++)
+ key[i] = hkey->hk_ipad[i] ^ HMAC_IPAD;
+ for (i = HMAC_LEN - 1; i >= 0; i--)
+ if (key[i] != 0)
+ break;
+ key_len = i + 1;
+
+ if (buff_len - (bp - buff) < 6)
+ return (-1);
+ strcat(bp, "Key: ");
+ bp += strlen("Key: ");
+
+ len = b64_ntop(key, key_len, bp, buff_len - (bp - buff));
+ if (len < 0)
+ return (-1);
+ bp += len;
+ if (buff_len - (bp - buff) < 2)
+ return (-1);
+ *(bp++) = '\n';
+ *bp = '\0';
+
+ return (bp - buff);
+}
+
+
+/**************************************************************************
+ * dst_hmac_md5_key_from_file_format
+ * Converts contents of a key file into an HMAC key.
+ * Parameters
+ * hkey structure to put key into
+ * buff buffer containing the encoded key
+ * buff_len the length of the buffer
+ * Return
+ * n >= 0 Foot print of the key converted
+ * n < 0 Error in conversion
+ */
+
+static int
+dst_hmac_md5_key_from_file_format(DST_KEY *dkey, const char *buff,
+ const int buff_len)
+{
+ const char *p = buff, *eol;
+ u_char key[HMAC_LEN+1]; /* b64_pton needs more than 64 bytes do decode
+ * it should probably be fixed rather than doing
+ * this
+ */
+ u_char *tmp;
+ int key_len, len;
+
+ if (dkey == NULL)
+ return (-2);
+ if (buff == NULL || buff_len < 0)
+ return (-1);
+
+ memset(key, 0, sizeof(key));
+
+ if (!dst_s_verify_str(&p, "Key: "))
+ return (-3);
+
+ eol = strchr(p, '\n');
+ if (eol == NULL)
+ return (-4);
+ len = eol - p;
+ tmp = malloc(len + 2);
+ if (tmp == NULL)
+ return (-5);
+ memcpy(tmp, p, len);
+ *(tmp + len) = 0x0;
+ key_len = b64_pton((char *)tmp, key, HMAC_LEN+1); /*%< see above */
+ SAFE_FREE2(tmp, len + 2);
+
+ if (dst_buffer_to_hmac_md5(dkey, key, key_len) < 0) {
+ return (-6);
+ }
+ return (0);
+}
+
+/*%
+ * dst_hmac_md5_to_dns_key()
+ * function to extract hmac key from DST_KEY structure
+ * intput:
+ * in_key: HMAC-MD5 key
+ * output:
+ * out_str: buffer to write ot
+ * out_len: size of output buffer
+ * returns:
+ * number of bytes written to output buffer
+ */
+static int
+dst_hmac_md5_to_dns_key(const DST_KEY *in_key, u_char *out_str,
+ const int out_len)
+{
+
+ HMAC_Key *hkey;
+ int i;
+
+ if (in_key == NULL || in_key->dk_KEY_struct == NULL ||
+ out_len <= in_key->dk_key_size || out_str == NULL)
+ return (-1);
+
+ hkey = (HMAC_Key *) in_key->dk_KEY_struct;
+ for (i = 0; i < in_key->dk_key_size; i++)
+ out_str[i] = hkey->hk_ipad[i] ^ HMAC_IPAD;
+ return (i);
+}
+
+/**************************************************************************
+ * dst_hmac_md5_compare_keys
+ * Compare two keys for equality.
+ * Return
+ * 0 The keys are equal
+ * NON-ZERO The keys are not equal
+ */
+
+static int
+dst_hmac_md5_compare_keys(const DST_KEY *key1, const DST_KEY *key2)
+{
+ HMAC_Key *hkey1 = (HMAC_Key *) key1->dk_KEY_struct;
+ HMAC_Key *hkey2 = (HMAC_Key *) key2->dk_KEY_struct;
+ return memcmp(hkey1->hk_ipad, hkey2->hk_ipad, HMAC_LEN);
+}
+
+/**************************************************************************
+ * dst_hmac_md5_free_key_structure
+ * Frees all (none) dynamically allocated structures in hkey
+ */
+
+static void *
+dst_hmac_md5_free_key_structure(void *key)
+{
+ HMAC_Key *hkey = key;
+ SAFE_FREE(hkey);
+ return (NULL);
+}
+
+
+/***************************************************************************
+ * dst_hmac_md5_generate_key
+ * Creates a HMAC key of size size with a maximum size of 63 bytes
+ * generating a HMAC key larger than 63 bytes makes no sense as that key
+ * is digested before use.
+ */
+
+static int
+dst_hmac_md5_generate_key(DST_KEY *key, const int nothing)
+{
+ (void)key;
+ (void)nothing;
+ return (-1);
+}
+
+/*%
+ * dst_hmac_md5_init() Function to answer set up function pointers for HMAC
+ * related functions
+ */
+int
+dst_hmac_md5_init()
+{
+ if (dst_t_func[KEY_HMAC_MD5] != NULL)
+ return (1);
+ dst_t_func[KEY_HMAC_MD5] = malloc(sizeof(struct dst_func));
+ if (dst_t_func[KEY_HMAC_MD5] == NULL)
+ return (0);
+ memset(dst_t_func[KEY_HMAC_MD5], 0, sizeof(struct dst_func));
+ dst_t_func[KEY_HMAC_MD5]->sign = dst_hmac_md5_sign;
+ dst_t_func[KEY_HMAC_MD5]->verify = dst_hmac_md5_verify;
+ dst_t_func[KEY_HMAC_MD5]->compare = dst_hmac_md5_compare_keys;
+ dst_t_func[KEY_HMAC_MD5]->generate = dst_hmac_md5_generate_key;
+ dst_t_func[KEY_HMAC_MD5]->destroy = dst_hmac_md5_free_key_structure;
+ dst_t_func[KEY_HMAC_MD5]->to_dns_key = dst_hmac_md5_to_dns_key;
+ dst_t_func[KEY_HMAC_MD5]->from_dns_key = dst_buffer_to_hmac_md5;
+ dst_t_func[KEY_HMAC_MD5]->to_file_fmt = dst_hmac_md5_key_to_file_format;
+ dst_t_func[KEY_HMAC_MD5]->from_file_fmt = dst_hmac_md5_key_from_file_format;
+ return (1);
+}
+
+#else
+#define dst_hmac_md5_init __dst_hmac_md5_init
+
+int
+dst_hmac_md5_init(){
+ return (0);
+}
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/dst/support.c b/usr/src/lib/libresolv2_joy/common/dst/support.c
new file mode 100644
index 0000000000..8f827667d6
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/dst/support.c
@@ -0,0 +1,342 @@
+static const char rcsid[] = "$Header: /proj/cvs/prod/libbind/dst/support.c,v 1.6 2005/10/11 00:10:13 marka Exp $";
+
+
+/*
+ * Portions Copyright (c) 1995-1998 by Trusted Information Systems, Inc.
+ *
+ * Permission to use, copy modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND TRUSTED INFORMATION SYSTEMS
+ * DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
+ * TRUSTED INFORMATION SYSTEMS BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THE SOFTWARE.
+ */
+
+#include "port_before.h"
+
+#include <stdio.h>
+#include <unistd.h>
+#include <memory.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#include "dst_internal.h"
+
+#include "port_after.h"
+
+/*%
+ * dst_s_verify_str()
+ * Validate that the input string(*str) is at the head of the input
+ * buffer(**buf). If so, move the buffer head pointer (*buf) to
+ * the first byte of data following the string(*str).
+ * Parameters
+ * buf Input buffer.
+ * str Input string.
+ * Return
+ * 0 *str is not the head of **buff
+ * 1 *str is the head of **buff, *buf is is advanced to
+ * the tail of **buf.
+ */
+
+int
+dst_s_verify_str(const char **buf, const char *str)
+{
+ int b, s;
+ if (*buf == NULL) /*%< error checks */
+ return (0);
+ if (str == NULL || *str == '\0')
+ return (1);
+
+ b = strlen(*buf); /*%< get length of strings */
+ s = strlen(str);
+ if (s > b || strncmp(*buf, str, s)) /*%< check if same */
+ return (0); /*%< not a match */
+ (*buf) += s; /*%< advance pointer */
+ return (1);
+}
+
+/*%
+ * dst_s_calculate_bits
+ * Given a binary number represented in a u_char[], determine
+ * the number of significant bits used.
+ * Parameters
+ * str An input character string containing a binary number.
+ * max_bits The maximum possible significant bits.
+ * Return
+ * N The number of significant bits in str.
+ */
+
+int
+dst_s_calculate_bits(const u_char *str, const int max_bits)
+{
+ const u_char *p = str;
+ u_char i, j = 0x80;
+ int bits;
+ for (bits = max_bits; *p == 0x00 && bits > 0; p++)
+ bits -= 8;
+ for (i = *p; (i & j) != j; j >>= 1)
+ bits--;
+ return (bits);
+}
+
+/*%
+ * calculates a checksum used in dst for an id.
+ * takes an array of bytes and a length.
+ * returns a 16 bit checksum.
+ */
+u_int16_t
+dst_s_id_calc(const u_char *key, const int keysize)
+{
+ u_int32_t ac;
+ const u_char *kp = key;
+ int size = keysize;
+
+ if (!key || (keysize <= 0))
+ return (0xffffU);
+
+ for (ac = 0; size > 1; size -= 2, kp += 2)
+ ac += ((*kp) << 8) + *(kp + 1);
+
+ if (size > 0)
+ ac += ((*kp) << 8);
+ ac += (ac >> 16) & 0xffff;
+
+ return (ac & 0xffff);
+}
+
+/*%
+ * dst_s_dns_key_id() Function to calculate DNSSEC footprint from KEY record
+ * rdata
+ * Input:
+ * dns_key_rdata: the raw data in wire format
+ * rdata_len: the size of the input data
+ * Output:
+ * the key footprint/id calculated from the key data
+ */
+u_int16_t
+dst_s_dns_key_id(const u_char *dns_key_rdata, const int rdata_len)
+{
+ if (!dns_key_rdata)
+ return 0;
+
+ /* compute id */
+ if (dns_key_rdata[3] == KEY_RSA) /*%< Algorithm RSA */
+ return dst_s_get_int16((const u_char *)
+ &dns_key_rdata[rdata_len - 3]);
+ else if (dns_key_rdata[3] == KEY_HMAC_MD5)
+ /* compatibility */
+ return 0;
+ else
+ /* compute a checksum on the key part of the key rr */
+ return dst_s_id_calc(dns_key_rdata, rdata_len);
+}
+
+/*%
+ * dst_s_get_int16
+ * This routine extracts a 16 bit integer from a two byte character
+ * string. The character string is assumed to be in network byte
+ * order and may be unaligned. The number returned is in host order.
+ * Parameter
+ * buf A two byte character string.
+ * Return
+ * The converted integer value.
+ */
+
+u_int16_t
+dst_s_get_int16(const u_char *buf)
+{
+ register u_int16_t a = 0;
+ a = ((u_int16_t)(buf[0] << 8)) | ((u_int16_t)(buf[1]));
+ return (a);
+}
+
+/*%
+ * dst_s_get_int32
+ * This routine extracts a 32 bit integer from a four byte character
+ * string. The character string is assumed to be in network byte
+ * order and may be unaligned. The number returned is in host order.
+ * Parameter
+ * buf A four byte character string.
+ * Return
+ * The converted integer value.
+ */
+
+u_int32_t
+dst_s_get_int32(const u_char *buf)
+{
+ register u_int32_t a = 0;
+ a = ((u_int32_t)(buf[0] << 24)) | ((u_int32_t)(buf[1] << 16)) |
+ ((u_int32_t)(buf[2] << 8)) | ((u_int32_t)(buf[3]));
+ return (a);
+}
+
+/*%
+ * dst_s_put_int16
+ * Take a 16 bit integer and store the value in a two byte
+ * character string. The integer is assumed to be in network
+ * order and the string is returned in host order.
+ *
+ * Parameters
+ * buf Storage for a two byte character string.
+ * val 16 bit integer.
+ */
+
+void
+dst_s_put_int16(u_int8_t *buf, const u_int16_t val)
+{
+ buf[0] = (u_int8_t)(val >> 8);
+ buf[1] = (u_int8_t)(val);
+}
+
+/*%
+ * dst_s_put_int32
+ * Take a 32 bit integer and store the value in a four byte
+ * character string. The integer is assumed to be in network
+ * order and the string is returned in host order.
+ *
+ * Parameters
+ * buf Storage for a four byte character string.
+ * val 32 bit integer.
+ */
+
+void
+dst_s_put_int32(u_int8_t *buf, const u_int32_t val)
+{
+ buf[0] = (u_int8_t)(val >> 24);
+ buf[1] = (u_int8_t)(val >> 16);
+ buf[2] = (u_int8_t)(val >> 8);
+ buf[3] = (u_int8_t)(val);
+}
+
+/*%
+ * dst_s_filename_length
+ *
+ * This function returns the number of bytes needed to hold the
+ * filename for a key file. '/', '\' and ':' are not allowed.
+ * form: K&lt;keyname&gt;+&lt;alg&gt;+&lt;id&gt;.&lt;suffix&gt;
+ *
+ * Returns 0 if the filename would contain either '\', '/' or ':'
+ */
+size_t
+dst_s_filename_length(const char *name, const char *suffix)
+{
+ if (name == NULL)
+ return (0);
+ if (strrchr(name, '\\'))
+ return (0);
+ if (strrchr(name, '/'))
+ return (0);
+ if (strrchr(name, ':'))
+ return (0);
+ if (suffix == NULL)
+ return (0);
+ if (strrchr(suffix, '\\'))
+ return (0);
+ if (strrchr(suffix, '/'))
+ return (0);
+ if (strrchr(suffix, ':'))
+ return (0);
+ return (1 + strlen(name) + 6 + strlen(suffix));
+}
+
+/*%
+ * dst_s_build_filename ()
+ * Builds a key filename from the key name, it's id, and a
+ * suffix. '\', '/' and ':' are not allowed. fA filename is of the
+ * form: K&lt;keyname&gt;&lt;id&gt;.&lt;suffix&gt;
+ * form: K&lt;keyname&gt;+&lt;alg&gt;+&lt;id&gt;.&lt;suffix&gt;
+ *
+ * Returns -1 if the conversion fails:
+ * if the filename would be too long for space allotted
+ * if the filename would contain a '\', '/' or ':'
+ * Returns 0 on success
+ */
+
+int
+dst_s_build_filename(char *filename, const char *name, u_int16_t id,
+ int alg, const char *suffix, size_t filename_length)
+{
+ u_int32_t my_id;
+ if (filename == NULL)
+ return (-1);
+ memset(filename, 0, filename_length);
+ if (name == NULL)
+ return (-1);
+ if (suffix == NULL)
+ return (-1);
+ if (filename_length < 1 + strlen(name) + 4 + 6 + 1 + strlen(suffix))
+ return (-1);
+ my_id = id;
+ sprintf(filename, "K%s+%03d+%05d.%s", name, alg, my_id,
+ (const char *) suffix);
+ if (strrchr(filename, '/'))
+ return (-1);
+ if (strrchr(filename, '\\'))
+ return (-1);
+ if (strrchr(filename, ':'))
+ return (-1);
+ return (0);
+}
+
+/*%
+ * dst_s_fopen ()
+ * Open a file in the dst_path directory. If perm is specified, the
+ * file is checked for existence first, and not opened if it exists.
+ * Parameters
+ * filename File to open
+ * mode Mode to open the file (passed directly to fopen)
+ * perm File permission, if creating a new file.
+ * Returns
+ * NULL Failure
+ * NON-NULL (FILE *) of opened file.
+ */
+FILE *
+dst_s_fopen(const char *filename, const char *mode, int perm)
+{
+ FILE *fp;
+ char pathname[PATH_MAX];
+
+ if (strlen(filename) + strlen(dst_path) >= sizeof(pathname))
+ return (NULL);
+
+ if (*dst_path != '\0') {
+ strcpy(pathname, dst_path);
+ strcat(pathname, filename);
+ } else
+ strcpy(pathname, filename);
+
+ fp = fopen(pathname, mode);
+ if (perm)
+ chmod(pathname, perm);
+ return (fp);
+}
+
+void
+dst_s_dump(const int mode, const u_char *data, const int size,
+ const char *msg)
+{
+ UNUSED(data);
+
+ if (size > 0) {
+#ifdef LONG_TEST
+ static u_char scratch[1000];
+ int n ;
+ n = b64_ntop(data, scratch, size, sizeof(scratch));
+ printf("%s: %x %d %s\n", msg, mode, n, scratch);
+#else
+ printf("%s,%x %d\n", msg, mode, size);
+#endif
+ }
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/inet/inet_cidr_ntop.c b/usr/src/lib/libresolv2_joy/common/inet/inet_cidr_ntop.c
new file mode 100644
index 0000000000..bf960a8acc
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/inet/inet_cidr_ntop.c
@@ -0,0 +1,263 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1998,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: inet_cidr_ntop.c,v 1.7 2006/10/11 02:18:18 marka Exp $";
+#endif
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <arpa/inet.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "port_after.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) ((size_t)sprintf x)
+#endif
+
+static char *
+inet_cidr_ntop_ipv4(const u_char *src, int bits, char *dst, size_t size);
+static char *
+inet_cidr_ntop_ipv6(const u_char *src, int bits, char *dst, size_t size);
+
+/*%
+ * char *
+ * inet_cidr_ntop(af, src, bits, dst, size)
+ * convert network address from network to presentation format.
+ * "src"'s size is determined from its "af".
+ * return:
+ * pointer to dst, or NULL if an error occurred (check errno).
+ * note:
+ * 192.5.5.1/28 has a nonzero host part, which means it isn't a network
+ * as called for by inet_net_ntop() but it can be a host address with
+ * an included netmask.
+ * author:
+ * Paul Vixie (ISC), October 1998
+ */
+char *
+inet_cidr_ntop(int af, const void *src, int bits, char *dst, size_t size) {
+ switch (af) {
+ case AF_INET:
+ return (inet_cidr_ntop_ipv4(src, bits, dst, size));
+ case AF_INET6:
+ return (inet_cidr_ntop_ipv6(src, bits, dst, size));
+ default:
+ errno = EAFNOSUPPORT;
+ return (NULL);
+ }
+}
+
+static int
+decoct(const u_char *src, int bytes, char *dst, size_t size) {
+ char *odst = dst;
+ char *t;
+ int b;
+
+ for (b = 1; b <= bytes; b++) {
+ if (size < sizeof "255.")
+ return (0);
+ t = dst;
+ dst += SPRINTF((dst, "%u", *src++));
+ if (b != bytes) {
+ *dst++ = '.';
+ *dst = '\0';
+ }
+ size -= (size_t)(dst - t);
+ }
+ return (dst - odst);
+}
+
+/*%
+ * static char *
+ * inet_cidr_ntop_ipv4(src, bits, dst, size)
+ * convert IPv4 network address from network to presentation format.
+ * "src"'s size is determined from its "af".
+ * return:
+ * pointer to dst, or NULL if an error occurred (check errno).
+ * note:
+ * network byte order assumed. this means 192.5.5.240/28 has
+ * 0b11110000 in its fourth octet.
+ * author:
+ * Paul Vixie (ISC), October 1998
+ */
+static char *
+inet_cidr_ntop_ipv4(const u_char *src, int bits, char *dst, size_t size) {
+ char *odst = dst;
+ size_t len = 4;
+ size_t b;
+ size_t bytes;
+
+ if ((bits < -1) || (bits > 32)) {
+ errno = EINVAL;
+ return (NULL);
+ }
+
+ /* Find number of significant bytes in address. */
+ if (bits == -1)
+ len = 4;
+ else
+ for (len = 1, b = 1 ; b < 4U; b++)
+ if (*(src + b))
+ len = b + 1;
+
+ /* Format whole octets plus nonzero trailing octets. */
+ bytes = (((bits <= 0) ? 1 : bits) + 7) / 8;
+ if (len > bytes)
+ bytes = len;
+ b = decoct(src, bytes, dst, size);
+ if (b == 0U)
+ goto emsgsize;
+ dst += b;
+ size -= b;
+
+ if (bits != -1) {
+ /* Format CIDR /width. */
+ if (size < sizeof "/32")
+ goto emsgsize;
+ dst += SPRINTF((dst, "/%u", bits));
+ }
+
+ return (odst);
+
+ emsgsize:
+ errno = EMSGSIZE;
+ return (NULL);
+}
+
+static char *
+inet_cidr_ntop_ipv6(const u_char *src, int bits, char *dst, size_t size) {
+ /*
+ * Note that int32_t and int16_t need only be "at least" large enough
+ * to contain a value of the specified size. On some systems, like
+ * Crays, there is no such thing as an integer variable with 16 bits.
+ * Keep this in mind if you think this function should have been coded
+ * to use pointer overlays. All the world's not a VAX.
+ */
+ char tmp[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255/128"];
+ char *tp;
+ struct { int base, len; } best, cur;
+ u_int words[NS_IN6ADDRSZ / NS_INT16SZ];
+ int i;
+
+ if ((bits < -1) || (bits > 128)) {
+ errno = EINVAL;
+ return (NULL);
+ }
+
+ /*
+ * Preprocess:
+ * Copy the input (bytewise) array into a wordwise array.
+ * Find the longest run of 0x00's in src[] for :: shorthanding.
+ */
+ memset(words, '\0', sizeof words);
+ for (i = 0; i < NS_IN6ADDRSZ; i++)
+ words[i / 2] |= (src[i] << ((1 - (i % 2)) << 3));
+ best.base = -1;
+ best.len = 0;
+ cur.base = -1;
+ cur.len = 0;
+ for (i = 0; i < (NS_IN6ADDRSZ / NS_INT16SZ); i++) {
+ if (words[i] == 0) {
+ if (cur.base == -1)
+ cur.base = i, cur.len = 1;
+ else
+ cur.len++;
+ } else {
+ if (cur.base != -1) {
+ if (best.base == -1 || cur.len > best.len)
+ best = cur;
+ cur.base = -1;
+ }
+ }
+ }
+ if (cur.base != -1) {
+ if (best.base == -1 || cur.len > best.len)
+ best = cur;
+ }
+ if (best.base != -1 && best.len < 2)
+ best.base = -1;
+
+ /*
+ * Format the result.
+ */
+ tp = tmp;
+ for (i = 0; i < (NS_IN6ADDRSZ / NS_INT16SZ); i++) {
+ /* Are we inside the best run of 0x00's? */
+ if (best.base != -1 && i >= best.base &&
+ i < (best.base + best.len)) {
+ if (i == best.base)
+ *tp++ = ':';
+ continue;
+ }
+ /* Are we following an initial run of 0x00s or any real hex? */
+ if (i != 0)
+ *tp++ = ':';
+ /* Is this address an encapsulated IPv4? */
+ if (i == 6 && best.base == 0 && (best.len == 6 ||
+ (best.len == 7 && words[7] != 0x0001) ||
+ (best.len == 5 && words[5] == 0xffff))) {
+ int n;
+
+ if (src[15] || bits == -1 || bits > 120)
+ n = 4;
+ else if (src[14] || bits > 112)
+ n = 3;
+ else
+ n = 2;
+ n = decoct(src+12, n, tp, sizeof tmp - (tp - tmp));
+ if (n == 0) {
+ errno = EMSGSIZE;
+ return (NULL);
+ }
+ tp += strlen(tp);
+ break;
+ }
+ tp += SPRINTF((tp, "%x", words[i]));
+ }
+
+ /* Was it a trailing run of 0x00's? */
+ if (best.base != -1 && (best.base + best.len) ==
+ (NS_IN6ADDRSZ / NS_INT16SZ))
+ *tp++ = ':';
+ *tp = '\0';
+
+ if (bits != -1)
+ tp += SPRINTF((tp, "/%u", bits));
+
+ /*
+ * Check for overflow, copy, and we're done.
+ */
+ if ((size_t)(tp - tmp) > size) {
+ errno = EMSGSIZE;
+ return (NULL);
+ }
+ strcpy(dst, tmp);
+ return (dst);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/inet/inet_cidr_pton.c b/usr/src/lib/libresolv2_joy/common/inet/inet_cidr_pton.c
new file mode 100644
index 0000000000..07652af463
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/inet/inet_cidr_pton.c
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1998,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: inet_cidr_pton.c,v 1.6 2005/04/27 04:56:19 sra Exp $";
+#endif
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <arpa/inet.h>
+
+#include <isc/assertions.h>
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "port_after.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) ((size_t)sprintf x)
+#endif
+
+static int inet_cidr_pton_ipv4 __P((const char *src, u_char *dst,
+ int *bits, int ipv6));
+static int inet_cidr_pton_ipv6 __P((const char *src, u_char *dst,
+ int *bits));
+
+static int getbits(const char *, int ipv6);
+
+/*%
+ * int
+ * inet_cidr_pton(af, src, dst, *bits)
+ * convert network address from presentation to network format.
+ * accepts inet_pton()'s input for this "af" plus trailing "/CIDR".
+ * "dst" is assumed large enough for its "af". "bits" is set to the
+ * /CIDR prefix length, which can have defaults (like /32 for IPv4).
+ * return:
+ * -1 if an error occurred (inspect errno; ENOENT means bad format).
+ * 0 if successful conversion occurred.
+ * note:
+ * 192.5.5.1/28 has a nonzero host part, which means it isn't a network
+ * as called for by inet_net_pton() but it can be a host address with
+ * an included netmask.
+ * author:
+ * Paul Vixie (ISC), October 1998
+ */
+int
+inet_cidr_pton(int af, const char *src, void *dst, int *bits) {
+ switch (af) {
+ case AF_INET:
+ return (inet_cidr_pton_ipv4(src, dst, bits, 0));
+ case AF_INET6:
+ return (inet_cidr_pton_ipv6(src, dst, bits));
+ default:
+ errno = EAFNOSUPPORT;
+ return (-1);
+ }
+}
+
+static const char digits[] = "0123456789";
+
+static int
+inet_cidr_pton_ipv4(const char *src, u_char *dst, int *pbits, int ipv6) {
+ const u_char *odst = dst;
+ int n, ch, tmp, bits;
+ size_t size = 4;
+
+ /* Get the mantissa. */
+ while (ch = *src++, (isascii(ch) && isdigit(ch))) {
+ tmp = 0;
+ do {
+ n = strchr(digits, ch) - digits;
+ INSIST(n >= 0 && n <= 9);
+ tmp *= 10;
+ tmp += n;
+ if (tmp > 255)
+ goto enoent;
+ } while ((ch = *src++) != '\0' && isascii(ch) && isdigit(ch));
+ if (size-- == 0U)
+ goto emsgsize;
+ *dst++ = (u_char) tmp;
+ if (ch == '\0' || ch == '/')
+ break;
+ if (ch != '.')
+ goto enoent;
+ }
+
+ /* Get the prefix length if any. */
+ bits = -1;
+ if (ch == '/' && dst > odst) {
+ bits = getbits(src, ipv6);
+ if (bits == -2)
+ goto enoent;
+ } else if (ch != '\0')
+ goto enoent;
+
+ /* Prefix length can default to /32 only if all four octets spec'd. */
+ if (bits == -1) {
+ if (dst - odst == 4)
+ bits = ipv6 ? 128 : 32;
+ else
+ goto enoent;
+ }
+
+ /* If nothing was written to the destination, we found no address. */
+ if (dst == odst)
+ goto enoent;
+
+ /* If prefix length overspecifies mantissa, life is bad. */
+ if (((bits - (ipv6 ? 96 : 0)) / 8) > (dst - odst))
+ goto enoent;
+
+ /* Extend address to four octets. */
+ while (size-- > 0U)
+ *dst++ = 0;
+
+ *pbits = bits;
+ return (0);
+
+ enoent:
+ errno = ENOENT;
+ return (-1);
+
+ emsgsize:
+ errno = EMSGSIZE;
+ return (-1);
+}
+
+static int
+inet_cidr_pton_ipv6(const char *src, u_char *dst, int *pbits) {
+ static const char xdigits_l[] = "0123456789abcdef",
+ xdigits_u[] = "0123456789ABCDEF";
+ u_char tmp[NS_IN6ADDRSZ], *tp, *endp, *colonp;
+ const char *xdigits, *curtok;
+ int ch, saw_xdigit;
+ u_int val;
+ int bits;
+
+ memset((tp = tmp), '\0', NS_IN6ADDRSZ);
+ endp = tp + NS_IN6ADDRSZ;
+ colonp = NULL;
+ /* Leading :: requires some special handling. */
+ if (*src == ':')
+ if (*++src != ':')
+ return (0);
+ curtok = src;
+ saw_xdigit = 0;
+ val = 0;
+ bits = -1;
+ while ((ch = *src++) != '\0') {
+ const char *pch;
+
+ if ((pch = strchr((xdigits = xdigits_l), ch)) == NULL)
+ pch = strchr((xdigits = xdigits_u), ch);
+ if (pch != NULL) {
+ val <<= 4;
+ val |= (pch - xdigits);
+ if (val > 0xffff)
+ return (0);
+ saw_xdigit = 1;
+ continue;
+ }
+ if (ch == ':') {
+ curtok = src;
+ if (!saw_xdigit) {
+ if (colonp)
+ return (0);
+ colonp = tp;
+ continue;
+ } else if (*src == '\0') {
+ return (0);
+ }
+ if (tp + NS_INT16SZ > endp)
+ return (0);
+ *tp++ = (u_char) (val >> 8) & 0xff;
+ *tp++ = (u_char) val & 0xff;
+ saw_xdigit = 0;
+ val = 0;
+ continue;
+ }
+ if (ch == '.' && ((tp + NS_INADDRSZ) <= endp) &&
+ inet_cidr_pton_ipv4(curtok, tp, &bits, 1) == 0) {
+ tp += NS_INADDRSZ;
+ saw_xdigit = 0;
+ break; /*%< '\\0' was seen by inet_pton4(). */
+ }
+ if (ch == '/') {
+ bits = getbits(src, 1);
+ if (bits == -2)
+ goto enoent;
+ break;
+ }
+ goto enoent;
+ }
+ if (saw_xdigit) {
+ if (tp + NS_INT16SZ > endp)
+ goto emsgsize;
+ *tp++ = (u_char) (val >> 8) & 0xff;
+ *tp++ = (u_char) val & 0xff;
+ }
+ if (colonp != NULL) {
+ /*
+ * Since some memmove()'s erroneously fail to handle
+ * overlapping regions, we'll do the shift by hand.
+ */
+ const int n = tp - colonp;
+ int i;
+
+ if (tp == endp)
+ goto enoent;
+ for (i = 1; i <= n; i++) {
+ endp[- i] = colonp[n - i];
+ colonp[n - i] = 0;
+ }
+ tp = endp;
+ }
+
+ memcpy(dst, tmp, NS_IN6ADDRSZ);
+
+ *pbits = bits;
+ return (0);
+
+ enoent:
+ errno = ENOENT;
+ return (-1);
+
+ emsgsize:
+ errno = EMSGSIZE;
+ return (-1);
+}
+
+static int
+getbits(const char *src, int ipv6) {
+ int bits = 0;
+ char *cp, ch;
+
+ if (*src == '\0') /*%< syntax */
+ return (-2);
+ do {
+ ch = *src++;
+ cp = strchr(digits, ch);
+ if (cp == NULL) /*%< syntax */
+ return (-2);
+ bits *= 10;
+ bits += cp - digits;
+ if (bits == 0 && *src != '\0') /*%< no leading zeros */
+ return (-2);
+ if (bits > (ipv6 ? 128 : 32)) /*%< range error */
+ return (-2);
+ } while (*src != '\0');
+
+ return (bits);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/inet/inet_data.c b/usr/src/lib/libresolv2_joy/common/inet/inet_data.c
new file mode 100644
index 0000000000..58b8c4342d
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/inet/inet_data.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1995-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char rcsid[] = "$Id: inet_data.c,v 1.4 2005/04/27 04:56:19 sra Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <ctype.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "port_after.h"
+
+const struct in6_addr isc_in6addr_any = IN6ADDR_ANY_INIT;
+const struct in6_addr isc_in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/inet/inet_lnaof.c b/usr/src/lib/libresolv2_joy/common/inet/inet_lnaof.c
new file mode 100644
index 0000000000..70ac409512
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/inet/inet_lnaof.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 1983, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)inet_lnaof.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "port_before.h"
+
+#include <sys/param.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#include "port_after.h"
+
+/*%
+ * Return the local network address portion of an
+ * internet address; handles class a/b/c network
+ * number formats.
+ */
+u_long
+inet_lnaof(in)
+ struct in_addr in;
+{
+ register u_long i = ntohl(in.s_addr);
+
+ if (IN_CLASSA(i))
+ return ((i)&IN_CLASSA_HOST);
+ else if (IN_CLASSB(i))
+ return ((i)&IN_CLASSB_HOST);
+ else
+ return ((i)&IN_CLASSC_HOST);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/inet/inet_makeaddr.c b/usr/src/lib/libresolv2_joy/common/inet/inet_makeaddr.c
new file mode 100644
index 0000000000..c56cb3eaeb
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/inet/inet_makeaddr.c
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 1983, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)inet_makeaddr.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "port_before.h"
+
+#include <sys/param.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#include "port_after.h"
+
+/*%
+ * Formulate an Internet address from network + host. Used in
+ * building addresses stored in the ifnet structure.
+ */
+struct in_addr
+inet_makeaddr(net, host)
+ u_long net, host;
+{
+ struct in_addr a;
+
+ if (net < 128U)
+ a.s_addr = (net << IN_CLASSA_NSHIFT) | (host & IN_CLASSA_HOST);
+ else if (net < 65536U)
+ a.s_addr = (net << IN_CLASSB_NSHIFT) | (host & IN_CLASSB_HOST);
+ else if (net < 16777216L)
+ a.s_addr = (net << IN_CLASSC_NSHIFT) | (host & IN_CLASSC_HOST);
+ else
+ a.s_addr = net | host;
+ a.s_addr = htonl(a.s_addr);
+ return (a);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/inet/inet_net_ntop.c b/usr/src/lib/libresolv2_joy/common/inet/inet_net_ntop.c
new file mode 100644
index 0000000000..fb28e3cbe5
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/inet/inet_net_ntop.c
@@ -0,0 +1,279 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: inet_net_ntop.c,v 1.5 2006/06/20 02:50:14 marka Exp $";
+#endif
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "port_after.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) ((size_t)sprintf x)
+#endif
+
+static char * inet_net_ntop_ipv4 __P((const u_char *src, int bits,
+ char *dst, size_t size));
+static char * inet_net_ntop_ipv6 __P((const u_char *src, int bits,
+ char *dst, size_t size));
+
+/*%
+ * char *
+ * inet_net_ntop(af, src, bits, dst, size)
+ * convert network number from network to presentation format.
+ * generates CIDR style result always.
+ * return:
+ * pointer to dst, or NULL if an error occurred (check errno).
+ * author:
+ * Paul Vixie (ISC), July 1996
+ */
+char *
+inet_net_ntop(af, src, bits, dst, size)
+ int af;
+ const void *src;
+ int bits;
+ char *dst;
+ size_t size;
+{
+ switch (af) {
+ case AF_INET:
+ return (inet_net_ntop_ipv4(src, bits, dst, size));
+ case AF_INET6:
+ return (inet_net_ntop_ipv6(src, bits, dst, size));
+ default:
+ errno = EAFNOSUPPORT;
+ return (NULL);
+ }
+}
+
+/*%
+ * static char *
+ * inet_net_ntop_ipv4(src, bits, dst, size)
+ * convert IPv4 network number from network to presentation format.
+ * generates CIDR style result always.
+ * return:
+ * pointer to dst, or NULL if an error occurred (check errno).
+ * note:
+ * network byte order assumed. this means 192.5.5.240/28 has
+ * 0b11110000 in its fourth octet.
+ * author:
+ * Paul Vixie (ISC), July 1996
+ */
+static char *
+inet_net_ntop_ipv4(src, bits, dst, size)
+ const u_char *src;
+ int bits;
+ char *dst;
+ size_t size;
+{
+ char *odst = dst;
+ char *t;
+ u_int m;
+ int b;
+
+ if (bits < 0 || bits > 32) {
+ errno = EINVAL;
+ return (NULL);
+ }
+
+ if (bits == 0) {
+ if (size < sizeof "0")
+ goto emsgsize;
+ *dst++ = '0';
+ size--;
+ *dst = '\0';
+ }
+
+ /* Format whole octets. */
+ for (b = bits / 8; b > 0; b--) {
+ if (size <= sizeof "255.")
+ goto emsgsize;
+ t = dst;
+ dst += SPRINTF((dst, "%u", *src++));
+ if (b > 1) {
+ *dst++ = '.';
+ *dst = '\0';
+ }
+ size -= (size_t)(dst - t);
+ }
+
+ /* Format partial octet. */
+ b = bits % 8;
+ if (b > 0) {
+ if (size <= sizeof ".255")
+ goto emsgsize;
+ t = dst;
+ if (dst != odst)
+ *dst++ = '.';
+ m = ((1 << b) - 1) << (8 - b);
+ dst += SPRINTF((dst, "%u", *src & m));
+ size -= (size_t)(dst - t);
+ }
+
+ /* Format CIDR /width. */
+ if (size <= sizeof "/32")
+ goto emsgsize;
+ dst += SPRINTF((dst, "/%u", bits));
+ return (odst);
+
+ emsgsize:
+ errno = EMSGSIZE;
+ return (NULL);
+}
+
+/*%
+ * static char *
+ * inet_net_ntop_ipv6(src, bits, fakebits, dst, size)
+ * convert IPv6 network number from network to presentation format.
+ * generates CIDR style result always. Picks the shortest representation
+ * unless the IP is really IPv4.
+ * always prints specified number of bits (bits).
+ * return:
+ * pointer to dst, or NULL if an error occurred (check errno).
+ * note:
+ * network byte order assumed. this means 192.5.5.240/28 has
+ * 0x11110000 in its fourth octet.
+ * author:
+ * Vadim Kogan (UCB), June 2001
+ * Original version (IPv4) by Paul Vixie (ISC), July 1996
+ */
+
+static char *
+inet_net_ntop_ipv6(const u_char *src, int bits, char *dst, size_t size) {
+ u_int m;
+ int b;
+ int p;
+ int zero_s, zero_l, tmp_zero_s, tmp_zero_l;
+ int i;
+ int is_ipv4 = 0;
+ unsigned char inbuf[16];
+ char outbuf[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255/128")];
+ char *cp;
+ int words;
+ u_char *s;
+
+ if (bits < 0 || bits > 128) {
+ errno = EINVAL;
+ return (NULL);
+ }
+
+ cp = outbuf;
+
+ if (bits == 0) {
+ *cp++ = ':';
+ *cp++ = ':';
+ *cp = '\0';
+ } else {
+ /* Copy src to private buffer. Zero host part. */
+ p = (bits + 7) / 8;
+ memcpy(inbuf, src, p);
+ memset(inbuf + p, 0, 16 - p);
+ b = bits % 8;
+ if (b != 0) {
+ m = ~0 << (8 - b);
+ inbuf[p-1] &= m;
+ }
+
+ s = inbuf;
+
+ /* how many words need to be displayed in output */
+ words = (bits + 15) / 16;
+ if (words == 1)
+ words = 2;
+
+ /* Find the longest substring of zero's */
+ zero_s = zero_l = tmp_zero_s = tmp_zero_l = 0;
+ for (i = 0; i < (words * 2); i += 2) {
+ if ((s[i] | s[i+1]) == 0) {
+ if (tmp_zero_l == 0)
+ tmp_zero_s = i / 2;
+ tmp_zero_l++;
+ } else {
+ if (tmp_zero_l && zero_l < tmp_zero_l) {
+ zero_s = tmp_zero_s;
+ zero_l = tmp_zero_l;
+ tmp_zero_l = 0;
+ }
+ }
+ }
+
+ if (tmp_zero_l && zero_l < tmp_zero_l) {
+ zero_s = tmp_zero_s;
+ zero_l = tmp_zero_l;
+ }
+
+ if (zero_l != words && zero_s == 0 && ((zero_l == 6) ||
+ ((zero_l == 5 && s[10] == 0xff && s[11] == 0xff) ||
+ ((zero_l == 7 && s[14] != 0 && s[15] != 1)))))
+ is_ipv4 = 1;
+
+ /* Format whole words. */
+ for (p = 0; p < words; p++) {
+ if (zero_l != 0 && p >= zero_s && p < zero_s + zero_l) {
+ /* Time to skip some zeros */
+ if (p == zero_s)
+ *cp++ = ':';
+ if (p == words - 1)
+ *cp++ = ':';
+ s++;
+ s++;
+ continue;
+ }
+
+ if (is_ipv4 && p > 5 ) {
+ *cp++ = (p == 6) ? ':' : '.';
+ cp += SPRINTF((cp, "%u", *s++));
+ /* we can potentially drop the last octet */
+ if (p != 7 || bits > 120) {
+ *cp++ = '.';
+ cp += SPRINTF((cp, "%u", *s++));
+ }
+ } else {
+ if (cp != outbuf)
+ *cp++ = ':';
+ cp += SPRINTF((cp, "%x", *s * 256 + s[1]));
+ s += 2;
+ }
+ }
+ }
+ /* Format CIDR /width. */
+ sprintf(cp, "/%u", bits);
+ if (strlen(outbuf) + 1 > size)
+ goto emsgsize;
+ strcpy(dst, outbuf);
+
+ return (dst);
+
+emsgsize:
+ errno = EMSGSIZE;
+ return (NULL);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/inet/inet_net_pton.c b/usr/src/lib/libresolv2_joy/common/inet/inet_net_pton.c
new file mode 100644
index 0000000000..8d8bfb1f64
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/inet/inet_net_pton.c
@@ -0,0 +1,407 @@
+/*
+ * Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 1996, 1998, 1999, 2001, 2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: inet_net_pton.c,v 1.10 2008/11/14 02:36:51 marka Exp $";
+#endif
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <arpa/inet.h>
+
+#include <isc/assertions.h>
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "port_after.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) ((size_t)sprintf x)
+#endif
+
+/*%
+ * static int
+ * inet_net_pton_ipv4(src, dst, size)
+ * convert IPv4 network number from presentation to network format.
+ * accepts hex octets, hex strings, decimal octets, and /CIDR.
+ * "size" is in bytes and describes "dst".
+ * return:
+ * number of bits, either imputed classfully or specified with /CIDR,
+ * or -1 if some failure occurred (check errno). ENOENT means it was
+ * not an IPv4 network specification.
+ * note:
+ * network byte order assumed. this means 192.5.5.240/28 has
+ * 0b11110000 in its fourth octet.
+ * author:
+ * Paul Vixie (ISC), June 1996
+ */
+static int
+inet_net_pton_ipv4(const char *src, u_char *dst, size_t size) {
+ static const char xdigits[] = "0123456789abcdef";
+ static const char digits[] = "0123456789";
+ int n, ch, tmp = 0, dirty, bits;
+ const u_char *odst = dst;
+
+ ch = *src++;
+ if (ch == '0' && (src[0] == 'x' || src[0] == 'X')
+ && isascii((unsigned char)(src[1]))
+ && isxdigit((unsigned char)(src[1]))) {
+ /* Hexadecimal: Eat nybble string. */
+ if (size <= 0U)
+ goto emsgsize;
+ dirty = 0;
+ src++; /*%< skip x or X. */
+ while ((ch = *src++) != '\0' && isascii(ch) && isxdigit(ch)) {
+ if (isupper(ch))
+ ch = tolower(ch);
+ n = strchr(xdigits, ch) - xdigits;
+ INSIST(n >= 0 && n <= 15);
+ if (dirty == 0)
+ tmp = n;
+ else
+ tmp = (tmp << 4) | n;
+ if (++dirty == 2) {
+ if (size-- <= 0U)
+ goto emsgsize;
+ *dst++ = (u_char) tmp;
+ dirty = 0;
+ }
+ }
+ if (dirty) { /*%< Odd trailing nybble? */
+ if (size-- <= 0U)
+ goto emsgsize;
+ *dst++ = (u_char) (tmp << 4);
+ }
+ } else if (isascii(ch) && isdigit(ch)) {
+ /* Decimal: eat dotted digit string. */
+ for (;;) {
+ tmp = 0;
+ do {
+ n = strchr(digits, ch) - digits;
+ INSIST(n >= 0 && n <= 9);
+ tmp *= 10;
+ tmp += n;
+ if (tmp > 255)
+ goto enoent;
+ } while ((ch = *src++) != '\0' &&
+ isascii(ch) && isdigit(ch));
+ if (size-- <= 0U)
+ goto emsgsize;
+ *dst++ = (u_char) tmp;
+ if (ch == '\0' || ch == '/')
+ break;
+ if (ch != '.')
+ goto enoent;
+ ch = *src++;
+ if (!isascii(ch) || !isdigit(ch))
+ goto enoent;
+ }
+ } else
+ goto enoent;
+
+ bits = -1;
+ if (ch == '/' && isascii((unsigned char)(src[0])) &&
+ isdigit((unsigned char)(src[0])) && dst > odst) {
+ /* CIDR width specifier. Nothing can follow it. */
+ ch = *src++; /*%< Skip over the /. */
+ bits = 0;
+ do {
+ n = strchr(digits, ch) - digits;
+ INSIST(n >= 0 && n <= 9);
+ bits *= 10;
+ bits += n;
+ if (bits > 32)
+ goto enoent;
+ } while ((ch = *src++) != '\0' && isascii(ch) && isdigit(ch));
+ if (ch != '\0')
+ goto enoent;
+ }
+
+ /* Firey death and destruction unless we prefetched EOS. */
+ if (ch != '\0')
+ goto enoent;
+
+ /* If nothing was written to the destination, we found no address. */
+ if (dst == odst)
+ goto enoent;
+ /* If no CIDR spec was given, infer width from net class. */
+ if (bits == -1) {
+ if (*odst >= 240) /*%< Class E */
+ bits = 32;
+ else if (*odst >= 224) /*%< Class D */
+ bits = 8;
+ else if (*odst >= 192) /*%< Class C */
+ bits = 24;
+ else if (*odst >= 128) /*%< Class B */
+ bits = 16;
+ else /*%< Class A */
+ bits = 8;
+ /* If imputed mask is narrower than specified octets, widen. */
+ if (bits < ((dst - odst) * 8))
+ bits = (dst - odst) * 8;
+ /*
+ * If there are no additional bits specified for a class D
+ * address adjust bits to 4.
+ */
+ if (bits == 8 && *odst == 224)
+ bits = 4;
+ }
+ /* Extend network to cover the actual mask. */
+ while (bits > ((dst - odst) * 8)) {
+ if (size-- <= 0U)
+ goto emsgsize;
+ *dst++ = '\0';
+ }
+ return (bits);
+
+ enoent:
+ errno = ENOENT;
+ return (-1);
+
+ emsgsize:
+ errno = EMSGSIZE;
+ return (-1);
+}
+
+static int
+getbits(const char *src, int *bitsp) {
+ static const char digits[] = "0123456789";
+ int n;
+ int val;
+ char ch;
+
+ val = 0;
+ n = 0;
+ while ((ch = *src++) != '\0') {
+ const char *pch;
+
+ pch = strchr(digits, ch);
+ if (pch != NULL) {
+ if (n++ != 0 && val == 0) /*%< no leading zeros */
+ return (0);
+ val *= 10;
+ val += (pch - digits);
+ if (val > 128) /*%< range */
+ return (0);
+ continue;
+ }
+ return (0);
+ }
+ if (n == 0)
+ return (0);
+ *bitsp = val;
+ return (1);
+}
+
+static int
+getv4(const char *src, u_char *dst, int *bitsp) {
+ static const char digits[] = "0123456789";
+ u_char *odst = dst;
+ int n;
+ u_int val;
+ char ch;
+
+ val = 0;
+ n = 0;
+ while ((ch = *src++) != '\0') {
+ const char *pch;
+
+ pch = strchr(digits, ch);
+ if (pch != NULL) {
+ if (n++ != 0 && val == 0) /*%< no leading zeros */
+ return (0);
+ val *= 10;
+ val += (pch - digits);
+ if (val > 255) /*%< range */
+ return (0);
+ continue;
+ }
+ if (ch == '.' || ch == '/') {
+ if (dst - odst > 3) /*%< too many octets? */
+ return (0);
+ *dst++ = val;
+ if (ch == '/')
+ return (getbits(src, bitsp));
+ val = 0;
+ n = 0;
+ continue;
+ }
+ return (0);
+ }
+ if (n == 0)
+ return (0);
+ if (dst - odst > 3) /*%< too many octets? */
+ return (0);
+ *dst++ = val;
+ return (1);
+}
+
+static int
+inet_net_pton_ipv6(const char *src, u_char *dst, size_t size) {
+ static const char xdigits_l[] = "0123456789abcdef",
+ xdigits_u[] = "0123456789ABCDEF";
+ u_char tmp[NS_IN6ADDRSZ], *tp, *endp, *colonp;
+ const char *xdigits, *curtok;
+ int ch, saw_xdigit;
+ u_int val;
+ int digits;
+ int bits;
+ size_t bytes;
+ int words;
+ int ipv4;
+
+ memset((tp = tmp), '\0', NS_IN6ADDRSZ);
+ endp = tp + NS_IN6ADDRSZ;
+ colonp = NULL;
+ /* Leading :: requires some special handling. */
+ if (*src == ':')
+ if (*++src != ':')
+ goto enoent;
+ curtok = src;
+ saw_xdigit = 0;
+ val = 0;
+ digits = 0;
+ bits = -1;
+ ipv4 = 0;
+ while ((ch = *src++) != '\0') {
+ const char *pch;
+
+ if ((pch = strchr((xdigits = xdigits_l), ch)) == NULL)
+ pch = strchr((xdigits = xdigits_u), ch);
+ if (pch != NULL) {
+ val <<= 4;
+ val |= (pch - xdigits);
+ if (++digits > 4)
+ goto enoent;
+ saw_xdigit = 1;
+ continue;
+ }
+ if (ch == ':') {
+ curtok = src;
+ if (!saw_xdigit) {
+ if (colonp)
+ goto enoent;
+ colonp = tp;
+ continue;
+ } else if (*src == '\0')
+ goto enoent;
+ if (tp + NS_INT16SZ > endp)
+ return (0);
+ *tp++ = (u_char) (val >> 8) & 0xff;
+ *tp++ = (u_char) val & 0xff;
+ saw_xdigit = 0;
+ digits = 0;
+ val = 0;
+ continue;
+ }
+ if (ch == '.' && ((tp + NS_INADDRSZ) <= endp) &&
+ getv4(curtok, tp, &bits) > 0) {
+ tp += NS_INADDRSZ;
+ saw_xdigit = 0;
+ ipv4 = 1;
+ break; /*%< '\\0' was seen by inet_pton4(). */
+ }
+ if (ch == '/' && getbits(src, &bits) > 0)
+ break;
+ goto enoent;
+ }
+ if (saw_xdigit) {
+ if (tp + NS_INT16SZ > endp)
+ goto enoent;
+ *tp++ = (u_char) (val >> 8) & 0xff;
+ *tp++ = (u_char) val & 0xff;
+ }
+ if (bits == -1)
+ bits = 128;
+
+ words = (bits + 15) / 16;
+ if (words < 2)
+ words = 2;
+ if (ipv4)
+ words = 8;
+ endp = tmp + 2 * words;
+
+ if (colonp != NULL) {
+ /*
+ * Since some memmove()'s erroneously fail to handle
+ * overlapping regions, we'll do the shift by hand.
+ */
+ const int n = tp - colonp;
+ int i;
+
+ if (tp == endp)
+ goto enoent;
+ for (i = 1; i <= n; i++) {
+ endp[- i] = colonp[n - i];
+ colonp[n - i] = 0;
+ }
+ tp = endp;
+ }
+ if (tp != endp)
+ goto enoent;
+
+ bytes = (bits + 7) / 8;
+ if (bytes > size)
+ goto emsgsize;
+ memcpy(dst, tmp, bytes);
+ return (bits);
+
+ enoent:
+ errno = ENOENT;
+ return (-1);
+
+ emsgsize:
+ errno = EMSGSIZE;
+ return (-1);
+}
+
+/*%
+ * int
+ * inet_net_pton(af, src, dst, size)
+ * convert network number from presentation to network format.
+ * accepts hex octets, hex strings, decimal octets, and /CIDR.
+ * "size" is in bytes and describes "dst".
+ * return:
+ * number of bits, either imputed classfully or specified with /CIDR,
+ * or -1 if some failure occurred (check errno). ENOENT means it was
+ * not a valid network specification.
+ * author:
+ * Paul Vixie (ISC), June 1996
+ */
+int
+inet_net_pton(int af, const char *src, void *dst, size_t size) {
+ switch (af) {
+ case AF_INET:
+ return (inet_net_pton_ipv4(src, dst, size));
+ case AF_INET6:
+ return (inet_net_pton_ipv6(src, dst, size));
+ default:
+ errno = EAFNOSUPPORT;
+ return (-1);
+ }
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/inet/inet_neta.c b/usr/src/lib/libresolv2_joy/common/inet/inet_neta.c
new file mode 100644
index 0000000000..63a6c201a4
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/inet/inet_neta.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: inet_neta.c,v 1.3 2005/04/27 04:56:20 sra Exp $";
+#endif
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "port_after.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) ((size_t)sprintf x)
+#endif
+
+/*%
+ * char *
+ * inet_neta(src, dst, size)
+ * format a u_long network number into presentation format.
+ * return:
+ * pointer to dst, or NULL if an error occurred (check errno).
+ * note:
+ * format of ``src'' is as for inet_network().
+ * author:
+ * Paul Vixie (ISC), July 1996
+ */
+char *
+inet_neta(src, dst, size)
+ u_long src;
+ char *dst;
+ size_t size;
+{
+ char *odst = dst;
+ char *tp;
+
+ while (src & 0xffffffff) {
+ u_char b = (src & 0xff000000) >> 24;
+
+ src <<= 8;
+ if (b) {
+ if (size < sizeof "255.")
+ goto emsgsize;
+ tp = dst;
+ dst += SPRINTF((dst, "%u", b));
+ if (src != 0L) {
+ *dst++ = '.';
+ *dst = '\0';
+ }
+ size -= (size_t)(dst - tp);
+ }
+ }
+ if (dst == odst) {
+ if (size < sizeof "0.0.0.0")
+ goto emsgsize;
+ strcpy(dst, "0.0.0.0");
+ }
+ return (odst);
+
+ emsgsize:
+ errno = EMSGSIZE;
+ return (NULL);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/inet/inet_netof.c b/usr/src/lib/libresolv2_joy/common/inet/inet_netof.c
new file mode 100644
index 0000000000..c228e3d818
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/inet/inet_netof.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 1983, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)inet_netof.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "port_before.h"
+
+#include <sys/param.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#include "port_after.h"
+
+/*%
+ * Return the network number from an internet
+ * address; handles class a/b/c network #'s.
+ */
+u_long
+inet_netof(in)
+ struct in_addr in;
+{
+ register u_long i = ntohl(in.s_addr);
+
+ if (IN_CLASSA(i))
+ return (((i)&IN_CLASSA_NET) >> IN_CLASSA_NSHIFT);
+ else if (IN_CLASSB(i))
+ return (((i)&IN_CLASSB_NET) >> IN_CLASSB_NSHIFT);
+ else
+ return (((i)&IN_CLASSC_NET) >> IN_CLASSC_NSHIFT);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/inet/inet_network.c b/usr/src/lib/libresolv2_joy/common/inet/inet_network.c
new file mode 100644
index 0000000000..47976cff68
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/inet/inet_network.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 1983, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)inet_network.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <ctype.h>
+
+#include "port_after.h"
+
+/*%
+ * Internet network address interpretation routine.
+ * The library routines call this routine to interpret
+ * network numbers.
+ */
+u_long
+inet_network(cp)
+ register const char *cp;
+{
+ register u_long val, base, n, i;
+ register char c;
+ u_long parts[4], *pp = parts;
+ int digit;
+
+again:
+ val = 0; base = 10; digit = 0;
+ if (*cp == '0')
+ digit = 1, base = 8, cp++;
+ if (*cp == 'x' || *cp == 'X')
+ base = 16, cp++;
+ while ((c = *cp) != 0) {
+ if (isdigit((unsigned char)c)) {
+ if (base == 8U && (c == '8' || c == '9'))
+ return (INADDR_NONE);
+ val = (val * base) + (c - '0');
+ cp++;
+ digit = 1;
+ continue;
+ }
+ if (base == 16U && isxdigit((unsigned char)c)) {
+ val = (val << 4) +
+ (c + 10 - (islower((unsigned char)c) ? 'a' : 'A'));
+ cp++;
+ digit = 1;
+ continue;
+ }
+ break;
+ }
+ if (!digit)
+ return (INADDR_NONE);
+ if (pp >= parts + 4 || val > 0xffU)
+ return (INADDR_NONE);
+ if (*cp == '.') {
+ *pp++ = val, cp++;
+ goto again;
+ }
+ if (*cp && !isspace(*cp&0xff))
+ return (INADDR_NONE);
+ *pp++ = val;
+ n = pp - parts;
+ if (n > 4U)
+ return (INADDR_NONE);
+ for (val = 0, i = 0; i < n; i++) {
+ val <<= 8;
+ val |= parts[i] & 0xff;
+ }
+ return (val);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/inet/nsap_addr.c b/usr/src/lib/libresolv2_joy/common/inet/nsap_addr.c
new file mode 100644
index 0000000000..a9972e6e32
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/inet/nsap_addr.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: nsap_addr.c,v 1.5 2005/07/28 06:51:48 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <ctype.h>
+#include <resolv_joy.h>
+#include <resolv_mt.h>
+
+#include "port_after.h"
+
+static char
+xtob(int c) {
+ return (c - (((c >= '0') && (c <= '9')) ? '0' : '7'));
+}
+
+u_int
+inet_nsap_addr(const char *ascii, u_char *binary, int maxlen) {
+ u_char c, nib;
+ u_int len = 0;
+
+ if (ascii[0] != '0' || (ascii[1] != 'x' && ascii[1] != 'X'))
+ return (0);
+ ascii += 2;
+
+ while ((c = *ascii++) != '\0' && len < (u_int)maxlen) {
+ if (c == '.' || c == '+' || c == '/')
+ continue;
+ if (!isascii(c))
+ return (0);
+ if (islower(c))
+ c = toupper(c);
+ if (isxdigit(c)) {
+ nib = xtob(c);
+ c = *ascii++;
+ if (c != '\0') {
+ c = toupper(c);
+ if (isxdigit(c)) {
+ *binary++ = (nib << 4) | xtob(c);
+ len++;
+ } else
+ return (0);
+ }
+ else
+ return (0);
+ }
+ else
+ return (0);
+ }
+ return (len);
+}
+
+char *
+inet_nsap_ntoa(int binlen, const u_char *binary, char *ascii) {
+ int nib;
+ int i;
+ char *tmpbuf = inet_nsap_ntoa_tmpbuf;
+ char *start;
+
+ if (ascii)
+ start = ascii;
+ else {
+ ascii = tmpbuf;
+ start = tmpbuf;
+ }
+
+ *ascii++ = '0';
+ *ascii++ = 'x';
+
+ if (binlen > 255)
+ binlen = 255;
+
+ for (i = 0; i < binlen; i++) {
+ nib = *binary >> 4;
+ *ascii++ = nib + (nib < 10 ? '0' : '7');
+ nib = *binary++ & 0x0f;
+ *ascii++ = nib + (nib < 10 ? '0' : '7');
+ if (((i % 2) == 0 && (i + 1) < binlen))
+ *ascii++ = '.';
+ }
+ *ascii = '\0';
+ return (start);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/dns.c b/usr/src/lib/libresolv2_joy/common/irs/dns.c
new file mode 100644
index 0000000000..7c50320ae7
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/dns.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: dns.c,v 1.5 2006/03/09 23:57:56 marka Exp $";
+#endif
+
+/*! \file
+ * \brief
+ * dns.c --- this is the top-level accessor function for the dns
+ */
+
+#include "port_before.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#include <resolv_joy.h>
+
+#include <isc/memcluster.h>
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "hesiod.h"
+#include "dns_p.h"
+
+/* forward */
+
+static void dns_close(struct irs_acc *);
+static struct __res_state * dns_res_get(struct irs_acc *);
+static void dns_res_set(struct irs_acc *, struct __res_state *,
+ void (*)(void *));
+
+/* public */
+
+struct irs_acc *
+irs_dns_acc(const char *options) {
+ struct irs_acc *acc;
+ struct dns_p *dns;
+
+ UNUSED(options);
+
+ if (!(acc = memget(sizeof *acc))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(acc, 0x5e, sizeof *acc);
+ if (!(dns = memget(sizeof *dns))) {
+ errno = ENOMEM;
+ memput(acc, sizeof *acc);
+ return (NULL);
+ }
+ memset(dns, 0x5e, sizeof *dns);
+ dns->res = NULL;
+ dns->free_res = NULL;
+ if (hesiod_init(&dns->hes_ctx) < 0) {
+ /*
+ * We allow the dns accessor class to initialize
+ * despite hesiod failing to initialize correctly,
+ * since dns host queries don't depend on hesiod.
+ */
+ dns->hes_ctx = NULL;
+ }
+ acc->private = dns;
+#ifdef WANT_IRS_GR
+ acc->gr_map = irs_dns_gr;
+#else
+ acc->gr_map = NULL;
+#endif
+#ifdef WANT_IRS_PW
+ acc->pw_map = irs_dns_pw;
+#else
+ acc->pw_map = NULL;
+#endif
+ acc->sv_map = irs_dns_sv;
+ acc->pr_map = irs_dns_pr;
+ acc->ho_map = irs_dns_ho;
+ acc->nw_map = irs_dns_nw;
+ acc->ng_map = irs_nul_ng;
+ acc->res_get = dns_res_get;
+ acc->res_set = dns_res_set;
+ acc->close = dns_close;
+ return (acc);
+}
+
+/* methods */
+static struct __res_state *
+dns_res_get(struct irs_acc *this) {
+ struct dns_p *dns = (struct dns_p *)this->private;
+
+ if (dns->res == NULL) {
+ struct __res_state *res;
+ res = (struct __res_state *)malloc(sizeof *res);
+ if (res == NULL)
+ return (NULL);
+ memset(res, 0, sizeof *res);
+ dns_res_set(this, res, free);
+ }
+
+ if ((dns->res->options & RES_INIT) == 0U &&
+ res_ninit(dns->res) < 0)
+ return (NULL);
+
+ return (dns->res);
+}
+
+static void
+dns_res_set(struct irs_acc *this, struct __res_state *res,
+ void (*free_res)(void *)) {
+ struct dns_p *dns = (struct dns_p *)this->private;
+
+ if (dns->res && dns->free_res) {
+ res_nclose(dns->res);
+ (*dns->free_res)(dns->res);
+ }
+ dns->res = res;
+ dns->free_res = free_res;
+}
+
+static void
+dns_close(struct irs_acc *this) {
+ struct dns_p *dns;
+
+ dns = (struct dns_p *)this->private;
+ if (dns->res && dns->free_res)
+ (*dns->free_res)(dns->res);
+ if (dns->hes_ctx)
+ hesiod_end(dns->hes_ctx);
+ memput(dns, sizeof *dns);
+ memput(this, sizeof *this);
+}
+
diff --git a/usr/src/lib/libresolv2_joy/common/irs/dns_ho.c b/usr/src/lib/libresolv2_joy/common/irs/dns_ho.c
new file mode 100644
index 0000000000..67812717fb
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/dns_ho.c
@@ -0,0 +1,1143 @@
+/*
+ * Portions Copyright (C) 2004-2006, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (C) 1996-2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Copyright (c) 1985, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* from gethostnamadr.c 8.1 (Berkeley) 6/4/93 */
+/* BIND Id: gethnamaddr.c,v 8.15 1996/05/22 04:56:30 vixie Exp $ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: dns_ho.c,v 1.23 2008/11/14 02:36:51 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/* Imports. */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <string.h>
+#include <syslog.h>
+
+#include <isc/memcluster.h>
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "dns_p.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) sprintf x
+#endif
+
+/* Definitions. */
+
+#define MAXALIASES 35
+#define MAXADDRS 35
+
+#define MAXPACKET (65535) /*%< Maximum TCP message size */
+#define BOUNDS_CHECK(ptr, count) \
+ if ((ptr) + (count) > eom) { \
+ had_error++; \
+ continue; \
+ } else (void)0
+
+typedef union {
+ HEADER hdr;
+ u_char buf[MAXPACKET];
+} querybuf;
+
+struct dns_res_target {
+ struct dns_res_target *next;
+ querybuf qbuf; /*%< query buffer */
+ u_char *answer; /*%< buffer to put answer */
+ int anslen; /*%< size of answer buffer */
+ int qclass, qtype; /*%< class and type of query */
+ int action; /*%< condition whether query is really issued */
+ char qname[MAXDNAME +1]; /*%< domain name */
+#if 0
+ int n; /*%< result length */
+#endif
+};
+enum {RESTGT_DOALWAYS, RESTGT_AFTERFAILURE, RESTGT_IGNORE};
+enum {RESQRY_SUCCESS, RESQRY_FAIL};
+
+struct pvt {
+ struct hostent host;
+ char * h_addr_ptrs[MAXADDRS + 1];
+ char * host_aliases[MAXALIASES];
+ char hostbuf[8*1024];
+ u_char host_addr[16]; /*%< IPv4 or IPv6 */
+ struct __res_state *res;
+ void (*free_res)(void *);
+};
+
+typedef union {
+ int32_t al;
+ char ac;
+} align;
+
+static const u_char mapped[] = { 0,0, 0,0, 0,0, 0,0, 0,0, 0xff,0xff };
+static const u_char tunnelled[] = { 0,0, 0,0, 0,0, 0,0, 0,0, 0,0 };
+/* Note: the IPv6 loopback address is in the "tunnel" space */
+static const u_char v6local[] = { 0,0, 0,1 }; /*%< last 4 bytes of IPv6 addr */
+/* Forwards. */
+
+static void ho_close(struct irs_ho *this);
+static struct hostent * ho_byname(struct irs_ho *this, const char *name);
+static struct hostent * ho_byname2(struct irs_ho *this, const char *name,
+ int af);
+static struct hostent * ho_byaddr(struct irs_ho *this, const void *addr,
+ int len, int af);
+static struct hostent * ho_next(struct irs_ho *this);
+static void ho_rewind(struct irs_ho *this);
+static void ho_minimize(struct irs_ho *this);
+static struct __res_state * ho_res_get(struct irs_ho *this);
+static void ho_res_set(struct irs_ho *this,
+ struct __res_state *res,
+ void (*free_res)(void *));
+static struct addrinfo * ho_addrinfo(struct irs_ho *this, const char *name,
+ const struct addrinfo *pai);
+
+static void map_v4v6_hostent(struct hostent *hp, char **bp,
+ char *ep);
+static void addrsort(res_state, char **, int);
+static struct hostent * gethostans(struct irs_ho *this,
+ const u_char *ansbuf, int anslen,
+ const char *qname, int qtype,
+ int af, int size,
+ struct addrinfo **ret_aip,
+ const struct addrinfo *pai);
+static int add_hostent(struct pvt *pvt, char *bp, char **hap,
+ struct addrinfo *ai);
+static int init(struct irs_ho *this);
+
+/* Exports. */
+
+struct irs_ho *
+irs_dns_ho(struct irs_acc *this) {
+ struct irs_ho *ho;
+ struct pvt *pvt;
+
+ UNUSED(this);
+
+ if (!(pvt = memget(sizeof *pvt))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+
+ if (!(ho = memget(sizeof *ho))) {
+ memput(pvt, sizeof *pvt);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(ho, 0x5e, sizeof *ho);
+ ho->private = pvt;
+ ho->close = ho_close;
+ ho->byname = ho_byname;
+ ho->byname2 = ho_byname2;
+ ho->byaddr = ho_byaddr;
+ ho->next = ho_next;
+ ho->rewind = ho_rewind;
+ ho->minimize = ho_minimize;
+ ho->res_get = ho_res_get;
+ ho->res_set = ho_res_set;
+ ho->addrinfo = ho_addrinfo;
+ return (ho);
+}
+
+/* Methods. */
+
+static void
+ho_close(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ ho_minimize(this);
+ if (pvt->res && pvt->free_res)
+ (*pvt->free_res)(pvt->res);
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+static struct hostent *
+ho_byname(struct irs_ho *this, const char *name) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct hostent *hp;
+
+ if (init(this) == -1)
+ return (NULL);
+
+ if (pvt->res->options & RES_USE_INET6) {
+ hp = ho_byname2(this, name, AF_INET6);
+ if (hp)
+ return (hp);
+ }
+ return (ho_byname2(this, name, AF_INET));
+}
+
+static struct hostent *
+ho_byname2(struct irs_ho *this, const char *name, int af)
+{
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct hostent *hp = NULL;
+ int n, size;
+ char tmp[NS_MAXDNAME];
+ const char *cp;
+ struct addrinfo ai;
+ struct dns_res_target *q, *p;
+ int querystate = RESQRY_FAIL;
+
+ if (init(this) == -1)
+ return (NULL);
+
+ q = memget(sizeof(*q));
+ if (q == NULL) {
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ errno = ENOMEM;
+ goto cleanup;
+ }
+ memset(q, 0, sizeof(*q));
+
+ switch (af) {
+ case AF_INET:
+ size = INADDRSZ;
+ q->qclass = C_IN;
+ q->qtype = T_A;
+ q->answer = q->qbuf.buf;
+ q->anslen = sizeof(q->qbuf);
+ q->action = RESTGT_DOALWAYS;
+ break;
+ case AF_INET6:
+ size = IN6ADDRSZ;
+ q->qclass = C_IN;
+ q->qtype = T_AAAA;
+ q->answer = q->qbuf.buf;
+ q->anslen = sizeof(q->qbuf);
+ q->action = RESTGT_DOALWAYS;
+ break;
+ default:
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ errno = EAFNOSUPPORT;
+ hp = NULL;
+ goto cleanup;
+ }
+
+ /*
+ * if there aren't any dots, it could be a user-level alias.
+ * this is also done in res_nquery() since we are not the only
+ * function that looks up host names.
+ */
+ if (!strchr(name, '.') && (cp = res_hostalias(pvt->res, name,
+ tmp, sizeof tmp)))
+ name = cp;
+
+ for (p = q; p; p = p->next) {
+ switch(p->action) {
+ case RESTGT_DOALWAYS:
+ break;
+ case RESTGT_AFTERFAILURE:
+ if (querystate == RESQRY_SUCCESS)
+ continue;
+ break;
+ case RESTGT_IGNORE:
+ continue;
+ }
+
+ if ((n = res_nsearch(pvt->res, name, p->qclass, p->qtype,
+ p->answer, p->anslen)) < 0) {
+ querystate = RESQRY_FAIL;
+ continue;
+ }
+
+ memset(&ai, 0, sizeof(ai));
+ ai.ai_family = af;
+ if ((hp = gethostans(this, p->answer, n, name, p->qtype,
+ af, size, NULL,
+ (const struct addrinfo *)&ai)) != NULL)
+ goto cleanup; /*%< no more loop is necessary */
+ querystate = RESQRY_FAIL;
+ continue;
+ }
+
+ cleanup:
+ if (q != NULL)
+ memput(q, sizeof(*q));
+ return(hp);
+}
+
+static struct hostent *
+ho_byaddr(struct irs_ho *this, const void *addr, int len, int af)
+{
+ struct pvt *pvt = (struct pvt *)this->private;
+ const u_char *uaddr = addr;
+ char *qp;
+ struct hostent *hp = NULL;
+ struct addrinfo ai;
+ struct dns_res_target *q, *q2, *p;
+ int n, size, i;
+ int querystate = RESQRY_FAIL;
+
+ if (init(this) == -1)
+ return (NULL);
+
+ q = memget(sizeof(*q));
+ q2 = memget(sizeof(*q2));
+ if (q == NULL || q2 == NULL) {
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ errno = ENOMEM;
+ goto cleanup;
+ }
+ memset(q, 0, sizeof(*q));
+ memset(q2, 0, sizeof(*q2));
+
+ if (af == AF_INET6 && len == IN6ADDRSZ &&
+ (!memcmp(uaddr, mapped, sizeof mapped) ||
+ (!memcmp(uaddr, tunnelled, sizeof tunnelled) &&
+ memcmp(&uaddr[sizeof tunnelled], v6local, sizeof(v6local))))) {
+ /* Unmap. */
+ addr = (const char *)addr + sizeof mapped;
+ uaddr += sizeof mapped;
+ af = AF_INET;
+ len = INADDRSZ;
+ }
+ switch (af) {
+ case AF_INET:
+ size = INADDRSZ;
+ q->qclass = C_IN;
+ q->qtype = T_PTR;
+ q->answer = q->qbuf.buf;
+ q->anslen = sizeof(q->qbuf);
+ q->action = RESTGT_DOALWAYS;
+ break;
+ case AF_INET6:
+ size = IN6ADDRSZ;
+ q->qclass = C_IN;
+ q->qtype = T_PTR;
+ q->answer = q->qbuf.buf;
+ q->anslen = sizeof(q->qbuf);
+ q->next = q2;
+ q->action = RESTGT_DOALWAYS;
+ q2->qclass = C_IN;
+ q2->qtype = T_PTR;
+ q2->answer = q2->qbuf.buf;
+ q2->anslen = sizeof(q2->qbuf);
+ if ((pvt->res->options & RES_NO_NIBBLE2) != 0U)
+ q2->action = RESTGT_IGNORE;
+ else
+ q2->action = RESTGT_AFTERFAILURE;
+ break;
+ default:
+ errno = EAFNOSUPPORT;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ hp = NULL;
+ goto cleanup;
+ }
+ if (size > len) {
+ errno = EINVAL;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ hp = NULL;
+ goto cleanup;
+ }
+ switch (af) {
+ case AF_INET:
+ qp = q->qname;
+ (void) sprintf(qp, "%u.%u.%u.%u.in-addr.arpa",
+ (uaddr[3] & 0xff),
+ (uaddr[2] & 0xff),
+ (uaddr[1] & 0xff),
+ (uaddr[0] & 0xff));
+ break;
+ case AF_INET6:
+ if (q->action != RESTGT_IGNORE) {
+ const char *nibsuff = res_get_nibblesuffix(pvt->res);
+ qp = q->qname;
+ for (n = IN6ADDRSZ - 1; n >= 0; n--) {
+ i = SPRINTF((qp, "%x.%x.",
+ uaddr[n] & 0xf,
+ (uaddr[n] >> 4) & 0xf));
+ if (i != 4)
+ abort();
+ qp += i;
+ }
+ if (strlen(q->qname) + strlen(nibsuff) + 1 >
+ sizeof q->qname) {
+ errno = ENAMETOOLONG;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ hp = NULL;
+ goto cleanup;
+ }
+ strcpy(qp, nibsuff); /* (checked) */
+ }
+ if (q2->action != RESTGT_IGNORE) {
+ const char *nibsuff2 = res_get_nibblesuffix2(pvt->res);
+ qp = q2->qname;
+ for (n = IN6ADDRSZ - 1; n >= 0; n--) {
+ i = SPRINTF((qp, "%x.%x.",
+ uaddr[n] & 0xf,
+ (uaddr[n] >> 4) & 0xf));
+ if (i != 4)
+ abort();
+ qp += i;
+ }
+ if (strlen(q2->qname) + strlen(nibsuff2) + 1 >
+ sizeof q2->qname) {
+ errno = ENAMETOOLONG;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ hp = NULL;
+ goto cleanup;
+ }
+ strcpy(qp, nibsuff2); /* (checked) */
+ }
+ break;
+ default:
+ abort();
+ }
+
+ for (p = q; p; p = p->next) {
+ switch(p->action) {
+ case RESTGT_DOALWAYS:
+ break;
+ case RESTGT_AFTERFAILURE:
+ if (querystate == RESQRY_SUCCESS)
+ continue;
+ break;
+ case RESTGT_IGNORE:
+ continue;
+ }
+
+ if ((n = res_nquery(pvt->res, p->qname, p->qclass, p->qtype,
+ p->answer, p->anslen)) < 0) {
+ querystate = RESQRY_FAIL;
+ continue;
+ }
+
+ memset(&ai, 0, sizeof(ai));
+ ai.ai_family = af;
+ hp = gethostans(this, p->answer, n, p->qname, T_PTR, af, size,
+ NULL, (const struct addrinfo *)&ai);
+ if (!hp) {
+ querystate = RESQRY_FAIL;
+ continue;
+ }
+
+ memcpy(pvt->host_addr, addr, len);
+ pvt->h_addr_ptrs[0] = (char *)pvt->host_addr;
+ pvt->h_addr_ptrs[1] = NULL;
+ if (af == AF_INET && (pvt->res->options & RES_USE_INET6)) {
+ map_v4v6_address((char*)pvt->host_addr,
+ (char*)pvt->host_addr);
+ pvt->host.h_addrtype = AF_INET6;
+ pvt->host.h_length = IN6ADDRSZ;
+ }
+
+ RES_SET_H_ERRNO(pvt->res, NETDB_SUCCESS);
+ goto cleanup; /*%< no more loop is necessary. */
+ }
+ hp = NULL; /*%< H_ERRNO was set by subroutines */
+ cleanup:
+ if (q != NULL)
+ memput(q, sizeof(*q));
+ if (q2 != NULL)
+ memput(q2, sizeof(*q2));
+ return(hp);
+}
+
+static struct hostent *
+ho_next(struct irs_ho *this) {
+
+ UNUSED(this);
+
+ return (NULL);
+}
+
+static void
+ho_rewind(struct irs_ho *this) {
+
+ UNUSED(this);
+
+ /* NOOP */
+}
+
+static void
+ho_minimize(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->res)
+ res_nclose(pvt->res);
+}
+
+static struct __res_state *
+ho_res_get(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (!pvt->res) {
+ struct __res_state *res;
+ res = (struct __res_state *)malloc(sizeof *res);
+ if (!res) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(res, 0, sizeof *res);
+ ho_res_set(this, res, free);
+ }
+
+ return (pvt->res);
+}
+
+/* XXX */
+extern struct addrinfo *addr2addrinfo __P((const struct addrinfo *,
+ const char *));
+
+static struct addrinfo *
+ho_addrinfo(struct irs_ho *this, const char *name, const struct addrinfo *pai)
+{
+ struct pvt *pvt = (struct pvt *)this->private;
+ int n;
+ char tmp[NS_MAXDNAME];
+ const char *cp;
+ struct dns_res_target *q, *q2, *p;
+ struct addrinfo sentinel, *cur;
+ int querystate = RESQRY_FAIL;
+
+ if (init(this) == -1)
+ return (NULL);
+
+ memset(&sentinel, 0, sizeof(sentinel));
+ cur = &sentinel;
+
+ q = memget(sizeof(*q));
+ q2 = memget(sizeof(*q2));
+ if (q == NULL || q2 == NULL) {
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ errno = ENOMEM;
+ goto cleanup;
+ }
+ memset(q, 0, sizeof(*q2));
+ memset(q2, 0, sizeof(*q2));
+
+ switch (pai->ai_family) {
+ case AF_UNSPEC:
+ /* prefer IPv6 */
+ q->qclass = C_IN;
+ q->qtype = T_AAAA;
+ q->answer = q->qbuf.buf;
+ q->anslen = sizeof(q->qbuf);
+ q->next = q2;
+ q->action = RESTGT_DOALWAYS;
+ q2->qclass = C_IN;
+ q2->qtype = T_A;
+ q2->answer = q2->qbuf.buf;
+ q2->anslen = sizeof(q2->qbuf);
+ q2->action = RESTGT_DOALWAYS;
+ break;
+ case AF_INET:
+ q->qclass = C_IN;
+ q->qtype = T_A;
+ q->answer = q->qbuf.buf;
+ q->anslen = sizeof(q->qbuf);
+ q->action = RESTGT_DOALWAYS;
+ break;
+ case AF_INET6:
+ q->qclass = C_IN;
+ q->qtype = T_AAAA;
+ q->answer = q->qbuf.buf;
+ q->anslen = sizeof(q->qbuf);
+ q->action = RESTGT_DOALWAYS;
+ break;
+ default:
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY); /*%< better error? */
+ goto cleanup;
+ }
+
+ /*
+ * if there aren't any dots, it could be a user-level alias.
+ * this is also done in res_nquery() since we are not the only
+ * function that looks up host names.
+ */
+ if (!strchr(name, '.') && (cp = res_hostalias(pvt->res, name,
+ tmp, sizeof tmp)))
+ name = cp;
+
+ for (p = q; p; p = p->next) {
+ struct addrinfo *ai;
+
+ switch(p->action) {
+ case RESTGT_DOALWAYS:
+ break;
+ case RESTGT_AFTERFAILURE:
+ if (querystate == RESQRY_SUCCESS)
+ continue;
+ break;
+ case RESTGT_IGNORE:
+ continue;
+ }
+
+ if ((n = res_nsearch(pvt->res, name, p->qclass, p->qtype,
+ p->answer, p->anslen)) < 0) {
+ querystate = RESQRY_FAIL;
+ continue;
+ }
+ (void)gethostans(this, p->answer, n, name, p->qtype,
+ pai->ai_family, /*%< XXX: meaningless */
+ 0, &ai, pai);
+ if (ai) {
+ querystate = RESQRY_SUCCESS;
+ cur->ai_next = ai;
+ while (cur->ai_next)
+ cur = cur->ai_next;
+ } else
+ querystate = RESQRY_FAIL;
+ }
+
+ cleanup:
+ if (q != NULL)
+ memput(q, sizeof(*q));
+ if (q2 != NULL)
+ memput(q2, sizeof(*q2));
+ return(sentinel.ai_next);
+}
+
+static void
+ho_res_set(struct irs_ho *this, struct __res_state *res,
+ void (*free_res)(void *)) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->res && pvt->free_res) {
+ res_nclose(pvt->res);
+ (*pvt->free_res)(pvt->res);
+ }
+
+ pvt->res = res;
+ pvt->free_res = free_res;
+}
+
+/* Private. */
+
+static struct hostent *
+gethostans(struct irs_ho *this,
+ const u_char *ansbuf, int anslen, const char *qname, int qtype,
+ int af, int size, /*!< meaningless for addrinfo cases */
+ struct addrinfo **ret_aip, const struct addrinfo *pai)
+{
+ struct pvt *pvt = (struct pvt *)this->private;
+ int type, class, ancount, qdcount, n, haveanswer, had_error;
+ int error = NETDB_SUCCESS;
+ int (*name_ok)(const char *);
+ const HEADER *hp;
+ const u_char *eom;
+ const u_char *eor;
+ const u_char *cp;
+ const char *tname;
+ const char *hname;
+ char *bp, *ep, **ap, **hap;
+ char tbuf[MAXDNAME+1];
+ struct addrinfo sentinel, *cur, ai;
+
+ if (pai == NULL) abort();
+ if (ret_aip != NULL)
+ *ret_aip = NULL;
+ memset(&sentinel, 0, sizeof(sentinel));
+ cur = &sentinel;
+
+ tname = qname;
+ eom = ansbuf + anslen;
+ switch (qtype) {
+ case T_A:
+ case T_AAAA:
+ case T_ANY: /*%< use T_ANY only for T_A/T_AAAA lookup */
+ name_ok = res_hnok;
+ break;
+ case T_PTR:
+ name_ok = res_dnok;
+ break;
+ default:
+ abort();
+ }
+
+ pvt->host.h_addrtype = af;
+ pvt->host.h_length = size;
+ hname = pvt->host.h_name = NULL;
+
+ /*
+ * Find first satisfactory answer.
+ */
+ if (ansbuf + HFIXEDSZ > eom) {
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY);
+ return (NULL);
+ }
+ hp = (const HEADER *)ansbuf;
+ ancount = ntohs(hp->ancount);
+ qdcount = ntohs(hp->qdcount);
+ bp = pvt->hostbuf;
+ ep = pvt->hostbuf + sizeof(pvt->hostbuf);
+ cp = ansbuf + HFIXEDSZ;
+ if (qdcount != 1) {
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY);
+ return (NULL);
+ }
+ n = dn_expand(ansbuf, eom, cp, bp, ep - bp);
+ if (n < 0 || !maybe_ok(pvt->res, bp, name_ok)) {
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY);
+ return (NULL);
+ }
+ cp += n + QFIXEDSZ;
+ if (cp > eom) {
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY);
+ return (NULL);
+ }
+ if (qtype == T_A || qtype == T_AAAA || qtype == T_ANY) {
+ /* res_nsend() has already verified that the query name is the
+ * same as the one we sent; this just gets the expanded name
+ * (i.e., with the succeeding search-domain tacked on).
+ */
+ n = strlen(bp) + 1; /*%< for the \\0 */
+ if (n > MAXHOSTNAMELEN) {
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY);
+ return (NULL);
+ }
+ pvt->host.h_name = bp;
+ hname = bp;
+ bp += n;
+ /* The qname can be abbreviated, but hname is now absolute. */
+ qname = pvt->host.h_name;
+ }
+ ap = pvt->host_aliases;
+ *ap = NULL;
+ pvt->host.h_aliases = pvt->host_aliases;
+ hap = pvt->h_addr_ptrs;
+ *hap = NULL;
+ pvt->host.h_addr_list = pvt->h_addr_ptrs;
+ haveanswer = 0;
+ had_error = 0;
+ while (ancount-- > 0 && cp < eom && !had_error) {
+ n = dn_expand(ansbuf, eom, cp, bp, ep - bp);
+ if (n < 0 || !maybe_ok(pvt->res, bp, name_ok)) {
+ had_error++;
+ continue;
+ }
+ cp += n; /*%< name */
+ BOUNDS_CHECK(cp, 3 * INT16SZ + INT32SZ);
+ type = ns_get16(cp);
+ cp += INT16SZ; /*%< type */
+ class = ns_get16(cp);
+ cp += INT16SZ + INT32SZ; /*%< class, TTL */
+ n = ns_get16(cp);
+ cp += INT16SZ; /*%< len */
+ BOUNDS_CHECK(cp, n);
+ if (class != C_IN) {
+ cp += n;
+ continue;
+ }
+ eor = cp + n;
+ if ((qtype == T_A || qtype == T_AAAA || qtype == T_ANY) &&
+ type == T_CNAME) {
+ if (haveanswer) {
+ int level = LOG_CRIT;
+#ifdef LOG_SECURITY
+ level |= LOG_SECURITY;
+#endif
+ syslog(level,
+ "gethostans: possible attempt to exploit buffer overflow while looking up %s",
+ *qname ? qname : ".");
+ }
+ n = dn_expand(ansbuf, eor, cp, tbuf, sizeof tbuf);
+ if (n < 0 || !maybe_ok(pvt->res, tbuf, name_ok)) {
+ had_error++;
+ continue;
+ }
+ cp += n;
+ /* Store alias. */
+ if (ap >= &pvt->host_aliases[MAXALIASES-1])
+ continue;
+ *ap++ = bp;
+ n = strlen(bp) + 1; /*%< for the \\0 */
+ bp += n;
+ /* Get canonical name. */
+ n = strlen(tbuf) + 1; /*%< for the \\0 */
+ if (n > (ep - bp) || n > MAXHOSTNAMELEN) {
+ had_error++;
+ continue;
+ }
+ strcpy(bp, tbuf); /* (checked) */
+ pvt->host.h_name = bp;
+ hname = bp;
+ bp += n;
+ continue;
+ }
+ if (qtype == T_PTR && type == T_CNAME) {
+ n = dn_expand(ansbuf, eor, cp, tbuf, sizeof tbuf);
+ if (n < 0 || !maybe_dnok(pvt->res, tbuf)) {
+ had_error++;
+ continue;
+ }
+ cp += n;
+#ifdef RES_USE_DNAME
+ if ((pvt->res->options & RES_USE_DNAME) != 0U)
+#endif
+ {
+ /*
+ * We may be able to check this regardless
+ * of the USE_DNAME bit, but we add the check
+ * for now since the DNAME support is
+ * experimental.
+ */
+ if (ns_samename(tname, bp) != 1)
+ continue;
+ }
+ /* Get canonical name. */
+ n = strlen(tbuf) + 1; /*%< for the \\0 */
+ if (n > (ep - bp)) {
+ had_error++;
+ continue;
+ }
+ strcpy(bp, tbuf); /* (checked) */
+ tname = bp;
+ bp += n;
+ continue;
+ }
+ if (qtype == T_ANY) {
+ if (!(type == T_A || type == T_AAAA)) {
+ cp += n;
+ continue;
+ }
+ } else if (type != qtype) {
+ cp += n;
+ continue;
+ }
+ switch (type) {
+ case T_PTR:
+ if (ret_aip != NULL) {
+ /* addrinfo never needs T_PTR */
+ cp += n;
+ continue;
+ }
+ if (ns_samename(tname, bp) != 1) {
+ cp += n;
+ continue;
+ }
+ n = dn_expand(ansbuf, eor, cp, bp, ep - bp);
+ if (n < 0 || !maybe_hnok(pvt->res, bp) ||
+ n >= MAXHOSTNAMELEN) {
+ had_error++;
+ break;
+ }
+ cp += n;
+ if (!haveanswer) {
+ pvt->host.h_name = bp;
+ hname = bp;
+ }
+ else if (ap < &pvt->host_aliases[MAXALIASES-1])
+ *ap++ = bp;
+ else
+ n = -1;
+ if (n != -1) {
+ n = strlen(bp) + 1; /*%< for the \\0 */
+ bp += n;
+ }
+ break;
+ case T_A:
+ case T_AAAA:
+ if (ns_samename(hname, bp) != 1) {
+ cp += n;
+ continue;
+ }
+ if (type == T_A && n != INADDRSZ) {
+ cp += n;
+ continue;
+ }
+ if (type == T_AAAA && n != IN6ADDRSZ) {
+ cp += n;
+ continue;
+ }
+
+ /* make addrinfo. don't overwrite constant PAI */
+ ai = *pai;
+ ai.ai_family = (type == T_AAAA) ? AF_INET6 : AF_INET;
+ cur->ai_next = addr2addrinfo(
+ (const struct addrinfo *)&ai,
+ (const char *)cp);
+ if (cur->ai_next == NULL)
+ had_error++;
+
+ if (!haveanswer) {
+ int nn;
+
+ nn = strlen(bp) + 1; /*%< for the \\0 */
+ if (nn >= MAXHOSTNAMELEN) {
+ cp += n;
+ had_error++;
+ continue;
+ }
+ pvt->host.h_name = bp;
+ hname = bp;
+ bp += nn;
+ }
+ /* Ensure alignment. */
+ bp = (char *)(((u_long)bp + (sizeof(align) - 1)) &
+ ~(sizeof(align) - 1));
+ /* Avoid overflows. */
+ if (bp + n > &pvt->hostbuf[sizeof(pvt->hostbuf) - 1]) {
+ had_error++;
+ continue;
+ }
+ if (ret_aip) { /*%< need addrinfo. keep it. */
+ while (cur->ai_next)
+ cur = cur->ai_next;
+ } else if (cur->ai_next) { /*%< need hostent */
+ struct addrinfo *aip = cur->ai_next;
+
+ for (aip = cur->ai_next; aip;
+ aip = aip->ai_next) {
+ int m;
+
+ m = add_hostent(pvt, bp, hap, aip);
+ if (m < 0) {
+ had_error++;
+ break;
+ }
+ if (m == 0)
+ continue;
+ if (hap < &pvt->h_addr_ptrs[MAXADDRS])
+ hap++;
+ *hap = NULL;
+ bp += m;
+ }
+
+ freeaddrinfo(cur->ai_next);
+ cur->ai_next = NULL;
+ }
+ cp += n;
+ break;
+ default:
+ abort();
+ }
+ if (!had_error)
+ haveanswer++;
+ }
+ if (haveanswer) {
+ if (ret_aip == NULL) {
+ *ap = NULL;
+ *hap = NULL;
+
+ if (pvt->res->nsort && hap != pvt->h_addr_ptrs &&
+ qtype == T_A)
+ addrsort(pvt->res, pvt->h_addr_ptrs,
+ hap - pvt->h_addr_ptrs);
+ if (pvt->host.h_name == NULL) {
+ n = strlen(qname) + 1; /*%< for the \\0 */
+ if (n > (ep - bp) || n >= MAXHOSTNAMELEN)
+ goto no_recovery;
+ strcpy(bp, qname); /* (checked) */
+ pvt->host.h_name = bp;
+ bp += n;
+ }
+ if (pvt->res->options & RES_USE_INET6)
+ map_v4v6_hostent(&pvt->host, &bp, ep);
+ RES_SET_H_ERRNO(pvt->res, NETDB_SUCCESS);
+ return (&pvt->host);
+ } else {
+ if ((pai->ai_flags & AI_CANONNAME) != 0) {
+ if (pvt->host.h_name == NULL) {
+ sentinel.ai_next->ai_canonname =
+ strdup(qname);
+ }
+ else {
+ sentinel.ai_next->ai_canonname =
+ strdup(pvt->host.h_name);
+ }
+ }
+ *ret_aip = sentinel.ai_next;
+ return(NULL);
+ }
+ }
+ no_recovery:
+ if (sentinel.ai_next) {
+ /* this should be impossible, but check it for safety */
+ freeaddrinfo(sentinel.ai_next);
+ }
+ if (error == NETDB_SUCCESS)
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY);
+ else
+ RES_SET_H_ERRNO(pvt->res, error);
+ return(NULL);
+}
+
+static int
+add_hostent(struct pvt *pvt, char *bp, char **hap, struct addrinfo *ai)
+{
+ int addrlen;
+ char *addrp;
+ const char **tap;
+ char *obp = bp;
+
+ switch(ai->ai_addr->sa_family) {
+ case AF_INET6:
+ addrlen = IN6ADDRSZ;
+ addrp = (char *)&((struct sockaddr_in6 *)ai->ai_addr)->sin6_addr;
+ break;
+ case AF_INET:
+ addrlen = INADDRSZ;
+ addrp = (char *)&((struct sockaddr_in *)ai->ai_addr)->sin_addr;
+ break;
+ default:
+ return(-1); /*%< abort? */
+ }
+
+ /* Ensure alignment. */
+ bp = (char *)(((u_long)bp + (sizeof(align) - 1)) &
+ ~(sizeof(align) - 1));
+ /* Avoid overflows. */
+ if (bp + addrlen > &pvt->hostbuf[sizeof(pvt->hostbuf) - 1])
+ return(-1);
+ if (hap >= &pvt->h_addr_ptrs[MAXADDRS])
+ return(0); /*%< fail, but not treat it as an error. */
+ /* Suppress duplicates. */
+ for (tap = (const char **)pvt->h_addr_ptrs;
+ *tap != NULL;
+ tap++)
+ if (memcmp(*tap, addrp, addrlen) == 0)
+ break;
+ if (*tap != NULL)
+ return (0);
+
+ memcpy(*hap = bp, addrp, addrlen);
+ return((bp + addrlen) - obp);
+}
+
+static void
+map_v4v6_hostent(struct hostent *hp, char **bpp, char *ep) {
+ char **ap;
+
+ if (hp->h_addrtype != AF_INET || hp->h_length != INADDRSZ)
+ return;
+ hp->h_addrtype = AF_INET6;
+ hp->h_length = IN6ADDRSZ;
+ for (ap = hp->h_addr_list; *ap; ap++) {
+ int i = (u_long)*bpp % sizeof(align);
+
+ if (i != 0)
+ i = sizeof(align) - i;
+
+ if ((ep - *bpp) < (i + IN6ADDRSZ)) {
+ /* Out of memory. Truncate address list here. */
+ *ap = NULL;
+ return;
+ }
+ *bpp += i;
+ map_v4v6_address(*ap, *bpp);
+ *ap = *bpp;
+ *bpp += IN6ADDRSZ;
+ }
+}
+
+static void
+addrsort(res_state statp, char **ap, int num) {
+ int i, j, needsort = 0, aval[MAXADDRS];
+ char **p;
+
+ p = ap;
+ for (i = 0; i < num; i++, p++) {
+ for (j = 0 ; (unsigned)j < statp->nsort; j++)
+ if (statp->sort_list[j].addr.s_addr ==
+ (((struct in_addr *)(*p))->s_addr &
+ statp->sort_list[j].mask))
+ break;
+ aval[i] = j;
+ if (needsort == 0 && i > 0 && j < aval[i-1])
+ needsort = i;
+ }
+ if (!needsort)
+ return;
+
+ while (needsort < num) {
+ for (j = needsort - 1; j >= 0; j--) {
+ if (aval[j] > aval[j+1]) {
+ char *hp;
+
+ i = aval[j];
+ aval[j] = aval[j+1];
+ aval[j+1] = i;
+
+ hp = ap[j];
+ ap[j] = ap[j+1];
+ ap[j+1] = hp;
+
+ } else
+ break;
+ }
+ needsort++;
+ }
+}
+
+static int
+init(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (!pvt->res && !ho_res_get(this))
+ return (-1);
+ if (((pvt->res->options & RES_INIT) == 0U) &&
+ res_ninit(pvt->res) == -1)
+ return (-1);
+ return (0);
+}
diff --git a/usr/src/lib/libresolv2_joy/common/irs/dns_nw.c b/usr/src/lib/libresolv2_joy/common/irs/dns_nw.c
new file mode 100644
index 0000000000..e9acfd39c7
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/dns_nw.c
@@ -0,0 +1,591 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: dns_nw.c,v 1.12 2005/04/27 04:56:22 sra Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/* Imports. */
+
+#include "port_before.h"
+
+#include <sys/param.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <isc/memcluster.h>
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "dns_p.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) sprintf x
+#endif
+
+/* Definitions. */
+
+#define MAXALIASES 35
+
+#define MAXPACKET (64*1024)
+
+struct pvt {
+ struct nwent net;
+ char * ali[MAXALIASES];
+ char buf[BUFSIZ+1];
+ struct __res_state * res;
+ void (*free_res)(void *);
+};
+
+typedef union {
+ long al;
+ char ac;
+} align;
+
+enum by_what { by_addr, by_name };
+
+/* Forwards. */
+
+static void nw_close(struct irs_nw *);
+static struct nwent * nw_byname(struct irs_nw *, const char *, int);
+static struct nwent * nw_byaddr(struct irs_nw *, void *, int, int);
+static struct nwent * nw_next(struct irs_nw *);
+static void nw_rewind(struct irs_nw *);
+static void nw_minimize(struct irs_nw *);
+static struct __res_state * nw_res_get(struct irs_nw *this);
+static void nw_res_set(struct irs_nw *this,
+ struct __res_state *res,
+ void (*free_res)(void *));
+
+static struct nwent * get1101byaddr(struct irs_nw *, u_char *, int);
+static struct nwent * get1101byname(struct irs_nw *, const char *);
+static struct nwent * get1101answer(struct irs_nw *,
+ u_char *ansbuf, int anslen,
+ enum by_what by_what,
+ int af, const char *name,
+ const u_char *addr, int addrlen);
+static struct nwent * get1101mask(struct irs_nw *this, struct nwent *);
+static int make1101inaddr(const u_char *, int, char *, int);
+static void normalize_name(char *name);
+static int init(struct irs_nw *this);
+
+/* Exports. */
+
+struct irs_nw *
+irs_dns_nw(struct irs_acc *this) {
+ struct irs_nw *nw;
+ struct pvt *pvt;
+
+ UNUSED(this);
+
+ if (!(pvt = memget(sizeof *pvt))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ if (!(nw = memget(sizeof *nw))) {
+ memput(pvt, sizeof *pvt);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(nw, 0x5e, sizeof *nw);
+ nw->private = pvt;
+ nw->close = nw_close;
+ nw->byname = nw_byname;
+ nw->byaddr = nw_byaddr;
+ nw->next = nw_next;
+ nw->rewind = nw_rewind;
+ nw->minimize = nw_minimize;
+ nw->res_get = nw_res_get;
+ nw->res_set = nw_res_set;
+ return (nw);
+}
+
+/* Methods. */
+
+static void
+nw_close(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ nw_minimize(this);
+
+ if (pvt->res && pvt->free_res)
+ (*pvt->free_res)(pvt->res);
+
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+static struct nwent *
+nw_byname(struct irs_nw *this, const char *name, int af) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (init(this) == -1)
+ return (NULL);
+
+ switch (af) {
+ case AF_INET:
+ return (get1101byname(this, name));
+ default:
+ (void)NULL;
+ }
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ errno = EAFNOSUPPORT;
+ return (NULL);
+}
+
+static struct nwent *
+nw_byaddr(struct irs_nw *this, void *net, int len, int af) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (init(this) == -1)
+ return (NULL);
+
+ switch (af) {
+ case AF_INET:
+ return (get1101byaddr(this, net, len));
+ default:
+ (void)NULL;
+ }
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ errno = EAFNOSUPPORT;
+ return (NULL);
+}
+
+static struct nwent *
+nw_next(struct irs_nw *this) {
+
+ UNUSED(this);
+
+ return (NULL);
+}
+
+static void
+nw_rewind(struct irs_nw *this) {
+ UNUSED(this);
+ /* NOOP */
+}
+
+static void
+nw_minimize(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->res)
+ res_nclose(pvt->res);
+}
+
+static struct __res_state *
+nw_res_get(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (!pvt->res) {
+ struct __res_state *res;
+ res = (struct __res_state *)malloc(sizeof *res);
+ if (!res) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(res, 0, sizeof *res);
+ nw_res_set(this, res, free);
+ }
+
+ return (pvt->res);
+}
+
+static void
+nw_res_set(struct irs_nw *this, struct __res_state *res,
+ void (*free_res)(void *)) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->res && pvt->free_res) {
+ res_nclose(pvt->res);
+ (*pvt->free_res)(pvt->res);
+ }
+
+ pvt->res = res;
+ pvt->free_res = free_res;
+}
+
+/* Private. */
+
+static struct nwent *
+get1101byname(struct irs_nw *this, const char *name) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ u_char *ansbuf;
+ int anslen;
+ struct nwent *result;
+
+ ansbuf = memget(MAXPACKET);
+ if (ansbuf == NULL) {
+ errno = ENOMEM;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+ anslen = res_nsearch(pvt->res, name, C_IN, T_PTR, ansbuf, MAXPACKET);
+ if (anslen < 0) {
+ memput(ansbuf, MAXPACKET);
+ return (NULL);
+ }
+ result = get1101mask(this, get1101answer(this, ansbuf, anslen, by_name,
+ AF_INET, name, NULL, 0));
+ memput(ansbuf, MAXPACKET);
+ return (result);
+}
+
+static struct nwent *
+get1101byaddr(struct irs_nw *this, u_char *net, int len) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ char qbuf[sizeof "255.255.255.255.in-addr.arpa"];
+ struct nwent *result;
+ u_char *ansbuf;
+ int anslen;
+
+ if (len < 1 || len > 32) {
+ errno = EINVAL;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+ if (make1101inaddr(net, len, qbuf, sizeof qbuf) < 0)
+ return (NULL);
+ ansbuf = memget(MAXPACKET);
+ if (ansbuf == NULL) {
+ errno = ENOMEM;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+ anslen = res_nquery(pvt->res, qbuf, C_IN, T_PTR, ansbuf, MAXPACKET);
+ if (anslen < 0) {
+ memput(ansbuf, MAXPACKET);
+ return (NULL);
+ }
+ result = get1101mask(this, get1101answer(this, ansbuf, anslen, by_addr,
+ AF_INET, NULL, net, len));
+ memput(ansbuf, MAXPACKET);
+ return (result);
+}
+
+static struct nwent *
+get1101answer(struct irs_nw *this,
+ u_char *ansbuf, int anslen, enum by_what by_what,
+ int af, const char *name, const u_char *addr, int addrlen)
+{
+ struct pvt *pvt = (struct pvt *)this->private;
+ int type, class, ancount, qdcount, haveanswer;
+ char *bp, *ep, **ap;
+ u_char *cp, *eom;
+ HEADER *hp;
+
+ /* Initialize, and parse header. */
+ eom = ansbuf + anslen;
+ if (ansbuf + HFIXEDSZ > eom) {
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY);
+ return (NULL);
+ }
+ hp = (HEADER *)ansbuf;
+ cp = ansbuf + HFIXEDSZ;
+ qdcount = ntohs(hp->qdcount);
+ while (qdcount-- > 0) {
+ int n = dn_skipname(cp, eom);
+ cp += n + QFIXEDSZ;
+ if (n < 0 || cp > eom) {
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY);
+ return (NULL);
+ }
+ }
+ ancount = ntohs(hp->ancount);
+ if (!ancount) {
+ if (hp->aa)
+ RES_SET_H_ERRNO(pvt->res, HOST_NOT_FOUND);
+ else
+ RES_SET_H_ERRNO(pvt->res, TRY_AGAIN);
+ return (NULL);
+ }
+
+ /* Prepare a return structure. */
+ bp = pvt->buf;
+ ep = pvt->buf + sizeof(pvt->buf);
+ pvt->net.n_name = NULL;
+ pvt->net.n_aliases = pvt->ali;
+ pvt->net.n_addrtype = af;
+ pvt->net.n_addr = NULL;
+ pvt->net.n_length = addrlen;
+
+ /* Save input key if given. */
+ switch (by_what) {
+ case by_name:
+ if (name != NULL) {
+ int n = strlen(name) + 1;
+
+ if (n > (ep - bp)) {
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY);
+ return (NULL);
+ }
+ pvt->net.n_name = strcpy(bp, name); /* (checked) */
+ bp += n;
+ }
+ break;
+ case by_addr:
+ if (addr != NULL && addrlen != 0) {
+ int n = addrlen / 8 + ((addrlen % 8) != 0);
+
+ if (INADDRSZ > (ep - bp)) {
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY);
+ return (NULL);
+ }
+ memset(bp, 0, INADDRSZ);
+ memcpy(bp, addr, n);
+ pvt->net.n_addr = bp;
+ bp += INADDRSZ;
+ }
+ break;
+ default:
+ abort();
+ }
+
+ /* Parse the answer, collect aliases. */
+ ap = pvt->ali;
+ haveanswer = 0;
+ while (--ancount >= 0 && cp < eom) {
+ int n = dn_expand(ansbuf, eom, cp, bp, ep - bp);
+
+ cp += n; /*%< Owner */
+ if (n < 0 || !maybe_dnok(pvt->res, bp) ||
+ cp + 3 * INT16SZ + INT32SZ > eom) {
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY);
+ return (NULL);
+ }
+ GETSHORT(type, cp); /*%< Type */
+ GETSHORT(class, cp); /*%< Class */
+ cp += INT32SZ; /*%< TTL */
+ GETSHORT(n, cp); /*%< RDLENGTH */
+ if (class == C_IN && type == T_PTR) {
+ int nn;
+
+ nn = dn_expand(ansbuf, eom, cp, bp, ep - bp);
+ if (nn < 0 || !maybe_hnok(pvt->res, bp) || nn != n) {
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY);
+ return (NULL);
+ }
+ normalize_name(bp);
+ switch (by_what) {
+ case by_addr: {
+ if (pvt->net.n_name == NULL)
+ pvt->net.n_name = bp;
+ else if (ns_samename(pvt->net.n_name, bp) == 1)
+ break;
+ else
+ *ap++ = bp;
+ nn = strlen(bp) + 1;
+ bp += nn;
+ haveanswer++;
+ break;
+ }
+ case by_name: {
+ u_int b1, b2, b3, b4;
+
+ if (pvt->net.n_addr != NULL ||
+ sscanf(bp, "%u.%u.%u.%u.in-addr.arpa",
+ &b1, &b2, &b3, &b4) != 4)
+ break;
+ if ((ep - bp) < INADDRSZ) {
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY);
+ return (NULL);
+ }
+ pvt->net.n_addr = bp;
+ *bp++ = b4;
+ *bp++ = b3;
+ *bp++ = b2;
+ *bp++ = b1;
+ pvt->net.n_length = INADDRSZ * 8;
+ haveanswer++;
+ }
+ }
+ }
+ cp += n; /*%< RDATA */
+ }
+ if (!haveanswer) {
+ RES_SET_H_ERRNO(pvt->res, TRY_AGAIN);
+ return (NULL);
+ }
+ *ap = NULL;
+
+ return (&pvt->net);
+}
+
+static struct nwent *
+get1101mask(struct irs_nw *this, struct nwent *nwent) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ char qbuf[sizeof "255.255.255.255.in-addr.arpa"], owner[MAXDNAME];
+ int anslen, type, class, ancount, qdcount;
+ u_char *ansbuf, *cp, *eom;
+ HEADER *hp;
+
+ if (!nwent)
+ return (NULL);
+ if (make1101inaddr(nwent->n_addr, nwent->n_length, qbuf, sizeof qbuf)
+ < 0) {
+ /* "First, do no harm." */
+ return (nwent);
+ }
+
+ ansbuf = memget(MAXPACKET);
+ if (ansbuf == NULL) {
+ errno = ENOMEM;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+ /* Query for the A RR that would hold this network's mask. */
+ anslen = res_nquery(pvt->res, qbuf, C_IN, T_A, ansbuf, MAXPACKET);
+ if (anslen < HFIXEDSZ) {
+ memput(ansbuf, MAXPACKET);
+ return (nwent);
+ }
+
+ /* Initialize, and parse header. */
+ hp = (HEADER *)ansbuf;
+ cp = ansbuf + HFIXEDSZ;
+ eom = ansbuf + anslen;
+ qdcount = ntohs(hp->qdcount);
+ while (qdcount-- > 0) {
+ int n = dn_skipname(cp, eom);
+ cp += n + QFIXEDSZ;
+ if (n < 0 || cp > eom) {
+ memput(ansbuf, MAXPACKET);
+ return (nwent);
+ }
+ }
+ ancount = ntohs(hp->ancount);
+
+ /* Parse the answer, collect aliases. */
+ while (--ancount >= 0 && cp < eom) {
+ int n = dn_expand(ansbuf, eom, cp, owner, sizeof owner);
+
+ if (n < 0 || !maybe_dnok(pvt->res, owner))
+ break;
+ cp += n; /*%< Owner */
+ if (cp + 3 * INT16SZ + INT32SZ > eom)
+ break;
+ GETSHORT(type, cp); /*%< Type */
+ GETSHORT(class, cp); /*%< Class */
+ cp += INT32SZ; /*%< TTL */
+ GETSHORT(n, cp); /*%< RDLENGTH */
+ if (cp + n > eom)
+ break;
+ if (n == INADDRSZ && class == C_IN && type == T_A &&
+ ns_samename(qbuf, owner) == 1) {
+ /* This A RR indicates the actual netmask. */
+ int nn, mm;
+
+ nwent->n_length = 0;
+ for (nn = 0; nn < INADDRSZ; nn++)
+ for (mm = 7; mm >= 0; mm--)
+ if (cp[nn] & (1 << mm))
+ nwent->n_length++;
+ else
+ break;
+ }
+ cp += n; /*%< RDATA */
+ }
+ memput(ansbuf, MAXPACKET);
+ return (nwent);
+}
+
+static int
+make1101inaddr(const u_char *net, int bits, char *name, int size) {
+ int n, m;
+ char *ep;
+
+ ep = name + size;
+
+ /* Zero fill any whole bytes left out of the prefix. */
+ for (n = (32 - bits) / 8; n > 0; n--) {
+ if (ep - name < (int)(sizeof "0."))
+ goto emsgsize;
+ m = SPRINTF((name, "0."));
+ name += m;
+ }
+
+ /* Format the partial byte, if any, within the prefix. */
+ if ((n = bits % 8) != 0) {
+ if (ep - name < (int)(sizeof "255."))
+ goto emsgsize;
+ m = SPRINTF((name, "%u.",
+ net[bits / 8] & ~((1 << (8 - n)) - 1)));
+ name += m;
+ }
+
+ /* Format the whole bytes within the prefix. */
+ for (n = bits / 8; n > 0; n--) {
+ if (ep - name < (int)(sizeof "255."))
+ goto emsgsize;
+ m = SPRINTF((name, "%u.", net[n - 1]));
+ name += m;
+ }
+
+ /* Add the static text. */
+ if (ep - name < (int)(sizeof "in-addr.arpa"))
+ goto emsgsize;
+ (void) SPRINTF((name, "in-addr.arpa"));
+ return (0);
+
+ emsgsize:
+ errno = EMSGSIZE;
+ return (-1);
+}
+
+static void
+normalize_name(char *name) {
+ char *t;
+
+ /* Make lower case. */
+ for (t = name; *t; t++)
+ if (isascii((unsigned char)*t) && isupper((unsigned char)*t))
+ *t = tolower((*t)&0xff);
+
+ /* Remove trailing dots. */
+ while (t > name && t[-1] == '.')
+ *--t = '\0';
+}
+
+static int
+init(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (!pvt->res && !nw_res_get(this))
+ return (-1);
+ if (((pvt->res->options & RES_INIT) == 0U) &&
+ res_ninit(pvt->res) == -1)
+ return (-1);
+ return (0);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/dns_p.h b/usr/src/lib/libresolv2_joy/common/irs/dns_p.h
new file mode 100644
index 0000000000..d85ae2a238
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/dns_p.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: dns_p.h,v 1.4 2005/04/27 04:56:22 sra Exp $
+ */
+
+#ifndef _DNS_P_H_INCLUDED
+#define _DNS_P_H_INCLUDED
+
+#define maybe_ok(res, nm, ok) (((res)->options & RES_NOCHECKNAME) != 0U || \
+ (ok)(nm) != 0)
+#define maybe_hnok(res, hn) maybe_ok((res), (hn), res_hnok)
+#define maybe_dnok(res, dn) maybe_ok((res), (dn), res_dnok)
+
+/*%
+ * Object state.
+ */
+struct dns_p {
+ void *hes_ctx;
+ struct __res_state *res;
+ void (*free_res) __P((void *));
+};
+
+/*
+ * Methods.
+ */
+
+extern struct irs_gr * irs_dns_gr __P((struct irs_acc *));
+extern struct irs_pw * irs_dns_pw __P((struct irs_acc *));
+extern struct irs_sv * irs_dns_sv __P((struct irs_acc *));
+extern struct irs_pr * irs_dns_pr __P((struct irs_acc *));
+extern struct irs_ho * irs_dns_ho __P((struct irs_acc *));
+extern struct irs_nw * irs_dns_nw __P((struct irs_acc *));
+
+#endif /*_DNS_P_H_INCLUDED*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/dns_pr.c b/usr/src/lib/libresolv2_joy/common/irs/dns_pr.c
new file mode 100644
index 0000000000..c281be432c
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/dns_pr.c
@@ -0,0 +1,268 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: dns_pr.c,v 1.5 2005/04/27 04:56:22 sra Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <netdb.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include <isc/memcluster.h>
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "hesiod.h"
+#include "dns_p.h"
+
+/* Types. */
+
+struct pvt {
+ struct dns_p * dns;
+ struct protoent proto;
+ char * prbuf;
+};
+
+/* Forward. */
+
+static void pr_close(struct irs_pr *);
+static struct protoent * pr_byname(struct irs_pr *, const char *);
+static struct protoent * pr_bynumber(struct irs_pr *, int);
+static struct protoent * pr_next(struct irs_pr *);
+static void pr_rewind(struct irs_pr *);
+static void pr_minimize(struct irs_pr *);
+static struct __res_state * pr_res_get(struct irs_pr *);
+static void pr_res_set(struct irs_pr *,
+ struct __res_state *,
+ void (*)(void *));
+
+static struct protoent * parse_hes_list(struct irs_pr *, char **);
+
+/* Public. */
+
+struct irs_pr *
+irs_dns_pr(struct irs_acc *this) {
+ struct dns_p *dns = (struct dns_p *)this->private;
+ struct pvt *pvt;
+ struct irs_pr *pr;
+
+ if (!dns->hes_ctx) {
+ errno = ENODEV;
+ return (NULL);
+ }
+ if (!(pvt = memget(sizeof *pvt))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ if (!(pr = memget(sizeof *pr))) {
+ memput(pvt, sizeof *pvt);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pr, 0x5e, sizeof *pr);
+ pvt->dns = dns;
+ pr->private = pvt;
+ pr->byname = pr_byname;
+ pr->bynumber = pr_bynumber;
+ pr->next = pr_next;
+ pr->rewind = pr_rewind;
+ pr->close = pr_close;
+ pr->minimize = pr_minimize;
+ pr->res_get = pr_res_get;
+ pr->res_set = pr_res_set;
+ return (pr);
+}
+
+/* Methods. */
+
+static void
+pr_close(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->proto.p_aliases)
+ free(pvt->proto.p_aliases);
+ if (pvt->prbuf)
+ free(pvt->prbuf);
+
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+static struct protoent *
+pr_byname(struct irs_pr *this, const char *name) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct dns_p *dns = pvt->dns;
+ struct protoent *proto;
+ char **hes_list;
+
+ if (!(hes_list = hesiod_resolve(dns->hes_ctx, name, "protocol")))
+ return (NULL);
+
+ proto = parse_hes_list(this, hes_list);
+ hesiod_free_list(dns->hes_ctx, hes_list);
+ return (proto);
+}
+
+static struct protoent *
+pr_bynumber(struct irs_pr *this, int num) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct dns_p *dns = pvt->dns;
+ struct protoent *proto;
+ char numstr[16];
+ char **hes_list;
+
+ sprintf(numstr, "%d", num);
+ if (!(hes_list = hesiod_resolve(dns->hes_ctx, numstr, "protonum")))
+ return (NULL);
+
+ proto = parse_hes_list(this, hes_list);
+ hesiod_free_list(dns->hes_ctx, hes_list);
+ return (proto);
+}
+
+static struct protoent *
+pr_next(struct irs_pr *this) {
+ UNUSED(this);
+ errno = ENODEV;
+ return (NULL);
+}
+
+static void
+pr_rewind(struct irs_pr *this) {
+ UNUSED(this);
+ /* NOOP */
+}
+
+static void
+pr_minimize(struct irs_pr *this) {
+ UNUSED(this);
+ /* NOOP */
+}
+
+static struct __res_state *
+pr_res_get(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct dns_p *dns = pvt->dns;
+
+ return (__hesiod_res_get(dns->hes_ctx));
+}
+
+static void
+pr_res_set(struct irs_pr *this, struct __res_state * res,
+ void (*free_res)(void *)) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct dns_p *dns = pvt->dns;
+
+ __hesiod_res_set(dns->hes_ctx, res, free_res);
+}
+
+/* Private. */
+
+static struct protoent *
+parse_hes_list(struct irs_pr *this, char **hes_list) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ char *p, *cp, **cpp, **new;
+ int num = 0;
+ int max = 0;
+
+ for (cpp = hes_list; *cpp; cpp++) {
+ cp = *cpp;
+
+ /* Strip away comments, if any. */
+ if ((p = strchr(cp, '#')))
+ *p = 0;
+
+ /* Skip blank lines. */
+ p = cp;
+ while (*p && !isspace((unsigned char)*p))
+ p++;
+ if (!*p)
+ continue;
+
+ /* OK, we've got a live one. Let's parse it for real. */
+ if (pvt->prbuf)
+ free(pvt->prbuf);
+ pvt->prbuf = strdup(cp);
+
+ p = pvt->prbuf;
+ pvt->proto.p_name = p;
+ while (*p && !isspace((unsigned char)*p))
+ p++;
+ if (!*p)
+ continue;
+ *p++ = '\0';
+
+ pvt->proto.p_proto = atoi(p);
+ while (*p && !isspace((unsigned char)*p))
+ p++;
+ if (*p)
+ *p++ = '\0';
+
+ while (*p) {
+ if ((num + 1) >= max || !pvt->proto.p_aliases) {
+ max += 10;
+ new = realloc(pvt->proto.p_aliases,
+ max * sizeof(char *));
+ if (!new) {
+ errno = ENOMEM;
+ goto cleanup;
+ }
+ pvt->proto.p_aliases = new;
+ }
+ pvt->proto.p_aliases[num++] = p;
+ while (*p && !isspace((unsigned char)*p))
+ p++;
+ if (*p)
+ *p++ = '\0';
+ }
+ if (!pvt->proto.p_aliases)
+ pvt->proto.p_aliases = malloc(sizeof(char *));
+ if (!pvt->proto.p_aliases)
+ goto cleanup;
+ pvt->proto.p_aliases[num] = NULL;
+ return (&pvt->proto);
+ }
+
+ cleanup:
+ if (pvt->proto.p_aliases) {
+ free(pvt->proto.p_aliases);
+ pvt->proto.p_aliases = NULL;
+ }
+ if (pvt->prbuf) {
+ free(pvt->prbuf);
+ pvt->prbuf = NULL;
+ }
+ return (NULL);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/dns_sv.c b/usr/src/lib/libresolv2_joy/common/irs/dns_sv.c
new file mode 100644
index 0000000000..1001dc1051
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/dns_sv.c
@@ -0,0 +1,300 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: dns_sv.c,v 1.5 2005/04/27 04:56:23 sra Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <netinet/in.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <netdb.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#include <isc/memcluster.h>
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "hesiod.h"
+#include "dns_p.h"
+
+/* Definitions */
+
+struct pvt {
+ struct dns_p * dns;
+ struct servent serv;
+ char * svbuf;
+ struct __res_state * res;
+ void (*free_res)(void *);
+};
+
+/* Forward. */
+
+static void sv_close(struct irs_sv *);
+static struct servent * sv_byname(struct irs_sv *,
+ const char *, const char *);
+static struct servent * sv_byport(struct irs_sv *, int, const char *);
+static struct servent * sv_next(struct irs_sv *);
+static void sv_rewind(struct irs_sv *);
+static void sv_minimize(struct irs_sv *);
+#ifdef SV_RES_SETGET
+static struct __res_state * sv_res_get(struct irs_sv *);
+static void sv_res_set(struct irs_sv *,
+ struct __res_state *,
+ void (*)(void *));
+#endif
+
+static struct servent * parse_hes_list(struct irs_sv *,
+ char **, const char *);
+
+/* Public */
+
+struct irs_sv *
+irs_dns_sv(struct irs_acc *this) {
+ struct dns_p *dns = (struct dns_p *)this->private;
+ struct irs_sv *sv;
+ struct pvt *pvt;
+
+ if (!dns || !dns->hes_ctx) {
+ errno = ENODEV;
+ return (NULL);
+ }
+ if (!(pvt = memget(sizeof *pvt))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ pvt->dns = dns;
+ if (!(sv = memget(sizeof *sv))) {
+ memput(pvt, sizeof *pvt);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(sv, 0x5e, sizeof *sv);
+ sv->private = pvt;
+ sv->byname = sv_byname;
+ sv->byport = sv_byport;
+ sv->next = sv_next;
+ sv->rewind = sv_rewind;
+ sv->close = sv_close;
+ sv->minimize = sv_minimize;
+#ifdef SV_RES_SETGET
+ sv->res_get = sv_res_get;
+ sv->res_set = sv_res_set;
+#else
+ sv->res_get = NULL; /*%< sv_res_get; */
+ sv->res_set = NULL; /*%< sv_res_set; */
+#endif
+ return (sv);
+}
+
+/* Methods */
+
+static void
+sv_close(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->serv.s_aliases)
+ free(pvt->serv.s_aliases);
+ if (pvt->svbuf)
+ free(pvt->svbuf);
+
+ if (pvt->res && pvt->free_res)
+ (*pvt->free_res)(pvt->res);
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+static struct servent *
+sv_byname(struct irs_sv *this, const char *name, const char *proto) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct dns_p *dns = pvt->dns;
+ struct servent *s;
+ char **hes_list;
+
+ if (!(hes_list = hesiod_resolve(dns->hes_ctx, name, "service")))
+ return (NULL);
+
+ s = parse_hes_list(this, hes_list, proto);
+ hesiod_free_list(dns->hes_ctx, hes_list);
+ return (s);
+}
+
+static struct servent *
+sv_byport(struct irs_sv *this, int port, const char *proto) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct dns_p *dns = pvt->dns;
+ struct servent *s;
+ char portstr[16];
+ char **hes_list;
+
+ sprintf(portstr, "%d", ntohs(port));
+ if (!(hes_list = hesiod_resolve(dns->hes_ctx, portstr, "port")))
+ return (NULL);
+
+ s = parse_hes_list(this, hes_list, proto);
+ hesiod_free_list(dns->hes_ctx, hes_list);
+ return (s);
+}
+
+static struct servent *
+sv_next(struct irs_sv *this) {
+ UNUSED(this);
+ errno = ENODEV;
+ return (NULL);
+}
+
+static void
+sv_rewind(struct irs_sv *this) {
+ UNUSED(this);
+ /* NOOP */
+}
+
+/* Private */
+
+static struct servent *
+parse_hes_list(struct irs_sv *this, char **hes_list, const char *proto) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ char *p, *cp, **cpp, **new;
+ int proto_len;
+ int num = 0;
+ int max = 0;
+
+ for (cpp = hes_list; *cpp; cpp++) {
+ cp = *cpp;
+
+ /* Strip away comments, if any. */
+ if ((p = strchr(cp, '#')))
+ *p = 0;
+
+ /* Check to make sure the protocol matches. */
+ p = cp;
+ while (*p && !isspace((unsigned char)*p))
+ p++;
+ if (!*p)
+ continue;
+ if (proto) {
+ proto_len = strlen(proto);
+ if (strncasecmp(++p, proto, proto_len) != 0)
+ continue;
+ if (p[proto_len] && !isspace(p[proto_len]&0xff))
+ continue;
+ }
+ /* OK, we've got a live one. Let's parse it for real. */
+ if (pvt->svbuf)
+ free(pvt->svbuf);
+ pvt->svbuf = strdup(cp);
+
+ p = pvt->svbuf;
+ pvt->serv.s_name = p;
+ while (*p && !isspace(*p&0xff))
+ p++;
+ if (!*p)
+ continue;
+ *p++ = '\0';
+
+ pvt->serv.s_proto = p;
+ while (*p && !isspace(*p&0xff))
+ p++;
+ if (!*p)
+ continue;
+ *p++ = '\0';
+
+ pvt->serv.s_port = htons((u_short) atoi(p));
+ while (*p && !isspace(*p&0xff))
+ p++;
+ if (*p)
+ *p++ = '\0';
+
+ while (*p) {
+ if ((num + 1) >= max || !pvt->serv.s_aliases) {
+ max += 10;
+ new = realloc(pvt->serv.s_aliases,
+ max * sizeof(char *));
+ if (!new) {
+ errno = ENOMEM;
+ goto cleanup;
+ }
+ pvt->serv.s_aliases = new;
+ }
+ pvt->serv.s_aliases[num++] = p;
+ while (*p && !isspace(*p&0xff))
+ p++;
+ if (*p)
+ *p++ = '\0';
+ }
+ if (!pvt->serv.s_aliases)
+ pvt->serv.s_aliases = malloc(sizeof(char *));
+ if (!pvt->serv.s_aliases)
+ goto cleanup;
+ pvt->serv.s_aliases[num] = NULL;
+ return (&pvt->serv);
+ }
+
+ cleanup:
+ if (pvt->serv.s_aliases) {
+ free(pvt->serv.s_aliases);
+ pvt->serv.s_aliases = NULL;
+ }
+ if (pvt->svbuf) {
+ free(pvt->svbuf);
+ pvt->svbuf = NULL;
+ }
+ return (NULL);
+}
+
+static void
+sv_minimize(struct irs_sv *this) {
+ UNUSED(this);
+ /* NOOP */
+}
+
+#ifdef SV_RES_SETGET
+static struct __res_state *
+sv_res_get(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct dns_p *dns = pvt->dns;
+
+ return (__hesiod_res_get(dns->hes_ctx));
+}
+
+static void
+sv_res_set(struct irs_sv *this, struct __res_state * res,
+ void (*free_res)(void *)) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct dns_p *dns = pvt->dns;
+
+ __hesiod_res_set(dns->hes_ctx, res, free_res);
+}
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/gai_strerror.c b/usr/src/lib/libresolv2_joy/common/irs/gai_strerror.c
new file mode 100644
index 0000000000..9ca1c4bfe1
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/gai_strerror.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 2001 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <port_before.h>
+#include <netdb.h>
+#include <port_after.h>
+
+#ifdef DO_PTHREADS
+#include <pthread.h>
+#include <stdlib.h>
+#endif
+
+static const char *gai_errlist[] = {
+ "no error",
+ "address family not supported for name",/*%< EAI_ADDRFAMILY */
+ "temporary failure", /*%< EAI_AGAIN */
+ "invalid flags", /*%< EAI_BADFLAGS */
+ "permanent failure", /*%< EAI_FAIL */
+ "address family not supported", /*%< EAI_FAMILY */
+ "memory failure", /*%< EAI_MEMORY */
+ "no address", /*%< EAI_NODATA */
+ "unknown name or service", /*%< EAI_NONAME */
+ "service not supported for socktype", /*%< EAI_SERVICE */
+ "socktype not supported", /*%< EAI_SOCKTYPE */
+ "system failure", /*%< EAI_SYSTEM */
+ "bad hints", /*%< EAI_BADHINTS */
+ "bad protocol", /*%< EAI_PROTOCOL */
+ "unknown error" /*%< Must be last. */
+};
+
+static const int gai_nerr = (sizeof(gai_errlist)/sizeof(*gai_errlist));
+
+#define EAI_BUFSIZE 128
+
+const char *
+gai_strerror(int ecode) {
+#ifndef DO_PTHREADS
+ static char buf[EAI_BUFSIZE];
+#else /* DO_PTHREADS */
+#ifndef LIBBIND_MUTEX_INITIALIZER
+#define LIBBIND_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
+#endif
+ static pthread_mutex_t lock = LIBBIND_MUTEX_INITIALIZER;
+ static pthread_key_t key;
+ static int once = 0;
+ char *buf;
+#endif
+
+ if (ecode >= 0 && ecode < (gai_nerr - 1))
+ return (gai_errlist[ecode]);
+
+#ifdef DO_PTHREADS
+ if (!once) {
+ if (pthread_mutex_lock(&lock) != 0)
+ goto unknown;
+ if (!once) {
+ if (pthread_key_create(&key, free) != 0) {
+ (void)pthread_mutex_unlock(&lock);
+ goto unknown;
+ }
+ once = 1;
+ }
+ if (pthread_mutex_unlock(&lock) != 0)
+ goto unknown;
+ }
+
+ buf = pthread_getspecific(key);
+ if (buf == NULL) {
+ buf = malloc(EAI_BUFSIZE);
+ if (buf == NULL)
+ goto unknown;
+ if (pthread_setspecific(key, buf) != 0) {
+ free(buf);
+ goto unknown;
+ }
+ }
+#endif
+ /*
+ * XXX This really should be snprintf(buf, EAI_BUFSIZE, ...).
+ * It is safe until message catalogs are used.
+ */
+ sprintf(buf, "%s: %d", gai_errlist[gai_nerr - 1], ecode);
+ return (buf);
+
+#ifdef DO_PTHREADS
+ unknown:
+ return ("unknown error");
+#endif
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/gen.c b/usr/src/lib/libresolv2_joy/common/irs/gen.c
new file mode 100644
index 0000000000..7da01f5b09
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/gen.c
@@ -0,0 +1,459 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: gen.c,v 1.7 2005/04/27 04:56:23 sra Exp $";
+#endif
+
+/*! \file
+ * \brief
+ * this is the top level dispatcher
+ *
+ * The dispatcher is implemented as an accessor class; it is an
+ * accessor class that calls other accessor classes, as controlled by a
+ * configuration file.
+ *
+ * A big difference between this accessor class and others is that the
+ * map class initializers are NULL, and the map classes are already
+ * filled in with method functions that will do the right thing.
+ */
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <isc/assertions.h>
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#include <isc/memcluster.h>
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "gen_p.h"
+
+#ifdef SUNW_HOSTS_FALLBACK
+extern int __res_no_hosts_fallback(void);
+#endif /* SUNW_HOSTS_FALLBACK */
+
+/* Definitions */
+
+struct nameval {
+ const char * name;
+ int val;
+};
+
+static const struct nameval acc_names[irs_nacc+1] = {
+ { "local", irs_lcl },
+ { "dns", irs_dns },
+ { "nis", irs_nis },
+ { "irp", irs_irp },
+ { NULL, irs_nacc }
+};
+
+typedef struct irs_acc *(*accinit) __P((const char *options));
+
+static const accinit accs[irs_nacc+1] = {
+ irs_lcl_acc,
+ irs_dns_acc,
+#ifdef WANT_IRS_NIS
+ irs_nis_acc,
+#else
+ NULL,
+#endif
+ irs_irp_acc,
+ NULL
+};
+
+static const struct nameval map_names[irs_nmap+1] = {
+ { "group", irs_gr },
+ { "passwd", irs_pw },
+ { "services", irs_sv },
+ { "protocols", irs_pr },
+ { "hosts", irs_ho },
+ { "networks", irs_nw },
+ { "netgroup", irs_ng },
+ { NULL, irs_nmap }
+};
+
+static const struct nameval option_names[] = {
+ { "merge", IRS_MERGE },
+ { "continue", IRS_CONTINUE },
+ { NULL, 0 }
+};
+
+/* Forward */
+
+static void gen_close(struct irs_acc *);
+static struct __res_state * gen_res_get(struct irs_acc *);
+static void gen_res_set(struct irs_acc *, struct __res_state *,
+ void (*)(void *));
+static int find_name(const char *, const struct nameval nv[]);
+static void init_map_rules(struct gen_p *, const char *conf_file);
+static struct irs_rule *release_rule(struct irs_rule *);
+static int add_rule(struct gen_p *,
+ enum irs_map_id, enum irs_acc_id,
+ const char *);
+
+/* Public */
+
+struct irs_acc *
+irs_gen_acc(const char *options, const char *conf_file) {
+ struct irs_acc *acc;
+ struct gen_p *irs;
+
+ if (!(acc = memget(sizeof *acc))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(acc, 0x5e, sizeof *acc);
+ if (!(irs = memget(sizeof *irs))) {
+ errno = ENOMEM;
+ memput(acc, sizeof *acc);
+ return (NULL);
+ }
+ memset(irs, 0x5e, sizeof *irs);
+ irs->options = strdup(options);
+ irs->res = NULL;
+ irs->free_res = NULL;
+ memset(irs->accessors, 0, sizeof irs->accessors);
+ memset(irs->map_rules, 0, sizeof irs->map_rules);
+ init_map_rules(irs, conf_file);
+ acc->private = irs;
+#ifdef WANT_IRS_GR
+ acc->gr_map = irs_gen_gr;
+#else
+ acc->gr_map = NULL;
+#endif
+#ifdef WANT_IRS_PW
+ acc->pw_map = irs_gen_pw;
+#else
+ acc->pw_map = NULL;
+#endif
+ acc->sv_map = irs_gen_sv;
+ acc->pr_map = irs_gen_pr;
+ acc->ho_map = irs_gen_ho;
+ acc->nw_map = irs_gen_nw;
+ acc->ng_map = irs_gen_ng;
+ acc->res_get = gen_res_get;
+ acc->res_set = gen_res_set;
+ acc->close = gen_close;
+ return (acc);
+}
+
+/* Methods */
+
+static struct __res_state *
+gen_res_get(struct irs_acc *this) {
+ struct gen_p *irs = (struct gen_p *)this->private;
+
+ if (irs->res == NULL) {
+ struct __res_state *res;
+ res = (struct __res_state *)malloc(sizeof *res);
+ if (res == NULL)
+ return (NULL);
+ memset(res, 0, sizeof *res);
+ gen_res_set(this, res, free);
+ }
+
+ if (((irs->res->options & RES_INIT) == 0U) && res_ninit(irs->res) < 0)
+ return (NULL);
+
+ return (irs->res);
+}
+
+static void
+gen_res_set(struct irs_acc *this, struct __res_state *res,
+ void (*free_res)(void *)) {
+ struct gen_p *irs = (struct gen_p *)this->private;
+#if 0
+ struct irs_rule *rule;
+ struct irs_ho *ho;
+ struct irs_nw *nw;
+#endif
+
+ if (irs->res && irs->free_res) {
+ res_nclose(irs->res);
+ (*irs->free_res)(irs->res);
+ }
+
+ irs->res = res;
+ irs->free_res = free_res;
+
+#if 0
+ for (rule = irs->map_rules[irs_ho]; rule; rule = rule->next) {
+ ho = rule->inst->ho;
+
+ (*ho->res_set)(ho, res, NULL);
+ }
+ for (rule = irs->map_rules[irs_nw]; rule; rule = rule->next) {
+ nw = rule->inst->nw;
+
+ (*nw->res_set)(nw, res, NULL);
+ }
+#endif
+}
+
+static void
+gen_close(struct irs_acc *this) {
+ struct gen_p *irs = (struct gen_p *)this->private;
+ int n;
+
+ /* Search rules. */
+ for (n = 0; n < irs_nmap; n++)
+ while (irs->map_rules[n] != NULL)
+ irs->map_rules[n] = release_rule(irs->map_rules[n]);
+
+ /* Access methods. */
+ for (n = 0; n < irs_nacc; n++) {
+ /* Map objects. */
+ if (irs->accessors[n].gr != NULL)
+ (*irs->accessors[n].gr->close)(irs->accessors[n].gr);
+ if (irs->accessors[n].pw != NULL)
+ (*irs->accessors[n].pw->close)(irs->accessors[n].pw);
+ if (irs->accessors[n].sv != NULL)
+ (*irs->accessors[n].sv->close)(irs->accessors[n].sv);
+ if (irs->accessors[n].pr != NULL)
+ (*irs->accessors[n].pr->close)(irs->accessors[n].pr);
+ if (irs->accessors[n].ho != NULL)
+ (*irs->accessors[n].ho->close)(irs->accessors[n].ho);
+ if (irs->accessors[n].nw != NULL)
+ (*irs->accessors[n].nw->close)(irs->accessors[n].nw);
+ if (irs->accessors[n].ng != NULL)
+ (*irs->accessors[n].ng->close)(irs->accessors[n].ng);
+ /* Enclosing accessor. */
+ if (irs->accessors[n].acc != NULL)
+ (*irs->accessors[n].acc->close)(irs->accessors[n].acc);
+ }
+
+ /* The options string was strdup'd. */
+ free((void*)irs->options);
+
+ if (irs->res && irs->free_res)
+ (*irs->free_res)(irs->res);
+
+ /* The private data container. */
+ memput(irs, sizeof *irs);
+
+ /* The object. */
+ memput(this, sizeof *this);
+}
+
+/* Private */
+
+static int
+find_name(const char *name, const struct nameval names[]) {
+ int n;
+
+ for (n = 0; names[n].name != NULL; n++)
+ if (strcmp(name, names[n].name) == 0)
+ return (names[n].val);
+ return (-1);
+}
+
+static struct irs_rule *
+release_rule(struct irs_rule *rule) {
+ struct irs_rule *next = rule->next;
+
+ memput(rule, sizeof *rule);
+ return (next);
+}
+
+static int
+add_rule(struct gen_p *irs,
+ enum irs_map_id map, enum irs_acc_id acc,
+ const char *options)
+{
+ struct irs_rule **rules, *last, *tmp, *new;
+ struct irs_inst *inst;
+ const char *cp;
+ int n;
+
+#ifndef WANT_IRS_GR
+ if (map == irs_gr)
+ return (-1);
+#endif
+#ifndef WANT_IRS_PW
+ if (map == irs_pw)
+ return (-1);
+#endif
+#ifndef WANT_IRS_NIS
+ if (acc == irs_nis)
+ return (-1);
+#endif
+ new = memget(sizeof *new);
+ if (new == NULL)
+ return (-1);
+ memset(new, 0x5e, sizeof *new);
+ new->next = NULL;
+
+ new->inst = &irs->accessors[acc];
+
+ new->flags = 0;
+ cp = options;
+ while (cp && *cp) {
+ char option[50], *next;
+
+ next = strchr(cp, ',');
+ if (next)
+ n = next++ - cp;
+ else
+ n = strlen(cp);
+ if ((size_t)n > sizeof option - 1)
+ n = sizeof option - 1;
+ strncpy(option, cp, n);
+ option[n] = '\0';
+
+ n = find_name(option, option_names);
+ if (n >= 0)
+ new->flags |= n;
+
+ cp = next;
+ }
+
+ rules = &irs->map_rules[map];
+ for (last = NULL, tmp = *rules;
+ tmp != NULL;
+ last = tmp, tmp = tmp->next)
+ (void)NULL;
+ if (last == NULL)
+ *rules = new;
+ else
+ last->next = new;
+
+ /* Try to instantiate map accessors for this if necessary & approp. */
+ inst = &irs->accessors[acc];
+ if (inst->acc == NULL && accs[acc] != NULL)
+ inst->acc = (*accs[acc])(irs->options);
+ if (inst->acc != NULL) {
+ if (inst->gr == NULL && inst->acc->gr_map != NULL)
+ inst->gr = (*inst->acc->gr_map)(inst->acc);
+ if (inst->pw == NULL && inst->acc->pw_map != NULL)
+ inst->pw = (*inst->acc->pw_map)(inst->acc);
+ if (inst->sv == NULL && inst->acc->sv_map != NULL)
+ inst->sv = (*inst->acc->sv_map)(inst->acc);
+ if (inst->pr == NULL && inst->acc->pr_map != NULL)
+ inst->pr = (*inst->acc->pr_map)(inst->acc);
+ if (inst->ho == NULL && inst->acc->ho_map != NULL)
+ inst->ho = (*inst->acc->ho_map)(inst->acc);
+ if (inst->nw == NULL && inst->acc->nw_map != NULL)
+ inst->nw = (*inst->acc->nw_map)(inst->acc);
+ if (inst->ng == NULL && inst->acc->ng_map != NULL)
+ inst->ng = (*inst->acc->ng_map)(inst->acc);
+ }
+
+ return (0);
+}
+
+static void
+default_map_rules(struct gen_p *irs) {
+ /* Install time honoured and proved BSD style rules as default. */
+ add_rule(irs, irs_gr, irs_lcl, "");
+ add_rule(irs, irs_pw, irs_lcl, "");
+ add_rule(irs, irs_sv, irs_lcl, "");
+ add_rule(irs, irs_pr, irs_lcl, "");
+#ifdef SUNW_HOSTS_FALLBACK
+ if (__res_no_hosts_fallback())
+ add_rule(irs, irs_ho, irs_dns, "");
+ else {
+ add_rule(irs, irs_ho, irs_dns, "continue");
+ add_rule(irs, irs_ho, irs_lcl, "");
+ }
+#else /* SUNW_HOSTS_FALLBACK */
+ add_rule(irs, irs_ho, irs_dns, "continue");
+ add_rule(irs, irs_ho, irs_lcl, "");
+#endif /* SUNW_HOSTS_FALLBACK */
+ add_rule(irs, irs_nw, irs_dns, "continue");
+ add_rule(irs, irs_nw, irs_lcl, "");
+ add_rule(irs, irs_ng, irs_lcl, "");
+}
+
+static void
+init_map_rules(struct gen_p *irs, const char *conf_file) {
+ char line[1024], pattern[40], mapname[20], accname[20], options[100];
+ FILE *conf;
+
+#ifdef SUNW_HOSTS_FALLBACK
+ if (__res_no_hosts_fallback()) {
+ default_map_rules(irs);
+ return;
+ }
+#endif /* SUNW_HOSTS_FALLBACK */
+
+ if (conf_file == NULL)
+ conf_file = _PATH_IRS_CONF ;
+
+ /* A conf file of "" means compiled in defaults. Irpd wants this */
+ if (conf_file[0] == '\0' || (conf = fopen(conf_file, "r")) == NULL) {
+ default_map_rules(irs);
+ return;
+ }
+ (void) sprintf(pattern, "%%%lus %%%lus %%%lus\n",
+ (unsigned long)sizeof mapname,
+ (unsigned long)sizeof accname,
+ (unsigned long)sizeof options);
+ while (fgets(line, sizeof line, conf)) {
+ enum irs_map_id map;
+ enum irs_acc_id acc;
+ char *tmp;
+ int n;
+
+ for (tmp = line;
+ isascii((unsigned char)*tmp) &&
+ isspace((unsigned char)*tmp);
+ tmp++)
+ (void)NULL;
+ if (*tmp == '#' || *tmp == '\n' || *tmp == '\0')
+ continue;
+ n = sscanf(tmp, pattern, mapname, accname, options);
+ if (n < 2)
+ continue;
+ if (n < 3)
+ options[0] = '\0';
+
+ n = find_name(mapname, map_names);
+ INSIST(n < irs_nmap);
+ if (n < 0)
+ continue;
+ map = (enum irs_map_id) n;
+
+ n = find_name(accname, acc_names);
+ INSIST(n < irs_nacc);
+ if (n < 0)
+ continue;
+ acc = (enum irs_acc_id) n;
+
+ add_rule(irs, map, acc, options);
+ }
+ fclose(conf);
+}
diff --git a/usr/src/lib/libresolv2_joy/common/irs/gen_ho.c b/usr/src/lib/libresolv2_joy/common/irs/gen_ho.c
new file mode 100644
index 0000000000..8e41d7d610
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/gen_ho.c
@@ -0,0 +1,391 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: gen_ho.c,v 1.5 2006/03/09 23:57:56 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <stdlib.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <isc/memcluster.h>
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "gen_p.h"
+
+/* Definitions */
+
+struct pvt {
+ struct irs_rule * rules;
+ struct irs_rule * rule;
+ struct irs_ho * ho;
+ struct __res_state * res;
+ void (*free_res)(void *);
+};
+
+/* Forwards */
+
+static void ho_close(struct irs_ho *this);
+static struct hostent * ho_byname(struct irs_ho *this, const char *name);
+static struct hostent * ho_byname2(struct irs_ho *this, const char *name,
+ int af);
+static struct hostent * ho_byaddr(struct irs_ho *this, const void *addr,
+ int len, int af);
+static struct hostent * ho_next(struct irs_ho *this);
+static void ho_rewind(struct irs_ho *this);
+static void ho_minimize(struct irs_ho *this);
+static struct __res_state * ho_res_get(struct irs_ho *this);
+static void ho_res_set(struct irs_ho *this,
+ struct __res_state *res,
+ void (*free_res)(void *));
+static struct addrinfo * ho_addrinfo(struct irs_ho *this, const char *name,
+ const struct addrinfo *pai);
+
+static int init(struct irs_ho *this);
+
+/* Exports */
+
+struct irs_ho *
+irs_gen_ho(struct irs_acc *this) {
+ struct gen_p *accpvt = (struct gen_p *)this->private;
+ struct irs_ho *ho;
+ struct pvt *pvt;
+
+ if (!(pvt = memget(sizeof *pvt))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ if (!(ho = memget(sizeof *ho))) {
+ memput(pvt, sizeof *pvt);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(ho, 0x5e, sizeof *ho);
+ pvt->rules = accpvt->map_rules[irs_ho];
+ pvt->rule = pvt->rules;
+ ho->private = pvt;
+ ho->close = ho_close;
+ ho->byname = ho_byname;
+ ho->byname2 = ho_byname2;
+ ho->byaddr = ho_byaddr;
+ ho->next = ho_next;
+ ho->rewind = ho_rewind;
+ ho->minimize = ho_minimize;
+ ho->res_get = ho_res_get;
+ ho->res_set = ho_res_set;
+ ho->addrinfo = ho_addrinfo;
+ return (ho);
+}
+
+/* Methods. */
+
+static void
+ho_close(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ ho_minimize(this);
+ if (pvt->res && pvt->free_res)
+ (*pvt->free_res)(pvt->res);
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+static struct hostent *
+ho_byname(struct irs_ho *this, const char *name) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+ struct hostent *rval;
+ struct irs_ho *ho;
+ int therrno = NETDB_INTERNAL;
+ int softerror = 0;
+
+ if (init(this) == -1)
+ return (NULL);
+
+ for (rule = pvt->rules; rule; rule = rule->next) {
+ ho = rule->inst->ho;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ errno = 0;
+ rval = (*ho->byname)(ho, name);
+ if (rval != NULL)
+ return (rval);
+ if (softerror == 0 &&
+ pvt->res->res_h_errno != HOST_NOT_FOUND &&
+ pvt->res->res_h_errno != NETDB_INTERNAL) {
+ softerror = 1;
+ therrno = pvt->res->res_h_errno;
+ }
+ if (rule->flags & IRS_CONTINUE)
+ continue;
+ /*
+ * The value TRY_AGAIN can mean that the service
+ * is not available, or just that this particular name
+ * cannot be resolved now. We use the errno ECONNREFUSED
+ * to distinguish. If a lookup sets that errno when
+ * H_ERRNO is TRY_AGAIN, we continue to try other lookup
+ * functions, otherwise we return the TRY_AGAIN error.
+ */
+ if (pvt->res->res_h_errno != TRY_AGAIN || errno != ECONNREFUSED)
+ break;
+ }
+ if (softerror != 0 && pvt->res->res_h_errno == HOST_NOT_FOUND)
+ RES_SET_H_ERRNO(pvt->res, therrno);
+ return (NULL);
+}
+
+static struct hostent *
+ho_byname2(struct irs_ho *this, const char *name, int af) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+ struct hostent *rval;
+ struct irs_ho *ho;
+ int therrno = NETDB_INTERNAL;
+ int softerror = 0;
+
+ if (init(this) == -1)
+ return (NULL);
+
+ for (rule = pvt->rules; rule; rule = rule->next) {
+ ho = rule->inst->ho;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ errno = 0;
+ rval = (*ho->byname2)(ho, name, af);
+ if (rval != NULL)
+ return (rval);
+ if (softerror == 0 &&
+ pvt->res->res_h_errno != HOST_NOT_FOUND &&
+ pvt->res->res_h_errno != NETDB_INTERNAL) {
+ softerror = 1;
+ therrno = pvt->res->res_h_errno;
+ }
+ if (rule->flags & IRS_CONTINUE)
+ continue;
+ /*
+ * See the comments in ho_byname() explaining
+ * the interpretation of TRY_AGAIN and ECONNREFUSED.
+ */
+ if (pvt->res->res_h_errno != TRY_AGAIN || errno != ECONNREFUSED)
+ break;
+ }
+ if (softerror != 0 && pvt->res->res_h_errno == HOST_NOT_FOUND)
+ RES_SET_H_ERRNO(pvt->res, therrno);
+ return (NULL);
+}
+
+static struct hostent *
+ho_byaddr(struct irs_ho *this, const void *addr, int len, int af) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+ struct hostent *rval;
+ struct irs_ho *ho;
+ int therrno = NETDB_INTERNAL;
+ int softerror = 0;
+
+
+ if (init(this) == -1)
+ return (NULL);
+
+ for (rule = pvt->rules; rule; rule = rule->next) {
+ ho = rule->inst->ho;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ errno = 0;
+ rval = (*ho->byaddr)(ho, addr, len, af);
+ if (rval != NULL)
+ return (rval);
+ if (softerror == 0 &&
+ pvt->res->res_h_errno != HOST_NOT_FOUND &&
+ pvt->res->res_h_errno != NETDB_INTERNAL) {
+ softerror = 1;
+ therrno = pvt->res->res_h_errno;
+ }
+
+ if (rule->flags & IRS_CONTINUE)
+ continue;
+ /*
+ * See the comments in ho_byname() explaining
+ * the interpretation of TRY_AGAIN and ECONNREFUSED.
+ */
+ if (pvt->res->res_h_errno != TRY_AGAIN || errno != ECONNREFUSED)
+ break;
+ }
+ if (softerror != 0 && pvt->res->res_h_errno == HOST_NOT_FOUND)
+ RES_SET_H_ERRNO(pvt->res, therrno);
+ return (NULL);
+}
+
+static struct hostent *
+ho_next(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct hostent *rval;
+ struct irs_ho *ho;
+
+ while (pvt->rule) {
+ ho = pvt->rule->inst->ho;
+ rval = (*ho->next)(ho);
+ if (rval)
+ return (rval);
+ if (!(pvt->rule->flags & IRS_CONTINUE))
+ break;
+ pvt->rule = pvt->rule->next;
+ if (pvt->rule) {
+ ho = pvt->rule->inst->ho;
+ (*ho->rewind)(ho);
+ }
+ }
+ return (NULL);
+}
+
+static void
+ho_rewind(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_ho *ho;
+
+ pvt->rule = pvt->rules;
+ if (pvt->rule) {
+ ho = pvt->rule->inst->ho;
+ (*ho->rewind)(ho);
+ }
+}
+
+static void
+ho_minimize(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+
+ if (pvt->res)
+ res_nclose(pvt->res);
+ for (rule = pvt->rules; rule != NULL; rule = rule->next) {
+ struct irs_ho *ho = rule->inst->ho;
+
+ (*ho->minimize)(ho);
+ }
+}
+
+static struct __res_state *
+ho_res_get(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (!pvt->res) {
+ struct __res_state *res;
+ res = (struct __res_state *)malloc(sizeof *res);
+ if (!res) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(res, 0, sizeof *res);
+ ho_res_set(this, res, free);
+ }
+
+ return (pvt->res);
+}
+
+static void
+ho_res_set(struct irs_ho *this, struct __res_state *res,
+ void (*free_res)(void *)) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+
+ if (pvt->res && pvt->free_res) {
+ res_nclose(pvt->res);
+ (*pvt->free_res)(pvt->res);
+ }
+
+ pvt->res = res;
+ pvt->free_res = free_res;
+
+ for (rule = pvt->rules; rule != NULL; rule = rule->next) {
+ struct irs_ho *ho = rule->inst->ho;
+
+ (*ho->res_set)(ho, pvt->res, NULL);
+ }
+}
+
+static struct addrinfo *
+ho_addrinfo(struct irs_ho *this, const char *name, const struct addrinfo *pai)
+{
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+ struct addrinfo *rval = NULL;
+ struct irs_ho *ho;
+ int therrno = NETDB_INTERNAL;
+ int softerror = 0;
+
+ if (init(this) == -1)
+ return (NULL);
+
+ for (rule = pvt->rules; rule; rule = rule->next) {
+ ho = rule->inst->ho;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ errno = 0;
+ if (ho->addrinfo == NULL) /*%< for safety */
+ continue;
+ rval = (*ho->addrinfo)(ho, name, pai);
+ if (rval != NULL)
+ return (rval);
+ if (softerror == 0 &&
+ pvt->res->res_h_errno != HOST_NOT_FOUND &&
+ pvt->res->res_h_errno != NETDB_INTERNAL) {
+ softerror = 1;
+ therrno = pvt->res->res_h_errno;
+ }
+ if (rule->flags & IRS_CONTINUE)
+ continue;
+ /*
+ * See the comments in ho_byname() explaining
+ * the interpretation of TRY_AGAIN and ECONNREFUSED.
+ */
+ if (pvt->res->res_h_errno != TRY_AGAIN ||
+ errno != ECONNREFUSED)
+ break;
+ }
+ if (softerror != 0 && pvt->res->res_h_errno == HOST_NOT_FOUND)
+ RES_SET_H_ERRNO(pvt->res, therrno);
+ return (NULL);
+}
+
+static int
+init(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (!pvt->res && !ho_res_get(this))
+ return (-1);
+
+ if (((pvt->res->options & RES_INIT) == 0U) &&
+ (res_ninit(pvt->res) == -1))
+ return (-1);
+
+ return (0);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/gen_ng.c b/usr/src/lib/libresolv2_joy/common/irs/gen_ng.c
new file mode 100644
index 0000000000..e3c874ee68
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/gen_ng.c
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: gen_ng.c,v 1.3 2005/04/27 04:56:23 sra Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <isc/memcluster.h>
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "gen_p.h"
+
+/* Types */
+
+struct pvt {
+ struct irs_rule * rules;
+ struct irs_rule * rule;
+ char * curgroup;
+};
+
+/* Forward */
+
+static void ng_close(struct irs_ng *);
+static int ng_next(struct irs_ng *, const char **,
+ const char **, const char **);
+static int ng_test(struct irs_ng *, const char *,
+ const char *, const char *,
+ const char *);
+static void ng_rewind(struct irs_ng *, const char *);
+static void ng_minimize(struct irs_ng *);
+
+/* Public */
+
+struct irs_ng *
+irs_gen_ng(struct irs_acc *this) {
+ struct gen_p *accpvt = (struct gen_p *)this->private;
+ struct irs_ng *ng;
+ struct pvt *pvt;
+
+ if (!(ng = memget(sizeof *ng))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(ng, 0x5e, sizeof *ng);
+ if (!(pvt = memget(sizeof *pvt))) {
+ memput(ng, sizeof *ng);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ pvt->rules = accpvt->map_rules[irs_ng];
+ pvt->rule = pvt->rules;
+ ng->private = pvt;
+ ng->close = ng_close;
+ ng->next = ng_next;
+ ng->test = ng_test;
+ ng->rewind = ng_rewind;
+ ng->minimize = ng_minimize;
+ return (ng);
+}
+
+/* Methods */
+
+static void
+ng_close(struct irs_ng *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ ng_minimize(this);
+ if (pvt->curgroup)
+ free(pvt->curgroup);
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+static int
+ng_next(struct irs_ng *this, const char **host, const char **user,
+ const char **domain)
+{
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_ng *ng;
+
+ while (pvt->rule) {
+ ng = pvt->rule->inst->ng;
+ if ((*ng->next)(ng, host, user, domain) == 1)
+ return (1);
+ if (!(pvt->rule->flags & IRS_CONTINUE))
+ break;
+ pvt->rule = pvt->rule->next;
+ if (pvt->rule) {
+ ng = pvt->rule->inst->ng;
+ (*ng->rewind)(ng, pvt->curgroup);
+ }
+ }
+ return (0);
+}
+
+static int
+ng_test(struct irs_ng *this, const char *name,
+ const char *user, const char *host, const char *domain)
+{
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+ struct irs_ng *ng;
+ int rval;
+
+ rval = 0;
+ for (rule = pvt->rules; rule; rule = rule->next) {
+ ng = rule->inst->ng;
+ rval = (*ng->test)(ng, name, user, host, domain);
+ if (rval || !(rule->flags & IRS_CONTINUE))
+ break;
+ }
+ return (rval);
+}
+
+static void
+ng_rewind(struct irs_ng *this, const char *group) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_ng *ng;
+
+ pvt->rule = pvt->rules;
+ if (pvt->rule) {
+ if (pvt->curgroup)
+ free(pvt->curgroup);
+ pvt->curgroup = strdup(group);
+ ng = pvt->rule->inst->ng;
+ (*ng->rewind)(ng, pvt->curgroup);
+ }
+}
+
+static void
+ng_minimize(struct irs_ng *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+
+ for (rule = pvt->rules; rule != NULL; rule = rule->next) {
+ struct irs_ng *ng = rule->inst->ng;
+
+ (*ng->minimize)(ng);
+ }
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/gen_nw.c b/usr/src/lib/libresolv2_joy/common/irs/gen_nw.c
new file mode 100644
index 0000000000..12bd0e0f6d
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/gen_nw.c
@@ -0,0 +1,264 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: gen_nw.c,v 1.4 2005/04/27 04:56:23 sra Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <resolv_joy.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <isc/memcluster.h>
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "gen_p.h"
+
+/* Types */
+
+struct pvt {
+ struct irs_rule * rules;
+ struct irs_rule * rule;
+ struct __res_state * res;
+ void (*free_res)(void *);
+};
+
+/* Forward */
+
+static void nw_close(struct irs_nw*);
+static struct nwent * nw_next(struct irs_nw *);
+static struct nwent * nw_byname(struct irs_nw *, const char *, int);
+static struct nwent * nw_byaddr(struct irs_nw *, void *, int, int);
+static void nw_rewind(struct irs_nw *);
+static void nw_minimize(struct irs_nw *);
+static struct __res_state * nw_res_get(struct irs_nw *this);
+static void nw_res_set(struct irs_nw *this,
+ struct __res_state *res,
+ void (*free_res)(void *));
+
+static int init(struct irs_nw *this);
+
+/* Public */
+
+struct irs_nw *
+irs_gen_nw(struct irs_acc *this) {
+ struct gen_p *accpvt = (struct gen_p *)this->private;
+ struct irs_nw *nw;
+ struct pvt *pvt;
+
+ if (!(pvt = memget(sizeof *pvt))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ if (!(nw = memget(sizeof *nw))) {
+ memput(pvt, sizeof *pvt);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(nw, 0x5e, sizeof *nw);
+ pvt->rules = accpvt->map_rules[irs_nw];
+ pvt->rule = pvt->rules;
+ nw->private = pvt;
+ nw->close = nw_close;
+ nw->next = nw_next;
+ nw->byname = nw_byname;
+ nw->byaddr = nw_byaddr;
+ nw->rewind = nw_rewind;
+ nw->minimize = nw_minimize;
+ nw->res_get = nw_res_get;
+ nw->res_set = nw_res_set;
+ return (nw);
+}
+
+/* Methods */
+
+static void
+nw_close(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ nw_minimize(this);
+
+ if (pvt->res && pvt->free_res)
+ (*pvt->free_res)(pvt->res);
+
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+static struct nwent *
+nw_next(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct nwent *rval;
+ struct irs_nw *nw;
+
+ if (init(this) == -1)
+ return(NULL);
+
+ while (pvt->rule) {
+ nw = pvt->rule->inst->nw;
+ rval = (*nw->next)(nw);
+ if (rval)
+ return (rval);
+ if (!(pvt->rules->flags & IRS_CONTINUE))
+ break;
+ pvt->rule = pvt->rule->next;
+ if (pvt->rule) {
+ nw = pvt->rule->inst->nw;
+ (*nw->rewind)(nw);
+ }
+ }
+ return (NULL);
+}
+
+static struct nwent *
+nw_byname(struct irs_nw *this, const char *name, int type) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+ struct nwent *rval;
+ struct irs_nw *nw;
+
+ if (init(this) == -1)
+ return(NULL);
+
+ for (rule = pvt->rules; rule; rule = rule->next) {
+ nw = rule->inst->nw;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ rval = (*nw->byname)(nw, name, type);
+ if (rval != NULL)
+ return (rval);
+ if (pvt->res->res_h_errno != TRY_AGAIN &&
+ !(rule->flags & IRS_CONTINUE))
+ break;
+ }
+ return (NULL);
+}
+
+static struct nwent *
+nw_byaddr(struct irs_nw *this, void *net, int length, int type) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+ struct nwent *rval;
+ struct irs_nw *nw;
+
+ if (init(this) == -1)
+ return(NULL);
+
+ for (rule = pvt->rules; rule; rule = rule->next) {
+ nw = rule->inst->nw;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ rval = (*nw->byaddr)(nw, net, length, type);
+ if (rval != NULL)
+ return (rval);
+ if (pvt->res->res_h_errno != TRY_AGAIN &&
+ !(rule->flags & IRS_CONTINUE))
+ break;
+ }
+ return (NULL);
+}
+
+static void
+nw_rewind(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_nw *nw;
+
+ pvt->rule = pvt->rules;
+ if (pvt->rule) {
+ nw = pvt->rule->inst->nw;
+ (*nw->rewind)(nw);
+ }
+}
+
+static void
+nw_minimize(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+
+ if (pvt->res)
+ res_nclose(pvt->res);
+ for (rule = pvt->rules; rule != NULL; rule = rule->next) {
+ struct irs_nw *nw = rule->inst->nw;
+
+ (*nw->minimize)(nw);
+ }
+}
+
+static struct __res_state *
+nw_res_get(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (!pvt->res) {
+ struct __res_state *res;
+ res = (struct __res_state *)malloc(sizeof *res);
+ if (!res) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(res, 0, sizeof *res);
+ nw_res_set(this, res, free);
+ }
+
+ return (pvt->res);
+}
+
+static void
+nw_res_set(struct irs_nw *this, struct __res_state *res,
+ void (*free_res)(void *)) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+
+ if (pvt->res && pvt->free_res) {
+ res_nclose(pvt->res);
+ (*pvt->free_res)(pvt->res);
+ }
+
+ pvt->res = res;
+ pvt->free_res = free_res;
+
+ for (rule = pvt->rules; rule != NULL; rule = rule->next) {
+ struct irs_nw *nw = rule->inst->nw;
+
+ (*nw->res_set)(nw, pvt->res, NULL);
+ }
+}
+
+static int
+init(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (!pvt->res && !nw_res_get(this))
+ return (-1);
+ if (((pvt->res->options & RES_INIT) == 0U) &&
+ res_ninit(pvt->res) == -1)
+ return (-1);
+ return (0);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/gen_p.h b/usr/src/lib/libresolv2_joy/common/irs/gen_p.h
new file mode 100644
index 0000000000..1adc5909bb
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/gen_p.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: gen_p.h,v 1.3 2005/04/27 04:56:23 sra Exp $
+ */
+
+/*! \file
+ * Notes:
+ * We hope to create a complete set of thread-safe entry points someday,
+ * which will mean a set of getXbyY() functions that take as an argument
+ * a pointer to the map class, which will have a pointer to the private
+ * data, which will be used preferentially to the static variables that
+ * are necessary to support the "classic" interface. This "classic"
+ * interface will then be reimplemented as stubs on top of the thread
+ * safe modules, and will keep the map class pointers as their only
+ * static data. HOWEVER, we are not there yet. So while we will call
+ * the just-barely-converted map class methods with map class pointers,
+ * right now they probably all still use statics. We're not fooling
+ * anybody, and we're not trying to (yet).
+ */
+
+#ifndef _GEN_P_H_INCLUDED
+#define _GEN_P_H_INCLUDED
+
+/*%
+ * These are the access methods.
+ */
+enum irs_acc_id {
+ irs_lcl, /*%< Local. */
+ irs_dns, /*%< DNS or Hesiod. */
+ irs_nis, /*%< Sun NIS ("YP"). */
+ irs_irp, /*%< IR protocol. */
+ irs_nacc
+};
+
+/*%
+ * These are the map types.
+ */
+enum irs_map_id {
+ irs_gr, /*%< "group" */
+ irs_pw, /*%< "passwd" */
+ irs_sv, /*%< "services" */
+ irs_pr, /*%< "protocols" */
+ irs_ho, /*%< "hosts" */
+ irs_nw, /*%< "networks" */
+ irs_ng, /*%< "netgroup" */
+ irs_nmap
+};
+
+/*%
+ * This is an accessor instance.
+ */
+struct irs_inst {
+ struct irs_acc *acc;
+ struct irs_gr * gr;
+ struct irs_pw * pw;
+ struct irs_sv * sv;
+ struct irs_pr * pr;
+ struct irs_ho * ho;
+ struct irs_nw * nw;
+ struct irs_ng * ng;
+};
+
+/*%
+ * This is a search rule for some map type.
+ */
+struct irs_rule {
+ struct irs_rule * next;
+ struct irs_inst * inst;
+ int flags;
+};
+#define IRS_MERGE 0x0001 /*%< Don't stop if acc. has data? */
+#define IRS_CONTINUE 0x0002 /*%< Don't stop if acc. has no data? */
+/*
+ * This is the private data for a search access class.
+ */
+struct gen_p {
+ char * options;
+ struct irs_rule * map_rules[(int)irs_nmap];
+ struct irs_inst accessors[(int)irs_nacc];
+ struct __res_state * res;
+ void (*free_res) __P((void *));
+};
+
+/*
+ * Externs.
+ */
+
+extern struct irs_acc * irs_gen_acc __P((const char *, const char *conf_file));
+extern struct irs_gr * irs_gen_gr __P((struct irs_acc *));
+extern struct irs_pw * irs_gen_pw __P((struct irs_acc *));
+extern struct irs_sv * irs_gen_sv __P((struct irs_acc *));
+extern struct irs_pr * irs_gen_pr __P((struct irs_acc *));
+extern struct irs_ho * irs_gen_ho __P((struct irs_acc *));
+extern struct irs_nw * irs_gen_nw __P((struct irs_acc *));
+extern struct irs_ng * irs_gen_ng __P((struct irs_acc *));
+
+#endif /*_IRS_P_H_INCLUDED*/
diff --git a/usr/src/lib/libresolv2_joy/common/irs/gen_pr.c b/usr/src/lib/libresolv2_joy/common/irs/gen_pr.c
new file mode 100644
index 0000000000..9fd32c4dd9
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/gen_pr.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: gen_pr.c,v 1.3 2005/04/27 04:56:24 sra Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <resolv_joy.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <isc/memcluster.h>
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "gen_p.h"
+
+/* Types */
+
+struct pvt {
+ struct irs_rule * rules;
+ struct irs_rule * rule;
+ struct __res_state * res;
+ void (*free_res)(void *);
+};
+
+/* Forward */
+
+static void pr_close(struct irs_pr*);
+static struct protoent * pr_next(struct irs_pr *);
+static struct protoent * pr_byname(struct irs_pr *, const char *);
+static struct protoent * pr_bynumber(struct irs_pr *, int);
+static void pr_rewind(struct irs_pr *);
+static void pr_minimize(struct irs_pr *);
+static struct __res_state * pr_res_get(struct irs_pr *);
+static void pr_res_set(struct irs_pr *,
+ struct __res_state *,
+ void (*)(void *));
+
+/* Public */
+
+struct irs_pr *
+irs_gen_pr(struct irs_acc *this) {
+ struct gen_p *accpvt = (struct gen_p *)this->private;
+ struct irs_pr *pr;
+ struct pvt *pvt;
+
+ if (!(pr = memget(sizeof *pr))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pr, 0x5e, sizeof *pr);
+ if (!(pvt = memget(sizeof *pvt))) {
+ memput(pr, sizeof *pr);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ pvt->rules = accpvt->map_rules[irs_pr];
+ pvt->rule = pvt->rules;
+ pr->private = pvt;
+ pr->close = pr_close;
+ pr->next = pr_next;
+ pr->byname = pr_byname;
+ pr->bynumber = pr_bynumber;
+ pr->rewind = pr_rewind;
+ pr->minimize = pr_minimize;
+ pr->res_get = pr_res_get;
+ pr->res_set = pr_res_set;
+ return (pr);
+}
+
+/* Methods */
+
+static void
+pr_close(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+static struct protoent *
+pr_next(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct protoent *rval;
+ struct irs_pr *pr;
+
+ while (pvt->rule) {
+ pr = pvt->rule->inst->pr;
+ rval = (*pr->next)(pr);
+ if (rval)
+ return (rval);
+ if (!(pvt->rules->flags & IRS_CONTINUE))
+ break;
+ pvt->rule = pvt->rule->next;
+ if (pvt->rule) {
+ pr = pvt->rule->inst->pr;
+ (*pr->rewind)(pr);
+ }
+ }
+ return (NULL);
+}
+
+static struct protoent *
+pr_byname(struct irs_pr *this, const char *name) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+ struct protoent *rval;
+ struct irs_pr *pr;
+
+ rval = NULL;
+ for (rule = pvt->rules; rule; rule = rule->next) {
+ pr = rule->inst->pr;
+ rval = (*pr->byname)(pr, name);
+ if (rval || !(rule->flags & IRS_CONTINUE))
+ break;
+ }
+ return (rval);
+}
+
+static struct protoent *
+pr_bynumber(struct irs_pr *this, int proto) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+ struct protoent *rval;
+ struct irs_pr *pr;
+
+ rval = NULL;
+ for (rule = pvt->rules; rule; rule = rule->next) {
+ pr = rule->inst->pr;
+ rval = (*pr->bynumber)(pr, proto);
+ if (rval || !(rule->flags & IRS_CONTINUE))
+ break;
+ }
+ return (rval);
+}
+
+static void
+pr_rewind(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_pr *pr;
+
+ pvt->rule = pvt->rules;
+ if (pvt->rule) {
+ pr = pvt->rule->inst->pr;
+ (*pr->rewind)(pr);
+ }
+}
+
+static void
+pr_minimize(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+
+ for (rule = pvt->rules; rule != NULL; rule = rule->next) {
+ struct irs_pr *pr = rule->inst->pr;
+
+ (*pr->minimize)(pr);
+ }
+}
+
+static struct __res_state *
+pr_res_get(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (!pvt->res) {
+ struct __res_state *res;
+ res = (struct __res_state *)malloc(sizeof *res);
+ if (!res) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(res, 0, sizeof *res);
+ pr_res_set(this, res, free);
+ }
+
+ return (pvt->res);
+}
+
+static void
+pr_res_set(struct irs_pr *this, struct __res_state *res,
+ void (*free_res)(void *)) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+
+ if (pvt->res && pvt->free_res) {
+ res_nclose(pvt->res);
+ (*pvt->free_res)(pvt->res);
+ }
+
+ pvt->res = res;
+ pvt->free_res = free_res;
+
+ for (rule = pvt->rules; rule != NULL; rule = rule->next) {
+ struct irs_pr *pr = rule->inst->pr;
+
+ if (pr->res_set)
+ (*pr->res_set)(pr, pvt->res, NULL);
+ }
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/gen_sv.c b/usr/src/lib/libresolv2_joy/common/irs/gen_sv.c
new file mode 100644
index 0000000000..93b70e57ec
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/gen_sv.c
@@ -0,0 +1,229 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: gen_sv.c,v 1.3 2005/04/27 04:56:24 sra Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <isc/memcluster.h>
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "gen_p.h"
+
+/* Types */
+
+struct pvt {
+ struct irs_rule * rules;
+ struct irs_rule * rule;
+ struct __res_state * res;
+ void (*free_res)(void *);
+};
+
+/* Forward */
+
+static void sv_close(struct irs_sv*);
+static struct servent * sv_next(struct irs_sv *);
+static struct servent * sv_byname(struct irs_sv *, const char *,
+ const char *);
+static struct servent * sv_byport(struct irs_sv *, int, const char *);
+static void sv_rewind(struct irs_sv *);
+static void sv_minimize(struct irs_sv *);
+static struct __res_state * sv_res_get(struct irs_sv *);
+static void sv_res_set(struct irs_sv *,
+ struct __res_state *,
+ void (*)(void *));
+
+/* Public */
+
+struct irs_sv *
+irs_gen_sv(struct irs_acc *this) {
+ struct gen_p *accpvt = (struct gen_p *)this->private;
+ struct irs_sv *sv;
+ struct pvt *pvt;
+
+ if (!(sv = memget(sizeof *sv))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(sv, 0x5e, sizeof *sv);
+ if (!(pvt = memget(sizeof *pvt))) {
+ memput(sv, sizeof *sv);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ pvt->rules = accpvt->map_rules[irs_sv];
+ pvt->rule = pvt->rules;
+ sv->private = pvt;
+ sv->close = sv_close;
+ sv->next = sv_next;
+ sv->byname = sv_byname;
+ sv->byport = sv_byport;
+ sv->rewind = sv_rewind;
+ sv->minimize = sv_minimize;
+ sv->res_get = sv_res_get;
+ sv->res_set = sv_res_set;
+ return (sv);
+}
+
+/* Methods */
+
+static void
+sv_close(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+static struct servent *
+sv_next(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct servent *rval;
+ struct irs_sv *sv;
+
+ while (pvt->rule) {
+ sv = pvt->rule->inst->sv;
+ rval = (*sv->next)(sv);
+ if (rval)
+ return (rval);
+ if (!(pvt->rule->flags & IRS_CONTINUE))
+ break;
+ pvt->rule = pvt->rule->next;
+ if (pvt->rule) {
+ sv = pvt->rule->inst->sv;
+ (*sv->rewind)(sv);
+ }
+ }
+ return (NULL);
+}
+
+static struct servent *
+sv_byname(struct irs_sv *this, const char *name, const char *proto) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+ struct servent *rval;
+ struct irs_sv *sv;
+
+ rval = NULL;
+ for (rule = pvt->rules; rule; rule = rule->next) {
+ sv = rule->inst->sv;
+ rval = (*sv->byname)(sv, name, proto);
+ if (rval || !(rule->flags & IRS_CONTINUE))
+ break;
+ }
+ return (rval);
+}
+
+static struct servent *
+sv_byport(struct irs_sv *this, int port, const char *proto) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+ struct servent *rval;
+ struct irs_sv *sv;
+
+ rval = NULL;
+ for (rule = pvt->rules; rule; rule = rule->next) {
+ sv = rule->inst->sv;
+ rval = (*sv->byport)(sv, port, proto);
+ if (rval || !(rule->flags & IRS_CONTINUE))
+ break;
+ }
+ return (rval);
+}
+
+static void
+sv_rewind(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_sv *sv;
+
+ pvt->rule = pvt->rules;
+ if (pvt->rule) {
+ sv = pvt->rule->inst->sv;
+ (*sv->rewind)(sv);
+ }
+}
+
+static void
+sv_minimize(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+
+ for (rule = pvt->rules; rule != NULL; rule = rule->next) {
+ struct irs_sv *sv = rule->inst->sv;
+
+ (*sv->minimize)(sv);
+ }
+}
+
+static struct __res_state *
+sv_res_get(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (!pvt->res) {
+ struct __res_state *res;
+ res = (struct __res_state *)malloc(sizeof *res);
+ if (!res) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(res, 0, sizeof *res);
+ sv_res_set(this, res, free);
+ }
+
+ return (pvt->res);
+}
+
+static void
+sv_res_set(struct irs_sv *this, struct __res_state *res,
+ void (*free_res)(void *)) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+
+ if (pvt->res && pvt->free_res) {
+ res_nclose(pvt->res);
+ (*pvt->free_res)(pvt->res);
+ }
+
+ pvt->res = res;
+ pvt->free_res = free_res;
+
+ for (rule = pvt->rules; rule != NULL; rule = rule->next) {
+ struct irs_sv *sv = rule->inst->sv;
+
+ if (sv->res_set)
+ (*sv->res_set)(sv, pvt->res, NULL);
+ }
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/getaddrinfo.c b/usr/src/lib/libresolv2_joy/common/irs/getaddrinfo.c
new file mode 100644
index 0000000000..19bbbea854
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/getaddrinfo.c
@@ -0,0 +1,1253 @@
+/* $KAME: getaddrinfo.c,v 1.14 2001/01/06 09:41:15 jinmei Exp $ */
+
+/*
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*! \file
+ * Issues to be discussed:
+ *\li Thread safe-ness must be checked.
+ *\li Return values. There are nonstandard return values defined and used
+ * in the source code. This is because RFC2553 is silent about which error
+ * code must be returned for which situation.
+ *\li IPv4 classful (shortened) form. RFC2553 is silent about it. XNET 5.2
+ * says to use inet_aton() to convert IPv4 numeric to binary (allows
+ * classful form as a result).
+ * current code - disallow classful form for IPv4 (due to use of inet_pton).
+ *\li freeaddrinfo(NULL). RFC2553 is silent about it. XNET 5.2 says it is
+ * invalid.
+ * current code - SEGV on freeaddrinfo(NULL)
+ * Note:
+ *\li We use getipnodebyname() just for thread-safeness. There's no intent
+ * to let it do PF_UNSPEC (actually we never pass PF_UNSPEC to
+ * getipnodebyname().
+ *\li The code filters out AFs that are not supported by the kernel,
+ * when globbing NULL hostname (to loopback, or wildcard). Is it the right
+ * thing to do? What is the relationship with post-RFC2553 AI_ADDRCONFIG
+ * in ai_flags?
+ *\li (post-2553) semantics of AI_ADDRCONFIG itself is too vague.
+ * (1) what should we do against numeric hostname (2) what should we do
+ * against NULL hostname (3) what is AI_ADDRCONFIG itself. AF not ready?
+ * non-loopback address configured? global address configured?
+ * \par Additional Issue:
+ * To avoid search order issue, we have a big amount of code duplicate
+ * from gethnamaddr.c and some other places. The issues that there's no
+ * lower layer function to lookup "IPv4 or IPv6" record. Calling
+ * gethostbyname2 from getaddrinfo will end up in wrong search order, as
+ * follows:
+ * \li The code makes use of following calls when asked to resolver with
+ * ai_family = PF_UNSPEC:
+ *\code getipnodebyname(host, AF_INET6);
+ * getipnodebyname(host, AF_INET);
+ *\endcode
+ * \li This will result in the following queries if the node is configure to
+ * prefer /etc/hosts than DNS:
+ *\code
+ * lookup /etc/hosts for IPv6 address
+ * lookup DNS for IPv6 address
+ * lookup /etc/hosts for IPv4 address
+ * lookup DNS for IPv4 address
+ *\endcode
+ * which may not meet people's requirement.
+ * \li The right thing to happen is to have underlying layer which does
+ * PF_UNSPEC lookup (lookup both) and return chain of addrinfos.
+ * This would result in a bit of code duplicate with _dns_ghbyname() and
+ * friends.
+ */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/socket.h>
+
+#include <net/if.h>
+#include <netinet/in.h>
+
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+
+#include <stdarg.h>
+
+#include <irs.h>
+#include <isc/assertions.h>
+
+#include "port_after.h"
+
+#include "irs_data.h"
+
+#define SUCCESS 0
+#define ANY 0
+#define YES 1
+#define NO 0
+
+static const char in_addrany[] = { 0, 0, 0, 0 };
+static const char in6_addrany[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+static const char in_loopback[] = { 127, 0, 0, 1 };
+static const char in6_loopback[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
+};
+
+static const struct afd {
+ int a_af;
+ int a_addrlen;
+ int a_socklen;
+ int a_off;
+ const char *a_addrany;
+ const char *a_loopback;
+ int a_scoped;
+} afdl [] = {
+ {PF_INET6, sizeof(struct in6_addr),
+ sizeof(struct sockaddr_in6),
+ offsetof(struct sockaddr_in6, sin6_addr),
+ in6_addrany, in6_loopback, 1},
+ {PF_INET, sizeof(struct in_addr),
+ sizeof(struct sockaddr_in),
+ offsetof(struct sockaddr_in, sin_addr),
+ in_addrany, in_loopback, 0},
+ {0, 0, 0, 0, NULL, NULL, 0},
+};
+
+struct explore {
+ int e_af;
+ int e_socktype;
+ int e_protocol;
+ const char *e_protostr;
+ int e_wild;
+#define WILD_AF(ex) ((ex)->e_wild & 0x01)
+#define WILD_SOCKTYPE(ex) ((ex)->e_wild & 0x02)
+#define WILD_PROTOCOL(ex) ((ex)->e_wild & 0x04)
+};
+
+static const struct explore explore[] = {
+#if 0
+ { PF_LOCAL, 0, ANY, ANY, NULL, 0x01 },
+#endif
+ { PF_INET6, SOCK_DGRAM, IPPROTO_UDP, "udp", 0x07 },
+ { PF_INET6, SOCK_STREAM, IPPROTO_TCP, "tcp", 0x07 },
+ { PF_INET6, SOCK_RAW, ANY, NULL, 0x05 },
+ { PF_INET, SOCK_DGRAM, IPPROTO_UDP, "udp", 0x07 },
+ { PF_INET, SOCK_STREAM, IPPROTO_TCP, "tcp", 0x07 },
+ { PF_INET, SOCK_RAW, ANY, NULL, 0x05 },
+ { -1, 0, 0, NULL, 0 },
+};
+
+#define PTON_MAX 16
+
+static int str_isnumber __P((const char *));
+static int explore_fqdn __P((const struct addrinfo *, const char *,
+ const char *, struct addrinfo **));
+static int explore_copy __P((const struct addrinfo *, const struct addrinfo *,
+ struct addrinfo **));
+static int explore_null __P((const struct addrinfo *,
+ const char *, struct addrinfo **));
+static int explore_numeric __P((const struct addrinfo *, const char *,
+ const char *, struct addrinfo **));
+static int explore_numeric_scope __P((const struct addrinfo *, const char *,
+ const char *, struct addrinfo **));
+static int get_canonname __P((const struct addrinfo *,
+ struct addrinfo *, const char *));
+static struct addrinfo *get_ai __P((const struct addrinfo *,
+ const struct afd *, const char *));
+static struct addrinfo *copy_ai __P((const struct addrinfo *));
+static int get_portmatch __P((const struct addrinfo *, const char *));
+static int get_port __P((const struct addrinfo *, const char *, int));
+static const struct afd *find_afd __P((int));
+static int addrconfig __P((int));
+static int ip6_str2scopeid __P((char *, struct sockaddr_in6 *,
+ u_int32_t *scopeidp));
+static struct net_data *init __P((void));
+
+struct addrinfo *hostent2addrinfo __P((struct hostent *,
+ const struct addrinfo *));
+struct addrinfo *addr2addrinfo __P((const struct addrinfo *,
+ const char *));
+
+#if 0
+static const char *ai_errlist[] = {
+ "Success",
+ "Address family for hostname not supported", /*%< EAI_ADDRFAMILY */
+ "Temporary failure in name resolution", /*%< EAI_AGAIN */
+ "Invalid value for ai_flags", /*%< EAI_BADFLAGS */
+ "Non-recoverable failure in name resolution", /*%< EAI_FAIL */
+ "ai_family not supported", /*%< EAI_FAMILY */
+ "Memory allocation failure", /*%< EAI_MEMORY */
+ "No address associated with hostname", /*%< EAI_NODATA */
+ "hostname nor servname provided, or not known", /*%< EAI_NONAME */
+ "servname not supported for ai_socktype", /*%< EAI_SERVICE */
+ "ai_socktype not supported", /*%< EAI_SOCKTYPE */
+ "System error returned in errno", /*%< EAI_SYSTEM */
+ "Invalid value for hints", /*%< EAI_BADHINTS */
+ "Resolved protocol is unknown", /*%< EAI_PROTOCOL */
+ "Unknown error", /*%< EAI_MAX */
+};
+#endif
+
+/* XXX macros that make external reference is BAD. */
+
+#define GET_AI(ai, afd, addr) \
+do { \
+ /* external reference: pai, error, and label free */ \
+ (ai) = get_ai(pai, (afd), (addr)); \
+ if ((ai) == NULL) { \
+ error = EAI_MEMORY; \
+ goto free; \
+ } \
+} while (/*CONSTCOND*/0)
+
+#define GET_PORT(ai, serv) \
+do { \
+ /* external reference: error and label free */ \
+ error = get_port((ai), (serv), 0); \
+ if (error != 0) \
+ goto free; \
+} while (/*CONSTCOND*/0)
+
+#define GET_CANONNAME(ai, str) \
+do { \
+ /* external reference: pai, error and label free */ \
+ error = get_canonname(pai, (ai), (str)); \
+ if (error != 0) \
+ goto free; \
+} while (/*CONSTCOND*/0)
+
+#ifndef SOLARIS2
+#define SETERROR(err) \
+do { \
+ /* external reference: error, and label bad */ \
+ error = (err); \
+ goto bad; \
+ /*NOTREACHED*/ \
+} while (/*CONSTCOND*/0)
+#else
+#define SETERROR(err) \
+do { \
+ /* external reference: error, and label bad */ \
+ error = (err); \
+ if (error == error) \
+ goto bad; \
+} while (/*CONSTCOND*/0)
+#endif
+
+
+#define MATCH_FAMILY(x, y, w) \
+ ((x) == (y) || (/*CONSTCOND*/(w) && ((x) == PF_UNSPEC || (y) == PF_UNSPEC)))
+#define MATCH(x, y, w) \
+ ((x) == (y) || (/*CONSTCOND*/(w) && ((x) == ANY || (y) == ANY)))
+
+#if 0 /*%< bind8 has its own version */
+char *
+gai_strerror(ecode)
+ int ecode;
+{
+ if (ecode < 0 || ecode > EAI_MAX)
+ ecode = EAI_MAX;
+ return ai_errlist[ecode];
+}
+#endif
+
+void
+freeaddrinfo(ai)
+ struct addrinfo *ai;
+{
+ struct addrinfo *next;
+
+ do {
+ next = ai->ai_next;
+ if (ai->ai_canonname)
+ free(ai->ai_canonname);
+ /* no need to free(ai->ai_addr) */
+ free(ai);
+ ai = next;
+ } while (ai);
+}
+
+static int
+str_isnumber(p)
+ const char *p;
+{
+ char *ep;
+
+ if (*p == '\0')
+ return NO;
+ ep = NULL;
+ errno = 0;
+ (void)strtoul(p, &ep, 10);
+ if (errno == 0 && ep && *ep == '\0')
+ return YES;
+ else
+ return NO;
+}
+
+int
+getaddrinfo(hostname, servname, hints, res)
+ const char *hostname, *servname;
+ const struct addrinfo *hints;
+ struct addrinfo **res;
+{
+ struct addrinfo sentinel;
+ struct addrinfo *cur;
+ int error = 0;
+ struct addrinfo ai, ai0, *afai = NULL;
+ struct addrinfo *pai;
+ const struct explore *ex;
+
+ memset(&sentinel, 0, sizeof(sentinel));
+ cur = &sentinel;
+ pai = &ai;
+ pai->ai_flags = 0;
+ pai->ai_family = PF_UNSPEC;
+ pai->ai_socktype = ANY;
+ pai->ai_protocol = ANY;
+#if defined(sun) && defined(_SOCKLEN_T) && defined(__sparcv9)
+ /*
+ * clear _ai_pad to preserve binary
+ * compatibility with previously compiled 64-bit
+ * applications in a pre-SUSv3 environment by
+ * guaranteeing the upper 32-bits are empty.
+ */
+ pai->_ai_pad = 0;
+#endif
+ pai->ai_addrlen = 0;
+ pai->ai_canonname = NULL;
+ pai->ai_addr = NULL;
+ pai->ai_next = NULL;
+
+ if (hostname == NULL && servname == NULL)
+ return EAI_NONAME;
+ if (hints) {
+ /* error check for hints */
+ if (hints->ai_addrlen || hints->ai_canonname ||
+ hints->ai_addr || hints->ai_next)
+ SETERROR(EAI_BADHINTS); /*%< xxx */
+ if (hints->ai_flags & ~AI_MASK)
+ SETERROR(EAI_BADFLAGS);
+ switch (hints->ai_family) {
+ case PF_UNSPEC:
+ case PF_INET:
+ case PF_INET6:
+ break;
+ default:
+ SETERROR(EAI_FAMILY);
+ }
+ memcpy(pai, hints, sizeof(*pai));
+
+#if defined(sun) && defined(_SOCKLEN_T) && defined(__sparcv9)
+ /*
+ * We need to clear _ai_pad to preserve binary
+ * compatibility. See prior comment.
+ */
+ pai->_ai_pad = 0;
+#endif
+ /*
+ * if both socktype/protocol are specified, check if they
+ * are meaningful combination.
+ */
+ if (pai->ai_socktype != ANY && pai->ai_protocol != ANY) {
+ for (ex = explore; ex->e_af >= 0; ex++) {
+ if (pai->ai_family != ex->e_af)
+ continue;
+ if (ex->e_socktype == ANY)
+ continue;
+ if (ex->e_protocol == ANY)
+ continue;
+ if (pai->ai_socktype == ex->e_socktype &&
+ pai->ai_protocol != ex->e_protocol) {
+ SETERROR(EAI_BADHINTS);
+ }
+ }
+ }
+ }
+
+ /*
+ * post-2553: AI_ALL and AI_V4MAPPED are effective only against
+ * AF_INET6 query. They needs to be ignored if specified in other
+ * occassions.
+ */
+ switch (pai->ai_flags & (AI_ALL | AI_V4MAPPED)) {
+ case AI_V4MAPPED:
+ case AI_ALL | AI_V4MAPPED:
+ if (pai->ai_family != AF_INET6)
+ pai->ai_flags &= ~(AI_ALL | AI_V4MAPPED);
+ break;
+ case AI_ALL:
+#if 1
+ /* illegal */
+ SETERROR(EAI_BADFLAGS);
+#else
+ pai->ai_flags &= ~(AI_ALL | AI_V4MAPPED);
+ break;
+#endif
+ }
+
+ /*
+ * check for special cases. (1) numeric servname is disallowed if
+ * socktype/protocol are left unspecified. (2) servname is disallowed
+ * for raw and other inet{,6} sockets.
+ */
+ if (MATCH_FAMILY(pai->ai_family, PF_INET, 1)
+#ifdef PF_INET6
+ || MATCH_FAMILY(pai->ai_family, PF_INET6, 1)
+#endif
+ ) {
+ ai0 = *pai; /* backup *pai */
+
+ if (pai->ai_family == PF_UNSPEC) {
+#ifdef PF_INET6
+ pai->ai_family = PF_INET6;
+#else
+ pai->ai_family = PF_INET;
+#endif
+ }
+ error = get_portmatch(pai, servname);
+ if (error)
+ SETERROR(error);
+
+ *pai = ai0;
+ }
+
+ ai0 = *pai;
+
+ /* NULL hostname, or numeric hostname */
+ for (ex = explore; ex->e_af >= 0; ex++) {
+ *pai = ai0;
+
+ if (!MATCH_FAMILY(pai->ai_family, ex->e_af, WILD_AF(ex)))
+ continue;
+ if (!MATCH(pai->ai_socktype, ex->e_socktype, WILD_SOCKTYPE(ex)))
+ continue;
+ if (!MATCH(pai->ai_protocol, ex->e_protocol, WILD_PROTOCOL(ex)))
+ continue;
+
+ if (pai->ai_family == PF_UNSPEC)
+ pai->ai_family = ex->e_af;
+ if (pai->ai_socktype == ANY && ex->e_socktype != ANY)
+ pai->ai_socktype = ex->e_socktype;
+ if (pai->ai_protocol == ANY && ex->e_protocol != ANY)
+ pai->ai_protocol = ex->e_protocol;
+
+ /*
+ * if the servname does not match socktype/protocol, ignore it.
+ */
+ if (get_portmatch(pai, servname) != 0)
+ continue;
+
+ if (hostname == NULL) {
+ /*
+ * filter out AFs that are not supported by the kernel
+ * XXX errno?
+ */
+ if (!addrconfig(pai->ai_family))
+ continue;
+ error = explore_null(pai, servname, &cur->ai_next);
+ } else
+ error = explore_numeric_scope(pai, hostname, servname,
+ &cur->ai_next);
+
+ if (error)
+ goto free;
+
+ while (cur && cur->ai_next)
+ cur = cur->ai_next;
+ }
+
+ /*
+ * XXX
+ * If numreic representation of AF1 can be interpreted as FQDN
+ * representation of AF2, we need to think again about the code below.
+ */
+ if (sentinel.ai_next)
+ goto good;
+
+ if (pai->ai_flags & AI_NUMERICHOST)
+ SETERROR(EAI_NONAME);
+ if (hostname == NULL)
+ SETERROR(EAI_NONAME);
+
+ /*
+ * hostname as alphabetical name.
+ * We'll make sure that
+ * - if returning addrinfo list is empty, return non-zero error
+ * value (already known one or EAI_NONAME).
+ * - otherwise,
+ * + if we haven't had any errors, return 0 (i.e. success).
+ * + if we've had an error, free the list and return the error.
+ * without any assumption on the behavior of explore_fqdn().
+ */
+
+ /* first, try to query DNS for all possible address families. */
+ *pai = ai0;
+ error = explore_fqdn(pai, hostname, servname, &afai);
+ if (error) {
+ if (afai != NULL)
+ freeaddrinfo(afai);
+ goto free;
+ }
+ if (afai == NULL) {
+ error = EAI_NONAME; /*%< we've had no errors. */
+ goto free;
+ }
+
+ /*
+ * we would like to prefer AF_INET6 than AF_INET, so we'll make an
+ * outer loop by AFs.
+ */
+ for (ex = explore; ex->e_af >= 0; ex++) {
+ *pai = ai0;
+
+ if (pai->ai_family == PF_UNSPEC)
+ pai->ai_family = ex->e_af;
+
+ if (!MATCH_FAMILY(pai->ai_family, ex->e_af, WILD_AF(ex)))
+ continue;
+ if (!MATCH(pai->ai_socktype, ex->e_socktype,
+ WILD_SOCKTYPE(ex))) {
+ continue;
+ }
+ if (!MATCH(pai->ai_protocol, ex->e_protocol,
+ WILD_PROTOCOL(ex))) {
+ continue;
+ }
+
+#ifdef AI_ADDRCONFIG
+ /*
+ * If AI_ADDRCONFIG is specified, check if we are
+ * expected to return the address family or not.
+ */
+ if ((pai->ai_flags & AI_ADDRCONFIG) != 0 &&
+ !addrconfig(pai->ai_family))
+ continue;
+#endif
+
+ if (pai->ai_family == PF_UNSPEC)
+ pai->ai_family = ex->e_af;
+ if (pai->ai_socktype == ANY && ex->e_socktype != ANY)
+ pai->ai_socktype = ex->e_socktype;
+ if (pai->ai_protocol == ANY && ex->e_protocol != ANY)
+ pai->ai_protocol = ex->e_protocol;
+
+ /*
+ * if the servname does not match socktype/protocol, ignore it.
+ */
+ if (get_portmatch(pai, servname) != 0)
+ continue;
+
+ if ((error = explore_copy(pai, afai, &cur->ai_next)) != 0) {
+ freeaddrinfo(afai);
+ goto free;
+ }
+
+ while (cur && cur->ai_next)
+ cur = cur->ai_next;
+ }
+
+ freeaddrinfo(afai); /*%< afai must not be NULL at this point. */
+
+ if (sentinel.ai_next) {
+good:
+ *res = sentinel.ai_next;
+ return(SUCCESS);
+ } else {
+ /*
+ * All the process succeeded, but we've had an empty list.
+ * This can happen if the given hints do not match our
+ * candidates.
+ */
+ error = EAI_NONAME;
+ }
+
+free:
+bad:
+ if (sentinel.ai_next)
+ freeaddrinfo(sentinel.ai_next);
+ *res = NULL;
+ return(error);
+}
+
+/*%
+ * FQDN hostname, DNS lookup
+ */
+static int
+explore_fqdn(pai, hostname, servname, res)
+ const struct addrinfo *pai;
+ const char *hostname;
+ const char *servname;
+ struct addrinfo **res;
+{
+ struct addrinfo *result;
+ struct addrinfo *cur;
+ struct net_data *net_data = init();
+ struct irs_ho *ho;
+ int error = 0;
+ char tmp[NS_MAXDNAME];
+ const char *cp;
+
+ INSIST(res != NULL && *res == NULL);
+
+ /*
+ * if the servname does not match socktype/protocol, ignore it.
+ */
+ if (get_portmatch(pai, servname) != 0)
+ return(0);
+
+ if (!net_data || !(ho = net_data->ho))
+ return(0);
+#if 0 /*%< XXX (notyet) */
+ if (net_data->ho_stayopen && net_data->ho_last &&
+ net_data->ho_last->h_addrtype == af) {
+ if (ns_samename(name, net_data->ho_last->h_name) == 1)
+ return (net_data->ho_last);
+ for (hap = net_data->ho_last->h_aliases; hap && *hap; hap++)
+ if (ns_samename(name, *hap) == 1)
+ return (net_data->ho_last);
+ }
+#endif
+ if (!strchr(hostname, '.') &&
+ (cp = res_hostalias(net_data->res, hostname,
+ tmp, sizeof(tmp))))
+ hostname = cp;
+ result = (*ho->addrinfo)(ho, hostname, pai);
+ if (!net_data->ho_stayopen) {
+ (*ho->minimize)(ho);
+ }
+ if (result == NULL) {
+ int e = h_errno;
+
+ switch(e) {
+ case NETDB_INTERNAL:
+ error = EAI_SYSTEM;
+ break;
+ case TRY_AGAIN:
+ error = EAI_AGAIN;
+ break;
+ case NO_RECOVERY:
+ error = EAI_FAIL;
+ break;
+ case HOST_NOT_FOUND:
+ case NO_DATA:
+ error = EAI_NONAME;
+ break;
+ default:
+ case NETDB_SUCCESS: /*%< should be impossible... */
+ error = EAI_NONAME;
+ break;
+ }
+ goto free;
+ }
+
+ for (cur = result; cur; cur = cur->ai_next) {
+ GET_PORT(cur, servname); /*%< XXX: redundant lookups... */
+ /* canonname should already be filled. */
+ }
+
+ *res = result;
+
+ return(0);
+
+free:
+ if (result)
+ freeaddrinfo(result);
+ return error;
+}
+
+static int
+explore_copy(pai, src0, res)
+ const struct addrinfo *pai; /*%< seed */
+ const struct addrinfo *src0; /*%< source */
+ struct addrinfo **res;
+{
+ int error;
+ struct addrinfo sentinel, *cur;
+ const struct addrinfo *src;
+
+ error = 0;
+ sentinel.ai_next = NULL;
+ cur = &sentinel;
+
+ for (src = src0; src != NULL; src = src->ai_next) {
+ if (src->ai_family != pai->ai_family)
+ continue;
+
+ cur->ai_next = copy_ai(src);
+ if (!cur->ai_next) {
+ error = EAI_MEMORY;
+ goto fail;
+ }
+
+ cur->ai_next->ai_socktype = pai->ai_socktype;
+ cur->ai_next->ai_protocol = pai->ai_protocol;
+ cur = cur->ai_next;
+ }
+
+ *res = sentinel.ai_next;
+ return 0;
+
+fail:
+ freeaddrinfo(sentinel.ai_next);
+ return error;
+}
+
+/*%
+ * hostname == NULL.
+ * passive socket -> anyaddr (0.0.0.0 or ::)
+ * non-passive socket -> localhost (127.0.0.1 or ::1)
+ */
+static int
+explore_null(pai, servname, res)
+ const struct addrinfo *pai;
+ const char *servname;
+ struct addrinfo **res;
+{
+ const struct afd *afd;
+ struct addrinfo *cur;
+ struct addrinfo sentinel;
+ int error;
+
+ *res = NULL;
+ sentinel.ai_next = NULL;
+ cur = &sentinel;
+
+ afd = find_afd(pai->ai_family);
+ if (afd == NULL)
+ return 0;
+
+ if (pai->ai_flags & AI_PASSIVE) {
+ GET_AI(cur->ai_next, afd, afd->a_addrany);
+ /* xxx meaningless?
+ * GET_CANONNAME(cur->ai_next, "anyaddr");
+ */
+ GET_PORT(cur->ai_next, servname);
+ } else {
+ GET_AI(cur->ai_next, afd, afd->a_loopback);
+ /* xxx meaningless?
+ * GET_CANONNAME(cur->ai_next, "localhost");
+ */
+ GET_PORT(cur->ai_next, servname);
+ }
+ cur = cur->ai_next;
+
+ *res = sentinel.ai_next;
+ return 0;
+
+free:
+ if (sentinel.ai_next)
+ freeaddrinfo(sentinel.ai_next);
+ return error;
+}
+
+/*%
+ * numeric hostname
+ */
+static int
+explore_numeric(pai, hostname, servname, res)
+ const struct addrinfo *pai;
+ const char *hostname;
+ const char *servname;
+ struct addrinfo **res;
+{
+ const struct afd *afd;
+ struct addrinfo *cur;
+ struct addrinfo sentinel;
+ int error;
+ char pton[PTON_MAX];
+
+ *res = NULL;
+ sentinel.ai_next = NULL;
+ cur = &sentinel;
+
+ afd = find_afd(pai->ai_family);
+ if (afd == NULL)
+ return 0;
+
+ switch (afd->a_af) {
+#if 0 /*X/Open spec*/
+ case AF_INET:
+ if (inet_aton(hostname, (struct in_addr *)pton) == 1) {
+ if (pai->ai_family == afd->a_af ||
+ pai->ai_family == PF_UNSPEC /*?*/) {
+ GET_AI(cur->ai_next, afd, pton);
+ GET_PORT(cur->ai_next, servname);
+ while (cur->ai_next)
+ cur = cur->ai_next;
+ } else
+ SETERROR(EAI_FAMILY); /*xxx*/
+ }
+ break;
+#endif
+ default:
+ if (inet_pton(afd->a_af, hostname, pton) == 1) {
+ if (pai->ai_family == afd->a_af ||
+ pai->ai_family == PF_UNSPEC /*?*/) {
+ GET_AI(cur->ai_next, afd, pton);
+ GET_PORT(cur->ai_next, servname);
+ while (cur->ai_next)
+ cur = cur->ai_next;
+ } else
+ SETERROR(EAI_FAMILY); /*xxx*/
+ }
+ break;
+ }
+
+ *res = sentinel.ai_next;
+ return 0;
+
+free:
+bad:
+ if (sentinel.ai_next)
+ freeaddrinfo(sentinel.ai_next);
+ return error;
+}
+
+/*%
+ * numeric hostname with scope
+ */
+static int
+explore_numeric_scope(pai, hostname, servname, res)
+ const struct addrinfo *pai;
+ const char *hostname;
+ const char *servname;
+ struct addrinfo **res;
+{
+#ifndef SCOPE_DELIMITER
+ return explore_numeric(pai, hostname, servname, res);
+#else
+ const struct afd *afd;
+ struct addrinfo *cur;
+ int error;
+ char *cp, *hostname2 = NULL, *scope, *addr;
+ struct sockaddr_in6 *sin6;
+
+ afd = find_afd(pai->ai_family);
+ if (afd == NULL)
+ return 0;
+
+ if (!afd->a_scoped)
+ return explore_numeric(pai, hostname, servname, res);
+
+ cp = strchr(hostname, SCOPE_DELIMITER);
+ if (cp == NULL)
+ return explore_numeric(pai, hostname, servname, res);
+
+ /*
+ * Handle special case of <scoped_address><delimiter><scope id>
+ */
+ hostname2 = strdup(hostname);
+ if (hostname2 == NULL)
+ return EAI_MEMORY;
+ /* terminate at the delimiter */
+ hostname2[cp - hostname] = '\0';
+ addr = hostname2;
+ scope = cp + 1;
+
+ error = explore_numeric(pai, addr, servname, res);
+ if (error == 0) {
+ u_int32_t scopeid = 0;
+
+ for (cur = *res; cur; cur = cur->ai_next) {
+ if (cur->ai_family != AF_INET6)
+ continue;
+ sin6 = (struct sockaddr_in6 *)(void *)cur->ai_addr;
+ if (!ip6_str2scopeid(scope, sin6, &scopeid)) {
+ free(hostname2);
+ return(EAI_NONAME); /*%< XXX: is return OK? */
+ }
+#ifdef HAVE_SIN6_SCOPE_ID
+ sin6->sin6_scope_id = scopeid;
+#endif
+ }
+ }
+
+ free(hostname2);
+
+ return error;
+#endif
+}
+
+static int
+get_canonname(pai, ai, str)
+ const struct addrinfo *pai;
+ struct addrinfo *ai;
+ const char *str;
+{
+ if ((pai->ai_flags & AI_CANONNAME) != 0) {
+ ai->ai_canonname = (char *)malloc(strlen(str) + 1);
+ if (ai->ai_canonname == NULL)
+ return EAI_MEMORY;
+ strcpy(ai->ai_canonname, str);
+ }
+ return 0;
+}
+
+static struct addrinfo *
+get_ai(pai, afd, addr)
+ const struct addrinfo *pai;
+ const struct afd *afd;
+ const char *addr;
+{
+ char *p;
+ struct addrinfo *ai;
+
+ ai = (struct addrinfo *)malloc(sizeof(struct addrinfo)
+ + (afd->a_socklen));
+ if (ai == NULL)
+ return NULL;
+
+ memcpy(ai, pai, sizeof(struct addrinfo));
+ ai->ai_addr = (struct sockaddr *)(void *)(ai + 1);
+ memset(ai->ai_addr, 0, (size_t)afd->a_socklen);
+#ifdef HAVE_SA_LEN
+ ai->ai_addr->sa_len = afd->a_socklen;
+#endif
+ ai->ai_addrlen = afd->a_socklen;
+ ai->ai_addr->sa_family = ai->ai_family = afd->a_af;
+ p = (char *)(void *)(ai->ai_addr);
+ memcpy(p + afd->a_off, addr, (size_t)afd->a_addrlen);
+ return ai;
+}
+
+/* XXX need to malloc() the same way we do from other functions! */
+static struct addrinfo *
+copy_ai(pai)
+ const struct addrinfo *pai;
+{
+ struct addrinfo *ai;
+ size_t l;
+
+ l = sizeof(*ai) + pai->ai_addrlen;
+ if ((ai = (struct addrinfo *)malloc(l)) == NULL)
+ return NULL;
+ memset(ai, 0, l);
+ memcpy(ai, pai, sizeof(*ai));
+ ai->ai_addr = (struct sockaddr *)(void *)(ai + 1);
+ memcpy(ai->ai_addr, pai->ai_addr, pai->ai_addrlen);
+
+ if (pai->ai_canonname) {
+ l = strlen(pai->ai_canonname) + 1;
+ if ((ai->ai_canonname = malloc(l)) == NULL) {
+ free(ai);
+ return NULL;
+ }
+ strcpy(ai->ai_canonname, pai->ai_canonname); /* (checked) */
+ } else {
+ /* just to make sure */
+ ai->ai_canonname = NULL;
+ }
+
+ ai->ai_next = NULL;
+
+ return ai;
+}
+
+static int
+get_portmatch(const struct addrinfo *ai, const char *servname) {
+
+ /* get_port does not touch first argument. when matchonly == 1. */
+ /* LINTED const cast */
+ return get_port((const struct addrinfo *)ai, servname, 1);
+}
+
+static int
+get_port(const struct addrinfo *ai, const char *servname, int matchonly) {
+ const char *proto;
+ struct servent *sp;
+ int port;
+ int allownumeric;
+
+ if (servname == NULL)
+ return 0;
+ switch (ai->ai_family) {
+ case AF_INET:
+#ifdef AF_INET6
+ case AF_INET6:
+#endif
+ break;
+ default:
+ return 0;
+ }
+
+ switch (ai->ai_socktype) {
+ case SOCK_RAW:
+ return EAI_SERVICE;
+ case SOCK_DGRAM:
+ case SOCK_STREAM:
+ allownumeric = 1;
+ break;
+ case ANY:
+ switch (ai->ai_family) {
+ case AF_INET:
+#ifdef AF_INET6
+ case AF_INET6:
+#endif
+ allownumeric = 1;
+ break;
+ default:
+ allownumeric = 0;
+ break;
+ }
+ break;
+ default:
+ return EAI_SOCKTYPE;
+ }
+
+ if (str_isnumber(servname)) {
+ if (!allownumeric)
+ return EAI_SERVICE;
+ port = atoi(servname);
+ if (port < 0 || port > 65535)
+ return EAI_SERVICE;
+ port = htons(port);
+ } else {
+ switch (ai->ai_socktype) {
+ case SOCK_DGRAM:
+ proto = "udp";
+ break;
+ case SOCK_STREAM:
+ proto = "tcp";
+ break;
+ default:
+ proto = NULL;
+ break;
+ }
+
+ if ((sp = getservbyname(servname, proto)) == NULL)
+ return EAI_SERVICE;
+ port = sp->s_port;
+ }
+
+ if (!matchonly) {
+ switch (ai->ai_family) {
+ case AF_INET:
+ ((struct sockaddr_in *)(void *)
+ ai->ai_addr)->sin_port = port;
+ break;
+ case AF_INET6:
+ ((struct sockaddr_in6 *)(void *)
+ ai->ai_addr)->sin6_port = port;
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static const struct afd *
+find_afd(af)
+ int af;
+{
+ const struct afd *afd;
+
+ if (af == PF_UNSPEC)
+ return NULL;
+ for (afd = afdl; afd->a_af; afd++) {
+ if (afd->a_af == af)
+ return afd;
+ }
+ return NULL;
+}
+
+/*%
+ * post-2553: AI_ADDRCONFIG check. if we use getipnodeby* as backend, backend
+ * will take care of it.
+ * the semantics of AI_ADDRCONFIG is not defined well. we are not sure
+ * if the code is right or not.
+ */
+static int
+addrconfig(af)
+ int af;
+{
+ int s;
+
+ /* XXX errno */
+ s = socket(af, SOCK_DGRAM, 0);
+ if (s < 0) {
+ if (errno != EMFILE)
+ return 0;
+ } else
+ close(s);
+ return 1;
+}
+
+/* convert a string to a scope identifier. XXX: IPv6 specific */
+static int
+ip6_str2scopeid(char *scope, struct sockaddr_in6 *sin6,
+ u_int32_t *scopeidp)
+{
+ u_int32_t scopeid;
+ u_long lscopeid;
+ struct in6_addr *a6 = &sin6->sin6_addr;
+ char *ep;
+
+ /* empty scopeid portion is invalid */
+ if (*scope == '\0')
+ return (0);
+
+#ifdef USE_IFNAMELINKID
+ if (IN6_IS_ADDR_LINKLOCAL(a6) || IN6_IS_ADDR_MC_LINKLOCAL(a6) ||
+ IN6_IS_ADDR_MC_NODELOCAL(a6)) {
+ /*
+ * Using interface names as link indices can be allowed
+ * only when we can assume a one-to-one mappings between
+ * links and interfaces. See comments in getnameinfo.c.
+ */
+ scopeid = if_nametoindex(scope);
+ if (scopeid == 0)
+ goto trynumeric;
+ *scopeidp = scopeid;
+ return (1);
+ }
+#endif
+
+ /* still unclear about literal, allow numeric only - placeholder */
+ if (IN6_IS_ADDR_SITELOCAL(a6) || IN6_IS_ADDR_MC_SITELOCAL(a6))
+ goto trynumeric;
+ if (IN6_IS_ADDR_MC_ORGLOCAL(a6))
+ goto trynumeric;
+ else
+ goto trynumeric; /*%< global */
+ /* try to convert to a numeric id as a last resort */
+trynumeric:
+ errno = 0;
+ lscopeid = strtoul(scope, &ep, 10);
+ scopeid = lscopeid & 0xffffffff;
+ if (errno == 0 && ep && *ep == '\0' && scopeid == lscopeid) {
+ *scopeidp = scopeid;
+ return (1);
+ } else
+ return (0);
+}
+
+struct addrinfo *
+hostent2addrinfo(hp, pai)
+ struct hostent *hp;
+ const struct addrinfo *pai;
+{
+ int i, af, error = 0;
+ char **aplist = NULL, *ap;
+ struct addrinfo sentinel, *cur;
+ const struct afd *afd;
+
+ af = hp->h_addrtype;
+ if (pai->ai_family != AF_UNSPEC && af != pai->ai_family)
+ return(NULL);
+
+ afd = find_afd(af);
+ if (afd == NULL)
+ return(NULL);
+
+ aplist = hp->h_addr_list;
+
+ memset(&sentinel, 0, sizeof(sentinel));
+ cur = &sentinel;
+
+ for (i = 0; (ap = aplist[i]) != NULL; i++) {
+#if 0 /*%< the trick seems too much */
+ af = hp->h_addr_list;
+ if (af == AF_INET6 &&
+ IN6_IS_ADDR_V4MAPPED((struct in6_addr *)ap)) {
+ af = AF_INET;
+ ap = ap + sizeof(struct in6_addr)
+ - sizeof(struct in_addr);
+ }
+ afd = find_afd(af);
+ if (afd == NULL)
+ continue;
+#endif /* 0 */
+
+ GET_AI(cur->ai_next, afd, ap);
+
+ /* GET_PORT(cur->ai_next, servname); */
+ if ((pai->ai_flags & AI_CANONNAME) != 0) {
+ /*
+ * RFC2553 says that ai_canonname will be set only for
+ * the first element. we do it for all the elements,
+ * just for convenience.
+ */
+ GET_CANONNAME(cur->ai_next, hp->h_name);
+ }
+ while (cur->ai_next) /*%< no need to loop, actually. */
+ cur = cur->ai_next;
+ continue;
+
+ free:
+ if (cur->ai_next)
+ freeaddrinfo(cur->ai_next);
+ cur->ai_next = NULL;
+ /* continue, without tht pointer CUR advanced. */
+ }
+
+ return(sentinel.ai_next);
+}
+
+struct addrinfo *
+addr2addrinfo(pai, cp)
+ const struct addrinfo *pai;
+ const char *cp;
+{
+ const struct afd *afd;
+
+ afd = find_afd(pai->ai_family);
+ if (afd == NULL)
+ return(NULL);
+
+ return(get_ai(pai, afd, cp));
+}
+
+static struct net_data *
+init()
+{
+ struct net_data *net_data;
+
+ if (!(net_data = net_data_init(NULL)))
+ goto error;
+ if (!net_data->ho) {
+ net_data->ho = (*net_data->irs->ho_map)(net_data->irs);
+ if (!net_data->ho || !net_data->res) {
+error:
+ errno = EIO;
+ if (net_data && net_data->res)
+ RES_SET_H_ERRNO(net_data->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+
+ (*net_data->ho->res_set)(net_data->ho, net_data->res, NULL);
+ }
+
+ return (net_data);
+}
diff --git a/usr/src/lib/libresolv2_joy/common/irs/gethostent.c b/usr/src/lib/libresolv2_joy/common/irs/gethostent.c
new file mode 100644
index 0000000000..c4751c2c80
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/gethostent.c
@@ -0,0 +1,1096 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: gethostent.c,v 1.8 2006/01/10 05:06:00 marka Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#if !defined(__BIND_NOSTATIC)
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <netinet/in.h>
+#include <net/if.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <irs.h>
+#include <isc/memcluster.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "irs_data.h"
+
+/* Definitions */
+
+struct pvt {
+ char * aliases[1];
+ char * addrs[2];
+ char addr[NS_IN6ADDRSZ];
+ char name[NS_MAXDNAME + 1];
+ struct hostent host;
+};
+
+/* Forward */
+
+static struct net_data *init(void);
+static void freepvt(struct net_data *);
+static struct hostent *fakeaddr(const char *, int, struct net_data *);
+
+#ifdef SUNW_OVERRIDE_RETRY
+extern int __res_retry(int);
+extern int __res_retry_reset(void);
+#endif /* SUNW_OVERRIDE_RETRY */
+
+
+/* Public */
+
+struct hostent *
+gethostbyname(const char *name) {
+ struct net_data *net_data = init();
+
+ return (gethostbyname_p(name, net_data));
+}
+
+struct hostent *
+gethostbyname2(const char *name, int af) {
+ struct net_data *net_data = init();
+
+ return (gethostbyname2_p(name, af, net_data));
+}
+
+struct hostent *
+gethostbyaddr(const char *addr, int len, int af) {
+ struct net_data *net_data = init();
+
+ return (gethostbyaddr_p(addr, len, af, net_data));
+}
+
+struct hostent *
+gethostent() {
+ struct net_data *net_data = init();
+
+ return (gethostent_p(net_data));
+}
+
+void
+sethostent(int stayopen) {
+ struct net_data *net_data = init();
+ sethostent_p(stayopen, net_data);
+}
+
+
+void
+endhostent() {
+ struct net_data *net_data = init();
+ endhostent_p(net_data);
+}
+
+/* Shared private. */
+
+struct hostent *
+gethostbyname_p(const char *name, struct net_data *net_data) {
+ struct hostent *hp;
+
+ if (!net_data)
+ return (NULL);
+
+ if (net_data->res->options & RES_USE_INET6) {
+ hp = gethostbyname2_p(name, AF_INET6, net_data);
+ if (hp)
+ return (hp);
+ }
+ return (gethostbyname2_p(name, AF_INET, net_data));
+}
+
+struct hostent *
+gethostbyname2_p(const char *name, int af, struct net_data *net_data) {
+ struct irs_ho *ho;
+ char tmp[NS_MAXDNAME];
+ struct hostent *hp;
+ const char *cp;
+ char **hap;
+
+ if (!net_data || !(ho = net_data->ho))
+ return (NULL);
+ if (net_data->ho_stayopen && net_data->ho_last &&
+ net_data->ho_last->h_addrtype == af) {
+ if (ns_samename(name, net_data->ho_last->h_name) == 1)
+ return (net_data->ho_last);
+ for (hap = net_data->ho_last->h_aliases; hap && *hap; hap++)
+ if (ns_samename(name, *hap) == 1)
+ return (net_data->ho_last);
+ }
+ if (!strchr(name, '.') && (cp = res_hostalias(net_data->res, name,
+ tmp, sizeof tmp)))
+ name = cp;
+ if ((hp = fakeaddr(name, af, net_data)) != NULL)
+ return (hp);
+#ifdef SUNW_OVERRIDE_RETRY
+ net_data->res->retry = __res_retry(net_data->res->retry);
+#endif /* SUNW_OVERRIDE_RETRY */
+ net_data->ho_last = (*ho->byname2)(ho, name, af);
+#ifdef SUNW_OVERRIDE_RETRY
+ net_data->res->retry = __res_retry_reset();
+#endif /* SUNW_OVERRIDE_RETRY */
+ if (!net_data->ho_stayopen)
+ endhostent();
+ return (net_data->ho_last);
+}
+
+struct hostent *
+gethostbyaddr_p(const char *addr, int len, int af, struct net_data *net_data) {
+ struct irs_ho *ho;
+ char **hap;
+
+ if (!net_data || !(ho = net_data->ho))
+ return (NULL);
+ if (net_data->ho_stayopen && net_data->ho_last &&
+ net_data->ho_last->h_length == len)
+ for (hap = net_data->ho_last->h_addr_list;
+ hap && *hap;
+ hap++)
+ if (!memcmp(addr, *hap, len))
+ return (net_data->ho_last);
+ net_data->ho_last = (*ho->byaddr)(ho, addr, len, af);
+ if (!net_data->ho_stayopen)
+ endhostent();
+ return (net_data->ho_last);
+}
+
+
+struct hostent *
+gethostent_p(struct net_data *net_data) {
+ struct irs_ho *ho;
+ struct hostent *hp;
+
+ if (!net_data || !(ho = net_data->ho))
+ return (NULL);
+ while ((hp = (*ho->next)(ho)) != NULL &&
+ hp->h_addrtype == AF_INET6 &&
+ (net_data->res->options & RES_USE_INET6) == 0U)
+ continue;
+ net_data->ho_last = hp;
+ return (net_data->ho_last);
+}
+
+
+void
+sethostent_p(int stayopen, struct net_data *net_data) {
+ struct irs_ho *ho;
+
+ if (!net_data || !(ho = net_data->ho))
+ return;
+ freepvt(net_data);
+ (*ho->rewind)(ho);
+ net_data->ho_stayopen = (stayopen != 0);
+ if (stayopen == 0)
+ net_data_minimize(net_data);
+}
+
+void
+endhostent_p(struct net_data *net_data) {
+ struct irs_ho *ho;
+
+ if ((net_data != NULL) && ((ho = net_data->ho) != NULL))
+ (*ho->minimize)(ho);
+}
+
+#ifndef IN6_IS_ADDR_V4COMPAT
+static const unsigned char in6addr_compat[12] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+#define IN6_IS_ADDR_V4COMPAT(x) (!memcmp((x)->s6_addr, in6addr_compat, 12) && \
+ ((x)->s6_addr[12] != 0 || \
+ (x)->s6_addr[13] != 0 || \
+ (x)->s6_addr[14] != 0 || \
+ ((x)->s6_addr[15] != 0 && \
+ (x)->s6_addr[15] != 1)))
+#endif
+#ifndef IN6_IS_ADDR_V4MAPPED
+#define IN6_IS_ADDR_V4MAPPED(x) (!memcmp((x)->s6_addr, in6addr_mapped, 12))
+#endif
+
+static const unsigned char in6addr_mapped[12] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff };
+
+static int scan_interfaces(int *, int *);
+static struct hostent *copyandmerge(struct hostent *, struct hostent *, int, int *);
+
+/*%
+ * Public functions
+ */
+
+/*%
+ * AI_V4MAPPED + AF_INET6
+ * If no IPv6 address then a query for IPv4 and map returned values.
+ *
+ * AI_ALL + AI_V4MAPPED + AF_INET6
+ * Return IPv6 and IPv4 mapped.
+ *
+ * AI_ADDRCONFIG
+ * Only return IPv6 / IPv4 address if there is an interface of that
+ * type active.
+ */
+
+struct hostent *
+getipnodebyname(const char *name, int af, int flags, int *error_num) {
+ int have_v4 = 1, have_v6 = 1;
+ struct in_addr in4;
+ struct in6_addr in6;
+ struct hostent he, *he1 = NULL, *he2 = NULL, *he3;
+ int v4 = 0, v6 = 0;
+ struct net_data *net_data = init();
+ u_long options;
+ int tmp_err;
+
+ if (net_data == NULL) {
+ *error_num = NO_RECOVERY;
+ return (NULL);
+ }
+
+ /* If we care about active interfaces then check. */
+ if ((flags & AI_ADDRCONFIG) != 0)
+ if (scan_interfaces(&have_v4, &have_v6) == -1) {
+ *error_num = NO_RECOVERY;
+ return (NULL);
+ }
+
+ /* Check for literal address. */
+ if ((v4 = inet_pton(AF_INET, name, &in4)) != 1)
+ v6 = inet_pton(AF_INET6, name, &in6);
+
+ /* Impossible combination? */
+
+ if ((af == AF_INET6 && (flags & AI_V4MAPPED) == 0 && v4 == 1) ||
+ (af == AF_INET && v6 == 1) ||
+ (have_v4 == 0 && v4 == 1) ||
+ (have_v6 == 0 && v6 == 1) ||
+ (have_v4 == 0 && af == AF_INET) ||
+ (have_v6 == 0 && af == AF_INET6)) {
+ *error_num = HOST_NOT_FOUND;
+ return (NULL);
+ }
+
+ /* Literal address? */
+ if (v4 == 1 || v6 == 1) {
+ char *addr_list[2];
+ char *aliases[1];
+
+ DE_CONST(name, he.h_name);
+ he.h_addr_list = addr_list;
+ he.h_addr_list[0] = (v4 == 1) ? (char *)&in4 : (char *)&in6;
+ he.h_addr_list[1] = NULL;
+ he.h_aliases = aliases;
+ he.h_aliases[0] = NULL;
+ he.h_length = (v4 == 1) ? INADDRSZ : IN6ADDRSZ;
+ he.h_addrtype = (v4 == 1) ? AF_INET : AF_INET6;
+ return (copyandmerge(&he, NULL, af, error_num));
+ }
+
+ options = net_data->res->options;
+ net_data->res->options &= ~RES_USE_INET6;
+
+ tmp_err = NO_RECOVERY;
+ if (have_v6 && af == AF_INET6) {
+ he2 = gethostbyname2_p(name, AF_INET6, net_data);
+ if (he2 != NULL) {
+ he1 = copyandmerge(he2, NULL, af, error_num);
+ if (he1 == NULL)
+ return (NULL);
+ he2 = NULL;
+ } else {
+ tmp_err = net_data->res->res_h_errno;
+ }
+ }
+
+ if (have_v4 &&
+ ((af == AF_INET) ||
+ (af == AF_INET6 && (flags & AI_V4MAPPED) != 0 &&
+ (he1 == NULL || (flags & AI_ALL) != 0)))) {
+ he2 = gethostbyname2_p(name, AF_INET, net_data);
+ if (he1 == NULL && he2 == NULL) {
+ *error_num = net_data->res->res_h_errno;
+ return (NULL);
+ }
+ } else
+ *error_num = tmp_err;
+
+ net_data->res->options = options;
+
+ he3 = copyandmerge(he1, he2, af, error_num);
+
+ if (he1 != NULL)
+ freehostent(he1);
+ return (he3);
+}
+
+struct hostent *
+getipnodebyaddr(const void *src, size_t len, int af, int *error_num) {
+ struct hostent *he1, *he2;
+ struct net_data *net_data = init();
+
+ /* Sanity Checks. */
+#ifdef ORIGINAL_ISC_CODE
+ if (src == NULL) {
+#else
+ /* this change was added circa May 2009, but not in ISC libbind 6.0 */
+ if (src == NULL|| net_data == NULL) {
+#endif /* ORIGINAL_ISC_CODE */
+ *error_num = NO_RECOVERY;
+ return (NULL);
+ }
+
+ switch (af) {
+ case AF_INET:
+ if (len != (size_t)INADDRSZ) {
+ *error_num = NO_RECOVERY;
+ return (NULL);
+ }
+ break;
+ case AF_INET6:
+ if (len != (size_t)IN6ADDRSZ) {
+ *error_num = NO_RECOVERY;
+ return (NULL);
+ }
+ break;
+ default:
+ *error_num = NO_RECOVERY;
+ return (NULL);
+ }
+
+ /*
+ * Lookup IPv4 and IPv4 mapped/compatible addresses
+ */
+ if ((af == AF_INET6 &&
+ IN6_IS_ADDR_V4COMPAT((const struct in6_addr *)src)) ||
+ (af == AF_INET6 &&
+ IN6_IS_ADDR_V4MAPPED((const struct in6_addr *)src)) ||
+ (af == AF_INET)) {
+ const char *cp = src;
+
+ if (af == AF_INET6)
+ cp += 12;
+ he1 = gethostbyaddr_p(cp, 4, AF_INET, net_data);
+ if (he1 == NULL) {
+ *error_num = net_data->res->res_h_errno;
+ return (NULL);
+ }
+ he2 = copyandmerge(he1, NULL, af, error_num);
+ if (he2 == NULL)
+ return (NULL);
+ /*
+ * Restore original address if mapped/compatible.
+ */
+ if (af == AF_INET6)
+ memcpy(he1->h_addr, src, len);
+ return (he2);
+ }
+
+ /*
+ * Lookup IPv6 address.
+ */
+ if (memcmp((const struct in6_addr *)src, &in6addr_any, 16) == 0) {
+ *error_num = HOST_NOT_FOUND;
+ return (NULL);
+ }
+
+ he1 = gethostbyaddr_p(src, 16, AF_INET6, net_data);
+ if (he1 == NULL) {
+ *error_num = net_data->res->res_h_errno;
+ return (NULL);
+ }
+ return (copyandmerge(he1, NULL, af, error_num));
+}
+
+void
+freehostent(struct hostent *he) {
+ char **cpp;
+ int names = 1;
+ int addresses = 1;
+
+ memput(he->h_name, strlen(he->h_name) + 1);
+
+ cpp = he->h_addr_list;
+ while (*cpp != NULL) {
+ memput(*cpp, (he->h_addrtype == AF_INET) ?
+ INADDRSZ : IN6ADDRSZ);
+ *cpp = NULL;
+ cpp++;
+ addresses++;
+ }
+
+ cpp = he->h_aliases;
+ while (*cpp != NULL) {
+ memput(*cpp, strlen(*cpp) + 1);
+ cpp++;
+ names++;
+ }
+
+ memput(he->h_aliases, sizeof(char *) * (names));
+ memput(he->h_addr_list, sizeof(char *) * (addresses));
+ memput(he, sizeof *he);
+}
+
+/*%
+ * Private
+ */
+
+/*%
+ * Scan the interface table and set have_v4 and have_v6 depending
+ * upon whether there are IPv4 and IPv6 interface addresses.
+ *
+ * Returns:
+ * 0 on success
+ * -1 on failure.
+ */
+
+#if defined(SIOCGLIFCONF) && defined(SIOCGLIFADDR) && \
+ !defined(IRIX_EMUL_IOCTL_SIOCGIFCONF)
+
+#ifdef __hpux
+#define lifc_len iflc_len
+#define lifc_buf iflc_buf
+#define lifc_req iflc_req
+#define LIFCONF if_laddrconf
+#else
+#define SETFAMILYFLAGS
+#define LIFCONF lifconf
+#endif
+
+#ifdef __hpux
+#define lifr_addr iflr_addr
+#define lifr_name iflr_name
+#define lifr_dstaddr iflr_dstaddr
+#define lifr_flags iflr_flags
+#define ss_family sa_family
+#define LIFREQ if_laddrreq
+#else
+#define LIFREQ lifreq
+#endif
+
+static void
+scan_interfaces6(int *have_v4, int *have_v6) {
+ struct LIFCONF lifc;
+ struct LIFREQ lifreq;
+ struct in_addr in4;
+ struct in6_addr in6;
+ char *buf = NULL, *cp, *cplim;
+ static unsigned int bufsiz = 4095;
+ int s, cpsize, n;
+
+ /* Get interface list from system. */
+ if ((s = socket(AF_INET6, SOCK_DGRAM, 0)) == -1)
+ goto cleanup;
+
+ /*
+ * Grow buffer until large enough to contain all interface
+ * descriptions.
+ */
+ for (;;) {
+ buf = memget(bufsiz);
+ if (buf == NULL)
+ goto cleanup;
+#ifdef SETFAMILYFLAGS
+ lifc.lifc_family = AF_UNSPEC; /*%< request all families */
+ lifc.lifc_flags = 0;
+#endif
+ lifc.lifc_len = bufsiz;
+ lifc.lifc_buf = buf;
+ if ((n = ioctl(s, SIOCGLIFCONF, (char *)&lifc)) != -1) {
+ /*
+ * Some OS's just return what will fit rather
+ * than set EINVAL if the buffer is too small
+ * to fit all the interfaces in. If
+ * lifc.lifc_len is too near to the end of the
+ * buffer we will grow it just in case and
+ * retry.
+ */
+ if (lifc.lifc_len + 2 * sizeof(lifreq) < bufsiz)
+ break;
+ }
+ if ((n == -1) && errno != EINVAL)
+ goto cleanup;
+
+ if (bufsiz > 1000000)
+ goto cleanup;
+
+ memput(buf, bufsiz);
+ bufsiz += 4096;
+ }
+
+ /* Parse system's interface list. */
+ cplim = buf + lifc.lifc_len; /*%< skip over if's with big ifr_addr's */
+ for (cp = buf;
+ (*have_v4 == 0 || *have_v6 == 0) && cp < cplim;
+ cp += cpsize) {
+ memcpy(&lifreq, cp, sizeof lifreq);
+#ifdef HAVE_SA_LEN
+#ifdef FIX_ZERO_SA_LEN
+ if (lifreq.lifr_addr.sa_len == 0)
+ lifreq.lifr_addr.sa_len = 16;
+#endif
+#ifdef HAVE_MINIMUM_IFREQ
+ cpsize = sizeof lifreq;
+ if (lifreq.lifr_addr.sa_len > sizeof (struct sockaddr))
+ cpsize += (int)lifreq.lifr_addr.sa_len -
+ (int)(sizeof (struct sockaddr));
+#else
+ cpsize = sizeof lifreq.lifr_name + lifreq.lifr_addr.sa_len;
+#endif /* HAVE_MINIMUM_IFREQ */
+#elif defined SIOCGIFCONF_ADDR
+ cpsize = sizeof lifreq;
+#else
+ cpsize = sizeof lifreq.lifr_name;
+ /* XXX maybe this should be a hard error? */
+ if (ioctl(s, SIOCGLIFADDR, (char *)&lifreq) < 0)
+ continue;
+#endif
+ switch (lifreq.lifr_addr.ss_family) {
+ case AF_INET:
+ if (*have_v4 == 0) {
+ memcpy(&in4,
+ &((struct sockaddr_in *)
+ &lifreq.lifr_addr)->sin_addr,
+ sizeof in4);
+ if (in4.s_addr == INADDR_ANY)
+ break;
+ n = ioctl(s, SIOCGLIFFLAGS, (char *)&lifreq);
+ if (n < 0)
+ break;
+ if ((lifreq.lifr_flags & IFF_UP) == 0)
+ break;
+ *have_v4 = 1;
+ }
+ break;
+ case AF_INET6:
+ if (*have_v6 == 0) {
+ memcpy(&in6,
+ &((struct sockaddr_in6 *)
+ &lifreq.lifr_addr)->sin6_addr, sizeof in6);
+ if (memcmp(&in6, &in6addr_any, sizeof in6) == 0)
+ break;
+ n = ioctl(s, SIOCGLIFFLAGS, (char *)&lifreq);
+ if (n < 0)
+ break;
+ if ((lifreq.lifr_flags & IFF_UP) == 0)
+ break;
+ *have_v6 = 1;
+ }
+ break;
+ }
+ }
+ if (buf != NULL)
+ memput(buf, bufsiz);
+ close(s);
+ /* printf("scan interface -> 4=%d 6=%d\n", *have_v4, *have_v6); */
+ return;
+ cleanup:
+ if (buf != NULL)
+ memput(buf, bufsiz);
+ if (s != -1)
+ close(s);
+ /* printf("scan interface -> 4=%d 6=%d\n", *have_v4, *have_v6); */
+ return;
+}
+#endif
+
+#if ( defined(__linux__) || defined(__linux) || defined(LINUX) )
+#ifndef IF_NAMESIZE
+# ifdef IFNAMSIZ
+# define IF_NAMESIZE IFNAMSIZ
+# else
+# define IF_NAMESIZE 16
+# endif
+#endif
+static void
+scan_linux6(int *have_v6) {
+ FILE *proc = NULL;
+ char address[33];
+ char name[IF_NAMESIZE+1];
+ int ifindex, prefix, flag3, flag4;
+
+ proc = fopen("/proc/net/if_inet6", "r");
+ if (proc == NULL)
+ return;
+
+ if (fscanf(proc, "%32[a-f0-9] %x %x %x %x %16s\n",
+ address, &ifindex, &prefix, &flag3, &flag4, name) == 6)
+ *have_v6 = 1;
+ fclose(proc);
+ return;
+}
+#endif
+
+static int
+scan_interfaces(int *have_v4, int *have_v6) {
+ struct ifconf ifc;
+ union {
+ char _pad[256]; /*%< leave space for IPv6 addresses */
+ struct ifreq ifreq;
+ } u;
+ struct in_addr in4;
+ struct in6_addr in6;
+ char *buf = NULL, *cp, *cplim;
+ static unsigned int bufsiz = 4095;
+ int s, n;
+ size_t cpsize;
+
+ /* Set to zero. Used as loop terminators below. */
+ *have_v4 = *have_v6 = 0;
+
+#if defined(SIOCGLIFCONF) && defined(SIOCGLIFADDR) && \
+ !defined(IRIX_EMUL_IOCTL_SIOCGIFCONF)
+ /*
+ * Try to scan the interfaces using IPv6 ioctls().
+ */
+ scan_interfaces6(have_v4, have_v6);
+ if (*have_v4 != 0 && *have_v6 != 0)
+ return (0);
+#endif
+#ifdef __linux
+ scan_linux6(have_v6);
+#endif
+
+ /* Get interface list from system. */
+ if ((s = socket(AF_INET, SOCK_DGRAM, 0)) == -1)
+ goto err_ret;
+
+ /*
+ * Grow buffer until large enough to contain all interface
+ * descriptions.
+ */
+ for (;;) {
+ buf = memget(bufsiz);
+ if (buf == NULL)
+ goto err_ret;
+ ifc.ifc_len = bufsiz;
+ ifc.ifc_buf = buf;
+#ifdef IRIX_EMUL_IOCTL_SIOCGIFCONF
+ /*
+ * This is a fix for IRIX OS in which the call to ioctl with
+ * the flag SIOCGIFCONF may not return an entry for all the
+ * interfaces like most flavors of Unix.
+ */
+ if (emul_ioctl(&ifc) >= 0)
+ break;
+#else
+ if ((n = ioctl(s, SIOCGIFCONF, (char *)&ifc)) != -1) {
+ /*
+ * Some OS's just return what will fit rather
+ * than set EINVAL if the buffer is too small
+ * to fit all the interfaces in. If
+ * ifc.ifc_len is too near to the end of the
+ * buffer we will grow it just in case and
+ * retry.
+ */
+ if (ifc.ifc_len + 2 * sizeof(u.ifreq) < bufsiz)
+ break;
+ }
+#endif
+ if ((n == -1) && errno != EINVAL)
+ goto err_ret;
+
+ if (bufsiz > 1000000)
+ goto err_ret;
+
+ memput(buf, bufsiz);
+ bufsiz += 4096;
+ }
+
+ /* Parse system's interface list. */
+ cplim = buf + ifc.ifc_len; /*%< skip over if's with big ifr_addr's */
+ for (cp = buf;
+ (*have_v4 == 0 || *have_v6 == 0) && cp < cplim;
+ cp += cpsize) {
+ memcpy(&u.ifreq, cp, sizeof u.ifreq);
+#ifdef HAVE_SA_LEN
+#ifdef FIX_ZERO_SA_LEN
+ if (u.ifreq.ifr_addr.sa_len == 0)
+ u.ifreq.ifr_addr.sa_len = 16;
+#endif
+#ifdef HAVE_MINIMUM_IFREQ
+ cpsize = sizeof u.ifreq;
+ if (u.ifreq.ifr_addr.sa_len > sizeof (struct sockaddr))
+ cpsize += (int)u.ifreq.ifr_addr.sa_len -
+ (int)(sizeof (struct sockaddr));
+#else
+ cpsize = sizeof u.ifreq.ifr_name + u.ifreq.ifr_addr.sa_len;
+#endif /* HAVE_MINIMUM_IFREQ */
+ if (cpsize > sizeof u.ifreq && cpsize <= sizeof u)
+ memcpy(&u.ifreq, cp, cpsize);
+#elif defined SIOCGIFCONF_ADDR
+ cpsize = sizeof u.ifreq;
+#else
+ cpsize = sizeof u.ifreq.ifr_name;
+ /* XXX maybe this should be a hard error? */
+ if (ioctl(s, SIOCGIFADDR, (char *)&u.ifreq) < 0)
+ continue;
+#endif
+ switch (u.ifreq.ifr_addr.sa_family) {
+ case AF_INET:
+ if (*have_v4 == 0) {
+ memcpy(&in4,
+ &((struct sockaddr_in *)
+ &u.ifreq.ifr_addr)->sin_addr,
+ sizeof in4);
+ if (in4.s_addr == INADDR_ANY)
+ break;
+ n = ioctl(s, SIOCGIFFLAGS, (char *)&u.ifreq);
+ if (n < 0)
+ break;
+ if ((u.ifreq.ifr_flags & IFF_UP) == 0)
+ break;
+ *have_v4 = 1;
+ }
+ break;
+ case AF_INET6:
+ if (*have_v6 == 0) {
+ memcpy(&in6,
+ &((struct sockaddr_in6 *)
+ &u.ifreq.ifr_addr)->sin6_addr,
+ sizeof in6);
+ if (memcmp(&in6, &in6addr_any, sizeof in6) == 0)
+ break;
+ n = ioctl(s, SIOCGIFFLAGS, (char *)&u.ifreq);
+ if (n < 0)
+ break;
+ if ((u.ifreq.ifr_flags & IFF_UP) == 0)
+ break;
+ *have_v6 = 1;
+ }
+ break;
+ }
+ }
+ if (buf != NULL)
+ memput(buf, bufsiz);
+ close(s);
+ /* printf("scan interface -> 4=%d 6=%d\n", *have_v4, *have_v6); */
+ return (0);
+ err_ret:
+ if (buf != NULL)
+ memput(buf, bufsiz);
+ if (s != -1)
+ close(s);
+ /* printf("scan interface -> 4=%d 6=%d\n", *have_v4, *have_v6); */
+ return (-1);
+}
+
+static struct hostent *
+copyandmerge(struct hostent *he1, struct hostent *he2, int af, int *error_num) {
+ struct hostent *he = NULL;
+ int addresses = 1; /*%< NULL terminator */
+ int names = 1; /*%< NULL terminator */
+ int len = 0;
+ char **cpp, **npp;
+
+ /*
+ * Work out array sizes;
+ */
+ if (he1 != NULL) {
+ cpp = he1->h_addr_list;
+ while (*cpp != NULL) {
+ addresses++;
+ cpp++;
+ }
+ cpp = he1->h_aliases;
+ while (*cpp != NULL) {
+ names++;
+ cpp++;
+ }
+ }
+
+ if (he2 != NULL) {
+ cpp = he2->h_addr_list;
+ while (*cpp != NULL) {
+ addresses++;
+ cpp++;
+ }
+ if (he1 == NULL) {
+ cpp = he2->h_aliases;
+ while (*cpp != NULL) {
+ names++;
+ cpp++;
+ }
+ }
+ }
+
+ if (addresses == 1) {
+ *error_num = NO_ADDRESS;
+ return (NULL);
+ }
+
+ he = memget(sizeof *he);
+ if (he == NULL)
+ goto no_recovery;
+
+ he->h_addr_list = memget(sizeof(char *) * (addresses));
+ if (he->h_addr_list == NULL)
+ goto cleanup0;
+ memset(he->h_addr_list, 0, sizeof(char *) * (addresses));
+
+ /* copy addresses */
+ npp = he->h_addr_list;
+ if (he1 != NULL) {
+ cpp = he1->h_addr_list;
+ while (*cpp != NULL) {
+ *npp = memget((af == AF_INET) ? INADDRSZ : IN6ADDRSZ);
+ if (*npp == NULL)
+ goto cleanup1;
+ /* convert to mapped if required */
+ if (af == AF_INET6 && he1->h_addrtype == AF_INET) {
+ memcpy(*npp, in6addr_mapped,
+ sizeof in6addr_mapped);
+ memcpy(*npp + sizeof in6addr_mapped, *cpp,
+ INADDRSZ);
+ } else {
+ memcpy(*npp, *cpp,
+ (af == AF_INET) ? INADDRSZ : IN6ADDRSZ);
+ }
+ cpp++;
+ npp++;
+ }
+ }
+
+ if (he2 != NULL) {
+ cpp = he2->h_addr_list;
+ while (*cpp != NULL) {
+ *npp = memget((af == AF_INET) ? INADDRSZ : IN6ADDRSZ);
+ if (*npp == NULL)
+ goto cleanup1;
+ /* convert to mapped if required */
+ if (af == AF_INET6 && he2->h_addrtype == AF_INET) {
+ memcpy(*npp, in6addr_mapped,
+ sizeof in6addr_mapped);
+ memcpy(*npp + sizeof in6addr_mapped, *cpp,
+ INADDRSZ);
+ } else {
+ memcpy(*npp, *cpp,
+ (af == AF_INET) ? INADDRSZ : IN6ADDRSZ);
+ }
+ cpp++;
+ npp++;
+ }
+ }
+
+ he->h_aliases = memget(sizeof(char *) * (names));
+ if (he->h_aliases == NULL)
+ goto cleanup1;
+ memset(he->h_aliases, 0, sizeof(char *) * (names));
+
+ /* copy aliases */
+ npp = he->h_aliases;
+ cpp = (he1 != NULL) ? he1->h_aliases : he2->h_aliases;
+ while (*cpp != NULL) {
+ len = strlen (*cpp) + 1;
+ *npp = memget(len);
+ if (*npp == NULL)
+ goto cleanup2;
+ strcpy(*npp, *cpp);
+ npp++;
+ cpp++;
+ }
+
+ /* copy hostname */
+ he->h_name = memget(strlen((he1 != NULL) ?
+ he1->h_name : he2->h_name) + 1);
+ if (he->h_name == NULL)
+ goto cleanup2;
+ strcpy(he->h_name, (he1 != NULL) ? he1->h_name : he2->h_name);
+
+ /* set address type and length */
+ he->h_addrtype = af;
+ he->h_length = (af == AF_INET) ? INADDRSZ : IN6ADDRSZ;
+ return(he);
+
+ cleanup2:
+ cpp = he->h_aliases;
+ while (*cpp != NULL) {
+ memput(*cpp, strlen(*cpp) + 1);
+ cpp++;
+ }
+ memput(he->h_aliases, sizeof(char *) * (names));
+
+ cleanup1:
+ cpp = he->h_addr_list;
+ while (*cpp != NULL) {
+ memput(*cpp, (af == AF_INET) ? INADDRSZ : IN6ADDRSZ);
+ *cpp = NULL;
+ cpp++;
+ }
+ memput(he->h_addr_list, sizeof(char *) * (addresses));
+
+ cleanup0:
+ memput(he, sizeof *he);
+
+ no_recovery:
+ *error_num = NO_RECOVERY;
+ return (NULL);
+}
+
+static struct net_data *
+init() {
+ struct net_data *net_data;
+
+ if (!(net_data = net_data_init(NULL)))
+ goto error;
+ if (!net_data->ho) {
+ net_data->ho = (*net_data->irs->ho_map)(net_data->irs);
+ if (!net_data->ho || !net_data->res) {
+ error:
+ errno = EIO;
+
+#ifdef SUNW_SETHERRNO
+ h_errno = NETDB_INTERNAL;
+#endif /* SUNW_SETHERRNO */
+ if (net_data && net_data->res)
+ RES_SET_H_ERRNO(net_data->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+
+ (*net_data->ho->res_set)(net_data->ho, net_data->res, NULL);
+ }
+
+ return (net_data);
+}
+
+static void
+freepvt(struct net_data *net_data) {
+ if (net_data->ho_data) {
+ free(net_data->ho_data);
+ net_data->ho_data = NULL;
+ }
+}
+
+static struct hostent *
+fakeaddr(const char *name, int af, struct net_data *net_data) {
+ struct pvt *pvt;
+
+ freepvt(net_data);
+ net_data->ho_data = malloc(sizeof (struct pvt));
+ if (!net_data->ho_data) {
+ errno = ENOMEM;
+ RES_SET_H_ERRNO(net_data->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+ pvt = net_data->ho_data;
+#ifndef __bsdi__
+ /*
+ * Unlike its forebear(inet_aton), our friendly inet_pton() is strict
+ * in its interpretation of its input, and it will only return "1" if
+ * the input string is a formally valid(and thus unambiguous with
+ * respect to host names) internet address specification for this AF.
+ *
+ * This means "telnet 0xdeadbeef" and "telnet 127.1" are dead now.
+ */
+ if (inet_pton(af, name, pvt->addr) != 1) {
+#else
+ /* BSDI XXX
+ * We put this back to inet_aton -- we really want the old behavior
+ * Long live 127.1...
+ */
+ if ((af != AF_INET ||
+ inet_aton(name, (struct in_addr *)pvt->addr) != 1) &&
+ inet_pton(af, name, pvt->addr) != 1) {
+#endif
+ RES_SET_H_ERRNO(net_data->res, HOST_NOT_FOUND);
+ return (NULL);
+ }
+ strncpy(pvt->name, name, NS_MAXDNAME);
+ pvt->name[NS_MAXDNAME] = '\0';
+ if (af == AF_INET && (net_data->res->options & RES_USE_INET6) != 0U) {
+ map_v4v6_address(pvt->addr, pvt->addr);
+ af = AF_INET6;
+ }
+ pvt->host.h_addrtype = af;
+ switch(af) {
+ case AF_INET:
+ pvt->host.h_length = NS_INADDRSZ;
+ break;
+ case AF_INET6:
+ pvt->host.h_length = NS_IN6ADDRSZ;
+ break;
+ default:
+ errno = EAFNOSUPPORT;
+ RES_SET_H_ERRNO(net_data->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+ pvt->host.h_name = pvt->name;
+ pvt->host.h_aliases = pvt->aliases;
+ pvt->aliases[0] = NULL;
+ pvt->addrs[0] = (char *)pvt->addr;
+ pvt->addrs[1] = NULL;
+ pvt->host.h_addr_list = pvt->addrs;
+ RES_SET_H_ERRNO(net_data->res, NETDB_SUCCESS);
+ return (&pvt->host);
+}
+
+#ifdef grot /*%< for future use in gethostbyaddr(), for "SUNSECURITY" */
+ struct hostent *rhp;
+ char **haddr;
+ u_long old_options;
+ char hname2[MAXDNAME+1];
+
+ if (af == AF_INET) {
+ /*
+ * turn off search as the name should be absolute,
+ * 'localhost' should be matched by defnames
+ */
+ strncpy(hname2, hp->h_name, MAXDNAME);
+ hname2[MAXDNAME] = '\0';
+ old_options = net_data->res->options;
+ net_data->res->options &= ~RES_DNSRCH;
+ net_data->res->options |= RES_DEFNAMES;
+ if (!(rhp = gethostbyname(hname2))) {
+ net_data->res->options = old_options;
+ RES_SET_H_ERRNO(net_data->res, HOST_NOT_FOUND);
+ return (NULL);
+ }
+ net_data->res->options = old_options;
+ for (haddr = rhp->h_addr_list; *haddr; haddr++)
+ if (!memcmp(*haddr, addr, INADDRSZ))
+ break;
+ if (!*haddr) {
+ RES_SET_H_ERRNO(net_data->res, HOST_NOT_FOUND);
+ return (NULL);
+ }
+ }
+#endif /* grot */
+#endif /*__BIND_NOSTATIC*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/gethostent_r.c b/usr/src/lib/libresolv2_joy/common/irs/gethostent_r.c
new file mode 100644
index 0000000000..1971677c5d
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/gethostent_r.c
@@ -0,0 +1,278 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1998-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: gethostent_r.c,v 1.9 2005/09/03 12:41:37 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include <port_before.h>
+#if !defined(_REENTRANT) || !defined(DO_PTHREADS)
+ static int gethostent_r_not_required = 0;
+#else
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <sys/param.h>
+#include <port_after.h>
+
+#pragma redefine extname res_gethostbyname joy_res_gethostbyname
+#pragma redefine extname res_gethostbyaddr joy_res_gethostbyaddr
+
+#ifdef HOST_R_RETURN
+
+static HOST_R_RETURN
+copy_hostent(struct hostent *, struct hostent *, HOST_R_COPY_ARGS);
+
+HOST_R_RETURN
+gethostbyname_r(const char *name, struct hostent *hptr, HOST_R_ARGS) {
+ struct hostent *he = gethostbyname(name);
+#ifdef HOST_R_SETANSWER
+ int n = 0;
+#endif
+
+#ifdef HOST_R_ERRNO
+ HOST_R_ERRNO;
+#endif
+
+#ifdef HOST_R_SETANSWER
+ if (he == NULL || (n = copy_hostent(he, hptr, HOST_R_COPY)) != 0)
+ *answerp = NULL;
+ else
+ *answerp = hptr;
+
+ return (n);
+#else
+ if (he == NULL)
+ return (HOST_R_BAD);
+
+ return (copy_hostent(he, hptr, HOST_R_COPY));
+#endif
+}
+
+HOST_R_RETURN
+gethostbyaddr_r(const char *addr, int len, int type,
+ struct hostent *hptr, HOST_R_ARGS) {
+ struct hostent *he = gethostbyaddr(addr, len, type);
+#ifdef HOST_R_SETANSWER
+ int n = 0;
+#endif
+
+#ifdef HOST_R_ERRNO
+ HOST_R_ERRNO;
+#endif
+
+#ifdef HOST_R_SETANSWER
+ if (he == NULL || (n = copy_hostent(he, hptr, HOST_R_COPY)) != 0)
+ *answerp = NULL;
+ else
+ *answerp = hptr;
+
+ return (n);
+#else
+ if (he == NULL)
+ return (HOST_R_BAD);
+
+ return (copy_hostent(he, hptr, HOST_R_COPY));
+#endif
+}
+
+/*%
+ * These assume a single context is in operation per thread.
+ * If this is not the case we will need to call irs directly
+ * rather than through the base functions.
+ */
+
+HOST_R_RETURN
+gethostent_r(struct hostent *hptr, HOST_R_ARGS) {
+ struct hostent *he = gethostent();
+#ifdef HOST_R_SETANSWER
+ int n = 0;
+#endif
+
+#ifdef HOST_R_ERRNO
+ HOST_R_ERRNO;
+#endif
+
+#ifdef HOST_R_SETANSWER
+ if (he == NULL || (n = copy_hostent(he, hptr, HOST_R_COPY)) != 0)
+ *answerp = NULL;
+ else
+ *answerp = hptr;
+
+ return (n);
+#else
+ if (he == NULL)
+ return (HOST_R_BAD);
+
+ return (copy_hostent(he, hptr, HOST_R_COPY));
+#endif
+}
+
+HOST_R_SET_RETURN
+#ifdef HOST_R_ENT_ARGS
+sethostent_r(int stay_open, HOST_R_ENT_ARGS)
+#else
+sethostent_r(int stay_open)
+#endif
+{
+#ifdef HOST_R_ENT_ARGS
+ UNUSED(hdptr);
+#endif
+ sethostent(stay_open);
+#ifdef HOST_R_SET_RESULT
+ return (HOST_R_SET_RESULT);
+#endif
+}
+
+HOST_R_END_RETURN
+#ifdef HOST_R_ENT_ARGS
+endhostent_r(HOST_R_ENT_ARGS)
+#else
+endhostent_r(void)
+#endif
+{
+#ifdef HOST_R_ENT_ARGS
+ UNUSED(hdptr);
+#endif
+ endhostent();
+ HOST_R_END_RESULT(HOST_R_OK);
+}
+
+/* Private */
+
+#ifndef HOSTENT_DATA
+static HOST_R_RETURN
+copy_hostent(struct hostent *he, struct hostent *hptr, HOST_R_COPY_ARGS) {
+ char *cp;
+ char **ptr;
+ int i, n;
+ int nptr, len;
+
+ /* Find out the amount of space required to store the answer. */
+ nptr = 2; /*%< NULL ptrs */
+ len = (char *)ALIGN(buf) - buf;
+ for (i = 0; he->h_addr_list[i]; i++, nptr++) {
+ len += he->h_length;
+ }
+ for (i = 0; he->h_aliases[i]; i++, nptr++) {
+ len += strlen(he->h_aliases[i]) + 1;
+ }
+ len += strlen(he->h_name) + 1;
+ len += nptr * sizeof(char*);
+
+ if (len > buflen) {
+ errno = ERANGE;
+ return (HOST_R_BAD);
+ }
+
+ /* copy address size and type */
+ hptr->h_addrtype = he->h_addrtype;
+ n = hptr->h_length = he->h_length;
+
+ ptr = (char **)ALIGN(buf);
+ cp = (char *)ALIGN(buf) + nptr * sizeof(char *);
+
+ /* copy address list */
+ hptr->h_addr_list = ptr;
+ for (i = 0; he->h_addr_list[i]; i++ , ptr++) {
+ memcpy(cp, he->h_addr_list[i], n);
+ hptr->h_addr_list[i] = cp;
+ cp += n;
+ }
+ hptr->h_addr_list[i] = NULL;
+ ptr++;
+
+ /* copy official name */
+ n = strlen(he->h_name) + 1;
+ strcpy(cp, he->h_name);
+ hptr->h_name = cp;
+ cp += n;
+
+ /* copy aliases */
+ hptr->h_aliases = ptr;
+ for (i = 0 ; he->h_aliases[i]; i++) {
+ n = strlen(he->h_aliases[i]) + 1;
+ strcpy(cp, he->h_aliases[i]);
+ hptr->h_aliases[i] = cp;
+ cp += n;
+ }
+ hptr->h_aliases[i] = NULL;
+
+ return (HOST_R_OK);
+}
+#else /* !HOSTENT_DATA */
+static int
+copy_hostent(struct hostent *he, struct hostent *hptr, HOST_R_COPY_ARGS) {
+ char *cp, *eob;
+ int i, n;
+
+ /* copy address size and type */
+ hptr->h_addrtype = he->h_addrtype;
+ n = hptr->h_length = he->h_length;
+
+ /* copy up to first 35 addresses */
+ i = 0;
+ cp = hdptr->hostbuf;
+ eob = hdptr->hostbuf + sizeof(hdptr->hostbuf);
+ hptr->h_addr_list = hdptr->h_addr_ptrs;
+ while (he->h_addr_list[i] && i < (_MAXADDRS)) {
+ if (n < (eob - cp)) {
+ memcpy(cp, he->h_addr_list[i], n);
+ hptr->h_addr_list[i] = cp;
+ cp += n;
+ } else {
+ break;
+ }
+ i++;
+ }
+ hptr->h_addr_list[i] = NULL;
+
+ /* copy official name */
+ if ((n = strlen(he->h_name) + 1) < (eob - cp)) {
+ strcpy(cp, he->h_name);
+ hptr->h_name = cp;
+ cp += n;
+ } else {
+ return (-1);
+ }
+
+ /* copy aliases */
+ i = 0;
+ hptr->h_aliases = hdptr->host_aliases;
+ while (he->h_aliases[i] && i < (_MAXALIASES-1)) {
+ if ((n = strlen(he->h_aliases[i]) + 1) < (eob - cp)) {
+ strcpy(cp, he->h_aliases[i]);
+ hptr->h_aliases[i] = cp;
+ cp += n;
+ } else {
+ break;
+ }
+ i++;
+ }
+ hptr->h_aliases[i] = NULL;
+
+ return (HOST_R_OK);
+}
+#endif /* !HOSTENT_DATA */
+#else /* HOST_R_RETURN */
+ static int gethostent_r_unknown_system = 0;
+#endif /* HOST_R_RETURN */
+#endif /* !defined(_REENTRANT) || !defined(DO_PTHREADS) */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/getnameinfo.c b/usr/src/lib/libresolv2_joy/common/irs/getnameinfo.c
new file mode 100644
index 0000000000..360bda8bfe
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/getnameinfo.c
@@ -0,0 +1,334 @@
+/*
+ * Issues to be discussed:
+ * - Thread safe-ness must be checked
+ */
+
+#if ( defined(__linux__) || defined(__linux) || defined(LINUX) )
+#ifndef IF_NAMESIZE
+# ifdef IFNAMSIZ
+# define IF_NAMESIZE IFNAMSIZ
+# else
+# define IF_NAMESIZE 16
+# endif
+#endif
+#endif
+
+/*
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by WIDE Project and
+ * its contributors.
+ * 4. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <port_before.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <string.h>
+#include <stddef.h>
+
+#include <port_after.h>
+
+/*%
+ * Note that a_off will be dynamically adjusted so that to be consistent
+ * with the definition of sockaddr_in{,6}.
+ * The value presented below is just a guess.
+ */
+static struct afd {
+ int a_af;
+ int a_addrlen;
+ size_t a_socklen;
+ int a_off;
+} afdl [] = {
+ /* first entry is linked last... */
+ {PF_INET, sizeof(struct in_addr), sizeof(struct sockaddr_in),
+ offsetof(struct sockaddr_in, sin_addr)},
+ {PF_INET6, sizeof(struct in6_addr), sizeof(struct sockaddr_in6),
+ offsetof(struct sockaddr_in6, sin6_addr)},
+ {0, 0, 0, 0},
+};
+
+struct sockinet {
+#ifdef HAVE_SA_LEN
+ u_char si_len;
+#endif
+ u_char si_family;
+ u_short si_port;
+};
+
+static int ip6_parsenumeric __P((const struct sockaddr *, const char *, char *,
+ size_t, int));
+#ifdef HAVE_SIN6_SCOPE_ID
+static int ip6_sa2str __P((const struct sockaddr_in6 *, char *, size_t, int));
+#endif
+
+int
+getnameinfo(sa, salen, host, hostlen, serv, servlen, flags)
+ const struct sockaddr *sa;
+ size_t salen;
+ char *host;
+ size_t hostlen;
+ char *serv;
+ size_t servlen;
+ int flags;
+{
+ struct afd *afd;
+ struct servent *sp;
+ struct hostent *hp;
+ u_short port;
+#ifdef HAVE_SA_LEN
+ size_t len;
+#endif
+ int family, i;
+ const char *addr;
+ char *p;
+ char numserv[512];
+ char numaddr[512];
+ const struct sockaddr_in6 *sin6;
+
+ if (sa == NULL)
+ return EAI_FAIL;
+
+#ifdef HAVE_SA_LEN
+ len = sa->sa_len;
+ if (len != salen) return EAI_FAIL;
+#endif
+
+ family = sa->sa_family;
+ for (i = 0; afdl[i].a_af; i++)
+ if (afdl[i].a_af == family) {
+ afd = &afdl[i];
+ goto found;
+ }
+ return EAI_FAMILY;
+
+ found:
+ if (salen != afd->a_socklen) return EAI_FAIL;
+
+ port = ((const struct sockinet *)sa)->si_port; /*%< network byte order */
+ addr = (const char *)sa + afd->a_off;
+
+ if (serv == NULL || servlen == 0U) {
+ /*
+ * rfc2553bis says that serv == NULL or servlen == 0 means that
+ * the caller does not want the result.
+ */
+ } else if (flags & NI_NUMERICSERV) {
+ sprintf(numserv, "%d", ntohs(port));
+ if (strlen(numserv) > servlen)
+ return EAI_MEMORY;
+ strcpy(serv, numserv);
+ } else {
+ sp = getservbyport(port, (flags & NI_DGRAM) ? "udp" : "tcp");
+ if (sp) {
+ if (strlen(sp->s_name) + 1 > servlen)
+ return EAI_MEMORY;
+ strcpy(serv, sp->s_name);
+ } else
+ return EAI_NONAME;
+ }
+
+ switch (sa->sa_family) {
+ case AF_INET:
+ if (ntohl(*(const u_int32_t *)addr) >> IN_CLASSA_NSHIFT == 0)
+ flags |= NI_NUMERICHOST;
+ break;
+ case AF_INET6:
+ sin6 = (const struct sockaddr_in6 *)sa;
+ switch (sin6->sin6_addr.s6_addr[0]) {
+ case 0x00:
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
+ ;
+ else if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr))
+ ;
+ else
+ flags |= NI_NUMERICHOST;
+ break;
+ default:
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
+ flags |= NI_NUMERICHOST;
+ else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
+ flags |= NI_NUMERICHOST;
+ break;
+ }
+ break;
+ }
+ if (host == NULL || hostlen == 0U) {
+ /*
+ * rfc2553bis says that host == NULL or hostlen == 0 means that
+ * the caller does not want the result.
+ */
+ } else if (flags & NI_NUMERICHOST) {
+ goto numeric;
+ } else {
+ hp = gethostbyaddr(addr, afd->a_addrlen, afd->a_af);
+
+ if (hp) {
+ if (flags & NI_NOFQDN) {
+ p = strchr(hp->h_name, '.');
+ if (p) *p = '\0';
+ }
+ if (strlen(hp->h_name) + 1 > hostlen)
+ return EAI_MEMORY;
+ strcpy(host, hp->h_name);
+ } else {
+ if (flags & NI_NAMEREQD)
+ return EAI_NONAME;
+ numeric:
+ switch(afd->a_af) {
+ case AF_INET6:
+ {
+ int error;
+
+ if ((error = ip6_parsenumeric(sa, addr, host,
+ hostlen,
+ flags)) != 0)
+ return(error);
+ break;
+ }
+
+ default:
+ if (inet_ntop(afd->a_af, addr, numaddr,
+ sizeof(numaddr)) == NULL)
+ return EAI_NONAME;
+ if (strlen(numaddr) + 1 > hostlen)
+ return EAI_MEMORY;
+ strcpy(host, numaddr);
+ }
+ }
+ }
+ return(0);
+}
+
+static int
+ip6_parsenumeric(const struct sockaddr *sa, const char *addr, char *host,
+ size_t hostlen, int flags)
+{
+ size_t numaddrlen;
+ char numaddr[512];
+
+#ifndef HAVE_SIN6_SCOPE_ID
+ UNUSED(sa);
+ UNUSED(flags);
+#endif
+
+ if (inet_ntop(AF_INET6, addr, numaddr, sizeof(numaddr))
+ == NULL)
+ return EAI_SYSTEM;
+
+ numaddrlen = strlen(numaddr);
+ if (numaddrlen + 1 > hostlen) /*%< don't forget terminator */
+ return EAI_MEMORY;
+ strcpy(host, numaddr);
+
+#ifdef HAVE_SIN6_SCOPE_ID
+ if (((const struct sockaddr_in6 *)sa)->sin6_scope_id) {
+ char scopebuf[MAXHOSTNAMELEN]; /*%< XXX */
+ int scopelen;
+
+ /* ip6_sa2str never fails */
+ scopelen = ip6_sa2str((const struct sockaddr_in6 *)sa,
+ scopebuf, sizeof(scopebuf), flags);
+
+ if (scopelen + 1 + numaddrlen + 1 > hostlen)
+ return EAI_MEMORY;
+
+ /* construct <numeric-addr><delim><scopeid> */
+ memcpy(host + numaddrlen + 1, scopebuf,
+ scopelen);
+ host[numaddrlen] = SCOPE_DELIMITER;
+ host[numaddrlen + 1 + scopelen] = '\0';
+ }
+#endif
+
+ return 0;
+}
+
+#ifdef HAVE_SIN6_SCOPE_ID
+/* ARGSUSED */
+static int
+ip6_sa2str(const struct sockaddr_in6 *sa6, char *buf,
+ size_t bufsiz, int flags)
+{
+#ifdef USE_IFNAMELINKID
+ unsigned int ifindex = (unsigned int)sa6->sin6_scope_id;
+ const struct in6_addr *a6 = &sa6->sin6_addr;
+#endif
+ char tmp[64];
+
+#ifdef NI_NUMERICSCOPE
+ if (flags & NI_NUMERICSCOPE) {
+ sprintf(tmp, "%u", sa6->sin6_scope_id);
+ if (bufsiz != 0U) {
+ strncpy(buf, tmp, bufsiz - 1);
+ buf[bufsiz - 1] = '\0';
+ }
+ return(strlen(tmp));
+ }
+#endif
+
+#ifdef USE_IFNAMELINKID
+ /*
+ * For a link-local address, convert the index to an interface
+ * name, assuming a one-to-one mapping between links and interfaces.
+ * Note, however, that this assumption is stronger than the
+ * specification of the scoped address architecture; the
+ * specficication says that more than one interfaces can belong to
+ * a single link.
+ */
+
+ /* if_indextoname() does not take buffer size. not a good api... */
+ if ((IN6_IS_ADDR_LINKLOCAL(a6) || IN6_IS_ADDR_MC_LINKLOCAL(a6)) &&
+ bufsiz >= IF_NAMESIZE) {
+ char *p = if_indextoname(ifindex, buf);
+ if (p) {
+ return(strlen(p));
+ }
+ }
+#endif
+
+ /* last resort */
+ sprintf(tmp, "%u", sa6->sin6_scope_id);
+ if (bufsiz != 0U) {
+ strncpy(buf, tmp, bufsiz - 1);
+ buf[bufsiz - 1] = '\0';
+ }
+ return(strlen(tmp));
+}
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/getnetent.c b/usr/src/lib/libresolv2_joy/common/irs/getnetent.c
new file mode 100644
index 0000000000..0d00699e81
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/getnetent.c
@@ -0,0 +1,345 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: getnetent.c,v 1.7 2005/04/27 04:56:25 sra Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#if !defined(__BIND_NOSTATIC)
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <arpa/inet.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "irs_data.h"
+
+/* Definitions */
+
+struct pvt {
+ struct netent netent;
+ char * aliases[1];
+ char name[MAXDNAME + 1];
+};
+
+/* Forward */
+
+static struct net_data *init(void);
+static struct netent *nw_to_net(struct nwent *, struct net_data *);
+static void freepvt(struct net_data *);
+static struct netent *fakeaddr(const char *, int af, struct net_data *);
+
+/* Portability */
+
+#ifndef INADDR_NONE
+# define INADDR_NONE 0xffffffff
+#endif
+
+/* Public */
+
+struct netent *
+getnetent() {
+ struct net_data *net_data = init();
+
+ return (getnetent_p(net_data));
+}
+
+struct netent *
+getnetbyname(const char *name) {
+ struct net_data *net_data = init();
+
+ return (getnetbyname_p(name, net_data));
+}
+
+struct netent *
+getnetbyaddr(unsigned long net, int type) {
+ struct net_data *net_data = init();
+
+ return (getnetbyaddr_p(net, type, net_data));
+}
+
+void
+setnetent(int stayopen) {
+ struct net_data *net_data = init();
+
+ setnetent_p(stayopen, net_data);
+}
+
+
+void
+endnetent() {
+ struct net_data *net_data = init();
+
+ endnetent_p(net_data);
+}
+
+/* Shared private. */
+
+struct netent *
+getnetent_p(struct net_data *net_data) {
+ struct irs_nw *nw;
+
+ if (!net_data || !(nw = net_data->nw))
+ return (NULL);
+ net_data->nww_last = (*nw->next)(nw);
+ net_data->nw_last = nw_to_net(net_data->nww_last, net_data);
+ return (net_data->nw_last);
+}
+
+struct netent *
+getnetbyname_p(const char *name, struct net_data *net_data) {
+ struct irs_nw *nw;
+ struct netent *np;
+ char **nap;
+
+ if (!net_data || !(nw = net_data->nw))
+ return (NULL);
+ if (net_data->nw_stayopen && net_data->nw_last) {
+ if (!strcmp(net_data->nw_last->n_name, name))
+ return (net_data->nw_last);
+ for (nap = net_data->nw_last->n_aliases; nap && *nap; nap++)
+ if (!strcmp(name, *nap))
+ return (net_data->nw_last);
+ }
+ if ((np = fakeaddr(name, AF_INET, net_data)) != NULL)
+ return (np);
+ net_data->nww_last = (*nw->byname)(nw, name, AF_INET);
+ net_data->nw_last = nw_to_net(net_data->nww_last, net_data);
+ if (!net_data->nw_stayopen)
+ endnetent();
+ return (net_data->nw_last);
+}
+
+struct netent *
+getnetbyaddr_p(unsigned long net, int type, struct net_data *net_data) {
+ struct irs_nw *nw;
+ u_char addr[4];
+ int bits;
+
+ if (!net_data || !(nw = net_data->nw))
+ return (NULL);
+ if (net_data->nw_stayopen && net_data->nw_last)
+ if (type == net_data->nw_last->n_addrtype &&
+ net == net_data->nw_last->n_net)
+ return (net_data->nw_last);
+
+ /* cannonize net(host order) */
+ if (net < 256UL) {
+ net <<= 24;
+ bits = 8;
+ } else if (net < 65536UL) {
+ net <<= 16;
+ bits = 16;
+ } else if (net < 16777216UL) {
+ net <<= 8;
+ bits = 24;
+ } else
+ bits = 32;
+
+ /* convert to net order */
+ addr[0] = (0xFF000000 & net) >> 24;
+ addr[1] = (0x00FF0000 & net) >> 16;
+ addr[2] = (0x0000FF00 & net) >> 8;
+ addr[3] = (0x000000FF & net);
+
+ /* reduce bits to as close to natural number as possible */
+ if ((bits == 32) && (addr[0] < 224) && (addr[3] == 0)) {
+ if ((addr[0] < 192) && (addr[2] == 0)) {
+ if ((addr[0] < 128) && (addr[1] == 0))
+ bits = 8;
+ else
+ bits = 16;
+ } else {
+ bits = 24;
+ }
+ }
+
+ net_data->nww_last = (*nw->byaddr)(nw, addr, bits, AF_INET);
+ net_data->nw_last = nw_to_net(net_data->nww_last, net_data);
+ if (!net_data->nw_stayopen)
+ endnetent();
+ return (net_data->nw_last);
+}
+
+
+
+
+void
+setnetent_p(int stayopen, struct net_data *net_data) {
+ struct irs_nw *nw;
+
+ if (!net_data || !(nw = net_data->nw))
+ return;
+ freepvt(net_data);
+ (*nw->rewind)(nw);
+ net_data->nw_stayopen = (stayopen != 0);
+ if (stayopen == 0)
+ net_data_minimize(net_data);
+}
+
+void
+endnetent_p(struct net_data *net_data) {
+ struct irs_nw *nw;
+
+ if ((net_data != NULL) && ((nw = net_data->nw) != NULL))
+ (*nw->minimize)(nw);
+}
+
+/* Private */
+
+static struct net_data *
+init() {
+ struct net_data *net_data;
+
+ if (!(net_data = net_data_init(NULL)))
+ goto error;
+ if (!net_data->nw) {
+ net_data->nw = (*net_data->irs->nw_map)(net_data->irs);
+
+ if (!net_data->nw || !net_data->res) {
+ error:
+ errno = EIO;
+ return (NULL);
+ }
+ (*net_data->nw->res_set)(net_data->nw, net_data->res, NULL);
+ }
+
+ return (net_data);
+}
+
+static void
+freepvt(struct net_data *net_data) {
+ if (net_data->nw_data) {
+ free(net_data->nw_data);
+ net_data->nw_data = NULL;
+ }
+}
+
+static struct netent *
+fakeaddr(const char *name, int af, struct net_data *net_data) {
+ struct pvt *pvt;
+ const char *cp;
+ u_long tmp;
+
+ if (af != AF_INET) {
+ /* XXX should support IPv6 some day */
+ errno = EAFNOSUPPORT;
+ RES_SET_H_ERRNO(net_data->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+ if (!isascii((unsigned char)(name[0])) ||
+ !isdigit((unsigned char)(name[0])))
+ return (NULL);
+ for (cp = name; *cp; ++cp)
+ if (!isascii(*cp) || (!isdigit((unsigned char)*cp) && *cp != '.'))
+ return (NULL);
+ if (*--cp == '.')
+ return (NULL);
+
+ /* All-numeric, no dot at the end. */
+
+ tmp = inet_network(name);
+ if (tmp == INADDR_NONE) {
+ RES_SET_H_ERRNO(net_data->res, HOST_NOT_FOUND);
+ return (NULL);
+ }
+
+ /* Valid network number specified.
+ * Fake up a netent as if we'd actually
+ * done a lookup.
+ */
+ freepvt(net_data);
+ net_data->nw_data = malloc(sizeof (struct pvt));
+ if (!net_data->nw_data) {
+ errno = ENOMEM;
+ RES_SET_H_ERRNO(net_data->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+ pvt = net_data->nw_data;
+
+ strncpy(pvt->name, name, MAXDNAME);
+ pvt->name[MAXDNAME] = '\0';
+ pvt->netent.n_name = pvt->name;
+ pvt->netent.n_addrtype = AF_INET;
+ pvt->netent.n_aliases = pvt->aliases;
+ pvt->aliases[0] = NULL;
+ pvt->netent.n_net = tmp;
+
+ return (&pvt->netent);
+}
+
+static struct netent *
+nw_to_net(struct nwent *nwent, struct net_data *net_data) {
+ struct pvt *pvt;
+ u_long addr = 0;
+ int i;
+ int msbyte;
+
+ if (!nwent || nwent->n_addrtype != AF_INET)
+ return (NULL);
+ freepvt(net_data);
+ net_data->nw_data = malloc(sizeof (struct pvt));
+ if (!net_data->nw_data) {
+ errno = ENOMEM;
+ RES_SET_H_ERRNO(net_data->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+ pvt = net_data->nw_data;
+ pvt->netent.n_name = nwent->n_name;
+ pvt->netent.n_aliases = nwent->n_aliases;
+ pvt->netent.n_addrtype = nwent->n_addrtype;
+
+/*%
+ * What this code does: Converts net addresses from network to host form.
+ *
+ * msbyte: the index of the most significant byte in the n_addr array.
+ *
+ * Shift bytes in significant order into addr. When all signicant
+ * bytes are in, zero out bits in the LSB that are not part of the network.
+ */
+ msbyte = nwent->n_length / 8 +
+ ((nwent->n_length % 8) != 0 ? 1 : 0) - 1;
+ for (i = 0; i <= msbyte; i++)
+ addr = (addr << 8) | ((unsigned char *)nwent->n_addr)[i];
+ i = (32 - nwent->n_length) % 8;
+ if (i != 0)
+ addr &= ~((1 << (i + 1)) - 1);
+ pvt->netent.n_net = addr;
+ return (&pvt->netent);
+}
+
+#endif /*__BIND_NOSTATIC*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/getnetent_r.c b/usr/src/lib/libresolv2_joy/common/irs/getnetent_r.c
new file mode 100644
index 0000000000..9fb52bc394
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/getnetent_r.c
@@ -0,0 +1,234 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1998-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: getnetent_r.c,v 1.6 2005/09/03 12:41:38 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include <port_before.h>
+#if !defined(_REENTRANT) || !defined(DO_PTHREADS)
+ static int getnetent_r_not_required = 0;
+#else
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <sys/param.h>
+#include <port_after.h>
+
+#ifdef NET_R_RETURN
+
+static NET_R_RETURN
+copy_netent(struct netent *, struct netent *, NET_R_COPY_ARGS);
+
+NET_R_RETURN
+getnetbyname_r(const char *name, struct netent *nptr, NET_R_ARGS) {
+ struct netent *ne = getnetbyname(name);
+#ifdef NET_R_SETANSWER
+ int n = 0;
+
+ if (ne == NULL || (n = copy_netent(ne, nptr, NET_R_COPY)) != 0)
+ *answerp = NULL;
+ else
+ *answerp = ne;
+ if (ne == NULL)
+ *h_errnop = h_errno;
+ return (n);
+#else
+ if (ne == NULL)
+ return (NET_R_BAD);
+
+ return (copy_netent(ne, nptr, NET_R_COPY));
+#endif
+}
+
+#ifndef GETNETBYADDR_ADDR_T
+#define GETNETBYADDR_ADDR_T long
+#endif
+NET_R_RETURN
+getnetbyaddr_r(GETNETBYADDR_ADDR_T addr, int type, struct netent *nptr, NET_R_ARGS) {
+ struct netent *ne = getnetbyaddr(addr, type);
+#ifdef NET_R_SETANSWER
+ int n = 0;
+
+ if (ne == NULL || (n = copy_netent(ne, nptr, NET_R_COPY)) != 0)
+ *answerp = NULL;
+ else
+ *answerp = ne;
+ if (ne == NULL)
+ *h_errnop = h_errno;
+ return (n);
+#else
+
+ if (ne == NULL)
+ return (NET_R_BAD);
+
+ return (copy_netent(ne, nptr, NET_R_COPY));
+#endif
+}
+
+/*%
+ * These assume a single context is in operation per thread.
+ * If this is not the case we will need to call irs directly
+ * rather than through the base functions.
+ */
+
+NET_R_RETURN
+getnetent_r(struct netent *nptr, NET_R_ARGS) {
+ struct netent *ne = getnetent();
+#ifdef NET_R_SETANSWER
+ int n = 0;
+
+ if (ne == NULL || (n = copy_netent(ne, nptr, NET_R_COPY)) != 0)
+ *answerp = NULL;
+ else
+ *answerp = ne;
+ if (ne == NULL)
+ *h_errnop = h_errno;
+ return (n);
+#else
+
+ if (ne == NULL)
+ return (NET_R_BAD);
+
+ return (copy_netent(ne, nptr, NET_R_COPY));
+#endif
+}
+
+NET_R_SET_RETURN
+#ifdef NET_R_ENT_ARGS
+setnetent_r(int stay_open, NET_R_ENT_ARGS)
+#else
+setnetent_r(int stay_open)
+#endif
+{
+#ifdef NET_R_ENT_ARGS
+ UNUSED(ndptr);
+#endif
+ setnetent(stay_open);
+#ifdef NET_R_SET_RESULT
+ return (NET_R_SET_RESULT);
+#endif
+}
+
+NET_R_END_RETURN
+#ifdef NET_R_ENT_ARGS
+endnetent_r(NET_R_ENT_ARGS)
+#else
+endnetent_r()
+#endif
+{
+#ifdef NET_R_ENT_ARGS
+ UNUSED(ndptr);
+#endif
+ endnetent();
+ NET_R_END_RESULT(NET_R_OK);
+}
+
+/* Private */
+
+#ifndef NETENT_DATA
+static NET_R_RETURN
+copy_netent(struct netent *ne, struct netent *nptr, NET_R_COPY_ARGS) {
+ char *cp;
+ int i, n;
+ int numptr, len;
+
+ /* Find out the amount of space required to store the answer. */
+ numptr = 1; /*%< NULL ptr */
+ len = (char *)ALIGN(buf) - buf;
+ for (i = 0; ne->n_aliases[i]; i++, numptr++) {
+ len += strlen(ne->n_aliases[i]) + 1;
+ }
+ len += strlen(ne->n_name) + 1;
+ len += numptr * sizeof(char*);
+
+ if (len > (int)buflen) {
+ errno = ERANGE;
+ return (NET_R_BAD);
+ }
+
+ /* copy net value and type */
+ nptr->n_addrtype = ne->n_addrtype;
+ nptr->n_net = ne->n_net;
+
+ cp = (char *)ALIGN(buf) + numptr * sizeof(char *);
+
+ /* copy official name */
+ n = strlen(ne->n_name) + 1;
+ strcpy(cp, ne->n_name);
+ nptr->n_name = cp;
+ cp += n;
+
+ /* copy aliases */
+ nptr->n_aliases = (char **)ALIGN(buf);
+ for (i = 0 ; ne->n_aliases[i]; i++) {
+ n = strlen(ne->n_aliases[i]) + 1;
+ strcpy(cp, ne->n_aliases[i]);
+ nptr->n_aliases[i] = cp;
+ cp += n;
+ }
+ nptr->n_aliases[i] = NULL;
+
+ return (NET_R_OK);
+}
+#else /* !NETENT_DATA */
+static int
+copy_netent(struct netent *ne, struct netent *nptr, NET_R_COPY_ARGS) {
+ char *cp, *eob;
+ int i, n;
+
+ /* copy net value and type */
+ nptr->n_addrtype = ne->n_addrtype;
+ nptr->n_net = ne->n_net;
+
+ /* copy official name */
+ cp = ndptr->line;
+ eob = ndptr->line + sizeof(ndptr->line);
+ if ((n = strlen(ne->n_name) + 1) < (eob - cp)) {
+ strcpy(cp, ne->n_name);
+ nptr->n_name = cp;
+ cp += n;
+ } else {
+ return (-1);
+ }
+
+ /* copy aliases */
+ i = 0;
+ nptr->n_aliases = ndptr->net_aliases;
+ while (ne->n_aliases[i] && i < (_MAXALIASES-1)) {
+ if ((n = strlen(ne->n_aliases[i]) + 1) < (eob - cp)) {
+ strcpy(cp, ne->n_aliases[i]);
+ nptr->n_aliases[i] = cp;
+ cp += n;
+ } else {
+ break;
+ }
+ i++;
+ }
+ nptr->n_aliases[i] = NULL;
+
+ return (NET_R_OK);
+}
+#endif /* !NETENT_DATA */
+#else /* NET_R_RETURN */
+ static int getnetent_r_unknown_system = 0;
+#endif /* NET_R_RETURN */
+#endif /* !defined(_REENTRANT) || !defined(DO_PTHREADS) */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/getnetgrent.c b/usr/src/lib/libresolv2_joy/common/irs/getnetgrent.c
new file mode 100644
index 0000000000..40b3e5a8ad
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/getnetgrent.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 1996-1999, 2001, 2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: getnetgrent.c,v 1.6 2008/11/14 02:36:51 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/* Imports */
+
+#include "port_before.h"
+
+#if !defined(__BIND_NOSTATIC)
+
+#include <sys/types.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_data.h"
+
+/* Forward */
+
+static struct net_data *init(void);
+
+
+/* Public */
+
+#ifndef SETNETGRENT_ARGS
+#define SETNETGRENT_ARGS const char *netgroup
+#endif
+void
+setnetgrent(SETNETGRENT_ARGS) {
+ struct net_data *net_data = init();
+
+ setnetgrent_p(netgroup, net_data);
+}
+
+void
+endnetgrent(void) {
+ struct net_data *net_data = init();
+
+ endnetgrent_p(net_data);
+}
+
+#ifndef INNETGR_ARGS
+#define INNETGR_ARGS const char *netgroup, const char *host, \
+ const char *user, const char *domain
+#endif
+int
+innetgr(INNETGR_ARGS) {
+ struct net_data *net_data = init();
+
+ return (innetgr_p(netgroup, host, user, domain, net_data));
+}
+
+int
+getnetgrent(NGR_R_CONST char **host, NGR_R_CONST char **user,
+ NGR_R_CONST char **domain)
+{
+ struct net_data *net_data = init();
+ const char *ch, *cu, *cd;
+ int ret;
+
+ ret = getnetgrent_p(&ch, &cu, &cd, net_data);
+ if (ret != 1)
+ return (ret);
+
+ DE_CONST(ch, *host);
+ DE_CONST(cu, *user);
+ DE_CONST(cd, *domain);
+ return (ret);
+}
+
+/* Shared private. */
+
+void
+setnetgrent_p(const char *netgroup, struct net_data *net_data) {
+ struct irs_ng *ng;
+
+ if ((net_data != NULL) && ((ng = net_data->ng) != NULL))
+ (*ng->rewind)(ng, netgroup);
+}
+
+void
+endnetgrent_p(struct net_data *net_data) {
+ struct irs_ng *ng;
+
+ if (!net_data)
+ return;
+ if ((ng = net_data->ng) != NULL)
+ (*ng->close)(ng);
+ net_data->ng = NULL;
+}
+
+int
+innetgr_p(const char *netgroup, const char *host,
+ const char *user, const char *domain,
+ struct net_data *net_data) {
+ struct irs_ng *ng;
+
+ if (!net_data || !(ng = net_data->ng))
+ return (0);
+ return ((*ng->test)(ng, netgroup, host, user, domain));
+}
+
+int
+getnetgrent_p(const char **host, const char **user, const char **domain,
+ struct net_data *net_data ) {
+ struct irs_ng *ng;
+
+ if (!net_data || !(ng = net_data->ng))
+ return (0);
+ return ((*ng->next)(ng, host, user, domain));
+}
+
+/* Private */
+
+static struct net_data *
+init(void) {
+ struct net_data *net_data;
+
+ if (!(net_data = net_data_init(NULL)))
+ goto error;
+ if (!net_data->ng) {
+ net_data->ng = (*net_data->irs->ng_map)(net_data->irs);
+ if (!net_data->ng) {
+ error:
+ errno = EIO;
+ return (NULL);
+ }
+ }
+
+ return (net_data);
+}
+
+#endif /*__BIND_NOSTATIC*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/getnetgrent_r.c b/usr/src/lib/libresolv2_joy/common/irs/getnetgrent_r.c
new file mode 100644
index 0000000000..aa8810320d
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/getnetgrent_r.c
@@ -0,0 +1,219 @@
+/*
+ * Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 1998, 1999, 2001, 2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: getnetgrent_r.c,v 1.14 2008/11/14 02:36:51 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include <port_before.h>
+#if !defined(_REENTRANT) || !defined(DO_PTHREADS)
+ static int getnetgrent_r_not_required = 0;
+#else
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <stdlib.h>
+#include <port_after.h>
+
+#ifdef NGR_R_RETURN
+#ifndef NGR_R_PRIVATE
+#define NGR_R_PRIVATE 0
+#endif
+
+static NGR_R_RETURN
+copy_protoent(NGR_R_CONST char **, NGR_R_CONST char **, NGR_R_CONST char **,
+ const char *, const char *, const char *, NGR_R_COPY_ARGS);
+
+NGR_R_RETURN
+innetgr_r(const char *netgroup, const char *host, const char *user,
+ const char *domain) {
+ char *ng, *ho, *us, *dom;
+
+ DE_CONST(netgroup, ng);
+ DE_CONST(host, ho);
+ DE_CONST(user, us);
+ DE_CONST(domain, dom);
+
+ return (innetgr(ng, ho, us, dom));
+}
+
+/*%
+ * These assume a single context is in operation per thread.
+ * If this is not the case we will need to call irs directly
+ * rather than through the base functions.
+ */
+
+NGR_R_RETURN
+getnetgrent_r(NGR_R_CONST char **machinep, NGR_R_CONST char **userp,
+ NGR_R_CONST char **domainp, NGR_R_ARGS)
+{
+ NGR_R_CONST char *mp, *up, *dp;
+ int res = getnetgrent(&mp, &up, &dp);
+
+ if (res != 1)
+ return (res);
+
+ return (copy_protoent(machinep, userp, domainp,
+ mp, up, dp, NGR_R_COPY));
+}
+
+#if NGR_R_PRIVATE == 2
+struct private {
+ char *buf;
+};
+
+#endif
+NGR_R_SET_RETURN
+#ifdef NGR_R_SET_ARGS
+setnetgrent_r(NGR_R_SET_CONST char *netgroup, NGR_R_SET_ARGS)
+#else
+setnetgrent_r(NGR_R_SET_CONST char *netgroup)
+#endif
+{
+#if NGR_R_PRIVATE == 2
+ struct private *p;
+#endif
+ char *tmp;
+#if defined(NGR_R_SET_ARGS) && NGR_R_PRIVATE == 0
+ UNUSED(buf);
+ UNUSED(buflen);
+#endif
+
+ DE_CONST(netgroup, tmp);
+ setnetgrent(tmp);
+
+#if NGR_R_PRIVATE == 1
+ *buf = NULL;
+#elif NGR_R_PRIVATE == 2
+ *buf = p = malloc(sizeof(struct private));
+ if (p == NULL)
+#ifdef NGR_R_SET_RESULT
+ return (NGR_R_BAD);
+#else
+ return;
+#endif
+ p->buf = NULL;
+#endif
+#ifdef NGR_R_SET_RESULT
+ return (NGR_R_SET_RESULT);
+#endif
+}
+
+NGR_R_END_RETURN
+#ifdef NGR_R_END_ARGS
+endnetgrent_r(NGR_R_END_ARGS)
+#else
+endnetgrent_r(void)
+#endif
+{
+#if NGR_R_PRIVATE == 2
+ struct private *p = buf;
+#endif
+#if defined(NGR_R_SET_ARGS) && NGR_R_PRIVATE == 0
+ UNUSED(buf);
+ UNUSED(buflen);
+#endif
+
+ endnetgrent();
+#if NGR_R_PRIVATE == 1
+ if (*buf != NULL)
+ free(*buf);
+ *buf = NULL;
+#elif NGR_R_PRIVATE == 2
+ if (p->buf != NULL)
+ free(p->buf);
+ free(p);
+#endif
+ NGR_R_END_RESULT(NGR_R_OK);
+}
+
+/* Private */
+
+static int
+copy_protoent(NGR_R_CONST char **machinep, NGR_R_CONST char **userp,
+ NGR_R_CONST char **domainp, const char *mp, const char *up,
+ const char *dp, NGR_R_COPY_ARGS)
+{
+#if NGR_R_PRIVATE == 2
+ struct private *p = buf;
+#endif
+ char *cp;
+ int n;
+ int len;
+
+ /* Find out the amount of space required to store the answer. */
+ len = 0;
+ if (mp != NULL) len += strlen(mp) + 1;
+ if (up != NULL) len += strlen(up) + 1;
+ if (dp != NULL) len += strlen(dp) + 1;
+
+#if NGR_R_PRIVATE == 1
+ if (*buf != NULL)
+ free(*buf);
+ *buf = malloc(len);
+ if (*buf == NULL)
+ return(NGR_R_BAD);
+ cp = *buf;
+#elif NGR_R_PRIVATE == 2
+ if (p->buf)
+ free(p->buf);
+ p->buf = malloc(len);
+ if (p->buf == NULL)
+ return(NGR_R_BAD);
+ cp = p->buf;
+#else
+ if (len > (int)buflen) {
+ errno = ERANGE;
+ return (NGR_R_BAD);
+ }
+ cp = buf;
+#endif
+
+ if (mp != NULL) {
+ n = strlen(mp) + 1;
+ strcpy(cp, mp);
+ *machinep = cp;
+ cp += n;
+ } else
+ *machinep = NULL;
+
+ if (up != NULL) {
+ n = strlen(up) + 1;
+ strcpy(cp, up);
+ *userp = cp;
+ cp += n;
+ } else
+ *userp = NULL;
+
+ if (dp != NULL) {
+ n = strlen(dp) + 1;
+ strcpy(cp, dp);
+ *domainp = cp;
+ cp += n;
+ } else
+ *domainp = NULL;
+
+ return (NGR_R_OK);
+}
+#else /* NGR_R_RETURN */
+ static int getnetgrent_r_unknown_system = 0;
+#endif /* NGR_R_RETURN */
+#endif /* !defined(_REENTRANT) || !defined(DO_PTHREADS) */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/getprotoent.c b/usr/src/lib/libresolv2_joy/common/irs/getprotoent.c
new file mode 100644
index 0000000000..32a1040916
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/getprotoent.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: getprotoent.c,v 1.4 2005/04/27 04:56:26 sra Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#if !defined(__BIND_NOSTATIC)
+
+#include <sys/types.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_data.h"
+
+/* Forward */
+
+static struct net_data *init(void);
+
+/* Public */
+
+struct protoent *
+getprotoent() {
+ struct net_data *net_data = init();
+
+ return (getprotoent_p(net_data));
+}
+
+struct protoent *
+getprotobyname(const char *name) {
+ struct net_data *net_data = init();
+
+ return (getprotobyname_p(name, net_data));
+}
+
+struct protoent *
+getprotobynumber(int proto) {
+ struct net_data *net_data = init();
+
+ return (getprotobynumber_p(proto, net_data));
+}
+
+void
+setprotoent(int stayopen) {
+ struct net_data *net_data = init();
+
+ setprotoent_p(stayopen, net_data);
+}
+
+void
+endprotoent() {
+ struct net_data *net_data = init();
+
+ endprotoent_p(net_data);
+}
+
+/* Shared private. */
+
+struct protoent *
+getprotoent_p(struct net_data *net_data) {
+ struct irs_pr *pr;
+
+ if (!net_data || !(pr = net_data->pr))
+ return (NULL);
+ net_data->pr_last = (*pr->next)(pr);
+ return (net_data->pr_last);
+}
+
+struct protoent *
+getprotobyname_p(const char *name, struct net_data *net_data) {
+ struct irs_pr *pr;
+ char **pap;
+
+ if (!net_data || !(pr = net_data->pr))
+ return (NULL);
+ if (net_data->pr_stayopen && net_data->pr_last) {
+ if (!strcmp(net_data->pr_last->p_name, name))
+ return (net_data->pr_last);
+ for (pap = net_data->pr_last->p_aliases; pap && *pap; pap++)
+ if (!strcmp(name, *pap))
+ return (net_data->pr_last);
+ }
+ net_data->pr_last = (*pr->byname)(pr, name);
+ if (!net_data->pr_stayopen)
+ endprotoent();
+ return (net_data->pr_last);
+}
+
+struct protoent *
+getprotobynumber_p(int proto, struct net_data *net_data) {
+ struct irs_pr *pr;
+
+ if (!net_data || !(pr = net_data->pr))
+ return (NULL);
+ if (net_data->pr_stayopen && net_data->pr_last)
+ if (net_data->pr_last->p_proto == proto)
+ return (net_data->pr_last);
+ net_data->pr_last = (*pr->bynumber)(pr, proto);
+ if (!net_data->pr_stayopen)
+ endprotoent();
+ return (net_data->pr_last);
+}
+
+void
+setprotoent_p(int stayopen, struct net_data *net_data) {
+ struct irs_pr *pr;
+
+ if (!net_data || !(pr = net_data->pr))
+ return;
+ (*pr->rewind)(pr);
+ net_data->pr_stayopen = (stayopen != 0);
+ if (stayopen == 0)
+ net_data_minimize(net_data);
+}
+
+void
+endprotoent_p(struct net_data *net_data) {
+ struct irs_pr *pr;
+
+ if ((net_data != NULL) && ((pr = net_data->pr) != NULL))
+ (*pr->minimize)(pr);
+}
+
+/* Private */
+
+static struct net_data *
+init() {
+ struct net_data *net_data;
+
+ if (!(net_data = net_data_init(NULL)))
+ goto error;
+ if (!net_data->pr) {
+ net_data->pr = (*net_data->irs->pr_map)(net_data->irs);
+
+ if (!net_data->pr || !net_data->res) {
+ error:
+ errno = EIO;
+ return (NULL);
+ }
+ (*net_data->pr->res_set)(net_data->pr, net_data->res, NULL);
+ }
+
+ return (net_data);
+}
+
+#endif /*__BIND_NOSTATIC*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/getprotoent_r.c b/usr/src/lib/libresolv2_joy/common/irs/getprotoent_r.c
new file mode 100644
index 0000000000..d5d9ae53b6
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/getprotoent_r.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1998-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: getprotoent_r.c,v 1.6 2006/08/01 01:14:16 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include <port_before.h>
+#if !defined(_REENTRANT) || !defined(DO_PTHREADS)
+ static int getprotoent_r_not_required = 0;
+#else
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <port_after.h>
+
+#ifdef PROTO_R_RETURN
+
+static PROTO_R_RETURN
+copy_protoent(struct protoent *, struct protoent *, PROTO_R_COPY_ARGS);
+
+PROTO_R_RETURN
+getprotobyname_r(const char *name, struct protoent *pptr, PROTO_R_ARGS) {
+ struct protoent *pe = getprotobyname(name);
+#ifdef PROTO_R_SETANSWER
+ int n = 0;
+
+ if (pe == NULL || (n = copy_protoent(pe, pptr, PROTO_R_COPY)) != 0)
+ *answerp = NULL;
+ else
+ *answerp = pptr;
+
+ return (n);
+#else
+ if (pe == NULL)
+ return (PROTO_R_BAD);
+
+ return (copy_protoent(pe, pptr, PROTO_R_COPY));
+#endif
+}
+
+PROTO_R_RETURN
+getprotobynumber_r(int proto, struct protoent *pptr, PROTO_R_ARGS) {
+ struct protoent *pe = getprotobynumber(proto);
+#ifdef PROTO_R_SETANSWER
+ int n = 0;
+
+ if (pe == NULL || (n = copy_protoent(pe, pptr, PROTO_R_COPY)) != 0)
+ *answerp = NULL;
+ else
+ *answerp = pptr;
+
+ return (n);
+#else
+ if (pe == NULL)
+ return (PROTO_R_BAD);
+
+ return (copy_protoent(pe, pptr, PROTO_R_COPY));
+#endif
+}
+
+/*%
+ * These assume a single context is in operation per thread.
+ * If this is not the case we will need to call irs directly
+ * rather than through the base functions.
+ */
+
+PROTO_R_RETURN
+getprotoent_r(struct protoent *pptr, PROTO_R_ARGS) {
+ struct protoent *pe = getprotoent();
+#ifdef PROTO_R_SETANSWER
+ int n = 0;
+
+ if (pe == NULL || (n = copy_protoent(pe, pptr, PROTO_R_COPY)) != 0)
+ *answerp = NULL;
+ else
+ *answerp = pptr;
+
+ return (n);
+#else
+ if (pe == NULL)
+ return (PROTO_R_BAD);
+
+ return (copy_protoent(pe, pptr, PROTO_R_COPY));
+#endif
+}
+
+PROTO_R_SET_RETURN
+#ifdef PROTO_R_ENT_ARGS
+setprotoent_r(int stay_open, PROTO_R_ENT_ARGS)
+#else
+setprotoent_r(int stay_open)
+#endif
+{
+#ifdef PROTO_R_ENT_UNUSED
+ PROTO_R_ENT_UNUSED;
+#endif
+ setprotoent(stay_open);
+#ifdef PROTO_R_SET_RESULT
+ return (PROTO_R_SET_RESULT);
+#endif
+}
+
+PROTO_R_END_RETURN
+#ifdef PROTO_R_ENT_ARGS
+endprotoent_r(PROTO_R_ENT_ARGS)
+#else
+endprotoent_r()
+#endif
+{
+#ifdef PROTO_R_ENT_UNUSED
+ PROTO_R_ENT_UNUSED;
+#endif
+ endprotoent();
+ PROTO_R_END_RESULT(PROTO_R_OK);
+}
+
+/* Private */
+
+#ifndef PROTOENT_DATA
+static PROTO_R_RETURN
+copy_protoent(struct protoent *pe, struct protoent *pptr, PROTO_R_COPY_ARGS) {
+ char *cp;
+ int i, n;
+ int numptr, len;
+
+ /* Find out the amount of space required to store the answer. */
+ numptr = 1; /*%< NULL ptr */
+ len = (char *)ALIGN(buf) - buf;
+ for (i = 0; pe->p_aliases[i]; i++, numptr++) {
+ len += strlen(pe->p_aliases[i]) + 1;
+ }
+ len += strlen(pe->p_name) + 1;
+ len += numptr * sizeof(char*);
+
+ if (len > (int)buflen) {
+ errno = ERANGE;
+ return (PROTO_R_BAD);
+ }
+
+ /* copy protocol value*/
+ pptr->p_proto = pe->p_proto;
+
+ cp = (char *)ALIGN(buf) + numptr * sizeof(char *);
+
+ /* copy official name */
+ n = strlen(pe->p_name) + 1;
+ strcpy(cp, pe->p_name);
+ pptr->p_name = cp;
+ cp += n;
+
+ /* copy aliases */
+ pptr->p_aliases = (char **)ALIGN(buf);
+ for (i = 0 ; pe->p_aliases[i]; i++) {
+ n = strlen(pe->p_aliases[i]) + 1;
+ strcpy(cp, pe->p_aliases[i]);
+ pptr->p_aliases[i] = cp;
+ cp += n;
+ }
+ pptr->p_aliases[i] = NULL;
+
+ return (PROTO_R_OK);
+}
+#else /* !PROTOENT_DATA */
+static int
+copy_protoent(struct protoent *pe, struct protoent *pptr, PROTO_R_COPY_ARGS) {
+ char *cp, *eob;
+ int i, n;
+
+ /* copy protocol value */
+ pptr->p_proto = pe->p_proto;
+
+ /* copy official name */
+ cp = pdptr->line;
+ eob = pdptr->line + sizeof(pdptr->line);
+ if ((n = strlen(pe->p_name) + 1) < (eob - cp)) {
+ strcpy(cp, pe->p_name);
+ pptr->p_name = cp;
+ cp += n;
+ } else {
+ return (-1);
+ }
+
+ /* copy aliases */
+ i = 0;
+ pptr->p_aliases = pdptr->proto_aliases;
+ while (pe->p_aliases[i] && i < (_MAXALIASES-1)) {
+ if ((n = strlen(pe->p_aliases[i]) + 1) < (eob - cp)) {
+ strcpy(cp, pe->p_aliases[i]);
+ pptr->p_aliases[i] = cp;
+ cp += n;
+ } else {
+ break;
+ }
+ i++;
+ }
+ pptr->p_aliases[i] = NULL;
+
+ return (PROTO_R_OK);
+}
+#endif /* PROTOENT_DATA */
+#else /* PROTO_R_RETURN */
+ static int getprotoent_r_unknown_system = 0;
+#endif /* PROTO_R_RETURN */
+#endif /* !defined(_REENTRANT) || !defined(DO_PTHREADS) */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/getservent.c b/usr/src/lib/libresolv2_joy/common/irs/getservent.c
new file mode 100644
index 0000000000..31af67e659
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/getservent.c
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: getservent.c,v 1.4 2005/04/27 04:56:26 sra Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#if !defined(__BIND_NOSTATIC)
+
+#include <sys/types.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_data.h"
+
+/* Forward */
+
+static struct net_data *init(void);
+
+/* Public */
+
+struct servent *
+getservent(void) {
+ struct net_data *net_data = init();
+
+ return (getservent_p(net_data));
+}
+
+struct servent *
+getservbyname(const char *name, const char *proto) {
+ struct net_data *net_data = init();
+
+ return (getservbyname_p(name, proto, net_data));
+}
+
+struct servent *
+getservbyport(int port, const char *proto) {
+ struct net_data *net_data = init();
+
+ return (getservbyport_p(port, proto, net_data));
+}
+
+void
+setservent(int stayopen) {
+ struct net_data *net_data = init();
+
+ setservent_p(stayopen, net_data);
+}
+
+void
+endservent() {
+ struct net_data *net_data = init();
+
+ endservent_p(net_data);
+}
+
+/* Shared private. */
+
+struct servent *
+getservent_p(struct net_data *net_data) {
+ struct irs_sv *sv;
+
+ if (!net_data || !(sv = net_data->sv))
+ return (NULL);
+ net_data->sv_last = (*sv->next)(sv);
+ return (net_data->sv_last);
+}
+
+struct servent *
+getservbyname_p(const char *name, const char *proto,
+ struct net_data *net_data) {
+ struct irs_sv *sv;
+ char **sap;
+
+ if (!net_data || !(sv = net_data->sv))
+ return (NULL);
+ if (net_data->sv_stayopen && net_data->sv_last)
+ if (!proto || !strcmp(net_data->sv_last->s_proto, proto)) {
+ if (!strcmp(net_data->sv_last->s_name, name))
+ return (net_data->sv_last);
+ for (sap = net_data->sv_last->s_aliases;
+ sap && *sap; sap++)
+ if (!strcmp(name, *sap))
+ return (net_data->sv_last);
+ }
+ net_data->sv_last = (*sv->byname)(sv, name, proto);
+ if (!net_data->sv_stayopen)
+ endservent();
+ return (net_data->sv_last);
+}
+
+struct servent *
+getservbyport_p(int port, const char *proto, struct net_data *net_data) {
+ struct irs_sv *sv;
+
+ if (!net_data || !(sv = net_data->sv))
+ return (NULL);
+ if (net_data->sv_stayopen && net_data->sv_last)
+ if (port == net_data->sv_last->s_port &&
+ ( !proto ||
+ !strcmp(net_data->sv_last->s_proto, proto)))
+ return (net_data->sv_last);
+ net_data->sv_last = (*sv->byport)(sv, port, proto);
+ return (net_data->sv_last);
+}
+
+void
+setservent_p(int stayopen, struct net_data *net_data) {
+ struct irs_sv *sv;
+
+ if (!net_data || !(sv = net_data->sv))
+ return;
+ (*sv->rewind)(sv);
+ net_data->sv_stayopen = (stayopen != 0);
+ if (stayopen == 0)
+ net_data_minimize(net_data);
+}
+
+void
+endservent_p(struct net_data *net_data) {
+ struct irs_sv *sv;
+
+ if ((net_data != NULL) && ((sv = net_data->sv) != NULL))
+ (*sv->minimize)(sv);
+}
+
+/* Private */
+
+static struct net_data *
+init() {
+ struct net_data *net_data;
+
+ if (!(net_data = net_data_init(NULL)))
+ goto error;
+ if (!net_data->sv) {
+ net_data->sv = (*net_data->irs->sv_map)(net_data->irs);
+
+ if (!net_data->sv || !net_data->res) {
+ error:
+ errno = EIO;
+ return (NULL);
+ }
+ (*net_data->sv->res_set)(net_data->sv, net_data->res, NULL);
+ }
+
+ return (net_data);
+}
+
+#endif /*__BIND_NOSTATIC*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/getservent_r.c b/usr/src/lib/libresolv2_joy/common/irs/getservent_r.c
new file mode 100644
index 0000000000..42d1e46163
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/getservent_r.c
@@ -0,0 +1,242 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1998-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: getservent_r.c,v 1.6 2006/08/01 01:14:16 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include <port_before.h>
+#if !defined(_REENTRANT) || !defined(DO_PTHREADS)
+ static int getservent_r_not_required = 0;
+#else
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <sys/param.h>
+#include <port_after.h>
+
+#ifdef SERV_R_RETURN
+
+static SERV_R_RETURN
+copy_servent(struct servent *, struct servent *, SERV_R_COPY_ARGS);
+
+SERV_R_RETURN
+getservbyname_r(const char *name, const char *proto,
+ struct servent *sptr, SERV_R_ARGS) {
+ struct servent *se = getservbyname(name, proto);
+#ifdef SERV_R_SETANSWER
+ int n = 0;
+
+ if (se == NULL || (n = copy_servent(se, sptr, SERV_R_COPY)) != 0)
+ *answerp = NULL;
+ else
+ *answerp = sptr;
+
+ return (n);
+#else
+ if (se == NULL)
+ return (SERV_R_BAD);
+
+ return (copy_servent(se, sptr, SERV_R_COPY));
+#endif
+}
+
+SERV_R_RETURN
+getservbyport_r(int port, const char *proto,
+ struct servent *sptr, SERV_R_ARGS) {
+ struct servent *se = getservbyport(port, proto);
+#ifdef SERV_R_SETANSWER
+ int n = 0;
+
+ if (se == NULL || (n = copy_servent(se, sptr, SERV_R_COPY)) != 0)
+ *answerp = NULL;
+ else
+ *answerp = sptr;
+
+ return (n);
+#else
+ if (se == NULL)
+ return (SERV_R_BAD);
+
+ return (copy_servent(se, sptr, SERV_R_COPY));
+#endif
+}
+
+/*%
+ * These assume a single context is in operation per thread.
+ * If this is not the case we will need to call irs directly
+ * rather than through the base functions.
+ */
+
+SERV_R_RETURN
+getservent_r(struct servent *sptr, SERV_R_ARGS) {
+ struct servent *se = getservent();
+#ifdef SERV_R_SETANSWER
+ int n = 0;
+
+ if (se == NULL || (n = copy_servent(se, sptr, SERV_R_COPY)) != 0)
+ *answerp = NULL;
+ else
+ *answerp = sptr;
+
+ return (n);
+#else
+ if (se == NULL)
+ return (SERV_R_BAD);
+
+ return (copy_servent(se, sptr, SERV_R_COPY));
+#endif
+}
+
+SERV_R_SET_RETURN
+#ifdef SERV_R_ENT_ARGS
+setservent_r(int stay_open, SERV_R_ENT_ARGS)
+#else
+setservent_r(int stay_open)
+#endif
+{
+#ifdef SERV_R_ENT_UNUSED
+ SERV_R_ENT_UNUSED;
+#endif
+ setservent(stay_open);
+#ifdef SERV_R_SET_RESULT
+ return (SERV_R_SET_RESULT);
+#endif
+}
+
+SERV_R_END_RETURN
+#ifdef SERV_R_ENT_ARGS
+endservent_r(SERV_R_ENT_ARGS)
+#else
+endservent_r()
+#endif
+{
+#ifdef SERV_R_ENT_UNUSED
+ SERV_R_ENT_UNUSED;
+#endif
+ endservent();
+ SERV_R_END_RESULT(SERV_R_OK);
+}
+
+/* Private */
+
+#ifndef SERVENT_DATA
+static SERV_R_RETURN
+copy_servent(struct servent *se, struct servent *sptr, SERV_R_COPY_ARGS) {
+ char *cp;
+ int i, n;
+ int numptr, len;
+
+ /* Find out the amount of space required to store the answer. */
+ numptr = 1; /*%< NULL ptr */
+ len = (char *)ALIGN(buf) - buf;
+ for (i = 0; se->s_aliases[i]; i++, numptr++) {
+ len += strlen(se->s_aliases[i]) + 1;
+ }
+ len += strlen(se->s_name) + 1;
+ len += strlen(se->s_proto) + 1;
+ len += numptr * sizeof(char*);
+
+ if (len > (int)buflen) {
+ errno = ERANGE;
+ return (SERV_R_BAD);
+ }
+
+ /* copy port value */
+ sptr->s_port = se->s_port;
+
+ cp = (char *)ALIGN(buf) + numptr * sizeof(char *);
+
+ /* copy official name */
+ n = strlen(se->s_name) + 1;
+ strcpy(cp, se->s_name);
+ sptr->s_name = cp;
+ cp += n;
+
+ /* copy aliases */
+ sptr->s_aliases = (char **)ALIGN(buf);
+ for (i = 0 ; se->s_aliases[i]; i++) {
+ n = strlen(se->s_aliases[i]) + 1;
+ strcpy(cp, se->s_aliases[i]);
+ sptr->s_aliases[i] = cp;
+ cp += n;
+ }
+ sptr->s_aliases[i] = NULL;
+
+ /* copy proto */
+ n = strlen(se->s_proto) + 1;
+ strcpy(cp, se->s_proto);
+ sptr->s_proto = cp;
+ cp += n;
+
+ return (SERV_R_OK);
+}
+#else /* !SERVENT_DATA */
+static int
+copy_servent(struct servent *se, struct servent *sptr, SERV_R_COPY_ARGS) {
+ char *cp, *eob;
+ int i, n;
+
+ /* copy port value */
+ sptr->s_port = se->s_port;
+
+ /* copy official name */
+ cp = sdptr->line;
+ eob = sdptr->line + sizeof(sdptr->line);
+ if ((n = strlen(se->s_name) + 1) < (eob - cp)) {
+ strcpy(cp, se->s_name);
+ sptr->s_name = cp;
+ cp += n;
+ } else {
+ return (-1);
+ }
+
+ /* copy aliases */
+ i = 0;
+ sptr->s_aliases = sdptr->serv_aliases;
+ while (se->s_aliases[i] && i < (_MAXALIASES-1)) {
+ if ((n = strlen(se->s_aliases[i]) + 1) < (eob - cp)) {
+ strcpy(cp, se->s_aliases[i]);
+ sptr->s_aliases[i] = cp;
+ cp += n;
+ } else {
+ break;
+ }
+ i++;
+ }
+ sptr->s_aliases[i] = NULL;
+
+ /* copy proto */
+ if ((n = strlen(se->s_proto) + 1) < (eob - cp)) {
+ strcpy(cp, se->s_proto);
+ sptr->s_proto = cp;
+ cp += n;
+ } else {
+ return (-1);
+ }
+
+ return (SERV_R_OK);
+}
+#endif /* !SERVENT_DATA */
+#else /*SERV_R_RETURN */
+ static int getservent_r_unknown_system = 0;
+#endif /*SERV_R_RETURN */
+#endif /* !defined(_REENTRANT) || !defined(DO_PTHREADS) */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/hesiod.c b/usr/src/lib/libresolv2_joy/common/irs/hesiod.c
new file mode 100644
index 0000000000..7641bf80ac
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/hesiod.c
@@ -0,0 +1,505 @@
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: hesiod.c,v 1.7 2005/07/28 06:51:48 marka Exp $";
+#endif
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+
+/*! \file
+ * \brief
+ * hesiod.c --- the core portion of the hesiod resolver.
+ *
+ * This file is derived from the hesiod library from Project Athena;
+ * It has been extensively rewritten by Theodore Ts'o to have a more
+ * thread-safe interface.
+ * \author
+ * This file is primarily maintained by &lt;tytso@mit.edu&gt; and &lt;ghudson@mit.edu&gt;.
+ */
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "port_after.h"
+
+#include "pathnames.h"
+#include "hesiod.h"
+#include "hesiod_p.h"
+
+/* Forward */
+
+int hesiod_init(void **context);
+void hesiod_end(void *context);
+char * hesiod_to_bind(void *context, const char *name,
+ const char *type);
+char ** hesiod_resolve(void *context, const char *name,
+ const char *type);
+void hesiod_free_list(void *context, char **list);
+
+static int parse_config_file(struct hesiod_p *ctx, const char *filename);
+static char ** get_txt_records(struct hesiod_p *ctx, int class,
+ const char *name);
+static int init(struct hesiod_p *ctx);
+
+/* Public */
+
+/*%
+ * This function is called to initialize a hesiod_p.
+ */
+int
+hesiod_init(void **context) {
+ struct hesiod_p *ctx;
+ char *cp;
+
+ ctx = malloc(sizeof(struct hesiod_p));
+ if (ctx == 0) {
+ errno = ENOMEM;
+ return (-1);
+ }
+
+ memset(ctx, 0, sizeof (*ctx));
+
+ if (parse_config_file(ctx, _PATH_HESIOD_CONF) < 0) {
+#ifdef DEF_RHS
+ /*
+ * Use compiled in defaults.
+ */
+ ctx->LHS = malloc(strlen(DEF_LHS) + 1);
+ ctx->RHS = malloc(strlen(DEF_RHS) + 1);
+ if (ctx->LHS == NULL || ctx->RHS == NULL) {
+ errno = ENOMEM;
+ goto cleanup;
+ }
+ strcpy(ctx->LHS, DEF_LHS); /* (checked) */
+ strcpy(ctx->RHS, DEF_RHS); /* (checked) */
+#else
+ goto cleanup;
+#endif
+ }
+ /*
+ * The default RHS can be overridden by an environment
+ * variable.
+ */
+ if ((cp = getenv("HES_DOMAIN")) != NULL) {
+ size_t RHSlen = strlen(cp) + 2;
+ if (ctx->RHS)
+ free(ctx->RHS);
+ ctx->RHS = malloc(RHSlen);
+ if (!ctx->RHS) {
+ errno = ENOMEM;
+ goto cleanup;
+ }
+ if (cp[0] == '.') {
+ strcpy(ctx->RHS, cp); /* (checked) */
+ } else {
+ strcpy(ctx->RHS, "."); /* (checked) */
+ strcat(ctx->RHS, cp); /* (checked) */
+ }
+ }
+
+ /*
+ * If there is no default hesiod realm set, we return an
+ * error.
+ */
+ if (!ctx->RHS) {
+ errno = ENOEXEC;
+ goto cleanup;
+ }
+
+#if 0
+ if (res_ninit(ctx->res) < 0)
+ goto cleanup;
+#endif
+
+ *context = ctx;
+ return (0);
+
+ cleanup:
+ hesiod_end(ctx);
+ return (-1);
+}
+
+/*%
+ * This function deallocates the hesiod_p
+ */
+void
+hesiod_end(void *context) {
+ struct hesiod_p *ctx = (struct hesiod_p *) context;
+ int save_errno = errno;
+
+ if (ctx->res)
+ res_nclose(ctx->res);
+ if (ctx->RHS)
+ free(ctx->RHS);
+ if (ctx->LHS)
+ free(ctx->LHS);
+ if (ctx->res && ctx->free_res)
+ (*ctx->free_res)(ctx->res);
+ free(ctx);
+ errno = save_errno;
+}
+
+/*%
+ * This function takes a hesiod (name, type) and returns a DNS
+ * name which is to be resolved.
+ */
+char *
+hesiod_to_bind(void *context, const char *name, const char *type) {
+ struct hesiod_p *ctx = (struct hesiod_p *) context;
+ char *bindname;
+ char **rhs_list = NULL;
+ const char *RHS, *cp;
+
+ /* Decide what our RHS is, and set cp to the end of the actual name. */
+ if ((cp = strchr(name, '@')) != NULL) {
+ if (strchr(cp + 1, '.'))
+ RHS = cp + 1;
+ else if ((rhs_list = hesiod_resolve(context, cp + 1,
+ "rhs-extension")) != NULL)
+ RHS = *rhs_list;
+ else {
+ errno = ENOENT;
+ return (NULL);
+ }
+ } else {
+ RHS = ctx->RHS;
+ cp = name + strlen(name);
+ }
+
+ /*
+ * Allocate the space we need, including up to three periods and
+ * the terminating NUL.
+ */
+ if ((bindname = malloc((cp - name) + strlen(type) + strlen(RHS) +
+ (ctx->LHS ? strlen(ctx->LHS) : 0) + 4)) == NULL) {
+ errno = ENOMEM;
+ if (rhs_list)
+ hesiod_free_list(context, rhs_list);
+ return NULL;
+ }
+
+ /* Now put together the DNS name. */
+ memcpy(bindname, name, cp - name);
+ bindname[cp - name] = '\0';
+ strcat(bindname, ".");
+ strcat(bindname, type);
+ if (ctx->LHS) {
+ if (ctx->LHS[0] != '.')
+ strcat(bindname, ".");
+ strcat(bindname, ctx->LHS);
+ }
+ if (RHS[0] != '.')
+ strcat(bindname, ".");
+ strcat(bindname, RHS);
+
+ if (rhs_list)
+ hesiod_free_list(context, rhs_list);
+
+ return (bindname);
+}
+
+/*%
+ * This is the core function. Given a hesiod (name, type), it
+ * returns an array of strings returned by the resolver.
+ */
+char **
+hesiod_resolve(void *context, const char *name, const char *type) {
+ struct hesiod_p *ctx = (struct hesiod_p *) context;
+ char *bindname = hesiod_to_bind(context, name, type);
+ char **retvec;
+
+ if (bindname == NULL)
+ return (NULL);
+ if (init(ctx) == -1) {
+ free(bindname);
+ return (NULL);
+ }
+
+ if ((retvec = get_txt_records(ctx, C_IN, bindname))) {
+ free(bindname);
+ return (retvec);
+ }
+
+ if (errno != ENOENT)
+ return (NULL);
+
+ retvec = get_txt_records(ctx, C_HS, bindname);
+ free(bindname);
+ return (retvec);
+}
+
+void
+hesiod_free_list(void *context, char **list) {
+ char **p;
+
+ UNUSED(context);
+
+ for (p = list; *p; p++)
+ free(*p);
+ free(list);
+}
+
+/*%
+ * This function parses the /etc/hesiod.conf file
+ */
+static int
+parse_config_file(struct hesiod_p *ctx, const char *filename) {
+ char *key, *data, *cp, **cpp;
+ char buf[MAXDNAME+7];
+ FILE *fp;
+
+ /*
+ * Clear the existing configuration variable, just in case
+ * they're set.
+ */
+ if (ctx->RHS)
+ free(ctx->RHS);
+ if (ctx->LHS)
+ free(ctx->LHS);
+ ctx->RHS = ctx->LHS = 0;
+
+ /*
+ * Now open and parse the file...
+ */
+ if (!(fp = fopen(filename, "r")))
+ return (-1);
+
+ while (fgets(buf, sizeof(buf), fp) != NULL) {
+ cp = buf;
+ if (*cp == '#' || *cp == '\n' || *cp == '\r')
+ continue;
+ while(*cp == ' ' || *cp == '\t')
+ cp++;
+ key = cp;
+ while(*cp != ' ' && *cp != '\t' && *cp != '=')
+ cp++;
+ *cp++ = '\0';
+
+ while(*cp == ' ' || *cp == '\t' || *cp == '=')
+ cp++;
+ data = cp;
+ while(*cp != ' ' && *cp != '\n' && *cp != '\r')
+ cp++;
+ *cp++ = '\0';
+
+ if (strcmp(key, "lhs") == 0)
+ cpp = &ctx->LHS;
+ else if (strcmp(key, "rhs") == 0)
+ cpp = &ctx->RHS;
+ else
+ continue;
+
+ *cpp = malloc(strlen(data) + 1);
+ if (!*cpp) {
+ errno = ENOMEM;
+ goto cleanup;
+ }
+ strcpy(*cpp, data);
+ }
+ fclose(fp);
+ return (0);
+
+ cleanup:
+ fclose(fp);
+ if (ctx->RHS)
+ free(ctx->RHS);
+ if (ctx->LHS)
+ free(ctx->LHS);
+ ctx->RHS = ctx->LHS = 0;
+ return (-1);
+}
+
+/*%
+ * Given a DNS class and a DNS name, do a lookup for TXT records, and
+ * return a list of them.
+ */
+static char **
+get_txt_records(struct hesiod_p *ctx, int class, const char *name) {
+ struct {
+ int type; /*%< RR type */
+ int class; /*%< RR class */
+ int dlen; /*%< len of data section */
+ u_char *data; /*%< pointer to data */
+ } rr;
+ HEADER *hp;
+ u_char qbuf[MAX_HESRESP], abuf[MAX_HESRESP];
+ u_char *cp, *erdata, *eom;
+ char *dst, *edst, **list;
+ int ancount, qdcount;
+ int i, j, n, skip;
+
+ /*
+ * Construct the query and send it.
+ */
+ n = res_nmkquery(ctx->res, QUERY, name, class, T_TXT, NULL, 0,
+ NULL, qbuf, MAX_HESRESP);
+ if (n < 0) {
+ errno = EMSGSIZE;
+ return (NULL);
+ }
+ n = res_nsend(ctx->res, qbuf, n, abuf, MAX_HESRESP);
+ if (n < 0) {
+ errno = ECONNREFUSED;
+ return (NULL);
+ }
+ if (n < HFIXEDSZ) {
+ errno = EMSGSIZE;
+ return (NULL);
+ }
+
+ /*
+ * OK, parse the result.
+ */
+ hp = (HEADER *) abuf;
+ ancount = ntohs(hp->ancount);
+ qdcount = ntohs(hp->qdcount);
+ cp = abuf + sizeof(HEADER);
+ eom = abuf + n;
+
+ /* Skip query, trying to get to the answer section which follows. */
+ for (i = 0; i < qdcount; i++) {
+ skip = dn_skipname(cp, eom);
+ if (skip < 0 || cp + skip + QFIXEDSZ > eom) {
+ errno = EMSGSIZE;
+ return (NULL);
+ }
+ cp += skip + QFIXEDSZ;
+ }
+
+ list = malloc((ancount + 1) * sizeof(char *));
+ if (!list) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ j = 0;
+ for (i = 0; i < ancount; i++) {
+ skip = dn_skipname(cp, eom);
+ if (skip < 0) {
+ errno = EMSGSIZE;
+ goto cleanup;
+ }
+ cp += skip;
+ if (cp + 3 * INT16SZ + INT32SZ > eom) {
+ errno = EMSGSIZE;
+ goto cleanup;
+ }
+ rr.type = ns_get16(cp);
+ cp += INT16SZ;
+ rr.class = ns_get16(cp);
+ cp += INT16SZ + INT32SZ; /*%< skip the ttl, too */
+ rr.dlen = ns_get16(cp);
+ cp += INT16SZ;
+ if (cp + rr.dlen > eom) {
+ errno = EMSGSIZE;
+ goto cleanup;
+ }
+ rr.data = cp;
+ cp += rr.dlen;
+ if (rr.class != class || rr.type != T_TXT)
+ continue;
+ if (!(list[j] = malloc(rr.dlen)))
+ goto cleanup;
+ dst = list[j++];
+ edst = dst + rr.dlen;
+ erdata = rr.data + rr.dlen;
+ cp = rr.data;
+ while (cp < erdata) {
+ n = (unsigned char) *cp++;
+ if (cp + n > eom || dst + n > edst) {
+ errno = EMSGSIZE;
+ goto cleanup;
+ }
+ memcpy(dst, cp, n);
+ cp += n;
+ dst += n;
+ }
+ if (cp != erdata) {
+ errno = EMSGSIZE;
+ goto cleanup;
+ }
+ *dst = '\0';
+ }
+ list[j] = NULL;
+ if (j == 0) {
+ errno = ENOENT;
+ goto cleanup;
+ }
+ return (list);
+
+ cleanup:
+ for (i = 0; i < j; i++)
+ free(list[i]);
+ free(list);
+ return (NULL);
+}
+
+struct __res_state *
+__hesiod_res_get(void *context) {
+ struct hesiod_p *ctx = context;
+
+ if (!ctx->res) {
+ struct __res_state *res;
+ res = (struct __res_state *)malloc(sizeof *res);
+ if (res == NULL) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(res, 0, sizeof *res);
+ __hesiod_res_set(ctx, res, free);
+ }
+
+ return (ctx->res);
+}
+
+void
+__hesiod_res_set(void *context, struct __res_state *res,
+ void (*free_res)(void *)) {
+ struct hesiod_p *ctx = context;
+
+ if (ctx->res && ctx->free_res) {
+ res_nclose(ctx->res);
+ (*ctx->free_res)(ctx->res);
+ }
+
+ ctx->res = res;
+ ctx->free_res = free_res;
+}
+
+static int
+init(struct hesiod_p *ctx) {
+
+ if (!ctx->res && !__hesiod_res_get(ctx))
+ return (-1);
+
+ if (((ctx->res->options & RES_INIT) == 0U) &&
+ (res_ninit(ctx->res) == -1))
+ return (-1);
+
+ return (0);
+}
diff --git a/usr/src/lib/libresolv2_joy/common/irs/hesiod_p.h b/usr/src/lib/libresolv2_joy/common/irs/hesiod_p.h
new file mode 100644
index 0000000000..99da15d0cd
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/hesiod_p.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: hesiod_p.h,v 1.3 2005/04/27 04:56:27 sra Exp $
+ */
+
+#ifndef _HESIOD_P_H_INCLUDED
+#define _HESIOD_P_H_INCLUDED
+
+/** \file
+ * \brief
+ * hesiod_p.h -- private definitions for the hesiod library.
+ *
+ * \author
+ * This file is primarily maintained by tytso@mit.edu and ghudson@mit.edu.
+ */
+
+#define DEF_RHS ".Athena.MIT.EDU" /*%< Defaults if HESIOD_CONF */
+#define DEF_LHS ".ns" /*%< file is not */
+ /*%< present. */
+struct hesiod_p {
+ char * LHS; /*%< normally ".ns" */
+ char * RHS; /*%< AKA the default hesiod domain */
+ struct __res_state * res; /*%< resolver context */
+ void (*free_res)(void *);
+ void (*res_set)(struct hesiod_p *, struct __res_state *,
+ void (*)(void *));
+ struct __res_state * (*res_get)(struct hesiod_p *);
+};
+
+#define MAX_HESRESP 1024
+
+#endif /*_HESIOD_P_H_INCLUDED*/
diff --git a/usr/src/lib/libresolv2_joy/common/irs/irp.c b/usr/src/lib/libresolv2_joy/common/irs/irp.c
new file mode 100644
index 0000000000..ef10631c22
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/irp.c
@@ -0,0 +1,583 @@
+/*
+ * Copyright (C) 2004-2006, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 1996, 1998-2001, 2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: irp.c,v 1.12 2008/11/14 02:36:51 marka Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <syslog.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <stdarg.h>
+#include <fcntl.h>
+#include <syslog.h>
+#include <ctype.h>
+#include <unistd.h>
+
+#include <isc/memcluster.h>
+
+#include <irs.h>
+#include <irp.h>
+
+#include "irs_p.h"
+#include "irp_p.h"
+
+#include "port_after.h"
+
+/* Forward. */
+
+static void irp_close(struct irs_acc *);
+
+#define LINEINCR 128
+
+#if !defined(SUN_LEN)
+#define SUN_LEN(su) \
+ (sizeof (*(su)) - sizeof ((su)->sun_path) + strlen((su)->sun_path))
+#endif
+
+
+/* Public */
+
+
+/* send errors to syslog if true. */
+int irp_log_errors = 1;
+
+/*%
+ * This module handles the irp module connection to irpd.
+ *
+ * The client expects a synchronous interface to functions like
+ * getpwnam(3), so we can't use the ctl_* i/o library on this end of
+ * the wire (it's used in the server).
+ */
+
+/*%
+ * irs_acc *irs_irp_acc(const char *options);
+ *
+ * Initialize the irp module.
+ */
+struct irs_acc *
+irs_irp_acc(const char *options) {
+ struct irs_acc *acc;
+ struct irp_p *irp;
+
+ UNUSED(options);
+
+ if (!(acc = memget(sizeof *acc))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(acc, 0x5e, sizeof *acc);
+ if (!(irp = memget(sizeof *irp))) {
+ errno = ENOMEM;
+ free(acc);
+ return (NULL);
+ }
+ irp->inlast = 0;
+ irp->incurr = 0;
+ irp->fdCxn = -1;
+ acc->private = irp;
+
+#ifdef WANT_IRS_GR
+ acc->gr_map = irs_irp_gr;
+#else
+ acc->gr_map = NULL;
+#endif
+#ifdef WANT_IRS_PW
+ acc->pw_map = irs_irp_pw;
+#else
+ acc->pw_map = NULL;
+#endif
+ acc->sv_map = irs_irp_sv;
+ acc->pr_map = irs_irp_pr;
+ acc->ho_map = irs_irp_ho;
+ acc->nw_map = irs_irp_nw;
+ acc->ng_map = irs_irp_ng;
+ acc->close = irp_close;
+ return (acc);
+}
+
+
+int
+irs_irp_connection_setup(struct irp_p *cxndata, int *warned) {
+ if (irs_irp_is_connected(cxndata)) {
+ return (0);
+ } else if (irs_irp_connect(cxndata) != 0) {
+ if (warned != NULL && !*warned) {
+ syslog(LOG_ERR, "irpd connection failed: %m\n");
+ (*warned)++;
+ }
+
+ return (-1);
+ }
+
+ return (0);
+}
+
+/*%
+ * int irs_irp_connect(void);
+ *
+ * Sets up the connection to the remote irpd server.
+ *
+ * Returns:
+ *
+ * 0 on success, -1 on failure.
+ *
+ */
+int
+irs_irp_connect(struct irp_p *pvt) {
+ int flags;
+ struct sockaddr *addr;
+ struct sockaddr_in iaddr;
+#ifndef NO_SOCKADDR_UN
+ struct sockaddr_un uaddr;
+#endif
+ long ipaddr;
+ const char *irphost;
+ int code;
+ char text[256];
+ int socklen = 0;
+
+ if (pvt->fdCxn != -1) {
+ perror("fd != 1");
+ return (-1);
+ }
+
+#ifndef NO_SOCKADDR_UN
+ memset(&uaddr, 0, sizeof uaddr);
+#endif
+ memset(&iaddr, 0, sizeof iaddr);
+
+ irphost = getenv(IRPD_HOST_ENV);
+ if (irphost == NULL) {
+ irphost = "127.0.0.1";
+ }
+
+#ifndef NO_SOCKADDR_UN
+ if (irphost[0] == '/') {
+ addr = (struct sockaddr *)&uaddr;
+ strncpy(uaddr.sun_path, irphost, sizeof uaddr.sun_path);
+ uaddr.sun_family = AF_UNIX;
+ socklen = SUN_LEN(&uaddr);
+#ifdef HAVE_SA_LEN
+ uaddr.sun_len = socklen;
+#endif
+ } else
+#endif
+ {
+ if (inet_pton(AF_INET, irphost, &ipaddr) != 1) {
+ errno = EADDRNOTAVAIL;
+ perror("inet_pton");
+ return (-1);
+ }
+
+ addr = (struct sockaddr *)&iaddr;
+ socklen = sizeof iaddr;
+#ifdef HAVE_SA_LEN
+ iaddr.sin_len = socklen;
+#endif
+ iaddr.sin_family = AF_INET;
+ iaddr.sin_port = htons(IRPD_PORT);
+ iaddr.sin_addr.s_addr = ipaddr;
+ }
+
+
+ pvt->fdCxn = socket(addr->sa_family, SOCK_STREAM, PF_UNSPEC);
+ if (pvt->fdCxn < 0) {
+ perror("socket");
+ return (-1);
+ }
+
+ if (connect(pvt->fdCxn, addr, socklen) != 0) {
+ perror("connect");
+ return (-1);
+ }
+
+ flags = fcntl(pvt->fdCxn, F_GETFL, 0);
+ if (flags < 0) {
+ close(pvt->fdCxn);
+ perror("close");
+ return (-1);
+ }
+
+#if 0
+ flags |= O_NONBLOCK;
+ if (fcntl(pvt->fdCxn, F_SETFL, flags) < 0) {
+ close(pvt->fdCxn);
+ perror("fcntl");
+ return (-1);
+ }
+#endif
+
+ code = irs_irp_read_response(pvt, text, sizeof text);
+ if (code != IRPD_WELCOME_CODE) {
+ if (irp_log_errors) {
+ syslog(LOG_WARNING, "Connection failed: %s", text);
+ }
+ irs_irp_disconnect(pvt);
+ return (-1);
+ }
+
+ return (0);
+}
+
+/*%
+ * int irs_irp_is_connected(struct irp_p *pvt);
+ *
+ * Returns:
+ *
+ * Non-zero if streams are setup to remote.
+ *
+ */
+
+int
+irs_irp_is_connected(struct irp_p *pvt) {
+ return (pvt->fdCxn >= 0);
+}
+
+/*%
+ * void
+ * irs_irp_disconnect(struct irp_p *pvt);
+ *
+ * Closes streams to remote.
+ */
+
+void
+irs_irp_disconnect(struct irp_p *pvt) {
+ if (pvt->fdCxn != -1) {
+ close(pvt->fdCxn);
+ pvt->fdCxn = -1;
+ }
+}
+
+
+
+int
+irs_irp_read_line(struct irp_p *pvt, char *buffer, int len) {
+ char *realstart = &pvt->inbuffer[0];
+ char *p, *start, *end;
+ int spare;
+ int i;
+ int buffpos = 0;
+ int left = len - 1;
+
+ while (left > 0) {
+ start = p = &pvt->inbuffer[pvt->incurr];
+ end = &pvt->inbuffer[pvt->inlast];
+
+ while (p != end && *p != '\n')
+ p++;
+
+ if (p == end) {
+ /* Found no newline so shift data down if necessary
+ * and append new data to buffer
+ */
+ if (start > realstart) {
+ memmove(realstart, start, end - start);
+ pvt->inlast = end - start;
+ start = realstart;
+ pvt->incurr = 0;
+ end = &pvt->inbuffer[pvt->inlast];
+ }
+
+ spare = sizeof (pvt->inbuffer) - pvt->inlast;
+
+ p = end;
+ i = read(pvt->fdCxn, end, spare);
+ if (i < 0) {
+ close(pvt->fdCxn);
+ pvt->fdCxn = -1;
+ return (buffpos > 0 ? buffpos : -1);
+ } else if (i == 0) {
+ return (buffpos);
+ }
+
+ end += i;
+ pvt->inlast += i;
+
+ while (p != end && *p != '\n')
+ p++;
+ }
+
+ if (p == end) {
+ /* full buffer and still no newline */
+ i = sizeof pvt->inbuffer;
+ } else {
+ /* include newline */
+ i = p - start + 1;
+ }
+
+ if (i > left)
+ i = left;
+ memcpy(buffer + buffpos, start, i);
+ pvt->incurr += i;
+ buffpos += i;
+ buffer[buffpos] = '\0';
+
+ if (p != end) {
+ left = 0;
+ } else {
+ left -= i;
+ }
+ }
+
+#if 0
+ fprintf(stderr, "read line: %s\n", buffer);
+#endif
+ return (buffpos);
+}
+
+/*%
+ * int irp_read_response(struct irp_p *pvt);
+ *
+ * Returns:
+ *
+ * The number found at the beginning of the line read from
+ * FP. 0 on failure(0 is not a legal response code). The
+ * rest of the line is discarded.
+ *
+ */
+
+int
+irs_irp_read_response(struct irp_p *pvt, char *text, size_t textlen) {
+ char line[1024];
+ int code;
+ char *p;
+
+ if (irs_irp_read_line(pvt, line, sizeof line) <= 0) {
+ return (0);
+ }
+
+ p = strchr(line, '\n');
+ if (p == NULL) {
+ return (0);
+ }
+
+ if (sscanf(line, "%d", &code) != 1) {
+ code = 0;
+ } else if (text != NULL && textlen > 0U) {
+ p = line;
+ while (isspace((unsigned char)*p)) p++;
+ while (isdigit((unsigned char)*p)) p++;
+ while (isspace((unsigned char)*p)) p++;
+ strncpy(text, p, textlen - 1);
+ p[textlen - 1] = '\0';
+ }
+
+ return (code);
+}
+
+/*%
+ * char *irp_read_body(struct irp_p *pvt, size_t *size);
+ *
+ * Read in the body of a response. Terminated by a line with
+ * just a dot on it. Lines should be terminated with a CR-LF
+ * sequence, but we're nt piccky if the CR is missing.
+ * No leading dot escaping is done as the protcol doesn't
+ * use leading dots anywhere.
+ *
+ * Returns:
+ *
+ * Pointer to null-terminated buffer allocated by memget.
+ * *SIZE is set to the length of the buffer.
+ *
+ */
+
+char *
+irs_irp_read_body(struct irp_p *pvt, size_t *size) {
+ char line[1024];
+ u_int linelen;
+ size_t len = LINEINCR;
+ char *buffer = memget(len);
+ int idx = 0;
+
+ if (buffer == NULL)
+ return (NULL);
+
+ for (;;) {
+ if (irs_irp_read_line(pvt, line, sizeof line) <= 0 ||
+ strchr(line, '\n') == NULL)
+ goto death;
+
+ linelen = strlen(line);
+
+ if (line[linelen - 1] != '\n')
+ goto death;
+
+ /* We're not strict about missing \r. Should we be?? */
+ if (linelen > 2 && line[linelen - 2] == '\r') {
+ line[linelen - 2] = '\n';
+ line[linelen - 1] = '\0';
+ linelen--;
+ }
+
+ if (linelen == 2 && line[0] == '.') {
+ *size = len;
+ buffer[idx] = '\0';
+
+ return (buffer);
+ }
+
+ if (linelen > (len - (idx + 1))) {
+ char *p = memget(len + LINEINCR);
+
+ if (p == NULL)
+ goto death;
+ memcpy(p, buffer, len);
+ memput(buffer, len);
+ buffer = p;
+ len += LINEINCR;
+ }
+
+ memcpy(buffer + idx, line, linelen);
+ idx += linelen;
+ }
+ death:
+ memput(buffer, len);
+ return (NULL);
+}
+
+/*%
+ * int irs_irp_get_full_response(struct irp_p *pvt, int *code,
+ * char **body, size_t *bodylen);
+ *
+ * Gets the response to a command. If the response indicates
+ * there's a body to follow(code % 10 == 1), then the
+ * body buffer is allcoated with memget and stored in
+ * *BODY. The length of the allocated body buffer is stored
+ * in *BODY. The caller must give the body buffer back to
+ * memput when done. The results code is stored in *CODE.
+ *
+ * Returns:
+ *
+ * 0 if a result was read. -1 on some sort of failure.
+ *
+ */
+
+int
+irs_irp_get_full_response(struct irp_p *pvt, int *code, char *text,
+ size_t textlen, char **body, size_t *bodylen) {
+ int result = irs_irp_read_response(pvt, text, textlen);
+
+ *body = NULL;
+
+ if (result == 0) {
+ return (-1);
+ }
+
+ *code = result;
+
+ /* Code that matches 2xx is a good result code.
+ * Code that matches xx1 means there's a response body coming.
+ */
+ if ((result / 100) == 2 && (result % 10) == 1) {
+ *body = irs_irp_read_body(pvt, bodylen);
+ if (*body == NULL) {
+ return (-1);
+ }
+ }
+
+ return (0);
+}
+
+/*%
+ * int irs_irp_send_command(struct irp_p *pvt, const char *fmt, ...);
+ *
+ * Sends command to remote connected via the PVT
+ * structure. FMT and args after it are fprintf-like
+ * arguments for formatting.
+ *
+ * Returns:
+ *
+ * 0 on success, -1 on failure.
+ */
+
+int
+irs_irp_send_command(struct irp_p *pvt, const char *fmt, ...) {
+ va_list ap;
+ char buffer[1024];
+ int pos = 0;
+ int i, todo;
+
+
+ if (pvt->fdCxn < 0) {
+ return (-1);
+ }
+
+ va_start(ap, fmt);
+ (void) vsprintf(buffer, fmt, ap);
+ todo = strlen(buffer);
+ va_end(ap);
+ if (todo > (int)sizeof(buffer) - 3) {
+ syslog(LOG_CRIT, "memory overrun in irs_irp_send_command()");
+ exit(1);
+ }
+ strcat(buffer, "\r\n");
+ todo = strlen(buffer);
+
+ while (todo > 0) {
+ i = write(pvt->fdCxn, buffer + pos, todo);
+#if 0
+ /* XXX brister */
+ fprintf(stderr, "Wrote: \"");
+ fwrite(buffer + pos, sizeof (char), todo, stderr);
+ fprintf(stderr, "\"\n");
+#endif
+ if (i < 0) {
+ close(pvt->fdCxn);
+ pvt->fdCxn = -1;
+ return (-1);
+ }
+ todo -= i;
+ }
+
+ return (0);
+}
+
+
+/* Methods */
+
+/*%
+ * void irp_close(struct irs_acc *this)
+ *
+ */
+
+static void
+irp_close(struct irs_acc *this) {
+ struct irp_p *irp = (struct irp_p *)this->private;
+
+ if (irp != NULL) {
+ irs_irp_disconnect(irp);
+ memput(irp, sizeof *irp);
+ }
+
+ memput(this, sizeof *this);
+}
+
+
+
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/irp_ho.c b/usr/src/lib/libresolv2_joy/common/irs/irp_ho.c
new file mode 100644
index 0000000000..b0de31dc89
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/irp_ho.c
@@ -0,0 +1,405 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (c) 1996,1998 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: irp_ho.c,v 1.3 2005/04/27 04:56:28 sra Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/* Imports. */
+
+#include "port_before.h"
+
+#include <syslog.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syslog.h>
+
+#include <irs.h>
+#include <irp.h>
+#include <isc/irpmarshall.h>
+#include <isc/memcluster.h>
+
+#include "irs_p.h"
+#include "dns_p.h"
+#include "irp_p.h"
+
+#include "port_after.h"
+
+/* Definitions. */
+
+#define MAXALIASES 35
+#define MAXADDRS 35
+#define Max(a,b) ((a) > (b) ? (a) : (b))
+
+
+struct pvt {
+ struct irp_p *girpdata;
+ int warned;
+ struct hostent host;
+};
+
+/* Forward. */
+
+static void ho_close(struct irs_ho *this);
+static struct hostent * ho_byname(struct irs_ho *this, const char *name);
+static struct hostent * ho_byname2(struct irs_ho *this, const char *name,
+ int af);
+static struct hostent * ho_byaddr(struct irs_ho *this, const void *addr,
+ int len, int af);
+static struct hostent * ho_next(struct irs_ho *this);
+static void ho_rewind(struct irs_ho *this);
+static void ho_minimize(struct irs_ho *this);
+
+static void free_host(struct hostent *ho);
+static struct addrinfo * ho_addrinfo(struct irs_ho *this, const char *name,
+ const struct addrinfo *pai);
+
+/* Public. */
+
+/*%
+ * struct irs_ho * irs_irp_ho(struct irs_acc *this)
+ *
+ * Notes:
+ *
+ * Initializes the irp_ho module.
+ *
+ */
+
+struct irs_ho *
+irs_irp_ho(struct irs_acc *this) {
+ struct irs_ho *ho;
+ struct pvt *pvt;
+
+ if (!(ho = memget(sizeof *ho))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(ho, 0x0, sizeof *ho);
+
+ if (!(pvt = memget(sizeof *pvt))) {
+ memput(ho, sizeof *ho);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ pvt->girpdata = this->private;
+
+ ho->private = pvt;
+ ho->close = ho_close;
+ ho->byname = ho_byname;
+ ho->byname2 = ho_byname2;
+ ho->byaddr = ho_byaddr;
+ ho->next = ho_next;
+ ho->rewind = ho_rewind;
+ ho->minimize = ho_minimize;
+ ho->addrinfo = ho_addrinfo;
+
+ return (ho);
+}
+
+/* Methods. */
+
+/*%
+ * Closes down the module.
+ *
+ */
+
+static void
+ho_close(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ ho_minimize(this);
+
+ free_host(&pvt->host);
+
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+
+
+/*
+ * struct hostent * ho_byname(struct irs_ho *this, const char *name)
+ *
+ */
+
+static struct hostent *
+ho_byname(struct irs_ho *this, const char *name) {
+ return (ho_byname2(this, name, AF_INET));
+}
+
+
+
+
+
+/*
+ * struct hostent * ho_byname2(struct irs_ho *this, const char *name, int af)
+ *
+ */
+
+static struct hostent *
+ho_byname2(struct irs_ho *this, const char *name, int af) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct hostent *ho = &pvt->host;
+ char *body = NULL;
+ size_t bodylen;
+ int code;
+ char text[256];
+
+ if (ho->h_name != NULL &&
+ strcmp(name, ho->h_name) == 0 &&
+ af == ho->h_addrtype) {
+ return (ho);
+ }
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "gethostbyname2 %s %s",
+ name, ADDR_T_STR(af)) != 0)
+ return (NULL);
+
+ if (irs_irp_get_full_response(pvt->girpdata, &code,
+ text, sizeof text,
+ &body, &bodylen) != 0) {
+ return (NULL);
+ }
+
+ if (code == IRPD_GETHOST_OK) {
+ free_host(ho);
+ if (irp_unmarshall_ho(ho, body) != 0) {
+ ho = NULL;
+ }
+ } else {
+ ho = NULL;
+ }
+
+ if (body != NULL) {
+ memput(body, bodylen);
+ }
+
+ return (ho);
+}
+
+
+
+/*
+ * struct hostent * ho_byaddr(struct irs_ho *this, const void *addr,
+ * int len, int af)
+ *
+ */
+
+static struct hostent *
+ho_byaddr(struct irs_ho *this, const void *addr, int len, int af) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct hostent *ho = &pvt->host;
+ char *body = NULL;
+ size_t bodylen;
+ int code;
+ char **p;
+ char paddr[MAXPADDRSIZE];
+ char text[256];
+
+ if (ho->h_name != NULL &&
+ af == ho->h_addrtype &&
+ len == ho->h_length) {
+ for (p = ho->h_addr_list ; *p != NULL ; p++) {
+ if (memcmp(*p, addr, len) == 0)
+ return (ho);
+ }
+ }
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return (NULL);
+ }
+
+ if (inet_ntop(af, addr, paddr, sizeof paddr) == NULL) {
+ return (NULL);
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "gethostbyaddr %s %s",
+ paddr, ADDR_T_STR(af)) != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_get_full_response(pvt->girpdata, &code,
+ text, sizeof text,
+ &body, &bodylen) != 0) {
+ return (NULL);
+ }
+
+ if (code == IRPD_GETHOST_OK) {
+ free_host(ho);
+ if (irp_unmarshall_ho(ho, body) != 0) {
+ ho = NULL;
+ }
+ } else {
+ ho = NULL;
+ }
+
+ if (body != NULL) {
+ memput(body, bodylen);
+ }
+
+ return (ho);
+}
+
+/*%
+ * The implementation for gethostent(3). The first time it's
+ * called all the data is pulled from the remote(i.e. what
+ * the maximum number of gethostent(3) calls would return)
+ * and that data is cached.
+ *
+ */
+
+static struct hostent *
+ho_next(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct hostent *ho = &pvt->host;
+ char *body;
+ size_t bodylen;
+ int code;
+ char text[256];
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "gethostent") != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_get_full_response(pvt->girpdata, &code,
+ text, sizeof text,
+ &body, &bodylen) != 0) {
+ return (NULL);
+ }
+
+ if (code == IRPD_GETHOST_OK) {
+ free_host(ho);
+ if (irp_unmarshall_ho(ho, body) != 0) {
+ ho = NULL;
+ }
+ } else {
+ ho = NULL;
+ }
+
+ if (body != NULL) {
+ memput(body, bodylen);
+ }
+
+ return (ho);
+}
+
+/*%
+ * void ho_rewind(struct irs_ho *this)
+ *
+ */
+
+static void
+ho_rewind(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ char text[256];
+ int code;
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return;
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "sethostent") != 0) {
+ return;
+ }
+
+ code = irs_irp_read_response(pvt->girpdata, text, sizeof text);
+ if (code != IRPD_GETHOST_SETOK) {
+ if (irp_log_errors) {
+ syslog(LOG_WARNING, "sethostent failed: %s", text);
+ }
+ }
+
+ return;
+}
+
+/*%
+ * void ho_minimize(struct irs_ho *this)
+ *
+ */
+
+static void
+ho_minimize(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ free_host(&pvt->host);
+
+ irs_irp_disconnect(pvt->girpdata);
+}
+
+/*%
+ * void free_host(struct hostent *ho)
+ *
+ */
+
+static void
+free_host(struct hostent *ho) {
+ char **p;
+
+ if (ho == NULL) {
+ return;
+ }
+
+ if (ho->h_name != NULL)
+ free(ho->h_name);
+
+ if (ho->h_aliases != NULL) {
+ for (p = ho->h_aliases ; *p != NULL ; p++)
+ free(*p);
+ free(ho->h_aliases);
+ }
+
+ if (ho->h_addr_list != NULL) {
+ for (p = ho->h_addr_list ; *p != NULL ; p++)
+ free(*p);
+ free(ho->h_addr_list);
+ }
+}
+
+/* dummy */
+static struct addrinfo *
+ho_addrinfo(struct irs_ho *this, const char *name, const struct addrinfo *pai)
+{
+ UNUSED(this);
+ UNUSED(name);
+ UNUSED(pai);
+ return(NULL);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/irp_ng.c b/usr/src/lib/libresolv2_joy/common/irs/irp_ng.c
new file mode 100644
index 0000000000..1af862cab4
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/irp_ng.c
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996, 1998 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: irp_ng.c,v 1.4 2006/12/07 04:46:27 marka Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <syslog.h>
+
+#include <irs.h>
+#include <irp.h>
+#include <isc/memcluster.h>
+#include <isc/irpmarshall.h>
+
+#include "irs_p.h"
+#include "irp_p.h"
+
+#include "port_after.h"
+
+/* Definitions */
+
+struct pvt {
+ struct irp_p *girpdata;
+ int warned;
+};
+
+
+/* Forward */
+
+static void ng_rewind(struct irs_ng *, const char*);
+static void ng_close(struct irs_ng *);
+static int ng_next(struct irs_ng *, const char **, const char **,
+ const char **);
+static int ng_test(struct irs_ng *, const char *,
+ const char *, const char *,
+ const char *);
+static void ng_minimize(struct irs_ng *);
+
+
+/* Public */
+
+/*%
+ * Intialize the irp netgroup module.
+ *
+ */
+
+struct irs_ng *
+irs_irp_ng(struct irs_acc *this) {
+ struct irs_ng *ng;
+ struct pvt *pvt;
+
+ if (!(ng = memget(sizeof *ng))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(ng, 0x5e, sizeof *ng);
+
+ if (!(pvt = memget(sizeof *pvt))) {
+ memput(ng, sizeof *ng);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ pvt->girpdata = this->private;
+
+ ng->private = pvt;
+ ng->close = ng_close;
+ ng->next = ng_next;
+ ng->test = ng_test;
+ ng->rewind = ng_rewind;
+ ng->minimize = ng_minimize;
+ return (ng);
+}
+
+/* Methods */
+
+
+
+/*
+ * void ng_close(struct irs_ng *this)
+ *
+ */
+
+static void
+ng_close(struct irs_ng *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ ng_minimize(this);
+
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+
+
+
+/*
+ * void ng_rewind(struct irs_ng *this, const char *group)
+ *
+ *
+ */
+
+static void
+ng_rewind(struct irs_ng *this, const char *group) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ char text[256];
+ int code;
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return;
+ }
+
+ if (irs_irp_send_command(pvt->girpdata,
+ "setnetgrent %s", group) != 0) {
+ return;
+ }
+
+ code = irs_irp_read_response(pvt->girpdata, text, sizeof text);
+ if (code != IRPD_GETNETGR_SETOK) {
+ if (irp_log_errors) {
+ syslog(LOG_WARNING, "setnetgrent(%s) failed: %s",
+ group, text);
+ }
+ }
+
+ return;
+}
+
+/*
+ * Get the next netgroup item from the cache.
+ *
+ */
+
+static int
+ng_next(struct irs_ng *this, const char **host, const char **user,
+ const char **domain)
+{
+ struct pvt *pvt = (struct pvt *)this->private;
+ int code;
+ char *body = NULL;
+ size_t bodylen;
+ int rval = 0;
+ char text[256];
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return (0);
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "getnetgrent") != 0)
+ return (0);
+
+ if (irs_irp_get_full_response(pvt->girpdata, &code,
+ text, sizeof text,
+ &body, &bodylen) != 0) {
+ return (0);
+ }
+
+ if (code == IRPD_GETNETGR_OK) {
+ if (irp_unmarshall_ng(host, user, domain, body) == 0) {
+ rval = 1;
+ }
+ }
+
+ if (body != NULL) {
+ memput(body, bodylen);
+ }
+
+ return (rval);
+}
+
+/*
+ * Search for a match in a netgroup.
+ *
+ */
+
+static int
+ng_test(struct irs_ng *this, const char *name,
+ const char *host, const char *user, const char *domain)
+{
+ struct pvt *pvt = (struct pvt *)this->private;
+ char *body = NULL;
+ size_t bodylen = 0;
+ int code;
+ char text[256];
+ int rval = 0;
+
+ UNUSED(name);
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return (0);
+ }
+
+ if (irp_marshall_ng(host, user, domain, &body, &bodylen) != 0) {
+ return (0);
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "innetgr %s", body) == 0) {
+ code = irs_irp_read_response(pvt->girpdata, text, sizeof text);
+ if (code == IRPD_GETNETGR_MATCHES) {
+ rval = 1;
+ }
+ }
+
+ memput(body, bodylen);
+
+ return (rval);
+}
+
+
+
+
+/*
+ * void ng_minimize(struct irs_ng *this)
+ *
+ */
+
+static void
+ng_minimize(struct irs_ng *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ irs_irp_disconnect(pvt->girpdata);
+}
+
+
+
+
+/* Private */
+
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/irp_nw.c b/usr/src/lib/libresolv2_joy/common/irs/irp_nw.c
new file mode 100644
index 0000000000..3f2381f95d
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/irp_nw.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (c) 1996,1998 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: irp_nw.c,v 1.4 2006/03/09 23:57:56 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#if 0
+
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <syslog.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syslog.h>
+
+#include <irs.h>
+#include <irp.h>
+#include <isc/irpmarshall.h>
+
+#include <isc/memcluster.h>
+#include <isc/misc.h>
+
+#include "irs_p.h"
+#include "lcl_p.h"
+#include "irp_p.h"
+
+#include "port_after.h"
+
+#define MAXALIASES 35
+#define MAXADDRSIZE 4
+
+struct pvt {
+ struct irp_p *girpdata;
+ int warned;
+ struct nwent net;
+};
+
+/* Forward */
+
+static void nw_close(struct irs_nw *);
+static struct nwent * nw_byname(struct irs_nw *, const char *, int);
+static struct nwent * nw_byaddr(struct irs_nw *, void *, int, int);
+static struct nwent * nw_next(struct irs_nw *);
+static void nw_rewind(struct irs_nw *);
+static void nw_minimize(struct irs_nw *);
+
+static void free_nw(struct nwent *nw);
+
+
+/* Public */
+
+/*%
+ * struct irs_nw * irs_irp_nw(struct irs_acc *this)
+ *
+ */
+
+struct irs_nw *
+irs_irp_nw(struct irs_acc *this) {
+ struct irs_nw *nw;
+ struct pvt *pvt;
+
+ if (!(pvt = memget(sizeof *pvt))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+
+ if (!(nw = memget(sizeof *nw))) {
+ memput(pvt, sizeof *pvt);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(nw, 0x0, sizeof *nw);
+ pvt->girpdata = this->private;
+
+ nw->private = pvt;
+ nw->close = nw_close;
+ nw->byname = nw_byname;
+ nw->byaddr = nw_byaddr;
+ nw->next = nw_next;
+ nw->rewind = nw_rewind;
+ nw->minimize = nw_minimize;
+ return (nw);
+}
+
+/* Methods */
+
+/*%
+ * void nw_close(struct irs_nw *this)
+ *
+ */
+
+static void
+nw_close(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ nw_minimize(this);
+
+ free_nw(&pvt->net);
+
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+/*%
+ * struct nwent * nw_byaddr(struct irs_nw *this, void *net,
+ * int length, int type)
+ *
+ */
+
+static struct nwent *
+nw_byaddr(struct irs_nw *this, void *net, int length, int type) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct nwent *nw = &pvt->net;
+ char *body = NULL;
+ size_t bodylen;
+ int code;
+ char paddr[24]; /*%< bigenough for ip4 w/ cidr spec. */
+ char text[256];
+
+ if (inet_net_ntop(type, net, length, paddr, sizeof paddr) == NULL) {
+ return (NULL);
+ }
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "getnetbyaddr %s %s",
+ paddr, ADDR_T_STR(type)) != 0)
+ return (NULL);
+
+ if (irs_irp_get_full_response(pvt->girpdata, &code,
+ text, sizeof text,
+ &body, &bodylen) != 0) {
+ return (NULL);
+ }
+
+ if (code == IRPD_GETNET_OK) {
+ free_nw(nw);
+ if (irp_unmarshall_nw(nw, body) != 0) {
+ nw = NULL;
+ }
+ } else {
+ nw = NULL;
+ }
+
+ if (body != NULL) {
+ memput(body, bodylen);
+ }
+
+ return (nw);
+}
+
+/*%
+ * struct nwent * nw_byname(struct irs_nw *this, const char *name, int type)
+ *
+ */
+
+static struct nwent *
+nw_byname(struct irs_nw *this, const char *name, int type) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct nwent *nw = &pvt->net;
+ char *body = NULL;
+ size_t bodylen;
+ int code;
+ char text[256];
+
+ if (nw->n_name != NULL &&
+ strcmp(name, nw->n_name) == 0 &&
+ nw->n_addrtype == type) {
+ return (nw);
+ }
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "getnetbyname %s", name) != 0)
+ return (NULL);
+
+ if (irs_irp_get_full_response(pvt->girpdata, &code,
+ text, sizeof text,
+ &body, &bodylen) != 0) {
+ return (NULL);
+ }
+
+ if (code == IRPD_GETNET_OK) {
+ free_nw(nw);
+ if (irp_unmarshall_nw(nw, body) != 0) {
+ nw = NULL;
+ }
+ } else {
+ nw = NULL;
+ }
+
+ if (body != NULL) {
+ memput(body, bodylen);
+ }
+
+ return (nw);
+}
+
+/*%
+ * void nw_rewind(struct irs_nw *this)
+ *
+ */
+
+static void
+nw_rewind(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ char text[256];
+ int code;
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return;
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "setnetent") != 0) {
+ return;
+ }
+
+ code = irs_irp_read_response(pvt->girpdata, text, sizeof text);
+ if (code != IRPD_GETNET_SETOK) {
+ if (irp_log_errors) {
+ syslog(LOG_WARNING, "setnetent failed: %s", text);
+ }
+ }
+
+ return;
+}
+
+/*%
+ * Prepares the cache if necessary and returns the first, or
+ * next item from it.
+ */
+
+static struct nwent *
+nw_next(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct nwent *nw = &pvt->net;
+ char *body;
+ size_t bodylen;
+ int code;
+ char text[256];
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "getnetent") != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_get_full_response(pvt->girpdata, &code,
+ text, sizeof text,
+ &body, &bodylen) != 0) {
+ return (NULL);
+ }
+
+ if (code == IRPD_GETNET_OK) {
+ free_nw(nw);
+ if (irp_unmarshall_nw(nw, body) != 0) {
+ nw = NULL;
+ }
+ } else {
+ nw = NULL;
+ }
+
+ if (body != NULL)
+ memput(body, bodylen);
+ return (nw);
+}
+
+/*%
+ * void nw_minimize(struct irs_nw *this)
+ *
+ */
+
+static void
+nw_minimize(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ irs_irp_disconnect(pvt->girpdata);
+}
+
+
+
+
+/* private. */
+
+/*%
+ * deallocate all the memory irp_unmarshall_pw allocated.
+ *
+ */
+
+static void
+free_nw(struct nwent *nw) {
+ char **p;
+
+ if (nw == NULL)
+ return;
+
+ if (nw->n_name != NULL)
+ free(nw->n_name);
+
+ if (nw->n_aliases != NULL) {
+ for (p = nw->n_aliases ; *p != NULL ; p++) {
+ free(*p);
+ }
+ free(nw->n_aliases);
+ }
+
+ if (nw->n_addr != NULL)
+ free(nw->n_addr);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/irp_p.h b/usr/src/lib/libresolv2_joy/common/irs/irp_p.h
new file mode 100644
index 0000000000..4f943f81bd
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/irp_p.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: irp_p.h,v 1.5 2005/04/27 04:56:28 sra Exp $
+ */
+
+#ifndef _IRP_P_H_INCLUDED
+#define _IRP_P_H_INCLUDED
+
+#include <stdio.h>
+
+struct irp_p {
+ char inbuffer[1024];
+ int inlast; /*%< index of one past the last char in buffer */
+ int incurr; /*%< index of the next char to be read from buffer */
+ int fdCxn;
+};
+
+/*
+ * Externs.
+ */
+
+extern struct irs_acc * irs_irp_acc __P((const char *));
+extern struct irs_gr * irs_irp_gr __P((struct irs_acc *));
+extern struct irs_pw * irs_irp_pw __P((struct irs_acc *));
+extern struct irs_sv * irs_irp_sv __P((struct irs_acc *));
+extern struct irs_pr * irs_irp_pr __P((struct irs_acc *));
+extern struct irs_ho * irs_irp_ho __P((struct irs_acc *));
+extern struct irs_nw * irs_irp_nw __P((struct irs_acc *));
+extern struct irs_ng * irs_irp_ng __P((struct irs_acc *));
+
+int irs_irp_connect(struct irp_p *pvt);
+int irs_irp_is_connected(struct irp_p *pvt);
+void irs_irp_disconnect(struct irp_p *pvt);
+int irs_irp_read_response(struct irp_p *pvt, char *text, size_t textlen);
+char *irs_irp_read_body(struct irp_p *pvt, size_t *size);
+int irs_irp_get_full_response(struct irp_p *pvt, int *code,
+ char *text, size_t textlen,
+ char **body, size_t *bodylen);
+
+extern int irp_log_errors;
+
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/irp_pr.c b/usr/src/lib/libresolv2_joy/common/irs/irp_pr.c
new file mode 100644
index 0000000000..ea876e8281
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/irp_pr.c
@@ -0,0 +1,327 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (c) 1996 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: irp_pr.c,v 1.3 2005/04/27 04:56:29 sra Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/* extern */
+
+#include "port_before.h"
+
+#include <syslog.h>
+#include <sys/types.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <netdb.h>
+#include <syslog.h>
+
+#include <irs.h>
+#include <irp.h>
+#include <isc/memcluster.h>
+#include <isc/irpmarshall.h>
+
+#include "irs_p.h"
+#include "lcl_p.h"
+#include "irp_p.h"
+
+#include "port_after.h"
+
+
+#define MAXALIASES 35
+
+/* Types */
+
+struct pvt {
+ struct irp_p *girpdata;
+ int warned;
+ struct protoent proto;
+};
+
+/* Forward */
+
+static void pr_close(struct irs_pr *);
+static struct protoent * pr_next(struct irs_pr *);
+static struct protoent * pr_byname(struct irs_pr *, const char *);
+static struct protoent * pr_bynumber(struct irs_pr *, int);
+static void pr_rewind(struct irs_pr *);
+static void pr_minimize(struct irs_pr *);
+
+static void free_proto(struct protoent *pr);
+
+/* Public */
+
+/*%
+ * struct irs_pr * irs_irp_pr(struct irs_acc *this)
+ *
+ */
+
+struct irs_pr *
+irs_irp_pr(struct irs_acc *this) {
+ struct irs_pr *pr;
+ struct pvt *pvt;
+
+ if (!(pr = memget(sizeof *pr))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pr, 0x0, sizeof *pr);
+
+ if (!(pvt = memget(sizeof *pvt))) {
+ memput(pr, sizeof *pr);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ pvt->girpdata = this->private;
+
+ pr->private = pvt;
+ pr->close = pr_close;
+ pr->byname = pr_byname;
+ pr->bynumber = pr_bynumber;
+ pr->next = pr_next;
+ pr->rewind = pr_rewind;
+ pr->minimize = pr_minimize;
+ return (pr);
+}
+
+/* Methods */
+
+/*%
+ * void pr_close(struct irs_pr *this)
+ *
+ */
+
+static void
+pr_close(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ pr_minimize(this);
+
+ free_proto(&pvt->proto);
+
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+/*%
+ * struct protoent * pr_byname(struct irs_pr *this, const char *name)
+ *
+ */
+
+static struct protoent *
+pr_byname(struct irs_pr *this, const char *name) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct protoent *pr = &pvt->proto;
+ char *body = NULL;
+ size_t bodylen;
+ int code;
+ int i;
+ char text[256];
+
+ if (pr->p_name != NULL && strcmp(name, pr->p_name) == 0) {
+ return (pr);
+ }
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return (NULL);
+ }
+
+ i = irs_irp_send_command(pvt->girpdata, "getprotobyname %s", name);
+ if (i != 0)
+ return (NULL);
+
+ if (irs_irp_get_full_response(pvt->girpdata, &code,
+ text, sizeof text,
+ &body, &bodylen) != 0) {
+ return (NULL);
+ }
+
+ if (code == IRPD_GETPROTO_OK) {
+ free_proto(pr);
+ if (irp_unmarshall_pr(pr, body) != 0) {
+ pr = NULL;
+ }
+ } else {
+ pr = NULL;
+ }
+
+ if (body != NULL) {
+ memput(body, bodylen);
+ }
+
+ return (pr);
+}
+
+/*%
+ * struct protoent * pr_bynumber(struct irs_pr *this, int proto)
+ *
+ */
+
+static struct protoent *
+pr_bynumber(struct irs_pr *this, int proto) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct protoent *pr = &pvt->proto;
+ char *body = NULL;
+ size_t bodylen;
+ int code;
+ int i;
+ char text[256];
+
+ if (pr->p_name != NULL && proto == pr->p_proto) {
+ return (pr);
+ }
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return (NULL);
+ }
+
+ i = irs_irp_send_command(pvt->girpdata, "getprotobynumber %d", proto);
+ if (i != 0)
+ return (NULL);
+
+ if (irs_irp_get_full_response(pvt->girpdata, &code,
+ text, sizeof text,
+ &body, &bodylen) != 0) {
+ return (NULL);
+ }
+
+ if (code == IRPD_GETPROTO_OK) {
+ free_proto(pr);
+ if (irp_unmarshall_pr(pr, body) != 0) {
+ pr = NULL;
+ }
+ } else {
+ pr = NULL;
+ }
+
+ if (body != NULL) {
+ memput(body, bodylen);
+ }
+
+ return (pr);
+}
+
+/*%
+ * void pr_rewind(struct irs_pr *this)
+ *
+ */
+
+static void
+pr_rewind(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ char text[256];
+ int code;
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return;
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "setprotoent") != 0) {
+ return;
+ }
+
+ code = irs_irp_read_response(pvt->girpdata, text, sizeof text);
+ if (code != IRPD_GETPROTO_SETOK) {
+ if (irp_log_errors) {
+ syslog(LOG_WARNING, "setprotoent failed: %s", text);
+ }
+ }
+
+ return;
+}
+
+/*%
+ * Prepares the cache if necessary and returns the next item in it.
+ *
+ */
+
+static struct protoent *
+pr_next(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct protoent *pr = &pvt->proto;
+ char *body;
+ size_t bodylen;
+ int code;
+ char text[256];
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "getprotoent") != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_get_full_response(pvt->girpdata, &code,
+ text, sizeof text,
+ &body, &bodylen) != 0) {
+ return (NULL);
+ }
+
+ if (code == IRPD_GETPROTO_OK) {
+ free_proto(pr);
+ if (irp_unmarshall_pr(pr, body) != 0) {
+ pr = NULL;
+ }
+ } else {
+ pr = NULL;
+ }
+
+ if (body != NULL) {
+ memput(body, bodylen);
+ }
+
+ return (pr);
+}
+
+/*%
+ * void pr_minimize(struct irs_pr *this)
+ *
+ */
+
+static void
+pr_minimize(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ irs_irp_disconnect(pvt->girpdata);
+}
+
+/*%
+ * Deallocate all the memory irp_unmarshall_pr allocated.
+ *
+ */
+
+static void
+free_proto(struct protoent *pr) {
+ char **p;
+
+ if (pr == NULL)
+ return;
+
+ if (pr->p_name != NULL)
+ free(pr->p_name);
+
+ for (p = pr->p_aliases ; p != NULL && *p != NULL ; p++)
+ free(*p);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/irp_sv.c b/usr/src/lib/libresolv2_joy/common/irs/irp_sv.c
new file mode 100644
index 0000000000..577e697fe6
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/irp_sv.c
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (c) 1996,1998 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: irp_sv.c,v 1.3 2005/04/27 04:56:29 sra Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/* extern */
+
+#include "port_before.h"
+
+#include <syslog.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#ifdef IRS_LCL_SV_DB
+#include <db.h>
+#endif
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <syslog.h>
+
+#include <irs.h>
+#include <irp.h>
+#include <isc/irpmarshall.h>
+#include <isc/memcluster.h>
+
+#include "irs_p.h"
+#include "lcl_p.h"
+#include "irp_p.h"
+
+#include "port_after.h"
+
+/* Types */
+
+struct pvt {
+ struct irp_p *girpdata;
+ int warned;
+ struct servent service;
+};
+
+/* Forward */
+
+static void sv_close(struct irs_sv*);
+static struct servent * sv_next(struct irs_sv *);
+static struct servent * sv_byname(struct irs_sv *, const char *,
+ const char *);
+static struct servent * sv_byport(struct irs_sv *, int, const char *);
+static void sv_rewind(struct irs_sv *);
+static void sv_minimize(struct irs_sv *);
+
+static void free_service(struct servent *sv);
+
+
+
+/* Public */
+
+/*%
+ * struct irs_sv * irs_irp_sv(struct irs_acc *this)
+ *
+ */
+
+struct irs_sv *
+irs_irp_sv(struct irs_acc *this) {
+ struct irs_sv *sv;
+ struct pvt *pvt;
+
+ if ((sv = memget(sizeof *sv)) == NULL) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(sv, 0x0, sizeof *sv);
+
+ if ((pvt = memget(sizeof *pvt)) == NULL) {
+ memput(sv, sizeof *sv);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ pvt->girpdata = this->private;
+
+ sv->private = pvt;
+ sv->close = sv_close;
+ sv->next = sv_next;
+ sv->byname = sv_byname;
+ sv->byport = sv_byport;
+ sv->rewind = sv_rewind;
+ sv->minimize = sv_minimize;
+
+ return (sv);
+}
+
+/* Methods */
+
+/*%
+ * void sv_close(struct irs_sv *this)
+ *
+ */
+
+static void
+sv_close(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ sv_minimize(this);
+
+ free_service(&pvt->service);
+
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+/*%
+ * Fills the cache if necessary and returns the next item from it.
+ *
+ */
+
+static struct servent *
+sv_next(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct servent *sv = &pvt->service;
+ char *body;
+ size_t bodylen;
+ int code;
+ char text[256];
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "getservent") != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_get_full_response(pvt->girpdata, &code,
+ text, sizeof text,
+ &body, &bodylen) != 0) {
+ return (NULL);
+ }
+
+ if (code == IRPD_GETSERVICE_OK) {
+ free_service(sv);
+ if (irp_unmarshall_sv(sv, body) != 0) {
+ sv = NULL;
+ }
+ } else {
+ sv = NULL;
+ }
+
+ if (body != NULL) {
+ memput(body, bodylen);
+ }
+
+ return (sv);
+}
+
+/*%
+ * struct servent * sv_byname(struct irs_sv *this, const char *name,
+ * const char *proto)
+ *
+ */
+
+static struct servent *
+sv_byname(struct irs_sv *this, const char *name, const char *proto) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct servent *sv = &pvt->service;
+ char *body;
+ char text[256];
+ size_t bodylen;
+ int code;
+
+ if (sv->s_name != NULL &&
+ strcmp(name, sv->s_name) == 0 &&
+ strcasecmp(proto, sv->s_proto) == 0) {
+ return (sv);
+ }
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "getservbyname %s %s",
+ name, proto) != 0)
+ return (NULL);
+
+ if (irs_irp_get_full_response(pvt->girpdata, &code,
+ text, sizeof text,
+ &body, &bodylen) != 0) {
+ return (NULL);
+ }
+
+ if (code == IRPD_GETSERVICE_OK) {
+ free_service(sv);
+ if (irp_unmarshall_sv(sv, body) != 0) {
+ sv = NULL;
+ }
+ } else {
+ sv = NULL;
+ }
+
+ if (body != NULL) {
+ memput(body, bodylen);
+ }
+
+ return (sv);
+}
+
+/*%
+ * struct servent * sv_byport(struct irs_sv *this, int port,
+ * const char *proto)
+ *
+ */
+
+static struct servent *
+sv_byport(struct irs_sv *this, int port, const char *proto) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct servent *sv = &pvt->service;
+ char *body;
+ size_t bodylen;
+ char text[256];
+ int code;
+
+ if (sv->s_name != NULL &&
+ port == sv->s_port &&
+ strcasecmp(proto, sv->s_proto) == 0) {
+ return (sv);
+ }
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "getservbyport %d %s",
+ ntohs((short)port), proto) != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_get_full_response(pvt->girpdata, &code,
+ text, sizeof text,
+ &body, &bodylen) != 0) {
+ return (NULL);
+ }
+
+ if (code == IRPD_GETSERVICE_OK) {
+ free_service(sv);
+ if (irp_unmarshall_sv(sv, body) != 0) {
+ sv = NULL;
+ }
+ } else {
+ sv = NULL;
+ }
+
+ if (body != NULL) {
+ memput(body, bodylen);
+ }
+
+ return (sv);
+}
+
+/*%
+ * void sv_rewind(struct irs_sv *this)
+ *
+ */
+
+static void
+sv_rewind(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ char text[256];
+ int code;
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return;
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "setservent") != 0) {
+ return;
+ }
+
+ code = irs_irp_read_response(pvt->girpdata, text, sizeof text);
+ if (code != IRPD_GETSERVICE_SETOK) {
+ if (irp_log_errors) {
+ syslog(LOG_WARNING, "setservent failed: %s", text);
+ }
+ }
+
+ return;
+}
+
+/*%
+ * void sv_minimize(struct irs_sv *this)
+ *
+ */
+
+static void
+sv_minimize(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ irs_irp_disconnect(pvt->girpdata);
+}
+
+
+
+
+
+
+static void
+free_service(struct servent *sv) {
+ char **p;
+
+ if (sv == NULL) {
+ return;
+ }
+
+ if (sv->s_name != NULL) {
+ free(sv->s_name);
+ }
+
+ for (p = sv->s_aliases ; p != NULL && *p != NULL ; p++) {
+ free(*p);
+ }
+
+ if (sv->s_proto != NULL) {
+ free(sv->s_proto);
+ }
+}
+
+
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/irpmarshall.c b/usr/src/lib/libresolv2_joy/common/irs/irpmarshall.c
new file mode 100644
index 0000000000..85ffff1866
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/irpmarshall.c
@@ -0,0 +1,2301 @@
+/*
+ * Copyright(c) 1989, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (c) 1996 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: irpmarshall.c,v 1.7 2006/03/09 23:57:56 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#if 0
+
+Check values are in approrpriate endian order.
+
+Double check memory allocations on unmarhsalling
+
+#endif
+
+
+/* Extern */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <stdio.h>
+#include <ctype.h>
+#include <pwd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syslog.h>
+#include <utmp.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+
+#include <irs.h>
+#include <isc/memcluster.h>
+#include <isc/irpmarshall.h>
+
+#include "port_after.h"
+
+
+#ifndef HAVE_STRNDUP
+static char *strndup(const char *str, size_t len);
+#endif
+
+static char **splitarray(const char *buffer, const char *buffend, char delim);
+static int joinarray(char * const * argv, char *buffer, char delim);
+static char *getfield(char **res, size_t reslen, char **buffer, char delim);
+static size_t joinlength(char * const *argv);
+static void free_array(char **argv, size_t entries);
+
+#define ADDR_T_STR(x) (x == AF_INET ? "AF_INET" :\
+ (x == AF_INET6 ? "AF_INET6" : "UNKNOWN"))
+
+#define MAXPADDRSIZE (sizeof "255.255.255.255" + 1)
+
+static char COMMA = ',';
+
+static const char *COMMASTR = ",";
+static const char *COLONSTR = ":";
+
+
+
+/* See big comment at bottom of irpmarshall.h for description. */
+
+
+#ifdef WANT_IRS_PW
+/* +++++++++++++++++++++++++ struct passwd +++++++++++++++++++++++++ */
+
+/*%
+ * int irp_marshall_pw(const struct passwd *pw, char **buffer, size_t *len)
+ *
+ * notes: \li
+ *
+ * See irpmarshall.h
+ *
+ * return: \li
+ *
+ * 0 on sucess, -1 on failure.
+ *
+ */
+
+int
+irp_marshall_pw(const struct passwd *pw, char **buffer, size_t *len) {
+ size_t need = 1 ; /*%< for null byte */
+ char pwUid[24];
+ char pwGid[24];
+ char pwChange[24];
+ char pwExpire[24];
+ const char *pwClass;
+ const char *fieldsep = COLONSTR;
+
+ if (pw == NULL || len == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ sprintf(pwUid, "%ld", (long)pw->pw_uid);
+ sprintf(pwGid, "%ld", (long)pw->pw_gid);
+
+#ifdef HAVE_PW_CHANGE
+ sprintf(pwChange, "%ld", (long)pw->pw_change);
+#else
+ pwChange[0] = '0';
+ pwChange[1] = '\0';
+#endif
+
+#ifdef HAVE_PW_EXPIRE
+ sprintf(pwExpire, "%ld", (long)pw->pw_expire);
+#else
+ pwExpire[0] = '0';
+ pwExpire[1] = '\0';
+#endif
+
+#ifdef HAVE_PW_CLASS
+ pwClass = pw->pw_class;
+#else
+ pwClass = "";
+#endif
+
+ need += strlen(pw->pw_name) + 1; /*%< one for fieldsep */
+ need += strlen(pw->pw_passwd) + 1;
+ need += strlen(pwUid) + 1;
+ need += strlen(pwGid) + 1;
+ need += strlen(pwClass) + 1;
+ need += strlen(pwChange) + 1;
+ need += strlen(pwExpire) + 1;
+ need += strlen(pw->pw_gecos) + 1;
+ need += strlen(pw->pw_dir) + 1;
+ need += strlen(pw->pw_shell) + 1;
+
+ if (buffer == NULL) {
+ *len = need;
+ return (0);
+ }
+
+ if (*buffer != NULL && need > *len) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ if (*buffer == NULL) {
+ need += 2; /*%< for CRLF */
+ *buffer = memget(need);
+ if (*buffer == NULL) {
+ errno = ENOMEM;
+ return (-1);
+ }
+
+ *len = need;
+ }
+
+ strcpy(*buffer, pw->pw_name); strcat(*buffer, fieldsep);
+ strcat(*buffer, pw->pw_passwd); strcat(*buffer, fieldsep);
+ strcat(*buffer, pwUid); strcat(*buffer, fieldsep);
+ strcat(*buffer, pwGid); strcat(*buffer, fieldsep);
+ strcat(*buffer, pwClass); strcat(*buffer, fieldsep);
+ strcat(*buffer, pwChange); strcat(*buffer, fieldsep);
+ strcat(*buffer, pwExpire); strcat(*buffer, fieldsep);
+ strcat(*buffer, pw->pw_gecos); strcat(*buffer, fieldsep);
+ strcat(*buffer, pw->pw_dir); strcat(*buffer, fieldsep);
+ strcat(*buffer, pw->pw_shell); strcat(*buffer, fieldsep);
+
+ return (0);
+}
+
+/*%
+ * int irp_unmarshall_pw(struct passwd *pw, char *buffer)
+ *
+ * notes: \li
+ *
+ * See irpmarshall.h
+ *
+ * return: \li
+ *
+ * 0 on success, -1 on failure
+ *
+ */
+
+int
+irp_unmarshall_pw(struct passwd *pw, char *buffer) {
+ char *name, *pass, *class, *gecos, *dir, *shell;
+ uid_t pwuid;
+ gid_t pwgid;
+ time_t pwchange;
+ time_t pwexpire;
+ char *p;
+ long t;
+ char tmpbuf[24];
+ char *tb = &tmpbuf[0];
+ char fieldsep = ':';
+ int myerrno = EINVAL;
+
+ name = pass = class = gecos = dir = shell = NULL;
+ p = buffer;
+
+ /* pw_name field */
+ name = NULL;
+ if (getfield(&name, 0, &p, fieldsep) == NULL || strlen(name) == 0) {
+ goto error;
+ }
+
+ /* pw_passwd field */
+ pass = NULL;
+ if (getfield(&pass, 0, &p, fieldsep) == NULL) { /*%< field can be empty */
+ goto error;
+ }
+
+
+ /* pw_uid field */
+ tb = tmpbuf;
+ if (getfield(&tb, sizeof tmpbuf, &p, fieldsep) == NULL ||
+ strlen(tb) == 0) {
+ goto error;
+ }
+ t = strtol(tmpbuf, &tb, 10);
+ if (*tb) {
+ goto error; /*%< junk in value */
+ }
+ pwuid = (uid_t)t;
+ if ((long) pwuid != t) { /*%< value must have been too big. */
+ goto error;
+ }
+
+
+
+ /* pw_gid field */
+ tb = tmpbuf;
+ if (getfield(&tb, sizeof tmpbuf, &p, fieldsep) == NULL ||
+ strlen(tb) == 0) {
+ goto error;
+ }
+ t = strtol(tmpbuf, &tb, 10);
+ if (*tb) {
+ goto error; /*%< junk in value */
+ }
+ pwgid = (gid_t)t;
+ if ((long)pwgid != t) { /*%< value must have been too big. */
+ goto error;
+ }
+
+
+
+ /* pw_class field */
+ class = NULL;
+ if (getfield(&class, 0, &p, fieldsep) == NULL) {
+ goto error;
+ }
+
+
+
+ /* pw_change field */
+ tb = tmpbuf;
+ if (getfield(&tb, sizeof tmpbuf, &p, fieldsep) == NULL ||
+ strlen(tb) == 0) {
+ goto error;
+ }
+ t = strtol(tmpbuf, &tb, 10);
+ if (*tb) {
+ goto error; /*%< junk in value */
+ }
+ pwchange = (time_t)t;
+ if ((long)pwchange != t) { /*%< value must have been too big. */
+ goto error;
+ }
+
+
+
+ /* pw_expire field */
+ tb = tmpbuf;
+ if (getfield(&tb, sizeof tmpbuf, &p, fieldsep) == NULL ||
+ strlen(tb) == 0) {
+ goto error;
+ }
+ t = strtol(tmpbuf, &tb, 10);
+ if (*tb) {
+ goto error; /*%< junk in value */
+ }
+ pwexpire = (time_t)t;
+ if ((long) pwexpire != t) { /*%< value must have been too big. */
+ goto error;
+ }
+
+
+
+ /* pw_gecos field */
+ gecos = NULL;
+ if (getfield(&gecos, 0, &p, fieldsep) == NULL) {
+ goto error;
+ }
+
+
+
+ /* pw_dir field */
+ dir = NULL;
+ if (getfield(&dir, 0, &p, fieldsep) == NULL) {
+ goto error;
+ }
+
+
+
+ /* pw_shell field */
+ shell = NULL;
+ if (getfield(&shell, 0, &p, fieldsep) == NULL) {
+ goto error;
+ }
+
+
+
+ pw->pw_name = name;
+ pw->pw_passwd = pass;
+ pw->pw_uid = pwuid;
+ pw->pw_gid = pwgid;
+ pw->pw_gecos = gecos;
+ pw->pw_dir = dir;
+ pw->pw_shell = shell;
+
+#ifdef HAVE_PW_CHANGE
+ pw->pw_change = pwchange;
+#endif
+#ifdef HAVE_PW_CLASS
+ pw->pw_class = class;
+#endif
+#ifdef HAVE_PW_EXPIRE
+ pw->pw_expire = pwexpire;
+#endif
+
+ return (0);
+
+ error:
+ errno = myerrno;
+
+ if (name != NULL) free(name);
+ if (pass != NULL) free(pass);
+ if (gecos != NULL) free(gecos);
+ if (dir != NULL) free(dir);
+ if (shell != NULL) free(shell);
+
+ return (-1);
+}
+
+/* ------------------------- struct passwd ------------------------- */
+#endif /* WANT_IRS_PW */
+/* +++++++++++++++++++++++++ struct group +++++++++++++++++++++++++ */
+
+/*%
+ * int irp_marshall_gr(const struct group *gr, char **buffer, size_t *len)
+ *
+ * notes: \li
+ *
+ * See irpmarshall.h.
+ *
+ * return: \li
+ *
+ * 0 on success, -1 on failure
+ */
+
+int
+irp_marshall_gr(const struct group *gr, char **buffer, size_t *len) {
+ size_t need = 1; /*%< for null byte */
+ char grGid[24];
+ const char *fieldsep = COLONSTR;
+
+ if (gr == NULL || len == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ sprintf(grGid, "%ld", (long)gr->gr_gid);
+
+ need += strlen(gr->gr_name) + 1;
+#ifndef MISSING_GR_PASSWD
+ need += strlen(gr->gr_passwd) + 1;
+#else
+ need++;
+#endif
+ need += strlen(grGid) + 1;
+ need += joinlength(gr->gr_mem) + 1;
+
+ if (buffer == NULL) {
+ *len = need;
+ return (0);
+ }
+
+ if (*buffer != NULL && need > *len) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ if (*buffer == NULL) {
+ need += 2; /*%< for CRLF */
+ *buffer = memget(need);
+ if (*buffer == NULL) {
+ errno = ENOMEM;
+ return (-1);
+ }
+
+ *len = need;
+ }
+
+ strcpy(*buffer, gr->gr_name); strcat(*buffer, fieldsep);
+#ifndef MISSING_GR_PASSWD
+ strcat(*buffer, gr->gr_passwd);
+#endif
+ strcat(*buffer, fieldsep);
+ strcat(*buffer, grGid); strcat(*buffer, fieldsep);
+ joinarray(gr->gr_mem, *buffer, COMMA) ; strcat(*buffer, fieldsep);
+
+ return (0);
+}
+
+/*%
+ * int irp_unmarshall_gr(struct group *gr, char *buffer)
+ *
+ * notes: \li
+ *
+ * See irpmarshall.h
+ *
+ * return: \li
+ *
+ * 0 on success and -1 on failure.
+ *
+ */
+
+int
+irp_unmarshall_gr(struct group *gr, char *buffer) {
+ char *p, *q;
+ gid_t grgid;
+ long t;
+ char *name = NULL;
+ char *pass = NULL;
+ char **members = NULL;
+ char tmpbuf[24];
+ char *tb;
+ char fieldsep = ':';
+ int myerrno = EINVAL;
+
+ if (gr == NULL || buffer == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ p = buffer;
+
+ /* gr_name field */
+ name = NULL;
+ if (getfield(&name, 0, &p, fieldsep) == NULL || strlen(name) == 0U) {
+ goto error;
+ }
+
+
+ /* gr_passwd field */
+ pass = NULL;
+ if (getfield(&pass, 0, &p, fieldsep) == NULL) {
+ goto error;
+ }
+
+
+ /* gr_gid field */
+ tb = tmpbuf;
+ if (getfield(&tb, sizeof tmpbuf, &p, fieldsep) == NULL ||
+ strlen(tb) == 0U) {
+ goto error;
+ }
+ t = strtol(tmpbuf, &tb, 10);
+ if (*tb) {
+ goto error; /*%< junk in value */
+ }
+ grgid = (gid_t)t;
+ if ((long) grgid != t) { /*%< value must have been too big. */
+ goto error;
+ }
+
+
+ /* gr_mem field. Member names are separated by commas */
+ q = strchr(p, fieldsep);
+ if (q == NULL) {
+ goto error;
+ }
+ members = splitarray(p, q, COMMA);
+ if (members == NULL) {
+ myerrno = errno;
+ goto error;
+ }
+ p = q + 1;
+
+
+ gr->gr_name = name;
+#ifndef MISSING_GR_PASSWD
+ gr->gr_passwd = pass;
+#endif
+ gr->gr_gid = grgid;
+ gr->gr_mem = members;
+
+ return (0);
+
+ error:
+ errno = myerrno;
+
+ if (name != NULL) free(name);
+ if (pass != NULL) free(pass);
+
+ return (-1);
+}
+
+
+/* ------------------------- struct group ------------------------- */
+
+
+
+
+/* +++++++++++++++++++++++++ struct servent +++++++++++++++++++++++++ */
+
+/*%
+ * int irp_marshall_sv(const struct servent *sv, char **buffer, size_t *len)
+ *
+ * notes: \li
+ *
+ * See irpmarshall.h
+ *
+ * return: \li
+ *
+ * 0 on success, -1 on failure.
+ *
+ */
+
+int
+irp_marshall_sv(const struct servent *sv, char **buffer, size_t *len) {
+ size_t need = 1; /*%< for null byte */
+ char svPort[24];
+ const char *fieldsep = COLONSTR;
+ short realport;
+
+ if (sv == NULL || len == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ /* the int s_port field is actually a short in network order. We
+ want host order to make the marshalled data look correct */
+ realport = ntohs((short)sv->s_port);
+ sprintf(svPort, "%d", realport);
+
+ need += strlen(sv->s_name) + 1;
+ need += joinlength(sv->s_aliases) + 1;
+ need += strlen(svPort) + 1;
+ need += strlen(sv->s_proto) + 1;
+
+ if (buffer == NULL) {
+ *len = need;
+ return (0);
+ }
+
+ if (*buffer != NULL && need > *len) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ if (*buffer == NULL) {
+ need += 2; /*%< for CRLF */
+ *buffer = memget(need);
+ if (*buffer == NULL) {
+ errno = ENOMEM;
+ return (-1);
+ }
+
+ *len = need;
+ }
+
+ strcpy(*buffer, sv->s_name); strcat(*buffer, fieldsep);
+ joinarray(sv->s_aliases, *buffer, COMMA); strcat(*buffer, fieldsep);
+ strcat(*buffer, svPort); strcat(*buffer, fieldsep);
+ strcat(*buffer, sv->s_proto); strcat(*buffer, fieldsep);
+
+ return (0);
+}
+
+/*%
+ * int irp_unmarshall_sv(struct servent *sv, char *buffer)
+ *
+ * notes: \li
+ *
+ * See irpmarshall.h
+ *
+ * return: \li
+ *
+ * 0 on success, -1 on failure.
+ *
+ */
+
+int
+irp_unmarshall_sv(struct servent *sv, char *buffer) {
+ char *p, *q;
+ short svport;
+ long t;
+ char *name = NULL;
+ char *proto = NULL;
+ char **aliases = NULL;
+ char tmpbuf[24];
+ char *tb;
+ char fieldsep = ':';
+ int myerrno = EINVAL;
+
+ if (sv == NULL || buffer == NULL)
+ return (-1);
+
+ p = buffer;
+
+
+ /* s_name field */
+ name = NULL;
+ if (getfield(&name, 0, &p, fieldsep) == NULL || strlen(name) == 0U) {
+ goto error;
+ }
+
+
+ /* s_aliases field */
+ q = strchr(p, fieldsep);
+ if (q == NULL) {
+ goto error;
+ }
+ aliases = splitarray(p, q, COMMA);
+ if (aliases == NULL) {
+ myerrno = errno;
+ goto error;
+ }
+ p = q + 1;
+
+
+ /* s_port field */
+ tb = tmpbuf;
+ if (getfield(&tb, sizeof tmpbuf, &p, fieldsep) == NULL ||
+ strlen(tb) == 0U) {
+ goto error;
+ }
+ t = strtol(tmpbuf, &tb, 10);
+ if (*tb) {
+ goto error; /*%< junk in value */
+ }
+ svport = (short)t;
+ if ((long) svport != t) { /*%< value must have been too big. */
+ goto error;
+ }
+ svport = htons(svport);
+
+ /* s_proto field */
+ proto = NULL;
+ if (getfield(&proto, 0, &p, fieldsep) == NULL) {
+ goto error;
+ }
+
+ sv->s_name = name;
+ sv->s_aliases = aliases;
+ sv->s_port = svport;
+ sv->s_proto = proto;
+
+ return (0);
+
+ error:
+ errno = myerrno;
+
+ if (name != NULL) free(name);
+ if (proto != NULL) free(proto);
+ free_array(aliases, 0);
+
+ return (-1);
+}
+
+
+/* ------------------------- struct servent ------------------------- */
+
+/* +++++++++++++++++++++++++ struct protoent +++++++++++++++++++++++++ */
+
+/*%
+ * int irp_marshall_pr(struct protoent *pr, char **buffer, size_t *len)
+ *
+ * notes: \li
+ *
+ * See irpmarshall.h
+ *
+ * return: \li
+ *
+ * 0 on success and -1 on failure.
+ *
+ */
+
+int
+irp_marshall_pr(struct protoent *pr, char **buffer, size_t *len) {
+ size_t need = 1; /*%< for null byte */
+ char prProto[24];
+ const char *fieldsep = COLONSTR;
+
+ if (pr == NULL || len == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ sprintf(prProto, "%d", (int)pr->p_proto);
+
+ need += strlen(pr->p_name) + 1;
+ need += joinlength(pr->p_aliases) + 1;
+ need += strlen(prProto) + 1;
+
+ if (buffer == NULL) {
+ *len = need;
+ return (0);
+ }
+
+ if (*buffer != NULL && need > *len) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ if (*buffer == NULL) {
+ need += 2; /*%< for CRLF */
+ *buffer = memget(need);
+ if (*buffer == NULL) {
+ errno = ENOMEM;
+ return (-1);
+ }
+
+ *len = need;
+ }
+
+ strcpy(*buffer, pr->p_name); strcat(*buffer, fieldsep);
+ joinarray(pr->p_aliases, *buffer, COMMA); strcat(*buffer, fieldsep);
+ strcat(*buffer, prProto); strcat(*buffer, fieldsep);
+
+ return (0);
+
+}
+
+/*%
+ * int irp_unmarshall_pr(struct protoent *pr, char *buffer)
+ *
+ * notes: \li
+ *
+ * See irpmarshall.h
+ *
+ * return: \li
+ *
+ * 0 on success, -1 on failure
+ *
+ */
+
+int irp_unmarshall_pr(struct protoent *pr, char *buffer) {
+ char *p, *q;
+ int prproto;
+ long t;
+ char *name = NULL;
+ char **aliases = NULL;
+ char tmpbuf[24];
+ char *tb;
+ char fieldsep = ':';
+ int myerrno = EINVAL;
+
+ if (pr == NULL || buffer == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ p = buffer;
+
+ /* p_name field */
+ name = NULL;
+ if (getfield(&name, 0, &p, fieldsep) == NULL || strlen(name) == 0U) {
+ goto error;
+ }
+
+
+ /* p_aliases field */
+ q = strchr(p, fieldsep);
+ if (q == NULL) {
+ goto error;
+ }
+ aliases = splitarray(p, q, COMMA);
+ if (aliases == NULL) {
+ myerrno = errno;
+ goto error;
+ }
+ p = q + 1;
+
+
+ /* p_proto field */
+ tb = tmpbuf;
+ if (getfield(&tb, sizeof tmpbuf, &p, fieldsep) == NULL ||
+ strlen(tb) == 0U) {
+ goto error;
+ }
+ t = strtol(tmpbuf, &tb, 10);
+ if (*tb) {
+ goto error; /*%< junk in value */
+ }
+ prproto = (int)t;
+ if ((long) prproto != t) { /*%< value must have been too big. */
+ goto error;
+ }
+
+ pr->p_name = name;
+ pr->p_aliases = aliases;
+ pr->p_proto = prproto;
+
+ return (0);
+
+ error:
+ errno = myerrno;
+
+ if (name != NULL) free(name);
+ free_array(aliases, 0);
+
+ return (-1);
+}
+
+/* ------------------------- struct protoent ------------------------- */
+
+
+
+/* +++++++++++++++++++++++++ struct hostent +++++++++++++++++++++++++ */
+
+/*%
+ * int irp_marshall_ho(struct hostent *ho, char **buffer, size_t *len)
+ *
+ * notes: \li
+ *
+ * See irpmarshall.h.
+ *
+ * return: \li
+ *
+ * 0 on success, -1 on failure.
+ *
+ */
+
+int
+irp_marshall_ho(struct hostent *ho, char **buffer, size_t *len) {
+ size_t need = 1; /*%< for null byte */
+ char hoaddrtype[24];
+ char holength[24];
+ char **av;
+ char *p;
+ int addrlen;
+ int malloced = 0;
+ size_t remlen;
+ const char *fieldsep = "@";
+
+ if (ho == NULL || len == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ switch(ho->h_addrtype) {
+ case AF_INET:
+ strcpy(hoaddrtype, "AF_INET");
+ break;
+
+ case AF_INET6:
+ strcpy(hoaddrtype, "AF_INET6");
+ break;
+
+ default:
+ errno = EINVAL;
+ return (-1);
+ }
+
+ sprintf(holength, "%d", ho->h_length);
+
+ need += strlen(ho->h_name) + 1;
+ need += joinlength(ho->h_aliases) + 1;
+ need += strlen(hoaddrtype) + 1;
+ need += strlen(holength) + 1;
+
+ /* we determine an upper bound on the string length needed, not an
+ exact length. */
+ addrlen = (ho->h_addrtype == AF_INET ? 16 : 46) ; /*%< XX other AF's?? */
+ for (av = ho->h_addr_list; av != NULL && *av != NULL ; av++)
+ need += addrlen;
+
+ if (buffer == NULL) {
+ *len = need;
+ return (0);
+ }
+
+ if (*buffer != NULL && need > *len) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ if (*buffer == NULL) {
+ need += 2; /*%< for CRLF */
+ *buffer = memget(need);
+ if (*buffer == NULL) {
+ errno = ENOMEM;
+ return (-1);
+ }
+
+ *len = need;
+ malloced = 1;
+ }
+
+ strcpy(*buffer, ho->h_name); strcat(*buffer, fieldsep);
+ joinarray(ho->h_aliases, *buffer, COMMA); strcat(*buffer, fieldsep);
+ strcat(*buffer, hoaddrtype); strcat(*buffer, fieldsep);
+ strcat(*buffer, holength); strcat(*buffer, fieldsep);
+
+ p = *buffer + strlen(*buffer);
+ remlen = need - strlen(*buffer);
+ for (av = ho->h_addr_list ; av != NULL && *av != NULL ; av++) {
+ if (inet_ntop(ho->h_addrtype, *av, p, remlen) == NULL) {
+ goto error;
+ }
+ if (*(av + 1) != NULL)
+ strcat(p, COMMASTR);
+ remlen -= strlen(p);
+ p += strlen(p);
+ }
+ strcat(*buffer, fieldsep);
+
+ return (0);
+
+ error:
+ if (malloced) {
+ memput(*buffer, need);
+ }
+
+ return (-1);
+}
+
+/*%
+ * int irp_unmarshall_ho(struct hostent *ho, char *buffer)
+ *
+ * notes: \li
+ *
+ * See irpmarshall.h.
+ *
+ * return: \li
+ *
+ * 0 on success, -1 on failure.
+ *
+ */
+
+int
+irp_unmarshall_ho(struct hostent *ho, char *buffer) {
+ char *p, *q, *r;
+ int hoaddrtype;
+ int holength;
+ long t;
+ char *name;
+ char **aliases = NULL;
+ char **hohaddrlist = NULL;
+ size_t hoaddrsize;
+ char tmpbuf[24];
+ char *tb;
+ char **alist;
+ int addrcount;
+ char fieldsep = '@';
+ int myerrno = EINVAL;
+
+ if (ho == NULL || buffer == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ p = buffer;
+
+ /* h_name field */
+ name = NULL;
+ if (getfield(&name, 0, &p, fieldsep) == NULL || strlen(name) == 0U) {
+ goto error;
+ }
+
+
+ /* h_aliases field */
+ q = strchr(p, fieldsep);
+ if (q == NULL) {
+ goto error;
+ }
+ aliases = splitarray(p, q, COMMA);
+ if (aliases == NULL) {
+ myerrno = errno;
+ goto error;
+ }
+ p = q + 1;
+
+
+ /* h_addrtype field */
+ tb = tmpbuf;
+ if (getfield(&tb, sizeof tmpbuf, &p, fieldsep) == NULL ||
+ strlen(tb) == 0U) {
+ goto error;
+ }
+ if (strcmp(tmpbuf, "AF_INET") == 0)
+ hoaddrtype = AF_INET;
+ else if (strcmp(tmpbuf, "AF_INET6") == 0)
+ hoaddrtype = AF_INET6;
+ else
+ goto error;
+
+
+ /* h_length field */
+ tb = tmpbuf;
+ if (getfield(&tb, sizeof tmpbuf, &p, fieldsep) == NULL ||
+ strlen(tb) == 0U) {
+ goto error;
+ }
+ t = strtol(tmpbuf, &tb, 10);
+ if (*tb) {
+ goto error; /*%< junk in value */
+ }
+ holength = (int)t;
+ if ((long) holength != t) { /*%< value must have been too big. */
+ goto error;
+ }
+
+
+ /* h_addr_list field */
+ q = strchr(p, fieldsep);
+ if (q == NULL)
+ goto error;
+
+ /* count how many addresss are in there */
+ if (q > p + 1) {
+ for (addrcount = 1, r = p ; r != q ; r++) {
+ if (*r == COMMA)
+ addrcount++;
+ }
+ } else {
+ addrcount = 0;
+ }
+
+ hoaddrsize = (addrcount + 1) * sizeof (char *);
+ hohaddrlist = malloc(hoaddrsize);
+ if (hohaddrlist == NULL) {
+ myerrno = ENOMEM;
+ goto error;
+ }
+
+ memset(hohaddrlist, 0x0, hoaddrsize);
+
+ alist = hohaddrlist;
+ for (t = 0, r = p ; r != q ; p = r + 1, t++) {
+ char saved;
+ while (r != q && *r != COMMA) r++;
+ saved = *r;
+ *r = 0x0;
+
+ alist[t] = malloc(hoaddrtype == AF_INET ? 4 : 16);
+ if (alist[t] == NULL) {
+ myerrno = ENOMEM;
+ goto error;
+ }
+
+ if (inet_pton(hoaddrtype, p, alist[t]) == -1)
+ goto error;
+ *r = saved;
+ }
+ alist[t] = NULL;
+
+ ho->h_name = name;
+ ho->h_aliases = aliases;
+ ho->h_addrtype = hoaddrtype;
+ ho->h_length = holength;
+ ho->h_addr_list = hohaddrlist;
+
+ return (0);
+
+ error:
+ errno = myerrno;
+
+ if (name != NULL) free(name);
+ free_array(hohaddrlist, 0);
+ free_array(aliases, 0);
+
+ return (-1);
+}
+
+/* ------------------------- struct hostent------------------------- */
+
+
+
+/* +++++++++++++++++++++++++ struct netgrp +++++++++++++++++++++++++ */
+
+/*%
+ * int irp_marshall_ng(const char *host, const char *user,
+ * const char *domain, char *buffer, size_t *len)
+ *
+ * notes: \li
+ *
+ * See note for irp_marshall_ng_start
+ *
+ * return: \li
+ *
+ * 0 on success, 0 on failure.
+ *
+ */
+
+int
+irp_marshall_ng(const char *host, const char *user, const char *domain,
+ char **buffer, size_t *len) {
+ size_t need = 1; /*%< for nul byte */
+ const char *fieldsep = ",";
+
+ if (len == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ need += 4; /*%< two parens and two commas */
+ need += (host == NULL ? 0 : strlen(host));
+ need += (user == NULL ? 0 : strlen(user));
+ need += (domain == NULL ? 0 : strlen(domain));
+
+ if (buffer == NULL) {
+ *len = need;
+ return (0);
+ } else if (*buffer != NULL && need > *len) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ if (*buffer == NULL) {
+ need += 2; /*%< for CRLF */
+ *buffer = memget(need);
+ if (*buffer == NULL) {
+ errno = ENOMEM;
+ return (-1);
+ }
+
+ *len = need;
+ }
+
+ (*buffer)[0] = '(';
+ (*buffer)[1] = '\0';
+
+ if (host != NULL)
+ strcat(*buffer, host);
+ strcat(*buffer, fieldsep);
+
+ if (user != NULL)
+ strcat(*buffer, user);
+ strcat(*buffer, fieldsep);
+
+ if (domain != NULL)
+ strcat(*buffer, domain);
+ strcat(*buffer, ")");
+
+ return (0);
+}
+
+
+
+/* ---------- */
+
+/*%
+ * int irp_unmarshall_ng(const char **host, const char **user,
+ * const char **domain, char *buffer)
+ *
+ * notes: \li
+ *
+ * Unpacks the BUFFER into 3 character arrays it allocates and assigns
+ * to *HOST, *USER and *DOMAIN. If any field of the value is empty,
+ * then the corresponding paramater value will be set to NULL.
+ *
+ * return: \li
+ *
+ * 0 on success and -1 on failure.
+ */
+
+int
+irp_unmarshall_ng(const char **hostp, const char **userp, const char **domainp,
+ char *buffer)
+{
+ char *p, *q;
+ char fieldsep = ',';
+ int myerrno = EINVAL;
+ char *host, *user, *domain;
+
+ if (userp == NULL || hostp == NULL ||
+ domainp == NULL || buffer == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ host = user = domain = NULL;
+
+ p = buffer;
+ while (isspace((unsigned char)*p)) {
+ p++;
+ }
+ if (*p != '(') {
+ goto error;
+ }
+
+ q = p + 1;
+ while (*q && *q != fieldsep)
+ q++;
+ if (!*q) {
+ goto error;
+ } else if (q > p + 1) {
+ host = strndup(p, q - p);
+ }
+
+ p = q + 1;
+ if (!*p) {
+ goto error;
+ } else if (*p != fieldsep) {
+ q = p + 1;
+ while (*q && *q != fieldsep)
+ q++;
+ if (!*q) {
+ goto error;
+ }
+ user = strndup(p, q - p);
+ } else {
+ p++;
+ }
+
+ if (!*p) {
+ goto error;
+ } else if (*p != ')') {
+ q = p + 1;
+ while (*q && *q != ')')
+ q++;
+ if (!*q) {
+ goto error;
+ }
+ domain = strndup(p, q - p);
+ }
+ *hostp = host;
+ *userp = user;
+ *domainp = domain;
+
+ return (0);
+
+ error:
+ errno = myerrno;
+
+ if (host != NULL) free(host);
+ if (user != NULL) free(user);
+
+ return (-1);
+}
+
+/* ------------------------- struct netgrp ------------------------- */
+
+
+
+
+/* +++++++++++++++++++++++++ struct nwent +++++++++++++++++++++++++ */
+
+/*%
+ * int irp_marshall_nw(struct nwent *ne, char **buffer, size_t *len)
+ *
+ * notes: \li
+ *
+ * See at top.
+ *
+ * return: \li
+ *
+ * 0 on success and -1 on failure.
+ *
+ */
+
+int
+irp_marshall_nw(struct nwent *ne, char **buffer, size_t *len) {
+ size_t need = 1; /*%< for null byte */
+ char nAddrType[24];
+ char nNet[MAXPADDRSIZE];
+ const char *fieldsep = COLONSTR;
+
+ if (ne == NULL || len == NULL) {
+ return (-1);
+ }
+
+ strcpy(nAddrType, ADDR_T_STR(ne->n_addrtype));
+
+ if (inet_net_ntop(ne->n_addrtype, ne->n_addr, ne->n_length,
+ nNet, sizeof nNet) == NULL) {
+ return (-1);
+ }
+
+
+ need += strlen(ne->n_name) + 1;
+ need += joinlength(ne->n_aliases) + 1;
+ need += strlen(nAddrType) + 1;
+ need += strlen(nNet) + 1;
+
+ if (buffer == NULL) {
+ *len = need;
+ return (0);
+ }
+
+ if (*buffer != NULL && need > *len) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ if (*buffer == NULL) {
+ need += 2; /*%< for CRLF */
+ *buffer = memget(need);
+ if (*buffer == NULL) {
+ errno = ENOMEM;
+ return (-1);
+ }
+
+ *len = need;
+ }
+
+ strcpy(*buffer, ne->n_name); strcat(*buffer, fieldsep);
+ joinarray(ne->n_aliases, *buffer, COMMA) ; strcat(*buffer, fieldsep);
+ strcat(*buffer, nAddrType); strcat(*buffer, fieldsep);
+ strcat(*buffer, nNet); strcat(*buffer, fieldsep);
+
+ return (0);
+}
+
+/*%
+ * int irp_unmarshall_nw(struct nwent *ne, char *buffer)
+ *
+ * notes: \li
+ *
+ * See note up top.
+ *
+ * return: \li
+ *
+ * 0 on success and -1 on failure.
+ *
+ */
+
+int
+irp_unmarshall_nw(struct nwent *ne, char *buffer) {
+ char *p, *q;
+ int naddrtype;
+ long nnet;
+ int bits;
+ char *name = NULL;
+ char **aliases = NULL;
+ char tmpbuf[24];
+ char *tb;
+ char fieldsep = ':';
+ int myerrno = EINVAL;
+
+ if (ne == NULL || buffer == NULL) {
+ goto error;
+ }
+
+ p = buffer;
+
+ /* n_name field */
+ name = NULL;
+ if (getfield(&name, 0, &p, fieldsep) == NULL || strlen(name) == 0U) {
+ goto error;
+ }
+
+
+ /* n_aliases field. Aliases are separated by commas */
+ q = strchr(p, fieldsep);
+ if (q == NULL) {
+ goto error;
+ }
+ aliases = splitarray(p, q, COMMA);
+ if (aliases == NULL) {
+ myerrno = errno;
+ goto error;
+ }
+ p = q + 1;
+
+
+ /* h_addrtype field */
+ tb = tmpbuf;
+ if (getfield(&tb, sizeof tmpbuf, &p, fieldsep) == NULL ||
+ strlen(tb) == 0U) {
+ goto error;
+ }
+ if (strcmp(tmpbuf, "AF_INET") == 0)
+ naddrtype = AF_INET;
+ else if (strcmp(tmpbuf, "AF_INET6") == 0)
+ naddrtype = AF_INET6;
+ else
+ goto error;
+
+
+ /* n_net field */
+ tb = tmpbuf;
+ if (getfield(&tb, sizeof tmpbuf, &p, fieldsep) == NULL ||
+ strlen(tb) == 0U) {
+ goto error;
+ }
+ nnet = 0;
+ bits = inet_net_pton(naddrtype, tmpbuf, &nnet, sizeof nnet);
+ if (bits < 0) {
+ goto error;
+ }
+
+ /* nnet = ntohl(nnet); */ /* keep in network order for nwent */
+
+ ne->n_name = name;
+ ne->n_aliases = aliases;
+ ne->n_addrtype = naddrtype;
+ ne->n_length = bits;
+ ne->n_addr = malloc(sizeof nnet);
+ if (ne->n_addr == NULL) {
+ goto error;
+ }
+
+ memcpy(ne->n_addr, &nnet, sizeof nnet);
+
+ return (0);
+
+ error:
+ errno = myerrno;
+
+ if (name != NULL) free(name);
+ free_array(aliases, 0);
+
+ return (-1);
+}
+
+
+/* ------------------------- struct nwent ------------------------- */
+
+
+/* +++++++++++++++++++++++++ struct netent +++++++++++++++++++++++++ */
+
+/*%
+ * int irp_marshall_ne(struct netent *ne, char **buffer, size_t *len)
+ *
+ * notes: \li
+ *
+ * See at top.
+ *
+ * return: \li
+ *
+ * 0 on success and -1 on failure.
+ *
+ */
+
+int
+irp_marshall_ne(struct netent *ne, char **buffer, size_t *len) {
+ size_t need = 1; /*%< for null byte */
+ char nAddrType[24];
+ char nNet[MAXPADDRSIZE];
+ const char *fieldsep = COLONSTR;
+ long nval;
+
+ if (ne == NULL || len == NULL) {
+ return (-1);
+ }
+
+ strcpy(nAddrType, ADDR_T_STR(ne->n_addrtype));
+
+ nval = htonl(ne->n_net);
+ if (inet_ntop(ne->n_addrtype, &nval, nNet, sizeof nNet) == NULL) {
+ return (-1);
+ }
+
+ need += strlen(ne->n_name) + 1;
+ need += joinlength(ne->n_aliases) + 1;
+ need += strlen(nAddrType) + 1;
+ need += strlen(nNet) + 1;
+
+ if (buffer == NULL) {
+ *len = need;
+ return (0);
+ }
+
+ if (*buffer != NULL && need > *len) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ if (*buffer == NULL) {
+ need += 2; /*%< for CRLF */
+ *buffer = memget(need);
+ if (*buffer == NULL) {
+ errno = ENOMEM;
+ return (-1);
+ }
+
+ *len = need;
+ }
+
+ strcpy(*buffer, ne->n_name); strcat(*buffer, fieldsep);
+ joinarray(ne->n_aliases, *buffer, COMMA) ; strcat(*buffer, fieldsep);
+ strcat(*buffer, nAddrType); strcat(*buffer, fieldsep);
+ strcat(*buffer, nNet); strcat(*buffer, fieldsep);
+
+ return (0);
+}
+
+/*%
+ * int irp_unmarshall_ne(struct netent *ne, char *buffer)
+ *
+ * notes: \li
+ *
+ * See note up top.
+ *
+ * return: \li
+ *
+ * 0 on success and -1 on failure.
+ *
+ */
+
+int
+irp_unmarshall_ne(struct netent *ne, char *buffer) {
+ char *p, *q;
+ int naddrtype;
+ long nnet;
+ int bits;
+ char *name = NULL;
+ char **aliases = NULL;
+ char tmpbuf[24];
+ char *tb;
+ char fieldsep = ':';
+ int myerrno = EINVAL;
+
+ if (ne == NULL || buffer == NULL) {
+ goto error;
+ }
+
+ p = buffer;
+
+ /* n_name field */
+ name = NULL;
+ if (getfield(&name, 0, &p, fieldsep) == NULL || strlen(name) == 0U) {
+ goto error;
+ }
+
+
+ /* n_aliases field. Aliases are separated by commas */
+ q = strchr(p, fieldsep);
+ if (q == NULL) {
+ goto error;
+ }
+ aliases = splitarray(p, q, COMMA);
+ if (aliases == NULL) {
+ myerrno = errno;
+ goto error;
+ }
+ p = q + 1;
+
+
+ /* h_addrtype field */
+ tb = tmpbuf;
+ if (getfield(&tb, sizeof tmpbuf, &p, fieldsep) == NULL ||
+ strlen(tb) == 0U) {
+ goto error;
+ }
+ if (strcmp(tmpbuf, "AF_INET") == 0)
+ naddrtype = AF_INET;
+ else if (strcmp(tmpbuf, "AF_INET6") == 0)
+ naddrtype = AF_INET6;
+ else
+ goto error;
+
+
+ /* n_net field */
+ tb = tmpbuf;
+ if (getfield(&tb, sizeof tmpbuf, &p, fieldsep) == NULL ||
+ strlen(tb) == 0U) {
+ goto error;
+ }
+ bits = inet_net_pton(naddrtype, tmpbuf, &nnet, sizeof nnet);
+ if (bits < 0) {
+ goto error;
+ }
+ nnet = ntohl(nnet);
+
+ ne->n_name = name;
+ ne->n_aliases = aliases;
+ ne->n_addrtype = naddrtype;
+ ne->n_net = nnet;
+
+ return (0);
+
+ error:
+ errno = myerrno;
+
+ if (name != NULL) free(name);
+ free_array(aliases, 0);
+
+ return (-1);
+}
+
+
+/* ------------------------- struct netent ------------------------- */
+
+
+/* =========================================================================== */
+
+/*%
+ * static char ** splitarray(const char *buffer, const char *buffend, char delim)
+ *
+ * notes: \li
+ *
+ * Split a delim separated astring. Not allowed
+ * to have two delims next to each other. BUFFER points to begining of
+ * string, BUFFEND points to one past the end of the string
+ * (i.e. points at where the null byte would be if null
+ * terminated).
+ *
+ * return: \li
+ *
+ * Returns a malloced array of pointers, each pointer pointing to a
+ * malloced string. If BUFEER is an empty string, then return values is
+ * array of 1 pointer that is NULL. Returns NULL on failure.
+ *
+ */
+
+static char **
+splitarray(const char *buffer, const char *buffend, char delim) {
+ const char *p, *q;
+ int count = 0;
+ char **arr = NULL;
+ char **aptr;
+
+ if (buffend < buffer)
+ return (NULL);
+ else if (buffend > buffer && *buffer == delim)
+ return (NULL);
+ else if (buffend > buffer && *(buffend - 1) == delim)
+ return (NULL);
+
+ /* count the number of field and make sure none are empty */
+ if (buffend > buffer + 1) {
+ for (count = 1, q = buffer ; q != buffend ; q++) {
+ if (*q == delim) {
+ if (q > buffer && (*(q - 1) == delim)) {
+ errno = EINVAL;
+ return (NULL);
+ }
+ count++;
+ }
+ }
+ }
+
+ if (count > 0) {
+ count++ ; /*%< for NULL at end */
+ aptr = arr = malloc(count * sizeof (char *));
+ if (aptr == NULL) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+
+ memset(arr, 0x0, count * sizeof (char *));
+ for (p = buffer ; p < buffend ; p++) {
+ for (q = p ; *q != delim && q != buffend ; q++)
+ /* nothing */;
+ *aptr = strndup(p, q - p);
+
+ p = q;
+ aptr++;
+ }
+ *aptr = NULL;
+ } else {
+ arr = malloc(sizeof (char *));
+ if (arr == NULL) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+
+ *arr = NULL;
+ }
+
+ return (arr);
+}
+
+/*%
+ * static size_t joinlength(char * const *argv)
+ *
+ * return: \li
+ *
+ * the number of bytes in all the arrays pointed at
+ * by argv, including their null bytes(which will usually be turned
+ * into commas).
+ *
+ *
+ */
+
+static size_t
+joinlength(char * const *argv) {
+ int len = 0;
+
+ while (argv && *argv) {
+ len += (strlen(*argv) + 1);
+ argv++;
+ }
+
+ return (len);
+}
+
+/*%
+ * int joinarray(char * const *argv, char *buffer, char delim)
+ *
+ * notes: \li
+ *
+ * Copy all the ARGV strings into the end of BUFFER
+ * separating them with DELIM. BUFFER is assumed to have
+ * enough space to hold everything and to be already null-terminated.
+ *
+ * return: \li
+ *
+ * 0 unless argv or buffer is NULL.
+ *
+ *
+ */
+
+static int
+joinarray(char * const *argv, char *buffer, char delim) {
+ char * const *p;
+ char sep[2];
+
+ if (argv == NULL || buffer == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ sep[0] = delim;
+ sep[1] = 0x0;
+
+ for (p = argv ; *p != NULL ; p++) {
+ strcat(buffer, *p);
+ if (*(p + 1) != NULL) {
+ strcat(buffer, sep);
+ }
+ }
+
+ return (0);
+}
+
+/*%
+ * static char * getfield(char **res, size_t reslen, char **ptr, char delim)
+ *
+ * notes: \li
+ *
+ * Stores in *RES, which is a buffer of length RESLEN, a
+ * copy of the bytes from *PTR up to and including the first
+ * instance of DELIM. If *RES is NULL, then it will be
+ * assigned a malloced buffer to hold the copy. *PTR is
+ * modified to point at the found delimiter.
+ *
+ * return: \li
+ *
+ * If there was no delimiter, then NULL is returned,
+ * otherewise *RES is returned.
+ *
+ */
+
+static char *
+getfield(char **res, size_t reslen, char **ptr, char delim) {
+ char *q;
+
+ if (res == NULL || ptr == NULL || *ptr == NULL) {
+ errno = EINVAL;
+ return (NULL);
+ }
+
+ q = strchr(*ptr, delim);
+
+ if (q == NULL) {
+ errno = EINVAL;
+ return (NULL);
+ } else {
+ if (*res == NULL) {
+ *res = strndup(*ptr, q - *ptr);
+ } else {
+ if ((size_t)(q - *ptr + 1) > reslen) { /*%< to big for res */
+ errno = EINVAL;
+ return (NULL);
+ } else {
+ strncpy(*res, *ptr, q - *ptr);
+ (*res)[q - *ptr] = 0x0;
+ }
+ }
+ *ptr = q + 1;
+ }
+
+ return (*res);
+}
+
+
+
+
+
+#ifndef HAVE_STRNDUP
+/*
+ * static char * strndup(const char *str, size_t len)
+ *
+ * notes: \li
+ *
+ * like strdup, except do len bytes instead of the whole string. Always
+ * null-terminates.
+ *
+ * return: \li
+ *
+ * The newly malloced string.
+ *
+ */
+
+static char *
+strndup(const char *str, size_t len) {
+ char *p = malloc(len + 1);
+
+ if (p == NULL)
+ return (NULL);
+ strncpy(p, str, len);
+ p[len] = 0x0;
+ return (p);
+}
+#endif
+
+#if WANT_MAIN
+
+/*%
+ * static int strcmp_nws(const char *a, const char *b)
+ *
+ * notes: \li
+ *
+ * do a strcmp, except uneven lengths of whitespace compare the same
+ *
+ * return: \li
+ *
+ */
+
+static int
+strcmp_nws(const char *a, const char *b) {
+ while (*a && *b) {
+ if (isspace(*a) && isspace(*b)) {
+ do {
+ a++;
+ } while (isspace(*a));
+ do {
+ b++;
+ } while (isspace(*b));
+ }
+ if (*a < *b)
+ return (-1);
+ else if (*a > *b)
+ return (1);
+
+ a++;
+ b++;;
+ }
+
+ if (*a == *b)
+ return (0);
+ else if (*a > *b)
+ return (1);
+ else
+ return (-1);
+}
+
+#endif
+
+/*%
+ * static void free_array(char **argv, size_t entries)
+ *
+ * notes: \li
+ *
+ * Free argv and each of the pointers inside it. The end of
+ * the array is when a NULL pointer is found inside. If
+ * entries is > 0, then NULL pointers inside the array do
+ * not indicate the end of the array.
+ *
+ */
+
+static void
+free_array(char **argv, size_t entries) {
+ char **p = argv;
+ int useEntries = (entries > 0U);
+
+ if (argv == NULL)
+ return;
+
+ while ((useEntries && entries > 0U) || *p) {
+ if (*p)
+ free(*p);
+ p++;
+ if (useEntries)
+ entries--;
+ }
+ free(argv);
+}
+
+
+
+
+
+/* ************************************************** */
+
+#if WANT_MAIN
+
+/*% takes an option to indicate what sort of marshalling(read the code) and
+ an argument. If the argument looks like a marshalled buffer(has a ':'
+ embedded) then it's unmarshalled and the remarshalled and the new string
+ is compared to the old one.
+*/
+
+int
+main(int argc, char **argv) {
+ char buffer[1024];
+ char *b = &buffer[0];
+ size_t len = sizeof buffer;
+ char option;
+
+ if (argc < 2 || argv[1][0] != '-')
+ exit(1);
+
+ option = argv[1][1];
+ argv++;
+ argc--;
+
+
+#if 0
+ {
+ char buff[10];
+ char *p = argv[1], *q = &buff[0];
+
+ while (getfield(&q, sizeof buff, &p, ':') != NULL) {
+ printf("field: \"%s\"\n", q);
+ p++;
+ }
+ printf("p is now \"%s\"\n", p);
+ }
+#endif
+
+#if 0
+ {
+ char **x = splitarray(argv[1], argv[1] + strlen(argv[1]),
+ argv[2][0]);
+ char **p;
+
+ if (x == NULL)
+ printf("split failed\n");
+
+ for (p = x ; p != NULL && *p != NULL ; p++) {
+ printf("\"%s\"\n", *p);
+ }
+ }
+#endif
+
+#if 1
+ switch(option) {
+ case 'n': {
+ struct nwent ne;
+ int i;
+
+ if (strchr(argv[1], ':') != NULL) {
+ if (irp_unmarshall_nw(&ne, argv[1]) != 0) {
+ printf("Unmarhsalling failed\n");
+ exit(1);
+ }
+
+ printf("Name: \"%s\"\n", ne.n_name);
+ printf("Aliases:");
+ for (i = 0 ; ne.n_aliases[i] != NULL ; i++)
+ printf("\n\t\"%s\"", ne.n_aliases[i]);
+ printf("\nAddrtype: %s\n", ADDR_T_STR(ne.n_addrtype));
+ inet_net_ntop(ne.n_addrtype, ne.n_addr, ne.n_length,
+ buffer, sizeof buffer);
+ printf("Net: \"%s\"\n", buffer);
+ *((long*)ne.n_addr) = htonl(*((long*)ne.n_addr));
+ inet_net_ntop(ne.n_addrtype, ne.n_addr, ne.n_length,
+ buffer, sizeof buffer);
+ printf("Corrected Net: \"%s\"\n", buffer);
+ } else {
+ struct netent *np1 = getnetbyname(argv[1]);
+ ne.n_name = np1->n_name;
+ ne.n_aliases = np1->n_aliases;
+ ne.n_addrtype = np1->n_addrtype;
+ ne.n_addr = &np1->n_net;
+ ne.n_length = (IN_CLASSA(np1->n_net) ?
+ 8 :
+ (IN_CLASSB(np1->n_net) ?
+ 16 :
+ (IN_CLASSC(np1->n_net) ?
+ 24 : -1)));
+ np1->n_net = htonl(np1->n_net);
+ if (irp_marshall_nw(&ne, &b, &len) != 0) {
+ printf("Marshalling failed\n");
+ }
+ printf("%s\n", b);
+ }
+ break;
+ }
+
+
+ case 'r': {
+ char **hosts, **users, **domains;
+ size_t entries;
+ int i;
+ char *buff;
+ size_t size;
+ char *ngname;
+
+ if (strchr(argv[1], '(') != NULL) {
+ if (irp_unmarshall_ng(&ngname, &entries,
+ &hosts, &users, &domains,
+ argv[1]) != 0) {
+ printf("unmarshall failed\n");
+ exit(1);
+ }
+
+#define STRVAL(x) (x == NULL ? "*" : x)
+
+ printf("%s {\n", ngname);
+ for (i = 0 ; i < entries ; i++)
+ printf("\t\"%s\" : \"%s\" : \"%s\"\n",
+ STRVAL(hosts[i]),
+ STRVAL(users[i]),
+ STRVAL(domains[i]));
+ printf("}\n\n\n");
+
+
+ irp_marshall_ng_start(ngname, NULL, &size);
+ for (i = 0 ; i < entries ; i++)
+ irp_marshall_ng_next(hosts[i], users[i],
+ domains[i], NULL, &size);
+ irp_marshall_ng_end(NULL, &size);
+
+ buff = malloc(size);
+
+ irp_marshall_ng_start(ngname, buff, &size);
+ for (i = 0 ; i < entries ; i++) {
+ if (irp_marshall_ng_next(hosts[i], users[i],
+ domains[i], buff,
+ &size) != 0)
+ printf("next marshalling failed.\n");
+ }
+ irp_marshall_ng_end(buff, &size);
+
+ if (strcmp_nws(argv[1], buff) != 0) {
+ printf("compare failed:\n\t%s\n\t%s\n",
+ buffer, argv[1]);
+ } else {
+ printf("compare ok\n");
+ }
+ } else {
+ char *h, *u, *d, *buff;
+ size_t size;
+
+ /* run through two times. First to figure out how
+ much of a buffer we need. Second to do the
+ actual marshalling */
+
+ setnetgrent(argv[1]);
+ irp_marshall_ng_start(argv[1], NULL, &size);
+ while (getnetgrent(&h, &u, &d) == 1)
+ irp_marshall_ng_next(h, u, d, NULL, &size);
+ irp_marshall_ng_end(NULL, &size);
+ endnetgrent(argv[1]);
+
+ buff = malloc(size);
+
+ setnetgrent(argv[1]);
+ if (irp_marshall_ng_start(argv[1], buff, &size) != 0)
+ printf("Marshalling start failed\n");
+
+ while (getnetgrent(&h, &u, &d) == 1) {
+ if (irp_marshall_ng_next(h, u, d, buff, &size)
+ != 0) {
+ printf("Marshalling failed\n");
+ }
+ }
+
+ irp_marshall_ng_end(buff, &size);
+ endnetgrent();
+
+ printf("success: %s\n", buff);
+ }
+ break;
+ }
+
+
+
+ case 'h': {
+ struct hostent he, *hp;
+ int i;
+
+
+ if (strchr(argv[1], '@') != NULL) {
+ if (irp_unmarshall_ho(&he, argv[1]) != 0) {
+ printf("unmarshall failed\n");
+ exit(1);
+ }
+
+ printf("Host: \"%s\"\nAliases:", he.h_name);
+ for (i = 0 ; he.h_aliases[i] != NULL ; i++)
+ printf("\n\t\t\"%s\"", he.h_aliases[i]);
+ printf("\nAddr Type: \"%s\"\n",
+ ADDR_T_STR(he.h_addrtype));
+ printf("Length: %d\nAddresses:", he.h_length);
+ for (i = 0 ; he.h_addr_list[i] != 0 ; i++) {
+ inet_ntop(he.h_addrtype, he.h_addr_list[i],
+ buffer, sizeof buffer);
+ printf("\n\t\"%s\"\n", buffer);
+ }
+ printf("\n\n");
+
+ irp_marshall_ho(&he, &b, &len);
+ if (strcmp(argv[1], buffer) != 0) {
+ printf("compare failed:\n\t\"%s\"\n\t\"%s\"\n",
+ buffer, argv[1]);
+ } else {
+ printf("compare ok\n");
+ }
+ } else {
+ if ((hp = gethostbyname(argv[1])) == NULL) {
+ perror("gethostbyname");
+ printf("\"%s\"\n", argv[1]);
+ exit(1);
+ }
+
+ if (irp_marshall_ho(hp, &b, &len) != 0) {
+ printf("irp_marshall_ho failed\n");
+ exit(1);
+ }
+
+ printf("success: \"%s\"\n", buffer);
+ }
+ break;
+ }
+
+
+ case 's': {
+ struct servent *sv;
+ struct servent sv1;
+
+ if (strchr(argv[1], ':') != NULL) {
+ sv = &sv1;
+ memset(sv, 0xef, sizeof (struct servent));
+ if (irp_unmarshall_sv(sv, argv[1]) != 0) {
+ printf("unmarshall failed\n");
+
+ }
+
+ irp_marshall_sv(sv, &b, &len);
+ if (strcmp(argv[1], buffer) != 0) {
+ printf("compare failed:\n\t\"%s\"\n\t\"%s\"\n",
+ buffer, argv[1]);
+ } else {
+ printf("compare ok\n");
+ }
+ } else {
+ if ((sv = getservbyname(argv[1], argv[2])) == NULL) {
+ perror("getservent");
+ exit(1);
+ }
+
+ if (irp_marshall_sv(sv, &b, &len) != 0) {
+ printf("irp_marshall_sv failed\n");
+ exit(1);
+ }
+
+ printf("success: \"%s\"\n", buffer);
+ }
+ break;
+ }
+
+ case 'g': {
+ struct group *gr;
+ struct group gr1;
+
+ if (strchr(argv[1], ':') != NULL) {
+ gr = &gr1;
+ memset(gr, 0xef, sizeof (struct group));
+ if (irp_unmarshall_gr(gr, argv[1]) != 0) {
+ printf("unmarshall failed\n");
+
+ }
+
+ irp_marshall_gr(gr, &b, &len);
+ if (strcmp(argv[1], buffer) != 0) {
+ printf("compare failed:\n\t\"%s\"\n\t\"%s\"\n",
+ buffer, argv[1]);
+ } else {
+ printf("compare ok\n");
+ }
+ } else {
+ if ((gr = getgrnam(argv[1])) == NULL) {
+ perror("getgrnam");
+ exit(1);
+ }
+
+ if (irp_marshall_gr(gr, &b, &len) != 0) {
+ printf("irp_marshall_gr failed\n");
+ exit(1);
+ }
+
+ printf("success: \"%s\"\n", buffer);
+ }
+ break;
+ }
+
+
+ case 'p': {
+ struct passwd *pw;
+ struct passwd pw1;
+
+ if (strchr(argv[1], ':') != NULL) {
+ pw = &pw1;
+ memset(pw, 0xef, sizeof (*pw));
+ if (irp_unmarshall_pw(pw, argv[1]) != 0) {
+ printf("unmarshall failed\n");
+ exit(1);
+ }
+
+ printf("User: \"%s\"\nPasswd: \"%s\"\nUid: %ld\nGid: %ld\n",
+ pw->pw_name, pw->pw_passwd, (long)pw->pw_uid,
+ (long)pw->pw_gid);
+ printf("Class: \"%s\"\nChange: %ld\nGecos: \"%s\"\n",
+ pw->pw_class, (long)pw->pw_change, pw->pw_gecos);
+ printf("Shell: \"%s\"\nDirectory: \"%s\"\n",
+ pw->pw_shell, pw->pw_dir);
+
+ pw = getpwnam(pw->pw_name);
+ irp_marshall_pw(pw, &b, &len);
+ if (strcmp(argv[1], buffer) != 0) {
+ printf("compare failed:\n\t\"%s\"\n\t\"%s\"\n",
+ buffer, argv[1]);
+ } else {
+ printf("compare ok\n");
+ }
+ } else {
+ if ((pw = getpwnam(argv[1])) == NULL) {
+ perror("getpwnam");
+ exit(1);
+ }
+
+ if (irp_marshall_pw(pw, &b, &len) != 0) {
+ printf("irp_marshall_pw failed\n");
+ exit(1);
+ }
+
+ printf("success: \"%s\"\n", buffer);
+ }
+ break;
+ }
+
+ default:
+ printf("Wrong option: %c\n", option);
+ break;
+ }
+
+#endif
+
+ return (0);
+}
+
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/irs_data.c b/usr/src/lib/libresolv2_joy/common/irs/irs_data.c
new file mode 100644
index 0000000000..f12ca20f38
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/irs_data.c
@@ -0,0 +1,254 @@
+/*
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: irs_data.c,v 1.12 2007/08/27 03:32:26 marka Exp $";
+#endif
+
+#include "port_before.h"
+
+#ifndef __BIND_NOSTATIC
+
+#include <sys/types.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <string.h>
+#include <isc/memcluster.h>
+
+#ifdef DO_PTHREADS
+#include <pthread.h>
+#endif
+
+#include <irs.h>
+#include <stdlib.h>
+
+#include "port_after.h"
+
+#include "irs_data.h"
+#undef _res
+#if !(__GLIBC__ > 2 || __GLIBC__ == 2 && __GLIBC_MINOR__ >= 3)
+#undef h_errno
+extern int h_errno;
+#endif
+
+extern struct __res_state _res;
+
+#ifdef DO_PTHREADS
+static pthread_key_t key;
+static int once = 0;
+#else
+static struct net_data *net_data;
+#endif
+
+void
+irs_destroy(void) {
+#ifndef DO_PTHREADS
+ if (net_data != NULL)
+ net_data_destroy(net_data);
+ net_data = NULL;
+#endif
+}
+
+void
+net_data_destroy(void *p) {
+ struct net_data *net_data = p;
+
+ res_ndestroy(net_data->res);
+ if (net_data->gr != NULL) {
+ (*net_data->gr->close)(net_data->gr);
+ net_data->gr = NULL;
+ }
+ if (net_data->pw != NULL) {
+ (*net_data->pw->close)(net_data->pw);
+ net_data->pw = NULL;
+ }
+ if (net_data->sv != NULL) {
+ (*net_data->sv->close)(net_data->sv);
+ net_data->sv = NULL;
+ }
+ if (net_data->pr != NULL) {
+ (*net_data->pr->close)(net_data->pr);
+ net_data->pr = NULL;
+ }
+ if (net_data->ho != NULL) {
+ (*net_data->ho->close)(net_data->ho);
+ net_data->ho = NULL;
+ }
+ if (net_data->nw != NULL) {
+ (*net_data->nw->close)(net_data->nw);
+ net_data->nw = NULL;
+ }
+ if (net_data->ng != NULL) {
+ (*net_data->ng->close)(net_data->ng);
+ net_data->ng = NULL;
+ }
+ if (net_data->ho_data != NULL) {
+ free(net_data->ho_data);
+ net_data->ho_data = NULL;
+ }
+ if (net_data->nw_data != NULL) {
+ free(net_data->nw_data);
+ net_data->nw_data = NULL;
+ }
+
+ (*net_data->irs->close)(net_data->irs);
+ memput(net_data, sizeof *net_data);
+}
+
+/*%
+ * applications that need a specific config file other than
+ * _PATH_IRS_CONF should call net_data_init directly rather than letting
+ * the various wrapper functions make the first call. - brister
+ */
+
+struct net_data *
+net_data_init(const char *conf_file) {
+#ifdef DO_PTHREADS
+#ifndef LIBBIND_MUTEX_INITIALIZER
+#define LIBBIND_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
+#endif
+ static pthread_mutex_t keylock = LIBBIND_MUTEX_INITIALIZER;
+ struct net_data *net_data;
+
+ if (!once) {
+ if (pthread_mutex_lock(&keylock) != 0)
+ return (NULL);
+ if (!once) {
+ if (pthread_key_create(&key, net_data_destroy) != 0) {
+ (void)pthread_mutex_unlock(&keylock);
+ return (NULL);
+ }
+ once = 1;
+ }
+ if (pthread_mutex_unlock(&keylock) != 0)
+ return (NULL);
+ }
+ net_data = pthread_getspecific(key);
+#endif
+
+ if (net_data == NULL) {
+ net_data = net_data_create(conf_file);
+ if (net_data == NULL)
+ return (NULL);
+#ifdef DO_PTHREADS
+ if (pthread_setspecific(key, net_data) != 0) {
+ net_data_destroy(net_data);
+ return (NULL);
+ }
+#endif
+ }
+
+ return (net_data);
+}
+
+struct net_data *
+net_data_create(const char *conf_file) {
+ struct net_data *net_data;
+
+ net_data = memget(sizeof (struct net_data));
+ if (net_data == NULL)
+ return (NULL);
+ memset(net_data, 0, sizeof (struct net_data));
+
+ if ((net_data->irs = irs_gen_acc("", conf_file)) == NULL) {
+ memput(net_data, sizeof (struct net_data));
+ return (NULL);
+ }
+#ifndef DO_PTHREADS
+ (*net_data->irs->res_set)(net_data->irs, &_res, NULL);
+#endif
+
+ net_data->res = (*net_data->irs->res_get)(net_data->irs);
+ if (net_data->res == NULL) {
+ (*net_data->irs->close)(net_data->irs);
+ memput(net_data, sizeof (struct net_data));
+ return (NULL);
+ }
+
+ if ((net_data->res->options & RES_INIT) == 0U &&
+ res_ninit(net_data->res) == -1) {
+ (*net_data->irs->close)(net_data->irs);
+ memput(net_data, sizeof (struct net_data));
+ return (NULL);
+ }
+
+ return (net_data);
+}
+
+void
+net_data_minimize(struct net_data *net_data) {
+ res_nclose(net_data->res);
+}
+
+#ifdef _REENTRANT
+struct __res_state *
+__res_state(void) {
+ /* NULL param here means use the default config file. */
+ struct net_data *net_data = net_data_init(NULL);
+ if (net_data && net_data->res)
+ return (net_data->res);
+
+ return (&_res);
+}
+#else
+#ifdef __linux
+struct __res_state *
+__res_state(void) {
+ return (&_res);
+}
+#endif
+#endif
+
+int *
+__h_errno(void) {
+ /* NULL param here means use the default config file. */
+ struct net_data *net_data = net_data_init(NULL);
+ if (net_data && net_data->res)
+ return (&net_data->res->res_h_errno);
+#ifdef ORIGINAL_ISC_CODE
+#if !(__GLIBC__ > 2 || __GLIBC__ == 2 && __GLIBC_MINOR__ >= 3)
+ return(&_res.res_h_errno);
+#else
+ return (&h_errno);
+#endif
+#else
+ return (&h_errno);
+#endif /* ORIGINAL_ISC_CODE */
+}
+
+void
+__h_errno_set(struct __res_state *res, int err) {
+
+
+#if (__GLIBC__ > 2 || __GLIBC__ == 2 && __GLIBC_MINOR__ >= 3)
+ res->res_h_errno = err;
+#else
+ h_errno = res->res_h_errno = err;
+#endif
+}
+
+#endif /*__BIND_NOSTATIC*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/irs_data.h b/usr/src/lib/libresolv2_joy/common/irs/irs_data.h
new file mode 100644
index 0000000000..cb814fd8b1
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/irs_data.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: irs_data.h,v 1.3 2005/04/27 04:56:30 sra Exp $
+ */
+
+#ifndef __BIND_NOSTATIC
+
+#define net_data_init __net_data_init
+
+struct net_data {
+ struct irs_acc * irs;
+
+ struct irs_gr * gr;
+ struct irs_pw * pw;
+ struct irs_sv * sv;
+ struct irs_pr * pr;
+ struct irs_ho * ho;
+ struct irs_nw * nw;
+ struct irs_ng * ng;
+
+ struct group * gr_last;
+ struct passwd * pw_last;
+ struct servent * sv_last;
+ struct protoent * pr_last;
+ struct netent * nw_last; /*%< should have been ne_last */
+ struct nwent * nww_last;
+ struct hostent * ho_last;
+
+ unsigned int gr_stayopen :1;
+ unsigned int pw_stayopen :1;
+ unsigned int sv_stayopen :1;
+ unsigned int pr_stayopen :1;
+ unsigned int ho_stayopen :1;
+ unsigned int nw_stayopen :1;
+
+ void * nw_data;
+ void * ho_data;
+
+ struct __res_state * res; /*%< for gethostent.c */
+};
+
+extern struct net_data * net_data_init(const char *conf_file);
+extern void net_data_minimize(struct net_data *);
+
+#endif /*__BIND_NOSTATIC*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/irs_p.h b/usr/src/lib/libresolv2_joy/common/irs/irs_p.h
new file mode 100644
index 0000000000..2a0a933fce
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/irs_p.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: irs_p.h,v 1.3 2005/04/27 04:56:30 sra Exp $
+ */
+
+#ifndef _IRS_P_H_INCLUDED
+#define _IRS_P_H_INCLUDED
+
+#include <stdio.h>
+
+#include "pathnames.h"
+
+#define IRS_SV_MAXALIASES 35
+
+struct lcl_sv {
+ FILE * fp;
+ char line[BUFSIZ+1];
+ struct servent serv;
+ char * serv_aliases[IRS_SV_MAXALIASES];
+};
+
+#define irs_nul_ng __irs_nul_ng
+#define map_v4v6_address __map_v4v6_address
+#define make_group_list __make_group_list
+#define irs_lclsv_fnxt __irs_lclsv_fnxt
+
+extern void map_v4v6_address(const char *src, char *dst);
+extern int make_group_list(struct irs_gr *, const char *,
+ gid_t, gid_t *, int *);
+extern struct irs_ng * irs_nul_ng(struct irs_acc *);
+extern struct servent * irs_lclsv_fnxt(struct lcl_sv *);
+
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/lcl.c b/usr/src/lib/libresolv2_joy/common/irs/lcl.c
new file mode 100644
index 0000000000..9dd6967b87
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/lcl.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: lcl.c,v 1.4 2005/04/27 04:56:30 sra Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#include <isc/memcluster.h>
+
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "lcl_p.h"
+
+/* Forward. */
+
+static void lcl_close(struct irs_acc *);
+static struct __res_state * lcl_res_get(struct irs_acc *);
+static void lcl_res_set(struct irs_acc *, struct __res_state *,
+ void (*)(void *));
+
+/* Public */
+
+struct irs_acc *
+irs_lcl_acc(const char *options) {
+ struct irs_acc *acc;
+ struct lcl_p *lcl;
+
+ UNUSED(options);
+
+ if (!(acc = memget(sizeof *acc))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(acc, 0x5e, sizeof *acc);
+ if (!(lcl = memget(sizeof *lcl))) {
+ errno = ENOMEM;
+ free(acc);
+ return (NULL);
+ }
+ memset(lcl, 0x5e, sizeof *lcl);
+ lcl->res = NULL;
+ lcl->free_res = NULL;
+ acc->private = lcl;
+#ifdef WANT_IRS_GR
+ acc->gr_map = irs_lcl_gr;
+#else
+ acc->gr_map = NULL;
+#endif
+#ifdef WANT_IRS_PW
+ acc->pw_map = irs_lcl_pw;
+#else
+ acc->pw_map = NULL;
+#endif
+ acc->sv_map = irs_lcl_sv;
+ acc->pr_map = irs_lcl_pr;
+ acc->ho_map = irs_lcl_ho;
+ acc->nw_map = irs_lcl_nw;
+ acc->ng_map = irs_lcl_ng;
+ acc->res_get = lcl_res_get;
+ acc->res_set = lcl_res_set;
+ acc->close = lcl_close;
+ return (acc);
+}
+
+/* Methods */
+static struct __res_state *
+lcl_res_get(struct irs_acc *this) {
+ struct lcl_p *lcl = (struct lcl_p *)this->private;
+
+ if (lcl->res == NULL) {
+ struct __res_state *res;
+ res = (struct __res_state *)malloc(sizeof *res);
+ if (res == NULL)
+ return (NULL);
+ memset(res, 0, sizeof *res);
+ lcl_res_set(this, res, free);
+ }
+
+ if ((lcl->res->options & RES_INIT) == 0U &&
+ res_ninit(lcl->res) < 0)
+ return (NULL);
+
+ return (lcl->res);
+}
+
+static void
+lcl_res_set(struct irs_acc *this, struct __res_state *res,
+ void (*free_res)(void *)) {
+ struct lcl_p *lcl = (struct lcl_p *)this->private;
+
+ if (lcl->res && lcl->free_res) {
+ res_nclose(lcl->res);
+ (*lcl->free_res)(lcl->res);
+ }
+
+ lcl->res = res;
+ lcl->free_res = free_res;
+}
+
+static void
+lcl_close(struct irs_acc *this) {
+ struct lcl_p *lcl = (struct lcl_p *)this->private;
+
+ if (lcl) {
+ if (lcl->free_res)
+ (*lcl->free_res)(lcl->res);
+ memput(lcl, sizeof *lcl);
+ }
+ memput(this, sizeof *this);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/lcl_ho.c b/usr/src/lib/libresolv2_joy/common/irs/lcl_ho.c
new file mode 100644
index 0000000000..17c9a5e725
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/lcl_ho.c
@@ -0,0 +1,578 @@
+/*
+ * Copyright (c) 1985, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* from gethostnamadr.c 8.1 (Berkeley) 6/4/93 */
+/* BIND Id: gethnamaddr.c,v 8.15 1996/05/22 04:56:30 vixie Exp $ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: lcl_ho.c,v 1.5 2006/03/09 23:57:56 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/* Imports. */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <irs.h>
+#include <isc/memcluster.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "dns_p.h"
+#include "lcl_p.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) sprintf x
+#endif
+
+/* Definitions. */
+
+#define MAXALIASES 35
+#define MAXADDRS 35
+#define Max(a,b) ((a) > (b) ? (a) : (b))
+
+#if PACKETSZ > 1024
+#define MAXPACKET PACKETSZ
+#else
+#define MAXPACKET 1024
+#endif
+
+struct pvt {
+ FILE * fp;
+ struct hostent host;
+ char * h_addr_ptrs[MAXADDRS + 1];
+ char * host_aliases[MAXALIASES];
+ char hostbuf[8*1024];
+ u_char host_addr[16]; /*%< IPv4 or IPv6 */
+ struct __res_state *res;
+ void (*free_res)(void *);
+};
+
+typedef union {
+ int32_t al;
+ char ac;
+} align;
+
+static const u_char mapped[] = { 0,0, 0,0, 0,0, 0,0, 0,0, 0xff,0xff };
+static const u_char tunnelled[] = { 0,0, 0,0, 0,0, 0,0, 0,0, 0,0 };
+
+/* Forward. */
+
+static void ho_close(struct irs_ho *this);
+static struct hostent * ho_byname(struct irs_ho *this, const char *name);
+static struct hostent * ho_byname2(struct irs_ho *this, const char *name,
+ int af);
+static struct hostent * ho_byaddr(struct irs_ho *this, const void *addr,
+ int len, int af);
+static struct hostent * ho_next(struct irs_ho *this);
+static void ho_rewind(struct irs_ho *this);
+static void ho_minimize(struct irs_ho *this);
+static struct __res_state * ho_res_get(struct irs_ho *this);
+static void ho_res_set(struct irs_ho *this,
+ struct __res_state *res,
+ void (*free_res)(void *));
+static struct addrinfo * ho_addrinfo(struct irs_ho *this, const char *name,
+ const struct addrinfo *pai);
+
+static size_t ns_namelen(const char *);
+static int init(struct irs_ho *this);
+
+/* Portability. */
+
+#ifndef SEEK_SET
+# define SEEK_SET 0
+#endif
+
+/* Public. */
+
+struct irs_ho *
+irs_lcl_ho(struct irs_acc *this) {
+ struct irs_ho *ho;
+ struct pvt *pvt;
+
+ UNUSED(this);
+
+ if (!(pvt = memget(sizeof *pvt))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ if (!(ho = memget(sizeof *ho))) {
+ memput(pvt, sizeof *pvt);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(ho, 0x5e, sizeof *ho);
+ ho->private = pvt;
+ ho->close = ho_close;
+ ho->byname = ho_byname;
+ ho->byname2 = ho_byname2;
+ ho->byaddr = ho_byaddr;
+ ho->next = ho_next;
+ ho->rewind = ho_rewind;
+ ho->minimize = ho_minimize;
+ ho->res_get = ho_res_get;
+ ho->res_set = ho_res_set;
+ ho->addrinfo = ho_addrinfo;
+ return (ho);
+}
+
+/* Methods. */
+
+static void
+ho_close(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ ho_minimize(this);
+ if (pvt->fp)
+ (void) fclose(pvt->fp);
+ if (pvt->res && pvt->free_res)
+ (*pvt->free_res)(pvt->res);
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+static struct hostent *
+ho_byname(struct irs_ho *this, const char *name) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct hostent *hp;
+
+ if (init(this) == -1)
+ return (NULL);
+
+ if (pvt->res->options & RES_USE_INET6) {
+ hp = ho_byname2(this, name, AF_INET6);
+ if (hp)
+ return (hp);
+ }
+ return (ho_byname2(this, name, AF_INET));
+}
+
+static struct hostent *
+ho_byname2(struct irs_ho *this, const char *name, int af) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct hostent *hp;
+ char **hap;
+ size_t n;
+
+ if (init(this) == -1)
+ return (NULL);
+
+ ho_rewind(this);
+ n = ns_namelen(name);
+ while ((hp = ho_next(this)) != NULL) {
+ size_t nn;
+
+ if (hp->h_addrtype != af)
+ continue;
+ nn = ns_namelen(hp->h_name);
+ if (strncasecmp(hp->h_name, name, Max(n, nn)) == 0)
+ goto found;
+ for (hap = hp->h_aliases; *hap; hap++) {
+ nn = ns_namelen(*hap);
+ if (strncasecmp(*hap, name, Max(n, nn)) == 0)
+ goto found;
+ }
+ }
+ found:
+ if (!hp) {
+ RES_SET_H_ERRNO(pvt->res, HOST_NOT_FOUND);
+ return (NULL);
+ }
+ RES_SET_H_ERRNO(pvt->res, NETDB_SUCCESS);
+ return (hp);
+}
+
+static struct hostent *
+ho_byaddr(struct irs_ho *this, const void *addr, int len, int af) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ const u_char *uaddr = addr;
+ struct hostent *hp;
+ int size;
+
+ if (init(this) == -1)
+ return (NULL);
+
+ if (af == AF_INET6 && len == IN6ADDRSZ &&
+ (!memcmp(uaddr, mapped, sizeof mapped) ||
+ !memcmp(uaddr, tunnelled, sizeof tunnelled))) {
+ /* Unmap. */
+ addr = (const u_char *)addr + sizeof mapped;
+ uaddr += sizeof mapped;
+ af = AF_INET;
+ len = INADDRSZ;
+ }
+ switch (af) {
+ case AF_INET:
+ size = INADDRSZ;
+ break;
+ case AF_INET6:
+ size = IN6ADDRSZ;
+ break;
+ default:
+ errno = EAFNOSUPPORT;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+ if (size > len) {
+ errno = EINVAL;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+
+ /*
+ * Do the search.
+ */
+ ho_rewind(this);
+ while ((hp = ho_next(this)) != NULL) {
+ char **hap;
+
+ for (hap = hp->h_addr_list; *hap; hap++) {
+ const u_char *taddr = (const u_char *)*hap;
+ int taf = hp->h_addrtype;
+ int tlen = hp->h_length;
+
+ if (taf == AF_INET6 && tlen == IN6ADDRSZ &&
+ (!memcmp(taddr, mapped, sizeof mapped) ||
+ !memcmp(taddr, tunnelled, sizeof tunnelled))) {
+ /* Unmap. */
+ taddr += sizeof mapped;
+ taf = AF_INET;
+ tlen = INADDRSZ;
+ }
+ if (taf == af && tlen == len &&
+ !memcmp(taddr, uaddr, tlen))
+ goto found;
+ }
+ }
+ found:
+ if (!hp) {
+ RES_SET_H_ERRNO(pvt->res, HOST_NOT_FOUND);
+ return (NULL);
+ }
+ RES_SET_H_ERRNO(pvt->res, NETDB_SUCCESS);
+ return (hp);
+}
+
+static struct hostent *
+ho_next(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ char *cp, **q, *p;
+ char *bufp, *ndbuf, *dbuf = NULL;
+ int c, af, len, bufsiz, offset;
+
+ if (init(this) == -1)
+ return (NULL);
+
+ if (!pvt->fp)
+ ho_rewind(this);
+ if (!pvt->fp) {
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+ bufp = pvt->hostbuf;
+ bufsiz = sizeof pvt->hostbuf;
+ offset = 0;
+ again:
+ if (!(p = fgets(bufp + offset, bufsiz - offset, pvt->fp))) {
+ RES_SET_H_ERRNO(pvt->res, HOST_NOT_FOUND);
+ if (dbuf)
+ free(dbuf);
+ return (NULL);
+ }
+ if (!strchr(p, '\n') && !feof(pvt->fp)) {
+#define GROWBUF 1024
+ /* allocate space for longer line */
+ if (dbuf == NULL) {
+ if ((ndbuf = malloc(bufsiz + GROWBUF)) != NULL)
+ strcpy(ndbuf, bufp);
+ } else
+ ndbuf = realloc(dbuf, bufsiz + GROWBUF);
+ if (ndbuf) {
+ dbuf = ndbuf;
+ bufp = dbuf;
+ bufsiz += GROWBUF;
+ offset = strlen(dbuf);
+ } else {
+ /* allocation failed; skip this long line */
+ while ((c = getc(pvt->fp)) != EOF)
+ if (c == '\n')
+ break;
+ if (c != EOF)
+ ungetc(c, pvt->fp);
+ }
+ goto again;
+ }
+
+ p -= offset;
+ offset = 0;
+
+ if (*p == '#')
+ goto again;
+ if ((cp = strpbrk(p, "#\n")) != NULL)
+ *cp = '\0';
+ if (!(cp = strpbrk(p, " \t")))
+ goto again;
+ *cp++ = '\0';
+ if (inet_pton(AF_INET6, p, pvt->host_addr) > 0) {
+ af = AF_INET6;
+ len = IN6ADDRSZ;
+ } else if (inet_aton(p, (struct in_addr *)pvt->host_addr) > 0) {
+ if (pvt->res->options & RES_USE_INET6) {
+ map_v4v6_address((char*)pvt->host_addr,
+ (char*)pvt->host_addr);
+ af = AF_INET6;
+ len = IN6ADDRSZ;
+ } else {
+ af = AF_INET;
+ len = INADDRSZ;
+ }
+ } else {
+ goto again;
+ }
+ pvt->h_addr_ptrs[0] = (char *)pvt->host_addr;
+ pvt->h_addr_ptrs[1] = NULL;
+ pvt->host.h_addr_list = pvt->h_addr_ptrs;
+ pvt->host.h_length = len;
+ pvt->host.h_addrtype = af;
+ while (*cp == ' ' || *cp == '\t')
+ cp++;
+ pvt->host.h_name = cp;
+ q = pvt->host.h_aliases = pvt->host_aliases;
+ if ((cp = strpbrk(cp, " \t")) != NULL)
+ *cp++ = '\0';
+ while (cp && *cp) {
+ if (*cp == ' ' || *cp == '\t') {
+ cp++;
+ continue;
+ }
+ if (q < &pvt->host_aliases[MAXALIASES - 1])
+ *q++ = cp;
+ if ((cp = strpbrk(cp, " \t")) != NULL)
+ *cp++ = '\0';
+ }
+ *q = NULL;
+ if (dbuf)
+ free(dbuf);
+ RES_SET_H_ERRNO(pvt->res, NETDB_SUCCESS);
+ return (&pvt->host);
+}
+
+static void
+ho_rewind(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->fp) {
+ if (fseek(pvt->fp, 0L, SEEK_SET) == 0)
+ return;
+ (void)fclose(pvt->fp);
+ }
+ if (!(pvt->fp = fopen(_PATH_HOSTS, "r")))
+ return;
+ if (fcntl(fileno(pvt->fp), F_SETFD, 1) < 0) {
+ (void)fclose(pvt->fp);
+ pvt->fp = NULL;
+ }
+}
+
+static void
+ho_minimize(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->fp != NULL) {
+ (void)fclose(pvt->fp);
+ pvt->fp = NULL;
+ }
+ if (pvt->res)
+ res_nclose(pvt->res);
+}
+
+static struct __res_state *
+ho_res_get(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (!pvt->res) {
+ struct __res_state *res;
+ res = (struct __res_state *)malloc(sizeof *res);
+ if (!res) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(res, 0, sizeof *res);
+ ho_res_set(this, res, free);
+ }
+
+ return (pvt->res);
+}
+
+static void
+ho_res_set(struct irs_ho *this, struct __res_state *res,
+ void (*free_res)(void *)) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->res && pvt->free_res) {
+ res_nclose(pvt->res);
+ (*pvt->free_res)(pvt->res);
+ }
+
+ pvt->res = res;
+ pvt->free_res = free_res;
+}
+
+struct lcl_res_target {
+ struct lcl_res_target *next;
+ int family;
+};
+
+/* XXX */
+extern struct addrinfo *hostent2addrinfo __P((struct hostent *,
+ const struct addrinfo *pai));
+
+static struct addrinfo *
+ho_addrinfo(struct irs_ho *this, const char *name, const struct addrinfo *pai)
+{
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct hostent *hp;
+ struct lcl_res_target q, q2, *p;
+ struct addrinfo sentinel, *cur;
+
+ memset(&q, 0, sizeof(q2));
+ memset(&q2, 0, sizeof(q2));
+ memset(&sentinel, 0, sizeof(sentinel));
+ cur = &sentinel;
+
+ switch(pai->ai_family) {
+ case AF_UNSPEC: /*%< INET6 then INET4 */
+ q.family = AF_INET6;
+ q.next = &q2;
+ q2.family = AF_INET;
+ break;
+ case AF_INET6:
+ q.family = AF_INET6;
+ break;
+ case AF_INET:
+ q.family = AF_INET;
+ break;
+ default:
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY); /*%< ??? */
+ return(NULL);
+ }
+
+ for (p = &q; p; p = p->next) {
+ struct addrinfo *ai;
+
+ hp = (*this->byname2)(this, name, p->family);
+ if (hp == NULL) {
+ /* byname2 should've set an appropriate error */
+ continue;
+ }
+ if ((hp->h_name == NULL) || (hp->h_name[0] == 0) ||
+ (hp->h_addr_list[0] == NULL)) {
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY);
+ continue;
+ }
+
+ ai = hostent2addrinfo(hp, pai);
+ if (ai) {
+ cur->ai_next = ai;
+ while (cur->ai_next)
+ cur = cur->ai_next;
+ }
+ }
+
+ if (sentinel.ai_next == NULL)
+ RES_SET_H_ERRNO(pvt->res, HOST_NOT_FOUND);
+
+ return(sentinel.ai_next);
+}
+
+/* Private. */
+
+static size_t
+ns_namelen(const char *s) {
+ int i;
+
+ for (i = strlen(s); i > 0 && s[i-1] == '.'; i--)
+ (void)NULL;
+ return ((size_t) i);
+}
+
+static int
+init(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (!pvt->res && !ho_res_get(this))
+ return (-1);
+ if (((pvt->res->options & RES_INIT) == 0U) &&
+ res_ninit(pvt->res) == -1)
+ return (-1);
+ return (0);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/lcl_ng.c b/usr/src/lib/libresolv2_joy/common/irs/lcl_ng.c
new file mode 100644
index 0000000000..319725ce70
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/lcl_ng.c
@@ -0,0 +1,446 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: lcl_ng.c,v 1.3 2005/04/27 04:56:31 sra Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <irs.h>
+#include <isc/memcluster.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "lcl_p.h"
+
+/* Definitions */
+
+#define NG_HOST 0 /*%< Host name */
+#define NG_USER 1 /*%< User name */
+#define NG_DOM 2 /*%< and Domain name */
+#define LINSIZ 1024 /*%< Length of netgroup file line */
+/*
+ * XXX Warning XXX
+ * This code is a hack-and-slash special. It realy needs to be
+ * rewritten with things like strdup, and realloc in mind.
+ * More reasonable data structures would not be a bad thing.
+ */
+
+/*%
+ * Static Variables and functions used by setnetgrent(), getnetgrent() and
+ * endnetgrent().
+ *
+ * There are two linked lists:
+ * \li linelist is just used by setnetgrent() to parse the net group file via.
+ * parse_netgrp()
+ * \li netgrp is the list of entries for the current netgroup
+ */
+struct linelist {
+ struct linelist *l_next; /*%< Chain ptr. */
+ int l_parsed; /*%< Flag for cycles */
+ char * l_groupname; /*%< Name of netgroup */
+ char * l_line; /*%< Netgroup entrie(s) to be parsed */
+};
+
+struct ng_old_struct {
+ struct ng_old_struct *ng_next; /*%< Chain ptr */
+ char * ng_str[3]; /*%< Field pointers, see below */
+};
+
+struct pvt {
+ FILE *fp;
+ struct linelist *linehead;
+ struct ng_old_struct *nextgrp;
+ struct {
+ struct ng_old_struct *gr;
+ char *grname;
+ } grouphead;
+};
+
+/* Forward */
+
+static void ng_rewind(struct irs_ng *, const char*);
+static void ng_close(struct irs_ng *);
+static int ng_next(struct irs_ng *, const char **,
+ const char **, const char **);
+static int ng_test(struct irs_ng *, const char *,
+ const char *, const char *,
+ const char *);
+static void ng_minimize(struct irs_ng *);
+
+static int parse_netgrp(struct irs_ng *, const char*);
+static struct linelist *read_for_group(struct irs_ng *, const char *);
+static void freelists(struct irs_ng *);
+
+/* Public */
+
+struct irs_ng *
+irs_lcl_ng(struct irs_acc *this) {
+ struct irs_ng *ng;
+ struct pvt *pvt;
+
+ UNUSED(this);
+
+ if (!(ng = memget(sizeof *ng))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(ng, 0x5e, sizeof *ng);
+ if (!(pvt = memget(sizeof *pvt))) {
+ memput(ng, sizeof *ng);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ ng->private = pvt;
+ ng->close = ng_close;
+ ng->next = ng_next;
+ ng->test = ng_test;
+ ng->rewind = ng_rewind;
+ ng->minimize = ng_minimize;
+ return (ng);
+}
+
+/* Methods */
+
+static void
+ng_close(struct irs_ng *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->fp != NULL)
+ fclose(pvt->fp);
+ freelists(this);
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+/*%
+ * Parse the netgroup file looking for the netgroup and build the list
+ * of netgrp structures. Let parse_netgrp() and read_for_group() do
+ * most of the work.
+ */
+static void
+ng_rewind(struct irs_ng *this, const char *group) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->fp != NULL && fseek(pvt->fp, SEEK_CUR, 0L) == -1) {
+ fclose(pvt->fp);
+ pvt->fp = NULL;
+ }
+
+ if (pvt->fp == NULL || pvt->grouphead.gr == NULL ||
+ strcmp(group, pvt->grouphead.grname)) {
+ freelists(this);
+ if (pvt->fp != NULL)
+ fclose(pvt->fp);
+ pvt->fp = fopen(_PATH_NETGROUP, "r");
+ if (pvt->fp != NULL) {
+ if (parse_netgrp(this, group))
+ freelists(this);
+ if (!(pvt->grouphead.grname = strdup(group)))
+ freelists(this);
+ fclose(pvt->fp);
+ pvt->fp = NULL;
+ }
+ }
+ pvt->nextgrp = pvt->grouphead.gr;
+}
+
+/*%
+ * Get the next netgroup off the list.
+ */
+static int
+ng_next(struct irs_ng *this, const char **host, const char **user,
+ const char **domain)
+{
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->nextgrp) {
+ *host = pvt->nextgrp->ng_str[NG_HOST];
+ *user = pvt->nextgrp->ng_str[NG_USER];
+ *domain = pvt->nextgrp->ng_str[NG_DOM];
+ pvt->nextgrp = pvt->nextgrp->ng_next;
+ return (1);
+ }
+ return (0);
+}
+
+/*%
+ * Search for a match in a netgroup.
+ */
+static int
+ng_test(struct irs_ng *this, const char *name,
+ const char *host, const char *user, const char *domain)
+{
+ const char *ng_host, *ng_user, *ng_domain;
+
+ ng_rewind(this, name);
+ while (ng_next(this, &ng_host, &ng_user, &ng_domain))
+ if ((host == NULL || ng_host == NULL ||
+ !strcmp(host, ng_host)) &&
+ (user == NULL || ng_user == NULL ||
+ !strcmp(user, ng_user)) &&
+ (domain == NULL || ng_domain == NULL ||
+ !strcmp(domain, ng_domain))) {
+ freelists(this);
+ return (1);
+ }
+ freelists(this);
+ return (0);
+}
+
+static void
+ng_minimize(struct irs_ng *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->fp != NULL) {
+ (void)fclose(pvt->fp);
+ pvt->fp = NULL;
+ }
+}
+
+/* Private */
+
+/*%
+ * endnetgrent() - cleanup
+ */
+static void
+freelists(struct irs_ng *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct linelist *lp, *olp;
+ struct ng_old_struct *gp, *ogp;
+
+ lp = pvt->linehead;
+ while (lp) {
+ olp = lp;
+ lp = lp->l_next;
+ free(olp->l_groupname);
+ free(olp->l_line);
+ free((char *)olp);
+ }
+ pvt->linehead = NULL;
+ if (pvt->grouphead.grname) {
+ free(pvt->grouphead.grname);
+ pvt->grouphead.grname = NULL;
+ }
+ gp = pvt->grouphead.gr;
+ while (gp) {
+ ogp = gp;
+ gp = gp->ng_next;
+ if (ogp->ng_str[NG_HOST])
+ free(ogp->ng_str[NG_HOST]);
+ if (ogp->ng_str[NG_USER])
+ free(ogp->ng_str[NG_USER]);
+ if (ogp->ng_str[NG_DOM])
+ free(ogp->ng_str[NG_DOM]);
+ free((char *)ogp);
+ }
+ pvt->grouphead.gr = NULL;
+}
+
+/*%
+ * Parse the netgroup file setting up the linked lists.
+ */
+static int
+parse_netgrp(struct irs_ng *this, const char *group) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ char *spos, *epos;
+ int len, strpos;
+ char *pos, *gpos;
+ struct ng_old_struct *grp;
+ struct linelist *lp = pvt->linehead;
+
+ /*
+ * First, see if the line has already been read in.
+ */
+ while (lp) {
+ if (!strcmp(group, lp->l_groupname))
+ break;
+ lp = lp->l_next;
+ }
+ if (lp == NULL &&
+ (lp = read_for_group(this, group)) == NULL)
+ return (1);
+ if (lp->l_parsed) {
+ /*fprintf(stderr, "Cycle in netgroup %s\n", lp->l_groupname);*/
+ return (1);
+ } else
+ lp->l_parsed = 1;
+ pos = lp->l_line;
+ while (*pos != '\0') {
+ if (*pos == '(') {
+ if (!(grp = malloc(sizeof (struct ng_old_struct)))) {
+ freelists(this);
+ errno = ENOMEM;
+ return (1);
+ }
+ memset(grp, 0, sizeof (struct ng_old_struct));
+ grp->ng_next = pvt->grouphead.gr;
+ pvt->grouphead.gr = grp;
+ pos++;
+ gpos = strsep(&pos, ")");
+ for (strpos = 0; strpos < 3; strpos++) {
+ if ((spos = strsep(&gpos, ","))) {
+ while (*spos == ' ' || *spos == '\t')
+ spos++;
+ if ((epos = strpbrk(spos, " \t"))) {
+ *epos = '\0';
+ len = epos - spos;
+ } else
+ len = strlen(spos);
+ if (len > 0) {
+ if(!(grp->ng_str[strpos]
+ = (char *)
+ malloc(len + 1))) {
+ freelists(this);
+ return (1);
+ }
+ memcpy(grp->ng_str[strpos],
+ spos,
+ len + 1);
+ }
+ } else
+ goto errout;
+ }
+ } else {
+ spos = strsep(&pos, ", \t");
+ if (spos != NULL && parse_netgrp(this, spos)) {
+ freelists(this);
+ return (1);
+ }
+ }
+ if (pos == NULL)
+ break;
+ while (*pos == ' ' || *pos == ',' || *pos == '\t')
+ pos++;
+ }
+ return (0);
+ errout:
+ /*fprintf(stderr, "Bad netgroup %s at ..%s\n", lp->l_groupname,
+ spos);*/
+ return (1);
+}
+
+/*%
+ * Read the netgroup file and save lines until the line for the netgroup
+ * is found. Return 1 if eof is encountered.
+ */
+static struct linelist *
+read_for_group(struct irs_ng *this, const char *group) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ char *pos, *spos, *linep = NULL, *olinep;
+ int len, olen, cont;
+ struct linelist *lp;
+ char line[LINSIZ + 1];
+
+ while (fgets(line, LINSIZ, pvt->fp) != NULL) {
+ pos = line;
+ if (*pos == '#')
+ continue;
+ while (*pos == ' ' || *pos == '\t')
+ pos++;
+ spos = pos;
+ while (*pos != ' ' && *pos != '\t' && *pos != '\n' &&
+ *pos != '\0')
+ pos++;
+ len = pos - spos;
+ while (*pos == ' ' || *pos == '\t')
+ pos++;
+ if (*pos != '\n' && *pos != '\0') {
+ if (!(lp = malloc(sizeof (*lp)))) {
+ freelists(this);
+ return (NULL);
+ }
+ lp->l_parsed = 0;
+ if (!(lp->l_groupname = malloc(len + 1))) {
+ free(lp);
+ freelists(this);
+ return (NULL);
+ }
+ memcpy(lp->l_groupname, spos, len);
+ *(lp->l_groupname + len) = '\0';
+ len = strlen(pos);
+ olen = 0;
+ olinep = NULL;
+
+ /*
+ * Loop around handling line continuations.
+ */
+ do {
+ if (*(pos + len - 1) == '\n')
+ len--;
+ if (*(pos + len - 1) == '\\') {
+ len--;
+ cont = 1;
+ } else
+ cont = 0;
+ if (len > 0) {
+ if (!(linep = malloc(olen + len + 1))){
+ if (olen > 0)
+ free(olinep);
+ free(lp->l_groupname);
+ free(lp);
+ freelists(this);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ if (olen > 0) {
+ memcpy(linep, olinep, olen);
+ free(olinep);
+ }
+ memcpy(linep + olen, pos, len);
+ olen += len;
+ *(linep + olen) = '\0';
+ olinep = linep;
+ }
+ if (cont) {
+ if (fgets(line, LINSIZ, pvt->fp)) {
+ pos = line;
+ len = strlen(pos);
+ } else
+ cont = 0;
+ }
+ } while (cont);
+ lp->l_line = linep;
+ lp->l_next = pvt->linehead;
+ pvt->linehead = lp;
+
+ /*
+ * If this is the one we wanted, we are done.
+ */
+ if (!strcmp(lp->l_groupname, group))
+ return (lp);
+ }
+ }
+ return (NULL);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/lcl_nw.c b/usr/src/lib/libresolv2_joy/common/irs/lcl_nw.c
new file mode 100644
index 0000000000..2e5cd55a6c
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/lcl_nw.c
@@ -0,0 +1,373 @@
+/*
+ * Copyright (c) 1989, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: lcl_nw.c,v 1.4 2005/04/27 04:56:31 sra Exp $";
+/* from getgrent.c 8.2 (Berkeley) 3/21/94"; */
+/* from BSDI Id: getgrent.c,v 2.8 1996/05/28 18:15:14 bostic Exp $ */
+#endif /* LIBC_SCCS and not lint */
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <irs.h>
+#include <isc/memcluster.h>
+
+#include "port_after.h"
+
+#include <isc/misc.h>
+#include "irs_p.h"
+#include "lcl_p.h"
+
+#define MAXALIASES 35
+#define MAXADDRSIZE 4
+
+struct pvt {
+ FILE * fp;
+ char line[BUFSIZ+1];
+ struct nwent net;
+ char * aliases[MAXALIASES];
+ char addr[MAXADDRSIZE];
+ struct __res_state * res;
+ void (*free_res)(void *);
+};
+
+/* Forward */
+
+static void nw_close(struct irs_nw *);
+static struct nwent * nw_byname(struct irs_nw *, const char *, int);
+static struct nwent * nw_byaddr(struct irs_nw *, void *, int, int);
+static struct nwent * nw_next(struct irs_nw *);
+static void nw_rewind(struct irs_nw *);
+static void nw_minimize(struct irs_nw *);
+static struct __res_state * nw_res_get(struct irs_nw *this);
+static void nw_res_set(struct irs_nw *this,
+ struct __res_state *res,
+ void (*free_res)(void *));
+
+static int init(struct irs_nw *this);
+
+/* Portability. */
+
+#ifndef SEEK_SET
+# define SEEK_SET 0
+#endif
+
+/* Public */
+
+struct irs_nw *
+irs_lcl_nw(struct irs_acc *this) {
+ struct irs_nw *nw;
+ struct pvt *pvt;
+
+ UNUSED(this);
+
+ if (!(pvt = memget(sizeof *pvt))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ if (!(nw = memget(sizeof *nw))) {
+ memput(pvt, sizeof *pvt);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(nw, 0x5e, sizeof *nw);
+ nw->private = pvt;
+ nw->close = nw_close;
+ nw->byname = nw_byname;
+ nw->byaddr = nw_byaddr;
+ nw->next = nw_next;
+ nw->rewind = nw_rewind;
+ nw->minimize = nw_minimize;
+ nw->res_get = nw_res_get;
+ nw->res_set = nw_res_set;
+ return (nw);
+}
+
+/* Methods */
+
+static void
+nw_close(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ nw_minimize(this);
+ if (pvt->res && pvt->free_res)
+ (*pvt->free_res)(pvt->res);
+ if (pvt->fp)
+ (void)fclose(pvt->fp);
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+static struct nwent *
+nw_byaddr(struct irs_nw *this, void *net, int length, int type) {
+ struct nwent *p;
+
+ if (init(this) == -1)
+ return(NULL);
+
+ nw_rewind(this);
+ while ((p = nw_next(this)) != NULL)
+ if (p->n_addrtype == type && p->n_length == length)
+ if (bitncmp(p->n_addr, net, length) == 0)
+ break;
+ return (p);
+}
+
+static struct nwent *
+nw_byname(struct irs_nw *this, const char *name, int type) {
+ struct nwent *p;
+ char **ap;
+
+ if (init(this) == -1)
+ return(NULL);
+
+ nw_rewind(this);
+ while ((p = nw_next(this)) != NULL) {
+ if (ns_samename(p->n_name, name) == 1 &&
+ p->n_addrtype == type)
+ break;
+ for (ap = p->n_aliases; *ap; ap++)
+ if ((ns_samename(*ap, name) == 1) &&
+ (p->n_addrtype == type))
+ goto found;
+ }
+ found:
+ return (p);
+}
+
+static void
+nw_rewind(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->fp) {
+ if (fseek(pvt->fp, 0L, SEEK_SET) == 0)
+ return;
+ (void)fclose(pvt->fp);
+ }
+ if (!(pvt->fp = fopen(_PATH_NETWORKS, "r")))
+ return;
+ if (fcntl(fileno(pvt->fp), F_SETFD, 1) < 0) {
+ (void)fclose(pvt->fp);
+ pvt->fp = NULL;
+ }
+}
+
+static struct nwent *
+nw_next(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct nwent *ret = NULL;
+ char *p, *cp, **q;
+ char *bufp, *ndbuf, *dbuf = NULL;
+ int c, bufsiz, offset = 0;
+
+ if (init(this) == -1)
+ return(NULL);
+
+ if (pvt->fp == NULL)
+ nw_rewind(this);
+ if (pvt->fp == NULL) {
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+ bufp = pvt->line;
+ bufsiz = sizeof(pvt->line);
+
+ again:
+ p = fgets(bufp + offset, bufsiz - offset, pvt->fp);
+ if (p == NULL)
+ goto cleanup;
+ if (!strchr(p, '\n') && !feof(pvt->fp)) {
+#define GROWBUF 1024
+ /* allocate space for longer line */
+ if (dbuf == NULL) {
+ if ((ndbuf = malloc(bufsiz + GROWBUF)) != NULL)
+ strcpy(ndbuf, bufp);
+ } else
+ ndbuf = realloc(dbuf, bufsiz + GROWBUF);
+ if (ndbuf) {
+ dbuf = ndbuf;
+ bufp = dbuf;
+ bufsiz += GROWBUF;
+ offset = strlen(dbuf);
+ } else {
+ /* allocation failed; skip this long line */
+ while ((c = getc(pvt->fp)) != EOF)
+ if (c == '\n')
+ break;
+ if (c != EOF)
+ ungetc(c, pvt->fp);
+ }
+ goto again;
+ }
+
+ p -= offset;
+ offset = 0;
+
+ if (*p == '#')
+ goto again;
+
+ cp = strpbrk(p, "#\n");
+ if (cp != NULL)
+ *cp = '\0';
+ pvt->net.n_name = p;
+ cp = strpbrk(p, " \t");
+ if (cp == NULL)
+ goto again;
+ *cp++ = '\0';
+ while (*cp == ' ' || *cp == '\t')
+ cp++;
+ p = strpbrk(cp, " \t");
+ if (p != NULL)
+ *p++ = '\0';
+ pvt->net.n_length = inet_net_pton(AF_INET, cp, pvt->addr,
+ sizeof pvt->addr);
+ if (pvt->net.n_length < 0)
+ goto again;
+ pvt->net.n_addrtype = AF_INET;
+ pvt->net.n_addr = pvt->addr;
+ q = pvt->net.n_aliases = pvt->aliases;
+ if (p != NULL) {
+ cp = p;
+ while (cp && *cp) {
+ if (*cp == ' ' || *cp == '\t') {
+ cp++;
+ continue;
+ }
+ if (q < &pvt->aliases[MAXALIASES - 1])
+ *q++ = cp;
+ cp = strpbrk(cp, " \t");
+ if (cp != NULL)
+ *cp++ = '\0';
+ }
+ }
+ *q = NULL;
+ ret = &pvt->net;
+
+ cleanup:
+ if (dbuf)
+ free(dbuf);
+
+ return (ret);
+}
+
+static void
+nw_minimize(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->res)
+ res_nclose(pvt->res);
+ if (pvt->fp != NULL) {
+ (void)fclose(pvt->fp);
+ pvt->fp = NULL;
+ }
+}
+
+static struct __res_state *
+nw_res_get(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (!pvt->res) {
+ struct __res_state *res;
+ res = (struct __res_state *)malloc(sizeof *res);
+ if (!res) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(res, 0, sizeof *res);
+ nw_res_set(this, res, free);
+ }
+
+ return (pvt->res);
+}
+
+static void
+nw_res_set(struct irs_nw *this, struct __res_state *res,
+ void (*free_res)(void *)) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->res && pvt->free_res) {
+ res_nclose(pvt->res);
+ (*pvt->free_res)(pvt->res);
+ }
+
+ pvt->res = res;
+ pvt->free_res = free_res;
+}
+
+static int
+init(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (!pvt->res && !nw_res_get(this))
+ return (-1);
+ if (((pvt->res->options & RES_INIT) == 0U) &&
+ res_ninit(pvt->res) == -1)
+ return (-1);
+ return (0);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/lcl_p.h b/usr/src/lib/libresolv2_joy/common/irs/lcl_p.h
new file mode 100644
index 0000000000..e3f4f009cb
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/lcl_p.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: lcl_p.h,v 1.3 2005/04/27 04:56:31 sra Exp $
+ */
+
+/*! \file
+ * \brief
+ * lcl_p.h - private include file for the local accessor functions.
+ */
+
+#ifndef _LCL_P_H_INCLUDED
+#define _LCL_P_H_INCLUDED
+
+/*%
+ * Object state.
+ */
+struct lcl_p {
+ struct __res_state * res;
+ void (*free_res) __P((void *));
+};
+
+/*
+ * Externs.
+ */
+
+extern struct irs_acc * irs_lcl_acc __P((const char *));
+extern struct irs_gr * irs_lcl_gr __P((struct irs_acc *));
+extern struct irs_pw * irs_lcl_pw __P((struct irs_acc *));
+extern struct irs_sv * irs_lcl_sv __P((struct irs_acc *));
+extern struct irs_pr * irs_lcl_pr __P((struct irs_acc *));
+extern struct irs_ho * irs_lcl_ho __P((struct irs_acc *));
+extern struct irs_nw * irs_lcl_nw __P((struct irs_acc *));
+extern struct irs_ng * irs_lcl_ng __P((struct irs_acc *));
+
+#endif /*_LCL_P_H_INCLUDED*/
diff --git a/usr/src/lib/libresolv2_joy/common/irs/lcl_pr.c b/usr/src/lib/libresolv2_joy/common/irs/lcl_pr.c
new file mode 100644
index 0000000000..e1538530eb
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/lcl_pr.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (c) 1989, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: lcl_pr.c,v 1.4 2006/03/09 23:57:56 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/* extern */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <irs.h>
+#include <isc/memcluster.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "lcl_p.h"
+
+#ifndef _PATH_PROTOCOLS
+#define _PATH_PROTOCOLS "/etc/protocols"
+#endif
+#define MAXALIASES 35
+
+/* Types */
+
+struct pvt {
+ FILE * fp;
+ char line[BUFSIZ+1];
+ char * dbuf;
+ struct protoent proto;
+ char * proto_aliases[MAXALIASES];
+};
+
+/* Forward */
+
+static void pr_close(struct irs_pr *);
+static struct protoent * pr_next(struct irs_pr *);
+static struct protoent * pr_byname(struct irs_pr *, const char *);
+static struct protoent * pr_bynumber(struct irs_pr *, int);
+static void pr_rewind(struct irs_pr *);
+static void pr_minimize(struct irs_pr *);
+
+/* Portability. */
+
+#ifndef SEEK_SET
+# define SEEK_SET 0
+#endif
+
+/* Public */
+
+struct irs_pr *
+irs_lcl_pr(struct irs_acc *this) {
+ struct irs_pr *pr;
+ struct pvt *pvt;
+
+ if (!(pr = memget(sizeof *pr))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ if (!(pvt = memget(sizeof *pvt))) {
+ memput(pr, sizeof *this);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ pr->private = pvt;
+ pr->close = pr_close;
+ pr->byname = pr_byname;
+ pr->bynumber = pr_bynumber;
+ pr->next = pr_next;
+ pr->rewind = pr_rewind;
+ pr->minimize = pr_minimize;
+ pr->res_get = NULL;
+ pr->res_set = NULL;
+ return (pr);
+}
+
+/* Methods */
+
+static void
+pr_close(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->fp)
+ (void) fclose(pvt->fp);
+ if (pvt->dbuf)
+ free(pvt->dbuf);
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+static struct protoent *
+pr_byname(struct irs_pr *this, const char *name) {
+
+ struct protoent *p;
+ char **cp;
+
+ pr_rewind(this);
+ while ((p = pr_next(this))) {
+ if (!strcmp(p->p_name, name))
+ goto found;
+ for (cp = p->p_aliases; *cp; cp++)
+ if (!strcmp(*cp, name))
+ goto found;
+ }
+ found:
+ return (p);
+}
+
+static struct protoent *
+pr_bynumber(struct irs_pr *this, int proto) {
+ struct protoent *p;
+
+ pr_rewind(this);
+ while ((p = pr_next(this)))
+ if (p->p_proto == proto)
+ break;
+ return (p);
+}
+
+static void
+pr_rewind(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->fp) {
+ if (fseek(pvt->fp, 0L, SEEK_SET) == 0)
+ return;
+ (void)fclose(pvt->fp);
+ }
+ if (!(pvt->fp = fopen(_PATH_PROTOCOLS, "r" )))
+ return;
+ if (fcntl(fileno(pvt->fp), F_SETFD, 1) < 0) {
+ (void)fclose(pvt->fp);
+ pvt->fp = NULL;
+ }
+}
+
+static struct protoent *
+pr_next(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ char *p, *cp, **q;
+ char *bufp, *ndbuf, *dbuf = NULL;
+ int c, bufsiz, offset;
+
+ if (!pvt->fp)
+ pr_rewind(this);
+ if (!pvt->fp)
+ return (NULL);
+ if (pvt->dbuf) {
+ free(pvt->dbuf);
+ pvt->dbuf = NULL;
+ }
+ bufp = pvt->line;
+ bufsiz = BUFSIZ;
+ offset = 0;
+ again:
+ if ((p = fgets(bufp + offset, bufsiz - offset, pvt->fp)) == NULL) {
+ if (dbuf)
+ free(dbuf);
+ return (NULL);
+ }
+ if (!strchr(p, '\n') && !feof(pvt->fp)) {
+#define GROWBUF 1024
+ /* allocate space for longer line */
+ if (dbuf == NULL) {
+ if ((ndbuf = malloc(bufsiz + GROWBUF)) != NULL)
+ strcpy(ndbuf, bufp);
+ } else
+ ndbuf = realloc(dbuf, bufsiz + GROWBUF);
+ if (ndbuf) {
+ dbuf = ndbuf;
+ bufp = dbuf;
+ bufsiz += GROWBUF;
+ offset = strlen(dbuf);
+ } else {
+ /* allocation failed; skip this long line */
+ while ((c = getc(pvt->fp)) != EOF)
+ if (c == '\n')
+ break;
+ if (c != EOF)
+ ungetc(c, pvt->fp);
+ }
+ goto again;
+ }
+
+ p -= offset;
+ offset = 0;
+
+ if (*p == '#')
+ goto again;
+ cp = strpbrk(p, "#\n");
+ if (cp != NULL)
+ *cp = '\0';
+ pvt->proto.p_name = p;
+ cp = strpbrk(p, " \t");
+ if (cp == NULL)
+ goto again;
+ *cp++ = '\0';
+ while (*cp == ' ' || *cp == '\t')
+ cp++;
+ p = strpbrk(cp, " \t");
+ if (p != NULL)
+ *p++ = '\0';
+ pvt->proto.p_proto = atoi(cp);
+ q = pvt->proto.p_aliases = pvt->proto_aliases;
+ if (p != NULL) {
+ cp = p;
+ while (cp && *cp) {
+ if (*cp == ' ' || *cp == '\t') {
+ cp++;
+ continue;
+ }
+ if (q < &pvt->proto_aliases[MAXALIASES - 1])
+ *q++ = cp;
+ cp = strpbrk(cp, " \t");
+ if (cp != NULL)
+ *cp++ = '\0';
+ }
+ }
+ *q = NULL;
+ pvt->dbuf = dbuf;
+ return (&pvt->proto);
+}
+
+static void
+pr_minimize(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->fp != NULL) {
+ (void)fclose(pvt->fp);
+ pvt->fp = NULL;
+ }
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/lcl_sv.c b/usr/src/lib/libresolv2_joy/common/irs/lcl_sv.c
new file mode 100644
index 0000000000..ad6526430c
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/lcl_sv.c
@@ -0,0 +1,432 @@
+/*
+ * Copyright (c) 1989, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: lcl_sv.c,v 1.4 2005/04/27 04:56:31 sra Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/* extern */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#ifdef IRS_LCL_SV_DB
+#include <db.h>
+#endif
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include <irs.h>
+#include <isc/memcluster.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "lcl_p.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) ((size_t)sprintf x)
+#endif
+
+/* Types */
+
+struct pvt {
+#ifdef IRS_LCL_SV_DB
+ DB * dbh;
+ int dbf;
+#endif
+ struct lcl_sv sv;
+};
+
+/* Forward */
+
+static void sv_close(struct irs_sv*);
+static struct servent * sv_next(struct irs_sv *);
+static struct servent * sv_byname(struct irs_sv *, const char *,
+ const char *);
+static struct servent * sv_byport(struct irs_sv *, int, const char *);
+static void sv_rewind(struct irs_sv *);
+static void sv_minimize(struct irs_sv *);
+/*global*/ struct servent * irs_lclsv_fnxt(struct lcl_sv *);
+#ifdef IRS_LCL_SV_DB
+static struct servent * sv_db_rec(struct lcl_sv *, DBT *, DBT *);
+#endif
+
+/* Portability */
+
+#ifndef SEEK_SET
+# define SEEK_SET 0
+#endif
+
+/* Public */
+
+struct irs_sv *
+irs_lcl_sv(struct irs_acc *this) {
+ struct irs_sv *sv;
+ struct pvt *pvt;
+
+ UNUSED(this);
+
+ if ((sv = memget(sizeof *sv)) == NULL) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(sv, 0x5e, sizeof *sv);
+ if ((pvt = memget(sizeof *pvt)) == NULL) {
+ memput(sv, sizeof *sv);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ sv->private = pvt;
+ sv->close = sv_close;
+ sv->next = sv_next;
+ sv->byname = sv_byname;
+ sv->byport = sv_byport;
+ sv->rewind = sv_rewind;
+ sv->minimize = sv_minimize;
+ sv->res_get = NULL;
+ sv->res_set = NULL;
+#ifdef IRS_LCL_SV_DB
+ pvt->dbf = R_FIRST;
+#endif
+ return (sv);
+}
+
+/* Methods */
+
+static void
+sv_close(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+#ifdef IRS_LCL_SV_DB
+ if (pvt->dbh != NULL)
+ (*pvt->dbh->close)(pvt->dbh);
+#endif
+ if (pvt->sv.fp)
+ fclose(pvt->sv.fp);
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+static struct servent *
+sv_byname(struct irs_sv *this, const char *name, const char *proto) {
+#ifdef IRS_LCL_SV_DB
+ struct pvt *pvt = (struct pvt *)this->private;
+#endif
+ struct servent *p;
+ char **cp;
+
+ sv_rewind(this);
+#ifdef IRS_LCL_SV_DB
+ if (pvt->dbh != NULL) {
+ DBT key, data;
+
+ /* Note that (sizeof "/") == 2. */
+ if ((strlen(name) + sizeof "/" + proto ? strlen(proto) : 0)
+ > sizeof pvt->sv.line)
+ goto try_local;
+ key.data = pvt->sv.line;
+ key.size = SPRINTF((pvt->sv.line, "%s/%s", name,
+ proto ? proto : "")) + 1;
+ if (proto != NULL) {
+ if ((*pvt->dbh->get)(pvt->dbh, &key, &data, 0) != 0)
+ return (NULL);
+ } else if ((*pvt->dbh->seq)(pvt->dbh, &key, &data, R_CURSOR)
+ != 0)
+ return (NULL);
+ return (sv_db_rec(&pvt->sv, &key, &data));
+ }
+ try_local:
+#endif
+
+ while ((p = sv_next(this))) {
+ if (strcmp(name, p->s_name) == 0)
+ goto gotname;
+ for (cp = p->s_aliases; *cp; cp++)
+ if (strcmp(name, *cp) == 0)
+ goto gotname;
+ continue;
+ gotname:
+ if (proto == NULL || strcmp(p->s_proto, proto) == 0)
+ break;
+ }
+ return (p);
+}
+
+static struct servent *
+sv_byport(struct irs_sv *this, int port, const char *proto) {
+#ifdef IRS_LCL_SV_DB
+ struct pvt *pvt = (struct pvt *)this->private;
+#endif
+ struct servent *p;
+
+ sv_rewind(this);
+#ifdef IRS_LCL_SV_DB
+ if (pvt->dbh != NULL) {
+ DBT key, data;
+ u_short *ports;
+
+ ports = (u_short *)pvt->sv.line;
+ ports[0] = 0;
+ ports[1] = port;
+ key.data = ports;
+ key.size = sizeof(u_short) * 2;
+ if (proto && *proto) {
+ strncpy((char *)ports + key.size, proto,
+ BUFSIZ - key.size);
+ key.size += strlen((char *)ports + key.size) + 1;
+ if ((*pvt->dbh->get)(pvt->dbh, &key, &data, 0) != 0)
+ return (NULL);
+ } else {
+ if ((*pvt->dbh->seq)(pvt->dbh, &key, &data, R_CURSOR)
+ != 0)
+ return (NULL);
+ }
+ return (sv_db_rec(&pvt->sv, &key, &data));
+ }
+#endif
+ while ((p = sv_next(this))) {
+ if (p->s_port != port)
+ continue;
+ if (proto == NULL || strcmp(p->s_proto, proto) == 0)
+ break;
+ }
+ return (p);
+}
+
+static void
+sv_rewind(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->sv.fp) {
+ if (fseek(pvt->sv.fp, 0L, SEEK_SET) == 0)
+ return;
+ (void)fclose(pvt->sv.fp);
+ pvt->sv.fp = NULL;
+ }
+#ifdef IRS_LCL_SV_DB
+ pvt->dbf = R_FIRST;
+ if (pvt->dbh != NULL)
+ return;
+ pvt->dbh = dbopen(_PATH_SERVICES_DB, O_RDONLY,O_RDONLY,DB_BTREE, NULL);
+ if (pvt->dbh != NULL) {
+ if (fcntl((*pvt->dbh->fd)(pvt->dbh), F_SETFD, 1) < 0) {
+ (*pvt->dbh->close)(pvt->dbh);
+ pvt->dbh = NULL;
+ }
+ return;
+ }
+#endif
+ if ((pvt->sv.fp = fopen(_PATH_SERVICES, "r")) == NULL)
+ return;
+ if (fcntl(fileno(pvt->sv.fp), F_SETFD, 1) < 0) {
+ (void)fclose(pvt->sv.fp);
+ pvt->sv.fp = NULL;
+ }
+}
+
+static struct servent *
+sv_next(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+#ifdef IRS_LCL_SV_DB
+ if (pvt->dbh == NULL && pvt->sv.fp == NULL)
+#else
+ if (pvt->sv.fp == NULL)
+#endif
+ sv_rewind(this);
+
+#ifdef IRS_LCL_SV_DB
+ if (pvt->dbh != NULL) {
+ DBT key, data;
+
+ while ((*pvt->dbh->seq)(pvt->dbh, &key, &data, pvt->dbf) == 0){
+ pvt->dbf = R_NEXT;
+ if (((char *)key.data)[0])
+ continue;
+ return (sv_db_rec(&pvt->sv, &key, &data));
+ }
+ }
+#endif
+
+ if (pvt->sv.fp == NULL)
+ return (NULL);
+ return (irs_lclsv_fnxt(&pvt->sv));
+}
+
+static void
+sv_minimize(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+#ifdef IRS_LCL_SV_DB
+ if (pvt->dbh != NULL) {
+ (*pvt->dbh->close)(pvt->dbh);
+ pvt->dbh = NULL;
+ }
+#endif
+ if (pvt->sv.fp != NULL) {
+ (void)fclose(pvt->sv.fp);
+ pvt->sv.fp = NULL;
+ }
+}
+
+/* Quasipublic. */
+
+struct servent *
+irs_lclsv_fnxt(struct lcl_sv *sv) {
+ char *p, *cp, **q;
+
+ again:
+ if ((p = fgets(sv->line, BUFSIZ, sv->fp)) == NULL)
+ return (NULL);
+ if (*p == '#')
+ goto again;
+ sv->serv.s_name = p;
+ while (*p && *p != '\n' && *p != ' ' && *p != '\t' && *p != '#')
+ ++p;
+ if (*p == '\0' || *p == '#' || *p == '\n')
+ goto again;
+ *p++ = '\0';
+ while (*p == ' ' || *p == '\t')
+ p++;
+ if (*p == '\0' || *p == '#' || *p == '\n')
+ goto again;
+ sv->serv.s_port = htons((u_short)strtol(p, &cp, 10));
+ if (cp == p || (*cp != '/' && *cp != ','))
+ goto again;
+ p = cp + 1;
+ sv->serv.s_proto = p;
+
+ q = sv->serv.s_aliases = sv->serv_aliases;
+
+ while (*p && *p != '\n' && *p != ' ' && *p != '\t' && *p != '#')
+ ++p;
+
+ while (*p == ' ' || *p == '\t') {
+ *p++ = '\0';
+ while (*p == ' ' || *p == '\t')
+ ++p;
+ if (*p == '\0' || *p == '#' || *p == '\n')
+ break;
+ if (q < &sv->serv_aliases[IRS_SV_MAXALIASES - 1])
+ *q++ = p;
+ while (*p && *p != '\n' && *p != ' ' && *p != '\t' && *p != '#')
+ ++p;
+ }
+
+ *p = '\0';
+ *q = NULL;
+ return (&sv->serv);
+}
+
+/* Private. */
+
+#ifdef IRS_LCL_SV_DB
+static struct servent *
+sv_db_rec(struct lcl_sv *sv, DBT *key, DBT *data) {
+ char *p, **q;
+ int n;
+
+ p = data->data;
+ p[data->size - 1] = '\0'; /*%< should be, but we depend on it */
+ if (((char *)key->data)[0] == '\0') {
+ if (key->size < sizeof(u_short)*2 || data->size < 2)
+ return (NULL);
+ sv->serv.s_port = ((u_short *)key->data)[1];
+ n = strlen(p) + 1;
+ if ((size_t)n > sizeof(sv->line)) {
+ n = sizeof(sv->line);
+ }
+ memcpy(sv->line, p, n);
+ sv->serv.s_name = sv->line;
+ if ((sv->serv.s_proto = strchr(sv->line, '/')) != NULL)
+ *(sv->serv.s_proto)++ = '\0';
+ p += n;
+ data->size -= n;
+ } else {
+ if (data->size < sizeof(u_short) + 1)
+ return (NULL);
+ if (key->size > sizeof(sv->line))
+ key->size = sizeof(sv->line);
+ ((char *)key->data)[key->size - 1] = '\0';
+ memcpy(sv->line, key->data, key->size);
+ sv->serv.s_name = sv->line;
+ if ((sv->serv.s_proto = strchr(sv->line, '/')) != NULL)
+ *(sv->serv.s_proto)++ = '\0';
+ sv->serv.s_port = *(u_short *)data->data;
+ p += sizeof(u_short);
+ data->size -= sizeof(u_short);
+ }
+ q = sv->serv.s_aliases = sv->serv_aliases;
+ while (data->size > 0 && q < &sv->serv_aliases[IRS_SV_MAXALIASES - 1]) {
+
+ *q++ = p;
+ n = strlen(p) + 1;
+ data->size -= n;
+ p += n;
+ }
+ *q = NULL;
+ return (&sv->serv);
+}
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/nis.c b/usr/src/lib/libresolv2_joy/common/irs/nis.c
new file mode 100644
index 0000000000..ac1796543c
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/nis.c
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: nis.c,v 1.3 2005/04/27 04:56:32 sra Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#ifdef WANT_IRS_NIS
+
+#include <rpc/rpc.h>
+#include <rpc/xdr.h>
+#include <rpcsvc/yp_prot.h>
+#include <rpcsvc/ypclnt.h>
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#ifdef T_NULL
+#undef T_NULL /* Silence re-definition warning of T_NULL. */
+#endif
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#include <isc/memcluster.h>
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "hesiod.h"
+#include "nis_p.h"
+
+/* Forward */
+
+static void nis_close(struct irs_acc *);
+static struct __res_state * nis_res_get(struct irs_acc *);
+static void nis_res_set(struct irs_acc *, struct __res_state *,
+ void (*)(void *));
+
+/* Public */
+
+struct irs_acc *
+irs_nis_acc(const char *options) {
+ struct nis_p *nis;
+ struct irs_acc *acc;
+ char *domain;
+
+ UNUSED(options);
+
+ if (yp_get_default_domain(&domain) != 0)
+ return (NULL);
+ if (!(nis = memget(sizeof *nis))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(nis, 0, sizeof *nis);
+ if (!(acc = memget(sizeof *acc))) {
+ memput(nis, sizeof *nis);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(acc, 0x5e, sizeof *acc);
+ acc->private = nis;
+ nis->domain = strdup(domain);
+#ifdef WANT_IRS_GR
+ acc->gr_map = irs_nis_gr;
+#else
+ acc->gr_map = NULL;
+#endif
+#ifdef WANT_IRS_PW
+ acc->pw_map = irs_nis_pw;
+#else
+ acc->pw_map = NULL;
+#endif
+ acc->sv_map = irs_nis_sv;
+ acc->pr_map = irs_nis_pr;
+ acc->ho_map = irs_nis_ho;
+ acc->nw_map = irs_nis_nw;
+ acc->ng_map = irs_nis_ng;
+ acc->res_get = nis_res_get;
+ acc->res_set = nis_res_set;
+ acc->close = nis_close;
+ return (acc);
+}
+
+/* Methods */
+
+static struct __res_state *
+nis_res_get(struct irs_acc *this) {
+ struct nis_p *nis = (struct nis_p *)this->private;
+
+ if (nis->res == NULL) {
+ struct __res_state *res;
+ res = (struct __res_state *)malloc(sizeof *res);
+ if (res == NULL)
+ return (NULL);
+ memset(res, 0, sizeof *res);
+ nis_res_set(this, res, free);
+ }
+
+ if ((nis->res->options & RES_INIT) == 0 &&
+ res_ninit(nis->res) < 0)
+ return (NULL);
+
+ return (nis->res);
+}
+
+static void
+nis_res_set(struct irs_acc *this, struct __res_state *res,
+ void (*free_res)(void *)) {
+ struct nis_p *nis = (struct nis_p *)this->private;
+
+ if (nis->res && nis->free_res) {
+ res_nclose(nis->res);
+ (*nis->free_res)(nis->res);
+ }
+
+ nis->res = res;
+ nis->free_res = free_res;
+}
+
+static void
+nis_close(struct irs_acc *this) {
+ struct nis_p *nis = (struct nis_p *)this->private;
+
+ if (nis->res && nis->free_res)
+ (*nis->free_res)(nis->res);
+ free(nis->domain);
+ memput(nis, sizeof *nis);
+ memput(this, sizeof *this);
+}
+
+#endif /*WANT_IRS_NIS*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/nis_p.h b/usr/src/lib/libresolv2_joy/common/irs/nis_p.h
new file mode 100644
index 0000000000..70e2948d67
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/nis_p.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: nis_p.h,v 1.3 2005/04/27 04:56:33 sra Exp $
+ */
+
+/*! \file
+ * \brief
+ * nis_p.h - private include file for the NIS functions.
+ */
+
+/*%
+ * Object state.
+ */
+struct nis_p {
+ char * domain;
+ struct __res_state * res;
+ void (*free_res) __P((void *));
+};
+
+
+/*
+ * Methods.
+ */
+
+extern struct irs_gr * irs_nis_gr __P((struct irs_acc *));
+extern struct irs_pw * irs_nis_pw __P((struct irs_acc *));
+extern struct irs_sv * irs_nis_sv __P((struct irs_acc *));
+extern struct irs_pr * irs_nis_pr __P((struct irs_acc *));
+extern struct irs_ho * irs_nis_ho __P((struct irs_acc *));
+extern struct irs_nw * irs_nis_nw __P((struct irs_acc *));
+extern struct irs_ng * irs_nis_ng __P((struct irs_acc *));
diff --git a/usr/src/lib/libresolv2_joy/common/irs/nul_ng.c b/usr/src/lib/libresolv2_joy/common/irs/nul_ng.c
new file mode 100644
index 0000000000..504813bde1
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/nul_ng.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: nul_ng.c,v 1.3 2005/04/27 04:56:34 sra Exp $";
+#endif
+
+/*! \file
+ * \brief
+ * nul_ng.c - the netgroup accessor null map
+ */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <netdb.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include <irs.h>
+#include <isc/memcluster.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "hesiod.h"
+#include "dns_p.h"
+
+/* Forward. */
+
+static void ng_close(struct irs_ng *);
+static int ng_next(struct irs_ng *, const char **,
+ const char **, const char **);
+static int ng_test(struct irs_ng *,
+ const char *, const char *,
+ const char *, const char *);
+static void ng_rewind(struct irs_ng *, const char *);
+static void ng_minimize(struct irs_ng *);
+
+/* Public. */
+
+struct irs_ng *
+irs_nul_ng(struct irs_acc *this) {
+ struct irs_ng *ng;
+
+ UNUSED(this);
+
+ if (!(ng = memget(sizeof *ng))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(ng, 0x5e, sizeof *ng);
+ ng->private = NULL;
+ ng->close = ng_close;
+ ng->next = ng_next;
+ ng->test = ng_test;
+ ng->rewind = ng_rewind;
+ ng->minimize = ng_minimize;
+ return (ng);
+}
+
+/* Methods. */
+
+static void
+ng_close(struct irs_ng *this) {
+ memput(this, sizeof *this);
+}
+
+/* ARGSUSED */
+static int
+ng_next(struct irs_ng *this, const char **host, const char **user,
+ const char **domain)
+{
+ UNUSED(this);
+ UNUSED(host);
+ UNUSED(user);
+ UNUSED(domain);
+ errno = ENOENT;
+ return (-1);
+}
+
+static int
+ng_test(struct irs_ng *this, const char *name,
+ const char *user, const char *host, const char *domain)
+{
+ UNUSED(this);
+ UNUSED(name);
+ UNUSED(user);
+ UNUSED(host);
+ UNUSED(domain);
+ errno = ENODEV;
+ return (-1);
+}
+
+static void
+ng_rewind(struct irs_ng *this, const char *netgroup) {
+ UNUSED(this);
+ UNUSED(netgroup);
+ /* NOOP */
+}
+
+static void
+ng_minimize(struct irs_ng *this) {
+ UNUSED(this);
+ /* NOOP */
+}
diff --git a/usr/src/lib/libresolv2_joy/common/irs/pathnames.h b/usr/src/lib/libresolv2_joy/common/irs/pathnames.h
new file mode 100644
index 0000000000..1646842155
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/pathnames.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: pathnames.h,v 1.3 2005/04/27 04:56:34 sra Exp $
+ */
+
+#ifndef _PATH_IRS_CONF
+#define _PATH_IRS_CONF "/etc/irs.conf"
+#endif
+
+#ifndef _PATH_NETWORKS
+#define _PATH_NETWORKS "/etc/networks"
+#endif
+
+#ifndef _PATH_GROUP
+#define _PATH_GROUP "/etc/group"
+#endif
+
+#ifndef _PATH_NETGROUP
+#define _PATH_NETGROUP "/etc/netgroup"
+#endif
+
+#ifndef _PATH_SERVICES
+#define _PATH_SERVICES "/etc/services"
+#endif
+
+#ifdef IRS_LCL_SV_DB
+#ifndef _PATH_SERVICES_DB
+#define _PATH_SERVICES_DB _PATH_SERVICES ".db"
+#endif
+#endif
+
+#ifndef _PATH_HESIOD_CONF
+#define _PATH_HESIOD_CONF "/etc/hesiod.conf"
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/util.c b/usr/src/lib/libresolv2_joy/common/irs/util.c
new file mode 100644
index 0000000000..9c70eabddc
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/util.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: util.c,v 1.3 2005/04/27 04:56:34 sra Exp $";
+#endif
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) sprintf x
+#endif
+
+void
+map_v4v6_address(const char *src, char *dst) {
+ u_char *p = (u_char *)dst;
+ char tmp[NS_INADDRSZ];
+ int i;
+
+ /* Stash a temporary copy so our caller can update in place. */
+ memcpy(tmp, src, NS_INADDRSZ);
+ /* Mark this ipv6 addr as a mapped ipv4. */
+ for (i = 0; i < 10; i++)
+ *p++ = 0x00;
+ *p++ = 0xff;
+ *p++ = 0xff;
+ /* Retrieve the saved copy and we're done. */
+ memcpy((void*)p, tmp, NS_INADDRSZ);
+}
+
+int
+make_group_list(struct irs_gr *this, const char *name,
+ gid_t basegid, gid_t *groups, int *ngroups)
+{
+ struct group *grp;
+ int i, ng;
+ int ret, maxgroups;
+
+ ret = -1;
+ ng = 0;
+ maxgroups = *ngroups;
+ /*
+ * When installing primary group, duplicate it;
+ * the first element of groups is the effective gid
+ * and will be overwritten when a setgid file is executed.
+ */
+ if (ng >= maxgroups)
+ goto done;
+ groups[ng++] = basegid;
+ if (ng >= maxgroups)
+ goto done;
+ groups[ng++] = basegid;
+ /*
+ * Scan the group file to find additional groups.
+ */
+ (*this->rewind)(this);
+ while ((grp = (*this->next)(this)) != NULL) {
+ if ((gid_t)grp->gr_gid == basegid)
+ continue;
+ for (i = 0; grp->gr_mem[i]; i++) {
+ if (!strcmp(grp->gr_mem[i], name)) {
+ if (ng >= maxgroups)
+ goto done;
+ groups[ng++] = grp->gr_gid;
+ break;
+ }
+ }
+ }
+ ret = 0;
+ done:
+ *ngroups = ng;
+ return (ret);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/assertions.c b/usr/src/lib/libresolv2_joy/common/isc/assertions.c
new file mode 100644
index 0000000000..b71e5a32d3
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/assertions.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 1997, 1999, 2001 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: assertions.c,v 1.5 2008/11/14 02:36:51 marka Exp $";
+#endif
+
+#include "port_before.h"
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <isc/assertions.h>
+
+#include "port_after.h"
+
+/*
+ * Forward.
+ */
+
+static void default_assertion_failed(const char *, int, assertion_type,
+ const char *, int);
+
+/*
+ * Public.
+ */
+
+assertion_failure_callback __assertion_failed = default_assertion_failed;
+
+void
+set_assertion_failure_callback(assertion_failure_callback f) {
+ if (f == NULL)
+ __assertion_failed = default_assertion_failed;
+ else
+ __assertion_failed = f;
+}
+
+const char *
+assertion_type_to_text(assertion_type type) {
+ const char *result;
+
+ switch (type) {
+ case assert_require:
+ result = "REQUIRE";
+ break;
+ case assert_ensure:
+ result = "ENSURE";
+ break;
+ case assert_insist:
+ result = "INSIST";
+ break;
+ case assert_invariant:
+ result = "INVARIANT";
+ break;
+ default:
+ result = NULL;
+ }
+ return (result);
+}
+
+/*
+ * Private.
+ */
+
+/* coverity[+kill] */
+static void
+default_assertion_failed(const char *file, int line, assertion_type type,
+ const char *cond, int print_errno)
+{
+ fprintf(stderr, "%s:%d: %s(%s)%s%s failed.\n",
+ file, line, assertion_type_to_text(type), cond,
+ (print_errno) ? ": " : "",
+ (print_errno) ? strerror(errno) : "");
+ abort();
+ /* NOTREACHED */
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/base64.c b/usr/src/lib/libresolv2_joy/common/isc/base64.c
new file mode 100644
index 0000000000..79c35722b1
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/base64.c
@@ -0,0 +1,333 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Portions Copyright (c) 1995 by International Business Machines, Inc.
+ *
+ * International Business Machines, Inc. (hereinafter called IBM) grants
+ * permission under its copyrights to use, copy, modify, and distribute this
+ * Software with or without fee, provided that the above copyright notice and
+ * all paragraphs of this notice appear in all copies, and that the name of IBM
+ * not be used in connection with the marketing of any product incorporating
+ * the Software or modifications thereof, without specific, written prior
+ * permission.
+ *
+ * To the extent it has a right to do so, IBM grants an immunity from suit
+ * under its patents, if any, for the use, sale or manufacture of products to
+ * the extent that such products are used for performing Domain Name System
+ * dynamic updates in TCP/IP networks by means of the Software. No immunity is
+ * granted for any product per se or for any other function of any product.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", AND IBM DISCLAIMS ALL WARRANTIES,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE. IN NO EVENT SHALL IBM BE LIABLE FOR ANY SPECIAL,
+ * DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE, EVEN
+ * IF IBM IS APPRISED OF THE POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: base64.c,v 1.4 2005/04/27 04:56:34 sra Exp $";
+#endif /* not lint */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <ctype.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "port_after.h"
+
+#ifndef ORIGINAL_ISC_CODE
+#pragma weak __b64_ntop = b64_ntop
+#pragma weak __b64_pton = b64_pton
+#endif /* ORIGINAL_ISC_CODE */
+
+#define Assert(Cond) if (!(Cond)) abort()
+
+static const char Base64[] =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+static const char Pad64 = '=';
+
+/* (From RFC1521 and draft-ietf-dnssec-secext-03.txt)
+ The following encoding technique is taken from RFC1521 by Borenstein
+ and Freed. It is reproduced here in a slightly edited form for
+ convenience.
+
+ A 65-character subset of US-ASCII is used, enabling 6 bits to be
+ represented per printable character. (The extra 65th character, "=",
+ is used to signify a special processing function.)
+
+ The encoding process represents 24-bit groups of input bits as output
+ strings of 4 encoded characters. Proceeding from left to right, a
+ 24-bit input group is formed by concatenating 3 8-bit input groups.
+ These 24 bits are then treated as 4 concatenated 6-bit groups, each
+ of which is translated into a single digit in the base64 alphabet.
+
+ Each 6-bit group is used as an index into an array of 64 printable
+ characters. The character referenced by the index is placed in the
+ output string.
+
+ Table 1: The Base64 Alphabet
+
+ Value Encoding Value Encoding Value Encoding Value Encoding
+ 0 A 17 R 34 i 51 z
+ 1 B 18 S 35 j 52 0
+ 2 C 19 T 36 k 53 1
+ 3 D 20 U 37 l 54 2
+ 4 E 21 V 38 m 55 3
+ 5 F 22 W 39 n 56 4
+ 6 G 23 X 40 o 57 5
+ 7 H 24 Y 41 p 58 6
+ 8 I 25 Z 42 q 59 7
+ 9 J 26 a 43 r 60 8
+ 10 K 27 b 44 s 61 9
+ 11 L 28 c 45 t 62 +
+ 12 M 29 d 46 u 63 /
+ 13 N 30 e 47 v
+ 14 O 31 f 48 w (pad) =
+ 15 P 32 g 49 x
+ 16 Q 33 h 50 y
+
+ Special processing is performed if fewer than 24 bits are available
+ at the end of the data being encoded. A full encoding quantum is
+ always completed at the end of a quantity. When fewer than 24 input
+ bits are available in an input group, zero bits are added (on the
+ right) to form an integral number of 6-bit groups. Padding at the
+ end of the data is performed using the '=' character.
+
+ Since all base64 input is an integral number of octets, only the
+ -------------------------------------------------
+ following cases can arise:
+
+ (1) the final quantum of encoding input is an integral
+ multiple of 24 bits; here, the final unit of encoded
+ output will be an integral multiple of 4 characters
+ with no "=" padding,
+ (2) the final quantum of encoding input is exactly 8 bits;
+ here, the final unit of encoded output will be two
+ characters followed by two "=" padding characters, or
+ (3) the final quantum of encoding input is exactly 16 bits;
+ here, the final unit of encoded output will be three
+ characters followed by one "=" padding character.
+ */
+
+int
+b64_ntop(u_char const *src, size_t srclength, char *target, size_t targsize) {
+ size_t datalength = 0;
+ u_char input[3];
+ u_char output[4];
+ size_t i;
+
+ while (2U < srclength) {
+ input[0] = *src++;
+ input[1] = *src++;
+ input[2] = *src++;
+ srclength -= 3;
+
+ output[0] = input[0] >> 2;
+ output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
+ output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);
+ output[3] = input[2] & 0x3f;
+ Assert(output[0] < 64);
+ Assert(output[1] < 64);
+ Assert(output[2] < 64);
+ Assert(output[3] < 64);
+
+ if (datalength + 4 > targsize)
+ return (-1);
+ target[datalength++] = Base64[output[0]];
+ target[datalength++] = Base64[output[1]];
+ target[datalength++] = Base64[output[2]];
+ target[datalength++] = Base64[output[3]];
+ }
+
+ /* Now we worry about padding. */
+ if (0U != srclength) {
+ /* Get what's left. */
+ input[0] = input[1] = input[2] = '\0';
+ for (i = 0; i < srclength; i++)
+ input[i] = *src++;
+
+ output[0] = input[0] >> 2;
+ output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
+ output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);
+ Assert(output[0] < 64);
+ Assert(output[1] < 64);
+ Assert(output[2] < 64);
+
+ if (datalength + 4 > targsize)
+ return (-1);
+ target[datalength++] = Base64[output[0]];
+ target[datalength++] = Base64[output[1]];
+ if (srclength == 1U)
+ target[datalength++] = Pad64;
+ else
+ target[datalength++] = Base64[output[2]];
+ target[datalength++] = Pad64;
+ }
+ if (datalength >= targsize)
+ return (-1);
+ target[datalength] = '\0'; /*%< Returned value doesn't count \\0. */
+ return (datalength);
+}
+
+/* skips all whitespace anywhere.
+ converts characters, four at a time, starting at (or after)
+ src from base - 64 numbers into three 8 bit bytes in the target area.
+ it returns the number of data bytes stored at the target, or -1 on error.
+ */
+
+int
+b64_pton(src, target, targsize)
+ char const *src;
+ u_char *target;
+ size_t targsize;
+{
+ int tarindex, state, ch;
+ char *pos;
+
+ state = 0;
+ tarindex = 0;
+
+ while ((ch = *src++) != '\0') {
+ if (isspace(ch)) /*%< Skip whitespace anywhere. */
+ continue;
+
+ if (ch == Pad64)
+ break;
+
+ pos = strchr(Base64, ch);
+ if (pos == 0) /*%< A non-base64 character. */
+ return (-1);
+
+ switch (state) {
+ case 0:
+ if (target) {
+ if ((size_t)tarindex >= targsize)
+ return (-1);
+ target[tarindex] = (pos - Base64) << 2;
+ }
+ state = 1;
+ break;
+ case 1:
+ if (target) {
+ if ((size_t)tarindex + 1 >= targsize)
+ return (-1);
+ target[tarindex] |= (pos - Base64) >> 4;
+ target[tarindex+1] = ((pos - Base64) & 0x0f)
+ << 4 ;
+ }
+ tarindex++;
+ state = 2;
+ break;
+ case 2:
+ if (target) {
+ if ((size_t)tarindex + 1 >= targsize)
+ return (-1);
+ target[tarindex] |= (pos - Base64) >> 2;
+ target[tarindex+1] = ((pos - Base64) & 0x03)
+ << 6;
+ }
+ tarindex++;
+ state = 3;
+ break;
+ case 3:
+ if (target) {
+ if ((size_t)tarindex >= targsize)
+ return (-1);
+ target[tarindex] |= (pos - Base64);
+ }
+ tarindex++;
+ state = 0;
+ break;
+ default:
+ abort();
+ }
+ }
+
+ /*
+ * We are done decoding Base-64 chars. Let's see if we ended
+ * on a byte boundary, and/or with erroneous trailing characters.
+ */
+
+ if (ch == Pad64) { /*%< We got a pad char. */
+ ch = *src++; /*%< Skip it, get next. */
+ switch (state) {
+ case 0: /*%< Invalid = in first position */
+ case 1: /*%< Invalid = in second position */
+ return (-1);
+
+ case 2: /*%< Valid, means one byte of info */
+ /* Skip any number of spaces. */
+ for ((void)NULL; ch != '\0'; ch = *src++)
+ if (!isspace(ch))
+ break;
+ /* Make sure there is another trailing = sign. */
+ if (ch != Pad64)
+ return (-1);
+ ch = *src++; /*%< Skip the = */
+ /* Fall through to "single trailing =" case. */
+ /* FALLTHROUGH */
+
+ case 3: /*%< Valid, means two bytes of info */
+ /*
+ * We know this char is an =. Is there anything but
+ * whitespace after it?
+ */
+ for ((void)NULL; ch != '\0'; ch = *src++)
+ if (!isspace(ch))
+ return (-1);
+
+ /*
+ * Now make sure for cases 2 and 3 that the "extra"
+ * bits that slopped past the last full byte were
+ * zeros. If we don't check them, they become a
+ * subliminal channel.
+ */
+ if (target && target[tarindex] != 0)
+ return (-1);
+ }
+ } else {
+ /*
+ * We ended by seeing the end of the string. Make sure we
+ * have no partial bytes lying around.
+ */
+ if (state != 0)
+ return (-1);
+ }
+
+ return (tarindex);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/bitncmp.c b/usr/src/lib/libresolv2_joy/common/isc/bitncmp.c
new file mode 100644
index 0000000000..efe5009292
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/bitncmp.c
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 1996, 1999, 2001 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: bitncmp.c,v 1.5 2008/11/14 02:36:51 marka Exp $";
+#endif
+
+#include "port_before.h"
+
+#include <sys/types.h>
+
+#include <string.h>
+
+#include "port_after.h"
+
+#include <isc/misc.h>
+
+/*%
+ * int
+ * bitncmp(l, r, n)
+ * compare bit masks l and r, for n bits.
+ * return:
+ * -1, 1, or 0 in the libc tradition.
+ * note:
+ * network byte order assumed. this means 192.5.5.240/28 has
+ * 0x11110000 in its fourth octet.
+ * author:
+ * Paul Vixie (ISC), June 1996
+ */
+int
+bitncmp(const void *l, const void *r, int n) {
+ u_int lb, rb;
+ int x, b;
+
+ b = n / 8;
+ x = memcmp(l, r, b);
+ if (x || (n % 8) == 0)
+ return (x);
+
+ lb = ((const u_char *)l)[b];
+ rb = ((const u_char *)r)[b];
+ for (b = n % 8; b > 0; b--) {
+ if ((lb & 0x80) != (rb & 0x80)) {
+ if (lb & 0x80)
+ return (1);
+ return (-1);
+ }
+ lb <<= 1;
+ rb <<= 1;
+ }
+ return (0);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/ctl_clnt.c b/usr/src/lib/libresolv2_joy/common/isc/ctl_clnt.c
new file mode 100644
index 0000000000..f71001a6d4
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/ctl_clnt.c
@@ -0,0 +1,620 @@
+/*
+ * Copyright (C) 2004, 2005, 2007, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 1998-2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(lint) && !defined(SABER)
+static const char rcsid[] = "$Id: ctl_clnt.c,v 1.11 2008/11/14 02:36:51 marka Exp $";
+#endif /* not lint */
+
+/* Extern. */
+
+#include "port_before.h"
+
+#include <sys/param.h>
+#include <sys/file.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <arpa/inet.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#ifdef HAVE_MEMORY_H
+#include <memory.h>
+#endif
+
+#include <isc/assertions.h>
+#include <isc/ctl.h>
+#include <isc/eventlib.h>
+#include <isc/list.h>
+#include <isc/memcluster.h>
+
+#include "ctl_p.h"
+
+#include "port_after.h"
+
+/* Constants. */
+
+
+/* Macros. */
+
+#define donefunc_p(ctx) ((ctx).donefunc != NULL)
+#define arpacode_p(line) (isdigit((unsigned char)(line[0])) && \
+ isdigit((unsigned char)(line[1])) && \
+ isdigit((unsigned char)(line[2])))
+#define arpacont_p(line) (line[3] == '-')
+#define arpadone_p(line) (line[3] == ' ' || line[3] == '\t' || \
+ line[3] == '\r' || line[3] == '\0')
+
+/* Types. */
+
+enum state {
+ initializing = 0, connecting, connected, destroyed
+};
+
+struct ctl_tran {
+ LINK(struct ctl_tran) link;
+ LINK(struct ctl_tran) wlink;
+ struct ctl_cctx * ctx;
+ struct ctl_buf outbuf;
+ ctl_clntdone donefunc;
+ void * uap;
+};
+
+struct ctl_cctx {
+ enum state state;
+ evContext ev;
+ int sock;
+ ctl_logfunc logger;
+ ctl_clntdone donefunc;
+ void * uap;
+ evConnID coID;
+ evTimerID tiID;
+ evFileID rdID;
+ evStreamID wrID;
+ struct ctl_buf inbuf;
+ struct timespec timeout;
+ LIST(struct ctl_tran) tran;
+ LIST(struct ctl_tran) wtran;
+};
+
+/* Forward. */
+
+static struct ctl_tran *new_tran(struct ctl_cctx *, ctl_clntdone, void *, int);
+static void start_write(struct ctl_cctx *);
+static void destroy(struct ctl_cctx *, int);
+static void error(struct ctl_cctx *);
+static void new_state(struct ctl_cctx *, enum state);
+static void conn_done(evContext, void *, int,
+ const void *, int,
+ const void *, int);
+static void write_done(evContext, void *, int, int);
+static void start_read(struct ctl_cctx *);
+static void stop_read(struct ctl_cctx *);
+static void readable(evContext, void *, int, int);
+static void start_timer(struct ctl_cctx *);
+static void stop_timer(struct ctl_cctx *);
+static void touch_timer(struct ctl_cctx *);
+static void timer(evContext, void *,
+ struct timespec, struct timespec);
+
+#ifndef HAVE_MEMCHR
+static void *
+memchr(const void *b, int c, size_t len) {
+ const unsigned char *p = b;
+ size_t i;
+
+ for (i = 0; i < len; i++, p++)
+ if (*p == (unsigned char)c)
+ return ((void *)p);
+ return (NULL);
+}
+#endif
+
+/* Private data. */
+
+static const char * const state_names[] = {
+ "initializing", "connecting", "connected", "destroyed"
+};
+
+/* Public. */
+
+/*%
+ * void
+ * ctl_client()
+ * create, condition, and connect to a listener on the control port.
+ */
+struct ctl_cctx *
+ctl_client(evContext lev, const struct sockaddr *cap, size_t cap_len,
+ const struct sockaddr *sap, size_t sap_len,
+ ctl_clntdone donefunc, void *uap,
+ u_int timeout, ctl_logfunc logger)
+{
+ static const char me[] = "ctl_client";
+ static const int on = 1;
+ struct ctl_cctx *ctx;
+ struct sockaddr *captmp;
+
+ if (logger == NULL)
+ logger = ctl_logger;
+ ctx = memget(sizeof *ctx);
+ if (ctx == NULL) {
+ (*logger)(ctl_error, "%s: getmem: %s", me, strerror(errno));
+ goto fatal;
+ }
+ ctx->state = initializing;
+ ctx->ev = lev;
+ ctx->logger = logger;
+ ctx->timeout = evConsTime(timeout, 0);
+ ctx->donefunc = donefunc;
+ ctx->uap = uap;
+ ctx->coID.opaque = NULL;
+ ctx->tiID.opaque = NULL;
+ ctx->rdID.opaque = NULL;
+ ctx->wrID.opaque = NULL;
+ buffer_init(ctx->inbuf);
+ INIT_LIST(ctx->tran);
+ INIT_LIST(ctx->wtran);
+ ctx->sock = socket(sap->sa_family, SOCK_STREAM, PF_UNSPEC);
+ if (ctx->sock > evHighestFD(ctx->ev)) {
+ ctx->sock = -1;
+ errno = ENOTSOCK;
+ }
+ if (ctx->sock < 0) {
+ (*ctx->logger)(ctl_error, "%s: socket: %s",
+ me, strerror(errno));
+ goto fatal;
+ }
+ if (cap != NULL) {
+ if (setsockopt(ctx->sock, SOL_SOCKET, SO_REUSEADDR,
+ (const char *)&on, sizeof on) != 0) {
+ (*ctx->logger)(ctl_warning,
+ "%s: setsockopt(REUSEADDR): %s",
+ me, strerror(errno));
+ }
+ DE_CONST(cap, captmp);
+ if (bind(ctx->sock, captmp, cap_len) < 0) {
+ (*ctx->logger)(ctl_error, "%s: bind: %s", me,
+ strerror(errno));
+ goto fatal;
+ }
+ }
+ if (evConnect(lev, ctx->sock, (const struct sockaddr *)sap, sap_len,
+ conn_done, ctx, &ctx->coID) < 0) {
+ (*ctx->logger)(ctl_error, "%s: evConnect(fd %d): %s",
+ me, ctx->sock, strerror(errno));
+ fatal:
+ if (ctx != NULL) {
+ if (ctx->sock >= 0)
+ close(ctx->sock);
+ memput(ctx, sizeof *ctx);
+ }
+ return (NULL);
+ }
+ new_state(ctx, connecting);
+ return (ctx);
+}
+
+/*%
+ * void
+ * ctl_endclient(ctx)
+ * close a client and release all of its resources.
+ */
+void
+ctl_endclient(struct ctl_cctx *ctx) {
+ if (ctx->state != destroyed)
+ destroy(ctx, 0);
+ memput(ctx, sizeof *ctx);
+}
+
+/*%
+ * int
+ * ctl_command(ctx, cmd, len, donefunc, uap)
+ * Queue a transaction, which will begin with sending cmd
+ * and complete by calling donefunc with the answer.
+ */
+int
+ctl_command(struct ctl_cctx *ctx, const char *cmd, size_t len,
+ ctl_clntdone donefunc, void *uap)
+{
+ struct ctl_tran *tran;
+ char *pc;
+ unsigned int n;
+
+ switch (ctx->state) {
+ case destroyed:
+ errno = ENOTCONN;
+ return (-1);
+ case connecting:
+ case connected:
+ break;
+ default:
+ abort();
+ }
+ if (len >= (size_t)MAX_LINELEN) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ tran = new_tran(ctx, donefunc, uap, 1);
+ if (tran == NULL)
+ return (-1);
+ if (ctl_bufget(&tran->outbuf, ctx->logger) < 0)
+ return (-1);
+ memcpy(tran->outbuf.text, cmd, len);
+ tran->outbuf.used = len;
+ for (pc = tran->outbuf.text, n = 0; n < tran->outbuf.used; pc++, n++)
+ if (!isascii((unsigned char)*pc) ||
+ !isprint((unsigned char)*pc))
+ *pc = '\040';
+ start_write(ctx);
+ return (0);
+}
+
+/* Private. */
+
+static struct ctl_tran *
+new_tran(struct ctl_cctx *ctx, ctl_clntdone donefunc, void *uap, int w) {
+ struct ctl_tran *new = memget(sizeof *new);
+
+ if (new == NULL)
+ return (NULL);
+ new->ctx = ctx;
+ buffer_init(new->outbuf);
+ new->donefunc = donefunc;
+ new->uap = uap;
+ INIT_LINK(new, link);
+ INIT_LINK(new, wlink);
+ APPEND(ctx->tran, new, link);
+ if (w)
+ APPEND(ctx->wtran, new, wlink);
+ return (new);
+}
+
+static void
+start_write(struct ctl_cctx *ctx) {
+ static const char me[] = "isc/ctl_clnt::start_write";
+ struct ctl_tran *tran;
+ struct iovec iov[2], *iovp = iov;
+ char * tmp;
+
+ REQUIRE(ctx->state == connecting || ctx->state == connected);
+ /* If there is a write in progress, don't try to write more yet. */
+ if (ctx->wrID.opaque != NULL)
+ return;
+ /* If there are no trans, make sure timer is off, and we're done. */
+ if (EMPTY(ctx->wtran)) {
+ if (ctx->tiID.opaque != NULL)
+ stop_timer(ctx);
+ return;
+ }
+ /* Pull it off the head of the write queue. */
+ tran = HEAD(ctx->wtran);
+ UNLINK(ctx->wtran, tran, wlink);
+ /* Since there are some trans, make sure timer is successfully "on". */
+ if (ctx->tiID.opaque != NULL)
+ touch_timer(ctx);
+ else
+ start_timer(ctx);
+ if (ctx->state == destroyed)
+ return;
+ /* Marshall a newline-terminated message and clock it out. */
+ *iovp++ = evConsIovec(tran->outbuf.text, tran->outbuf.used);
+ DE_CONST("\r\n", tmp);
+ *iovp++ = evConsIovec(tmp, 2);
+ if (evWrite(ctx->ev, ctx->sock, iov, iovp - iov,
+ write_done, tran, &ctx->wrID) < 0) {
+ (*ctx->logger)(ctl_error, "%s: evWrite: %s", me,
+ strerror(errno));
+ error(ctx);
+ return;
+ }
+ if (evTimeRW(ctx->ev, ctx->wrID, ctx->tiID) < 0) {
+ (*ctx->logger)(ctl_error, "%s: evTimeRW: %s", me,
+ strerror(errno));
+ error(ctx);
+ return;
+ }
+}
+
+static void
+destroy(struct ctl_cctx *ctx, int notify) {
+ struct ctl_tran *this, *next;
+
+ if (ctx->sock != -1) {
+ (void) close(ctx->sock);
+ ctx->sock = -1;
+ }
+ switch (ctx->state) {
+ case connecting:
+ REQUIRE(ctx->wrID.opaque == NULL);
+ REQUIRE(EMPTY(ctx->tran));
+ /*
+ * This test is nec'y since destroy() can be called from
+ * start_read() while the state is still "connecting".
+ */
+ if (ctx->coID.opaque != NULL) {
+ (void)evCancelConn(ctx->ev, ctx->coID);
+ ctx->coID.opaque = NULL;
+ }
+ break;
+ case connected:
+ REQUIRE(ctx->coID.opaque == NULL);
+ if (ctx->wrID.opaque != NULL) {
+ (void)evCancelRW(ctx->ev, ctx->wrID);
+ ctx->wrID.opaque = NULL;
+ }
+ if (ctx->rdID.opaque != NULL)
+ stop_read(ctx);
+ break;
+ case destroyed:
+ break;
+ default:
+ abort();
+ }
+ if (allocated_p(ctx->inbuf))
+ ctl_bufput(&ctx->inbuf);
+ for (this = HEAD(ctx->tran); this != NULL; this = next) {
+ next = NEXT(this, link);
+ if (allocated_p(this->outbuf))
+ ctl_bufput(&this->outbuf);
+ if (notify && this->donefunc != NULL)
+ (*this->donefunc)(ctx, this->uap, NULL, 0);
+ memput(this, sizeof *this);
+ }
+ if (ctx->tiID.opaque != NULL)
+ stop_timer(ctx);
+ new_state(ctx, destroyed);
+}
+
+static void
+error(struct ctl_cctx *ctx) {
+ REQUIRE(ctx->state != destroyed);
+ destroy(ctx, 1);
+}
+
+static void
+new_state(struct ctl_cctx *ctx, enum state new_state) {
+ static const char me[] = "isc/ctl_clnt::new_state";
+
+ (*ctx->logger)(ctl_debug, "%s: %s -> %s", me,
+ state_names[ctx->state], state_names[new_state]);
+ ctx->state = new_state;
+}
+
+static void
+conn_done(evContext ev, void *uap, int fd,
+ const void *la, int lalen,
+ const void *ra, int ralen)
+{
+ static const char me[] = "isc/ctl_clnt::conn_done";
+ struct ctl_cctx *ctx = uap;
+ struct ctl_tran *tran;
+
+ UNUSED(ev);
+ UNUSED(la);
+ UNUSED(lalen);
+ UNUSED(ra);
+ UNUSED(ralen);
+
+ ctx->coID.opaque = NULL;
+ if (fd < 0) {
+ (*ctx->logger)(ctl_error, "%s: evConnect: %s", me,
+ strerror(errno));
+ error(ctx);
+ return;
+ }
+ new_state(ctx, connected);
+ tran = new_tran(ctx, ctx->donefunc, ctx->uap, 0);
+ if (tran == NULL) {
+ (*ctx->logger)(ctl_error, "%s: new_tran failed: %s", me,
+ strerror(errno));
+ error(ctx);
+ return;
+ }
+ start_read(ctx);
+ if (ctx->state == destroyed) {
+ (*ctx->logger)(ctl_error, "%s: start_read failed: %s",
+ me, strerror(errno));
+ error(ctx);
+ return;
+ }
+}
+
+static void
+write_done(evContext lev, void *uap, int fd, int bytes) {
+ struct ctl_tran *tran = (struct ctl_tran *)uap;
+ struct ctl_cctx *ctx = tran->ctx;
+
+ UNUSED(lev);
+ UNUSED(fd);
+
+ ctx->wrID.opaque = NULL;
+ if (ctx->tiID.opaque != NULL)
+ touch_timer(ctx);
+ ctl_bufput(&tran->outbuf);
+ start_write(ctx);
+ if (bytes < 0)
+ destroy(ctx, 1);
+ else
+ start_read(ctx);
+}
+
+static void
+start_read(struct ctl_cctx *ctx) {
+ static const char me[] = "isc/ctl_clnt::start_read";
+
+ REQUIRE(ctx->state == connecting || ctx->state == connected);
+ REQUIRE(ctx->rdID.opaque == NULL);
+ if (evSelectFD(ctx->ev, ctx->sock, EV_READ, readable, ctx,
+ &ctx->rdID) < 0)
+ {
+ (*ctx->logger)(ctl_error, "%s: evSelect(fd %d): %s", me,
+ ctx->sock, strerror(errno));
+ error(ctx);
+ return;
+ }
+}
+
+static void
+stop_read(struct ctl_cctx *ctx) {
+ REQUIRE(ctx->coID.opaque == NULL);
+ REQUIRE(ctx->rdID.opaque != NULL);
+ (void)evDeselectFD(ctx->ev, ctx->rdID);
+ ctx->rdID.opaque = NULL;
+}
+
+static void
+readable(evContext ev, void *uap, int fd, int evmask) {
+ static const char me[] = "isc/ctl_clnt::readable";
+ struct ctl_cctx *ctx = uap;
+ struct ctl_tran *tran;
+ ssize_t n;
+ char *eos;
+
+ UNUSED(ev);
+
+ REQUIRE(ctx != NULL);
+ REQUIRE(fd >= 0);
+ REQUIRE(evmask == EV_READ);
+ REQUIRE(ctx->state == connected);
+ REQUIRE(!EMPTY(ctx->tran));
+ tran = HEAD(ctx->tran);
+ if (!allocated_p(ctx->inbuf) &&
+ ctl_bufget(&ctx->inbuf, ctx->logger) < 0) {
+ (*ctx->logger)(ctl_error, "%s: can't get an input buffer", me);
+ error(ctx);
+ return;
+ }
+ n = read(ctx->sock, ctx->inbuf.text + ctx->inbuf.used,
+ MAX_LINELEN - ctx->inbuf.used);
+ if (n <= 0) {
+ (*ctx->logger)(ctl_warning, "%s: read: %s", me,
+ (n == 0) ? "Unexpected EOF" : strerror(errno));
+ error(ctx);
+ return;
+ }
+ if (ctx->tiID.opaque != NULL)
+ touch_timer(ctx);
+ ctx->inbuf.used += n;
+ (*ctx->logger)(ctl_debug, "%s: read %d, used %d", me,
+ n, ctx->inbuf.used);
+ again:
+ eos = memchr(ctx->inbuf.text, '\n', ctx->inbuf.used);
+ if (eos != NULL && eos != ctx->inbuf.text && eos[-1] == '\r') {
+ int done = 0;
+
+ eos[-1] = '\0';
+ if (!arpacode_p(ctx->inbuf.text)) {
+ /* XXX Doesn't FTP do this sometimes? Is it legal? */
+ (*ctx->logger)(ctl_error, "%s: no arpa code (%s)", me,
+ ctx->inbuf.text);
+ error(ctx);
+ return;
+ }
+ if (arpadone_p(ctx->inbuf.text))
+ done = 1;
+ else if (arpacont_p(ctx->inbuf.text))
+ done = 0;
+ else {
+ /* XXX Doesn't FTP do this sometimes? Is it legal? */
+ (*ctx->logger)(ctl_error, "%s: no arpa flag (%s)", me,
+ ctx->inbuf.text);
+ error(ctx);
+ return;
+ }
+ (*tran->donefunc)(ctx, tran->uap, ctx->inbuf.text,
+ (done ? 0 : CTL_MORE));
+ ctx->inbuf.used -= ((eos - ctx->inbuf.text) + 1);
+ if (ctx->inbuf.used == 0U)
+ ctl_bufput(&ctx->inbuf);
+ else
+ memmove(ctx->inbuf.text, eos + 1, ctx->inbuf.used);
+ if (done) {
+ UNLINK(ctx->tran, tran, link);
+ memput(tran, sizeof *tran);
+ stop_read(ctx);
+ start_write(ctx);
+ return;
+ }
+ if (allocated_p(ctx->inbuf))
+ goto again;
+ return;
+ }
+ if (ctx->inbuf.used == (size_t)MAX_LINELEN) {
+ (*ctx->logger)(ctl_error, "%s: line too long (%-10s...)", me,
+ ctx->inbuf.text);
+ error(ctx);
+ }
+}
+
+/* Timer related stuff. */
+
+static void
+start_timer(struct ctl_cctx *ctx) {
+ static const char me[] = "isc/ctl_clnt::start_timer";
+
+ REQUIRE(ctx->tiID.opaque == NULL);
+ if (evSetIdleTimer(ctx->ev, timer, ctx, ctx->timeout, &ctx->tiID) < 0){
+ (*ctx->logger)(ctl_error, "%s: evSetIdleTimer: %s", me,
+ strerror(errno));
+ error(ctx);
+ return;
+ }
+}
+
+static void
+stop_timer(struct ctl_cctx *ctx) {
+ static const char me[] = "isc/ctl_clnt::stop_timer";
+
+ REQUIRE(ctx->tiID.opaque != NULL);
+ if (evClearIdleTimer(ctx->ev, ctx->tiID) < 0) {
+ (*ctx->logger)(ctl_error, "%s: evClearIdleTimer: %s", me,
+ strerror(errno));
+ error(ctx);
+ return;
+ }
+ ctx->tiID.opaque = NULL;
+}
+
+static void
+touch_timer(struct ctl_cctx *ctx) {
+ REQUIRE(ctx->tiID.opaque != NULL);
+
+ evTouchIdleTimer(ctx->ev, ctx->tiID);
+}
+
+static void
+timer(evContext ev, void *uap, struct timespec due, struct timespec itv) {
+ static const char me[] = "isc/ctl_clnt::timer";
+ struct ctl_cctx *ctx = uap;
+
+ UNUSED(ev);
+ UNUSED(due);
+ UNUSED(itv);
+
+ ctx->tiID.opaque = NULL;
+ (*ctx->logger)(ctl_error, "%s: timeout after %u seconds while %s", me,
+ ctx->timeout.tv_sec, state_names[ctx->state]);
+ error(ctx);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/ctl_p.c b/usr/src/lib/libresolv2_joy/common/isc/ctl_p.c
new file mode 100644
index 0000000000..7ab719a5e6
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/ctl_p.c
@@ -0,0 +1,188 @@
+#if !defined(lint) && !defined(SABER)
+static const char rcsid[] = "$Id: ctl_p.c,v 1.4 2005/04/27 04:56:35 sra Exp $";
+#endif /* not lint */
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1998,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* Extern. */
+
+#include "port_before.h"
+
+#include <sys/param.h>
+#include <sys/file.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <arpa/inet.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <isc/assertions.h>
+#include <isc/eventlib.h>
+#include <isc/logging.h>
+#include <isc/memcluster.h>
+#include <isc/ctl.h>
+
+#include "ctl_p.h"
+
+#include "port_after.h"
+
+/* Constants. */
+
+const char * const ctl_sevnames[] = {
+ "debug", "warning", "error"
+};
+
+/* Public. */
+
+/*%
+ * ctl_logger()
+ * if ctl_startup()'s caller didn't specify a logger, this one
+ * is used. this pollutes stderr with all kinds of trash so it will
+ * probably never be used in real applications.
+ */
+void
+ctl_logger(enum ctl_severity severity, const char *format, ...) {
+ va_list ap;
+ static const char me[] = "ctl_logger";
+
+ fprintf(stderr, "%s(%s): ", me, ctl_sevnames[severity]);
+ va_start(ap, format);
+ vfprintf(stderr, format, ap);
+ va_end(ap);
+ fputc('\n', stderr);
+}
+
+int
+ctl_bufget(struct ctl_buf *buf, ctl_logfunc logger) {
+ static const char me[] = "ctl_bufget";
+
+ REQUIRE(!allocated_p(*buf) && buf->used == 0U);
+ buf->text = memget(MAX_LINELEN);
+ if (!allocated_p(*buf)) {
+ (*logger)(ctl_error, "%s: getmem: %s", me, strerror(errno));
+ return (-1);
+ }
+ buf->used = 0;
+ return (0);
+}
+
+void
+ctl_bufput(struct ctl_buf *buf) {
+
+ REQUIRE(allocated_p(*buf));
+ memput(buf->text, MAX_LINELEN);
+ buf->text = NULL;
+ buf->used = 0;
+}
+
+const char *
+ctl_sa_ntop(const struct sockaddr *sa,
+ char *buf, size_t size,
+ ctl_logfunc logger)
+{
+ static const char me[] = "ctl_sa_ntop";
+ static const char punt[] = "[0].-1";
+ char tmp[INET6_ADDRSTRLEN];
+
+ switch (sa->sa_family) {
+ case AF_INET6: {
+ const struct sockaddr_in6 *in6 =
+ (const struct sockaddr_in6 *) sa;
+
+ if (inet_ntop(in6->sin6_family, &in6->sin6_addr, tmp, sizeof tmp)
+ == NULL) {
+ (*logger)(ctl_error, "%s: inet_ntop(%u %04x): %s",
+ me, in6->sin6_family,
+ in6->sin6_port, strerror(errno));
+ return (punt);
+ }
+ if (strlen(tmp) + sizeof "[].65535" > size) {
+ (*logger)(ctl_error, "%s: buffer overflow", me);
+ return (punt);
+ }
+ (void) sprintf(buf, "[%s].%u", tmp, ntohs(in6->sin6_port));
+ return (buf);
+ }
+ case AF_INET: {
+ const struct sockaddr_in *in =
+ (const struct sockaddr_in *) sa;
+
+ if (inet_ntop(in->sin_family, &in->sin_addr, tmp, sizeof tmp)
+ == NULL) {
+ (*logger)(ctl_error, "%s: inet_ntop(%u %04x %08x): %s",
+ me, in->sin_family,
+ in->sin_port, in->sin_addr.s_addr,
+ strerror(errno));
+ return (punt);
+ }
+ if (strlen(tmp) + sizeof "[].65535" > size) {
+ (*logger)(ctl_error, "%s: buffer overflow", me);
+ return (punt);
+ }
+ (void) sprintf(buf, "[%s].%u", tmp, ntohs(in->sin_port));
+ return (buf);
+ }
+#ifndef NO_SOCKADDR_UN
+ case AF_UNIX: {
+ const struct sockaddr_un *un =
+ (const struct sockaddr_un *) sa;
+ unsigned int x = sizeof un->sun_path;
+
+ if (x > size)
+ x = size;
+ strncpy(buf, un->sun_path, x - 1);
+ buf[x - 1] = '\0';
+ return (buf);
+ }
+#endif
+ default:
+ return (punt);
+ }
+}
+
+void
+ctl_sa_copy(const struct sockaddr *src, struct sockaddr *dst) {
+ switch (src->sa_family) {
+ case AF_INET6:
+ *((struct sockaddr_in6 *)dst) =
+ *((const struct sockaddr_in6 *)src);
+ break;
+ case AF_INET:
+ *((struct sockaddr_in *)dst) =
+ *((const struct sockaddr_in *)src);
+ break;
+#ifndef NO_SOCKADDR_UN
+ case AF_UNIX:
+ *((struct sockaddr_un *)dst) =
+ *((const struct sockaddr_un *)src);
+ break;
+#endif
+ default:
+ *dst = *src;
+ break;
+ }
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/ctl_p.h b/usr/src/lib/libresolv2_joy/common/isc/ctl_p.h
new file mode 100644
index 0000000000..18a52ae39c
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/ctl_p.h
@@ -0,0 +1,28 @@
+struct ctl_buf {
+ char * text;
+ size_t used;
+};
+
+#define MAX_LINELEN 990 /*%< Like SMTP. */
+#ifndef NO_SOCKADDR_UN
+#define MAX_NTOP PATH_MAX
+#else
+#define MAX_NTOP (sizeof "[255.255.255.255].65535")
+#endif
+
+#define allocated_p(Buf) ((Buf).text != NULL)
+#define buffer_init(Buf) ((Buf).text = 0, (Buf.used) = 0)
+
+#define ctl_bufget __ctl_bufget
+#define ctl_bufput __ctl_bufput
+#define ctl_sa_ntop __ctl_sa_ntop
+#define ctl_sa_copy __ctl_sa_copy
+
+int ctl_bufget(struct ctl_buf *, ctl_logfunc);
+void ctl_bufput(struct ctl_buf *);
+const char * ctl_sa_ntop(const struct sockaddr *, char *, size_t,
+ ctl_logfunc);
+void ctl_sa_copy(const struct sockaddr *,
+ struct sockaddr *);
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/ctl_srvr.c b/usr/src/lib/libresolv2_joy/common/isc/ctl_srvr.c
new file mode 100644
index 0000000000..8fd7a21ffa
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/ctl_srvr.c
@@ -0,0 +1,787 @@
+/*
+ * Copyright (C) 2004-2006, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 1998-2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(lint) && !defined(SABER)
+static const char rcsid[] = "$Id: ctl_srvr.c,v 1.10 2008/11/14 02:36:51 marka Exp $";
+#endif /* not lint */
+
+/* Extern. */
+
+#include "port_before.h"
+
+#include <sys/param.h>
+#include <sys/file.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <arpa/inet.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <fcntl.h>
+#ifdef HAVE_MEMORY_H
+#include <memory.h>
+#endif
+
+#include <isc/assertions.h>
+#include <isc/ctl.h>
+#include <isc/eventlib.h>
+#include <isc/list.h>
+#include <isc/logging.h>
+#include <isc/memcluster.h>
+
+#include "ctl_p.h"
+
+#include "port_after.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) ((size_t)sprintf x)
+#endif
+
+/* Macros. */
+
+#define lastverb_p(verb) (verb->name == NULL || verb->func == NULL)
+#define address_expr ctl_sa_ntop((struct sockaddr *)&sess->sa, \
+ tmp, sizeof tmp, ctx->logger)
+
+/* Types. */
+
+enum state {
+ available = 0, initializing, writing, reading, reading_data,
+ processing, idling, quitting, closing
+};
+
+union sa_un {
+ struct sockaddr_in in;
+#ifndef NO_SOCKADDR_UN
+ struct sockaddr_un un;
+#endif
+};
+
+struct ctl_sess {
+ LINK(struct ctl_sess) link;
+ struct ctl_sctx * ctx;
+ enum state state;
+ int sock;
+ union sa_un sa;
+ evFileID rdID;
+ evStreamID wrID;
+ evTimerID rdtiID;
+ evTimerID wrtiID;
+ struct ctl_buf inbuf;
+ struct ctl_buf outbuf;
+ const struct ctl_verb * verb;
+ u_int helpcode;
+ const void * respctx;
+ u_int respflags;
+ ctl_srvrdone donefunc;
+ void * uap;
+ void * csctx;
+};
+
+struct ctl_sctx {
+ evContext ev;
+ void * uctx;
+ u_int unkncode;
+ u_int timeoutcode;
+ const struct ctl_verb * verbs;
+ const struct ctl_verb * connverb;
+ int sock;
+ int max_sess;
+ int cur_sess;
+ struct timespec timeout;
+ ctl_logfunc logger;
+ evConnID acID;
+ LIST(struct ctl_sess) sess;
+};
+
+/* Forward. */
+
+static void ctl_accept(evContext, void *, int,
+ const void *, int,
+ const void *, int);
+static void ctl_close(struct ctl_sess *);
+static void ctl_new_state(struct ctl_sess *,
+ enum state,
+ const char *);
+static void ctl_start_read(struct ctl_sess *);
+static void ctl_stop_read(struct ctl_sess *);
+static void ctl_readable(evContext, void *, int, int);
+static void ctl_rdtimeout(evContext, void *,
+ struct timespec,
+ struct timespec);
+static void ctl_wrtimeout(evContext, void *,
+ struct timespec,
+ struct timespec);
+static void ctl_docommand(struct ctl_sess *);
+static void ctl_writedone(evContext, void *, int, int);
+static void ctl_morehelp(struct ctl_sctx *,
+ struct ctl_sess *,
+ const struct ctl_verb *,
+ const char *,
+ u_int, const void *, void *);
+static void ctl_signal_done(struct ctl_sctx *,
+ struct ctl_sess *);
+
+/* Private data. */
+
+static const char * state_names[] = {
+ "available", "initializing", "writing", "reading",
+ "reading_data", "processing", "idling", "quitting", "closing"
+};
+
+static const char space[] = " ";
+
+static const struct ctl_verb fakehelpverb = {
+ "fakehelp", ctl_morehelp , NULL
+};
+
+/* Public. */
+
+/*%
+ * void
+ * ctl_server()
+ * create, condition, and start a listener on the control port.
+ */
+struct ctl_sctx *
+ctl_server(evContext lev, const struct sockaddr *sap, size_t sap_len,
+ const struct ctl_verb *verbs,
+ u_int unkncode, u_int timeoutcode,
+ u_int timeout, int backlog, int max_sess,
+ ctl_logfunc logger, void *uctx)
+{
+ static const char me[] = "ctl_server";
+ static const int on = 1;
+ const struct ctl_verb *connverb;
+ struct ctl_sctx *ctx;
+ int save_errno;
+
+ if (logger == NULL)
+ logger = ctl_logger;
+ for (connverb = verbs;
+ connverb->name != NULL && connverb->func != NULL;
+ connverb++)
+ if (connverb->name[0] == '\0')
+ break;
+ if (connverb->func == NULL) {
+ (*logger)(ctl_error, "%s: no connection verb found", me);
+ return (NULL);
+ }
+ ctx = memget(sizeof *ctx);
+ if (ctx == NULL) {
+ (*logger)(ctl_error, "%s: getmem: %s", me, strerror(errno));
+ return (NULL);
+ }
+ ctx->ev = lev;
+ ctx->uctx = uctx;
+ ctx->unkncode = unkncode;
+ ctx->timeoutcode = timeoutcode;
+ ctx->verbs = verbs;
+ ctx->timeout = evConsTime(timeout, 0);
+ ctx->logger = logger;
+ ctx->connverb = connverb;
+ ctx->max_sess = max_sess;
+ ctx->cur_sess = 0;
+ INIT_LIST(ctx->sess);
+ ctx->sock = socket(sap->sa_family, SOCK_STREAM, PF_UNSPEC);
+ if (ctx->sock > evHighestFD(ctx->ev)) {
+ ctx->sock = -1;
+ errno = ENOTSOCK;
+ }
+ if (ctx->sock < 0) {
+ save_errno = errno;
+ (*ctx->logger)(ctl_error, "%s: socket: %s",
+ me, strerror(errno));
+ memput(ctx, sizeof *ctx);
+ errno = save_errno;
+ return (NULL);
+ }
+ if (ctx->sock > evHighestFD(lev)) {
+ close(ctx->sock);
+ (*ctx->logger)(ctl_error, "%s: file descriptor > evHighestFD");
+ errno = ENFILE;
+ memput(ctx, sizeof *ctx);
+ return (NULL);
+ }
+#ifdef NO_UNIX_REUSEADDR
+ if (sap->sa_family != AF_UNIX)
+#endif
+ if (setsockopt(ctx->sock, SOL_SOCKET, SO_REUSEADDR,
+ (const char *)&on, sizeof on) != 0) {
+ (*ctx->logger)(ctl_warning,
+ "%s: setsockopt(REUSEADDR): %s",
+ me, strerror(errno));
+ }
+ if (bind(ctx->sock, sap, sap_len) < 0) {
+ char tmp[MAX_NTOP];
+ save_errno = errno;
+ (*ctx->logger)(ctl_error, "%s: bind: %s: %s",
+ me, ctl_sa_ntop((const struct sockaddr *)sap,
+ tmp, sizeof tmp, ctx->logger),
+ strerror(save_errno));
+ close(ctx->sock);
+ memput(ctx, sizeof *ctx);
+ errno = save_errno;
+ return (NULL);
+ }
+ if (fcntl(ctx->sock, F_SETFD, 1) < 0) {
+ (*ctx->logger)(ctl_warning, "%s: fcntl: %s", me,
+ strerror(errno));
+ }
+ if (evListen(lev, ctx->sock, backlog, ctl_accept, ctx,
+ &ctx->acID) < 0) {
+ save_errno = errno;
+ (*ctx->logger)(ctl_error, "%s: evListen(fd %d): %s",
+ me, ctx->sock, strerror(errno));
+ close(ctx->sock);
+ memput(ctx, sizeof *ctx);
+ errno = save_errno;
+ return (NULL);
+ }
+ (*ctx->logger)(ctl_debug, "%s: new ctx %p, sock %d",
+ me, ctx, ctx->sock);
+ return (ctx);
+}
+
+/*%
+ * void
+ * ctl_endserver(ctx)
+ * if the control listener is open, close it. clean out all eventlib
+ * stuff. close all active sessions.
+ */
+void
+ctl_endserver(struct ctl_sctx *ctx) {
+ static const char me[] = "ctl_endserver";
+ struct ctl_sess *this, *next;
+
+ (*ctx->logger)(ctl_debug, "%s: ctx %p, sock %d, acID %p, sess %p",
+ me, ctx, ctx->sock, ctx->acID.opaque, ctx->sess);
+ if (ctx->acID.opaque != NULL) {
+ (void)evCancelConn(ctx->ev, ctx->acID);
+ ctx->acID.opaque = NULL;
+ }
+ if (ctx->sock != -1) {
+ (void) close(ctx->sock);
+ ctx->sock = -1;
+ }
+ for (this = HEAD(ctx->sess); this != NULL; this = next) {
+ next = NEXT(this, link);
+ ctl_close(this);
+ }
+ memput(ctx, sizeof *ctx);
+}
+
+/*%
+ * If body is non-NULL then it we add a "." line after it.
+ * Caller must have escaped lines with leading ".".
+ */
+void
+ctl_response(struct ctl_sess *sess, u_int code, const char *text,
+ u_int flags, const void *respctx, ctl_srvrdone donefunc,
+ void *uap, const char *body, size_t bodylen)
+{
+ static const char me[] = "ctl_response";
+ struct iovec iov[3], *iovp = iov;
+ struct ctl_sctx *ctx = sess->ctx;
+ char tmp[MAX_NTOP], *pc;
+ int n;
+
+ REQUIRE(sess->state == initializing ||
+ sess->state == processing ||
+ sess->state == reading_data ||
+ sess->state == writing);
+ REQUIRE(sess->wrtiID.opaque == NULL);
+ REQUIRE(sess->wrID.opaque == NULL);
+ ctl_new_state(sess, writing, me);
+ sess->donefunc = donefunc;
+ sess->uap = uap;
+ if (!allocated_p(sess->outbuf) &&
+ ctl_bufget(&sess->outbuf, ctx->logger) < 0) {
+ (*ctx->logger)(ctl_error, "%s: %s: cant get an output buffer",
+ me, address_expr);
+ goto untimely;
+ }
+ if (sizeof "000-\r\n" + strlen(text) > (size_t)MAX_LINELEN) {
+ (*ctx->logger)(ctl_error, "%s: %s: output buffer ovf, closing",
+ me, address_expr);
+ goto untimely;
+ }
+ sess->outbuf.used = SPRINTF((sess->outbuf.text, "%03d%c%s\r\n",
+ code, (flags & CTL_MORE) != 0 ? '-' : ' ',
+ text));
+ for (pc = sess->outbuf.text, n = 0;
+ n < (int)sess->outbuf.used-2; pc++, n++)
+ if (!isascii((unsigned char)*pc) ||
+ !isprint((unsigned char)*pc))
+ *pc = '\040';
+ *iovp++ = evConsIovec(sess->outbuf.text, sess->outbuf.used);
+ if (body != NULL) {
+ char *tmp;
+ DE_CONST(body, tmp);
+ *iovp++ = evConsIovec(tmp, bodylen);
+ DE_CONST(".\r\n", tmp);
+ *iovp++ = evConsIovec(tmp, 3);
+ }
+ (*ctx->logger)(ctl_debug, "%s: [%d] %s", me,
+ sess->outbuf.used, sess->outbuf.text);
+ if (evWrite(ctx->ev, sess->sock, iov, iovp - iov,
+ ctl_writedone, sess, &sess->wrID) < 0) {
+ (*ctx->logger)(ctl_error, "%s: %s: evWrite: %s", me,
+ address_expr, strerror(errno));
+ goto untimely;
+ }
+ if (evSetIdleTimer(ctx->ev, ctl_wrtimeout, sess, ctx->timeout,
+ &sess->wrtiID) < 0)
+ {
+ (*ctx->logger)(ctl_error, "%s: %s: evSetIdleTimer: %s", me,
+ address_expr, strerror(errno));
+ goto untimely;
+ }
+ if (evTimeRW(ctx->ev, sess->wrID, sess->wrtiID) < 0) {
+ (*ctx->logger)(ctl_error, "%s: %s: evTimeRW: %s", me,
+ address_expr, strerror(errno));
+ untimely:
+ ctl_signal_done(ctx, sess);
+ ctl_close(sess);
+ return;
+ }
+ sess->respctx = respctx;
+ sess->respflags = flags;
+}
+
+void
+ctl_sendhelp(struct ctl_sess *sess, u_int code) {
+ static const char me[] = "ctl_sendhelp";
+ struct ctl_sctx *ctx = sess->ctx;
+
+ sess->helpcode = code;
+ sess->verb = &fakehelpverb;
+ ctl_morehelp(ctx, sess, NULL, me, CTL_MORE,
+ (const void *)ctx->verbs, NULL);
+}
+
+void *
+ctl_getcsctx(struct ctl_sess *sess) {
+ return (sess->csctx);
+}
+
+void *
+ctl_setcsctx(struct ctl_sess *sess, void *csctx) {
+ void *old = sess->csctx;
+
+ sess->csctx = csctx;
+ return (old);
+}
+
+/* Private functions. */
+
+static void
+ctl_accept(evContext lev, void *uap, int fd,
+ const void *lav, int lalen,
+ const void *rav, int ralen)
+{
+ static const char me[] = "ctl_accept";
+ struct ctl_sctx *ctx = uap;
+ struct ctl_sess *sess = NULL;
+ char tmp[MAX_NTOP];
+
+ UNUSED(lev);
+ UNUSED(lalen);
+ UNUSED(ralen);
+
+ if (fd < 0) {
+ (*ctx->logger)(ctl_error, "%s: accept: %s",
+ me, strerror(errno));
+ return;
+ }
+ if (ctx->cur_sess == ctx->max_sess) {
+ (*ctx->logger)(ctl_error, "%s: %s: too many control sessions",
+ me, ctl_sa_ntop((const struct sockaddr *)rav,
+ tmp, sizeof tmp,
+ ctx->logger));
+ (void) close(fd);
+ return;
+ }
+ sess = memget(sizeof *sess);
+ if (sess == NULL) {
+ (*ctx->logger)(ctl_error, "%s: memget: %s", me,
+ strerror(errno));
+ (void) close(fd);
+ return;
+ }
+ if (fcntl(fd, F_SETFD, 1) < 0) {
+ (*ctx->logger)(ctl_warning, "%s: fcntl: %s", me,
+ strerror(errno));
+ }
+ ctx->cur_sess++;
+ INIT_LINK(sess, link);
+ APPEND(ctx->sess, sess, link);
+ sess->ctx = ctx;
+ sess->sock = fd;
+ sess->wrID.opaque = NULL;
+ sess->rdID.opaque = NULL;
+ sess->wrtiID.opaque = NULL;
+ sess->rdtiID.opaque = NULL;
+ sess->respctx = NULL;
+ sess->csctx = NULL;
+ if (((const struct sockaddr *)rav)->sa_family == AF_UNIX)
+ ctl_sa_copy((const struct sockaddr *)lav,
+ (struct sockaddr *)&sess->sa);
+ else
+ ctl_sa_copy((const struct sockaddr *)rav,
+ (struct sockaddr *)&sess->sa);
+ sess->donefunc = NULL;
+ buffer_init(sess->inbuf);
+ buffer_init(sess->outbuf);
+ sess->state = available;
+ ctl_new_state(sess, initializing, me);
+ sess->verb = ctx->connverb;
+ (*ctx->logger)(ctl_debug, "%s: %s: accepting (fd %d)",
+ me, address_expr, sess->sock);
+ (*ctx->connverb->func)(ctx, sess, ctx->connverb, "", 0,
+ (const struct sockaddr *)rav, ctx->uctx);
+}
+
+static void
+ctl_new_state(struct ctl_sess *sess, enum state new_state, const char *reason)
+{
+ static const char me[] = "ctl_new_state";
+ struct ctl_sctx *ctx = sess->ctx;
+ char tmp[MAX_NTOP];
+
+ (*ctx->logger)(ctl_debug, "%s: %s: %s -> %s (%s)",
+ me, address_expr,
+ state_names[sess->state],
+ state_names[new_state], reason);
+ sess->state = new_state;
+}
+
+static void
+ctl_close(struct ctl_sess *sess) {
+ static const char me[] = "ctl_close";
+ struct ctl_sctx *ctx = sess->ctx;
+ char tmp[MAX_NTOP];
+
+ REQUIRE(sess->state == initializing ||
+ sess->state == writing ||
+ sess->state == reading ||
+ sess->state == processing ||
+ sess->state == reading_data ||
+ sess->state == idling);
+ REQUIRE(sess->sock != -1);
+ if (sess->state == reading || sess->state == reading_data)
+ ctl_stop_read(sess);
+ else if (sess->state == writing) {
+ if (sess->wrID.opaque != NULL) {
+ (void) evCancelRW(ctx->ev, sess->wrID);
+ sess->wrID.opaque = NULL;
+ }
+ if (sess->wrtiID.opaque != NULL) {
+ (void) evClearIdleTimer(ctx->ev, sess->wrtiID);
+ sess->wrtiID.opaque = NULL;
+ }
+ }
+ ctl_new_state(sess, closing, me);
+ (void) close(sess->sock);
+ if (allocated_p(sess->inbuf))
+ ctl_bufput(&sess->inbuf);
+ if (allocated_p(sess->outbuf))
+ ctl_bufput(&sess->outbuf);
+ (*ctx->logger)(ctl_debug, "%s: %s: closed (fd %d)",
+ me, address_expr, sess->sock);
+ UNLINK(ctx->sess, sess, link);
+ memput(sess, sizeof *sess);
+ ctx->cur_sess--;
+}
+
+static void
+ctl_start_read(struct ctl_sess *sess) {
+ static const char me[] = "ctl_start_read";
+ struct ctl_sctx *ctx = sess->ctx;
+ char tmp[MAX_NTOP];
+
+ REQUIRE(sess->state == initializing ||
+ sess->state == writing ||
+ sess->state == processing ||
+ sess->state == idling);
+ REQUIRE(sess->rdtiID.opaque == NULL);
+ REQUIRE(sess->rdID.opaque == NULL);
+ sess->inbuf.used = 0;
+ if (evSetIdleTimer(ctx->ev, ctl_rdtimeout, sess, ctx->timeout,
+ &sess->rdtiID) < 0)
+ {
+ (*ctx->logger)(ctl_error, "%s: %s: evSetIdleTimer: %s", me,
+ address_expr, strerror(errno));
+ ctl_close(sess);
+ return;
+ }
+ if (evSelectFD(ctx->ev, sess->sock, EV_READ,
+ ctl_readable, sess, &sess->rdID) < 0) {
+ (*ctx->logger)(ctl_error, "%s: %s: evSelectFD: %s", me,
+ address_expr, strerror(errno));
+ return;
+ }
+ ctl_new_state(sess, reading, me);
+}
+
+static void
+ctl_stop_read(struct ctl_sess *sess) {
+ static const char me[] = "ctl_stop_read";
+ struct ctl_sctx *ctx = sess->ctx;
+
+ REQUIRE(sess->state == reading || sess->state == reading_data);
+ REQUIRE(sess->rdID.opaque != NULL);
+ (void) evDeselectFD(ctx->ev, sess->rdID);
+ sess->rdID.opaque = NULL;
+ if (sess->rdtiID.opaque != NULL) {
+ (void) evClearIdleTimer(ctx->ev, sess->rdtiID);
+ sess->rdtiID.opaque = NULL;
+ }
+ ctl_new_state(sess, idling, me);
+}
+
+static void
+ctl_readable(evContext lev, void *uap, int fd, int evmask) {
+ static const char me[] = "ctl_readable";
+ struct ctl_sess *sess = uap;
+ struct ctl_sctx *ctx;
+ char *eos, tmp[MAX_NTOP];
+ ssize_t n;
+
+ REQUIRE(sess != NULL);
+ REQUIRE(fd >= 0);
+ REQUIRE(evmask == EV_READ);
+ REQUIRE(sess->state == reading || sess->state == reading_data);
+
+ ctx = sess->ctx;
+ evTouchIdleTimer(lev, sess->rdtiID);
+ if (!allocated_p(sess->inbuf) &&
+ ctl_bufget(&sess->inbuf, ctx->logger) < 0) {
+ (*ctx->logger)(ctl_error, "%s: %s: cant get an input buffer",
+ me, address_expr);
+ ctl_close(sess);
+ return;
+ }
+ n = read(sess->sock, sess->inbuf.text + sess->inbuf.used,
+ MAX_LINELEN - sess->inbuf.used);
+ if (n <= 0) {
+ (*ctx->logger)(ctl_debug, "%s: %s: read: %s",
+ me, address_expr,
+ (n == 0) ? "Unexpected EOF" : strerror(errno));
+ ctl_close(sess);
+ return;
+ }
+ sess->inbuf.used += n;
+ eos = memchr(sess->inbuf.text, '\n', sess->inbuf.used);
+ if (eos != NULL && eos != sess->inbuf.text && eos[-1] == '\r') {
+ eos[-1] = '\0';
+ if ((sess->respflags & CTL_DATA) != 0) {
+ INSIST(sess->verb != NULL);
+ (*sess->verb->func)(sess->ctx, sess, sess->verb,
+ sess->inbuf.text,
+ CTL_DATA, sess->respctx,
+ sess->ctx->uctx);
+ } else {
+ ctl_stop_read(sess);
+ ctl_docommand(sess);
+ }
+ sess->inbuf.used -= ((eos - sess->inbuf.text) + 1);
+ if (sess->inbuf.used == 0U)
+ ctl_bufput(&sess->inbuf);
+ else
+ memmove(sess->inbuf.text, eos + 1, sess->inbuf.used);
+ return;
+ }
+ if (sess->inbuf.used == (size_t)MAX_LINELEN) {
+ (*ctx->logger)(ctl_error, "%s: %s: line too long, closing",
+ me, address_expr);
+ ctl_close(sess);
+ }
+}
+
+static void
+ctl_wrtimeout(evContext lev, void *uap,
+ struct timespec due,
+ struct timespec itv)
+{
+ static const char me[] = "ctl_wrtimeout";
+ struct ctl_sess *sess = uap;
+ struct ctl_sctx *ctx = sess->ctx;
+ char tmp[MAX_NTOP];
+
+ UNUSED(lev);
+ UNUSED(due);
+ UNUSED(itv);
+
+ REQUIRE(sess->state == writing);
+ sess->wrtiID.opaque = NULL;
+ (*ctx->logger)(ctl_warning, "%s: %s: write timeout, closing",
+ me, address_expr);
+ if (sess->wrID.opaque != NULL) {
+ (void) evCancelRW(ctx->ev, sess->wrID);
+ sess->wrID.opaque = NULL;
+ }
+ ctl_signal_done(ctx, sess);
+ ctl_new_state(sess, processing, me);
+ ctl_close(sess);
+}
+
+static void
+ctl_rdtimeout(evContext lev, void *uap,
+ struct timespec due,
+ struct timespec itv)
+{
+ static const char me[] = "ctl_rdtimeout";
+ struct ctl_sess *sess = uap;
+ struct ctl_sctx *ctx = sess->ctx;
+ char tmp[MAX_NTOP];
+
+ UNUSED(lev);
+ UNUSED(due);
+ UNUSED(itv);
+
+ REQUIRE(sess->state == reading);
+ sess->rdtiID.opaque = NULL;
+ (*ctx->logger)(ctl_warning, "%s: %s: timeout, closing",
+ me, address_expr);
+ if (sess->state == reading || sess->state == reading_data)
+ ctl_stop_read(sess);
+ ctl_signal_done(ctx, sess);
+ ctl_new_state(sess, processing, me);
+ ctl_response(sess, ctx->timeoutcode, "Timeout.", CTL_EXIT, NULL,
+ NULL, NULL, NULL, 0);
+}
+
+static void
+ctl_docommand(struct ctl_sess *sess) {
+ static const char me[] = "ctl_docommand";
+ char *name, *rest, tmp[MAX_NTOP];
+ struct ctl_sctx *ctx = sess->ctx;
+ const struct ctl_verb *verb;
+
+ REQUIRE(allocated_p(sess->inbuf));
+ (*ctx->logger)(ctl_debug, "%s: %s: \"%s\" [%u]",
+ me, address_expr,
+ sess->inbuf.text, (u_int)sess->inbuf.used);
+ ctl_new_state(sess, processing, me);
+ name = sess->inbuf.text + strspn(sess->inbuf.text, space);
+ rest = name + strcspn(name, space);
+ if (*rest != '\0') {
+ *rest++ = '\0';
+ rest += strspn(rest, space);
+ }
+ for (verb = ctx->verbs;
+ verb != NULL && verb->name != NULL && verb->func != NULL;
+ verb++)
+ if (verb->name[0] != '\0' && strcasecmp(name, verb->name) == 0)
+ break;
+ if (verb != NULL && verb->name != NULL && verb->func != NULL) {
+ sess->verb = verb;
+ (*verb->func)(ctx, sess, verb, rest, 0, NULL, ctx->uctx);
+ } else {
+ char buf[1100];
+
+ if (sizeof "Unrecognized command \"\" (args \"\")" +
+ strlen(name) + strlen(rest) > sizeof buf)
+ strcpy(buf, "Unrecognized command (buf ovf)");
+ else
+ sprintf(buf,
+ "Unrecognized command \"%s\" (args \"%s\")",
+ name, rest);
+ ctl_response(sess, ctx->unkncode, buf, 0, NULL, NULL, NULL,
+ NULL, 0);
+ }
+}
+
+static void
+ctl_writedone(evContext lev, void *uap, int fd, int bytes) {
+ static const char me[] = "ctl_writedone";
+ struct ctl_sess *sess = uap;
+ struct ctl_sctx *ctx = sess->ctx;
+ char tmp[MAX_NTOP];
+ int save_errno = errno;
+
+ UNUSED(lev);
+ UNUSED(uap);
+
+ REQUIRE(sess->state == writing);
+ REQUIRE(fd == sess->sock);
+ REQUIRE(sess->wrtiID.opaque != NULL);
+ sess->wrID.opaque = NULL;
+ (void) evClearIdleTimer(ctx->ev, sess->wrtiID);
+ sess->wrtiID.opaque = NULL;
+ if (bytes < 0) {
+ (*ctx->logger)(ctl_error, "%s: %s: %s",
+ me, address_expr, strerror(save_errno));
+ ctl_close(sess);
+ return;
+ }
+
+ INSIST(allocated_p(sess->outbuf));
+ ctl_bufput(&sess->outbuf);
+ if ((sess->respflags & CTL_EXIT) != 0) {
+ ctl_signal_done(ctx, sess);
+ ctl_close(sess);
+ return;
+ } else if ((sess->respflags & CTL_MORE) != 0) {
+ INSIST(sess->verb != NULL);
+ (*sess->verb->func)(sess->ctx, sess, sess->verb, "",
+ CTL_MORE, sess->respctx, sess->ctx->uctx);
+ } else {
+ ctl_signal_done(ctx, sess);
+ ctl_start_read(sess);
+ }
+}
+
+static void
+ctl_morehelp(struct ctl_sctx *ctx, struct ctl_sess *sess,
+ const struct ctl_verb *verb, const char *text,
+ u_int respflags, const void *respctx, void *uctx)
+{
+ const struct ctl_verb *this = respctx, *next = this + 1;
+
+ UNUSED(ctx);
+ UNUSED(verb);
+ UNUSED(text);
+ UNUSED(uctx);
+
+ REQUIRE(!lastverb_p(this));
+ REQUIRE((respflags & CTL_MORE) != 0);
+ if (lastverb_p(next))
+ respflags &= ~CTL_MORE;
+ ctl_response(sess, sess->helpcode, this->help, respflags, next,
+ NULL, NULL, NULL, 0);
+}
+
+static void
+ctl_signal_done(struct ctl_sctx *ctx, struct ctl_sess *sess) {
+ if (sess->donefunc != NULL) {
+ (*sess->donefunc)(ctx, sess, sess->uap);
+ sess->donefunc = NULL;
+ }
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/ev_connects.c b/usr/src/lib/libresolv2_joy/common/isc/ev_connects.c
new file mode 100644
index 0000000000..38dfdbe512
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/ev_connects.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1995-1999 by Internet Software Consortium
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* ev_connects.c - implement asynch connect/accept for the eventlib
+ * vix 16sep96 [initial]
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: ev_connects.c,v 1.8 2006/03/09 23:57:56 marka Exp $";
+#endif
+
+/* Import. */
+
+#include "port_before.h"
+#include "fd_setsize.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+
+#include <unistd.h>
+
+#include <isc/eventlib.h>
+#include <isc/assertions.h>
+#include "eventlib_p.h"
+
+#include "port_after.h"
+
+/* Macros. */
+
+#define GETXXXNAME(f, s, sa, len) ( \
+ (f((s), (&sa), (&len)) >= 0) ? 0 : \
+ (errno != EAFNOSUPPORT && errno != EOPNOTSUPP) ? -1 : ( \
+ memset(&(sa), 0, sizeof (sa)), \
+ (len) = sizeof (sa), \
+ (sa).sa_family = AF_UNIX, \
+ 0 \
+ ) \
+ )
+
+/* Forward. */
+
+static void listener(evContext ctx, void *uap, int fd, int evmask);
+static void connector(evContext ctx, void *uap, int fd, int evmask);
+
+/* Public. */
+
+int
+evListen(evContext opaqueCtx, int fd, int maxconn,
+ evConnFunc func, void *uap, evConnID *id)
+{
+ evContext_p *ctx = opaqueCtx.opaque;
+ evConn *new;
+ int mode;
+
+ OKNEW(new);
+ new->flags = EV_CONN_LISTEN;
+ OKFREE(mode = fcntl(fd, F_GETFL, NULL), new); /*%< side effect: validate fd. */
+ /*
+ * Remember the nonblocking status. We assume that either evSelectFD
+ * has not been done to this fd, or that if it has then the caller
+ * will evCancelConn before they evDeselectFD. If our assumptions
+ * are not met, then we might restore the old nonblocking status
+ * incorrectly.
+ */
+ if ((mode & PORT_NONBLOCK) == 0) {
+#ifdef USE_FIONBIO_IOCTL
+ int on = 1;
+ OKFREE(ioctl(fd, FIONBIO, (char *)&on), new);
+#else
+ OKFREE(fcntl(fd, F_SETFL, mode | PORT_NONBLOCK), new);
+#endif
+ new->flags |= EV_CONN_BLOCK;
+ }
+ OKFREE(listen(fd, maxconn), new);
+ if (evSelectFD(opaqueCtx, fd, EV_READ, listener, new, &new->file) < 0){
+ int save = errno;
+
+ FREE(new);
+ errno = save;
+ return (-1);
+ }
+ new->flags |= EV_CONN_SELECTED;
+ new->func = func;
+ new->uap = uap;
+ new->fd = fd;
+ if (ctx->conns != NULL)
+ ctx->conns->prev = new;
+ new->prev = NULL;
+ new->next = ctx->conns;
+ ctx->conns = new;
+ if (id)
+ id->opaque = new;
+ return (0);
+}
+
+int
+evConnect(evContext opaqueCtx, int fd, const void *ra, int ralen,
+ evConnFunc func, void *uap, evConnID *id)
+{
+ evContext_p *ctx = opaqueCtx.opaque;
+ evConn *new;
+
+ OKNEW(new);
+ new->flags = 0;
+ /* Do the select() first to get the socket into nonblocking mode. */
+ if (evSelectFD(opaqueCtx, fd, EV_MASK_ALL,
+ connector, new, &new->file) < 0) {
+ int save = errno;
+
+ FREE(new);
+ errno = save;
+ return (-1);
+ }
+ new->flags |= EV_CONN_SELECTED;
+ if (connect(fd, ra, ralen) < 0 &&
+ errno != EWOULDBLOCK &&
+ errno != EAGAIN &&
+ errno != EINPROGRESS) {
+ int save = errno;
+
+ (void) evDeselectFD(opaqueCtx, new->file);
+ FREE(new);
+ errno = save;
+ return (-1);
+ }
+ /* No error, or EWOULDBLOCK. select() tells when it's ready. */
+ new->func = func;
+ new->uap = uap;
+ new->fd = fd;
+ if (ctx->conns != NULL)
+ ctx->conns->prev = new;
+ new->prev = NULL;
+ new->next = ctx->conns;
+ ctx->conns = new;
+ if (id)
+ id->opaque = new;
+ return (0);
+}
+
+int
+evCancelConn(evContext opaqueCtx, evConnID id) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evConn *this = id.opaque;
+ evAccept *acc, *nxtacc;
+ int mode;
+
+ if ((this->flags & EV_CONN_SELECTED) != 0)
+ (void) evDeselectFD(opaqueCtx, this->file);
+ if ((this->flags & EV_CONN_BLOCK) != 0) {
+ mode = fcntl(this->fd, F_GETFL, NULL);
+ if (mode == -1) {
+ if (errno != EBADF)
+ return (-1);
+ } else {
+#ifdef USE_FIONBIO_IOCTL
+ int off = 0;
+ OK(ioctl(this->fd, FIONBIO, (char *)&off));
+#else
+ OK(fcntl(this->fd, F_SETFL, mode & ~PORT_NONBLOCK));
+#endif
+ }
+ }
+
+ /* Unlink from ctx->conns. */
+ if (this->prev != NULL)
+ this->prev->next = this->next;
+ else
+ ctx->conns = this->next;
+ if (this->next != NULL)
+ this->next->prev = this->prev;
+
+ /*
+ * Remove `this' from the ctx->accepts list (zero or more times).
+ */
+ for (acc = HEAD(ctx->accepts), nxtacc = NULL;
+ acc != NULL;
+ acc = nxtacc)
+ {
+ nxtacc = NEXT(acc, link);
+ if (acc->conn == this) {
+ UNLINK(ctx->accepts, acc, link);
+ close(acc->fd);
+ FREE(acc);
+ }
+ }
+
+ /* Wrap up and get out. */
+ FREE(this);
+ return (0);
+}
+
+int evHold(evContext opaqueCtx, evConnID id) {
+ evConn *this = id.opaque;
+
+ if ((this->flags & EV_CONN_LISTEN) == 0) {
+ errno = EINVAL;
+ return (-1);
+ }
+ if ((this->flags & EV_CONN_SELECTED) == 0)
+ return (0);
+ this->flags &= ~EV_CONN_SELECTED;
+ return (evDeselectFD(opaqueCtx, this->file));
+}
+
+int evUnhold(evContext opaqueCtx, evConnID id) {
+ evConn *this = id.opaque;
+ int ret;
+
+ if ((this->flags & EV_CONN_LISTEN) == 0) {
+ errno = EINVAL;
+ return (-1);
+ }
+ if ((this->flags & EV_CONN_SELECTED) != 0)
+ return (0);
+ ret = evSelectFD(opaqueCtx, this->fd, EV_READ, listener, this,
+ &this->file);
+ if (ret == 0)
+ this->flags |= EV_CONN_SELECTED;
+ return (ret);
+}
+
+int
+evTryAccept(evContext opaqueCtx, evConnID id, int *sys_errno) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evConn *conn = id.opaque;
+ evAccept *new;
+
+ if ((conn->flags & EV_CONN_LISTEN) == 0) {
+ errno = EINVAL;
+ return (-1);
+ }
+ OKNEW(new);
+ new->conn = conn;
+ new->ralen = sizeof new->ra;
+ new->fd = accept(conn->fd, &new->ra.sa, &new->ralen);
+ if (new->fd > ctx->highestFD) {
+ close(new->fd);
+ new->fd = -1;
+ new->ioErrno = ENOTSOCK;
+ }
+ if (new->fd >= 0) {
+ new->lalen = sizeof new->la;
+ if (GETXXXNAME(getsockname, new->fd, new->la.sa, new->lalen) < 0) {
+ new->ioErrno = errno;
+ (void) close(new->fd);
+ new->fd = -1;
+ } else
+ new->ioErrno = 0;
+ } else {
+ new->ioErrno = errno;
+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
+ FREE(new);
+ return (-1);
+ }
+ }
+ INIT_LINK(new, link);
+ APPEND(ctx->accepts, new, link);
+ *sys_errno = new->ioErrno;
+ return (0);
+}
+
+/* Private. */
+
+static void
+listener(evContext opaqueCtx, void *uap, int fd, int evmask) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evConn *conn = uap;
+ union {
+ struct sockaddr sa;
+ struct sockaddr_in in;
+#ifndef NO_SOCKADDR_UN
+ struct sockaddr_un un;
+#endif
+ } la, ra;
+ int new;
+ ISC_SOCKLEN_T lalen = 0, ralen;
+
+ REQUIRE((evmask & EV_READ) != 0);
+ ralen = sizeof ra;
+ new = accept(fd, &ra.sa, &ralen);
+ if (new > ctx->highestFD) {
+ close(new);
+ new = -1;
+ errno = ENOTSOCK;
+ }
+ if (new >= 0) {
+ lalen = sizeof la;
+ if (GETXXXNAME(getsockname, new, la.sa, lalen) < 0) {
+ int save = errno;
+
+ (void) close(new);
+ errno = save;
+ new = -1;
+ }
+ } else if (errno == EAGAIN || errno == EWOULDBLOCK)
+ return;
+ (*conn->func)(opaqueCtx, conn->uap, new, &la.sa, lalen, &ra.sa, ralen);
+}
+
+static void
+connector(evContext opaqueCtx, void *uap, int fd, int evmask) {
+ evConn *conn = uap;
+ union {
+ struct sockaddr sa;
+ struct sockaddr_in in;
+#ifndef NO_SOCKADDR_UN
+ struct sockaddr_un un;
+#endif
+ } la, ra;
+ ISC_SOCKLEN_T lalen, ralen;
+#ifndef NETREAD_BROKEN
+ char buf[1];
+#endif
+ void *conn_uap;
+ evConnFunc conn_func;
+ evConnID id;
+ int socket_errno = 0;
+ ISC_SOCKLEN_T optlen;
+
+ UNUSED(evmask);
+
+ lalen = sizeof la;
+ ralen = sizeof ra;
+ conn_uap = conn->uap;
+ conn_func = conn->func;
+ id.opaque = conn;
+#ifdef SO_ERROR
+ optlen = sizeof socket_errno;
+ if (fd < 0 &&
+ getsockopt(conn->fd, SOL_SOCKET, SO_ERROR, (char *)&socket_errno,
+ &optlen) < 0)
+ socket_errno = errno;
+ else
+ errno = socket_errno;
+#endif
+ if (evCancelConn(opaqueCtx, id) < 0 ||
+ socket_errno ||
+#ifdef NETREAD_BROKEN
+ 0 ||
+#else
+ read(fd, buf, 0) < 0 ||
+#endif
+ GETXXXNAME(getsockname, fd, la.sa, lalen) < 0 ||
+ GETXXXNAME(getpeername, fd, ra.sa, ralen) < 0) {
+ int save = errno;
+
+ (void) close(fd); /*%< XXX closing caller's fd */
+ errno = save;
+ fd = -1;
+ }
+ (*conn_func)(opaqueCtx, conn_uap, fd, &la.sa, lalen, &ra.sa, ralen);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/ev_files.c b/usr/src/lib/libresolv2_joy/common/isc/ev_files.c
new file mode 100644
index 0000000000..b12baf1aaa
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/ev_files.c
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1995-1999 by Internet Software Consortium
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* ev_files.c - implement asynch file IO for the eventlib
+ * vix 11sep95 [initial]
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: ev_files.c,v 1.8 2005/07/28 06:51:48 marka Exp $";
+#endif
+
+#include "port_before.h"
+#include "fd_setsize.h"
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/ioctl.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <isc/eventlib.h>
+#include "eventlib_p.h"
+
+#include "port_after.h"
+
+static evFile *FindFD(const evContext_p *ctx, int fd, int eventmask);
+
+int
+evSelectFD(evContext opaqueCtx,
+ int fd,
+ int eventmask,
+ evFileFunc func,
+ void *uap,
+ evFileID *opaqueID
+) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evFile *id;
+ int mode;
+
+ evPrintf(ctx, 1,
+ "evSelectFD(ctx %p, fd %d, mask 0x%x, func %p, uap %p)\n",
+ ctx, fd, eventmask, func, uap);
+ if (eventmask == 0 || (eventmask & ~EV_MASK_ALL) != 0)
+ EV_ERR(EINVAL);
+#ifndef USE_POLL
+ if (fd > ctx->highestFD)
+ EV_ERR(EINVAL);
+#endif
+ OK(mode = fcntl(fd, F_GETFL, NULL)); /*%< side effect: validate fd. */
+ /*
+ * The first time we touch a file descriptor, we need to check to see
+ * if the application already had it in O_NONBLOCK mode and if so, all
+ * of our deselect()'s have to leave it in O_NONBLOCK. If not, then
+ * all but our last deselect() has to leave it in O_NONBLOCK.
+ */
+#ifdef USE_POLL
+ /* Make sure both ctx->pollfds[] and ctx->fdTable[] are large enough */
+ if (fd >= ctx->maxnfds && evPollfdRealloc(ctx, 1, fd) != 0)
+ EV_ERR(ENOMEM);
+#endif /* USE_POLL */
+ id = FindFD(ctx, fd, EV_MASK_ALL);
+ if (id == NULL) {
+ if (mode & PORT_NONBLOCK)
+ FD_SET(fd, &ctx->nonblockBefore);
+ else {
+#ifdef USE_FIONBIO_IOCTL
+ int on = 1;
+ OK(ioctl(fd, FIONBIO, (char *)&on));
+#else
+ OK(fcntl(fd, F_SETFL, mode | PORT_NONBLOCK));
+#endif
+ FD_CLR(fd, &ctx->nonblockBefore);
+ }
+ }
+
+ /*
+ * If this descriptor is already in use, search for it again to see
+ * if any of the eventmask bits we want to set are already captured.
+ * We cannot usefully capture the same fd event more than once in the
+ * same context.
+ */
+ if (id != NULL && FindFD(ctx, fd, eventmask) != NULL)
+ EV_ERR(ETOOMANYREFS);
+
+ /* Allocate and fill. */
+ OKNEW(id);
+ id->func = func;
+ id->uap = uap;
+ id->fd = fd;
+ id->eventmask = eventmask;
+
+ /*
+ * Insert at head. Order could be important for performance if we
+ * believe that evGetNext()'s accesses to the fd_sets will be more
+ * serial and therefore more cache-lucky if the list is ordered by
+ * ``fd.'' We do not believe these things, so we don't do it.
+ *
+ * The interesting sequence is where GetNext() has cached a select()
+ * result and the caller decides to evSelectFD() on some descriptor.
+ * Since GetNext() starts at the head, it can miss new entries we add
+ * at the head. This is not a serious problem since the event being
+ * evSelectFD()'d for has to occur before evSelectFD() is called for
+ * the file event to be considered "missed" -- a real corner case.
+ * Maintaining a "tail" pointer for ctx->files would fix this, but I'm
+ * not sure it would be ``more correct.''
+ */
+ if (ctx->files != NULL)
+ ctx->files->prev = id;
+ id->prev = NULL;
+ id->next = ctx->files;
+ ctx->files = id;
+
+ /* Insert into fd table. */
+ if (ctx->fdTable[fd] != NULL)
+ ctx->fdTable[fd]->fdprev = id;
+ id->fdprev = NULL;
+ id->fdnext = ctx->fdTable[fd];
+ ctx->fdTable[fd] = id;
+
+ /* Turn on the appropriate bits in the {rd,wr,ex}Next fd_set's. */
+ if (eventmask & EV_READ)
+ FD_SET(fd, &ctx->rdNext);
+ if (eventmask & EV_WRITE)
+ FD_SET(fd, &ctx->wrNext);
+ if (eventmask & EV_EXCEPT)
+ FD_SET(fd, &ctx->exNext);
+
+ /* Update fdMax. */
+ if (fd > ctx->fdMax)
+ ctx->fdMax = fd;
+
+ /* Remember the ID if the caller provided us a place for it. */
+ if (opaqueID)
+ opaqueID->opaque = id;
+
+ return (0);
+}
+
+int
+evDeselectFD(evContext opaqueCtx, evFileID opaqueID) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evFile *del = opaqueID.opaque;
+ evFile *cur;
+ int mode, eventmask;
+
+ if (!del) {
+ evPrintf(ctx, 11, "evDeselectFD(NULL) ignored\n");
+ errno = EINVAL;
+ return (-1);
+ }
+
+ evPrintf(ctx, 1, "evDeselectFD(fd %d, mask 0x%x)\n",
+ del->fd, del->eventmask);
+
+ /* Get the mode. Unless the file has been closed, errors are bad. */
+ mode = fcntl(del->fd, F_GETFL, NULL);
+ if (mode == -1 && errno != EBADF)
+ EV_ERR(errno);
+
+ /* Remove from the list of files. */
+ if (del->prev != NULL)
+ del->prev->next = del->next;
+ else
+ ctx->files = del->next;
+ if (del->next != NULL)
+ del->next->prev = del->prev;
+
+ /* Remove from the fd table. */
+ if (del->fdprev != NULL)
+ del->fdprev->fdnext = del->fdnext;
+ else
+ ctx->fdTable[del->fd] = del->fdnext;
+ if (del->fdnext != NULL)
+ del->fdnext->fdprev = del->fdprev;
+
+ /*
+ * If the file descriptor does not appear in any other select() entry,
+ * and if !EV_WASNONBLOCK, and if we got no EBADF when we got the mode
+ * earlier, then: restore the fd to blocking status.
+ */
+ if (!(cur = FindFD(ctx, del->fd, EV_MASK_ALL)) &&
+ !FD_ISSET(del->fd, &ctx->nonblockBefore) &&
+ mode != -1) {
+ /*
+ * Note that we won't return an error status to the caller if
+ * this fcntl() fails since (a) we've already done the work
+ * and (b) the caller didn't ask us anything about O_NONBLOCK.
+ */
+#ifdef USE_FIONBIO_IOCTL
+ int off = 0;
+ (void) ioctl(del->fd, FIONBIO, (char *)&off);
+#else
+ (void) fcntl(del->fd, F_SETFL, mode & ~PORT_NONBLOCK);
+#endif
+ }
+
+ /*
+ * Now find all other uses of this descriptor and OR together an event
+ * mask so that we don't turn off {rd,wr,ex}Next bits that some other
+ * file event is using. As an optimization, stop if the event mask
+ * fills.
+ */
+ eventmask = 0;
+ for ((void)NULL;
+ cur != NULL && eventmask != EV_MASK_ALL;
+ cur = cur->next)
+ if (cur->fd == del->fd)
+ eventmask |= cur->eventmask;
+
+ /* OK, now we know which bits we can clear out. */
+ if (!(eventmask & EV_READ)) {
+ FD_CLR(del->fd, &ctx->rdNext);
+ if (FD_ISSET(del->fd, &ctx->rdLast)) {
+ FD_CLR(del->fd, &ctx->rdLast);
+ ctx->fdCount--;
+ }
+ }
+ if (!(eventmask & EV_WRITE)) {
+ FD_CLR(del->fd, &ctx->wrNext);
+ if (FD_ISSET(del->fd, &ctx->wrLast)) {
+ FD_CLR(del->fd, &ctx->wrLast);
+ ctx->fdCount--;
+ }
+ }
+ if (!(eventmask & EV_EXCEPT)) {
+ FD_CLR(del->fd, &ctx->exNext);
+ if (FD_ISSET(del->fd, &ctx->exLast)) {
+ FD_CLR(del->fd, &ctx->exLast);
+ ctx->fdCount--;
+ }
+ }
+
+ /* If this was the maxFD, find the new one. */
+ if (del->fd == ctx->fdMax) {
+ ctx->fdMax = -1;
+ for (cur = ctx->files; cur; cur = cur->next)
+ if (cur->fd > ctx->fdMax)
+ ctx->fdMax = cur->fd;
+ }
+
+ /* If this was the fdNext, cycle that to the next entry. */
+ if (del == ctx->fdNext)
+ ctx->fdNext = del->next;
+
+ /* Couldn't free it before now since we were using fields out of it. */
+ FREE(del);
+
+ return (0);
+}
+
+static evFile *
+FindFD(const evContext_p *ctx, int fd, int eventmask) {
+ evFile *id;
+
+ for (id = ctx->fdTable[fd]; id != NULL; id = id->fdnext)
+ if (id->fd == fd && (id->eventmask & eventmask) != 0)
+ break;
+ return (id);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/ev_streams.c b/usr/src/lib/libresolv2_joy/common/isc/ev_streams.c
new file mode 100644
index 0000000000..5dad36d04a
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/ev_streams.c
@@ -0,0 +1,308 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* ev_streams.c - implement asynch stream file IO for the eventlib
+ * vix 04mar96 [initial]
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: ev_streams.c,v 1.5 2005/04/27 04:56:36 sra Exp $";
+#endif
+
+#include "port_before.h"
+#include "fd_setsize.h"
+
+#include <sys/types.h>
+#include <sys/uio.h>
+
+#include <errno.h>
+
+#include <isc/eventlib.h>
+#include <isc/assertions.h>
+#include "eventlib_p.h"
+
+#include "port_after.h"
+
+static int copyvec(evStream *str, const struct iovec *iov, int iocnt);
+static void consume(evStream *str, size_t bytes);
+static void done(evContext opaqueCtx, evStream *str);
+static void writable(evContext opaqueCtx, void *uap, int fd, int evmask);
+static void readable(evContext opaqueCtx, void *uap, int fd, int evmask);
+
+struct iovec
+evConsIovec(void *buf, size_t cnt) {
+ struct iovec ret;
+
+ memset(&ret, 0xf5, sizeof ret);
+ ret.iov_base = buf;
+ ret.iov_len = cnt;
+ return (ret);
+}
+
+int
+evWrite(evContext opaqueCtx, int fd, const struct iovec *iov, int iocnt,
+ evStreamFunc func, void *uap, evStreamID *id)
+{
+ evContext_p *ctx = opaqueCtx.opaque;
+ evStream *new;
+ int save;
+
+ OKNEW(new);
+ new->func = func;
+ new->uap = uap;
+ new->fd = fd;
+ new->flags = 0;
+ if (evSelectFD(opaqueCtx, fd, EV_WRITE, writable, new, &new->file) < 0)
+ goto free;
+ if (copyvec(new, iov, iocnt) < 0)
+ goto free;
+ new->prevDone = NULL;
+ new->nextDone = NULL;
+ if (ctx->streams != NULL)
+ ctx->streams->prev = new;
+ new->prev = NULL;
+ new->next = ctx->streams;
+ ctx->streams = new;
+ if (id != NULL)
+ id->opaque = new;
+ return (0);
+ free:
+ save = errno;
+ FREE(new);
+ errno = save;
+ return (-1);
+}
+
+int
+evRead(evContext opaqueCtx, int fd, const struct iovec *iov, int iocnt,
+ evStreamFunc func, void *uap, evStreamID *id)
+{
+ evContext_p *ctx = opaqueCtx.opaque;
+ evStream *new;
+ int save;
+
+ OKNEW(new);
+ new->func = func;
+ new->uap = uap;
+ new->fd = fd;
+ new->flags = 0;
+ if (evSelectFD(opaqueCtx, fd, EV_READ, readable, new, &new->file) < 0)
+ goto free;
+ if (copyvec(new, iov, iocnt) < 0)
+ goto free;
+ new->prevDone = NULL;
+ new->nextDone = NULL;
+ if (ctx->streams != NULL)
+ ctx->streams->prev = new;
+ new->prev = NULL;
+ new->next = ctx->streams;
+ ctx->streams = new;
+ if (id)
+ id->opaque = new;
+ return (0);
+ free:
+ save = errno;
+ FREE(new);
+ errno = save;
+ return (-1);
+}
+
+int
+evTimeRW(evContext opaqueCtx, evStreamID id, evTimerID timer) /*ARGSUSED*/ {
+ evStream *str = id.opaque;
+
+ UNUSED(opaqueCtx);
+
+ str->timer = timer;
+ str->flags |= EV_STR_TIMEROK;
+ return (0);
+}
+
+int
+evUntimeRW(evContext opaqueCtx, evStreamID id) /*ARGSUSED*/ {
+ evStream *str = id.opaque;
+
+ UNUSED(opaqueCtx);
+
+ str->flags &= ~EV_STR_TIMEROK;
+ return (0);
+}
+
+int
+evCancelRW(evContext opaqueCtx, evStreamID id) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evStream *old = id.opaque;
+
+ /*
+ * The streams list is doubly threaded. First, there's ctx->streams
+ * that's used by evDestroy() to find and cancel all streams. Second,
+ * there's ctx->strDone (head) and ctx->strLast (tail) which thread
+ * through the potentially smaller number of "IO completed" streams,
+ * used in evGetNext() to avoid scanning the entire list.
+ */
+
+ /* Unlink from ctx->streams. */
+ if (old->prev != NULL)
+ old->prev->next = old->next;
+ else
+ ctx->streams = old->next;
+ if (old->next != NULL)
+ old->next->prev = old->prev;
+
+ /*
+ * If 'old' is on the ctx->strDone list, remove it. Update
+ * ctx->strLast if necessary.
+ */
+ if (old->prevDone == NULL && old->nextDone == NULL) {
+ /*
+ * Either 'old' is the only item on the done list, or it's
+ * not on the done list. If the former, then we unlink it
+ * from the list. If the latter, we leave the list alone.
+ */
+ if (ctx->strDone == old) {
+ ctx->strDone = NULL;
+ ctx->strLast = NULL;
+ }
+ } else {
+ if (old->prevDone != NULL)
+ old->prevDone->nextDone = old->nextDone;
+ else
+ ctx->strDone = old->nextDone;
+ if (old->nextDone != NULL)
+ old->nextDone->prevDone = old->prevDone;
+ else
+ ctx->strLast = old->prevDone;
+ }
+
+ /* Deallocate the stream. */
+ if (old->file.opaque)
+ evDeselectFD(opaqueCtx, old->file);
+ memput(old->iovOrig, sizeof (struct iovec) * old->iovOrigCount);
+ FREE(old);
+ return (0);
+}
+
+/* Copy a scatter/gather vector and initialize a stream handler's IO. */
+static int
+copyvec(evStream *str, const struct iovec *iov, int iocnt) {
+ int i;
+
+ str->iovOrig = (struct iovec *)memget(sizeof(struct iovec) * iocnt);
+ if (str->iovOrig == NULL) {
+ errno = ENOMEM;
+ return (-1);
+ }
+ str->ioTotal = 0;
+ for (i = 0; i < iocnt; i++) {
+ str->iovOrig[i] = iov[i];
+ str->ioTotal += iov[i].iov_len;
+ }
+ str->iovOrigCount = iocnt;
+ str->iovCur = str->iovOrig;
+ str->iovCurCount = str->iovOrigCount;
+ str->ioDone = 0;
+ return (0);
+}
+
+/* Pull off or truncate lead iovec(s). */
+static void
+consume(evStream *str, size_t bytes) {
+ while (bytes > 0U) {
+ if (bytes < (size_t)str->iovCur->iov_len) {
+ str->iovCur->iov_len -= bytes;
+ str->iovCur->iov_base = (void *)
+ ((u_char *)str->iovCur->iov_base + bytes);
+ str->ioDone += bytes;
+ bytes = 0;
+ } else {
+ bytes -= str->iovCur->iov_len;
+ str->ioDone += str->iovCur->iov_len;
+ str->iovCur++;
+ str->iovCurCount--;
+ }
+ }
+}
+
+/* Add a stream to Done list and deselect the FD. */
+static void
+done(evContext opaqueCtx, evStream *str) {
+ evContext_p *ctx = opaqueCtx.opaque;
+
+ if (ctx->strLast != NULL) {
+ str->prevDone = ctx->strLast;
+ ctx->strLast->nextDone = str;
+ ctx->strLast = str;
+ } else {
+ INSIST(ctx->strDone == NULL);
+ ctx->strDone = ctx->strLast = str;
+ }
+ evDeselectFD(opaqueCtx, str->file);
+ str->file.opaque = NULL;
+ /* evDrop() will call evCancelRW() on us. */
+}
+
+/* Dribble out some bytes on the stream. (Called by evDispatch().) */
+static void
+writable(evContext opaqueCtx, void *uap, int fd, int evmask) {
+ evStream *str = uap;
+ int bytes;
+
+ UNUSED(evmask);
+
+ bytes = writev(fd, str->iovCur, str->iovCurCount);
+ if (bytes > 0) {
+ if ((str->flags & EV_STR_TIMEROK) != 0)
+ evTouchIdleTimer(opaqueCtx, str->timer);
+ consume(str, bytes);
+ } else {
+ if (bytes < 0 && errno != EINTR) {
+ str->ioDone = -1;
+ str->ioErrno = errno;
+ }
+ }
+ if (str->ioDone == -1 || str->ioDone == str->ioTotal)
+ done(opaqueCtx, str);
+}
+
+/* Scoop up some bytes from the stream. (Called by evDispatch().) */
+static void
+readable(evContext opaqueCtx, void *uap, int fd, int evmask) {
+ evStream *str = uap;
+ int bytes;
+
+ UNUSED(evmask);
+
+ bytes = readv(fd, str->iovCur, str->iovCurCount);
+ if (bytes > 0) {
+ if ((str->flags & EV_STR_TIMEROK) != 0)
+ evTouchIdleTimer(opaqueCtx, str->timer);
+ consume(str, bytes);
+ } else {
+ if (bytes == 0)
+ str->ioDone = 0;
+ else {
+ if (errno != EINTR) {
+ str->ioDone = -1;
+ str->ioErrno = errno;
+ }
+ }
+ }
+ if (str->ioDone <= 0 || str->ioDone == str->ioTotal)
+ done(opaqueCtx, str);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/ev_timers.c b/usr/src/lib/libresolv2_joy/common/isc/ev_timers.c
new file mode 100644
index 0000000000..12ac2cebca
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/ev_timers.c
@@ -0,0 +1,499 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1995-1999 by Internet Software Consortium
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* ev_timers.c - implement timers for the eventlib
+ * vix 09sep95 [initial]
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: ev_timers.c,v 1.6 2005/04/27 04:56:36 sra Exp $";
+#endif
+
+/* Import. */
+
+#include "port_before.h"
+#include "fd_setsize.h"
+
+#include <errno.h>
+
+#include <isc/assertions.h>
+#include <isc/eventlib.h>
+#include "eventlib_p.h"
+
+#include "port_after.h"
+
+/* Constants. */
+
+#define MILLION 1000000
+#define BILLION 1000000000
+
+/* Forward. */
+
+static int due_sooner(void *, void *);
+static void set_index(void *, int);
+static void free_timer(void *, void *);
+static void print_timer(void *, void *);
+static void idle_timeout(evContext, void *, struct timespec, struct timespec);
+
+/* Private type. */
+
+typedef struct {
+ evTimerFunc func;
+ void * uap;
+ struct timespec lastTouched;
+ struct timespec max_idle;
+ evTimer * timer;
+} idle_timer;
+
+/* Public. */
+
+struct timespec
+evConsTime(time_t sec, long nsec) {
+ struct timespec x;
+
+ x.tv_sec = sec;
+ x.tv_nsec = nsec;
+ return (x);
+}
+
+struct timespec
+evAddTime(struct timespec addend1, struct timespec addend2) {
+ struct timespec x;
+
+ x.tv_sec = addend1.tv_sec + addend2.tv_sec;
+ x.tv_nsec = addend1.tv_nsec + addend2.tv_nsec;
+ if (x.tv_nsec >= BILLION) {
+ x.tv_sec++;
+ x.tv_nsec -= BILLION;
+ }
+ return (x);
+}
+
+struct timespec
+evSubTime(struct timespec minuend, struct timespec subtrahend) {
+ struct timespec x;
+
+ x.tv_sec = minuend.tv_sec - subtrahend.tv_sec;
+ if (minuend.tv_nsec >= subtrahend.tv_nsec)
+ x.tv_nsec = minuend.tv_nsec - subtrahend.tv_nsec;
+ else {
+ x.tv_nsec = BILLION - subtrahend.tv_nsec + minuend.tv_nsec;
+ x.tv_sec--;
+ }
+ return (x);
+}
+
+int
+evCmpTime(struct timespec a, struct timespec b) {
+ long x = a.tv_sec - b.tv_sec;
+
+ if (x == 0L)
+ x = a.tv_nsec - b.tv_nsec;
+ return (x < 0L ? (-1) : x > 0L ? (1) : (0));
+}
+
+struct timespec
+evNowTime() {
+ struct timeval now;
+#ifdef CLOCK_REALTIME
+ struct timespec tsnow;
+ int m = CLOCK_REALTIME;
+
+#ifdef CLOCK_MONOTONIC
+ if (__evOptMonoTime)
+ m = CLOCK_MONOTONIC;
+#endif
+ if (clock_gettime(m, &tsnow) == 0)
+ return (tsnow);
+#endif
+ if (gettimeofday(&now, NULL) < 0)
+ return (evConsTime(0, 0));
+ return (evTimeSpec(now));
+}
+
+struct timespec
+evUTCTime() {
+ struct timeval now;
+#ifdef CLOCK_REALTIME
+ struct timespec tsnow;
+ if (clock_gettime(CLOCK_REALTIME, &tsnow) == 0)
+ return (tsnow);
+#endif
+ if (gettimeofday(&now, NULL) < 0)
+ return (evConsTime(0, 0));
+ return (evTimeSpec(now));
+}
+
+struct timespec
+evLastEventTime(evContext opaqueCtx) {
+ evContext_p *ctx = opaqueCtx.opaque;
+
+ return (ctx->lastEventTime);
+}
+
+struct timespec
+evTimeSpec(struct timeval tv) {
+ struct timespec ts;
+
+ ts.tv_sec = tv.tv_sec;
+ ts.tv_nsec = tv.tv_usec * 1000;
+ return (ts);
+}
+
+struct timeval
+evTimeVal(struct timespec ts) {
+ struct timeval tv;
+
+ tv.tv_sec = ts.tv_sec;
+ tv.tv_usec = ts.tv_nsec / 1000;
+ return (tv);
+}
+
+int
+evSetTimer(evContext opaqueCtx,
+ evTimerFunc func,
+ void *uap,
+ struct timespec due,
+ struct timespec inter,
+ evTimerID *opaqueID
+) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evTimer *id;
+
+ evPrintf(ctx, 1,
+"evSetTimer(ctx %p, func %p, uap %p, due %ld.%09ld, inter %ld.%09ld)\n",
+ ctx, func, uap,
+ (long)due.tv_sec, due.tv_nsec,
+ (long)inter.tv_sec, inter.tv_nsec);
+
+#ifdef __hpux
+ /*
+ * tv_sec and tv_nsec are unsigned.
+ */
+ if (due.tv_nsec >= BILLION)
+ EV_ERR(EINVAL);
+
+ if (inter.tv_nsec >= BILLION)
+ EV_ERR(EINVAL);
+#else
+ if (due.tv_sec < 0 || due.tv_nsec < 0 || due.tv_nsec >= BILLION)
+ EV_ERR(EINVAL);
+
+ if (inter.tv_sec < 0 || inter.tv_nsec < 0 || inter.tv_nsec >= BILLION)
+ EV_ERR(EINVAL);
+#endif
+
+ /* due={0,0} is a magic cookie meaning "now." */
+ if (due.tv_sec == (time_t)0 && due.tv_nsec == 0L)
+ due = evNowTime();
+
+ /* Allocate and fill. */
+ OKNEW(id);
+ id->func = func;
+ id->uap = uap;
+ id->due = due;
+ id->inter = inter;
+
+ if (heap_insert(ctx->timers, id) < 0)
+ return (-1);
+
+ /* Remember the ID if the caller provided us a place for it. */
+ if (opaqueID)
+ opaqueID->opaque = id;
+
+ if (ctx->debug > 7) {
+ evPrintf(ctx, 7, "timers after evSetTimer:\n");
+ (void) heap_for_each(ctx->timers, print_timer, (void *)ctx);
+ }
+
+ return (0);
+}
+
+int
+evClearTimer(evContext opaqueCtx, evTimerID id) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evTimer *del = id.opaque;
+
+ if (ctx->cur != NULL &&
+ ctx->cur->type == Timer &&
+ ctx->cur->u.timer.this == del) {
+ evPrintf(ctx, 8, "deferring delete of timer (executing)\n");
+ /*
+ * Setting the interval to zero ensures that evDrop() will
+ * clean up the timer.
+ */
+ del->inter = evConsTime(0, 0);
+ return (0);
+ }
+
+ if (heap_element(ctx->timers, del->index) != del)
+ EV_ERR(ENOENT);
+
+ if (heap_delete(ctx->timers, del->index) < 0)
+ return (-1);
+ FREE(del);
+
+ if (ctx->debug > 7) {
+ evPrintf(ctx, 7, "timers after evClearTimer:\n");
+ (void) heap_for_each(ctx->timers, print_timer, (void *)ctx);
+ }
+
+ return (0);
+}
+
+int
+evConfigTimer(evContext opaqueCtx,
+ evTimerID id,
+ const char *param,
+ int value
+) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evTimer *timer = id.opaque;
+ int result=0;
+
+ UNUSED(value);
+
+ if (heap_element(ctx->timers, timer->index) != timer)
+ EV_ERR(ENOENT);
+
+ if (strcmp(param, "rate") == 0)
+ timer->mode |= EV_TMR_RATE;
+ else if (strcmp(param, "interval") == 0)
+ timer->mode &= ~EV_TMR_RATE;
+ else
+ EV_ERR(EINVAL);
+
+ return (result);
+}
+
+int
+evResetTimer(evContext opaqueCtx,
+ evTimerID id,
+ evTimerFunc func,
+ void *uap,
+ struct timespec due,
+ struct timespec inter
+) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evTimer *timer = id.opaque;
+ struct timespec old_due;
+ int result=0;
+
+ if (heap_element(ctx->timers, timer->index) != timer)
+ EV_ERR(ENOENT);
+
+#ifdef __hpux
+ /*
+ * tv_sec and tv_nsec are unsigned.
+ */
+ if (due.tv_nsec >= BILLION)
+ EV_ERR(EINVAL);
+
+ if (inter.tv_nsec >= BILLION)
+ EV_ERR(EINVAL);
+#else
+ if (due.tv_sec < 0 || due.tv_nsec < 0 || due.tv_nsec >= BILLION)
+ EV_ERR(EINVAL);
+
+ if (inter.tv_sec < 0 || inter.tv_nsec < 0 || inter.tv_nsec >= BILLION)
+ EV_ERR(EINVAL);
+#endif
+
+ old_due = timer->due;
+
+ timer->func = func;
+ timer->uap = uap;
+ timer->due = due;
+ timer->inter = inter;
+
+ switch (evCmpTime(due, old_due)) {
+ case -1:
+ result = heap_increased(ctx->timers, timer->index);
+ break;
+ case 0:
+ result = 0;
+ break;
+ case 1:
+ result = heap_decreased(ctx->timers, timer->index);
+ break;
+ }
+
+ if (ctx->debug > 7) {
+ evPrintf(ctx, 7, "timers after evResetTimer:\n");
+ (void) heap_for_each(ctx->timers, print_timer, (void *)ctx);
+ }
+
+ return (result);
+}
+
+int
+evSetIdleTimer(evContext opaqueCtx,
+ evTimerFunc func,
+ void *uap,
+ struct timespec max_idle,
+ evTimerID *opaqueID
+) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ idle_timer *tt;
+
+ /* Allocate and fill. */
+ OKNEW(tt);
+ tt->func = func;
+ tt->uap = uap;
+ tt->lastTouched = ctx->lastEventTime;
+ tt->max_idle = max_idle;
+
+ if (evSetTimer(opaqueCtx, idle_timeout, tt,
+ evAddTime(ctx->lastEventTime, max_idle),
+ max_idle, opaqueID) < 0) {
+ FREE(tt);
+ return (-1);
+ }
+
+ tt->timer = opaqueID->opaque;
+
+ return (0);
+}
+
+int
+evClearIdleTimer(evContext opaqueCtx, evTimerID id) {
+ evTimer *del = id.opaque;
+ idle_timer *tt = del->uap;
+
+ FREE(tt);
+ return (evClearTimer(opaqueCtx, id));
+}
+
+int
+evResetIdleTimer(evContext opaqueCtx,
+ evTimerID opaqueID,
+ evTimerFunc func,
+ void *uap,
+ struct timespec max_idle
+) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evTimer *timer = opaqueID.opaque;
+ idle_timer *tt = timer->uap;
+
+ tt->func = func;
+ tt->uap = uap;
+ tt->lastTouched = ctx->lastEventTime;
+ tt->max_idle = max_idle;
+
+ return (evResetTimer(opaqueCtx, opaqueID, idle_timeout, tt,
+ evAddTime(ctx->lastEventTime, max_idle),
+ max_idle));
+}
+
+int
+evTouchIdleTimer(evContext opaqueCtx, evTimerID id) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evTimer *t = id.opaque;
+ idle_timer *tt = t->uap;
+
+ tt->lastTouched = ctx->lastEventTime;
+
+ return (0);
+}
+
+/* Public to the rest of eventlib. */
+
+heap_context
+evCreateTimers(const evContext_p *ctx) {
+
+ UNUSED(ctx);
+
+ return (heap_new(due_sooner, set_index, 2048));
+}
+
+void
+evDestroyTimers(const evContext_p *ctx) {
+ (void) heap_for_each(ctx->timers, free_timer, NULL);
+ (void) heap_free(ctx->timers);
+}
+
+/* Private. */
+
+static int
+due_sooner(void *a, void *b) {
+ evTimer *a_timer, *b_timer;
+
+ a_timer = a;
+ b_timer = b;
+ return (evCmpTime(a_timer->due, b_timer->due) < 0);
+}
+
+static void
+set_index(void *what, int index) {
+ evTimer *timer;
+
+ timer = what;
+ timer->index = index;
+}
+
+static void
+free_timer(void *what, void *uap) {
+ evTimer *t = what;
+
+ UNUSED(uap);
+
+ FREE(t);
+}
+
+static void
+print_timer(void *what, void *uap) {
+ evTimer *cur = what;
+ evContext_p *ctx = uap;
+
+ cur = what;
+ evPrintf(ctx, 7,
+ " func %p, uap %p, due %ld.%09ld, inter %ld.%09ld\n",
+ cur->func, cur->uap,
+ (long)cur->due.tv_sec, cur->due.tv_nsec,
+ (long)cur->inter.tv_sec, cur->inter.tv_nsec);
+}
+
+static void
+idle_timeout(evContext opaqueCtx,
+ void *uap,
+ struct timespec due,
+ struct timespec inter
+) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ idle_timer *this = uap;
+ struct timespec idle;
+
+ UNUSED(due);
+ UNUSED(inter);
+
+ idle = evSubTime(ctx->lastEventTime, this->lastTouched);
+ if (evCmpTime(idle, this->max_idle) >= 0) {
+ (this->func)(opaqueCtx, this->uap, this->timer->due,
+ this->max_idle);
+ /*
+ * Setting the interval to zero will cause the timer to
+ * be cleaned up in evDrop().
+ */
+ this->timer->inter = evConsTime(0, 0);
+ FREE(this);
+ } else {
+ /* evDrop() will reschedule the timer. */
+ this->timer->inter = evSubTime(this->max_idle, idle);
+ }
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/ev_waits.c b/usr/src/lib/libresolv2_joy/common/isc/ev_waits.c
new file mode 100644
index 0000000000..99da1526c7
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/ev_waits.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* ev_waits.c - implement deferred function calls for the eventlib
+ * vix 05dec95 [initial]
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: ev_waits.c,v 1.4 2005/04/27 04:56:36 sra Exp $";
+#endif
+
+#include "port_before.h"
+#include "fd_setsize.h"
+
+#include <errno.h>
+
+#include <isc/eventlib.h>
+#include <isc/assertions.h>
+#include "eventlib_p.h"
+
+#include "port_after.h"
+
+/* Forward. */
+
+static void print_waits(evContext_p *ctx);
+static evWaitList * evNewWaitList(evContext_p *);
+static void evFreeWaitList(evContext_p *, evWaitList *);
+static evWaitList * evGetWaitList(evContext_p *, const void *, int);
+
+
+/* Public. */
+
+/*%
+ * Enter a new wait function on the queue.
+ */
+int
+evWaitFor(evContext opaqueCtx, const void *tag,
+ evWaitFunc func, void *uap, evWaitID *id)
+{
+ evContext_p *ctx = opaqueCtx.opaque;
+ evWait *new;
+ evWaitList *wl = evGetWaitList(ctx, tag, 1);
+
+ OKNEW(new);
+ new->func = func;
+ new->uap = uap;
+ new->tag = tag;
+ new->next = NULL;
+ if (wl->last != NULL)
+ wl->last->next = new;
+ else
+ wl->first = new;
+ wl->last = new;
+ if (id != NULL)
+ id->opaque = new;
+ if (ctx->debug >= 9)
+ print_waits(ctx);
+ return (0);
+}
+
+/*%
+ * Mark runnable all waiting functions having a certain tag.
+ */
+int
+evDo(evContext opaqueCtx, const void *tag) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evWaitList *wl = evGetWaitList(ctx, tag, 0);
+ evWait *first;
+
+ if (!wl) {
+ errno = ENOENT;
+ return (-1);
+ }
+
+ first = wl->first;
+ INSIST(first != NULL);
+
+ if (ctx->waitDone.last != NULL)
+ ctx->waitDone.last->next = first;
+ else
+ ctx->waitDone.first = first;
+ ctx->waitDone.last = wl->last;
+ evFreeWaitList(ctx, wl);
+
+ return (0);
+}
+
+/*%
+ * Remove a waiting (or ready to run) function from the queue.
+ */
+int
+evUnwait(evContext opaqueCtx, evWaitID id) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evWait *this, *prev;
+ evWaitList *wl;
+ int found = 0;
+
+ this = id.opaque;
+ INSIST(this != NULL);
+ wl = evGetWaitList(ctx, this->tag, 0);
+ if (wl != NULL) {
+ for (prev = NULL, this = wl->first;
+ this != NULL;
+ prev = this, this = this->next)
+ if (this == (evWait *)id.opaque) {
+ found = 1;
+ if (prev != NULL)
+ prev->next = this->next;
+ else
+ wl->first = this->next;
+ if (wl->last == this)
+ wl->last = prev;
+ if (wl->first == NULL)
+ evFreeWaitList(ctx, wl);
+ break;
+ }
+ }
+
+ if (!found) {
+ /* Maybe it's done */
+ for (prev = NULL, this = ctx->waitDone.first;
+ this != NULL;
+ prev = this, this = this->next)
+ if (this == (evWait *)id.opaque) {
+ found = 1;
+ if (prev != NULL)
+ prev->next = this->next;
+ else
+ ctx->waitDone.first = this->next;
+ if (ctx->waitDone.last == this)
+ ctx->waitDone.last = prev;
+ break;
+ }
+ }
+
+ if (!found) {
+ errno = ENOENT;
+ return (-1);
+ }
+
+ FREE(this);
+
+ if (ctx->debug >= 9)
+ print_waits(ctx);
+
+ return (0);
+}
+
+int
+evDefer(evContext opaqueCtx, evWaitFunc func, void *uap) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evWait *new;
+
+ OKNEW(new);
+ new->func = func;
+ new->uap = uap;
+ new->tag = NULL;
+ new->next = NULL;
+ if (ctx->waitDone.last != NULL)
+ ctx->waitDone.last->next = new;
+ else
+ ctx->waitDone.first = new;
+ ctx->waitDone.last = new;
+ if (ctx->debug >= 9)
+ print_waits(ctx);
+ return (0);
+}
+
+/* Private. */
+
+static void
+print_waits(evContext_p *ctx) {
+ evWaitList *wl;
+ evWait *this;
+
+ evPrintf(ctx, 9, "wait waiting:\n");
+ for (wl = ctx->waitLists; wl != NULL; wl = wl->next) {
+ INSIST(wl->first != NULL);
+ evPrintf(ctx, 9, " tag %p:", wl->first->tag);
+ for (this = wl->first; this != NULL; this = this->next)
+ evPrintf(ctx, 9, " %p", this);
+ evPrintf(ctx, 9, "\n");
+ }
+ evPrintf(ctx, 9, "wait done:");
+ for (this = ctx->waitDone.first; this != NULL; this = this->next)
+ evPrintf(ctx, 9, " %p", this);
+ evPrintf(ctx, 9, "\n");
+}
+
+static evWaitList *
+evNewWaitList(evContext_p *ctx) {
+ evWaitList *new;
+
+ NEW(new);
+ if (new == NULL)
+ return (NULL);
+ new->first = new->last = NULL;
+ new->prev = NULL;
+ new->next = ctx->waitLists;
+ if (new->next != NULL)
+ new->next->prev = new;
+ ctx->waitLists = new;
+ return (new);
+}
+
+static void
+evFreeWaitList(evContext_p *ctx, evWaitList *this) {
+
+ INSIST(this != NULL);
+
+ if (this->prev != NULL)
+ this->prev->next = this->next;
+ else
+ ctx->waitLists = this->next;
+ if (this->next != NULL)
+ this->next->prev = this->prev;
+ FREE(this);
+}
+
+static evWaitList *
+evGetWaitList(evContext_p *ctx, const void *tag, int should_create) {
+ evWaitList *this;
+
+ for (this = ctx->waitLists; this != NULL; this = this->next) {
+ if (this->first != NULL && this->first->tag == tag)
+ break;
+ }
+ if (this == NULL && should_create)
+ this = evNewWaitList(ctx);
+ return (this);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/eventlib.c b/usr/src/lib/libresolv2_joy/common/isc/eventlib.c
new file mode 100644
index 0000000000..be4a7848b9
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/eventlib.c
@@ -0,0 +1,933 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1995-1999 by Internet Software Consortium
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* eventlib.c - implement glue for the eventlib
+ * vix 09sep95 [initial]
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: eventlib.c,v 1.10 2006/03/09 23:57:56 marka Exp $";
+#endif
+
+#include "port_before.h"
+#include "fd_setsize.h"
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/stat.h>
+#ifdef SOLARIS2
+#include <limits.h>
+#endif /* SOLARIS2 */
+
+#include <errno.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <isc/eventlib.h>
+#include <isc/assertions.h>
+#include "eventlib_p.h"
+
+#include "port_after.h"
+
+int __evOptMonoTime;
+
+#ifdef USE_POLL
+#define pselect Pselect
+#endif /* USE_POLL */
+
+/* Forward. */
+
+#if defined(NEED_PSELECT) || defined(USE_POLL)
+static int pselect(int, void *, void *, void *,
+ struct timespec *,
+ const sigset_t *);
+#endif
+
+int __evOptMonoTime;
+
+/* Public. */
+
+int
+evCreate(evContext *opaqueCtx) {
+ evContext_p *ctx;
+
+ /* Make sure the memory heap is initialized. */
+ if (meminit(0, 0) < 0 && errno != EEXIST)
+ return (-1);
+
+ OKNEW(ctx);
+
+ /* Global. */
+ ctx->cur = NULL;
+
+ /* Debugging. */
+ ctx->debug = 0;
+ ctx->output = NULL;
+
+ /* Connections. */
+ ctx->conns = NULL;
+ INIT_LIST(ctx->accepts);
+
+ /* Files. */
+ ctx->files = NULL;
+#ifdef USE_POLL
+ ctx->pollfds = NULL;
+ ctx->maxnfds = 0;
+ ctx->firstfd = 0;
+ emulMaskInit(ctx, rdLast, EV_READ, 1);
+ emulMaskInit(ctx, rdNext, EV_READ, 0);
+ emulMaskInit(ctx, wrLast, EV_WRITE, 1);
+ emulMaskInit(ctx, wrNext, EV_WRITE, 0);
+ emulMaskInit(ctx, exLast, EV_EXCEPT, 1);
+ emulMaskInit(ctx, exNext, EV_EXCEPT, 0);
+ emulMaskInit(ctx, nonblockBefore, EV_WASNONBLOCKING, 0);
+#endif /* USE_POLL */
+ FD_ZERO(&ctx->rdNext);
+ FD_ZERO(&ctx->wrNext);
+ FD_ZERO(&ctx->exNext);
+ FD_ZERO(&ctx->nonblockBefore);
+ ctx->fdMax = -1;
+ ctx->fdNext = NULL;
+ ctx->fdCount = 0; /*%< Invalidate {rd,wr,ex}Last. */
+#ifndef USE_POLL
+ ctx->highestFD = FD_SETSIZE - 1;
+ memset(ctx->fdTable, 0, sizeof ctx->fdTable);
+#else
+ ctx->highestFD = INT_MAX / sizeof(struct pollfd);
+ ctx->fdTable = NULL;
+#endif /* USE_POLL */
+#ifdef EVENTLIB_TIME_CHECKS
+ ctx->lastFdCount = 0;
+#endif
+
+ /* Streams. */
+ ctx->streams = NULL;
+ ctx->strDone = NULL;
+ ctx->strLast = NULL;
+
+ /* Timers. */
+ ctx->lastEventTime = evNowTime();
+#ifdef EVENTLIB_TIME_CHECKS
+ ctx->lastSelectTime = ctx->lastEventTime;
+#endif
+ ctx->timers = evCreateTimers(ctx);
+ if (ctx->timers == NULL)
+ return (-1);
+
+ /* Waits. */
+ ctx->waitLists = NULL;
+ ctx->waitDone.first = ctx->waitDone.last = NULL;
+ ctx->waitDone.prev = ctx->waitDone.next = NULL;
+
+ opaqueCtx->opaque = ctx;
+ return (0);
+}
+
+void
+evSetDebug(evContext opaqueCtx, int level, FILE *output) {
+ evContext_p *ctx = opaqueCtx.opaque;
+
+ ctx->debug = level;
+ ctx->output = output;
+}
+
+int
+evDestroy(evContext opaqueCtx) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ int revs = 424242; /*%< Doug Adams. */
+ evWaitList *this_wl, *next_wl;
+ evWait *this_wait, *next_wait;
+
+ /* Connections. */
+ while (revs-- > 0 && ctx->conns != NULL) {
+ evConnID id;
+
+ id.opaque = ctx->conns;
+ (void) evCancelConn(opaqueCtx, id);
+ }
+ INSIST(revs >= 0);
+
+ /* Streams. */
+ while (revs-- > 0 && ctx->streams != NULL) {
+ evStreamID id;
+
+ id.opaque = ctx->streams;
+ (void) evCancelRW(opaqueCtx, id);
+ }
+
+ /* Files. */
+ while (revs-- > 0 && ctx->files != NULL) {
+ evFileID id;
+
+ id.opaque = ctx->files;
+ (void) evDeselectFD(opaqueCtx, id);
+ }
+ INSIST(revs >= 0);
+
+ /* Timers. */
+ evDestroyTimers(ctx);
+
+ /* Waits. */
+ for (this_wl = ctx->waitLists;
+ revs-- > 0 && this_wl != NULL;
+ this_wl = next_wl) {
+ next_wl = this_wl->next;
+ for (this_wait = this_wl->first;
+ revs-- > 0 && this_wait != NULL;
+ this_wait = next_wait) {
+ next_wait = this_wait->next;
+ FREE(this_wait);
+ }
+ FREE(this_wl);
+ }
+ for (this_wait = ctx->waitDone.first;
+ revs-- > 0 && this_wait != NULL;
+ this_wait = next_wait) {
+ next_wait = this_wait->next;
+ FREE(this_wait);
+ }
+
+ FREE(ctx);
+ return (0);
+}
+
+int
+evGetNext(evContext opaqueCtx, evEvent *opaqueEv, int options) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ struct timespec nextTime;
+ evTimer *nextTimer;
+ evEvent_p *new;
+ int x, pselect_errno, timerPast;
+#ifdef EVENTLIB_TIME_CHECKS
+ struct timespec interval;
+#endif
+
+ /* Ensure that exactly one of EV_POLL or EV_WAIT was specified. */
+ x = ((options & EV_POLL) != 0) + ((options & EV_WAIT) != 0);
+ if (x != 1)
+ EV_ERR(EINVAL);
+
+ /* Get the time of day. We'll do this again after select() blocks. */
+ ctx->lastEventTime = evNowTime();
+
+ again:
+ /* Finished accept()'s do not require a select(). */
+ if (!EMPTY(ctx->accepts)) {
+ OKNEW(new);
+ new->type = Accept;
+ new->u.accept.this = HEAD(ctx->accepts);
+ UNLINK(ctx->accepts, HEAD(ctx->accepts), link);
+ opaqueEv->opaque = new;
+ return (0);
+ }
+
+ /* Stream IO does not require a select(). */
+ if (ctx->strDone != NULL) {
+ OKNEW(new);
+ new->type = Stream;
+ new->u.stream.this = ctx->strDone;
+ ctx->strDone = ctx->strDone->nextDone;
+ if (ctx->strDone == NULL)
+ ctx->strLast = NULL;
+ opaqueEv->opaque = new;
+ return (0);
+ }
+
+ /* Waits do not require a select(). */
+ if (ctx->waitDone.first != NULL) {
+ OKNEW(new);
+ new->type = Wait;
+ new->u.wait.this = ctx->waitDone.first;
+ ctx->waitDone.first = ctx->waitDone.first->next;
+ if (ctx->waitDone.first == NULL)
+ ctx->waitDone.last = NULL;
+ opaqueEv->opaque = new;
+ return (0);
+ }
+
+ /* Get the status and content of the next timer. */
+ if ((nextTimer = heap_element(ctx->timers, 1)) != NULL) {
+ nextTime = nextTimer->due;
+ timerPast = (evCmpTime(nextTime, ctx->lastEventTime) <= 0);
+ } else
+ timerPast = 0; /*%< Make gcc happy. */
+ evPrintf(ctx, 9, "evGetNext: fdCount %d\n", ctx->fdCount);
+ if (ctx->fdCount == 0) {
+ static const struct timespec NoTime = {0, 0L};
+ enum { JustPoll, Block, Timer } m;
+ struct timespec t, *tp;
+
+ /* Are there any events at all? */
+ if ((options & EV_WAIT) != 0 && !nextTimer && ctx->fdMax == -1)
+ EV_ERR(ENOENT);
+
+ /* Figure out what select()'s timeout parameter should be. */
+ if ((options & EV_POLL) != 0) {
+ m = JustPoll;
+ t = NoTime;
+ tp = &t;
+ } else if (nextTimer == NULL) {
+ m = Block;
+ /* ``t'' unused. */
+ tp = NULL;
+ } else if (timerPast) {
+ m = JustPoll;
+ t = NoTime;
+ tp = &t;
+ } else {
+ m = Timer;
+ /* ``t'' filled in later. */
+ tp = &t;
+ }
+#ifdef EVENTLIB_TIME_CHECKS
+ if (ctx->debug > 0) {
+ interval = evSubTime(ctx->lastEventTime,
+ ctx->lastSelectTime);
+ if (interval.tv_sec > 0 || interval.tv_nsec > 0)
+ evPrintf(ctx, 1,
+ "time between pselect() %u.%09u count %d\n",
+ interval.tv_sec, interval.tv_nsec,
+ ctx->lastFdCount);
+ }
+#endif
+ do {
+#ifndef USE_POLL
+ /* XXX need to copy only the bits we are using. */
+ ctx->rdLast = ctx->rdNext;
+ ctx->wrLast = ctx->wrNext;
+ ctx->exLast = ctx->exNext;
+#else
+ /*
+ * The pollfd structure uses separate fields for
+ * the input and output events (corresponding to
+ * the ??Next and ??Last fd sets), so there's no
+ * need to copy one to the other.
+ */
+#endif /* USE_POLL */
+ if (m == Timer) {
+ INSIST(tp == &t);
+ t = evSubTime(nextTime, ctx->lastEventTime);
+ }
+
+ /* XXX should predict system's earliness and adjust. */
+ x = pselect(ctx->fdMax+1,
+ &ctx->rdLast, &ctx->wrLast, &ctx->exLast,
+ tp, NULL);
+ pselect_errno = errno;
+
+#ifndef USE_POLL
+ evPrintf(ctx, 4, "select() returns %d (err: %s)\n",
+ x, (x == -1) ? strerror(errno) : "none");
+#else
+ evPrintf(ctx, 4, "poll() returns %d (err: %s)\n",
+ x, (x == -1) ? strerror(errno) : "none");
+#endif /* USE_POLL */
+ /* Anything but a poll can change the time. */
+ if (m != JustPoll)
+ ctx->lastEventTime = evNowTime();
+
+ /* Select() likes to finish about 10ms early. */
+ } while (x == 0 && m == Timer &&
+ evCmpTime(ctx->lastEventTime, nextTime) < 0);
+#ifdef EVENTLIB_TIME_CHECKS
+ ctx->lastSelectTime = ctx->lastEventTime;
+#endif
+ if (x < 0) {
+ if (pselect_errno == EINTR) {
+ if ((options & EV_NULL) != 0)
+ goto again;
+ OKNEW(new);
+ new->type = Null;
+ /* No data. */
+ opaqueEv->opaque = new;
+ return (0);
+ }
+ if (pselect_errno == EBADF) {
+ for (x = 0; x <= ctx->fdMax; x++) {
+ struct stat sb;
+
+ if (FD_ISSET(x, &ctx->rdNext) == 0 &&
+ FD_ISSET(x, &ctx->wrNext) == 0 &&
+ FD_ISSET(x, &ctx->exNext) == 0)
+ continue;
+ if (fstat(x, &sb) == -1 &&
+ errno == EBADF)
+ evPrintf(ctx, 1, "EBADF: %d\n",
+ x);
+ }
+ abort();
+ }
+ EV_ERR(pselect_errno);
+ }
+ if (x == 0 && (nextTimer == NULL || !timerPast) &&
+ (options & EV_POLL))
+ EV_ERR(EWOULDBLOCK);
+ ctx->fdCount = x;
+#ifdef EVENTLIB_TIME_CHECKS
+ ctx->lastFdCount = x;
+#endif
+ }
+ INSIST(nextTimer || ctx->fdCount);
+
+ /* Timers go first since we'd like them to be accurate. */
+ if (nextTimer && !timerPast) {
+ /* Has anything happened since we blocked? */
+ timerPast = (evCmpTime(nextTime, ctx->lastEventTime) <= 0);
+ }
+ if (nextTimer && timerPast) {
+ OKNEW(new);
+ new->type = Timer;
+ new->u.timer.this = nextTimer;
+ opaqueEv->opaque = new;
+ return (0);
+ }
+
+ /* No timers, so there should be a ready file descriptor. */
+ x = 0;
+ while (ctx->fdCount > 0) {
+ evFile *fid;
+ int fd, eventmask;
+
+ if (ctx->fdNext == NULL) {
+ if (++x == 2) {
+ /*
+ * Hitting the end twice means that the last
+ * select() found some FD's which have since
+ * been deselected.
+ *
+ * On some systems, the count returned by
+ * selects is the total number of bits in
+ * all masks that are set, and on others it's
+ * the number of fd's that have some bit set,
+ * and on others, it's just broken. We
+ * always assume that it's the number of
+ * bits set in all masks, because that's what
+ * the man page says it should do, and
+ * the worst that can happen is we do an
+ * extra select().
+ */
+ ctx->fdCount = 0;
+ break;
+ }
+ ctx->fdNext = ctx->files;
+ }
+ fid = ctx->fdNext;
+ ctx->fdNext = fid->next;
+
+ fd = fid->fd;
+ eventmask = 0;
+ if (FD_ISSET(fd, &ctx->rdLast))
+ eventmask |= EV_READ;
+ if (FD_ISSET(fd, &ctx->wrLast))
+ eventmask |= EV_WRITE;
+ if (FD_ISSET(fd, &ctx->exLast))
+ eventmask |= EV_EXCEPT;
+ eventmask &= fid->eventmask;
+ if (eventmask != 0) {
+ if ((eventmask & EV_READ) != 0) {
+ FD_CLR(fd, &ctx->rdLast);
+ ctx->fdCount--;
+ }
+ if ((eventmask & EV_WRITE) != 0) {
+ FD_CLR(fd, &ctx->wrLast);
+ ctx->fdCount--;
+ }
+ if ((eventmask & EV_EXCEPT) != 0) {
+ FD_CLR(fd, &ctx->exLast);
+ ctx->fdCount--;
+ }
+ OKNEW(new);
+ new->type = File;
+ new->u.file.this = fid;
+ new->u.file.eventmask = eventmask;
+ opaqueEv->opaque = new;
+ return (0);
+ }
+ }
+ if (ctx->fdCount < 0) {
+ /*
+ * select()'s count is off on a number of systems, and
+ * can result in fdCount < 0.
+ */
+ evPrintf(ctx, 4, "fdCount < 0 (%d)\n", ctx->fdCount);
+ ctx->fdCount = 0;
+ }
+
+ /* We get here if the caller deselect()'s an FD. Gag me with a goto. */
+ goto again;
+}
+
+int
+evDispatch(evContext opaqueCtx, evEvent opaqueEv) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evEvent_p *ev = opaqueEv.opaque;
+#ifdef EVENTLIB_TIME_CHECKS
+ void *func;
+ struct timespec start_time;
+ struct timespec interval;
+#endif
+
+#ifdef EVENTLIB_TIME_CHECKS
+ if (ctx->debug > 0)
+ start_time = evNowTime();
+#endif
+ ctx->cur = ev;
+ switch (ev->type) {
+ case Accept: {
+ evAccept *this = ev->u.accept.this;
+
+ evPrintf(ctx, 5,
+ "Dispatch.Accept: fd %d -> %d, func %p, uap %p\n",
+ this->conn->fd, this->fd,
+ this->conn->func, this->conn->uap);
+ errno = this->ioErrno;
+ (this->conn->func)(opaqueCtx, this->conn->uap, this->fd,
+ &this->la, this->lalen,
+ &this->ra, this->ralen);
+#ifdef EVENTLIB_TIME_CHECKS
+ func = this->conn->func;
+#endif
+ break;
+ }
+ case File: {
+ evFile *this = ev->u.file.this;
+ int eventmask = ev->u.file.eventmask;
+
+ evPrintf(ctx, 5,
+ "Dispatch.File: fd %d, mask 0x%x, func %p, uap %p\n",
+ this->fd, this->eventmask, this->func, this->uap);
+ (this->func)(opaqueCtx, this->uap, this->fd, eventmask);
+#ifdef EVENTLIB_TIME_CHECKS
+ func = this->func;
+#endif
+ break;
+ }
+ case Stream: {
+ evStream *this = ev->u.stream.this;
+
+ evPrintf(ctx, 5,
+ "Dispatch.Stream: fd %d, func %p, uap %p\n",
+ this->fd, this->func, this->uap);
+ errno = this->ioErrno;
+ (this->func)(opaqueCtx, this->uap, this->fd, this->ioDone);
+#ifdef EVENTLIB_TIME_CHECKS
+ func = this->func;
+#endif
+ break;
+ }
+ case Timer: {
+ evTimer *this = ev->u.timer.this;
+
+ evPrintf(ctx, 5, "Dispatch.Timer: func %p, uap %p\n",
+ this->func, this->uap);
+ (this->func)(opaqueCtx, this->uap, this->due, this->inter);
+#ifdef EVENTLIB_TIME_CHECKS
+ func = this->func;
+#endif
+ break;
+ }
+ case Wait: {
+ evWait *this = ev->u.wait.this;
+
+ evPrintf(ctx, 5,
+ "Dispatch.Wait: tag %p, func %p, uap %p\n",
+ this->tag, this->func, this->uap);
+ (this->func)(opaqueCtx, this->uap, this->tag);
+#ifdef EVENTLIB_TIME_CHECKS
+ func = this->func;
+#endif
+ break;
+ }
+ case Null: {
+ /* No work. */
+#ifdef EVENTLIB_TIME_CHECKS
+ func = NULL;
+#endif
+ break;
+ }
+ default: {
+ abort();
+ }
+ }
+#ifdef EVENTLIB_TIME_CHECKS
+ if (ctx->debug > 0) {
+ interval = evSubTime(evNowTime(), start_time);
+ /*
+ * Complain if it took longer than 50 milliseconds.
+ *
+ * We call getuid() to make an easy to find mark in a kernel
+ * trace.
+ */
+ if (interval.tv_sec > 0 || interval.tv_nsec > 50000000)
+ evPrintf(ctx, 1,
+ "dispatch interval %u.%09u uid %d type %d func %p\n",
+ interval.tv_sec, interval.tv_nsec,
+ getuid(), ev->type, func);
+ }
+#endif
+ ctx->cur = NULL;
+ evDrop(opaqueCtx, opaqueEv);
+ return (0);
+}
+
+void
+evDrop(evContext opaqueCtx, evEvent opaqueEv) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evEvent_p *ev = opaqueEv.opaque;
+
+ switch (ev->type) {
+ case Accept: {
+ FREE(ev->u.accept.this);
+ break;
+ }
+ case File: {
+ /* No work. */
+ break;
+ }
+ case Stream: {
+ evStreamID id;
+
+ id.opaque = ev->u.stream.this;
+ (void) evCancelRW(opaqueCtx, id);
+ break;
+ }
+ case Timer: {
+ evTimer *this = ev->u.timer.this;
+ evTimerID opaque;
+
+ /* Check to see whether the user func cleared the timer. */
+ if (heap_element(ctx->timers, this->index) != this) {
+ evPrintf(ctx, 5, "Dispatch.Timer: timer rm'd?\n");
+ break;
+ }
+ /*
+ * Timer is still there. Delete it if it has expired,
+ * otherwise set it according to its next interval.
+ */
+ if (this->inter.tv_sec == (time_t)0 &&
+ this->inter.tv_nsec == 0L) {
+ opaque.opaque = this;
+ (void) evClearTimer(opaqueCtx, opaque);
+ } else {
+ opaque.opaque = this;
+ (void) evResetTimer(opaqueCtx, opaque, this->func,
+ this->uap,
+ evAddTime((this->mode & EV_TMR_RATE) ?
+ this->due :
+ ctx->lastEventTime,
+ this->inter),
+ this->inter);
+ }
+ break;
+ }
+ case Wait: {
+ FREE(ev->u.wait.this);
+ break;
+ }
+ case Null: {
+ /* No work. */
+ break;
+ }
+ default: {
+ abort();
+ }
+ }
+ FREE(ev);
+}
+
+int
+evMainLoop(evContext opaqueCtx) {
+ evEvent event;
+ int x;
+
+ while ((x = evGetNext(opaqueCtx, &event, EV_WAIT)) == 0)
+ if ((x = evDispatch(opaqueCtx, event)) < 0)
+ break;
+ return (x);
+}
+
+int
+evHighestFD(evContext opaqueCtx) {
+ evContext_p *ctx = opaqueCtx.opaque;
+
+ return (ctx->highestFD);
+}
+
+void
+evPrintf(const evContext_p *ctx, int level, const char *fmt, ...) {
+ va_list ap;
+
+ va_start(ap, fmt);
+ if (ctx->output != NULL && ctx->debug >= level) {
+ vfprintf(ctx->output, fmt, ap);
+ fflush(ctx->output);
+ }
+ va_end(ap);
+}
+
+int
+evSetOption(evContext *opaqueCtx, const char *option, int value) {
+ /* evContext_p *ctx = opaqueCtx->opaque; */
+
+ UNUSED(opaqueCtx);
+ UNUSED(value);
+#ifndef CLOCK_MONOTONIC
+ UNUSED(option);
+#endif
+
+#ifdef CLOCK_MONOTONIC
+ if (strcmp(option, "monotime") == 0) {
+ if (opaqueCtx != NULL)
+ errno = EINVAL;
+ if (value == 0 || value == 1) {
+ __evOptMonoTime = value;
+ return (0);
+ } else {
+ errno = EINVAL;
+ return (-1);
+ }
+ }
+#endif
+ errno = ENOENT;
+ return (-1);
+}
+
+int
+evGetOption(evContext *opaqueCtx, const char *option, int *value) {
+ /* evContext_p *ctx = opaqueCtx->opaque; */
+
+ UNUSED(opaqueCtx);
+#ifndef CLOCK_MONOTONIC
+ UNUSED(value);
+ UNUSED(option);
+#endif
+
+#ifdef CLOCK_MONOTONIC
+ if (strcmp(option, "monotime") == 0) {
+ if (opaqueCtx != NULL)
+ errno = EINVAL;
+ *value = __evOptMonoTime;
+ return (0);
+ }
+#endif
+ errno = ENOENT;
+ return (-1);
+}
+
+#if defined(NEED_PSELECT) || defined(USE_POLL)
+/* XXX needs to move to the porting library. */
+static int
+pselect(int nfds, void *rfds, void *wfds, void *efds,
+ struct timespec *tsp,
+ const sigset_t *sigmask)
+{
+ struct timeval tv, *tvp;
+ sigset_t sigs;
+ int n;
+#ifdef USE_POLL
+ int polltimeout = INFTIM;
+ evContext_p *ctx;
+ struct pollfd *fds;
+ nfds_t pnfds;
+
+ UNUSED(nfds);
+#endif /* USE_POLL */
+
+ if (tsp) {
+ tvp = &tv;
+ tv = evTimeVal(*tsp);
+#ifdef USE_POLL
+ polltimeout = 1000 * tv.tv_sec + tv.tv_usec / 1000;
+#endif /* USE_POLL */
+ } else
+ tvp = NULL;
+ if (sigmask)
+ sigprocmask(SIG_SETMASK, sigmask, &sigs);
+#ifndef USE_POLL
+ n = select(nfds, rfds, wfds, efds, tvp);
+#else
+ /*
+ * rfds, wfds, and efds should all be from the same evContext_p,
+ * so any of them will do. If they're all NULL, the caller is
+ * presumably calling us to block.
+ */
+ if (rfds != NULL)
+ ctx = ((__evEmulMask *)rfds)->ctx;
+ else if (wfds != NULL)
+ ctx = ((__evEmulMask *)wfds)->ctx;
+ else if (efds != NULL)
+ ctx = ((__evEmulMask *)efds)->ctx;
+ else
+ ctx = NULL;
+ if (ctx != NULL && ctx->fdMax != -1) {
+ fds = &(ctx->pollfds[ctx->firstfd]);
+ pnfds = ctx->fdMax - ctx->firstfd + 1;
+ } else {
+ fds = NULL;
+ pnfds = 0;
+ }
+ n = poll(fds, pnfds, polltimeout);
+ if (n > 0) {
+ int i, e;
+
+ INSIST(ctx != NULL);
+ for (e = 0, i = ctx->firstfd; i <= ctx->fdMax; i++) {
+ if (ctx->pollfds[i].fd < 0)
+ continue;
+ if (FD_ISSET(i, &ctx->rdLast))
+ e++;
+ if (FD_ISSET(i, &ctx->wrLast))
+ e++;
+ if (FD_ISSET(i, &ctx->exLast))
+ e++;
+ }
+ n = e;
+ }
+#endif /* USE_POLL */
+ if (sigmask)
+ sigprocmask(SIG_SETMASK, &sigs, NULL);
+ if (tsp)
+ *tsp = evTimeSpec(tv);
+ return (n);
+}
+#endif
+
+#ifdef USE_POLL
+int
+evPollfdRealloc(evContext_p *ctx, int pollfd_chunk_size, int fd) {
+
+ int i, maxnfds;
+ void *pollfds, *fdTable;
+
+ if (fd < ctx->maxnfds)
+ return (0);
+
+ /* Don't allow ridiculously small values for pollfd_chunk_size */
+ if (pollfd_chunk_size < 20)
+ pollfd_chunk_size = 20;
+
+ maxnfds = (1 + (fd/pollfd_chunk_size)) * pollfd_chunk_size;
+
+ pollfds = realloc(ctx->pollfds, maxnfds * sizeof(*ctx->pollfds));
+ if (pollfds != NULL)
+ ctx->pollfds = pollfds;
+ fdTable = realloc(ctx->fdTable, maxnfds * sizeof(*ctx->fdTable));
+ if (fdTable != NULL)
+ ctx->fdTable = fdTable;
+
+ if (pollfds == NULL || fdTable == NULL) {
+ evPrintf(ctx, 2, "pollfd() realloc (%ld) failed\n",
+ (long)maxnfds*sizeof(struct pollfd));
+ return (-1);
+ }
+
+ for (i = ctx->maxnfds; i < maxnfds; i++) {
+ ctx->pollfds[i].fd = -1;
+ ctx->pollfds[i].events = 0;
+ ctx->fdTable[i] = 0;
+ }
+
+ ctx->maxnfds = maxnfds;
+
+ return (0);
+}
+
+/* Find the appropriate 'events' or 'revents' field in the pollfds array */
+short *
+__fd_eventfield(int fd, __evEmulMask *maskp) {
+
+ evContext_p *ctx = (evContext_p *)maskp->ctx;
+
+ if (!maskp->result || maskp->type == EV_WASNONBLOCKING)
+ return (&(ctx->pollfds[fd].events));
+ else
+ return (&(ctx->pollfds[fd].revents));
+}
+
+/* Translate to poll(2) event */
+short
+__poll_event(__evEmulMask *maskp) {
+
+ switch ((maskp)->type) {
+ case EV_READ:
+ return (POLLRDNORM);
+ case EV_WRITE:
+ return (POLLWRNORM);
+ case EV_EXCEPT:
+ return (POLLRDBAND | POLLPRI | POLLWRBAND);
+ case EV_WASNONBLOCKING:
+ return (POLLHUP);
+ default:
+ return (0);
+ }
+}
+
+/*
+ * Clear the events corresponding to the specified mask. If this leaves
+ * the events mask empty (apart from the POLLHUP bit), set the fd field
+ * to -1 so that poll(2) will ignore this fd.
+ */
+void
+__fd_clr(int fd, __evEmulMask *maskp) {
+
+ evContext_p *ctx = maskp->ctx;
+
+ *__fd_eventfield(fd, maskp) &= ~__poll_event(maskp);
+ if ((ctx->pollfds[fd].events & ~POLLHUP) == 0) {
+ ctx->pollfds[fd].fd = -1;
+ if (fd == ctx->fdMax)
+ while (ctx->fdMax > ctx->firstfd &&
+ ctx->pollfds[ctx->fdMax].fd < 0)
+ ctx->fdMax--;
+ if (fd == ctx->firstfd)
+ while (ctx->firstfd <= ctx->fdMax &&
+ ctx->pollfds[ctx->firstfd].fd < 0)
+ ctx->firstfd++;
+ /*
+ * Do we have a empty set of descriptors?
+ */
+ if (ctx->firstfd > ctx->fdMax) {
+ ctx->fdMax = -1;
+ ctx->firstfd = 0;
+ }
+ }
+}
+
+/*
+ * Set the events bit(s) corresponding to the specified mask. If the events
+ * field has any other bits than POLLHUP set, also set the fd field so that
+ * poll(2) will watch this fd.
+ */
+void
+__fd_set(int fd, __evEmulMask *maskp) {
+
+ evContext_p *ctx = maskp->ctx;
+
+ *__fd_eventfield(fd, maskp) |= __poll_event(maskp);
+ if ((ctx->pollfds[fd].events & ~POLLHUP) != 0) {
+ ctx->pollfds[fd].fd = fd;
+ if (fd < ctx->firstfd || ctx->fdMax == -1)
+ ctx->firstfd = fd;
+ if (fd > ctx->fdMax)
+ ctx->fdMax = fd;
+ }
+}
+#endif /* USE_POLL */
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/eventlib_p.h b/usr/src/lib/libresolv2_joy/common/isc/eventlib_p.h
new file mode 100644
index 0000000000..0a3614ab23
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/eventlib_p.h
@@ -0,0 +1,281 @@
+/*
+ * Copyright (c) 2005 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1995-1999 by Internet Software Consortium
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*! \file
+ * \brief private interfaces for eventlib
+ * \author vix 09sep95 [initial]
+ *
+ * $Id: eventlib_p.h,v 1.9 2006/03/09 23:57:56 marka Exp $
+ */
+
+#ifndef _EVENTLIB_P_H
+#define _EVENTLIB_P_H
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <sys/un.h>
+
+#define EVENTLIB_DEBUG 1
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <isc/heap.h>
+#include <isc/list.h>
+#include <isc/memcluster.h>
+
+#define EV_MASK_ALL (EV_READ | EV_WRITE | EV_EXCEPT)
+#define EV_ERR(e) return (errno = (e), -1)
+#define OK(x) if ((x) < 0) EV_ERR(errno); else (void)NULL
+#define OKFREE(x, y) if ((x) < 0) { FREE((y)); EV_ERR(errno); } \
+ else (void)NULL
+
+#define NEW(p) if (((p) = memget(sizeof *(p))) != NULL) \
+ FILL(p); \
+ else \
+ (void)NULL;
+#define OKNEW(p) if (!((p) = memget(sizeof *(p)))) { \
+ errno = ENOMEM; \
+ return (-1); \
+ } else \
+ FILL(p)
+#define FREE(p) memput((p), sizeof *(p))
+
+#if EVENTLIB_DEBUG
+#define FILL(p) memset((p), 0xF5, sizeof *(p))
+#else
+#define FILL(p)
+#endif
+
+#ifdef USE_POLL
+#ifdef HAVE_STROPTS_H
+#include <stropts.h>
+#endif
+#include <poll.h>
+#endif /* USE_POLL */
+
+typedef struct evConn {
+ evConnFunc func;
+ void * uap;
+ int fd;
+ int flags;
+#define EV_CONN_LISTEN 0x0001 /*%< Connection is a listener. */
+#define EV_CONN_SELECTED 0x0002 /*%< evSelectFD(conn->file). */
+#define EV_CONN_BLOCK 0x0004 /*%< Listener fd was blocking. */
+ evFileID file;
+ struct evConn * prev;
+ struct evConn * next;
+} evConn;
+
+typedef struct evAccept {
+ int fd;
+ union {
+ struct sockaddr sa;
+ struct sockaddr_in in;
+#ifndef NO_SOCKADDR_UN
+ struct sockaddr_un un;
+#endif
+ } la;
+ ISC_SOCKLEN_T lalen;
+ union {
+ struct sockaddr sa;
+ struct sockaddr_in in;
+#ifndef NO_SOCKADDR_UN
+ struct sockaddr_un un;
+#endif
+ } ra;
+ ISC_SOCKLEN_T ralen;
+ int ioErrno;
+ evConn * conn;
+ LINK(struct evAccept) link;
+} evAccept;
+
+typedef struct evFile {
+ evFileFunc func;
+ void * uap;
+ int fd;
+ int eventmask;
+ int preemptive;
+ struct evFile * prev;
+ struct evFile * next;
+ struct evFile * fdprev;
+ struct evFile * fdnext;
+} evFile;
+
+typedef struct evStream {
+ evStreamFunc func;
+ void * uap;
+ evFileID file;
+ evTimerID timer;
+ int flags;
+#define EV_STR_TIMEROK 0x0001 /*%< IFF timer valid. */
+ int fd;
+ struct iovec * iovOrig;
+ int iovOrigCount;
+ struct iovec * iovCur;
+ int iovCurCount;
+ int ioTotal;
+ int ioDone;
+ int ioErrno;
+ struct evStream *prevDone, *nextDone;
+ struct evStream *prev, *next;
+} evStream;
+
+typedef struct evTimer {
+ evTimerFunc func;
+ void * uap;
+ struct timespec due, inter;
+ int index;
+ int mode;
+#define EV_TMR_RATE 1
+} evTimer;
+
+typedef struct evWait {
+ evWaitFunc func;
+ void * uap;
+ const void * tag;
+ struct evWait * next;
+} evWait;
+
+typedef struct evWaitList {
+ evWait * first;
+ evWait * last;
+ struct evWaitList * prev;
+ struct evWaitList * next;
+} evWaitList;
+
+typedef struct evEvent_p {
+ enum { Accept, File, Stream, Timer, Wait, Free, Null } type;
+ union {
+ struct { evAccept *this; } accept;
+ struct { evFile *this; int eventmask; } file;
+ struct { evStream *this; } stream;
+ struct { evTimer *this; } timer;
+ struct { evWait *this; } wait;
+ struct { struct evEvent_p *next; } free;
+ struct { const void *placeholder; } null;
+ } u;
+} evEvent_p;
+
+#ifdef USE_POLL
+typedef struct {
+ void *ctx; /* pointer to the evContext_p */
+ uint32_t type; /* READ, WRITE, EXCEPT, nonblk */
+ uint32_t result; /* 1 => revents, 0 => events */
+} __evEmulMask;
+
+#define emulMaskInit(ctx, field, ev, lastnext) \
+ ctx->field.ctx = ctx; \
+ ctx->field.type = ev; \
+ ctx->field.result = lastnext;
+
+extern short *__fd_eventfield(int fd, __evEmulMask *maskp);
+extern short __poll_event(__evEmulMask *maskp);
+extern void __fd_clr(int fd, __evEmulMask *maskp);
+extern void __fd_set(int fd, __evEmulMask *maskp);
+
+#undef FD_ZERO
+#define FD_ZERO(maskp)
+
+#undef FD_SET
+#define FD_SET(fd, maskp) \
+ __fd_set(fd, maskp)
+
+#undef FD_CLR
+#define FD_CLR(fd, maskp) \
+ __fd_clr(fd, maskp)
+
+#undef FD_ISSET
+#define FD_ISSET(fd, maskp) \
+ ((*__fd_eventfield(fd, maskp) & __poll_event(maskp)) != 0)
+
+#endif /* USE_POLL */
+
+typedef struct {
+ /* Global. */
+ const evEvent_p *cur;
+ /* Debugging. */
+ int debug;
+ FILE *output;
+ /* Connections. */
+ evConn *conns;
+ LIST(evAccept) accepts;
+ /* Files. */
+ evFile *files, *fdNext;
+#ifndef USE_POLL
+ fd_set rdLast, rdNext;
+ fd_set wrLast, wrNext;
+ fd_set exLast, exNext;
+ fd_set nonblockBefore;
+ int fdMax, fdCount, highestFD;
+ evFile *fdTable[FD_SETSIZE];
+#else
+ struct pollfd *pollfds; /* Allocated as needed */
+ evFile **fdTable; /* Ditto */
+ int maxnfds; /* # elements in above */
+ int firstfd; /* First active fd */
+ int fdMax; /* Last active fd */
+ int fdCount; /* # fd:s with I/O */
+ int highestFD; /* max fd allowed by OS */
+ __evEmulMask rdLast, rdNext;
+ __evEmulMask wrLast, wrNext;
+ __evEmulMask exLast, exNext;
+ __evEmulMask nonblockBefore;
+#endif /* USE_POLL */
+#ifdef EVENTLIB_TIME_CHECKS
+ struct timespec lastSelectTime;
+ int lastFdCount;
+#endif
+ /* Streams. */
+ evStream *streams;
+ evStream *strDone, *strLast;
+ /* Timers. */
+ struct timespec lastEventTime;
+ heap_context timers;
+ /* Waits. */
+ evWaitList *waitLists;
+ evWaitList waitDone;
+} evContext_p;
+
+/* eventlib.c */
+#define evPrintf __evPrintf
+void evPrintf(const evContext_p *ctx, int level, const char *fmt, ...)
+ ISC_FORMAT_PRINTF(3, 4);
+
+#ifdef USE_POLL
+extern int evPollfdRealloc(evContext_p *ctx, int pollfd_chunk_size, int fd);
+#endif /* USE_POLL */
+
+/* ev_timers.c */
+#define evCreateTimers __evCreateTimers
+heap_context evCreateTimers(const evContext_p *);
+#define evDestroyTimers __evDestroyTimers
+void evDestroyTimers(const evContext_p *);
+
+/* ev_waits.c */
+#define evFreeWait __evFreeWait
+evWait *evFreeWait(evContext_p *ctx, evWait *old);
+
+/* Global options */
+extern int __evOptMonoTime;
+
+#endif /*_EVENTLIB_P_H*/
diff --git a/usr/src/lib/libresolv2_joy/common/isc/heap.c b/usr/src/lib/libresolv2_joy/common/isc/heap.c
new file mode 100644
index 0000000000..3d22b6fc71
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/heap.c
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1997,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*%
+ * Heap implementation of priority queues adapted from the following:
+ *
+ * _Introduction to Algorithms_, Cormen, Leiserson, and Rivest,
+ * MIT Press / McGraw Hill, 1990, ISBN 0-262-03141-8, chapter 7.
+ *
+ * _Algorithms_, Second Edition, Sedgewick, Addison-Wesley, 1988,
+ * ISBN 0-201-06673-4, chapter 11.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: heap.c,v 1.4 2006/03/09 23:57:56 marka Exp $";
+#endif /* not lint */
+
+#include "port_before.h"
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include "port_after.h"
+
+#include <isc/heap.h>
+
+/*%
+ * Note: to make heap_parent and heap_left easy to compute, the first
+ * element of the heap array is not used; i.e. heap subscripts are 1-based,
+ * not 0-based.
+ */
+#define heap_parent(i) ((i) >> 1)
+#define heap_left(i) ((i) << 1)
+
+#define ARRAY_SIZE_INCREMENT 512
+
+heap_context
+heap_new(heap_higher_priority_func higher_priority, heap_index_func index,
+ int array_size_increment) {
+ heap_context ctx;
+
+ if (higher_priority == NULL)
+ return (NULL);
+
+ ctx = (heap_context)malloc(sizeof (struct heap_context));
+ if (ctx == NULL)
+ return (NULL);
+
+ ctx->array_size = 0;
+ if (array_size_increment == 0)
+ ctx->array_size_increment = ARRAY_SIZE_INCREMENT;
+ else
+ ctx->array_size_increment = array_size_increment;
+ ctx->heap_size = 0;
+ ctx->heap = NULL;
+ ctx->higher_priority = higher_priority;
+ ctx->index = index;
+ return (ctx);
+}
+
+int
+heap_free(heap_context ctx) {
+ if (ctx == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ if (ctx->heap != NULL)
+ free(ctx->heap);
+ free(ctx);
+
+ return (0);
+}
+
+static int
+heap_resize(heap_context ctx) {
+ void **new_heap;
+
+ ctx->array_size += ctx->array_size_increment;
+ new_heap = (void **)realloc(ctx->heap,
+ (ctx->array_size) * (sizeof (void *)));
+ if (new_heap == NULL) {
+ errno = ENOMEM;
+ return (-1);
+ }
+ ctx->heap = new_heap;
+ return (0);
+}
+
+static void
+float_up(heap_context ctx, int i, void *elt) {
+ int p;
+
+ for ( p = heap_parent(i);
+ i > 1 && ctx->higher_priority(elt, ctx->heap[p]);
+ i = p, p = heap_parent(i) ) {
+ ctx->heap[i] = ctx->heap[p];
+ if (ctx->index != NULL)
+ (ctx->index)(ctx->heap[i], i);
+ }
+ ctx->heap[i] = elt;
+ if (ctx->index != NULL)
+ (ctx->index)(ctx->heap[i], i);
+}
+
+static void
+sink_down(heap_context ctx, int i, void *elt) {
+ int j, size, half_size;
+
+ size = ctx->heap_size;
+ half_size = size / 2;
+ while (i <= half_size) {
+ /* find smallest of the (at most) two children */
+ j = heap_left(i);
+ if (j < size && ctx->higher_priority(ctx->heap[j+1],
+ ctx->heap[j]))
+ j++;
+ if (ctx->higher_priority(elt, ctx->heap[j]))
+ break;
+ ctx->heap[i] = ctx->heap[j];
+ if (ctx->index != NULL)
+ (ctx->index)(ctx->heap[i], i);
+ i = j;
+ }
+ ctx->heap[i] = elt;
+ if (ctx->index != NULL)
+ (ctx->index)(ctx->heap[i], i);
+}
+
+int
+heap_insert(heap_context ctx, void *elt) {
+ int i;
+
+ if (ctx == NULL || elt == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ i = ++ctx->heap_size;
+ if (ctx->heap_size >= ctx->array_size && heap_resize(ctx) < 0)
+ return (-1);
+
+ float_up(ctx, i, elt);
+
+ return (0);
+}
+
+int
+heap_delete(heap_context ctx, int i) {
+ void *elt;
+ int less;
+
+ if (ctx == NULL || i < 1 || i > ctx->heap_size) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ if (i == ctx->heap_size) {
+ ctx->heap_size--;
+ } else {
+ elt = ctx->heap[ctx->heap_size--];
+ less = ctx->higher_priority(elt, ctx->heap[i]);
+ ctx->heap[i] = elt;
+ if (less)
+ float_up(ctx, i, ctx->heap[i]);
+ else
+ sink_down(ctx, i, ctx->heap[i]);
+ }
+
+ return (0);
+}
+
+int
+heap_increased(heap_context ctx, int i) {
+ if (ctx == NULL || i < 1 || i > ctx->heap_size) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ float_up(ctx, i, ctx->heap[i]);
+
+ return (0);
+}
+
+int
+heap_decreased(heap_context ctx, int i) {
+ if (ctx == NULL || i < 1 || i > ctx->heap_size) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ sink_down(ctx, i, ctx->heap[i]);
+
+ return (0);
+}
+
+void *
+heap_element(heap_context ctx, int i) {
+ if (ctx == NULL || i < 1 || i > ctx->heap_size) {
+ errno = EINVAL;
+ return (NULL);
+ }
+
+ return (ctx->heap[i]);
+}
+
+int
+heap_for_each(heap_context ctx, heap_for_each_func action, void *uap) {
+ int i;
+
+ if (ctx == NULL || action == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ for (i = 1; i <= ctx->heap_size; i++)
+ (action)(ctx->heap[i], uap);
+ return (0);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/hex.c b/usr/src/lib/libresolv2_joy/common/isc/hex.c
new file mode 100644
index 0000000000..e43be4f3b5
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/hex.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 2001 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <port_before.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+#include <isc/misc.h>
+#include <port_after.h>
+
+static const char hex[17] = "0123456789abcdef";
+
+int
+isc_gethexstring(unsigned char *buf, size_t len, int count, FILE *fp,
+ int *multiline)
+{
+ int c, n;
+ unsigned char x;
+ char *s;
+ int result = count;
+
+ x = 0; /*%< silence compiler */
+ n = 0;
+ while (count > 0) {
+ c = fgetc(fp);
+
+ if ((c == EOF) ||
+ (c == '\n' && !*multiline) ||
+ (c == '(' && *multiline) ||
+ (c == ')' && !*multiline))
+ goto formerr;
+ /* comment */
+ if (c == ';') {
+ do {
+ c = fgetc(fp);
+ } while (c != EOF && c != '\n');
+ if (c == '\n' && *multiline)
+ continue;
+ goto formerr;
+ }
+ /* white space */
+ if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
+ continue;
+ /* multiline */
+ if ('(' == c || c == ')') {
+ *multiline = (c == '(' /*)*/);
+ continue;
+ }
+ if ((s = strchr(hex, tolower(c))) == NULL)
+ goto formerr;
+ x = (x<<4) | (s - hex);
+ if (++n == 2) {
+ if (len > 0U) {
+ *buf++ = x;
+ len--;
+ } else
+ result = -1;
+ count--;
+ n = 0;
+ }
+ }
+ return (result);
+
+ formerr:
+ if (c == '\n')
+ ungetc(c, fp);
+ return (-1);
+}
+
+void
+isc_puthexstring(FILE *fp, const unsigned char *buf, size_t buflen,
+ size_t len1, size_t len2, const char *sep)
+{
+ size_t i = 0;
+
+ if (len1 < 4U)
+ len1 = 4;
+ if (len2 < 4U)
+ len2 = 4;
+ while (buflen > 0U) {
+ fputc(hex[(buf[0]>>4)&0xf], fp);
+ fputc(hex[buf[0]&0xf], fp);
+ i += 2;
+ buflen--;
+ buf++;
+ if (i >= len1 && sep != NULL) {
+ fputs(sep, fp);
+ i = 0;
+ len1 = len2;
+ }
+ }
+}
+
+void
+isc_tohex(const unsigned char *buf, size_t buflen, char *t) {
+ while (buflen > 0U) {
+ *t++ = hex[(buf[0]>>4)&0xf];
+ *t++ = hex[buf[0]&0xf];
+ buf++;
+ buflen--;
+ }
+ *t = '\0';
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/logging.c b/usr/src/lib/libresolv2_joy/common/isc/logging.c
new file mode 100644
index 0000000000..8c2af2b9e3
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/logging.c
@@ -0,0 +1,716 @@
+/*
+ * Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 1996-1999, 2001, 2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: logging.c,v 1.9 2008/11/14 02:36:51 marka Exp $";
+#endif /* not lint */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/stat.h>
+
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <syslog.h>
+#include <errno.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <isc/assertions.h>
+#include <isc/logging.h>
+#include <isc/memcluster.h>
+#include <isc/misc.h>
+
+#include "port_after.h"
+
+#include "logging_p.h"
+
+static const int syslog_priority[] = { LOG_DEBUG, LOG_INFO, LOG_NOTICE,
+ LOG_WARNING, LOG_ERR, LOG_CRIT };
+
+static const char *months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
+ "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
+
+static const char *level_text[] = {
+ "info: ", "notice: ", "warning: ", "error: ", "critical: "
+};
+
+static void
+version_rename(log_channel chan) {
+ unsigned int ver;
+ char old_name[PATH_MAX+1];
+ char new_name[PATH_MAX+1];
+
+ ver = chan->out.file.versions;
+ if (ver < 1)
+ return;
+ if (ver > LOG_MAX_VERSIONS)
+ ver = LOG_MAX_VERSIONS;
+ /*
+ * Need to have room for '.nn' (XXX assumes LOG_MAX_VERSIONS < 100)
+ */
+ if (strlen(chan->out.file.name) > (size_t)(PATH_MAX-3))
+ return;
+ for (ver--; ver > 0; ver--) {
+ sprintf(old_name, "%s.%d", chan->out.file.name, ver-1);
+ sprintf(new_name, "%s.%d", chan->out.file.name, ver);
+ (void)isc_movefile(old_name, new_name);
+ }
+ sprintf(new_name, "%s.0", chan->out.file.name);
+ (void)isc_movefile(chan->out.file.name, new_name);
+}
+
+FILE *
+log_open_stream(log_channel chan) {
+ FILE *stream;
+ int fd, flags;
+ struct stat sb;
+ int regular;
+
+ if (chan == NULL || chan->type != log_file) {
+ errno = EINVAL;
+ return (NULL);
+ }
+
+ /*
+ * Don't open already open streams
+ */
+ if (chan->out.file.stream != NULL)
+ return (chan->out.file.stream);
+
+ if (stat(chan->out.file.name, &sb) < 0) {
+ if (errno != ENOENT) {
+ syslog(LOG_ERR,
+ "log_open_stream: stat of %s failed: %s",
+ chan->out.file.name, strerror(errno));
+ chan->flags |= LOG_CHANNEL_BROKEN;
+ return (NULL);
+ }
+ regular = 1;
+ } else
+ regular = (sb.st_mode & S_IFREG);
+
+ if (chan->out.file.versions) {
+ if (!regular) {
+ syslog(LOG_ERR,
+ "log_open_stream: want versions but %s isn't a regular file",
+ chan->out.file.name);
+ chan->flags |= LOG_CHANNEL_BROKEN;
+ errno = EINVAL;
+ return (NULL);
+ }
+ }
+
+ flags = O_WRONLY|O_CREAT|O_APPEND;
+
+ if ((chan->flags & LOG_TRUNCATE) != 0) {
+ if (regular) {
+ (void)unlink(chan->out.file.name);
+ flags |= O_EXCL;
+ } else {
+ syslog(LOG_ERR,
+ "log_open_stream: want truncation but %s isn't a regular file",
+ chan->out.file.name);
+ chan->flags |= LOG_CHANNEL_BROKEN;
+ errno = EINVAL;
+ return (NULL);
+ }
+ }
+
+ fd = open(chan->out.file.name, flags,
+ S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH);
+ if (fd < 0) {
+ syslog(LOG_ERR, "log_open_stream: open(%s) failed: %s",
+ chan->out.file.name, strerror(errno));
+ chan->flags |= LOG_CHANNEL_BROKEN;
+ return (NULL);
+ }
+ stream = fdopen(fd, "a");
+ if (stream == NULL) {
+ syslog(LOG_ERR, "log_open_stream: fdopen() failed");
+ chan->flags |= LOG_CHANNEL_BROKEN;
+ return (NULL);
+ }
+ (void) fchown(fd, chan->out.file.owner, chan->out.file.group);
+
+ chan->out.file.stream = stream;
+ return (stream);
+}
+
+int
+log_close_stream(log_channel chan) {
+ FILE *stream;
+
+ if (chan == NULL || chan->type != log_file) {
+ errno = EINVAL;
+ return (0);
+ }
+ stream = chan->out.file.stream;
+ chan->out.file.stream = NULL;
+ if (stream != NULL && fclose(stream) == EOF)
+ return (-1);
+ return (0);
+}
+
+void
+log_close_debug_channels(log_context lc) {
+ log_channel_list lcl;
+ int i;
+
+ for (i = 0; i < lc->num_categories; i++)
+ for (lcl = lc->categories[i]; lcl != NULL; lcl = lcl->next)
+ if (lcl->channel->type == log_file &&
+ lcl->channel->out.file.stream != NULL &&
+ lcl->channel->flags & LOG_REQUIRE_DEBUG)
+ (void)log_close_stream(lcl->channel);
+}
+
+FILE *
+log_get_stream(log_channel chan) {
+ if (chan == NULL || chan->type != log_file) {
+ errno = EINVAL;
+ return (NULL);
+ }
+ return (chan->out.file.stream);
+}
+
+char *
+log_get_filename(log_channel chan) {
+ if (chan == NULL || chan->type != log_file) {
+ errno = EINVAL;
+ return (NULL);
+ }
+ return (chan->out.file.name);
+}
+
+int
+log_check_channel(log_context lc, int level, log_channel chan) {
+ int debugging, chan_level;
+
+ REQUIRE(lc != NULL);
+
+ debugging = ((lc->flags & LOG_OPTION_DEBUG) != 0);
+
+ /*
+ * If not debugging, short circuit debugging messages very early.
+ */
+ if (level > 0 && !debugging)
+ return (0);
+
+ if ((chan->flags & (LOG_CHANNEL_BROKEN|LOG_CHANNEL_OFF)) != 0)
+ return (0);
+
+ /* Some channels only log when debugging is on. */
+ if ((chan->flags & LOG_REQUIRE_DEBUG) && !debugging)
+ return (0);
+
+ /* Some channels use the global level. */
+ if ((chan->flags & LOG_USE_CONTEXT_LEVEL) != 0) {
+ chan_level = lc->level;
+ } else
+ chan_level = chan->level;
+
+ if (level > chan_level)
+ return (0);
+
+ return (1);
+}
+
+int
+log_check(log_context lc, int category, int level) {
+ log_channel_list lcl;
+ int debugging;
+
+ REQUIRE(lc != NULL);
+
+ debugging = ((lc->flags & LOG_OPTION_DEBUG) != 0);
+
+ /*
+ * If not debugging, short circuit debugging messages very early.
+ */
+ if (level > 0 && !debugging)
+ return (0);
+
+ if (category < 0 || category > lc->num_categories)
+ category = 0; /*%< use default */
+ lcl = lc->categories[category];
+ if (lcl == NULL) {
+ category = 0;
+ lcl = lc->categories[0];
+ }
+
+ for ( /* nothing */; lcl != NULL; lcl = lcl->next) {
+ if (log_check_channel(lc, level, lcl->channel))
+ return (1);
+ }
+ return (0);
+}
+
+void
+log_vwrite(log_context lc, int category, int level, const char *format,
+ va_list args) {
+ log_channel_list lcl;
+ int pri, debugging, did_vsprintf = 0;
+ int original_category;
+ FILE *stream;
+ log_channel chan;
+ struct timeval tv;
+ struct tm *local_tm;
+#ifdef HAVE_TIME_R
+ struct tm tm_tmp;
+#endif
+ time_t tt;
+ const char *category_name;
+ const char *level_str;
+ char time_buf[256];
+ char level_buf[256];
+
+ REQUIRE(lc != NULL);
+
+ debugging = (lc->flags & LOG_OPTION_DEBUG);
+
+ /*
+ * If not debugging, short circuit debugging messages very early.
+ */
+ if (level > 0 && !debugging)
+ return;
+
+ if (category < 0 || category > lc->num_categories)
+ category = 0; /*%< use default */
+ original_category = category;
+ lcl = lc->categories[category];
+ if (lcl == NULL) {
+ category = 0;
+ lcl = lc->categories[0];
+ }
+
+ /*
+ * Get the current time and format it.
+ */
+ time_buf[0]='\0';
+ if (gettimeofday(&tv, NULL) < 0) {
+ syslog(LOG_INFO, "gettimeofday failed in log_vwrite()");
+ } else {
+ tt = tv.tv_sec;
+#ifdef HAVE_TIME_R
+ local_tm = localtime_r(&tt, &tm_tmp);
+#else
+ local_tm = localtime(&tt);
+#endif
+ if (local_tm != NULL) {
+ sprintf(time_buf, "%02d-%s-%4d %02d:%02d:%02d.%03ld ",
+ local_tm->tm_mday, months[local_tm->tm_mon],
+ local_tm->tm_year+1900, local_tm->tm_hour,
+ local_tm->tm_min, local_tm->tm_sec,
+ (long)tv.tv_usec/1000);
+ }
+ }
+
+ /*
+ * Make a string representation of the current category and level
+ */
+
+ if (lc->category_names != NULL &&
+ lc->category_names[original_category] != NULL)
+ category_name = lc->category_names[original_category];
+ else
+ category_name = "";
+
+ if (level >= log_critical) {
+ if (level >= 0) {
+ sprintf(level_buf, "debug %d: ", level);
+ level_str = level_buf;
+ } else
+ level_str = level_text[-level-1];
+ } else {
+ sprintf(level_buf, "level %d: ", level);
+ level_str = level_buf;
+ }
+
+ /*
+ * Write the message to channels.
+ */
+ for ( /* nothing */; lcl != NULL; lcl = lcl->next) {
+ chan = lcl->channel;
+
+ if (!log_check_channel(lc, level, chan))
+ continue;
+
+ if (!did_vsprintf) {
+ (void)vsprintf(lc->buffer, format, args);
+ if (strlen(lc->buffer) > (size_t)LOG_BUFFER_SIZE) {
+ syslog(LOG_CRIT,
+ "memory overrun in log_vwrite()");
+ exit(1);
+ }
+ did_vsprintf = 1;
+ }
+
+ switch (chan->type) {
+ case log_syslog:
+ if (level >= log_critical)
+ pri = (level >= 0) ? 0 : -level;
+ else
+ pri = -log_critical;
+ syslog(chan->out.facility|syslog_priority[pri],
+ "%s%s%s%s",
+ (chan->flags & LOG_TIMESTAMP) ? time_buf : "",
+ (chan->flags & LOG_PRINT_CATEGORY) ?
+ category_name : "",
+ (chan->flags & LOG_PRINT_LEVEL) ?
+ level_str : "",
+ lc->buffer);
+ break;
+ case log_file:
+ stream = chan->out.file.stream;
+ if (stream == NULL) {
+ stream = log_open_stream(chan);
+ if (stream == NULL)
+ break;
+ }
+ if (chan->out.file.max_size != ULONG_MAX) {
+ long pos;
+
+ pos = ftell(stream);
+ if (pos >= 0 &&
+ (unsigned long)pos >
+ chan->out.file.max_size) {
+ /*
+ * try to roll over the log files,
+ * ignoring all all return codes
+ * except the open (we don't want
+ * to write any more anyway)
+ */
+ log_close_stream(chan);
+ version_rename(chan);
+ stream = log_open_stream(chan);
+ if (stream == NULL)
+ break;
+ }
+ }
+ fprintf(stream, "%s%s%s%s\n",
+ (chan->flags & LOG_TIMESTAMP) ? time_buf : "",
+ (chan->flags & LOG_PRINT_CATEGORY) ?
+ category_name : "",
+ (chan->flags & LOG_PRINT_LEVEL) ?
+ level_str : "",
+ lc->buffer);
+ fflush(stream);
+ break;
+ case log_null:
+ break;
+ default:
+ syslog(LOG_ERR,
+ "unknown channel type in log_vwrite()");
+ }
+ }
+}
+
+void
+log_write(log_context lc, int category, int level, const char *format, ...) {
+ va_list args;
+
+ va_start(args, format);
+ log_vwrite(lc, category, level, format, args);
+ va_end(args);
+}
+
+/*%
+ * Functions to create, set, or destroy contexts
+ */
+
+int
+log_new_context(int num_categories, char **category_names, log_context *lc) {
+ log_context nlc;
+
+ nlc = memget(sizeof (struct log_context));
+ if (nlc == NULL) {
+ errno = ENOMEM;
+ return (-1);
+ }
+ nlc->num_categories = num_categories;
+ nlc->category_names = category_names;
+ nlc->categories = memget(num_categories * sizeof (log_channel_list));
+ if (nlc->categories == NULL) {
+ memput(nlc, sizeof (struct log_context));
+ errno = ENOMEM;
+ return (-1);
+ }
+ memset(nlc->categories, '\0',
+ num_categories * sizeof (log_channel_list));
+ nlc->flags = 0U;
+ nlc->level = 0;
+ *lc = nlc;
+ return (0);
+}
+
+void
+log_free_context(log_context lc) {
+ log_channel_list lcl, lcl_next;
+ log_channel chan;
+ int i;
+
+ REQUIRE(lc != NULL);
+
+ for (i = 0; i < lc->num_categories; i++)
+ for (lcl = lc->categories[i]; lcl != NULL; lcl = lcl_next) {
+ lcl_next = lcl->next;
+ chan = lcl->channel;
+ (void)log_free_channel(chan);
+ memput(lcl, sizeof (struct log_channel_list));
+ }
+ memput(lc->categories,
+ lc->num_categories * sizeof (log_channel_list));
+ memput(lc, sizeof (struct log_context));
+}
+
+int
+log_add_channel(log_context lc, int category, log_channel chan) {
+ log_channel_list lcl;
+
+ if (lc == NULL || category < 0 || category >= lc->num_categories) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ lcl = memget(sizeof (struct log_channel_list));
+ if (lcl == NULL) {
+ errno = ENOMEM;
+ return(-1);
+ }
+ lcl->channel = chan;
+ lcl->next = lc->categories[category];
+ lc->categories[category] = lcl;
+ chan->references++;
+ return (0);
+}
+
+int
+log_remove_channel(log_context lc, int category, log_channel chan) {
+ log_channel_list lcl, prev_lcl, next_lcl;
+ int found = 0;
+
+ if (lc == NULL || category < 0 || category >= lc->num_categories) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ for (prev_lcl = NULL, lcl = lc->categories[category];
+ lcl != NULL;
+ lcl = next_lcl) {
+ next_lcl = lcl->next;
+ if (lcl->channel == chan) {
+ log_free_channel(chan);
+ if (prev_lcl != NULL)
+ prev_lcl->next = next_lcl;
+ else
+ lc->categories[category] = next_lcl;
+ memput(lcl, sizeof (struct log_channel_list));
+ /*
+ * We just set found instead of returning because
+ * the channel might be on the list more than once.
+ */
+ found = 1;
+ } else
+ prev_lcl = lcl;
+ }
+ if (!found) {
+ errno = ENOENT;
+ return (-1);
+ }
+ return (0);
+}
+
+int
+log_option(log_context lc, int option, int value) {
+ if (lc == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+ switch (option) {
+ case LOG_OPTION_DEBUG:
+ if (value)
+ lc->flags |= option;
+ else
+ lc->flags &= ~option;
+ break;
+ case LOG_OPTION_LEVEL:
+ lc->level = value;
+ break;
+ default:
+ errno = EINVAL;
+ return (-1);
+ }
+ return (0);
+}
+
+int
+log_category_is_active(log_context lc, int category) {
+ if (lc == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+ if (category >= 0 && category < lc->num_categories &&
+ lc->categories[category] != NULL)
+ return (1);
+ return (0);
+}
+
+log_channel
+log_new_syslog_channel(unsigned int flags, int level, int facility) {
+ log_channel chan;
+
+ chan = memget(sizeof (struct log_channel));
+ if (chan == NULL) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ chan->type = log_syslog;
+ chan->flags = flags;
+ chan->level = level;
+ chan->out.facility = facility;
+ chan->references = 0;
+ return (chan);
+}
+
+log_channel
+log_new_file_channel(unsigned int flags, int level,
+ const char *name, FILE *stream, unsigned int versions,
+ unsigned long max_size) {
+ log_channel chan;
+
+ chan = memget(sizeof (struct log_channel));
+ if (chan == NULL) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ chan->type = log_file;
+ chan->flags = flags;
+ chan->level = level;
+ if (name != NULL) {
+ size_t len;
+
+ len = strlen(name);
+ /*
+ * Quantize length to a multiple of 256. There's space for the
+ * NUL, since if len is a multiple of 256, the size chosen will
+ * be the next multiple.
+ */
+ chan->out.file.name_size = ((len / 256) + 1) * 256;
+ chan->out.file.name = memget(chan->out.file.name_size);
+ if (chan->out.file.name == NULL) {
+ memput(chan, sizeof (struct log_channel));
+ errno = ENOMEM;
+ return (NULL);
+ }
+ /* This is safe. */
+ strcpy(chan->out.file.name, name);
+ } else {
+ chan->out.file.name_size = 0;
+ chan->out.file.name = NULL;
+ }
+ chan->out.file.stream = stream;
+ chan->out.file.versions = versions;
+ chan->out.file.max_size = max_size;
+ chan->out.file.owner = getuid();
+ chan->out.file.group = getgid();
+ chan->references = 0;
+ return (chan);
+}
+
+int
+log_set_file_owner(log_channel chan, uid_t owner, gid_t group) {
+ if (chan->type != log_file) {
+ errno = EBADF;
+ return (-1);
+ }
+ chan->out.file.owner = owner;
+ chan->out.file.group = group;
+ return (0);
+}
+
+log_channel
+log_new_null_channel() {
+ log_channel chan;
+
+ chan = memget(sizeof (struct log_channel));
+ if (chan == NULL) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ chan->type = log_null;
+ chan->flags = LOG_CHANNEL_OFF;
+ chan->level = log_info;
+ chan->references = 0;
+ return (chan);
+}
+
+int
+log_inc_references(log_channel chan) {
+ if (chan == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+ chan->references++;
+ return (0);
+}
+
+int
+log_dec_references(log_channel chan) {
+ if (chan == NULL || chan->references <= 0) {
+ errno = EINVAL;
+ return (-1);
+ }
+ chan->references--;
+ return (0);
+}
+
+log_channel_type
+log_get_channel_type(log_channel chan) {
+ REQUIRE(chan != NULL);
+
+ return (chan->type);
+}
+
+int
+log_free_channel(log_channel chan) {
+ if (chan == NULL || chan->references <= 0) {
+ errno = EINVAL;
+ return (-1);
+ }
+ chan->references--;
+ if (chan->references == 0) {
+ if (chan->type == log_file) {
+ if ((chan->flags & LOG_CLOSE_STREAM) &&
+ chan->out.file.stream != NULL)
+ (void)fclose(chan->out.file.stream);
+ if (chan->out.file.name != NULL)
+ memput(chan->out.file.name,
+ chan->out.file.name_size);
+ }
+ memput(chan, sizeof (struct log_channel));
+ }
+ return (0);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/logging_p.h b/usr/src/lib/libresolv2_joy/common/isc/logging_p.h
new file mode 100644
index 0000000000..5e6314f190
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/logging_p.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef LOGGING_P_H
+#define LOGGING_P_H
+
+typedef struct log_file_desc {
+ char *name;
+ size_t name_size;
+ FILE *stream;
+ unsigned int versions;
+ unsigned long max_size;
+ uid_t owner;
+ gid_t group;
+} log_file_desc;
+
+typedef union log_output {
+ int facility;
+ log_file_desc file;
+} log_output;
+
+struct log_channel {
+ int level; /*%< don't log messages > level */
+ log_channel_type type;
+ log_output out;
+ unsigned int flags;
+ int references;
+};
+
+typedef struct log_channel_list {
+ log_channel channel;
+ struct log_channel_list *next;
+} *log_channel_list;
+
+#define LOG_BUFFER_SIZE 20480
+
+struct log_context {
+ int num_categories;
+ char **category_names;
+ log_channel_list *categories;
+ int flags;
+ int level;
+ char buffer[LOG_BUFFER_SIZE];
+};
+
+#endif /* !LOGGING_P_H */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/memcluster.c b/usr/src/lib/libresolv2_joy/common/isc/memcluster.c
new file mode 100644
index 0000000000..515793fd6a
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/memcluster.c
@@ -0,0 +1,588 @@
+/*
+ * Copyright (c) 2005 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1997,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+
+/* When this symbol is defined allocations via memget are made slightly
+ bigger and some debugging info stuck before and after the region given
+ back to the caller. */
+/* #define DEBUGGING_MEMCLUSTER */
+#define MEMCLUSTER_ATEND
+
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: memcluster.c,v 1.11 2006/08/30 23:34:38 marka Exp $";
+#endif /* not lint */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <isc/memcluster.h>
+#include <isc/assertions.h>
+
+#include "port_after.h"
+
+#ifdef MEMCLUSTER_RECORD
+#ifndef DEBUGGING_MEMCLUSTER
+#define DEBUGGING_MEMCLUSTER
+#endif
+#endif
+
+#define DEF_MAX_SIZE 1100
+#define DEF_MEM_TARGET 4096
+
+typedef u_int32_t fence_t;
+
+typedef struct {
+ void * next;
+#if defined(DEBUGGING_MEMCLUSTER)
+#if defined(MEMCLUSTER_RECORD)
+ const char * file;
+ int line;
+#endif
+ size_t size;
+ fence_t fencepost;
+#endif
+} memcluster_element;
+
+#define SMALL_SIZE_LIMIT sizeof(memcluster_element)
+#define P_SIZE sizeof(void *)
+#define FRONT_FENCEPOST 0xfebafeba
+#define BACK_FENCEPOST 0xabefabef
+#define FENCEPOST_SIZE 4
+
+#ifndef MEMCLUSTER_LITTLE_MALLOC
+#define MEMCLUSTER_BIG_MALLOC 1
+#define NUM_BASIC_BLOCKS 64
+#endif
+
+struct stats {
+ u_long gets;
+ u_long totalgets;
+ u_long blocks;
+ u_long freefrags;
+};
+
+#ifdef DO_PTHREADS
+#include <pthread.h>
+static pthread_mutex_t memlock = PTHREAD_MUTEX_INITIALIZER;
+#define MEMLOCK (void)pthread_mutex_lock(&memlock)
+#define MEMUNLOCK (void)pthread_mutex_unlock(&memlock)
+#else
+/*
+ * Catch bad lock usage in non threaded build.
+ */
+static unsigned int memlock = 0;
+#define MEMLOCK do { INSIST(memlock == 0); memlock = 1; } while (0)
+#define MEMUNLOCK do { INSIST(memlock == 1); memlock = 0; } while (0)
+#endif /* DO_PTHEADS */
+
+/* Private data. */
+
+static size_t max_size;
+static size_t mem_target;
+#ifndef MEMCLUSTER_BIG_MALLOC
+static size_t mem_target_half;
+static size_t mem_target_fudge;
+#endif
+static memcluster_element ** freelists;
+#ifdef MEMCLUSTER_RECORD
+static memcluster_element ** activelists;
+#endif
+#ifdef MEMCLUSTER_BIG_MALLOC
+static memcluster_element * basic_blocks;
+#endif
+static struct stats * stats;
+
+/* Forward. */
+
+static size_t quantize(size_t);
+#if defined(DEBUGGING_MEMCLUSTER)
+static void check(unsigned char *, int, size_t);
+#endif
+
+/* Public. */
+
+int
+meminit(size_t init_max_size, size_t target_size) {
+
+#if defined(DEBUGGING_MEMCLUSTER)
+ INSIST(sizeof(fence_t) == FENCEPOST_SIZE);
+#endif
+ if (freelists != NULL) {
+ errno = EEXIST;
+ return (-1);
+ }
+ if (init_max_size == 0U)
+ max_size = DEF_MAX_SIZE;
+ else
+ max_size = init_max_size;
+ if (target_size == 0U)
+ mem_target = DEF_MEM_TARGET;
+ else
+ mem_target = target_size;
+#ifndef MEMCLUSTER_BIG_MALLOC
+ mem_target_half = mem_target / 2;
+ mem_target_fudge = mem_target + mem_target / 4;
+#endif
+ freelists = malloc(max_size * sizeof (memcluster_element *));
+ stats = malloc((max_size+1) * sizeof (struct stats));
+ if (freelists == NULL || stats == NULL) {
+ errno = ENOMEM;
+ return (-1);
+ }
+ memset(freelists, 0,
+ max_size * sizeof (memcluster_element *));
+ memset(stats, 0, (max_size + 1) * sizeof (struct stats));
+#ifdef MEMCLUSTER_RECORD
+ activelists = malloc((max_size + 1) * sizeof (memcluster_element *));
+ if (activelists == NULL) {
+ errno = ENOMEM;
+ return (-1);
+ }
+ memset(activelists, 0,
+ (max_size + 1) * sizeof (memcluster_element *));
+#endif
+#ifdef MEMCLUSTER_BIG_MALLOC
+ basic_blocks = NULL;
+#endif
+ return (0);
+}
+
+void *
+__memget(size_t size) {
+ return (__memget_record(size, NULL, 0));
+}
+
+void *
+__memget_record(size_t size, const char *file, int line) {
+ size_t new_size = quantize(size);
+#if defined(DEBUGGING_MEMCLUSTER)
+ memcluster_element *e;
+ char *p;
+ fence_t fp = BACK_FENCEPOST;
+#endif
+ void *ret;
+
+ MEMLOCK;
+
+#if !defined(MEMCLUSTER_RECORD)
+ UNUSED(file);
+ UNUSED(line);
+#endif
+ if (freelists == NULL) {
+ if (meminit(0, 0) == -1) {
+ MEMUNLOCK;
+ return (NULL);
+ }
+ }
+ if (size == 0U) {
+ MEMUNLOCK;
+ errno = EINVAL;
+ return (NULL);
+ }
+ if (size >= max_size || new_size >= max_size) {
+ /* memget() was called on something beyond our upper limit. */
+ stats[max_size].gets++;
+ stats[max_size].totalgets++;
+#if defined(DEBUGGING_MEMCLUSTER)
+ e = malloc(new_size);
+ if (e == NULL) {
+ MEMUNLOCK;
+ errno = ENOMEM;
+ return (NULL);
+ }
+ e->next = NULL;
+ e->size = size;
+#ifdef MEMCLUSTER_RECORD
+ e->file = file;
+ e->line = line;
+ e->next = activelists[max_size];
+ activelists[max_size] = e;
+#endif
+ MEMUNLOCK;
+ e->fencepost = FRONT_FENCEPOST;
+ p = (char *)e + sizeof *e + size;
+ memcpy(p, &fp, sizeof fp);
+ return ((char *)e + sizeof *e);
+#else
+ MEMUNLOCK;
+ return (malloc(size));
+#endif
+ }
+
+ /*
+ * If there are no blocks in the free list for this size, get a chunk
+ * of memory and then break it up into "new_size"-sized blocks, adding
+ * them to the free list.
+ */
+ if (freelists[new_size] == NULL) {
+ int i, frags;
+ size_t total_size;
+ void *new;
+ char *curr, *next;
+
+#ifdef MEMCLUSTER_BIG_MALLOC
+ if (basic_blocks == NULL) {
+ new = malloc(NUM_BASIC_BLOCKS * mem_target);
+ if (new == NULL) {
+ MEMUNLOCK;
+ errno = ENOMEM;
+ return (NULL);
+ }
+ curr = new;
+ next = curr + mem_target;
+ for (i = 0; i < (NUM_BASIC_BLOCKS - 1); i++) {
+ ((memcluster_element *)curr)->next = next;
+ curr = next;
+ next += mem_target;
+ }
+ /*
+ * curr is now pointing at the last block in the
+ * array.
+ */
+ ((memcluster_element *)curr)->next = NULL;
+ basic_blocks = new;
+ }
+ total_size = mem_target;
+ new = basic_blocks;
+ basic_blocks = basic_blocks->next;
+#else
+ if (new_size > mem_target_half)
+ total_size = mem_target_fudge;
+ else
+ total_size = mem_target;
+ new = malloc(total_size);
+ if (new == NULL) {
+ MEMUNLOCK;
+ errno = ENOMEM;
+ return (NULL);
+ }
+#endif
+ frags = total_size / new_size;
+ stats[new_size].blocks++;
+ stats[new_size].freefrags += frags;
+ /* Set up a linked-list of blocks of size "new_size". */
+ curr = new;
+ next = curr + new_size;
+ for (i = 0; i < (frags - 1); i++) {
+#if defined (DEBUGGING_MEMCLUSTER)
+ memset(curr, 0xa5, new_size);
+#endif
+ ((memcluster_element *)curr)->next = next;
+ curr = next;
+ next += new_size;
+ }
+ /* curr is now pointing at the last block in the array. */
+#if defined (DEBUGGING_MEMCLUSTER)
+ memset(curr, 0xa5, new_size);
+#endif
+ ((memcluster_element *)curr)->next = freelists[new_size];
+ freelists[new_size] = new;
+ }
+
+ /* The free list uses the "rounded-up" size "new_size". */
+#if defined (DEBUGGING_MEMCLUSTER)
+ e = freelists[new_size];
+ ret = (char *)e + sizeof *e;
+ /*
+ * Check to see if this buffer has been written to while on free list.
+ */
+ check(ret, 0xa5, new_size - sizeof *e);
+ /*
+ * Mark memory we are returning.
+ */
+ memset(ret, 0xe5, size);
+#else
+ ret = freelists[new_size];
+#endif
+ freelists[new_size] = freelists[new_size]->next;
+#if defined(DEBUGGING_MEMCLUSTER)
+ e->next = NULL;
+ e->size = size;
+ e->fencepost = FRONT_FENCEPOST;
+#ifdef MEMCLUSTER_RECORD
+ e->file = file;
+ e->line = line;
+ e->next = activelists[size];
+ activelists[size] = e;
+#endif
+ p = (char *)e + sizeof *e + size;
+ memcpy(p, &fp, sizeof fp);
+#endif
+
+ /*
+ * The stats[] uses the _actual_ "size" requested by the
+ * caller, with the caveat (in the code above) that "size" >= the
+ * max. size (max_size) ends up getting recorded as a call to
+ * max_size.
+ */
+ stats[size].gets++;
+ stats[size].totalgets++;
+ stats[new_size].freefrags--;
+ MEMUNLOCK;
+#if defined(DEBUGGING_MEMCLUSTER)
+ return ((char *)e + sizeof *e);
+#else
+ return (ret);
+#endif
+}
+
+/*%
+ * This is a call from an external caller,
+ * so we want to count this as a user "put".
+ */
+void
+__memput(void *mem, size_t size) {
+ __memput_record(mem, size, NULL, 0);
+}
+
+void
+__memput_record(void *mem, size_t size, const char *file, int line) {
+ size_t new_size = quantize(size);
+#if defined (DEBUGGING_MEMCLUSTER)
+ memcluster_element *e;
+ memcluster_element *el;
+#ifdef MEMCLUSTER_RECORD
+ memcluster_element *prev;
+#endif
+ fence_t fp;
+ char *p;
+#endif
+
+ MEMLOCK;
+
+#if !defined (MEMCLUSTER_RECORD)
+ UNUSED(file);
+ UNUSED(line);
+#endif
+
+ REQUIRE(freelists != NULL);
+
+ if (size == 0U) {
+ MEMUNLOCK;
+ errno = EINVAL;
+ return;
+ }
+
+#if defined (DEBUGGING_MEMCLUSTER)
+ e = (memcluster_element *) ((char *)mem - sizeof *e);
+ INSIST(e->fencepost == FRONT_FENCEPOST);
+ INSIST(e->size == size);
+ p = (char *)e + sizeof *e + size;
+ memcpy(&fp, p, sizeof fp);
+ INSIST(fp == BACK_FENCEPOST);
+ INSIST(((u_long)mem % 4) == 0);
+#ifdef MEMCLUSTER_RECORD
+ prev = NULL;
+ if (size == max_size || new_size >= max_size)
+ el = activelists[max_size];
+ else
+ el = activelists[size];
+ while (el != NULL && el != e) {
+ prev = el;
+ el = el->next;
+ }
+ INSIST(el != NULL); /*%< double free */
+ if (prev == NULL) {
+ if (size == max_size || new_size >= max_size)
+ activelists[max_size] = el->next;
+ else
+ activelists[size] = el->next;
+ } else
+ prev->next = el->next;
+#endif
+#endif
+
+ if (size == max_size || new_size >= max_size) {
+ /* memput() called on something beyond our upper limit */
+#if defined(DEBUGGING_MEMCLUSTER)
+ free(e);
+#else
+ free(mem);
+#endif
+
+ INSIST(stats[max_size].gets != 0U);
+ stats[max_size].gets--;
+ MEMUNLOCK;
+ return;
+ }
+
+ /* The free list uses the "rounded-up" size "new_size": */
+#if defined(DEBUGGING_MEMCLUSTER)
+ memset(mem, 0xa5, new_size - sizeof *e); /*%< catch write after free */
+ e->size = 0; /*%< catch double memput() */
+#ifdef MEMCLUSTER_RECORD
+ e->file = file;
+ e->line = line;
+#endif
+#ifdef MEMCLUSTER_ATEND
+ e->next = NULL;
+ el = freelists[new_size];
+ while (el != NULL && el->next != NULL)
+ el = el->next;
+ if (el)
+ el->next = e;
+ else
+ freelists[new_size] = e;
+#else
+ e->next = freelists[new_size];
+ freelists[new_size] = (void *)e;
+#endif
+#else
+ ((memcluster_element *)mem)->next = freelists[new_size];
+ freelists[new_size] = (memcluster_element *)mem;
+#endif
+
+ /*
+ * The stats[] uses the _actual_ "size" requested by the
+ * caller, with the caveat (in the code above) that "size" >= the
+ * max. size (max_size) ends up getting recorded as a call to
+ * max_size.
+ */
+ INSIST(stats[size].gets != 0U);
+ stats[size].gets--;
+ stats[new_size].freefrags++;
+ MEMUNLOCK;
+}
+
+void *
+__memget_debug(size_t size, const char *file, int line) {
+ void *ptr;
+ ptr = __memget_record(size, file, line);
+ fprintf(stderr, "%s:%d: memget(%lu) -> %p\n", file, line,
+ (u_long)size, ptr);
+ return (ptr);
+}
+
+void
+__memput_debug(void *ptr, size_t size, const char *file, int line) {
+ fprintf(stderr, "%s:%d: memput(%p, %lu)\n", file, line, ptr,
+ (u_long)size);
+ __memput_record(ptr, size, file, line);
+}
+
+/*%
+ * Print the stats[] on the stream "out" with suitable formatting.
+ */
+void
+memstats(FILE *out) {
+ size_t i;
+#ifdef MEMCLUSTER_RECORD
+ memcluster_element *e;
+#endif
+
+ MEMLOCK;
+
+ if (freelists == NULL) {
+ MEMUNLOCK;
+ return;
+ }
+ for (i = 1; i <= max_size; i++) {
+ const struct stats *s = &stats[i];
+
+ if (s->totalgets == 0U && s->gets == 0U)
+ continue;
+ fprintf(out, "%s%5lu: %11lu gets, %11lu rem",
+ (i == max_size) ? ">=" : " ",
+ (unsigned long)i, s->totalgets, s->gets);
+ if (s->blocks != 0U)
+ fprintf(out, " (%lu bl, %lu ff)",
+ s->blocks, s->freefrags);
+ fputc('\n', out);
+ }
+#ifdef MEMCLUSTER_RECORD
+ fprintf(out, "Active Memory:\n");
+ for (i = 1; i <= max_size; i++) {
+ if ((e = activelists[i]) != NULL)
+ while (e != NULL) {
+ fprintf(out, "%s:%d %p:%lu\n",
+ e->file != NULL ? e->file :
+ "<UNKNOWN>", e->line,
+ (char *)e + sizeof *e,
+ (u_long)e->size);
+ e = e->next;
+ }
+ }
+#endif
+ MEMUNLOCK;
+}
+
+int
+memactive(void) {
+ size_t i;
+
+ if (stats == NULL)
+ return (0);
+ for (i = 1; i <= max_size; i++)
+ if (stats[i].gets != 0U)
+ return (1);
+ return (0);
+}
+
+/* Private. */
+
+/*%
+ * Round up size to a multiple of sizeof(void *). This guarantees that a
+ * block is at least sizeof void *, and that we won't violate alignment
+ * restrictions, both of which are needed to make lists of blocks.
+ */
+static size_t
+quantize(size_t size) {
+ int remainder;
+ /*
+ * If there is no remainder for the integer division of
+ *
+ * (rightsize/P_SIZE)
+ *
+ * then we already have a good size; if not, then we need
+ * to round up the result in order to get a size big
+ * enough to satisfy the request _and_ aligned on P_SIZE boundaries.
+ */
+ remainder = size % P_SIZE;
+ if (remainder != 0)
+ size += P_SIZE - remainder;
+#if defined(DEBUGGING_MEMCLUSTER)
+ return (size + SMALL_SIZE_LIMIT + sizeof (int));
+#else
+ return (size);
+#endif
+}
+
+#if defined(DEBUGGING_MEMCLUSTER)
+static void
+check(unsigned char *a, int value, size_t len) {
+ size_t i;
+ for (i = 0; i < len; i++)
+ INSIST(a[i] == value);
+}
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/movefile.c b/usr/src/lib/libresolv2_joy/common/isc/movefile.c
new file mode 100644
index 0000000000..0ffc7047e2
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/movefile.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 2000 by Internet Software Consortium, Inc.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+
+#include <port_before.h>
+#include <stdio.h>
+#include <isc/misc.h>
+#include <port_after.h>
+#ifndef HAVE_MOVEFILE
+/*
+ * rename() is lame (can't overwrite an existing file) on some systems.
+ * use movefile() instead, and let lame OS ports do what they need to.
+ */
+
+int
+isc_movefile(const char *oldname, const char *newname) {
+ return (rename(oldname, newname));
+}
+#else
+ static int os_port_has_isc_movefile = 1;
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/tree.c b/usr/src/lib/libresolv2_joy/common/isc/tree.c
new file mode 100644
index 0000000000..8ba675fbe8
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/tree.c
@@ -0,0 +1,534 @@
+#ifndef LINT
+static const char rcsid[] = "$Id: tree.c,v 1.4 2005/04/27 04:56:39 sra Exp $";
+#endif
+
+/*%
+ * tree - balanced binary tree library
+ *
+ * vix 05apr94 [removed vixie.h dependencies; cleaned up formatting, names]
+ * vix 22jan93 [revisited; uses RCS, ANSI, POSIX; has bug fixes]
+ * vix 23jun86 [added delete uar to add for replaced nodes]
+ * vix 20jun86 [added tree_delete per wirth a+ds (mod2 v.) p. 224]
+ * vix 06feb86 [added tree_mung()]
+ * vix 02feb86 [added tree balancing from wirth "a+ds=p" p. 220-221]
+ * vix 14dec85 [written]
+ */
+
+/*%
+ * This program text was created by Paul Vixie using examples from the book:
+ * "Algorithms & Data Structures," Niklaus Wirth, Prentice-Hall, 1986, ISBN
+ * 0-13-022005-1. Any errors in the conversion from Modula-2 to C are Paul
+ * Vixie's.
+ */
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*#define DEBUG "tree"*/
+
+#include "port_before.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "port_after.h"
+
+#include <isc/memcluster.h>
+#include <isc/tree.h>
+
+#ifdef DEBUG
+static int debugDepth = 0;
+static char *debugFuncs[256];
+# define ENTER(proc) { \
+ debugFuncs[debugDepth] = proc; \
+ fprintf(stderr, "ENTER(%d:%s.%s)\n", \
+ debugDepth, DEBUG, \
+ debugFuncs[debugDepth]); \
+ debugDepth++; \
+ }
+# define RET(value) { \
+ debugDepth--; \
+ fprintf(stderr, "RET(%d:%s.%s)\n", \
+ debugDepth, DEBUG, \
+ debugFuncs[debugDepth]); \
+ return (value); \
+ }
+# define RETV { \
+ debugDepth--; \
+ fprintf(stderr, "RETV(%d:%s.%s)\n", \
+ debugDepth, DEBUG, \
+ debugFuncs[debugDepth]); \
+ return; \
+ }
+# define MSG(msg) fprintf(stderr, "MSG(%s)\n", msg);
+#else
+# define ENTER(proc) ;
+# define RET(value) return (value);
+# define RETV return;
+# define MSG(msg) ;
+#endif
+
+#ifndef TRUE
+# define TRUE 1
+# define FALSE 0
+#endif
+
+static tree * sprout(tree **, tree_t, int *, int (*)(), void (*)());
+static int delete(tree **, int (*)(), tree_t, void (*)(), int *, int *);
+static void del(tree **, int *, tree **, void (*)(), int *);
+static void bal_L(tree **, int *);
+static void bal_R(tree **, int *);
+
+void
+tree_init(tree **ppr_tree) {
+ ENTER("tree_init")
+ *ppr_tree = NULL;
+ RETV
+}
+
+tree_t
+tree_srch(tree **ppr_tree, int (*pfi_compare)(tree_t, tree_t), tree_t p_user) {
+ ENTER("tree_srch")
+
+ if (*ppr_tree) {
+ int i_comp = (*pfi_compare)(p_user, (**ppr_tree).data);
+
+ if (i_comp > 0)
+ RET(tree_srch(&(**ppr_tree).right,
+ pfi_compare,
+ p_user))
+
+ if (i_comp < 0)
+ RET(tree_srch(&(**ppr_tree).left,
+ pfi_compare,
+ p_user))
+
+ /* not higher, not lower... this must be the one.
+ */
+ RET((**ppr_tree).data)
+ }
+
+ /* grounded. NOT found.
+ */
+ RET(NULL)
+}
+
+tree_t
+tree_add(tree **ppr_tree, int (*pfi_compare)(tree_t, tree_t),
+ tree_t p_user, void (*pfv_uar)())
+{
+ int i_balance = FALSE;
+
+ ENTER("tree_add")
+ if (!sprout(ppr_tree, p_user, &i_balance, pfi_compare, pfv_uar))
+ RET(NULL)
+ RET(p_user)
+}
+
+int
+tree_delete(tree **ppr_p, int (*pfi_compare)(tree_t, tree_t),
+ tree_t p_user, void (*pfv_uar)())
+{
+ int i_balance = FALSE, i_uar_called = FALSE;
+
+ ENTER("tree_delete");
+ RET(delete(ppr_p, pfi_compare, p_user, pfv_uar,
+ &i_balance, &i_uar_called))
+}
+
+int
+tree_trav(tree **ppr_tree, int (*pfi_uar)(tree_t)) {
+ ENTER("tree_trav")
+
+ if (!*ppr_tree)
+ RET(TRUE)
+
+ if (!tree_trav(&(**ppr_tree).left, pfi_uar))
+ RET(FALSE)
+ if (!(*pfi_uar)((**ppr_tree).data))
+ RET(FALSE)
+ if (!tree_trav(&(**ppr_tree).right, pfi_uar))
+ RET(FALSE)
+ RET(TRUE)
+}
+
+void
+tree_mung(tree **ppr_tree, void (*pfv_uar)(tree_t)) {
+ ENTER("tree_mung")
+ if (*ppr_tree) {
+ tree_mung(&(**ppr_tree).left, pfv_uar);
+ tree_mung(&(**ppr_tree).right, pfv_uar);
+ if (pfv_uar)
+ (*pfv_uar)((**ppr_tree).data);
+ memput(*ppr_tree, sizeof(tree));
+ *ppr_tree = NULL;
+ }
+ RETV
+}
+
+static tree *
+sprout(tree **ppr, tree_t p_data, int *pi_balance,
+ int (*pfi_compare)(tree_t, tree_t), void (*pfv_delete)(tree_t))
+{
+ tree *p1, *p2, *sub;
+ int cmp;
+
+ ENTER("sprout")
+
+ /* are we grounded? if so, add the node "here" and set the rebalance
+ * flag, then exit.
+ */
+ if (!*ppr) {
+ MSG("grounded. adding new node, setting h=true")
+ *ppr = (tree *) memget(sizeof(tree));
+ if (*ppr) {
+ (*ppr)->left = NULL;
+ (*ppr)->right = NULL;
+ (*ppr)->bal = 0;
+ (*ppr)->data = p_data;
+ *pi_balance = TRUE;
+ }
+ RET(*ppr);
+ }
+
+ /* compare the data using routine passed by caller.
+ */
+ cmp = (*pfi_compare)(p_data, (*ppr)->data);
+
+ /* if LESS, prepare to move to the left.
+ */
+ if (cmp < 0) {
+ MSG("LESS. sprouting left.")
+ sub = sprout(&(*ppr)->left, p_data, pi_balance,
+ pfi_compare, pfv_delete);
+ if (sub && *pi_balance) { /*%< left branch has grown */
+ MSG("LESS: left branch has grown")
+ switch ((*ppr)->bal) {
+ case 1:
+ /* right branch WAS longer; bal is ok now */
+ MSG("LESS: case 1.. bal restored implicitly")
+ (*ppr)->bal = 0;
+ *pi_balance = FALSE;
+ break;
+ case 0:
+ /* balance WAS okay; now left branch longer */
+ MSG("LESS: case 0.. balnce bad but still ok")
+ (*ppr)->bal = -1;
+ break;
+ case -1:
+ /* left branch was already too long. rebal */
+ MSG("LESS: case -1: rebalancing")
+ p1 = (*ppr)->left;
+ if (p1->bal == -1) { /*%< LL */
+ MSG("LESS: single LL")
+ (*ppr)->left = p1->right;
+ p1->right = *ppr;
+ (*ppr)->bal = 0;
+ *ppr = p1;
+ } else { /*%< double LR */
+ MSG("LESS: double LR")
+
+ p2 = p1->right;
+ p1->right = p2->left;
+ p2->left = p1;
+
+ (*ppr)->left = p2->right;
+ p2->right = *ppr;
+
+ if (p2->bal == -1)
+ (*ppr)->bal = 1;
+ else
+ (*ppr)->bal = 0;
+
+ if (p2->bal == 1)
+ p1->bal = -1;
+ else
+ p1->bal = 0;
+ *ppr = p2;
+ } /*else*/
+ (*ppr)->bal = 0;
+ *pi_balance = FALSE;
+ } /*switch*/
+ } /*if*/
+ RET(sub)
+ } /*if*/
+
+ /* if MORE, prepare to move to the right.
+ */
+ if (cmp > 0) {
+ MSG("MORE: sprouting to the right")
+ sub = sprout(&(*ppr)->right, p_data, pi_balance,
+ pfi_compare, pfv_delete);
+ if (sub && *pi_balance) {
+ MSG("MORE: right branch has grown")
+
+ switch ((*ppr)->bal) {
+ case -1:
+ MSG("MORE: balance was off, fixed implicitly")
+ (*ppr)->bal = 0;
+ *pi_balance = FALSE;
+ break;
+ case 0:
+ MSG("MORE: balance was okay, now off but ok")
+ (*ppr)->bal = 1;
+ break;
+ case 1:
+ MSG("MORE: balance was off, need to rebalance")
+ p1 = (*ppr)->right;
+ if (p1->bal == 1) { /*%< RR */
+ MSG("MORE: single RR")
+ (*ppr)->right = p1->left;
+ p1->left = *ppr;
+ (*ppr)->bal = 0;
+ *ppr = p1;
+ } else { /*%< double RL */
+ MSG("MORE: double RL")
+
+ p2 = p1->left;
+ p1->left = p2->right;
+ p2->right = p1;
+
+ (*ppr)->right = p2->left;
+ p2->left = *ppr;
+
+ if (p2->bal == 1)
+ (*ppr)->bal = -1;
+ else
+ (*ppr)->bal = 0;
+
+ if (p2->bal == -1)
+ p1->bal = 1;
+ else
+ p1->bal = 0;
+
+ *ppr = p2;
+ } /*else*/
+ (*ppr)->bal = 0;
+ *pi_balance = FALSE;
+ } /*switch*/
+ } /*if*/
+ RET(sub)
+ } /*if*/
+
+ /* not less, not more: this is the same key! replace...
+ */
+ MSG("FOUND: Replacing data value")
+ *pi_balance = FALSE;
+ if (pfv_delete)
+ (*pfv_delete)((*ppr)->data);
+ (*ppr)->data = p_data;
+ RET(*ppr)
+}
+
+static int
+delete(tree **ppr_p, int (*pfi_compare)(tree_t, tree_t), tree_t p_user,
+ void (*pfv_uar)(tree_t), int *pi_balance, int *pi_uar_called)
+{
+ tree *pr_q;
+ int i_comp, i_ret;
+
+ ENTER("delete")
+
+ if (*ppr_p == NULL) {
+ MSG("key not in tree")
+ RET(FALSE)
+ }
+
+ i_comp = (*pfi_compare)((*ppr_p)->data, p_user);
+ if (i_comp > 0) {
+ MSG("too high - scan left")
+ i_ret = delete(&(*ppr_p)->left, pfi_compare, p_user, pfv_uar,
+ pi_balance, pi_uar_called);
+ if (*pi_balance)
+ bal_L(ppr_p, pi_balance);
+ } else if (i_comp < 0) {
+ MSG("too low - scan right")
+ i_ret = delete(&(*ppr_p)->right, pfi_compare, p_user, pfv_uar,
+ pi_balance, pi_uar_called);
+ if (*pi_balance)
+ bal_R(ppr_p, pi_balance);
+ } else {
+ MSG("equal")
+ pr_q = *ppr_p;
+ if (pr_q->right == NULL) {
+ MSG("right subtree null")
+ *ppr_p = pr_q->left;
+ *pi_balance = TRUE;
+ } else if (pr_q->left == NULL) {
+ MSG("right subtree non-null, left subtree null")
+ *ppr_p = pr_q->right;
+ *pi_balance = TRUE;
+ } else {
+ MSG("neither subtree null")
+ del(&pr_q->left, pi_balance, &pr_q,
+ pfv_uar, pi_uar_called);
+ if (*pi_balance)
+ bal_L(ppr_p, pi_balance);
+ }
+ if (!*pi_uar_called && pfv_uar)
+ (*pfv_uar)(pr_q->data);
+ /* Thanks to wuth@castrov.cuc.ab.ca for the following stmt. */
+ memput(pr_q, sizeof(tree));
+ i_ret = TRUE;
+ }
+ RET(i_ret)
+}
+
+static void
+del(tree **ppr_r, int *pi_balance, tree **ppr_q,
+ void (*pfv_uar)(tree_t), int *pi_uar_called)
+{
+ ENTER("del")
+
+ if ((*ppr_r)->right != NULL) {
+ del(&(*ppr_r)->right, pi_balance, ppr_q,
+ pfv_uar, pi_uar_called);
+ if (*pi_balance)
+ bal_R(ppr_r, pi_balance);
+ } else {
+ if (pfv_uar)
+ (*pfv_uar)((*ppr_q)->data);
+ *pi_uar_called = TRUE;
+ (*ppr_q)->data = (*ppr_r)->data;
+ *ppr_q = *ppr_r;
+ *ppr_r = (*ppr_r)->left;
+ *pi_balance = TRUE;
+ }
+
+ RETV
+}
+
+static void
+bal_L(tree **ppr_p, int *pi_balance) {
+ tree *p1, *p2;
+ int b1, b2;
+
+ ENTER("bal_L")
+ MSG("left branch has shrunk")
+
+ switch ((*ppr_p)->bal) {
+ case -1:
+ MSG("was imbalanced, fixed implicitly")
+ (*ppr_p)->bal = 0;
+ break;
+ case 0:
+ MSG("was okay, is now one off")
+ (*ppr_p)->bal = 1;
+ *pi_balance = FALSE;
+ break;
+ case 1:
+ MSG("was already off, this is too much")
+ p1 = (*ppr_p)->right;
+ b1 = p1->bal;
+ if (b1 >= 0) {
+ MSG("single RR")
+ (*ppr_p)->right = p1->left;
+ p1->left = *ppr_p;
+ if (b1 == 0) {
+ MSG("b1 == 0")
+ (*ppr_p)->bal = 1;
+ p1->bal = -1;
+ *pi_balance = FALSE;
+ } else {
+ MSG("b1 != 0")
+ (*ppr_p)->bal = 0;
+ p1->bal = 0;
+ }
+ *ppr_p = p1;
+ } else {
+ MSG("double RL")
+ p2 = p1->left;
+ b2 = p2->bal;
+ p1->left = p2->right;
+ p2->right = p1;
+ (*ppr_p)->right = p2->left;
+ p2->left = *ppr_p;
+ if (b2 == 1)
+ (*ppr_p)->bal = -1;
+ else
+ (*ppr_p)->bal = 0;
+ if (b2 == -1)
+ p1->bal = 1;
+ else
+ p1->bal = 0;
+ *ppr_p = p2;
+ p2->bal = 0;
+ }
+ }
+ RETV
+}
+
+static void
+bal_R(tree **ppr_p, int *pi_balance) {
+ tree *p1, *p2;
+ int b1, b2;
+
+ ENTER("bal_R")
+ MSG("right branch has shrunk")
+ switch ((*ppr_p)->bal) {
+ case 1:
+ MSG("was imbalanced, fixed implicitly")
+ (*ppr_p)->bal = 0;
+ break;
+ case 0:
+ MSG("was okay, is now one off")
+ (*ppr_p)->bal = -1;
+ *pi_balance = FALSE;
+ break;
+ case -1:
+ MSG("was already off, this is too much")
+ p1 = (*ppr_p)->left;
+ b1 = p1->bal;
+ if (b1 <= 0) {
+ MSG("single LL")
+ (*ppr_p)->left = p1->right;
+ p1->right = *ppr_p;
+ if (b1 == 0) {
+ MSG("b1 == 0")
+ (*ppr_p)->bal = -1;
+ p1->bal = 1;
+ *pi_balance = FALSE;
+ } else {
+ MSG("b1 != 0")
+ (*ppr_p)->bal = 0;
+ p1->bal = 0;
+ }
+ *ppr_p = p1;
+ } else {
+ MSG("double LR")
+ p2 = p1->right;
+ b2 = p2->bal;
+ p1->right = p2->left;
+ p2->left = p1;
+ (*ppr_p)->left = p2->right;
+ p2->right = *ppr_p;
+ if (b2 == -1)
+ (*ppr_p)->bal = 1;
+ else
+ (*ppr_p)->bal = 0;
+ if (b2 == 1)
+ p1->bal = -1;
+ else
+ p1->bal = 0;
+ *ppr_p = p2;
+ p2->bal = 0;
+ }
+ }
+ RETV
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/llib-lresolv_joy b/usr/src/lib/libresolv2_joy/common/llib-lresolv_joy
new file mode 100644
index 0000000000..aedd06a0fa
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/llib-lresolv_joy
@@ -0,0 +1,59 @@
+/* LINTLIBRARY */
+/* PROTOLIB1 */
+
+/*
+ * Copyright (c) 1997-1999 by Sun Microsystems, Inc.
+ * All rights reserved.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <arpa/nameser.h>
+#include <resolv.h>
+
+/*
+ * usr/src/lib/libresolv2 routines
+ */
+
+int dn_skipname(const uchar_t *, const uchar_t *);
+void fp_query(const u_char *, FILE *);
+const uchar_t * p_cdname(const uchar_t *, const uchar_t *, FILE *);
+const char * p_class(int);
+void p_query(const u_char *);
+const char * p_time(unsigned int);
+const char * p_type(int);
+void putlong(unsigned int, uchar_t *);
+uint32_t _getlong(const u_char *);
+uint16_t _getshort(const u_char *);
+const char * hstrerror(int);
+int res_init(void);
+int res_mkquery(int, const char *, int, int, const u_char *,
+ int, const u_char *, u_char *, int);
+int res_query(const char *, int, int, u_char *, int);
+int res_querydomain(const char *, const char *, int, int,
+ u_char *, int);
+int res_search(const char *, int, int, u_char *, int);
+int res_send(const u_char *, int, u_char *, int);
+int res_update(ns_updrec *);
+int res_ninit(res_state);
+void fp_resstat(const res_state, FILE *);
+const char * res_hostalias(const res_state, const char *, char *, size_t);
+int res_nquery(res_state, const char *, int, int, u_char *, int);
+int res_nsearch(res_state, const char *, int, int, u_char *, int);
+int res_nquerydomain(res_state, const char *, const char *,
+ int, int, u_char *, int);
+int res_nmkquery(res_state, int, const char *, int, int,
+ const u_char *, int, const u_char *,
+ u_char *, int);
+int res_nsend(res_state, const u_char *, int, u_char *, int);
+int res_nmkupdate(res_state, ns_updrec *, u_char *, int);
+void res_nclose(res_state);
+int res_nsendsigned(res_state, const u_char *, int, ns_tsig_key *,
+ u_char *, int);
+int dn_comp(const char *, u_char *, int, u_char **, u_char **);
+int dn_expand(const u_char *, const u_char *, const u_char *,
+ char *, int);
diff --git a/usr/src/lib/libresolv2_joy/common/mapfile-vers b/usr/src/lib/libresolv2_joy/common/mapfile-vers
new file mode 100644
index 0000000000..e0c66a1c96
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/mapfile-vers
@@ -0,0 +1,60 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_VERSION SUNWprivate {
+ global:
+ joy_dn_expand;
+ joy_res_nsearch;
+ joy_res_ninit;
+ joy_res_ndestroy;
+ joy_res_gethostbyaddr;
+ joy_res_gethostbyname;
+ joy_res_gethostbyname2;
+ joy_res_sethostent;
+ joy_res_endhostent;
+ __joy_res_override_retry;
+ __joy_res_unset_no_hosts_fallback;
+ __joy_res_set_no_hosts_fallback;
+ __joy_h_errno;
+ __joy_ns_get16;
+ __joy_ns_get32;
+ local:
+ *;
+};
diff --git a/usr/src/lib/libresolv2_joy/common/nameser/ns_date.c b/usr/src/lib/libresolv2_joy/common/nameser/ns_date.c
new file mode 100644
index 0000000000..292375af63
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/nameser/ns_date.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef lint
+static const char rcsid[] = "$Id: ns_date.c,v 1.6 2005/04/27 04:56:39 sra Exp $";
+#endif
+
+/* Import. */
+
+#include "port_before.h"
+
+#include <arpa/nameser.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+
+#include "port_after.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) ((size_t)sprintf x)
+#endif
+
+/* Forward. */
+
+static int datepart(const char *, int, int, int, int *);
+
+/* Public. */
+
+/*%
+ * Convert a date in ASCII into the number of seconds since
+ * 1 January 1970 (GMT assumed). Format is yyyymmddhhmmss, all
+ * digits required, no spaces allowed.
+ */
+
+u_int32_t
+ns_datetosecs(const char *cp, int *errp) {
+ struct tm time;
+ u_int32_t result;
+ int mdays, i;
+ static const int days_per_month[12] =
+ {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
+
+ if (strlen(cp) != 14U) {
+ *errp = 1;
+ return (0);
+ }
+ *errp = 0;
+
+ memset(&time, 0, sizeof time);
+ time.tm_year = datepart(cp + 0, 4, 1990, 9999, errp) - 1900;
+ time.tm_mon = datepart(cp + 4, 2, 01, 12, errp) - 1;
+ time.tm_mday = datepart(cp + 6, 2, 01, 31, errp);
+ time.tm_hour = datepart(cp + 8, 2, 00, 23, errp);
+ time.tm_min = datepart(cp + 10, 2, 00, 59, errp);
+ time.tm_sec = datepart(cp + 12, 2, 00, 59, errp);
+ if (*errp) /*%< Any parse errors? */
+ return (0);
+
+ /*
+ * OK, now because timegm() is not available in all environments,
+ * we will do it by hand. Roll up sleeves, curse the gods, begin!
+ */
+
+#define SECS_PER_DAY ((u_int32_t)24*60*60)
+#define isleap(y) ((((y) % 4) == 0 && ((y) % 100) != 0) || ((y) % 400) == 0)
+
+ result = time.tm_sec; /*%< Seconds */
+ result += time.tm_min * 60; /*%< Minutes */
+ result += time.tm_hour * (60*60); /*%< Hours */
+ result += (time.tm_mday - 1) * SECS_PER_DAY; /*%< Days */
+ /* Months are trickier. Look without leaping, then leap */
+ mdays = 0;
+ for (i = 0; i < time.tm_mon; i++)
+ mdays += days_per_month[i];
+ result += mdays * SECS_PER_DAY; /*%< Months */
+ if (time.tm_mon > 1 && isleap(1900+time.tm_year))
+ result += SECS_PER_DAY; /*%< Add leapday for this year */
+ /* First figure years without leapdays, then add them in. */
+ /* The loop is slow, FIXME, but simple and accurate. */
+ result += (time.tm_year - 70) * (SECS_PER_DAY*365); /*%< Years */
+ for (i = 70; i < time.tm_year; i++)
+ if (isleap(1900+i))
+ result += SECS_PER_DAY; /*%< Add leapday for prev year */
+ return (result);
+}
+
+/* Private. */
+
+/*%
+ * Parse part of a date. Set error flag if any error.
+ * Don't reset the flag if there is no error.
+ */
+static int
+datepart(const char *buf, int size, int min, int max, int *errp) {
+ int result = 0;
+ int i;
+
+ for (i = 0; i < size; i++) {
+ if (!isdigit((unsigned char)(buf[i])))
+ *errp = 1;
+ result = (result * 10) + buf[i] - '0';
+ }
+ if (result < min)
+ *errp = 1;
+ if (result > max)
+ *errp = 1;
+ return (result);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/nameser/ns_name.c b/usr/src/lib/libresolv2_joy/common/nameser/ns_name.c
new file mode 100644
index 0000000000..f6b0accef1
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/nameser/ns_name.c
@@ -0,0 +1,1153 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef lint
+static const char rcsid[] = "$Id: ns_name.c,v 1.11 2009/01/23 19:59:16 each Exp $";
+#endif
+
+#include "port_before.h"
+
+#include <sys/types.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <resolv_joy.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <limits.h>
+
+#include "port_after.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) ((size_t)sprintf x)
+#endif
+
+#define NS_TYPE_ELT 0x40 /*%< EDNS0 extended label type */
+#define DNS_LABELTYPE_BITSTRING 0x41
+
+/* Data. */
+
+static const char digits[] = "0123456789";
+
+static const char digitvalue[256] = {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /*16*/
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /*32*/
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /*48*/
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /*64*/
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /*80*/
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /*96*/
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /*112*/
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /*128*/
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /*256*/
+};
+
+/* Forward. */
+
+static int special(int);
+static int printable(int);
+static int dn_find(const u_char *, const u_char *,
+ const u_char * const *,
+ const u_char * const *);
+static int encode_bitsring(const char **, const char *,
+ unsigned char **, unsigned char **,
+ unsigned const char *);
+static int labellen(const u_char *);
+static int decode_bitstring(const unsigned char **,
+ char *, const char *);
+
+/* Public. */
+
+/*%
+ * Convert an encoded domain name to printable ascii as per RFC1035.
+
+ * return:
+ *\li Number of bytes written to buffer, or -1 (with errno set)
+ *
+ * notes:
+ *\li The root is returned as "."
+ *\li All other domains are returned in non absolute form
+ */
+int
+ns_name_ntop(const u_char *src, char *dst, size_t dstsiz)
+{
+ const u_char *cp;
+ char *dn, *eom;
+ u_char c;
+ u_int n;
+ int l;
+
+ cp = src;
+ dn = dst;
+ eom = dst + dstsiz;
+
+ while ((n = *cp++) != 0) {
+ if ((n & NS_CMPRSFLGS) == NS_CMPRSFLGS) {
+ /* Some kind of compression pointer. */
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ if (dn != dst) {
+ if (dn >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ *dn++ = '.';
+ }
+ if ((l = labellen(cp - 1)) < 0) {
+ errno = EMSGSIZE; /*%< XXX */
+ return (-1);
+ }
+ if (dn + l >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ if ((n & NS_CMPRSFLGS) == NS_TYPE_ELT) {
+ int m;
+
+ if (n != DNS_LABELTYPE_BITSTRING) {
+ /* XXX: labellen should reject this case */
+ errno = EINVAL;
+ return (-1);
+ }
+ if ((m = decode_bitstring(&cp, dn, eom)) < 0)
+ {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ dn += m;
+ continue;
+ }
+ for ((void)NULL; l > 0; l--) {
+ c = *cp++;
+ if (special(c)) {
+ if (dn + 1 >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ *dn++ = '\\';
+ *dn++ = (char)c;
+ } else if (!printable(c)) {
+ if (dn + 3 >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ *dn++ = '\\';
+ *dn++ = digits[c / 100];
+ *dn++ = digits[(c % 100) / 10];
+ *dn++ = digits[c % 10];
+ } else {
+ if (dn >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ *dn++ = (char)c;
+ }
+ }
+ }
+ if (dn == dst) {
+ if (dn >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ *dn++ = '.';
+ }
+ if (dn >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ *dn++ = '\0';
+ return (dn - dst);
+}
+
+/*%
+ * Convert a ascii string into an encoded domain name as per RFC1035.
+ *
+ * return:
+ *
+ *\li -1 if it fails
+ *\li 1 if string was fully qualified
+ *\li 0 is string was not fully qualified
+ *
+ * notes:
+ *\li Enforces label and domain length limits.
+ */
+int
+ns_name_pton(const char *src, u_char *dst, size_t dstsiz) {
+ return (ns_name_pton2(src, dst, dstsiz, NULL));
+}
+
+/*
+ * ns_name_pton2(src, dst, dstsiz, *dstlen)
+ * Convert a ascii string into an encoded domain name as per RFC1035.
+ * return:
+ * -1 if it fails
+ * 1 if string was fully qualified
+ * 0 is string was not fully qualified
+ * side effects:
+ * fills in *dstlen (if non-NULL)
+ * notes:
+ * Enforces label and domain length limits.
+ */
+int
+ns_name_pton2(const char *src, u_char *dst, size_t dstsiz, size_t *dstlen) {
+ u_char *label, *bp, *eom;
+ int c, n, escaped, e = 0;
+ char *cp;
+
+ escaped = 0;
+ bp = dst;
+ eom = dst + dstsiz;
+ label = bp++;
+
+ while ((c = *src++) != 0) {
+ if (escaped) {
+ if (c == '[') { /*%< start a bit string label */
+ if ((cp = strchr(src, ']')) == NULL) {
+ errno = EINVAL; /*%< ??? */
+ return (-1);
+ }
+ if ((e = encode_bitsring(&src, cp + 2,
+ &label, &bp, eom))
+ != 0) {
+ errno = e;
+ return (-1);
+ }
+ escaped = 0;
+ label = bp++;
+ if ((c = *src++) == 0)
+ goto done;
+ else if (c != '.') {
+ errno = EINVAL;
+ return (-1);
+ }
+ continue;
+ }
+ else if ((cp = strchr(digits, c)) != NULL) {
+ n = (cp - digits) * 100;
+ if ((c = *src++) == 0 ||
+ (cp = strchr(digits, c)) == NULL) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ n += (cp - digits) * 10;
+ if ((c = *src++) == 0 ||
+ (cp = strchr(digits, c)) == NULL) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ n += (cp - digits);
+ if (n > 255) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ c = n;
+ }
+ escaped = 0;
+ } else if (c == '\\') {
+ escaped = 1;
+ continue;
+ } else if (c == '.') {
+ c = (bp - label - 1);
+ if ((c & NS_CMPRSFLGS) != 0) { /*%< Label too big. */
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ if (label >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ *label = c;
+ /* Fully qualified ? */
+ if (*src == '\0') {
+ if (c != 0) {
+ if (bp >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ *bp++ = '\0';
+ }
+ if ((bp - dst) > MAXCDNAME) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ if (dstlen != NULL)
+ *dstlen = (bp - dst);
+ return (1);
+ }
+ if (c == 0 || *src == '.') {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ label = bp++;
+ continue;
+ }
+ if (bp >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ *bp++ = (u_char)c;
+ }
+ c = (bp - label - 1);
+ if ((c & NS_CMPRSFLGS) != 0) { /*%< Label too big. */
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ done:
+ if (label >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ *label = c;
+ if (c != 0) {
+ if (bp >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ *bp++ = 0;
+ }
+ if ((bp - dst) > MAXCDNAME) { /*%< src too big */
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ if (dstlen != NULL)
+ *dstlen = (bp - dst);
+ return (0);
+}
+
+/*%
+ * Convert a network strings labels into all lowercase.
+ *
+ * return:
+ *\li Number of bytes written to buffer, or -1 (with errno set)
+ *
+ * notes:
+ *\li Enforces label and domain length limits.
+ */
+
+int
+ns_name_ntol(const u_char *src, u_char *dst, size_t dstsiz)
+{
+ const u_char *cp;
+ u_char *dn, *eom;
+ u_char c;
+ u_int n;
+ int l;
+
+ cp = src;
+ dn = dst;
+ eom = dst + dstsiz;
+
+ if (dn >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ while ((n = *cp++) != 0) {
+ if ((n & NS_CMPRSFLGS) == NS_CMPRSFLGS) {
+ /* Some kind of compression pointer. */
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ *dn++ = n;
+ if ((l = labellen(cp - 1)) < 0) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ if (dn + l >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ for ((void)NULL; l > 0; l--) {
+ c = *cp++;
+ if (isascii(c) && isupper(c))
+ *dn++ = tolower(c);
+ else
+ *dn++ = c;
+ }
+ }
+ *dn++ = '\0';
+ return (dn - dst);
+}
+
+/*%
+ * Unpack a domain name from a message, source may be compressed.
+ *
+ * return:
+ *\li -1 if it fails, or consumed octets if it succeeds.
+ */
+int
+ns_name_unpack(const u_char *msg, const u_char *eom, const u_char *src,
+ u_char *dst, size_t dstsiz)
+{
+ return (ns_name_unpack2(msg, eom, src, dst, dstsiz, NULL));
+}
+
+/*
+ * ns_name_unpack2(msg, eom, src, dst, dstsiz, *dstlen)
+ * Unpack a domain name from a message, source may be compressed.
+ * return:
+ * -1 if it fails, or consumed octets if it succeeds.
+ * side effect:
+ * fills in *dstlen (if non-NULL).
+ */
+int
+ns_name_unpack2(const u_char *msg, const u_char *eom, const u_char *src,
+ u_char *dst, size_t dstsiz, size_t *dstlen)
+{
+ const u_char *srcp, *dstlim;
+ u_char *dstp;
+ int n, len, checked, l;
+
+ len = -1;
+ checked = 0;
+ dstp = dst;
+ srcp = src;
+ dstlim = dst + dstsiz;
+ if (srcp < msg || srcp >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ /* Fetch next label in domain name. */
+ while ((n = *srcp++) != 0) {
+ /* Check for indirection. */
+ switch (n & NS_CMPRSFLGS) {
+ case 0:
+ case NS_TYPE_ELT:
+ /* Limit checks. */
+ if ((l = labellen(srcp - 1)) < 0) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ if (dstp + l + 1 >= dstlim || srcp + l >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ checked += l + 1;
+ *dstp++ = n;
+ memcpy(dstp, srcp, l);
+ dstp += l;
+ srcp += l;
+ break;
+
+ case NS_CMPRSFLGS:
+ if (srcp >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ if (len < 0)
+ len = srcp - src + 1;
+ srcp = msg + (((n & 0x3f) << 8) | (*srcp & 0xff));
+ if (srcp < msg || srcp >= eom) { /*%< Out of range. */
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ checked += 2;
+ /*
+ * Check for loops in the compressed name;
+ * if we've looked at the whole message,
+ * there must be a loop.
+ */
+ if (checked >= eom - msg) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ break;
+
+ default:
+ errno = EMSGSIZE;
+ return (-1); /*%< flag error */
+ }
+ }
+ *dstp++ = 0;
+ if (dstlen != NULL)
+ *dstlen = dstp - dst;
+ if (len < 0)
+ len = srcp - src;
+ return (len);
+}
+
+/*%
+ * Pack domain name 'domain' into 'comp_dn'.
+ *
+ * return:
+ *\li Size of the compressed name, or -1.
+ *
+ * notes:
+ *\li 'dnptrs' is an array of pointers to previous compressed names.
+ *\li dnptrs[0] is a pointer to the beginning of the message. The array
+ * ends with NULL.
+ *\li 'lastdnptr' is a pointer to the end of the array pointed to
+ * by 'dnptrs'.
+ *
+ * Side effects:
+ *\li The list of pointers in dnptrs is updated for labels inserted into
+ * the message as we compress the name. If 'dnptr' is NULL, we don't
+ * try to compress names. If 'lastdnptr' is NULL, we don't update the
+ * list.
+ */
+int
+ns_name_pack(const u_char *src, u_char *dst, int dstsiz,
+ const u_char **dnptrs, const u_char **lastdnptr)
+{
+ u_char *dstp;
+ const u_char **cpp, **lpp, *eob, *msg;
+ const u_char *srcp;
+ int n, l, first = 1;
+
+ srcp = src;
+ dstp = dst;
+ eob = dstp + dstsiz;
+ lpp = cpp = NULL;
+ if (dnptrs != NULL) {
+ if ((msg = *dnptrs++) != NULL) {
+ for (cpp = dnptrs; *cpp != NULL; cpp++)
+ (void)NULL;
+ lpp = cpp; /*%< end of list to search */
+ }
+ } else
+ msg = NULL;
+
+ /* make sure the domain we are about to add is legal */
+ l = 0;
+ do {
+ int l0;
+
+ n = *srcp;
+ if ((n & NS_CMPRSFLGS) == NS_CMPRSFLGS) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ if ((l0 = labellen(srcp)) < 0) {
+ errno = EINVAL;
+ return (-1);
+ }
+ l += l0 + 1;
+ if (l > MAXCDNAME) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ srcp += l0 + 1;
+ } while (n != 0);
+
+ /* from here on we need to reset compression pointer array on error */
+ srcp = src;
+ do {
+ /* Look to see if we can use pointers. */
+ n = *srcp;
+ if (n != 0 && msg != NULL) {
+ l = dn_find(srcp, msg, (const u_char * const *)dnptrs,
+ (const u_char * const *)lpp);
+ if (l >= 0) {
+ if (dstp + 1 >= eob) {
+ goto cleanup;
+ }
+ *dstp++ = (l >> 8) | NS_CMPRSFLGS;
+ *dstp++ = l % 256;
+ return (dstp - dst);
+ }
+ /* Not found, save it. */
+ if (lastdnptr != NULL && cpp < lastdnptr - 1 &&
+ (dstp - msg) < 0x4000 && first) {
+ *cpp++ = dstp;
+ *cpp = NULL;
+ first = 0;
+ }
+ }
+ /* copy label to buffer */
+ if ((n & NS_CMPRSFLGS) == NS_CMPRSFLGS) {
+ /* Should not happen. */
+ goto cleanup;
+ }
+ n = labellen(srcp);
+ if (dstp + 1 + n >= eob) {
+ goto cleanup;
+ }
+ memcpy(dstp, srcp, n + 1);
+ srcp += n + 1;
+ dstp += n + 1;
+ } while (n != 0);
+
+ if (dstp > eob) {
+cleanup:
+ if (msg != NULL)
+ *lpp = NULL;
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ return (dstp - dst);
+}
+
+/*%
+ * Expand compressed domain name to presentation format.
+ *
+ * return:
+ *\li Number of bytes read out of `src', or -1 (with errno set).
+ *
+ * note:
+ *\li Root domain returns as "." not "".
+ */
+int
+ns_name_uncompress(const u_char *msg, const u_char *eom, const u_char *src,
+ char *dst, size_t dstsiz)
+{
+ u_char tmp[NS_MAXCDNAME];
+ int n;
+
+ if ((n = ns_name_unpack(msg, eom, src, tmp, sizeof tmp)) == -1)
+ return (-1);
+ if (ns_name_ntop(tmp, dst, dstsiz) == -1)
+ return (-1);
+ return (n);
+}
+
+/*%
+ * Compress a domain name into wire format, using compression pointers.
+ *
+ * return:
+ *\li Number of bytes consumed in `dst' or -1 (with errno set).
+ *
+ * notes:
+ *\li 'dnptrs' is an array of pointers to previous compressed names.
+ *\li dnptrs[0] is a pointer to the beginning of the message.
+ *\li The list ends with NULL. 'lastdnptr' is a pointer to the end of the
+ * array pointed to by 'dnptrs'. Side effect is to update the list of
+ * pointers for labels inserted into the message as we compress the name.
+ *\li If 'dnptr' is NULL, we don't try to compress names. If 'lastdnptr'
+ * is NULL, we don't update the list.
+ */
+int
+ns_name_compress(const char *src, u_char *dst, size_t dstsiz,
+ const u_char **dnptrs, const u_char **lastdnptr)
+{
+ u_char tmp[NS_MAXCDNAME];
+
+ if (ns_name_pton(src, tmp, sizeof tmp) == -1)
+ return (-1);
+ return (ns_name_pack(tmp, dst, dstsiz, dnptrs, lastdnptr));
+}
+
+/*%
+ * Reset dnptrs so that there are no active references to pointers at or
+ * after src.
+ */
+void
+ns_name_rollback(const u_char *src, const u_char **dnptrs,
+ const u_char **lastdnptr)
+{
+ while (dnptrs < lastdnptr && *dnptrs != NULL) {
+ if (*dnptrs >= src) {
+ *dnptrs = NULL;
+ break;
+ }
+ dnptrs++;
+ }
+}
+
+/*%
+ * Advance *ptrptr to skip over the compressed name it points at.
+ *
+ * return:
+ *\li 0 on success, -1 (with errno set) on failure.
+ */
+int
+ns_name_skip(const u_char **ptrptr, const u_char *eom)
+{
+ const u_char *cp;
+ u_int n;
+ int l;
+
+ cp = *ptrptr;
+ while (cp < eom && (n = *cp++) != 0) {
+ /* Check for indirection. */
+ switch (n & NS_CMPRSFLGS) {
+ case 0: /*%< normal case, n == len */
+ cp += n;
+ continue;
+ case NS_TYPE_ELT: /*%< EDNS0 extended label */
+ if ((l = labellen(cp - 1)) < 0) {
+ errno = EMSGSIZE; /*%< XXX */
+ return (-1);
+ }
+ cp += l;
+ continue;
+ case NS_CMPRSFLGS: /*%< indirection */
+ cp++;
+ break;
+ default: /*%< illegal type */
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ break;
+ }
+ if (cp > eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ *ptrptr = cp;
+ return (0);
+}
+
+/* Find the number of octets an nname takes up, including the root label.
+ * (This is basically ns_name_skip() without compression-pointer support.)
+ * ((NOTE: can only return zero if passed-in namesiz argument is zero.))
+ */
+ssize_t
+ns_name_length(ns_nname_ct nname, size_t namesiz) {
+ ns_nname_ct orig = nname;
+ u_int n;
+
+ while (namesiz-- > 0 && (n = *nname++) != 0) {
+ if ((n & NS_CMPRSFLGS) != 0) {
+ errno = EISDIR;
+ return (-1);
+ }
+ if (n > namesiz) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ nname += n;
+ namesiz -= n;
+ }
+ return (nname - orig);
+}
+
+/* Compare two nname's for equality. Return -1 on error (setting errno).
+ */
+int
+ns_name_eq(ns_nname_ct a, size_t as, ns_nname_ct b, size_t bs) {
+ ns_nname_ct ae = a + as, be = b + bs;
+ int ac, bc;
+
+ while (ac = *a, bc = *b, ac != 0 && bc != 0) {
+ if ((ac & NS_CMPRSFLGS) != 0 || (bc & NS_CMPRSFLGS) != 0) {
+ errno = EISDIR;
+ return (-1);
+ }
+ if (a + ac >= ae || b + bc >= be) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ if (ac != bc || strncasecmp((const char *) ++a,
+ (const char *) ++b, ac) != 0)
+ return (0);
+ a += ac, b += bc;
+ }
+ return (ac == 0 && bc == 0);
+}
+
+/* Is domain "A" owned by (at or below) domain "B"?
+ */
+int
+ns_name_owned(ns_namemap_ct a, int an, ns_namemap_ct b, int bn) {
+ /* If A is shorter, it cannot be owned by B. */
+ if (an < bn)
+ return (0);
+
+ /* If they are unequal before the length of the shorter, A cannot... */
+ while (bn > 0) {
+ if (a->len != b->len ||
+ strncasecmp((const char *) a->base,
+ (const char *) b->base, a->len) != 0)
+ return (0);
+ a++, an--;
+ b++, bn--;
+ }
+
+ /* A might be longer or not, but either way, B owns it. */
+ return (1);
+}
+
+/* Build an array of <base,len> tuples from an nname, top-down order.
+ * Return the number of tuples (labels) thus discovered.
+ */
+int
+ns_name_map(ns_nname_ct nname, size_t namelen, ns_namemap_t map, int mapsize) {
+ u_int n;
+ int l;
+
+ n = *nname++;
+ namelen--;
+
+ /* Root zone? */
+ if (n == 0) {
+ /* Extra data follows name? */
+ if (namelen > 0) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ return (0);
+ }
+
+ /* Compression pointer? */
+ if ((n & NS_CMPRSFLGS) != 0) {
+ errno = EISDIR;
+ return (-1);
+ }
+
+ /* Label too long? */
+ if (n > namelen) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+
+ /* Recurse to get rest of name done first. */
+ l = ns_name_map(nname + n, namelen - n, map, mapsize);
+ if (l < 0)
+ return (-1);
+
+ /* Too many labels? */
+ if (l >= mapsize) {
+ errno = ENAMETOOLONG;
+ return (-1);
+ }
+
+ /* We're on our way back up-stack, store current map data. */
+ map[l].base = nname;
+ map[l].len = n;
+ return (l + 1);
+}
+
+/* Count the labels in a domain name. Root counts, so COM. has two. This
+ * is to make the result comparable to the result of ns_name_map().
+ */
+int
+ns_name_labels(ns_nname_ct nname, size_t namesiz) {
+ int ret = 0;
+ u_int n;
+
+ while (namesiz-- > 0 && (n = *nname++) != 0) {
+ if ((n & NS_CMPRSFLGS) != 0) {
+ errno = EISDIR;
+ return (-1);
+ }
+ if (n > namesiz) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ nname += n;
+ namesiz -= n;
+ ret++;
+ }
+ return (ret + 1);
+}
+
+/* Private. */
+
+/*%
+ * Thinking in noninternationalized USASCII (per the DNS spec),
+ * is this characted special ("in need of quoting") ?
+ *
+ * return:
+ *\li boolean.
+ */
+static int
+special(int ch) {
+ switch (ch) {
+ case 0x22: /*%< '"' */
+ case 0x2E: /*%< '.' */
+ case 0x3B: /*%< ';' */
+ case 0x5C: /*%< '\\' */
+ case 0x28: /*%< '(' */
+ case 0x29: /*%< ')' */
+ /* Special modifiers in zone files. */
+ case 0x40: /*%< '@' */
+ case 0x24: /*%< '$' */
+ return (1);
+ default:
+ return (0);
+ }
+}
+
+/*%
+ * Thinking in noninternationalized USASCII (per the DNS spec),
+ * is this character visible and not a space when printed ?
+ *
+ * return:
+ *\li boolean.
+ */
+static int
+printable(int ch) {
+ return (ch > 0x20 && ch < 0x7f);
+}
+
+/*%
+ * Thinking in noninternationalized USASCII (per the DNS spec),
+ * convert this character to lower case if it's upper case.
+ */
+static int
+mklower(int ch) {
+ if (ch >= 0x41 && ch <= 0x5A)
+ return (ch + 0x20);
+ return (ch);
+}
+
+/*%
+ * Search for the counted-label name in an array of compressed names.
+ *
+ * return:
+ *\li offset from msg if found, or -1.
+ *
+ * notes:
+ *\li dnptrs is the pointer to the first name on the list,
+ *\li not the pointer to the start of the message.
+ */
+static int
+dn_find(const u_char *domain, const u_char *msg,
+ const u_char * const *dnptrs,
+ const u_char * const *lastdnptr)
+{
+ const u_char *dn, *cp, *sp;
+ const u_char * const *cpp;
+ u_int n;
+
+ for (cpp = dnptrs; cpp < lastdnptr; cpp++) {
+ sp = *cpp;
+ /*
+ * terminate search on:
+ * root label
+ * compression pointer
+ * unusable offset
+ */
+ while (*sp != 0 && (*sp & NS_CMPRSFLGS) == 0 &&
+ (sp - msg) < 0x4000) {
+ dn = domain;
+ cp = sp;
+ while ((n = *cp++) != 0) {
+ /*
+ * check for indirection
+ */
+ switch (n & NS_CMPRSFLGS) {
+ case 0: /*%< normal case, n == len */
+ n = labellen(cp - 1); /*%< XXX */
+ if (n != *dn++)
+ goto next;
+
+ for ((void)NULL; n > 0; n--)
+ if (mklower(*dn++) !=
+ mklower(*cp++))
+ goto next;
+ /* Is next root for both ? */
+ if (*dn == '\0' && *cp == '\0')
+ return (sp - msg);
+ if (*dn)
+ continue;
+ goto next;
+ case NS_CMPRSFLGS: /*%< indirection */
+ cp = msg + (((n & 0x3f) << 8) | *cp);
+ break;
+
+ default: /*%< illegal type */
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ }
+ next: ;
+ sp += *sp + 1;
+ }
+ }
+ errno = ENOENT;
+ return (-1);
+}
+
+static int
+decode_bitstring(const unsigned char **cpp, char *dn, const char *eom)
+{
+ const unsigned char *cp = *cpp;
+ char *beg = dn, tc;
+ int b, blen, plen, i;
+
+ if ((blen = (*cp & 0xff)) == 0)
+ blen = 256;
+ plen = (blen + 3) / 4;
+ plen += sizeof("\\[x/]") + (blen > 99 ? 3 : (blen > 9) ? 2 : 1);
+ if (dn + plen >= eom)
+ return (-1);
+
+ cp++;
+ i = SPRINTF((dn, "\\[x"));
+ if (i < 0)
+ return (-1);
+ dn += i;
+ for (b = blen; b > 7; b -= 8, cp++) {
+ i = SPRINTF((dn, "%02x", *cp & 0xff));
+ if (i < 0)
+ return (-1);
+ dn += i;
+ }
+ if (b > 4) {
+ tc = *cp++;
+ i = SPRINTF((dn, "%02x", tc & (0xff << (8 - b))));
+ if (i < 0)
+ return (-1);
+ dn += i;
+ } else if (b > 0) {
+ tc = *cp++;
+ i = SPRINTF((dn, "%1x",
+ ((tc >> 4) & 0x0f) & (0x0f << (4 - b))));
+ if (i < 0)
+ return (-1);
+ dn += i;
+ }
+ i = SPRINTF((dn, "/%d]", blen));
+ if (i < 0)
+ return (-1);
+ dn += i;
+
+ *cpp = cp;
+ return (dn - beg);
+}
+
+static int
+encode_bitsring(const char **bp, const char *end, unsigned char **labelp,
+ unsigned char ** dst, unsigned const char *eom)
+{
+ int afterslash = 0;
+ const char *cp = *bp;
+ unsigned char *tp;
+ char c;
+ const char *beg_blen;
+ char *end_blen = NULL;
+ int value = 0, count = 0, tbcount = 0, blen = 0;
+
+ beg_blen = end_blen = NULL;
+
+ /* a bitstring must contain at least 2 characters */
+ if (end - cp < 2)
+ return (EINVAL);
+
+ /* XXX: currently, only hex strings are supported */
+ if (*cp++ != 'x')
+ return (EINVAL);
+ if (!isxdigit((*cp) & 0xff)) /*%< reject '\[x/BLEN]' */
+ return (EINVAL);
+
+ for (tp = *dst + 1; cp < end && tp < eom; cp++) {
+ switch((c = *cp)) {
+ case ']': /*%< end of the bitstring */
+ if (afterslash) {
+ if (beg_blen == NULL)
+ return (EINVAL);
+ blen = (int)strtol(beg_blen, &end_blen, 10);
+ if (*end_blen != ']')
+ return (EINVAL);
+ }
+ if (count)
+ *tp++ = ((value << 4) & 0xff);
+ cp++; /*%< skip ']' */
+ goto done;
+ case '/':
+ afterslash = 1;
+ break;
+ default:
+ if (afterslash) {
+ if (!isdigit(c&0xff))
+ return (EINVAL);
+ if (beg_blen == NULL) {
+
+ if (c == '0') {
+ /* blen never begings with 0 */
+ return (EINVAL);
+ }
+ beg_blen = cp;
+ }
+ } else {
+ if (!isxdigit(c&0xff))
+ return (EINVAL);
+ value <<= 4;
+ value += digitvalue[(int)c];
+ count += 4;
+ tbcount += 4;
+ if (tbcount > 256)
+ return (EINVAL);
+ if (count == 8) {
+ *tp++ = value;
+ count = 0;
+ }
+ }
+ break;
+ }
+ }
+ done:
+ if (cp >= end || tp >= eom)
+ return (EMSGSIZE);
+
+ /*
+ * bit length validation:
+ * If a <length> is present, the number of digits in the <bit-data>
+ * MUST be just sufficient to contain the number of bits specified
+ * by the <length>. If there are insignificant bits in a final
+ * hexadecimal or octal digit, they MUST be zero.
+ * RFC2673, Section 3.2.
+ */
+ if (blen > 0) {
+ int traillen;
+
+ if (((blen + 3) & ~3) != tbcount)
+ return (EINVAL);
+ traillen = tbcount - blen; /*%< between 0 and 3 */
+ if (((value << (8 - traillen)) & 0xff) != 0)
+ return (EINVAL);
+ }
+ else
+ blen = tbcount;
+ if (blen == 256)
+ blen = 0;
+
+ /* encode the type and the significant bit fields */
+ **labelp = DNS_LABELTYPE_BITSTRING;
+ **dst = blen;
+
+ *bp = cp;
+ *dst = tp;
+
+ return (0);
+}
+
+static int
+labellen(const u_char *lp)
+{
+ int bitlen;
+ u_char l = *lp;
+
+ if ((l & NS_CMPRSFLGS) == NS_CMPRSFLGS) {
+ /* should be avoided by the caller */
+ return (-1);
+ }
+
+ if ((l & NS_CMPRSFLGS) == NS_TYPE_ELT) {
+ if (l == DNS_LABELTYPE_BITSTRING) {
+ if ((bitlen = *(lp + 1)) == 0)
+ bitlen = 256;
+ return ((bitlen + 7 ) / 8 + 1);
+ }
+ return (-1); /*%< unknwon ELT */
+ }
+ return (l);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/nameser/ns_netint.c b/usr/src/lib/libresolv2_joy/common/nameser/ns_netint.c
new file mode 100644
index 0000000000..e196217f9b
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/nameser/ns_netint.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef lint
+static const char rcsid[] = "$Id: ns_netint.c,v 1.3 2005/04/27 04:56:40 sra Exp $";
+#endif
+
+/* Import. */
+
+#include "port_before.h"
+
+#include <arpa/nameser.h>
+
+#include "port_after.h"
+
+#pragma redefine_extname __ns_get16 __joy_ns_get16
+#pragma redefine_extname __ns_get32 __joy_ns_get32
+
+/* Public. */
+
+u_int
+ns_get16(const u_char *src) {
+ u_int dst;
+
+ NS_GET16(dst, src);
+ return (dst);
+}
+
+u_long
+ns_get32(const u_char *src) {
+ u_long dst;
+
+ NS_GET32(dst, src);
+ return (dst);
+}
+
+void
+ns_put16(u_int src, u_char *dst) {
+ NS_PUT16(src, dst);
+}
+
+void
+ns_put32(u_long src, u_char *dst) {
+ NS_PUT32(src, dst);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/nameser/ns_newmsg.c b/usr/src/lib/libresolv2_joy/common/nameser/ns_newmsg.c
new file mode 100644
index 0000000000..c18dd060e1
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/nameser/ns_newmsg.c
@@ -0,0 +1,273 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/*
+ * Copyright (C) 2009 Internet Systems Consortium, Inc. ("ISC")
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef lint
+static const char rcsid[] = "$Id: ns_newmsg.c,v 1.3 2009/02/26 10:48:57 marka Exp $";
+#endif
+
+#include <port_before.h>
+
+#include <arpa/nameser.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <string.h>
+
+#include <port_after.h>
+
+static int rdcpy(ns_newmsg *, ns_type, const u_char *, size_t);
+
+/* Initialize a "newmsg" object to empty.
+ */
+int
+ns_newmsg_init(u_char *buffer, size_t bufsiz, ns_newmsg *handle) {
+ ns_msg *msg = &handle->msg;
+
+ memset(handle, 0, sizeof *handle);
+ msg->_msg = buffer;
+ msg->_eom = buffer + bufsiz;
+ msg->_sect = ns_s_qd;
+ msg->_rrnum = 0;
+ msg->_msg_ptr = buffer + NS_HFIXEDSZ;
+ handle->dnptrs[0] = msg->_msg;
+ handle->dnptrs[1] = NULL;
+ handle->lastdnptr = &handle->dnptrs[sizeof handle->dnptrs /
+ sizeof handle->dnptrs[0] - 1];
+ return (0);
+}
+
+/* Initialize a "newmsg" object by copying an existing parsed message.
+ */
+int
+ns_newmsg_copy(ns_newmsg *handle, ns_msg *msg) {
+ ns_flag flag;
+ ns_sect sect;
+
+ ns_newmsg_id(handle, ns_msg_id(*msg));
+ for (flag = ns_f_qr; flag < ns_f_max; flag++)
+ ns_newmsg_flag(handle, flag, ns_msg_getflag(*msg, flag));
+ for (sect = ns_s_qd; sect < ns_s_max; sect++) {
+ int i, count;
+
+ count = ns_msg_count(*msg, sect);
+ for (i = 0; i < count; i++) {
+ ns_rr2 rr;
+ int x;
+
+ if (ns_parserr2(msg, sect, i, &rr) < 0)
+ return (-1);
+ if (sect == ns_s_qd)
+ x = ns_newmsg_q(handle,
+ ns_rr_nname(rr),
+ ns_rr_type(rr),
+ ns_rr_class(rr));
+ else
+ x = ns_newmsg_rr(handle, sect,
+ ns_rr_nname(rr),
+ ns_rr_type(rr),
+ ns_rr_class(rr),
+ ns_rr_ttl(rr),
+ ns_rr_rdlen(rr),
+ ns_rr_rdata(rr));
+ if (x < 0)
+ return (-1);
+ }
+ }
+ return (0);
+}
+
+/* Set the message-ID in a "newmsg" object.
+ */
+void
+ns_newmsg_id(ns_newmsg *handle, u_int16_t id) {
+ ns_msg *msg = &handle->msg;
+
+ msg->_id = id;
+}
+
+/* Set a flag (including rcode or opcode) in a "newmsg" object.
+ */
+void
+ns_newmsg_flag(ns_newmsg *handle, ns_flag flag, u_int value) {
+ extern struct _ns_flagdata _ns_flagdata[16];
+ struct _ns_flagdata *fd = &_ns_flagdata[flag];
+ ns_msg *msg = &handle->msg;
+
+ assert(flag < ns_f_max);
+ msg->_flags &= (~fd->mask);
+ msg->_flags |= (value << fd->shift);
+}
+
+/* Add a question (or zone, if it's an update) to a "newmsg" object.
+ */
+int
+ns_newmsg_q(ns_newmsg *handle, ns_nname_ct qname,
+ ns_type qtype, ns_class qclass)
+{
+ ns_msg *msg = &handle->msg;
+ u_char *t;
+ int n;
+
+ if (msg->_sect != ns_s_qd) {
+ errno = ENODEV;
+ return (-1);
+ }
+ t = (u_char *) (unsigned long) msg->_msg_ptr;
+ if (msg->_rrnum == 0)
+ msg->_sections[ns_s_qd] = t;
+ n = ns_name_pack(qname, t, msg->_eom - t,
+ handle->dnptrs, handle->lastdnptr);
+ if (n < 0)
+ return (-1);
+ t += n;
+ if (t + QFIXEDSZ >= msg->_eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ NS_PUT16(qtype, t);
+ NS_PUT16(qclass, t);
+ msg->_msg_ptr = t;
+ msg->_counts[ns_s_qd] = ++msg->_rrnum;
+ return (0);
+}
+
+/* Add an RR to a "newmsg" object.
+ */
+int
+ns_newmsg_rr(ns_newmsg *handle, ns_sect sect,
+ ns_nname_ct name, ns_type type,
+ ns_class rr_class, u_int32_t ttl,
+ u_int16_t rdlen, const u_char *rdata)
+{
+ ns_msg *msg = &handle->msg;
+ u_char *t;
+ int n;
+
+ if (sect < msg->_sect) {
+ errno = ENODEV;
+ return (-1);
+ }
+ t = (u_char *) (unsigned long) msg->_msg_ptr;
+ if (sect > msg->_sect) {
+ msg->_sect = sect;
+ msg->_sections[sect] = t;
+ msg->_rrnum = 0;
+ }
+ n = ns_name_pack(name, t, msg->_eom - t,
+ handle->dnptrs, handle->lastdnptr);
+ if (n < 0)
+ return (-1);
+ t += n;
+ if (t + RRFIXEDSZ + rdlen >= msg->_eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ NS_PUT16(type, t);
+ NS_PUT16(rr_class, t);
+ NS_PUT32(ttl, t);
+ msg->_msg_ptr = t;
+ if (rdcpy(handle, type, rdata, rdlen) < 0)
+ return (-1);
+ msg->_counts[sect] = ++msg->_rrnum;
+ return (0);
+}
+
+/* Complete a "newmsg" object and return its size for use in write().
+ * (Note: the "newmsg" object is also made ready for ns_parserr() etc.)
+ */
+size_t
+ns_newmsg_done(ns_newmsg *handle) {
+ ns_msg *msg = &handle->msg;
+ ns_sect sect;
+ u_char *t;
+
+ t = (u_char *) (unsigned long) msg->_msg;
+ NS_PUT16(msg->_id, t);
+ NS_PUT16(msg->_flags, t);
+ for (sect = 0; sect < ns_s_max; sect++)
+ NS_PUT16(msg->_counts[sect], t);
+ msg->_eom = msg->_msg_ptr;
+ msg->_sect = ns_s_max;
+ msg->_rrnum = -1;
+ msg->_msg_ptr = NULL;
+ return (msg->_eom - msg->_msg);
+}
+
+/* Private. */
+
+/* Copy an RDATA, using compression pointers where RFC1035 permits.
+ */
+static int
+rdcpy(ns_newmsg *handle, ns_type type, const u_char *rdata, size_t rdlen) {
+ ns_msg *msg = &handle->msg;
+ u_char *p = (u_char *) (unsigned long) msg->_msg_ptr;
+ u_char *t = p + NS_INT16SZ;
+ u_char *s = t;
+ int n;
+
+ switch (type) {
+ case ns_t_soa:
+ /* MNAME. */
+ n = ns_name_pack(rdata, t, msg->_eom - t,
+ handle->dnptrs, handle->lastdnptr);
+ if (n < 0)
+ return (-1);
+ t += n;
+ if (ns_name_skip(&rdata, msg->_eom) < 0)
+ return (-1);
+
+ /* ANAME. */
+ n = ns_name_pack(rdata, t, msg->_eom - t,
+ handle->dnptrs, handle->lastdnptr);
+ if (n < 0)
+ return (-1);
+ t += n;
+ if (ns_name_skip(&rdata, msg->_eom) < 0)
+ return (-1);
+
+ /* Serial, Refresh, Retry, Expiry, and Minimum. */
+ if ((msg->_eom - t) < (NS_INT32SZ * 5)) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ memcpy(t, rdata, NS_INT32SZ * 5);
+ t += (NS_INT32SZ * 5);
+ break;
+ case ns_t_ptr:
+ case ns_t_cname:
+ case ns_t_ns:
+ /* PTRDNAME, CNAME, or NSDNAME. */
+ n = ns_name_pack(rdata, t, msg->_eom - t,
+ handle->dnptrs, handle->lastdnptr);
+ if (n < 0)
+ return (-1);
+ t += n;
+ break;
+ default:
+ memcpy(t, rdata, rdlen);
+ t += rdlen;
+ }
+ NS_PUT16(t - s, p);
+ msg->_msg_ptr = t;
+ return (0);
+}
+
diff --git a/usr/src/lib/libresolv2_joy/common/nameser/ns_parse.c b/usr/src/lib/libresolv2_joy/common/nameser/ns_parse.c
new file mode 100644
index 0000000000..ba11eea707
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/nameser/ns_parse.c
@@ -0,0 +1,275 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef lint
+static const char rcsid[] = "$Id: ns_parse.c,v 1.10 2009/01/23 19:59:16 each Exp $";
+#endif
+
+/* Import. */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <resolv_joy.h>
+#include <string.h>
+
+#include "port_after.h"
+
+/* Forward. */
+
+static void setsection(ns_msg *msg, ns_sect sect);
+
+/* Macros. */
+
+#if !defined(SOLARIS2) || defined(__COVERITY__)
+#define RETERR(err) do { errno = (err); return (-1); } while (0)
+#else
+#define RETERR(err) \
+ do { errno = (err); if (errno == errno) return (-1); } while (0)
+#endif
+
+#define PARSE_FMT_PRESO 0 /* Parse using presentation-format names */
+#define PARSE_FMT_WIRE 1 /* Parse using network-format names */
+
+/* Public. */
+
+/* These need to be in the same order as the nres.h:ns_flag enum. */
+struct _ns_flagdata _ns_flagdata[16] = {
+ { 0x8000, 15 }, /*%< qr. */
+ { 0x7800, 11 }, /*%< opcode. */
+ { 0x0400, 10 }, /*%< aa. */
+ { 0x0200, 9 }, /*%< tc. */
+ { 0x0100, 8 }, /*%< rd. */
+ { 0x0080, 7 }, /*%< ra. */
+ { 0x0040, 6 }, /*%< z. */
+ { 0x0020, 5 }, /*%< ad. */
+ { 0x0010, 4 }, /*%< cd. */
+ { 0x000f, 0 }, /*%< rcode. */
+ { 0x0000, 0 }, /*%< expansion (1/6). */
+ { 0x0000, 0 }, /*%< expansion (2/6). */
+ { 0x0000, 0 }, /*%< expansion (3/6). */
+ { 0x0000, 0 }, /*%< expansion (4/6). */
+ { 0x0000, 0 }, /*%< expansion (5/6). */
+ { 0x0000, 0 }, /*%< expansion (6/6). */
+};
+
+int ns_msg_getflag(ns_msg handle, int flag) {
+ return(((handle)._flags & _ns_flagdata[flag].mask) >> _ns_flagdata[flag].shift);
+}
+
+int
+ns_skiprr(const u_char *ptr, const u_char *eom, ns_sect section, int count) {
+ const u_char *optr = ptr;
+
+ for ((void)NULL; count > 0; count--) {
+ int b, rdlength;
+
+ b = dn_skipname(ptr, eom);
+ if (b < 0)
+ RETERR(EMSGSIZE);
+ ptr += b/*Name*/ + NS_INT16SZ/*Type*/ + NS_INT16SZ/*Class*/;
+ if (section != ns_s_qd) {
+ if (ptr + NS_INT32SZ + NS_INT16SZ > eom)
+ RETERR(EMSGSIZE);
+ ptr += NS_INT32SZ/*TTL*/;
+ NS_GET16(rdlength, ptr);
+ ptr += rdlength/*RData*/;
+ }
+ }
+ if (ptr > eom)
+ RETERR(EMSGSIZE);
+ return (ptr - optr);
+}
+
+int
+ns_initparse(const u_char *msg, int msglen, ns_msg *handle) {
+ const u_char *eom = msg + msglen;
+ int i;
+
+ handle->_msg = msg;
+ handle->_eom = eom;
+ if (msg + NS_INT16SZ > eom)
+ RETERR(EMSGSIZE);
+ NS_GET16(handle->_id, msg);
+ if (msg + NS_INT16SZ > eom)
+ RETERR(EMSGSIZE);
+ NS_GET16(handle->_flags, msg);
+ for (i = 0; i < ns_s_max; i++) {
+ if (msg + NS_INT16SZ > eom)
+ RETERR(EMSGSIZE);
+ NS_GET16(handle->_counts[i], msg);
+ }
+ for (i = 0; i < ns_s_max; i++)
+ if (handle->_counts[i] == 0)
+ handle->_sections[i] = NULL;
+ else {
+ int b = ns_skiprr(msg, eom, (ns_sect)i,
+ handle->_counts[i]);
+
+ if (b < 0)
+ return (-1);
+ handle->_sections[i] = msg;
+ msg += b;
+ }
+ if (msg != eom)
+ RETERR(EMSGSIZE);
+ setsection(handle, ns_s_max);
+ return (0);
+}
+
+int
+ns_parserr(ns_msg *handle, ns_sect section, int rrnum, ns_rr *rr) {
+ int b;
+ int tmp;
+
+ /* Make section right. */
+ tmp = section;
+ if (tmp < 0 || section >= ns_s_max)
+ RETERR(ENODEV);
+ if (section != handle->_sect)
+ setsection(handle, section);
+
+ /* Make rrnum right. */
+ if (rrnum == -1)
+ rrnum = handle->_rrnum;
+ if (rrnum < 0 || rrnum >= handle->_counts[(int)section])
+ RETERR(ENODEV);
+ if (rrnum < handle->_rrnum)
+ setsection(handle, section);
+ if (rrnum > handle->_rrnum) {
+ b = ns_skiprr(handle->_msg_ptr, handle->_eom, section,
+ rrnum - handle->_rrnum);
+
+ if (b < 0)
+ return (-1);
+ handle->_msg_ptr += b;
+ handle->_rrnum = rrnum;
+ }
+
+ /* Do the parse. */
+ b = dn_expand(handle->_msg, handle->_eom,
+ handle->_msg_ptr, rr->name, NS_MAXDNAME);
+ if (b < 0)
+ return (-1);
+ handle->_msg_ptr += b;
+ if (handle->_msg_ptr + NS_INT16SZ + NS_INT16SZ > handle->_eom)
+ RETERR(EMSGSIZE);
+ NS_GET16(rr->type, handle->_msg_ptr);
+ NS_GET16(rr->rr_class, handle->_msg_ptr);
+ if (section == ns_s_qd) {
+ rr->ttl = 0;
+ rr->rdlength = 0;
+ rr->rdata = NULL;
+ } else {
+ if (handle->_msg_ptr + NS_INT32SZ + NS_INT16SZ > handle->_eom)
+ RETERR(EMSGSIZE);
+ NS_GET32(rr->ttl, handle->_msg_ptr);
+ NS_GET16(rr->rdlength, handle->_msg_ptr);
+ if (handle->_msg_ptr + rr->rdlength > handle->_eom)
+ RETERR(EMSGSIZE);
+ rr->rdata = handle->_msg_ptr;
+ handle->_msg_ptr += rr->rdlength;
+ }
+ if (++handle->_rrnum > handle->_counts[(int)section])
+ setsection(handle, (ns_sect)((int)section + 1));
+
+ /* All done. */
+ return (0);
+}
+
+/*
+ * This is identical to the above but uses network-format (uncompressed) names.
+ */
+int
+ns_parserr2(ns_msg *handle, ns_sect section, int rrnum, ns_rr2 *rr) {
+ int b;
+ int tmp;
+
+ /* Make section right. */
+ if ((tmp = section) < 0 || section >= ns_s_max)
+ RETERR(ENODEV);
+ if (section != handle->_sect)
+ setsection(handle, section);
+
+ /* Make rrnum right. */
+ if (rrnum == -1)
+ rrnum = handle->_rrnum;
+ if (rrnum < 0 || rrnum >= handle->_counts[(int)section])
+ RETERR(ENODEV);
+ if (rrnum < handle->_rrnum)
+ setsection(handle, section);
+ if (rrnum > handle->_rrnum) {
+ b = ns_skiprr(handle->_msg_ptr, handle->_eom, section,
+ rrnum - handle->_rrnum);
+
+ if (b < 0)
+ return (-1);
+ handle->_msg_ptr += b;
+ handle->_rrnum = rrnum;
+ }
+
+ /* Do the parse. */
+ b = ns_name_unpack2(handle->_msg, handle->_eom, handle->_msg_ptr,
+ rr->nname, NS_MAXNNAME, &rr->nnamel);
+ if (b < 0)
+ return (-1);
+ handle->_msg_ptr += b;
+ if (handle->_msg_ptr + NS_INT16SZ + NS_INT16SZ > handle->_eom)
+ RETERR(EMSGSIZE);
+ NS_GET16(rr->type, handle->_msg_ptr);
+ NS_GET16(rr->rr_class, handle->_msg_ptr);
+ if (section == ns_s_qd) {
+ rr->ttl = 0;
+ rr->rdlength = 0;
+ rr->rdata = NULL;
+ } else {
+ if (handle->_msg_ptr + NS_INT32SZ + NS_INT16SZ > handle->_eom)
+ RETERR(EMSGSIZE);
+ NS_GET32(rr->ttl, handle->_msg_ptr);
+ NS_GET16(rr->rdlength, handle->_msg_ptr);
+ if (handle->_msg_ptr + rr->rdlength > handle->_eom)
+ RETERR(EMSGSIZE);
+ rr->rdata = handle->_msg_ptr;
+ handle->_msg_ptr += rr->rdlength;
+ }
+ if (++handle->_rrnum > handle->_counts[(int)section])
+ setsection(handle, (ns_sect)((int)section + 1));
+
+ /* All done. */
+ return (0);
+}
+
+/* Private. */
+
+static void
+setsection(ns_msg *msg, ns_sect sect) {
+ msg->_sect = sect;
+ if (sect == ns_s_max) {
+ msg->_rrnum = -1;
+ msg->_msg_ptr = NULL;
+ } else {
+ msg->_rrnum = 0;
+ msg->_msg_ptr = msg->_sections[(int)sect];
+ }
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/nameser/ns_print.c b/usr/src/lib/libresolv2_joy/common/nameser/ns_print.c
new file mode 100644
index 0000000000..e70df376c1
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/nameser/ns_print.c
@@ -0,0 +1,1242 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef lint
+static const char rcsid[] = "$Id: ns_print.c,v 1.12 2009/03/03 05:29:58 each Exp $";
+#endif
+
+/* Import. */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <arpa/inet.h>
+
+#include <isc/assertions.h>
+#include <isc/dst.h>
+#include <errno.h>
+#include <resolv_joy.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "port_after.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) ((size_t)sprintf x)
+#endif
+
+/* Forward. */
+
+static size_t prune_origin(const char *name, const char *origin);
+static int charstr(const u_char *rdata, const u_char *edata,
+ char **buf, size_t *buflen);
+static int addname(const u_char *msg, size_t msglen,
+ const u_char **p, const char *origin,
+ char **buf, size_t *buflen);
+static void addlen(size_t len, char **buf, size_t *buflen);
+static int addstr(const char *src, size_t len,
+ char **buf, size_t *buflen);
+static int addtab(size_t len, size_t target, int spaced,
+ char **buf, size_t *buflen);
+
+/* Macros. */
+
+#define T(x) \
+ do { \
+ if ((x) < 0) \
+ return (-1); \
+ } while (0)
+
+static const char base32hex[] =
+ "0123456789ABCDEFGHIJKLMNOPQRSTUV=0123456789abcdefghijklmnopqrstuv";
+
+/* Public. */
+
+/*%
+ * Convert an RR to presentation format.
+ *
+ * return:
+ *\li Number of characters written to buf, or -1 (check errno).
+ */
+int
+ns_sprintrr(const ns_msg *handle, const ns_rr *rr,
+ const char *name_ctx, const char *origin,
+ char *buf, size_t buflen)
+{
+ int n;
+
+ n = ns_sprintrrf(ns_msg_base(*handle), ns_msg_size(*handle),
+ ns_rr_name(*rr), ns_rr_class(*rr), ns_rr_type(*rr),
+ ns_rr_ttl(*rr), ns_rr_rdata(*rr), ns_rr_rdlen(*rr),
+ name_ctx, origin, buf, buflen);
+ return (n);
+}
+
+/*%
+ * Convert the fields of an RR into presentation format.
+ *
+ * return:
+ *\li Number of characters written to buf, or -1 (check errno).
+ */
+int
+ns_sprintrrf(const u_char *msg, size_t msglen,
+ const char *name, ns_class class, ns_type type,
+ u_long ttl, const u_char *rdata, size_t rdlen,
+ const char *name_ctx, const char *origin,
+ char *buf, size_t buflen)
+{
+ const char *obuf = buf;
+ const u_char *edata = rdata + rdlen;
+ int spaced = 0;
+
+ const char *comment;
+ char tmp[100];
+ int len, x;
+
+ /*
+ * Owner.
+ */
+ if (name_ctx != NULL && ns_samename(name_ctx, name) == 1) {
+ T(addstr("\t\t\t", 3, &buf, &buflen));
+ } else {
+ len = prune_origin(name, origin);
+ if (*name == '\0') {
+ goto root;
+ } else if (len == 0) {
+ T(addstr("@\t\t\t", 4, &buf, &buflen));
+ } else {
+ T(addstr(name, len, &buf, &buflen));
+ /* Origin not used or not root, and no trailing dot? */
+ if (((origin == NULL || origin[0] == '\0') ||
+ (origin[0] != '.' && origin[1] != '\0' &&
+ name[len] == '\0')) && name[len - 1] != '.') {
+ root:
+ T(addstr(".", 1, &buf, &buflen));
+ len++;
+ }
+ T(spaced = addtab(len, 24, spaced, &buf, &buflen));
+ }
+ }
+
+ /*
+ * TTL, Class, Type.
+ */
+ T(x = ns_format_ttl(ttl, buf, buflen));
+ addlen(x, &buf, &buflen);
+ len = SPRINTF((tmp, " %s %s", p_class(class), p_type(type)));
+ T(addstr(tmp, len, &buf, &buflen));
+ T(spaced = addtab(x + len, 16, spaced, &buf, &buflen));
+
+ /*
+ * RData.
+ */
+ switch (type) {
+ case ns_t_a:
+ if (rdlen != (size_t)NS_INADDRSZ)
+ goto formerr;
+ (void) inet_ntop(AF_INET, rdata, buf, buflen);
+ addlen(strlen(buf), &buf, &buflen);
+ break;
+
+ case ns_t_cname:
+ case ns_t_mb:
+ case ns_t_mg:
+ case ns_t_mr:
+ case ns_t_ns:
+ case ns_t_ptr:
+ case ns_t_dname:
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+ break;
+
+ case ns_t_hinfo:
+ case ns_t_isdn:
+ /* First word. */
+ T(len = charstr(rdata, edata, &buf, &buflen));
+ if (len == 0)
+ goto formerr;
+ rdata += len;
+ T(addstr(" ", 1, &buf, &buflen));
+
+
+ /* Second word, optional in ISDN records. */
+ if (type == ns_t_isdn && rdata == edata)
+ break;
+
+ T(len = charstr(rdata, edata, &buf, &buflen));
+ if (len == 0)
+ goto formerr;
+ rdata += len;
+ break;
+
+ case ns_t_soa: {
+ u_long t;
+
+ /* Server name. */
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+ T(addstr(" ", 1, &buf, &buflen));
+
+ /* Administrator name. */
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+ T(addstr(" (\n", 3, &buf, &buflen));
+ spaced = 0;
+
+ if ((edata - rdata) != 5*NS_INT32SZ)
+ goto formerr;
+
+ /* Serial number. */
+ t = ns_get32(rdata); rdata += NS_INT32SZ;
+ T(addstr("\t\t\t\t\t", 5, &buf, &buflen));
+ len = SPRINTF((tmp, "%lu", t));
+ T(addstr(tmp, len, &buf, &buflen));
+ T(spaced = addtab(len, 16, spaced, &buf, &buflen));
+ T(addstr("; serial\n", 9, &buf, &buflen));
+ spaced = 0;
+
+ /* Refresh interval. */
+ t = ns_get32(rdata); rdata += NS_INT32SZ;
+ T(addstr("\t\t\t\t\t", 5, &buf, &buflen));
+ T(len = ns_format_ttl(t, buf, buflen));
+ addlen(len, &buf, &buflen);
+ T(spaced = addtab(len, 16, spaced, &buf, &buflen));
+ T(addstr("; refresh\n", 10, &buf, &buflen));
+ spaced = 0;
+
+ /* Retry interval. */
+ t = ns_get32(rdata); rdata += NS_INT32SZ;
+ T(addstr("\t\t\t\t\t", 5, &buf, &buflen));
+ T(len = ns_format_ttl(t, buf, buflen));
+ addlen(len, &buf, &buflen);
+ T(spaced = addtab(len, 16, spaced, &buf, &buflen));
+ T(addstr("; retry\n", 8, &buf, &buflen));
+ spaced = 0;
+
+ /* Expiry. */
+ t = ns_get32(rdata); rdata += NS_INT32SZ;
+ T(addstr("\t\t\t\t\t", 5, &buf, &buflen));
+ T(len = ns_format_ttl(t, buf, buflen));
+ addlen(len, &buf, &buflen);
+ T(spaced = addtab(len, 16, spaced, &buf, &buflen));
+ T(addstr("; expiry\n", 9, &buf, &buflen));
+ spaced = 0;
+
+ /* Minimum TTL. */
+ t = ns_get32(rdata); rdata += NS_INT32SZ;
+ T(addstr("\t\t\t\t\t", 5, &buf, &buflen));
+ T(len = ns_format_ttl(t, buf, buflen));
+ addlen(len, &buf, &buflen);
+ T(addstr(" )", 2, &buf, &buflen));
+ T(spaced = addtab(len, 16, spaced, &buf, &buflen));
+ T(addstr("; minimum\n", 10, &buf, &buflen));
+
+ break;
+ }
+
+ case ns_t_mx:
+ case ns_t_afsdb:
+ case ns_t_rt:
+ case ns_t_kx: {
+ u_int t;
+
+ if (rdlen < (size_t)NS_INT16SZ)
+ goto formerr;
+
+ /* Priority. */
+ t = ns_get16(rdata);
+ rdata += NS_INT16SZ;
+ len = SPRINTF((tmp, "%u ", t));
+ T(addstr(tmp, len, &buf, &buflen));
+
+ /* Target. */
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+
+ break;
+ }
+
+ case ns_t_px: {
+ u_int t;
+
+ if (rdlen < (size_t)NS_INT16SZ)
+ goto formerr;
+
+ /* Priority. */
+ t = ns_get16(rdata);
+ rdata += NS_INT16SZ;
+ len = SPRINTF((tmp, "%u ", t));
+ T(addstr(tmp, len, &buf, &buflen));
+
+ /* Name1. */
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+ T(addstr(" ", 1, &buf, &buflen));
+
+ /* Name2. */
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+
+ break;
+ }
+
+ case ns_t_x25:
+ T(len = charstr(rdata, edata, &buf, &buflen));
+ if (len == 0)
+ goto formerr;
+ rdata += len;
+ break;
+
+ case ns_t_txt:
+ case ns_t_spf:
+ while (rdata < edata) {
+ T(len = charstr(rdata, edata, &buf, &buflen));
+ if (len == 0)
+ goto formerr;
+ rdata += len;
+ if (rdata < edata)
+ T(addstr(" ", 1, &buf, &buflen));
+ }
+ break;
+
+ case ns_t_nsap: {
+ char t[2+255*3];
+
+ (void) inet_nsap_ntoa(rdlen, rdata, t);
+ T(addstr(t, strlen(t), &buf, &buflen));
+ break;
+ }
+
+ case ns_t_aaaa:
+ if (rdlen != (size_t)NS_IN6ADDRSZ)
+ goto formerr;
+ (void) inet_ntop(AF_INET6, rdata, buf, buflen);
+ addlen(strlen(buf), &buf, &buflen);
+ break;
+
+ case ns_t_loc: {
+ char t[255];
+
+ /* XXX protocol format checking? */
+ (void) loc_ntoa(rdata, t);
+ T(addstr(t, strlen(t), &buf, &buflen));
+ break;
+ }
+
+ case ns_t_naptr: {
+ u_int order, preference;
+ char t[50];
+
+ if (rdlen < 2U*NS_INT16SZ)
+ goto formerr;
+
+ /* Order, Precedence. */
+ order = ns_get16(rdata); rdata += NS_INT16SZ;
+ preference = ns_get16(rdata); rdata += NS_INT16SZ;
+ len = SPRINTF((t, "%u %u ", order, preference));
+ T(addstr(t, len, &buf, &buflen));
+
+ /* Flags. */
+ T(len = charstr(rdata, edata, &buf, &buflen));
+ if (len == 0)
+ goto formerr;
+ rdata += len;
+ T(addstr(" ", 1, &buf, &buflen));
+
+ /* Service. */
+ T(len = charstr(rdata, edata, &buf, &buflen));
+ if (len == 0)
+ goto formerr;
+ rdata += len;
+ T(addstr(" ", 1, &buf, &buflen));
+
+ /* Regexp. */
+ T(len = charstr(rdata, edata, &buf, &buflen));
+ if (len < 0)
+ return (-1);
+ if (len == 0)
+ goto formerr;
+ rdata += len;
+ T(addstr(" ", 1, &buf, &buflen));
+
+ /* Server. */
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+ break;
+ }
+
+ case ns_t_srv: {
+ u_int priority, weight, port;
+ char t[50];
+
+ if (rdlen < 3U*NS_INT16SZ)
+ goto formerr;
+
+ /* Priority, Weight, Port. */
+ priority = ns_get16(rdata); rdata += NS_INT16SZ;
+ weight = ns_get16(rdata); rdata += NS_INT16SZ;
+ port = ns_get16(rdata); rdata += NS_INT16SZ;
+ len = SPRINTF((t, "%u %u %u ", priority, weight, port));
+ T(addstr(t, len, &buf, &buflen));
+
+ /* Server. */
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+ break;
+ }
+
+ case ns_t_minfo:
+ case ns_t_rp:
+ /* Name1. */
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+ T(addstr(" ", 1, &buf, &buflen));
+
+ /* Name2. */
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+
+ break;
+
+ case ns_t_wks: {
+ int n, lcnt;
+
+ if (rdlen < 1U + NS_INT32SZ)
+ goto formerr;
+
+ /* Address. */
+ (void) inet_ntop(AF_INET, rdata, buf, buflen);
+ addlen(strlen(buf), &buf, &buflen);
+ rdata += NS_INADDRSZ;
+
+ /* Protocol. */
+ len = SPRINTF((tmp, " %u ( ", *rdata));
+ T(addstr(tmp, len, &buf, &buflen));
+ rdata += NS_INT8SZ;
+
+ /* Bit map. */
+ n = 0;
+ lcnt = 0;
+ while (rdata < edata) {
+ u_int c = *rdata++;
+ do {
+ if (c & 0200) {
+ if (lcnt == 0) {
+ T(addstr("\n\t\t\t\t", 5,
+ &buf, &buflen));
+ lcnt = 10;
+ spaced = 0;
+ }
+ len = SPRINTF((tmp, "%d ", n));
+ T(addstr(tmp, len, &buf, &buflen));
+ lcnt--;
+ }
+ c <<= 1;
+ } while (++n & 07);
+ }
+ T(addstr(")", 1, &buf, &buflen));
+
+ break;
+ }
+
+ case ns_t_key:
+ case ns_t_dnskey: {
+ char base64_key[NS_MD5RSA_MAX_BASE64];
+ u_int keyflags, protocol, algorithm, key_id;
+ const char *leader;
+ int n;
+
+ if (rdlen < 0U + NS_INT16SZ + NS_INT8SZ + NS_INT8SZ)
+ goto formerr;
+
+ /* Key flags, Protocol, Algorithm. */
+ key_id = dst_s_dns_key_id(rdata, edata-rdata);
+ keyflags = ns_get16(rdata); rdata += NS_INT16SZ;
+ protocol = *rdata++;
+ algorithm = *rdata++;
+ len = SPRINTF((tmp, "0x%04x %u %u",
+ keyflags, protocol, algorithm));
+ T(addstr(tmp, len, &buf, &buflen));
+
+ /* Public key data. */
+ len = b64_ntop(rdata, edata - rdata,
+ base64_key, sizeof base64_key);
+ if (len < 0)
+ goto formerr;
+ if (len > 15) {
+ T(addstr(" (", 2, &buf, &buflen));
+ leader = "\n\t\t";
+ spaced = 0;
+ } else
+ leader = " ";
+ for (n = 0; n < len; n += 48) {
+ T(addstr(leader, strlen(leader), &buf, &buflen));
+ T(addstr(base64_key + n, MIN(len - n, 48),
+ &buf, &buflen));
+ }
+ if (len > 15)
+ T(addstr(" )", 2, &buf, &buflen));
+ n = SPRINTF((tmp, " ; key_tag= %u", key_id));
+ T(addstr(tmp, n, &buf, &buflen));
+
+ break;
+ }
+
+ case ns_t_sig:
+ case ns_t_rrsig: {
+ char base64_key[NS_MD5RSA_MAX_BASE64];
+ u_int type, algorithm, labels, footprint;
+ const char *leader;
+ u_long t;
+ int n;
+
+ if (rdlen < 22U)
+ goto formerr;
+
+ /* Type covered, Algorithm, Label count, Original TTL. */
+ type = ns_get16(rdata); rdata += NS_INT16SZ;
+ algorithm = *rdata++;
+ labels = *rdata++;
+ t = ns_get32(rdata); rdata += NS_INT32SZ;
+ len = SPRINTF((tmp, "%s %d %d %lu ",
+ p_type(type), algorithm, labels, t));
+ T(addstr(tmp, len, &buf, &buflen));
+ if (labels > (u_int)dn_count_labels(name))
+ goto formerr;
+
+ /* Signature expiry. */
+ t = ns_get32(rdata); rdata += NS_INT32SZ;
+ len = SPRINTF((tmp, "%s ", p_secstodate(t)));
+ T(addstr(tmp, len, &buf, &buflen));
+
+ /* Time signed. */
+ t = ns_get32(rdata); rdata += NS_INT32SZ;
+ len = SPRINTF((tmp, "%s ", p_secstodate(t)));
+ T(addstr(tmp, len, &buf, &buflen));
+
+ /* Signature Footprint. */
+ footprint = ns_get16(rdata); rdata += NS_INT16SZ;
+ len = SPRINTF((tmp, "%u ", footprint));
+ T(addstr(tmp, len, &buf, &buflen));
+
+ /* Signer's name. */
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+
+ /* Signature. */
+ len = b64_ntop(rdata, edata - rdata,
+ base64_key, sizeof base64_key);
+ if (len > 15) {
+ T(addstr(" (", 2, &buf, &buflen));
+ leader = "\n\t\t";
+ spaced = 0;
+ } else
+ leader = " ";
+ if (len < 0)
+ goto formerr;
+ for (n = 0; n < len; n += 48) {
+ T(addstr(leader, strlen(leader), &buf, &buflen));
+ T(addstr(base64_key + n, MIN(len - n, 48),
+ &buf, &buflen));
+ }
+ if (len > 15)
+ T(addstr(" )", 2, &buf, &buflen));
+ break;
+ }
+
+ case ns_t_nxt: {
+ int n, c;
+
+ /* Next domain name. */
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+
+ /* Type bit map. */
+ n = edata - rdata;
+ for (c = 0; c < n*8; c++)
+ if (NS_NXT_BIT_ISSET(c, rdata)) {
+ len = SPRINTF((tmp, " %s", p_type(c)));
+ T(addstr(tmp, len, &buf, &buflen));
+ }
+ break;
+ }
+
+ case ns_t_cert: {
+ u_int c_type, key_tag, alg;
+ int n;
+ unsigned int siz;
+ char base64_cert[8192], tmp[40];
+ const char *leader;
+
+ c_type = ns_get16(rdata); rdata += NS_INT16SZ;
+ key_tag = ns_get16(rdata); rdata += NS_INT16SZ;
+ alg = (u_int) *rdata++;
+
+ len = SPRINTF((tmp, "%d %d %d ", c_type, key_tag, alg));
+ T(addstr(tmp, len, &buf, &buflen));
+ siz = (edata-rdata)*4/3 + 4; /* "+4" accounts for trailing \0 */
+ if (siz > sizeof(base64_cert) * 3/4) {
+ const char *str = "record too long to print";
+ T(addstr(str, strlen(str), &buf, &buflen));
+ }
+ else {
+ len = b64_ntop(rdata, edata-rdata, base64_cert, siz);
+
+ if (len < 0)
+ goto formerr;
+ else if (len > 15) {
+ T(addstr(" (", 2, &buf, &buflen));
+ leader = "\n\t\t";
+ spaced = 0;
+ }
+ else
+ leader = " ";
+
+ for (n = 0; n < len; n += 48) {
+ T(addstr(leader, strlen(leader),
+ &buf, &buflen));
+ T(addstr(base64_cert + n, MIN(len - n, 48),
+ &buf, &buflen));
+ }
+ if (len > 15)
+ T(addstr(" )", 2, &buf, &buflen));
+ }
+ break;
+ }
+
+ case ns_t_tkey: {
+ /* KJD - need to complete this */
+ u_long t;
+ int mode, err, keysize;
+
+ /* Algorithm name. */
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+ T(addstr(" ", 1, &buf, &buflen));
+
+ /* Inception. */
+ t = ns_get32(rdata); rdata += NS_INT32SZ;
+ len = SPRINTF((tmp, "%s ", p_secstodate(t)));
+ T(addstr(tmp, len, &buf, &buflen));
+
+ /* Experation. */
+ t = ns_get32(rdata); rdata += NS_INT32SZ;
+ len = SPRINTF((tmp, "%s ", p_secstodate(t)));
+ T(addstr(tmp, len, &buf, &buflen));
+
+ /* Mode , Error, Key Size. */
+ /* Priority, Weight, Port. */
+ mode = ns_get16(rdata); rdata += NS_INT16SZ;
+ err = ns_get16(rdata); rdata += NS_INT16SZ;
+ keysize = ns_get16(rdata); rdata += NS_INT16SZ;
+ len = SPRINTF((tmp, "%u %u %u ", mode, err, keysize));
+ T(addstr(tmp, len, &buf, &buflen));
+
+ /* XXX need to dump key, print otherdata length & other data */
+ break;
+ }
+
+ case ns_t_tsig: {
+ /* BEW - need to complete this */
+ int n;
+
+ T(len = addname(msg, msglen, &rdata, origin, &buf, &buflen));
+ T(addstr(" ", 1, &buf, &buflen));
+ rdata += 8; /*%< time */
+ n = ns_get16(rdata); rdata += INT16SZ;
+ rdata += n; /*%< sig */
+ n = ns_get16(rdata); rdata += INT16SZ; /*%< original id */
+ sprintf(buf, "%d", ns_get16(rdata));
+ rdata += INT16SZ;
+ addlen(strlen(buf), &buf, &buflen);
+ break;
+ }
+
+ case ns_t_a6: {
+ struct in6_addr a;
+ int pbyte, pbit;
+
+ /* prefix length */
+ if (rdlen == 0U) goto formerr;
+ len = SPRINTF((tmp, "%d ", *rdata));
+ T(addstr(tmp, len, &buf, &buflen));
+ pbit = *rdata;
+ if (pbit > 128) goto formerr;
+ pbyte = (pbit & ~7) / 8;
+ rdata++;
+
+ /* address suffix: provided only when prefix len != 128 */
+ if (pbit < 128) {
+ if (rdata + pbyte >= edata) goto formerr;
+ memset(&a, 0, sizeof(a));
+ memcpy(&a.s6_addr[pbyte], rdata, sizeof(a) - pbyte);
+ (void) inet_ntop(AF_INET6, &a, buf, buflen);
+ addlen(strlen(buf), &buf, &buflen);
+ rdata += sizeof(a) - pbyte;
+ }
+
+ /* prefix name: provided only when prefix len > 0 */
+ if (pbit == 0)
+ break;
+ if (rdata >= edata) goto formerr;
+ T(addstr(" ", 1, &buf, &buflen));
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+
+ break;
+ }
+
+ case ns_t_opt: {
+ len = SPRINTF((tmp, "%u bytes", class));
+ T(addstr(tmp, len, &buf, &buflen));
+ break;
+ }
+
+ case ns_t_ds:
+ case ns_t_dlv:
+ case ns_t_sshfp: {
+ u_int t;
+
+ if (type == ns_t_ds || type == ns_t_dlv) {
+ if (rdlen < 4U) goto formerr;
+ t = ns_get16(rdata);
+ rdata += NS_INT16SZ;
+ len = SPRINTF((tmp, "%u ", t));
+ T(addstr(tmp, len, &buf, &buflen));
+ } else
+ if (rdlen < 2U) goto formerr;
+
+ len = SPRINTF((tmp, "%u ", *rdata));
+ T(addstr(tmp, len, &buf, &buflen));
+ rdata++;
+
+ len = SPRINTF((tmp, "%u ", *rdata));
+ T(addstr(tmp, len, &buf, &buflen));
+ rdata++;
+
+ while (rdata < edata) {
+ len = SPRINTF((tmp, "%02X", *rdata));
+ T(addstr(tmp, len, &buf, &buflen));
+ rdata++;
+ }
+ break;
+ }
+
+ case ns_t_nsec3:
+ case ns_t_nsec3param: {
+ u_int t, w, l, j, k, c;
+
+ len = SPRINTF((tmp, "%u ", *rdata));
+ T(addstr(tmp, len, &buf, &buflen));
+ rdata++;
+
+ len = SPRINTF((tmp, "%u ", *rdata));
+ T(addstr(tmp, len, &buf, &buflen));
+ rdata++;
+
+ t = ns_get16(rdata);
+ rdata += NS_INT16SZ;
+ len = SPRINTF((tmp, "%u ", t));
+ T(addstr(tmp, len, &buf, &buflen));
+
+ t = *rdata++;
+ if (t == 0) {
+ T(addstr("-", 1, &buf, &buflen));
+ } else {
+ while (t-- > 0) {
+ len = SPRINTF((tmp, "%02X", *rdata));
+ T(addstr(tmp, len, &buf, &buflen));
+ rdata++;
+ }
+ }
+ if (type == ns_t_nsec3param)
+ break;
+ T(addstr(" ", 1, &buf, &buflen));
+
+ t = *rdata++;
+ while (t > 0) {
+ switch (t) {
+ case 1:
+ tmp[0] = base32hex[((rdata[0]>>3)&0x1f)];
+ tmp[1] = base32hex[((rdata[0]<<2)&0x1c)];
+ tmp[2] = tmp[3] = tmp[4] = '=';
+ tmp[5] = tmp[6] = tmp[7] = '=';
+ break;
+ case 2:
+ tmp[0] = base32hex[((rdata[0]>>3)&0x1f)];
+ tmp[1] = base32hex[((rdata[0]<<2)&0x1c)|
+ ((rdata[1]>>6)&0x03)];
+ tmp[2] = base32hex[((rdata[1]>>1)&0x1f)];
+ tmp[3] = base32hex[((rdata[1]<<4)&0x10)];
+ tmp[4] = tmp[5] = tmp[6] = tmp[7] = '=';
+ break;
+ case 3:
+ tmp[0] = base32hex[((rdata[0]>>3)&0x1f)];
+ tmp[1] = base32hex[((rdata[0]<<2)&0x1c)|
+ ((rdata[1]>>6)&0x03)];
+ tmp[2] = base32hex[((rdata[1]>>1)&0x1f)];
+ tmp[3] = base32hex[((rdata[1]<<4)&0x10)|
+ ((rdata[2]>>4)&0x0f)];
+ tmp[4] = base32hex[((rdata[2]<<1)&0x1e)];
+ tmp[5] = tmp[6] = tmp[7] = '=';
+ break;
+ case 4:
+ tmp[0] = base32hex[((rdata[0]>>3)&0x1f)];
+ tmp[1] = base32hex[((rdata[0]<<2)&0x1c)|
+ ((rdata[1]>>6)&0x03)];
+ tmp[2] = base32hex[((rdata[1]>>1)&0x1f)];
+ tmp[3] = base32hex[((rdata[1]<<4)&0x10)|
+ ((rdata[2]>>4)&0x0f)];
+ tmp[4] = base32hex[((rdata[2]<<1)&0x1e)|
+ ((rdata[3]>>7)&0x01)];
+ tmp[5] = base32hex[((rdata[3]>>2)&0x1f)];
+ tmp[6] = base32hex[(rdata[3]<<3)&0x18];
+ tmp[7] = '=';
+ break;
+ default:
+ tmp[0] = base32hex[((rdata[0]>>3)&0x1f)];
+ tmp[1] = base32hex[((rdata[0]<<2)&0x1c)|
+ ((rdata[1]>>6)&0x03)];
+ tmp[2] = base32hex[((rdata[1]>>1)&0x1f)];
+ tmp[3] = base32hex[((rdata[1]<<4)&0x10)|
+ ((rdata[2]>>4)&0x0f)];
+ tmp[4] = base32hex[((rdata[2]<<1)&0x1e)|
+ ((rdata[3]>>7)&0x01)];
+ tmp[5] = base32hex[((rdata[3]>>2)&0x1f)];
+ tmp[6] = base32hex[((rdata[3]<<3)&0x18)|
+ ((rdata[4]>>5)&0x07)];
+ tmp[7] = base32hex[(rdata[4]&0x1f)];
+ break;
+ }
+ T(addstr(tmp, 8, &buf, &buflen));
+ if (t >= 5) {
+ rdata += 5;
+ t -= 5;
+ } else {
+ rdata += t;
+ t -= t;
+ }
+ }
+
+ while (rdata < edata) {
+ w = *rdata++;
+ l = *rdata++;
+ for (j = 0; j < l; j++) {
+ if (rdata[j] == 0)
+ continue;
+ for (k = 0; k < 8; k++) {
+ if ((rdata[j] & (0x80 >> k)) == 0)
+ continue;
+ c = w * 256 + j * 8 + k;
+ len = SPRINTF((tmp, " %s", p_type(c)));
+ T(addstr(tmp, len, &buf, &buflen));
+ }
+ }
+ rdata += l;
+ }
+ break;
+ }
+
+ case ns_t_nsec: {
+ u_int w, l, j, k, c;
+
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+
+ while (rdata < edata) {
+ w = *rdata++;
+ l = *rdata++;
+ for (j = 0; j < l; j++) {
+ if (rdata[j] == 0)
+ continue;
+ for (k = 0; k < 8; k++) {
+ if ((rdata[j] & (0x80 >> k)) == 0)
+ continue;
+ c = w * 256 + j * 8 + k;
+ len = SPRINTF((tmp, " %s", p_type(c)));
+ T(addstr(tmp, len, &buf, &buflen));
+ }
+ }
+ rdata += l;
+ }
+ break;
+ }
+
+ case ns_t_dhcid: {
+ int n;
+ unsigned int siz;
+ char base64_dhcid[8192];
+ const char *leader;
+
+ siz = (edata-rdata)*4/3 + 4; /* "+4" accounts for trailing \0 */
+ if (siz > sizeof(base64_dhcid) * 3/4) {
+ const char *str = "record too long to print";
+ T(addstr(str, strlen(str), &buf, &buflen));
+ } else {
+ len = b64_ntop(rdata, edata-rdata, base64_dhcid, siz);
+
+ if (len < 0)
+ goto formerr;
+
+ else if (len > 15) {
+ T(addstr(" (", 2, &buf, &buflen));
+ leader = "\n\t\t";
+ spaced = 0;
+ }
+ else
+ leader = " ";
+
+ for (n = 0; n < len; n += 48) {
+ T(addstr(leader, strlen(leader),
+ &buf, &buflen));
+ T(addstr(base64_dhcid + n, MIN(len - n, 48),
+ &buf, &buflen));
+ }
+ if (len > 15)
+ T(addstr(" )", 2, &buf, &buflen));
+ }
+ }
+
+ case ns_t_ipseckey: {
+ int n;
+ unsigned int siz;
+ char base64_key[8192];
+ const char *leader;
+
+ if (rdlen < 2)
+ goto formerr;
+
+ switch (rdata[1]) {
+ case 0:
+ case 3:
+ if (rdlen < 3)
+ goto formerr;
+ break;
+ case 1:
+ if (rdlen < 7)
+ goto formerr;
+ break;
+ case 2:
+ if (rdlen < 19)
+ goto formerr;
+ break;
+ default:
+ comment = "unknown IPSECKEY gateway type";
+ goto hexify;
+ }
+
+ len = SPRINTF((tmp, "%u ", *rdata));
+ T(addstr(tmp, len, &buf, &buflen));
+ rdata++;
+
+ len = SPRINTF((tmp, "%u ", *rdata));
+ T(addstr(tmp, len, &buf, &buflen));
+ rdata++;
+
+ len = SPRINTF((tmp, "%u ", *rdata));
+ T(addstr(tmp, len, &buf, &buflen));
+ rdata++;
+
+ switch (rdata[-2]) {
+ case 0:
+ T(addstr(".", 1, &buf, &buflen));
+ break;
+ case 1:
+ (void) inet_ntop(AF_INET, rdata, buf, buflen);
+ addlen(strlen(buf), &buf, &buflen);
+ rdata += 4;
+ break;
+ case 2:
+ (void) inet_ntop(AF_INET6, rdata, buf, buflen);
+ addlen(strlen(buf), &buf, &buflen);
+ rdata += 16;
+ break;
+ case 3:
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+ break;
+ }
+
+ if (rdata >= edata)
+ break;
+
+ siz = (edata-rdata)*4/3 + 4; /* "+4" accounts for trailing \0 */
+ if (siz > sizeof(base64_key) * 3/4) {
+ const char *str = "record too long to print";
+ T(addstr(str, strlen(str), &buf, &buflen));
+ } else {
+ len = b64_ntop(rdata, edata-rdata, base64_key, siz);
+
+ if (len < 0)
+ goto formerr;
+
+ else if (len > 15) {
+ T(addstr(" (", 2, &buf, &buflen));
+ leader = "\n\t\t";
+ spaced = 0;
+ }
+ else
+ leader = " ";
+
+ for (n = 0; n < len; n += 48) {
+ T(addstr(leader, strlen(leader),
+ &buf, &buflen));
+ T(addstr(base64_key + n, MIN(len - n, 48),
+ &buf, &buflen));
+ }
+ if (len > 15)
+ T(addstr(" )", 2, &buf, &buflen));
+ }
+ }
+
+ case ns_t_hip: {
+ unsigned int i, hip_len, algorithm, key_len;
+ char base64_key[NS_MD5RSA_MAX_BASE64];
+ unsigned int siz;
+ const char *leader = "\n\t\t\t\t\t";
+
+ hip_len = *rdata++;
+ algorithm = *rdata++;
+ key_len = ns_get16(rdata);
+ rdata += NS_INT16SZ;
+
+ siz = key_len*4/3 + 4; /* "+4" accounts for trailing \0 */
+ if (siz > sizeof(base64_key) * 3/4) {
+ const char *str = "record too long to print";
+ T(addstr(str, strlen(str), &buf, &buflen));
+ } else {
+ len = sprintf(tmp, "( %u ", algorithm);
+ T(addstr(tmp, len, &buf, &buflen));
+
+ for (i = 0; i < hip_len; i++) {
+ len = sprintf(tmp, "%02X", *rdata);
+ T(addstr(tmp, len, &buf, &buflen));
+ rdata++;
+ }
+ T(addstr(leader, strlen(leader), &buf, &buflen));
+
+ len = b64_ntop(rdata, key_len, base64_key, siz);
+ if (len < 0)
+ goto formerr;
+
+ T(addstr(base64_key, len, &buf, &buflen));
+
+ rdata += key_len;
+ while (rdata < edata) {
+ T(addstr(leader, strlen(leader), &buf, &buflen));
+ T(addname(msg, msglen, &rdata, origin,
+ &buf, &buflen));
+ }
+ T(addstr(" )", 2, &buf, &buflen));
+ }
+ break;
+ }
+
+ default:
+ comment = "unknown RR type";
+ goto hexify;
+ }
+ return (buf - obuf);
+ formerr:
+ comment = "RR format error";
+ hexify: {
+ int n, m;
+ char *p;
+
+ len = SPRINTF((tmp, "\\# %u%s\t; %s", (unsigned)(edata - rdata),
+ rdlen != 0U ? " (" : "", comment));
+ T(addstr(tmp, len, &buf, &buflen));
+ while (rdata < edata) {
+ p = tmp;
+ p += SPRINTF((p, "\n\t"));
+ spaced = 0;
+ n = MIN(16, edata - rdata);
+ for (m = 0; m < n; m++)
+ p += SPRINTF((p, "%02x ", rdata[m]));
+ T(addstr(tmp, p - tmp, &buf, &buflen));
+ if (n < 16) {
+ T(addstr(")", 1, &buf, &buflen));
+ T(addtab(p - tmp + 1, 48, spaced, &buf, &buflen));
+ }
+ p = tmp;
+ p += SPRINTF((p, "; "));
+ for (m = 0; m < n; m++)
+ *p++ = (isascii(rdata[m]) && isprint(rdata[m]))
+ ? rdata[m]
+ : '.';
+ T(addstr(tmp, p - tmp, &buf, &buflen));
+ rdata += n;
+ }
+ return (buf - obuf);
+ }
+}
+
+/* Private. */
+
+/*%
+ * size_t
+ * prune_origin(name, origin)
+ * Find out if the name is at or under the current origin.
+ * return:
+ * Number of characters in name before start of origin,
+ * or length of name if origin does not match.
+ * notes:
+ * This function should share code with samedomain().
+ */
+static size_t
+prune_origin(const char *name, const char *origin) {
+ const char *oname = name;
+
+ while (*name != '\0') {
+ if (origin != NULL && ns_samename(name, origin) == 1)
+ return (name - oname - (name > oname));
+ while (*name != '\0') {
+ if (*name == '\\') {
+ name++;
+ /* XXX need to handle \nnn form. */
+ if (*name == '\0')
+ break;
+ } else if (*name == '.') {
+ name++;
+ break;
+ }
+ name++;
+ }
+ }
+ return (name - oname);
+}
+
+/*%
+ * int
+ * charstr(rdata, edata, buf, buflen)
+ * Format a <character-string> into the presentation buffer.
+ * return:
+ * Number of rdata octets consumed
+ * 0 for protocol format error
+ * -1 for output buffer error
+ * side effects:
+ * buffer is advanced on success.
+ */
+static int
+charstr(const u_char *rdata, const u_char *edata, char **buf, size_t *buflen) {
+ const u_char *odata = rdata;
+ size_t save_buflen = *buflen;
+ char *save_buf = *buf;
+
+ if (addstr("\"", 1, buf, buflen) < 0)
+ goto enospc;
+ if (rdata < edata) {
+ int n = *rdata;
+
+ if (rdata + 1 + n <= edata) {
+ rdata++;
+ while (n-- > 0) {
+ if (strchr("\n\"\\", *rdata) != NULL)
+ if (addstr("\\", 1, buf, buflen) < 0)
+ goto enospc;
+ if (addstr((const char *)rdata, 1,
+ buf, buflen) < 0)
+ goto enospc;
+ rdata++;
+ }
+ }
+ }
+ if (addstr("\"", 1, buf, buflen) < 0)
+ goto enospc;
+ return (rdata - odata);
+ enospc:
+ errno = ENOSPC;
+ *buf = save_buf;
+ *buflen = save_buflen;
+ return (-1);
+}
+
+static int
+addname(const u_char *msg, size_t msglen,
+ const u_char **pp, const char *origin,
+ char **buf, size_t *buflen)
+{
+ size_t newlen, save_buflen = *buflen;
+ char *save_buf = *buf;
+ int n;
+
+ n = dn_expand(msg, msg + msglen, *pp, *buf, *buflen);
+ if (n < 0)
+ goto enospc; /*%< Guess. */
+ newlen = prune_origin(*buf, origin);
+ if (**buf == '\0') {
+ goto root;
+ } else if (newlen == 0U) {
+ /* Use "@" instead of name. */
+ if (newlen + 2 > *buflen)
+ goto enospc; /* No room for "@\0". */
+ (*buf)[newlen++] = '@';
+ (*buf)[newlen] = '\0';
+ } else {
+ if (((origin == NULL || origin[0] == '\0') ||
+ (origin[0] != '.' && origin[1] != '\0' &&
+ (*buf)[newlen] == '\0')) && (*buf)[newlen - 1] != '.') {
+ /* No trailing dot. */
+ root:
+ if (newlen + 2 > *buflen)
+ goto enospc; /* No room for ".\0". */
+ (*buf)[newlen++] = '.';
+ (*buf)[newlen] = '\0';
+ }
+ }
+ *pp += n;
+ addlen(newlen, buf, buflen);
+ **buf = '\0';
+ return (newlen);
+ enospc:
+ errno = ENOSPC;
+ *buf = save_buf;
+ *buflen = save_buflen;
+ return (-1);
+}
+
+static void
+addlen(size_t len, char **buf, size_t *buflen) {
+ INSIST(len <= *buflen);
+ *buf += len;
+ *buflen -= len;
+}
+
+static int
+addstr(const char *src, size_t len, char **buf, size_t *buflen) {
+ if (len >= *buflen) {
+ errno = ENOSPC;
+ return (-1);
+ }
+ memcpy(*buf, src, len);
+ addlen(len, buf, buflen);
+ **buf = '\0';
+ return (0);
+}
+
+static int
+addtab(size_t len, size_t target, int spaced, char **buf, size_t *buflen) {
+ size_t save_buflen = *buflen;
+ char *save_buf = *buf;
+ int t;
+
+ if (spaced || len >= target - 1) {
+ T(addstr(" ", 2, buf, buflen));
+ spaced = 1;
+ } else {
+ for (t = (target - len - 1) / 8; t >= 0; t--)
+ if (addstr("\t", 1, buf, buflen) < 0) {
+ *buflen = save_buflen;
+ *buf = save_buf;
+ return (-1);
+ }
+ spaced = 0;
+ }
+ return (spaced);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/nameser/ns_rdata.c b/usr/src/lib/libresolv2_joy/common/nameser/ns_rdata.c
new file mode 100644
index 0000000000..eac4052278
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/nameser/ns_rdata.c
@@ -0,0 +1,300 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/*
+ * Copyright (C) 2009 Internet Systems Consortium, Inc. ("ISC")
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef lint
+static const char rcsid[] = "$Id: ns_rdata.c,v 1.2 2009/01/23 23:49:15 tbox Exp $";
+#endif
+
+#include "port_before.h"
+
+#if __OpenBSD__
+#include <sys/types.h>
+#endif
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <resolv_joy.h>
+#include <string.h>
+
+#include "port_after.h"
+
+#define CONSUME_SRC \
+ do { \
+ rdata += n, rdlen -= n; \
+ } while (0)
+
+#define CONSUME_DST \
+ do { \
+ nrdata += n, nrdsiz -= n, nrdlen += n; \
+ } while (0)
+
+#define UNPACK_DNAME \
+ do { \
+ size_t t; \
+ \
+ if ((n = ns_name_unpack2(msg,eom,rdata,nrdata,nrdsiz,&t))<0) {\
+ errno = EMSGSIZE; \
+ return (-1); \
+ } \
+ CONSUME_SRC; \
+ n = t; \
+ CONSUME_DST; \
+ } while (0)
+
+#define UNPACK_SOME(x) \
+ do { \
+ n = (x); \
+ if ((size_t)n > rdlen || (size_t)n > nrdsiz) { \
+ errno = EMSGSIZE; \
+ return (-1); \
+ } \
+ memcpy(nrdata, rdata, n); \
+ CONSUME_SRC; CONSUME_DST; \
+ } while (0)
+
+#define UNPACK_REST(x) \
+ do { \
+ n = (x); \
+ if ((size_t)n != rdlen) { \
+ errno = EMSGSIZE; \
+ return (-1); \
+ } \
+ memcpy(nrdata, rdata, n); \
+ CONSUME_SRC; CONSUME_DST; \
+ } while (0)
+
+ssize_t
+ns_rdata_unpack(const u_char *msg, const u_char *eom,
+ ns_type type, const u_char *rdata, size_t rdlen,
+ u_char *nrdata, size_t nrdsiz)
+{
+ size_t nrdlen = 0;
+ int n;
+
+ switch (type) {
+ case ns_t_a:
+ UNPACK_REST(NS_INADDRSZ);
+ break;
+ case ns_t_aaaa:
+ UNPACK_REST(NS_IN6ADDRSZ);
+ break;
+ case ns_t_cname:
+ case ns_t_mb:
+ case ns_t_mg:
+ case ns_t_mr:
+ case ns_t_ns:
+ case ns_t_ptr:
+ case ns_t_dname:
+ UNPACK_DNAME;
+ break;
+ case ns_t_soa:
+ UNPACK_DNAME;
+ UNPACK_DNAME;
+ UNPACK_SOME(NS_INT32SZ * 5);
+ break;
+ case ns_t_mx:
+ case ns_t_afsdb:
+ case ns_t_rt:
+ UNPACK_SOME(NS_INT16SZ);
+ UNPACK_DNAME;
+ break;
+ case ns_t_px:
+ UNPACK_SOME(NS_INT16SZ);
+ UNPACK_DNAME;
+ UNPACK_DNAME;
+ break;
+ case ns_t_srv:
+ UNPACK_SOME(NS_INT16SZ * 3);
+ UNPACK_DNAME;
+ break;
+ case ns_t_minfo:
+ case ns_t_rp:
+ UNPACK_DNAME;
+ UNPACK_DNAME;
+ break;
+ default:
+ UNPACK_SOME(rdlen);
+ break;
+ }
+ if (rdlen > 0) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ return (nrdlen);
+}
+
+#define EQUAL_CONSUME \
+ do { \
+ rdata1 += n, rdlen1 -= n; \
+ rdata2 += n, rdlen2 -= n; \
+ } while (0)
+
+#define EQUAL_DNAME \
+ do { \
+ ssize_t n; \
+ \
+ if (rdlen1 != rdlen2) \
+ return (0); \
+ n = ns_name_eq(rdata1, rdlen1, rdata2, rdlen2); \
+ if (n <= 0) \
+ return (n); \
+ n = rdlen1; \
+ EQUAL_CONSUME; \
+ } while (0)
+
+#define EQUAL_SOME(x) \
+ do { \
+ size_t n = (x); \
+ \
+ if (n > rdlen1 || n > rdlen2) { \
+ errno = EMSGSIZE; \
+ return (-1); \
+ } \
+ if (memcmp(rdata1, rdata2, n) != 0) \
+ return (0); \
+ EQUAL_CONSUME; \
+ } while (0)
+
+int
+ns_rdata_equal(ns_type type,
+ const u_char *rdata1, size_t rdlen1,
+ const u_char *rdata2, size_t rdlen2)
+{
+ switch (type) {
+ case ns_t_cname:
+ case ns_t_mb:
+ case ns_t_mg:
+ case ns_t_mr:
+ case ns_t_ns:
+ case ns_t_ptr:
+ case ns_t_dname:
+ EQUAL_DNAME;
+ break;
+ case ns_t_soa:
+ /* "There can be only one." --Highlander */
+ break;
+ case ns_t_mx:
+ case ns_t_afsdb:
+ case ns_t_rt:
+ EQUAL_SOME(NS_INT16SZ);
+ EQUAL_DNAME;
+ break;
+ case ns_t_px:
+ EQUAL_SOME(NS_INT16SZ);
+ EQUAL_DNAME;
+ EQUAL_DNAME;
+ break;
+ case ns_t_srv:
+ EQUAL_SOME(NS_INT16SZ * 3);
+ EQUAL_DNAME;
+ break;
+ case ns_t_minfo:
+ case ns_t_rp:
+ EQUAL_DNAME;
+ EQUAL_DNAME;
+ break;
+ default:
+ EQUAL_SOME(rdlen1);
+ break;
+ }
+ if (rdlen1 != 0 || rdlen2 != 0)
+ return (0);
+ return (1);
+}
+
+#define REFERS_DNAME \
+ do { \
+ int n; \
+ \
+ n = ns_name_eq(rdata, rdlen, nname, NS_MAXNNAME); \
+ if (n < 0) \
+ return (-1); \
+ if (n > 0) \
+ return (1); \
+ n = dn_skipname(rdata, rdata + rdlen); \
+ if (n < 0) \
+ return (-1); \
+ CONSUME_SRC; \
+ } while (0)
+
+#define REFERS_SOME(x) \
+ do { \
+ size_t n = (x); \
+ \
+ if (n > rdlen) { \
+ errno = EMSGSIZE; \
+ return (-1); \
+ } \
+ CONSUME_SRC; \
+ } while (0)
+
+int
+ns_rdata_refers(ns_type type,
+ const u_char *rdata, size_t rdlen,
+ const u_char *nname)
+{
+ switch (type) {
+ case ns_t_cname:
+ case ns_t_mb:
+ case ns_t_mg:
+ case ns_t_mr:
+ case ns_t_ns:
+ case ns_t_ptr:
+ case ns_t_dname:
+ REFERS_DNAME;
+ break;
+ case ns_t_soa:
+ REFERS_DNAME;
+ REFERS_DNAME;
+ REFERS_SOME(NS_INT32SZ * 5);
+ break;
+ case ns_t_mx:
+ case ns_t_afsdb:
+ case ns_t_rt:
+ REFERS_SOME(NS_INT16SZ);
+ REFERS_DNAME;
+ break;
+ case ns_t_px:
+ REFERS_SOME(NS_INT16SZ);
+ REFERS_DNAME;
+ REFERS_DNAME;
+ break;
+ case ns_t_srv:
+ REFERS_SOME(NS_INT16SZ * 3);
+ REFERS_DNAME;
+ break;
+ case ns_t_minfo:
+ case ns_t_rp:
+ REFERS_DNAME;
+ REFERS_DNAME;
+ break;
+ default:
+ REFERS_SOME(rdlen);
+ break;
+ }
+ if (rdlen != 0) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ return (0);
+}
diff --git a/usr/src/lib/libresolv2_joy/common/nameser/ns_samedomain.c b/usr/src/lib/libresolv2_joy/common/nameser/ns_samedomain.c
new file mode 100644
index 0000000000..5e9f5cab54
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/nameser/ns_samedomain.c
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1995,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef lint
+static const char rcsid[] = "$Id: ns_samedomain.c,v 1.6 2005/04/27 04:56:40 sra Exp $";
+#endif
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <arpa/nameser.h>
+#include <errno.h>
+#include <string.h>
+
+#include "port_after.h"
+
+/*%
+ * Check whether a name belongs to a domain.
+ *
+ * Inputs:
+ *\li a - the domain whose ancestory is being verified
+ *\li b - the potential ancestor we're checking against
+ *
+ * Return:
+ *\li boolean - is a at or below b?
+ *
+ * Notes:
+ *\li Trailing dots are first removed from name and domain.
+ * Always compare complete subdomains, not only whether the
+ * domain name is the trailing string of the given name.
+ *
+ *\li "host.foobar.top" lies in "foobar.top" and in "top" and in ""
+ * but NOT in "bar.top"
+ */
+
+int
+ns_samedomain(const char *a, const char *b) {
+ size_t la, lb;
+ int diff, i, escaped;
+ const char *cp;
+
+ la = strlen(a);
+ lb = strlen(b);
+
+ /* Ignore a trailing label separator (i.e. an unescaped dot) in 'a'. */
+ if (la != 0U && a[la - 1] == '.') {
+ escaped = 0;
+ /* Note this loop doesn't get executed if la==1. */
+ for (i = la - 2; i >= 0; i--)
+ if (a[i] == '\\') {
+ if (escaped)
+ escaped = 0;
+ else
+ escaped = 1;
+ } else
+ break;
+ if (!escaped)
+ la--;
+ }
+
+ /* Ignore a trailing label separator (i.e. an unescaped dot) in 'b'. */
+ if (lb != 0U && b[lb - 1] == '.') {
+ escaped = 0;
+ /* note this loop doesn't get executed if lb==1 */
+ for (i = lb - 2; i >= 0; i--)
+ if (b[i] == '\\') {
+ if (escaped)
+ escaped = 0;
+ else
+ escaped = 1;
+ } else
+ break;
+ if (!escaped)
+ lb--;
+ }
+
+ /* lb == 0 means 'b' is the root domain, so 'a' must be in 'b'. */
+ if (lb == 0U)
+ return (1);
+
+ /* 'b' longer than 'a' means 'a' can't be in 'b'. */
+ if (lb > la)
+ return (0);
+
+ /* 'a' and 'b' being equal at this point indicates sameness. */
+ if (lb == la)
+ return (strncasecmp(a, b, lb) == 0);
+
+ /* Ok, we know la > lb. */
+
+ diff = la - lb;
+
+ /*
+ * If 'a' is only 1 character longer than 'b', then it can't be
+ * a subdomain of 'b' (because of the need for the '.' label
+ * separator).
+ */
+ if (diff < 2)
+ return (0);
+
+ /*
+ * If the character before the last 'lb' characters of 'b'
+ * isn't '.', then it can't be a match (this lets us avoid
+ * having "foobar.com" match "bar.com").
+ */
+ if (a[diff - 1] != '.')
+ return (0);
+
+ /*
+ * We're not sure about that '.', however. It could be escaped
+ * and thus not a really a label separator.
+ */
+ escaped = 0;
+ for (i = diff - 2; i >= 0; i--)
+ if (a[i] == '\\') {
+ if (escaped)
+ escaped = 0;
+ else
+ escaped = 1;
+ } else
+ break;
+ if (escaped)
+ return (0);
+
+ /* Now compare aligned trailing substring. */
+ cp = a + diff;
+ return (strncasecmp(cp, b, lb) == 0);
+}
+
+/*%
+ * is "a" a subdomain of "b"?
+ */
+int
+ns_subdomain(const char *a, const char *b) {
+ return (ns_samename(a, b) != 1 && ns_samedomain(a, b));
+}
+
+/*%
+ * make a canonical copy of domain name "src"
+ *
+ * notes:
+ * \code
+ * foo -> foo.
+ * foo. -> foo.
+ * foo.. -> foo.
+ * foo\. -> foo\..
+ * foo\\. -> foo\\.
+ * \endcode
+ */
+
+int
+ns_makecanon(const char *src, char *dst, size_t dstsize) {
+ size_t n = strlen(src);
+
+ if (n + sizeof "." > dstsize) { /*%< Note: sizeof == 2 */
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ strcpy(dst, src);
+ while (n >= 1U && dst[n - 1] == '.') /*%< Ends in "." */
+ if (n >= 2U && dst[n - 2] == '\\' && /*%< Ends in "\." */
+ (n < 3U || dst[n - 3] != '\\')) /*%< But not "\\." */
+ break;
+ else
+ dst[--n] = '\0';
+ dst[n++] = '.';
+ dst[n] = '\0';
+ return (0);
+}
+
+/*%
+ * determine whether domain name "a" is the same as domain name "b"
+ *
+ * return:
+ *\li -1 on error
+ *\li 0 if names differ
+ *\li 1 if names are the same
+ */
+
+int
+ns_samename(const char *a, const char *b) {
+ char ta[NS_MAXDNAME], tb[NS_MAXDNAME];
+
+ if (ns_makecanon(a, ta, sizeof ta) < 0 ||
+ ns_makecanon(b, tb, sizeof tb) < 0)
+ return (-1);
+ if (strcasecmp(ta, tb) == 0)
+ return (1);
+ else
+ return (0);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/nameser/ns_sign.c b/usr/src/lib/libresolv2_joy/common/nameser/ns_sign.c
new file mode 100644
index 0000000000..fc7dd19f7b
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/nameser/ns_sign.c
@@ -0,0 +1,387 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1999 by Internet Software Consortium, Inc.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef lint
+static const char rcsid[] = "$Id: ns_sign.c,v 1.6 2006/03/09 23:57:56 marka Exp $";
+#endif
+
+/* Import. */
+
+#include "port_before.h"
+#include "fd_setsize.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <arpa/inet.h>
+
+#include <errno.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <isc/dst.h>
+#include <isc/assertions.h>
+
+#include "port_after.h"
+
+#define BOUNDS_CHECK(ptr, count) \
+ do { \
+ if ((ptr) + (count) > eob) { \
+ errno = EMSGSIZE; \
+ return(NS_TSIG_ERROR_NO_SPACE); \
+ } \
+ } while (0)
+
+/*%
+ * ns_sign
+ *
+ * Parameters:
+ *\li msg message to be sent
+ *\li msglen input - length of message
+ * output - length of signed message
+ *\li msgsize length of buffer containing message
+ *\li error value to put in the error field
+ *\li key tsig key used for signing
+ *\li querysig (response), the signature in the query
+ *\li querysiglen (response), the length of the signature in the query
+ *\li sig a buffer to hold the generated signature
+ *\li siglen input - length of signature buffer
+ * output - length of signature
+ *
+ * Errors:
+ *\li - bad input data (-1)
+ *\li - bad key / sign failed (-BADKEY)
+ *\li - not enough space (NS_TSIG_ERROR_NO_SPACE)
+ */
+int
+ns_sign(u_char *msg, int *msglen, int msgsize, int error, void *k,
+ const u_char *querysig, int querysiglen, u_char *sig, int *siglen,
+ time_t in_timesigned)
+{
+ return(ns_sign2(msg, msglen, msgsize, error, k,
+ querysig, querysiglen, sig, siglen,
+ in_timesigned, NULL, NULL));
+}
+
+int
+ns_sign2(u_char *msg, int *msglen, int msgsize, int error, void *k,
+ const u_char *querysig, int querysiglen, u_char *sig, int *siglen,
+ time_t in_timesigned, u_char **dnptrs, u_char **lastdnptr)
+{
+ HEADER *hp = (HEADER *)msg;
+ DST_KEY *key = (DST_KEY *)k;
+ u_char *cp, *eob;
+ u_char *lenp;
+ u_char *alg;
+ int n;
+ time_t timesigned;
+ u_char name[NS_MAXCDNAME];
+
+ dst_init();
+ if (msg == NULL || msglen == NULL || sig == NULL || siglen == NULL)
+ return (-1);
+
+ cp = msg + *msglen;
+ eob = msg + msgsize;
+
+ /* Name. */
+ if (key != NULL && error != ns_r_badsig && error != ns_r_badkey) {
+ n = ns_name_pton(key->dk_key_name, name, sizeof name);
+ if (n != -1)
+ n = ns_name_pack(name, cp, eob - cp,
+ (const u_char **)dnptrs,
+ (const u_char **)lastdnptr);
+
+ } else {
+ n = ns_name_pton("", name, sizeof name);
+ if (n != -1)
+ n = ns_name_pack(name, cp, eob - cp, NULL, NULL);
+ }
+ if (n < 0)
+ return (NS_TSIG_ERROR_NO_SPACE);
+ cp += n;
+
+ /* Type, class, ttl, length (not filled in yet). */
+ BOUNDS_CHECK(cp, INT16SZ + INT16SZ + INT32SZ + INT16SZ);
+ PUTSHORT(ns_t_tsig, cp);
+ PUTSHORT(ns_c_any, cp);
+ PUTLONG(0, cp); /*%< TTL */
+ lenp = cp;
+ cp += 2;
+
+ /* Alg. */
+ if (key != NULL && error != ns_r_badsig && error != ns_r_badkey) {
+ if (key->dk_alg != KEY_HMAC_MD5)
+ return (-ns_r_badkey);
+ n = dn_comp(NS_TSIG_ALG_HMAC_MD5, cp, eob - cp, NULL, NULL);
+ }
+ else
+ n = dn_comp("", cp, eob - cp, NULL, NULL);
+ if (n < 0)
+ return (NS_TSIG_ERROR_NO_SPACE);
+ alg = cp;
+ cp += n;
+
+ /* Time. */
+ BOUNDS_CHECK(cp, INT16SZ + INT32SZ + INT16SZ);
+ PUTSHORT(0, cp);
+ timesigned = time(NULL);
+ if (error != ns_r_badtime)
+ PUTLONG(timesigned, cp);
+ else
+ PUTLONG(in_timesigned, cp);
+ PUTSHORT(NS_TSIG_FUDGE, cp);
+
+ /* Compute the signature. */
+ if (key != NULL && error != ns_r_badsig && error != ns_r_badkey) {
+ void *ctx;
+ u_char buf[NS_MAXCDNAME], *cp2;
+ int n;
+
+ dst_sign_data(SIG_MODE_INIT, key, &ctx, NULL, 0, NULL, 0);
+
+ /* Digest the query signature, if this is a response. */
+ if (querysiglen > 0 && querysig != NULL) {
+ u_int16_t len_n = htons(querysiglen);
+ dst_sign_data(SIG_MODE_UPDATE, key, &ctx,
+ (u_char *)&len_n, INT16SZ, NULL, 0);
+ dst_sign_data(SIG_MODE_UPDATE, key, &ctx,
+ querysig, querysiglen, NULL, 0);
+ }
+
+ /* Digest the message. */
+ dst_sign_data(SIG_MODE_UPDATE, key, &ctx, msg, *msglen,
+ NULL, 0);
+
+ /* Digest the key name. */
+ n = ns_name_ntol(name, buf, sizeof(buf));
+ INSIST(n > 0);
+ dst_sign_data(SIG_MODE_UPDATE, key, &ctx, buf, n, NULL, 0);
+
+ /* Digest the class and TTL. */
+ cp2 = buf;
+ PUTSHORT(ns_c_any, cp2);
+ PUTLONG(0, cp2);
+ dst_sign_data(SIG_MODE_UPDATE, key, &ctx, buf, cp2-buf,
+ NULL, 0);
+
+ /* Digest the algorithm. */
+ n = ns_name_ntol(alg, buf, sizeof(buf));
+ INSIST(n > 0);
+ dst_sign_data(SIG_MODE_UPDATE, key, &ctx, buf, n, NULL, 0);
+
+ /* Digest the time signed, fudge, error, and other data */
+ cp2 = buf;
+ PUTSHORT(0, cp2); /*%< Top 16 bits of time */
+ if (error != ns_r_badtime)
+ PUTLONG(timesigned, cp2);
+ else
+ PUTLONG(in_timesigned, cp2);
+ PUTSHORT(NS_TSIG_FUDGE, cp2);
+ PUTSHORT(error, cp2); /*%< Error */
+ if (error != ns_r_badtime)
+ PUTSHORT(0, cp2); /*%< Other data length */
+ else {
+ PUTSHORT(INT16SZ+INT32SZ, cp2); /*%< Other data length */
+ PUTSHORT(0, cp2); /*%< Top 16 bits of time */
+ PUTLONG(timesigned, cp2);
+ }
+ dst_sign_data(SIG_MODE_UPDATE, key, &ctx, buf, cp2-buf,
+ NULL, 0);
+
+ n = dst_sign_data(SIG_MODE_FINAL, key, &ctx, NULL, 0,
+ sig, *siglen);
+ if (n < 0)
+ return (-ns_r_badkey);
+ *siglen = n;
+ } else
+ *siglen = 0;
+
+ /* Add the signature. */
+ BOUNDS_CHECK(cp, INT16SZ + (*siglen));
+ PUTSHORT(*siglen, cp);
+ memcpy(cp, sig, *siglen);
+ cp += (*siglen);
+
+ /* The original message ID & error. */
+ BOUNDS_CHECK(cp, INT16SZ + INT16SZ);
+ PUTSHORT(ntohs(hp->id), cp); /*%< already in network order */
+ PUTSHORT(error, cp);
+
+ /* Other data. */
+ BOUNDS_CHECK(cp, INT16SZ);
+ if (error != ns_r_badtime)
+ PUTSHORT(0, cp); /*%< Other data length */
+ else {
+ PUTSHORT(INT16SZ+INT32SZ, cp); /*%< Other data length */
+ BOUNDS_CHECK(cp, INT32SZ+INT16SZ);
+ PUTSHORT(0, cp); /*%< Top 16 bits of time */
+ PUTLONG(timesigned, cp);
+ }
+
+ /* Go back and fill in the length. */
+ PUTSHORT(cp - lenp - INT16SZ, lenp);
+
+ hp->arcount = htons(ntohs(hp->arcount) + 1);
+ *msglen = (cp - msg);
+ return (0);
+}
+
+int
+ns_sign_tcp_init(void *k, const u_char *querysig, int querysiglen,
+ ns_tcp_tsig_state *state)
+{
+ dst_init();
+ if (state == NULL || k == NULL || querysig == NULL || querysiglen < 0)
+ return (-1);
+ state->counter = -1;
+ state->key = k;
+ if (state->key->dk_alg != KEY_HMAC_MD5)
+ return (-ns_r_badkey);
+ if (querysiglen > (int)sizeof(state->sig))
+ return (-1);
+ memcpy(state->sig, querysig, querysiglen);
+ state->siglen = querysiglen;
+ return (0);
+}
+
+int
+ns_sign_tcp(u_char *msg, int *msglen, int msgsize, int error,
+ ns_tcp_tsig_state *state, int done)
+{
+ return (ns_sign_tcp2(msg, msglen, msgsize, error, state,
+ done, NULL, NULL));
+}
+
+int
+ns_sign_tcp2(u_char *msg, int *msglen, int msgsize, int error,
+ ns_tcp_tsig_state *state, int done,
+ u_char **dnptrs, u_char **lastdnptr)
+{
+ u_char *cp, *eob, *lenp;
+ u_char buf[MAXDNAME], *cp2;
+ HEADER *hp = (HEADER *)msg;
+ time_t timesigned;
+ int n;
+
+ if (msg == NULL || msglen == NULL || state == NULL)
+ return (-1);
+
+ state->counter++;
+ if (state->counter == 0)
+ return (ns_sign2(msg, msglen, msgsize, error, state->key,
+ state->sig, state->siglen,
+ state->sig, &state->siglen, 0,
+ dnptrs, lastdnptr));
+
+ if (state->siglen > 0) {
+ u_int16_t siglen_n = htons(state->siglen);
+ dst_sign_data(SIG_MODE_INIT, state->key, &state->ctx,
+ NULL, 0, NULL, 0);
+ dst_sign_data(SIG_MODE_UPDATE, state->key, &state->ctx,
+ (u_char *)&siglen_n, INT16SZ, NULL, 0);
+ dst_sign_data(SIG_MODE_UPDATE, state->key, &state->ctx,
+ state->sig, state->siglen, NULL, 0);
+ state->siglen = 0;
+ }
+
+ dst_sign_data(SIG_MODE_UPDATE, state->key, &state->ctx, msg, *msglen,
+ NULL, 0);
+
+ if (done == 0 && (state->counter % 100 != 0))
+ return (0);
+
+ cp = msg + *msglen;
+ eob = msg + msgsize;
+
+ /* Name. */
+ n = dn_comp(state->key->dk_key_name, cp, eob - cp, dnptrs, lastdnptr);
+ if (n < 0)
+ return (NS_TSIG_ERROR_NO_SPACE);
+ cp += n;
+
+ /* Type, class, ttl, length (not filled in yet). */
+ BOUNDS_CHECK(cp, INT16SZ + INT16SZ + INT32SZ + INT16SZ);
+ PUTSHORT(ns_t_tsig, cp);
+ PUTSHORT(ns_c_any, cp);
+ PUTLONG(0, cp); /*%< TTL */
+ lenp = cp;
+ cp += 2;
+
+ /* Alg. */
+ n = dn_comp(NS_TSIG_ALG_HMAC_MD5, cp, eob - cp, NULL, NULL);
+ if (n < 0)
+ return (NS_TSIG_ERROR_NO_SPACE);
+ cp += n;
+
+ /* Time. */
+ BOUNDS_CHECK(cp, INT16SZ + INT32SZ + INT16SZ);
+ PUTSHORT(0, cp);
+ timesigned = time(NULL);
+ PUTLONG(timesigned, cp);
+ PUTSHORT(NS_TSIG_FUDGE, cp);
+
+ /*
+ * Compute the signature.
+ */
+
+ /* Digest the time signed and fudge. */
+ cp2 = buf;
+ PUTSHORT(0, cp2); /*%< Top 16 bits of time */
+ PUTLONG(timesigned, cp2);
+ PUTSHORT(NS_TSIG_FUDGE, cp2);
+
+ dst_sign_data(SIG_MODE_UPDATE, state->key, &state->ctx,
+ buf, cp2 - buf, NULL, 0);
+
+ n = dst_sign_data(SIG_MODE_FINAL, state->key, &state->ctx, NULL, 0,
+ state->sig, sizeof(state->sig));
+ if (n < 0)
+ return (-ns_r_badkey);
+ state->siglen = n;
+
+ /* Add the signature. */
+ BOUNDS_CHECK(cp, INT16SZ + state->siglen);
+ PUTSHORT(state->siglen, cp);
+ memcpy(cp, state->sig, state->siglen);
+ cp += state->siglen;
+
+ /* The original message ID & error. */
+ BOUNDS_CHECK(cp, INT16SZ + INT16SZ);
+ PUTSHORT(ntohs(hp->id), cp); /*%< already in network order */
+ PUTSHORT(error, cp);
+
+ /* Other data. */
+ BOUNDS_CHECK(cp, INT16SZ);
+ PUTSHORT(0, cp);
+
+ /* Go back and fill in the length. */
+ PUTSHORT(cp - lenp - INT16SZ, lenp);
+
+ hp->arcount = htons(ntohs(hp->arcount) + 1);
+ *msglen = (cp - msg);
+ return (0);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/nameser/ns_ttl.c b/usr/src/lib/libresolv2_joy/common/nameser/ns_ttl.c
new file mode 100644
index 0000000000..69c2f83f57
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/nameser/ns_ttl.c
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef lint
+static const char rcsid[] = "$Id: ns_ttl.c,v 1.4 2005/07/28 06:51:49 marka Exp $";
+#endif
+
+/* Import. */
+
+#include "port_before.h"
+
+#include <arpa/nameser.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "port_after.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) ((size_t)sprintf x)
+#endif
+
+/* Forward. */
+
+static int fmt1(int t, char s, char **buf, size_t *buflen);
+
+/* Macros. */
+
+#define T(x) if ((x) < 0) return (-1); else (void)NULL
+
+/* Public. */
+
+int
+ns_format_ttl(u_long src, char *dst, size_t dstlen) {
+ char *odst = dst;
+ int secs, mins, hours, days, weeks, x;
+ char *p;
+
+ secs = src % 60; src /= 60;
+ mins = src % 60; src /= 60;
+ hours = src % 24; src /= 24;
+ days = src % 7; src /= 7;
+ weeks = src; src = 0;
+
+ x = 0;
+ if (weeks) {
+ T(fmt1(weeks, 'W', &dst, &dstlen));
+ x++;
+ }
+ if (days) {
+ T(fmt1(days, 'D', &dst, &dstlen));
+ x++;
+ }
+ if (hours) {
+ T(fmt1(hours, 'H', &dst, &dstlen));
+ x++;
+ }
+ if (mins) {
+ T(fmt1(mins, 'M', &dst, &dstlen));
+ x++;
+ }
+ if (secs || !(weeks || days || hours || mins)) {
+ T(fmt1(secs, 'S', &dst, &dstlen));
+ x++;
+ }
+
+ if (x > 1) {
+ int ch;
+
+ for (p = odst; (ch = *p) != '\0'; p++)
+ if (isascii(ch) && isupper(ch))
+ *p = tolower(ch);
+ }
+
+ return (dst - odst);
+}
+
+int
+ns_parse_ttl(const char *src, u_long *dst) {
+ u_long ttl, tmp;
+ int ch, digits, dirty;
+
+ ttl = 0;
+ tmp = 0;
+ digits = 0;
+ dirty = 0;
+ while ((ch = *src++) != '\0') {
+ if (!isascii(ch) || !isprint(ch))
+ goto einval;
+ if (isdigit(ch)) {
+ tmp *= 10;
+ tmp += (ch - '0');
+ digits++;
+ continue;
+ }
+ if (digits == 0)
+ goto einval;
+ if (islower(ch))
+ ch = toupper(ch);
+ switch (ch) {
+ case 'W': tmp *= 7;
+ case 'D': tmp *= 24;
+ case 'H': tmp *= 60;
+ case 'M': tmp *= 60;
+ case 'S': break;
+ default: goto einval;
+ }
+ ttl += tmp;
+ tmp = 0;
+ digits = 0;
+ dirty = 1;
+ }
+ if (digits > 0) {
+ if (dirty)
+ goto einval;
+ else
+ ttl += tmp;
+ } else if (!dirty)
+ goto einval;
+ *dst = ttl;
+ return (0);
+
+ einval:
+ errno = EINVAL;
+ return (-1);
+}
+
+/* Private. */
+
+static int
+fmt1(int t, char s, char **buf, size_t *buflen) {
+ char tmp[50];
+ size_t len;
+
+ len = SPRINTF((tmp, "%d%c", t, s));
+ if (len + 1 > *buflen)
+ return (-1);
+ strcpy(*buf, tmp);
+ *buf += len;
+ *buflen -= len;
+ return (0);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/nameser/ns_verify.c b/usr/src/lib/libresolv2_joy/common/nameser/ns_verify.c
new file mode 100644
index 0000000000..b4b63d4641
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/nameser/ns_verify.c
@@ -0,0 +1,484 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1999 by Internet Software Consortium, Inc.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef lint
+static const char rcsid[] = "$Id: ns_verify.c,v 1.5 2006/03/09 23:57:56 marka Exp $";
+#endif
+
+/* Import. */
+
+#include "port_before.h"
+#include "fd_setsize.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <arpa/inet.h>
+
+#include <errno.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <isc/dst.h>
+
+#include "port_after.h"
+
+/* Private. */
+
+#define BOUNDS_CHECK(ptr, count) \
+ do { \
+ if ((ptr) + (count) > eom) { \
+ return (NS_TSIG_ERROR_FORMERR); \
+ } \
+ } while (0)
+
+/* Public. */
+
+u_char *
+ns_find_tsig(u_char *msg, u_char *eom) {
+ HEADER *hp = (HEADER *)msg;
+ int n, type;
+ u_char *cp = msg, *start;
+
+ if (msg == NULL || eom == NULL || msg > eom)
+ return (NULL);
+
+ if (cp + HFIXEDSZ >= eom)
+ return (NULL);
+
+ if (hp->arcount == 0)
+ return (NULL);
+
+ cp += HFIXEDSZ;
+
+ n = ns_skiprr(cp, eom, ns_s_qd, ntohs(hp->qdcount));
+ if (n < 0)
+ return (NULL);
+ cp += n;
+
+ n = ns_skiprr(cp, eom, ns_s_an, ntohs(hp->ancount));
+ if (n < 0)
+ return (NULL);
+ cp += n;
+
+ n = ns_skiprr(cp, eom, ns_s_ns, ntohs(hp->nscount));
+ if (n < 0)
+ return (NULL);
+ cp += n;
+
+ n = ns_skiprr(cp, eom, ns_s_ar, ntohs(hp->arcount) - 1);
+ if (n < 0)
+ return (NULL);
+ cp += n;
+
+ start = cp;
+ n = dn_skipname(cp, eom);
+ if (n < 0)
+ return (NULL);
+ cp += n;
+ if (cp + INT16SZ >= eom)
+ return (NULL);
+
+ GETSHORT(type, cp);
+ if (type != ns_t_tsig)
+ return (NULL);
+ return (start);
+}
+
+/* ns_verify
+ *
+ * Parameters:
+ *\li statp res stuff
+ *\li msg received message
+ *\li msglen length of message
+ *\li key tsig key used for verifying.
+ *\li querysig (response), the signature in the query
+ *\li querysiglen (response), the length of the signature in the query
+ *\li sig (query), a buffer to hold the signature
+ *\li siglen (query), input - length of signature buffer
+ * output - length of signature
+ *
+ * Errors:
+ *\li - bad input (-1)
+ *\li - invalid dns message (NS_TSIG_ERROR_FORMERR)
+ *\li - TSIG is not present (NS_TSIG_ERROR_NO_TSIG)
+ *\li - key doesn't match (-ns_r_badkey)
+ *\li - TSIG verification fails with BADKEY (-ns_r_badkey)
+ *\li - TSIG verification fails with BADSIG (-ns_r_badsig)
+ *\li - TSIG verification fails with BADTIME (-ns_r_badtime)
+ *\li - TSIG verification succeeds, error set to BAKEY (ns_r_badkey)
+ *\li - TSIG verification succeeds, error set to BADSIG (ns_r_badsig)
+ *\li - TSIG verification succeeds, error set to BADTIME (ns_r_badtime)
+ */
+int
+ns_verify(u_char *msg, int *msglen, void *k,
+ const u_char *querysig, int querysiglen, u_char *sig, int *siglen,
+ time_t *timesigned, int nostrip)
+{
+ HEADER *hp = (HEADER *)msg;
+ DST_KEY *key = (DST_KEY *)k;
+ u_char *cp = msg, *eom;
+ char name[MAXDNAME], alg[MAXDNAME];
+ u_char *recstart, *rdatastart;
+ u_char *sigstart, *otherstart;
+ int n;
+ int error;
+ u_int16_t type, length;
+ u_int16_t fudge, sigfieldlen, otherfieldlen;
+
+ dst_init();
+ if (msg == NULL || msglen == NULL || *msglen < 0)
+ return (-1);
+
+ eom = msg + *msglen;
+
+ recstart = ns_find_tsig(msg, eom);
+ if (recstart == NULL)
+ return (NS_TSIG_ERROR_NO_TSIG);
+
+ cp = recstart;
+
+ /* Read the key name. */
+ n = dn_expand(msg, eom, cp, name, MAXDNAME);
+ if (n < 0)
+ return (NS_TSIG_ERROR_FORMERR);
+ cp += n;
+
+ /* Read the type. */
+ BOUNDS_CHECK(cp, 2*INT16SZ + INT32SZ + INT16SZ);
+ GETSHORT(type, cp);
+ if (type != ns_t_tsig)
+ return (NS_TSIG_ERROR_NO_TSIG);
+
+ /* Skip the class and TTL, save the length. */
+ cp += INT16SZ + INT32SZ;
+ GETSHORT(length, cp);
+ if (eom - cp != length)
+ return (NS_TSIG_ERROR_FORMERR);
+
+ /* Read the algorithm name. */
+ rdatastart = cp;
+ n = dn_expand(msg, eom, cp, alg, MAXDNAME);
+ if (n < 0)
+ return (NS_TSIG_ERROR_FORMERR);
+ if (ns_samename(alg, NS_TSIG_ALG_HMAC_MD5) != 1)
+ return (-ns_r_badkey);
+ cp += n;
+
+ /* Read the time signed and fudge. */
+ BOUNDS_CHECK(cp, INT16SZ + INT32SZ + INT16SZ);
+ cp += INT16SZ;
+ GETLONG((*timesigned), cp);
+ GETSHORT(fudge, cp);
+
+ /* Read the signature. */
+ BOUNDS_CHECK(cp, INT16SZ);
+ GETSHORT(sigfieldlen, cp);
+ BOUNDS_CHECK(cp, sigfieldlen);
+ sigstart = cp;
+ cp += sigfieldlen;
+
+ /* Skip id and read error. */
+ BOUNDS_CHECK(cp, 2*INT16SZ);
+ cp += INT16SZ;
+ GETSHORT(error, cp);
+
+ /* Parse the other data. */
+ BOUNDS_CHECK(cp, INT16SZ);
+ GETSHORT(otherfieldlen, cp);
+ BOUNDS_CHECK(cp, otherfieldlen);
+ otherstart = cp;
+ cp += otherfieldlen;
+
+ if (cp != eom)
+ return (NS_TSIG_ERROR_FORMERR);
+
+ /* Verify that the key used is OK. */
+ if (key != NULL) {
+ if (key->dk_alg != KEY_HMAC_MD5)
+ return (-ns_r_badkey);
+ if (error != ns_r_badsig && error != ns_r_badkey) {
+ if (ns_samename(key->dk_key_name, name) != 1)
+ return (-ns_r_badkey);
+ }
+ }
+
+ hp->arcount = htons(ntohs(hp->arcount) - 1);
+
+ /*
+ * Do the verification.
+ */
+
+ if (key != NULL && error != ns_r_badsig && error != ns_r_badkey) {
+ void *ctx;
+ u_char buf[MAXDNAME];
+ u_char buf2[MAXDNAME];
+
+ /* Digest the query signature, if this is a response. */
+ dst_verify_data(SIG_MODE_INIT, key, &ctx, NULL, 0, NULL, 0);
+ if (querysiglen > 0 && querysig != NULL) {
+ u_int16_t len_n = htons(querysiglen);
+ dst_verify_data(SIG_MODE_UPDATE, key, &ctx,
+ (u_char *)&len_n, INT16SZ, NULL, 0);
+ dst_verify_data(SIG_MODE_UPDATE, key, &ctx,
+ querysig, querysiglen, NULL, 0);
+ }
+
+ /* Digest the message. */
+ dst_verify_data(SIG_MODE_UPDATE, key, &ctx, msg, recstart - msg,
+ NULL, 0);
+
+ /* Digest the key name. */
+ n = ns_name_pton(name, buf2, sizeof(buf2));
+ if (n < 0)
+ return (-1);
+ n = ns_name_ntol(buf2, buf, sizeof(buf));
+ if (n < 0)
+ return (-1);
+ dst_verify_data(SIG_MODE_UPDATE, key, &ctx, buf, n, NULL, 0);
+
+ /* Digest the class and TTL. */
+ dst_verify_data(SIG_MODE_UPDATE, key, &ctx,
+ recstart + dn_skipname(recstart, eom) + INT16SZ,
+ INT16SZ + INT32SZ, NULL, 0);
+
+ /* Digest the algorithm. */
+ n = ns_name_pton(alg, buf2, sizeof(buf2));
+ if (n < 0)
+ return (-1);
+ n = ns_name_ntol(buf2, buf, sizeof(buf));
+ if (n < 0)
+ return (-1);
+ dst_verify_data(SIG_MODE_UPDATE, key, &ctx, buf, n, NULL, 0);
+
+ /* Digest the time signed and fudge. */
+ dst_verify_data(SIG_MODE_UPDATE, key, &ctx,
+ rdatastart + dn_skipname(rdatastart, eom),
+ INT16SZ + INT32SZ + INT16SZ, NULL, 0);
+
+ /* Digest the error and other data. */
+ dst_verify_data(SIG_MODE_UPDATE, key, &ctx,
+ otherstart - INT16SZ - INT16SZ,
+ otherfieldlen + INT16SZ + INT16SZ, NULL, 0);
+
+ n = dst_verify_data(SIG_MODE_FINAL, key, &ctx, NULL, 0,
+ sigstart, sigfieldlen);
+
+ if (n < 0)
+ return (-ns_r_badsig);
+
+ if (sig != NULL && siglen != NULL) {
+ if (*siglen < sigfieldlen)
+ return (NS_TSIG_ERROR_NO_SPACE);
+ memcpy(sig, sigstart, sigfieldlen);
+ *siglen = sigfieldlen;
+ }
+ } else {
+ if (sigfieldlen > 0)
+ return (NS_TSIG_ERROR_FORMERR);
+ if (sig != NULL && siglen != NULL)
+ *siglen = 0;
+ }
+
+ /* Reset the counter, since we still need to check for badtime. */
+ hp->arcount = htons(ntohs(hp->arcount) + 1);
+
+ /* Verify the time. */
+ if (abs((*timesigned) - time(NULL)) > fudge)
+ return (-ns_r_badtime);
+
+ if (nostrip == 0) {
+ *msglen = recstart - msg;
+ hp->arcount = htons(ntohs(hp->arcount) - 1);
+ }
+
+ if (error != NOERROR)
+ return (error);
+
+ return (0);
+}
+
+int
+ns_verify_tcp_init(void *k, const u_char *querysig, int querysiglen,
+ ns_tcp_tsig_state *state)
+{
+ dst_init();
+ if (state == NULL || k == NULL || querysig == NULL || querysiglen < 0)
+ return (-1);
+ state->counter = -1;
+ state->key = k;
+ if (state->key->dk_alg != KEY_HMAC_MD5)
+ return (-ns_r_badkey);
+ if (querysiglen > (int)sizeof(state->sig))
+ return (-1);
+ memcpy(state->sig, querysig, querysiglen);
+ state->siglen = querysiglen;
+ return (0);
+}
+
+int
+ns_verify_tcp(u_char *msg, int *msglen, ns_tcp_tsig_state *state,
+ int required)
+{
+ HEADER *hp = (HEADER *)msg;
+ u_char *recstart, *sigstart;
+ unsigned int sigfieldlen, otherfieldlen;
+ u_char *cp, *eom, *cp2;
+ char name[MAXDNAME], alg[MAXDNAME];
+ u_char buf[MAXDNAME];
+ int n, type, length, fudge, error;
+ time_t timesigned;
+
+ if (msg == NULL || msglen == NULL || state == NULL)
+ return (-1);
+
+ eom = msg + *msglen;
+
+ state->counter++;
+ if (state->counter == 0)
+ return (ns_verify(msg, msglen, state->key,
+ state->sig, state->siglen,
+ state->sig, &state->siglen, &timesigned, 0));
+
+ if (state->siglen > 0) {
+ u_int16_t siglen_n = htons(state->siglen);
+
+ dst_verify_data(SIG_MODE_INIT, state->key, &state->ctx,
+ NULL, 0, NULL, 0);
+ dst_verify_data(SIG_MODE_UPDATE, state->key, &state->ctx,
+ (u_char *)&siglen_n, INT16SZ, NULL, 0);
+ dst_verify_data(SIG_MODE_UPDATE, state->key, &state->ctx,
+ state->sig, state->siglen, NULL, 0);
+ state->siglen = 0;
+ }
+
+ cp = recstart = ns_find_tsig(msg, eom);
+
+ if (recstart == NULL) {
+ if (required)
+ return (NS_TSIG_ERROR_NO_TSIG);
+ dst_verify_data(SIG_MODE_UPDATE, state->key, &state->ctx,
+ msg, *msglen, NULL, 0);
+ return (0);
+ }
+
+ hp->arcount = htons(ntohs(hp->arcount) - 1);
+ dst_verify_data(SIG_MODE_UPDATE, state->key, &state->ctx,
+ msg, recstart - msg, NULL, 0);
+
+ /* Read the key name. */
+ n = dn_expand(msg, eom, cp, name, MAXDNAME);
+ if (n < 0)
+ return (NS_TSIG_ERROR_FORMERR);
+ cp += n;
+
+ /* Read the type. */
+ BOUNDS_CHECK(cp, 2*INT16SZ + INT32SZ + INT16SZ);
+ GETSHORT(type, cp);
+ if (type != ns_t_tsig)
+ return (NS_TSIG_ERROR_NO_TSIG);
+
+ /* Skip the class and TTL, save the length. */
+ cp += INT16SZ + INT32SZ;
+ GETSHORT(length, cp);
+ if (eom - cp != length)
+ return (NS_TSIG_ERROR_FORMERR);
+
+ /* Read the algorithm name. */
+ n = dn_expand(msg, eom, cp, alg, MAXDNAME);
+ if (n < 0)
+ return (NS_TSIG_ERROR_FORMERR);
+ if (ns_samename(alg, NS_TSIG_ALG_HMAC_MD5) != 1)
+ return (-ns_r_badkey);
+ cp += n;
+
+ /* Verify that the key used is OK. */
+ if ((ns_samename(state->key->dk_key_name, name) != 1 ||
+ state->key->dk_alg != KEY_HMAC_MD5))
+ return (-ns_r_badkey);
+
+ /* Read the time signed and fudge. */
+ BOUNDS_CHECK(cp, INT16SZ + INT32SZ + INT16SZ);
+ cp += INT16SZ;
+ GETLONG(timesigned, cp);
+ GETSHORT(fudge, cp);
+
+ /* Read the signature. */
+ BOUNDS_CHECK(cp, INT16SZ);
+ GETSHORT(sigfieldlen, cp);
+ BOUNDS_CHECK(cp, sigfieldlen);
+ sigstart = cp;
+ cp += sigfieldlen;
+
+ /* Skip id and read error. */
+ BOUNDS_CHECK(cp, 2*INT16SZ);
+ cp += INT16SZ;
+ GETSHORT(error, cp);
+
+ /* Parse the other data. */
+ BOUNDS_CHECK(cp, INT16SZ);
+ GETSHORT(otherfieldlen, cp);
+ BOUNDS_CHECK(cp, otherfieldlen);
+ cp += otherfieldlen;
+
+ if (cp != eom)
+ return (NS_TSIG_ERROR_FORMERR);
+
+ /*
+ * Do the verification.
+ */
+
+ /* Digest the time signed and fudge. */
+ cp2 = buf;
+ PUTSHORT(0, cp2); /*%< Top 16 bits of time. */
+ PUTLONG(timesigned, cp2);
+ PUTSHORT(NS_TSIG_FUDGE, cp2);
+
+ dst_verify_data(SIG_MODE_UPDATE, state->key, &state->ctx,
+ buf, cp2 - buf, NULL, 0);
+
+ n = dst_verify_data(SIG_MODE_FINAL, state->key, &state->ctx, NULL, 0,
+ sigstart, sigfieldlen);
+ if (n < 0)
+ return (-ns_r_badsig);
+
+ if (sigfieldlen > sizeof(state->sig))
+ return (NS_TSIG_ERROR_NO_SPACE);
+
+ memcpy(state->sig, sigstart, sigfieldlen);
+ state->siglen = sigfieldlen;
+
+ /* Verify the time. */
+ if (abs(timesigned - time(NULL)) > fudge)
+ return (-ns_r_badtime);
+
+ *msglen = recstart - msg;
+
+ if (error != NOERROR)
+ return (error);
+
+ return (0);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/herror.c b/usr/src/lib/libresolv2_joy/common/resolv/herror.c
new file mode 100644
index 0000000000..568ce3ce68
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/herror.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 1987, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)herror.c 8.1 (Berkeley) 6/4/93";
+static const char rcsid[] = "$Id: herror.c,v 1.4 2005/04/27 04:56:41 sra Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/uio.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <string.h>
+#include <unistd.h>
+#include <irs.h>
+
+#include "port_after.h"
+
+const char *h_errlist[] = {
+ "Resolver Error 0 (no error)",
+ "Unknown host", /*%< 1 HOST_NOT_FOUND */
+ "Host name lookup failure", /*%< 2 TRY_AGAIN */
+ "Unknown server error", /*%< 3 NO_RECOVERY */
+ "No address associated with name", /*%< 4 NO_ADDRESS */
+};
+int h_nerr = { sizeof h_errlist / sizeof h_errlist[0] };
+
+#if !(__GLIBC__ > 2 || __GLIBC__ == 2 && __GLIBC_MINOR__ >= 3)
+#undef h_errno
+int h_errno;
+#endif
+
+/*%
+ * herror --
+ * print the error indicated by the h_errno value.
+ */
+void
+herror(const char *s) {
+ struct iovec iov[4], *v = iov;
+ char *t;
+
+ if (s != NULL && *s != '\0') {
+ DE_CONST(s, t);
+ v->iov_base = t;
+ v->iov_len = strlen(t);
+ v++;
+ DE_CONST(": ", t);
+ v->iov_base = t;
+ v->iov_len = 2;
+ v++;
+ }
+ DE_CONST(hstrerror(*__h_errno()), t);
+ v->iov_base = t;
+ v->iov_len = strlen(v->iov_base);
+ v++;
+ DE_CONST("\n", t);
+ v->iov_base = t;
+ v->iov_len = 1;
+ writev(STDERR_FILENO, iov, (v - iov) + 1);
+}
+
+/*%
+ * hstrerror --
+ * return the string associated with a given "host" errno value.
+ */
+const char *
+hstrerror(int err) {
+ if (err < 0)
+ return ("Resolver internal error");
+ else if (err < h_nerr)
+ return (h_errlist[err]);
+ return ("Unknown resolver error");
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/mtctxres.c b/usr/src/lib/libresolv2_joy/common/resolv/mtctxres.c
new file mode 100644
index 0000000000..2e79b1e7b4
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/mtctxres.c
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#include <port_before.h>
+#ifdef DO_PTHREADS
+#include <pthread.h>
+#endif
+#include <errno.h>
+#include <netdb.h>
+#include <stdlib.h>
+#include <string.h>
+#include <resolv_mt.h>
+#include <irs.h>
+#include <port_after.h>
+
+#ifdef DO_PTHREADS
+static pthread_key_t key;
+static int mt_key_initialized = 0;
+
+static int __res_init_ctx(void);
+static void __res_destroy_ctx(void *);
+
+#if defined(sun) && !defined(__GNUC__)
+#pragma init (_mtctxres_init)
+#endif
+#endif
+
+static mtctxres_t sharedctx;
+
+#ifdef DO_PTHREADS
+/*
+ * Initialize the TSD key. By doing this at library load time, we're
+ * implicitly running without interference from other threads, so there's
+ * no need for locking.
+ */
+static void
+_mtctxres_init(void) {
+ int pthread_keycreate_ret;
+
+ pthread_keycreate_ret = pthread_key_create(&key, __res_destroy_ctx);
+ if (pthread_keycreate_ret == 0)
+ mt_key_initialized = 1;
+}
+#endif
+
+/*
+ * To support binaries that used the private MT-safe interface in
+ * Solaris 8, we still need to provide the __res_enable_mt()
+ * and __res_disable_mt() entry points. They're do-nothing routines.
+ */
+int
+__res_enable_mt(void) {
+ return (-1);
+}
+
+int
+__res_disable_mt(void) {
+ return (0);
+}
+
+#ifdef DO_PTHREADS
+static int
+__res_init_ctx(void) {
+
+ mtctxres_t *mt;
+ int ret;
+
+
+ if (pthread_getspecific(key) != 0) {
+ /* Already exists */
+ return (0);
+ }
+
+ if ((mt = malloc(sizeof (mtctxres_t))) == 0) {
+ errno = ENOMEM;
+ return (-1);
+ }
+
+ memset(mt, 0, sizeof (mtctxres_t));
+
+ if ((ret = pthread_setspecific(key, mt)) != 0) {
+ free(mt);
+ errno = ret;
+ return (-1);
+ }
+
+ return (0);
+}
+
+static void
+__res_destroy_ctx(void *value) {
+
+ mtctxres_t *mt = (mtctxres_t *)value;
+
+ if (mt != 0)
+ free(mt);
+}
+#endif
+
+mtctxres_t *
+___mtctxres(void) {
+#ifdef DO_PTHREADS
+ mtctxres_t *mt;
+
+ /*
+ * This if clause should only be executed if we are linking
+ * statically. When linked dynamically _mtctxres_init() should
+ * be called at binding time due the #pragma above.
+ */
+ if (!mt_key_initialized) {
+ static pthread_mutex_t keylock = PTHREAD_MUTEX_INITIALIZER;
+ if (pthread_mutex_lock(&keylock) == 0) {
+ _mtctxres_init();
+ (void) pthread_mutex_unlock(&keylock);
+ }
+ }
+
+ /*
+ * If we have already been called in this thread return the existing
+ * context. Otherwise recreat a new context and return it. If
+ * that fails return a global context.
+ */
+ if (mt_key_initialized) {
+ if (((mt = pthread_getspecific(key)) != 0) ||
+ (__res_init_ctx() == 0 &&
+ (mt = pthread_getspecific(key)) != 0)) {
+ return (mt);
+ }
+ }
+#endif
+ return (&sharedctx);
+}
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/res_comp.c b/usr/src/lib/libresolv2_joy/common/resolv/res_comp.c
new file mode 100644
index 0000000000..c82bf01eb8
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/res_comp.c
@@ -0,0 +1,287 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/*
+ * Copyright (c) 1985, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Portions Copyright (c) 1993 by Digital Equipment Corporation.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies, and that
+ * the name of Digital Equipment Corporation not be used in advertising or
+ * publicity pertaining to distribution of the document or software without
+ * specific, written prior permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
+ * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)res_comp.c 8.1 (Berkeley) 6/4/93";
+static const char rcsid[] = "$Id: res_comp.c,v 1.5 2005/07/28 06:51:50 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include "port_before.h"
+#include <sys/types.h>
+#include <sys/param.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <ctype.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include "port_after.h"
+
+#ifndef ORIGINAL_ISC_CODE
+#pragma weak __dn_skipname = dn_skipname
+#pragma weak __res_dnok = res_dnok
+#pragma weak __res_hnok = res_hnok
+#pragma weak __res_mailok = res_mailok
+#pragma weak __res_ownok = res_ownok
+#endif /* ORIGINAL_ISC_CODE */
+
+/*%
+ * Expand compressed domain name 'src' to full domain name.
+ *
+ * \li 'msg' is a pointer to the begining of the message,
+ * \li 'eom' points to the first location after the message,
+ * \li 'dst' is a pointer to a buffer of size 'dstsiz' for the result.
+ * \li Return size of compressed name or -1 if there was an error.
+ */
+int
+dn_expand(const u_char *msg, const u_char *eom, const u_char *src,
+ char *dst, int dstsiz)
+{
+ int n = ns_name_uncompress(msg, eom, src, dst, (size_t)dstsiz);
+
+ if (n > 0 && dst[0] == '.')
+ dst[0] = '\0';
+ return (n);
+}
+
+/*%
+ * Pack domain name 'exp_dn' in presentation form into 'comp_dn'.
+ *
+ * \li Return the size of the compressed name or -1.
+ * \li 'length' is the size of the array pointed to by 'comp_dn'.
+ */
+int
+dn_comp(const char *src, u_char *dst, int dstsiz,
+ u_char **dnptrs, u_char **lastdnptr)
+{
+ return (ns_name_compress(src, dst, (size_t)dstsiz,
+ (const u_char **)dnptrs,
+ (const u_char **)lastdnptr));
+}
+
+
+/*%
+ * Skip over a compressed domain name. Return the size or -1.
+ */
+int
+dn_skipname(const u_char *ptr, const u_char *eom) {
+ const u_char *saveptr = ptr;
+
+ if (ns_name_skip(&ptr, eom) == -1)
+ return (-1);
+ return (ptr - saveptr);
+}
+
+/*%
+ * Verify that a domain name uses an acceptable character set.
+ *
+ * Note the conspicuous absence of ctype macros in these definitions. On
+ * non-ASCII hosts, we can't depend on string literals or ctype macros to
+ * tell us anything about network-format data. The rest of the BIND system
+ * is not careful about this, but for some reason, we're doing it right here.
+ */
+#define PERIOD 0x2e
+#define hyphenchar(c) ((c) == 0x2d)
+#define bslashchar(c) ((c) == 0x5c)
+#ifdef SUNW_HNOK_UNDERSCORE
+#define underscorechar(c) ((c) == 0x5f)
+#endif /* SUNW_HNOK_UNDERSCORE */
+#define periodchar(c) ((c) == PERIOD)
+#define asterchar(c) ((c) == 0x2a)
+#define alphachar(c) (((c) >= 0x41 && (c) <= 0x5a) \
+ || ((c) >= 0x61 && (c) <= 0x7a))
+#define digitchar(c) ((c) >= 0x30 && (c) <= 0x39)
+
+#define borderchar(c) (alphachar(c) || digitchar(c))
+#ifdef SUNW_HNOK_UNDERSCORE
+#define middlechar(c) (borderchar(c) || hyphenchar(c) || underscorechar(c))
+#else
+#define middlechar(c) (borderchar(c) || hyphenchar(c))
+#endif /* SUNW_HNOK_UNDERSCORE */
+#define domainchar(c) ((c) > 0x20 && (c) < 0x7f)
+
+int
+res_hnok(const char *dn) {
+ int pch = PERIOD, ch = *dn++;
+
+ while (ch != '\0') {
+ int nch = *dn++;
+
+ if (periodchar(ch)) {
+ (void)NULL;
+ } else if (periodchar(pch)) {
+ if (!borderchar(ch))
+ return (0);
+ } else if (periodchar(nch) || nch == '\0') {
+ if (!borderchar(ch))
+ return (0);
+ } else {
+ if (!middlechar(ch))
+ return (0);
+ }
+ pch = ch, ch = nch;
+ }
+ return (1);
+}
+
+/*%
+ * hostname-like (A, MX, WKS) owners can have "*" as their first label
+ * but must otherwise be as a host name.
+ */
+int
+res_ownok(const char *dn) {
+ if (asterchar(dn[0])) {
+ if (periodchar(dn[1]))
+ return (res_hnok(dn+2));
+ if (dn[1] == '\0')
+ return (1);
+ }
+ return (res_hnok(dn));
+}
+
+/*%
+ * SOA RNAMEs and RP RNAMEs can have any printable character in their first
+ * label, but the rest of the name has to look like a host name.
+ */
+int
+res_mailok(const char *dn) {
+ int ch, escaped = 0;
+
+ /* "." is a valid missing representation */
+ if (*dn == '\0')
+ return (1);
+
+ /* otherwise <label>.<hostname> */
+ while ((ch = *dn++) != '\0') {
+ if (!domainchar(ch))
+ return (0);
+ if (!escaped && periodchar(ch))
+ break;
+ if (escaped)
+ escaped = 0;
+ else if (bslashchar(ch))
+ escaped = 1;
+ }
+ if (periodchar(ch))
+ return (res_hnok(dn));
+ return (0);
+}
+
+/*%
+ * This function is quite liberal, since RFC1034's character sets are only
+ * recommendations.
+ */
+int
+res_dnok(const char *dn) {
+ int ch;
+
+ while ((ch = *dn++) != '\0')
+ if (!domainchar(ch))
+ return (0);
+ return (1);
+}
+
+#ifdef BIND_4_COMPAT
+/*%
+ * This module must export the following externally-visible symbols:
+ * ___putlong
+ * ___putshort
+ * __getlong
+ * __getshort
+ * Note that one _ comes from C and the others come from us.
+ */
+
+#ifdef SOLARIS2
+#ifdef __putlong
+#undef __putlong
+#endif
+#ifdef __putshort
+#undef __putshort
+#endif
+#pragma weak putlong = __putlong
+#pragma weak putshort = __putshort
+#endif /* SOLARIS2 */
+
+void __putlong(u_int32_t src, u_char *dst) { ns_put32(src, dst); }
+void __putshort(u_int16_t src, u_char *dst) { ns_put16(src, dst); }
+#ifndef __ultrix__
+u_int32_t _getlong(const u_char *src) { return (ns_get32(src)); }
+u_int16_t _getshort(const u_char *src) { return (ns_get16(src)); }
+#endif /*__ultrix__*/
+#endif /*BIND_4_COMPAT*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/res_data.c b/usr/src/lib/libresolv2_joy/common/resolv/res_data.c
new file mode 100644
index 0000000000..f1e0dd030b
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/res_data.c
@@ -0,0 +1,322 @@
+/*
+ * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1995-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: res_data.c,v 1.7 2008/12/11 09:59:00 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <ctype.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <res_update.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "port_after.h"
+
+#ifndef ORIGINAL_ISC_CODE
+#pragma weak __fp_nquery = fp_nquery
+#pragma weak __fp_query = fp_query
+#pragma weak __p_query = p_query
+#pragma weak __hostalias = hostalias
+#pragma weak __res_randomid = res_randomid
+#endif
+
+const char *_res_opcodes[] = {
+ "QUERY",
+ "IQUERY",
+ "CQUERYM",
+ "CQUERYU", /*%< experimental */
+ "NOTIFY", /*%< experimental */
+ "UPDATE",
+ "6",
+ "7",
+ "8",
+ "9",
+ "10",
+ "11",
+ "12",
+ "13",
+ "ZONEINIT",
+ "ZONEREF",
+};
+
+#ifdef BIND_UPDATE
+const char *_res_sectioncodes[] = {
+ "ZONE",
+ "PREREQUISITES",
+ "UPDATE",
+ "ADDITIONAL",
+};
+#endif
+
+#undef _res
+#ifndef __BIND_NOSTATIC
+struct __res_state _res
+# if defined(__BIND_RES_TEXT)
+ = { RES_TIMEOUT, } /*%< Motorola, et al. */
+# endif
+ ;
+
+#ifdef ORIGINAL_ISC_CODE
+#if defined(DO_PTHREADS) || defined(__linux)
+#define _res (*__res_state())
+#endif
+#endif
+
+/* Proto. */
+
+int res_ourserver_p(const res_state, const struct sockaddr_in *);
+
+int
+res_init(void) {
+ extern int __res_vinit(res_state, int);
+
+ /*
+ * These three fields used to be statically initialized. This made
+ * it hard to use this code in a shared library. It is necessary,
+ * now that we're doing dynamic initialization here, that we preserve
+ * the old semantics: if an application modifies one of these three
+ * fields of _res before res_init() is called, res_init() will not
+ * alter them. Of course, if an application is setting them to
+ * _zero_ before calling res_init(), hoping to override what used
+ * to be the static default, we can't detect it and unexpected results
+ * will follow. Zero for any of these fields would make no sense,
+ * so one can safely assume that the applications were already getting
+ * unexpected results.
+ *
+ * _res.options is tricky since some apps were known to diddle the bits
+ * before res_init() was first called. We can't replicate that semantic
+ * with dynamic initialization (they may have turned bits off that are
+ * set in RES_DEFAULT). Our solution is to declare such applications
+ * "broken". They could fool us by setting RES_INIT but none do (yet).
+ */
+ if (!_res.retrans)
+ _res.retrans = RES_TIMEOUT;
+ if (!_res.retry)
+ _res.retry = 4;
+ if (!(_res.options & RES_INIT))
+ _res.options = RES_DEFAULT;
+
+ /*
+ * This one used to initialize implicitly to zero, so unless the app
+ * has set it to something in particular, we can randomize it now.
+ */
+ if (!_res.id)
+ _res.id = res_nrandomid(&_res);
+
+ return (__res_vinit(&_res, 1));
+}
+
+void
+p_query(const u_char *msg) {
+ fp_query(msg, stdout);
+}
+
+void
+fp_query(const u_char *msg, FILE *file) {
+ fp_nquery(msg, PACKETSZ, file);
+}
+
+void
+fp_nquery(const u_char *msg, int len, FILE *file) {
+ if ((_res.options & RES_INIT) == 0U && res_init() == -1)
+ return;
+
+ res_pquery(&_res, msg, len, file);
+}
+
+int
+res_mkquery(int op, /*!< opcode of query */
+ const char *dname, /*!< domain name */
+ int class, int type, /*!< class and type of query */
+ const u_char *data, /*!< resource record data */
+ int datalen, /*!< length of data */
+ const u_char *newrr_in, /*!< new rr for modify or append */
+ u_char *buf, /*!< buffer to put query */
+ int buflen) /*!< size of buffer */
+{
+ if ((_res.options & RES_INIT) == 0U && res_init() == -1) {
+ RES_SET_H_ERRNO(&_res, NETDB_INTERNAL);
+ return (-1);
+ }
+ return (res_nmkquery(&_res, op, dname, class, type,
+ data, datalen,
+ newrr_in, buf, buflen));
+}
+
+int
+res_mkupdate(ns_updrec *rrecp_in, u_char *buf, int buflen) {
+ if ((_res.options & RES_INIT) == 0U && res_init() == -1) {
+ RES_SET_H_ERRNO(&_res, NETDB_INTERNAL);
+ return (-1);
+ }
+
+ return (res_nmkupdate(&_res, rrecp_in, buf, buflen));
+}
+
+int
+res_query(const char *name, /*!< domain name */
+ int class, int type, /*!< class and type of query */
+ u_char *answer, /*!< buffer to put answer */
+ int anslen) /*!< size of answer buffer */
+{
+ if ((_res.options & RES_INIT) == 0U && res_init() == -1) {
+ RES_SET_H_ERRNO(&_res, NETDB_INTERNAL);
+ return (-1);
+ }
+ return (res_nquery(&_res, name, class, type, answer, anslen));
+}
+
+void
+res_send_setqhook(res_send_qhook hook) {
+ _res.qhook = hook;
+}
+
+void
+res_send_setrhook(res_send_rhook hook) {
+ _res.rhook = hook;
+}
+
+int
+res_isourserver(const struct sockaddr_in *inp) {
+ return (res_ourserver_p(&_res, inp));
+}
+
+int
+res_send(const u_char *buf, int buflen, u_char *ans, int anssiz) {
+ if ((_res.options & RES_INIT) == 0U && res_init() == -1) {
+ /* errno should have been set by res_init() in this case. */
+ return (-1);
+ }
+
+ return (res_nsend(&_res, buf, buflen, ans, anssiz));
+}
+
+int
+res_sendsigned(const u_char *buf, int buflen, ns_tsig_key *key,
+ u_char *ans, int anssiz)
+{
+ if ((_res.options & RES_INIT) == 0U && res_init() == -1) {
+ /* errno should have been set by res_init() in this case. */
+ return (-1);
+ }
+
+ return (res_nsendsigned(&_res, buf, buflen, key, ans, anssiz));
+}
+
+void
+res_close(void) {
+ res_nclose(&_res);
+}
+
+int
+res_update(ns_updrec *rrecp_in) {
+ if ((_res.options & RES_INIT) == 0U && res_init() == -1) {
+ RES_SET_H_ERRNO(&_res, NETDB_INTERNAL);
+ return (-1);
+ }
+
+ return (res_nupdate(&_res, rrecp_in, NULL));
+}
+
+int
+res_search(const char *name, /*!< domain name */
+ int class, int type, /*!< class and type of query */
+ u_char *answer, /*!< buffer to put answer */
+ int anslen) /*!< size of answer */
+{
+ if ((_res.options & RES_INIT) == 0U && res_init() == -1) {
+ RES_SET_H_ERRNO(&_res, NETDB_INTERNAL);
+ return (-1);
+ }
+
+ return (res_nsearch(&_res, name, class, type, answer, anslen));
+}
+
+int
+res_querydomain(const char *name,
+ const char *domain,
+ int class, int type, /*!< class and type of query */
+ u_char *answer, /*!< buffer to put answer */
+ int anslen) /*!< size of answer */
+{
+ if ((_res.options & RES_INIT) == 0U && res_init() == -1) {
+ RES_SET_H_ERRNO(&_res, NETDB_INTERNAL);
+ return (-1);
+ }
+
+ return (res_nquerydomain(&_res, name, domain,
+ class, type,
+ answer, anslen));
+}
+
+u_int
+res_randomid(void) {
+ if ((_res.options & RES_INIT) == 0U && res_init() == -1) {
+ RES_SET_H_ERRNO(&_res, NETDB_INTERNAL);
+ return (-1);
+ }
+
+ return (res_nrandomid(&_res));
+}
+
+const char *
+hostalias(const char *name) {
+ static char abuf[MAXDNAME];
+
+ return (res_hostalias(&_res, name, abuf, sizeof abuf));
+}
+
+#ifdef ultrix
+int
+local_hostname_length(const char *hostname) {
+ int len_host, len_domain;
+
+ if (!*_res.defdname)
+ res_init();
+ len_host = strlen(hostname);
+ len_domain = strlen(_res.defdname);
+ if (len_host > len_domain &&
+ !strcasecmp(hostname + len_host - len_domain, _res.defdname) &&
+ hostname[len_host - len_domain - 1] == '.')
+ return (len_host - len_domain - 1);
+ return (0);
+}
+#endif /*ultrix*/
+
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/res_debug.c b/usr/src/lib/libresolv2_joy/common/resolv/res_debug.c
new file mode 100644
index 0000000000..e11fb29612
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/res_debug.c
@@ -0,0 +1,1252 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/*
+ * Portions Copyright (C) 2004, 2005, 2008, 2009 Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (C) 1996-2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Copyright (c) 1985
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Portions Copyright (c) 1993 by Digital Equipment Corporation.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies, and that
+ * the name of Digital Equipment Corporation not be used in advertising or
+ * publicity pertaining to distribution of the document or software without
+ * specific, written prior permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
+ * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+/*
+ * Portions Copyright (c) 1995 by International Business Machines, Inc.
+ *
+ * International Business Machines, Inc. (hereinafter called IBM) grants
+ * permission under its copyrights to use, copy, modify, and distribute this
+ * Software with or without fee, provided that the above copyright notice and
+ * all paragraphs of this notice appear in all copies, and that the name of IBM
+ * not be used in connection with the marketing of any product incorporating
+ * the Software or modifications thereof, without specific, written prior
+ * permission.
+ *
+ * To the extent it has a right to do so, IBM grants an immunity from suit
+ * under its patents, if any, for the use, sale or manufacture of products to
+ * the extent that such products are used for performing Domain Name System
+ * dynamic updates in TCP/IP networks by means of the Software. No immunity is
+ * granted for any product per se or for any other function of any product.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", AND IBM DISCLAIMS ALL WARRANTIES,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE. IN NO EVENT SHALL IBM BE LIABLE FOR ANY SPECIAL,
+ * DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE, EVEN
+ * IF IBM IS APPRISED OF THE POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)res_debug.c 8.1 (Berkeley) 6/4/93";
+static const char rcsid[] = "$Id: res_debug.c,v 1.19 2009/02/26 11:20:20 tbox Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <math.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <resolv_mt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "port_after.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) sprintf x
+#endif
+
+extern const char *_res_opcodes[];
+extern const char *_res_sectioncodes[];
+
+#ifndef ORIGINAL_ISC_CODE
+#pragma weak __dn_count_labels = dn_count_labels
+#pragma weak __fp_resstat = fp_resstat
+#pragma weak __loc_aton = loc_aton
+#pragma weak __loc_ntoa = loc_ntoa
+#pragma weak __p_cdname = p_cdname
+#pragma weak __p_class = p_class
+#pragma weak __p_section = p_section
+#pragma weak __p_time = p_time
+#pragma weak __p_type = p_type
+#pragma weak __sym_ntop = sym_ntop
+#pragma weak __sym_ntos = sym_ntos
+#pragma weak __sym_ston = sym_ston
+#endif /* ORIGINAL_ISC_CODE */
+
+/*%
+ * Print the current options.
+ */
+void
+fp_resstat(const res_state statp, FILE *file) {
+ u_long mask;
+
+ fprintf(file, ";; res options:");
+ for (mask = 1; mask != 0U; mask <<= 1)
+ if (statp->options & mask)
+ fprintf(file, " %s", p_option(mask));
+ putc('\n', file);
+}
+
+static void
+do_section(const res_state statp,
+ ns_msg *handle, ns_sect section,
+ int pflag, FILE *file)
+{
+ int n, sflag, rrnum;
+ static int buflen = 2048;
+ char *buf;
+ ns_opcode opcode;
+ ns_rr rr;
+
+ /*
+ * Print answer records.
+ */
+ sflag = (statp->pfcode & pflag);
+ if (statp->pfcode && !sflag)
+ return;
+
+ buf = malloc(buflen);
+ if (buf == NULL) {
+ fprintf(file, ";; memory allocation failure\n");
+ return;
+ }
+
+ opcode = (ns_opcode) ns_msg_getflag(*handle, ns_f_opcode);
+ rrnum = 0;
+ for (;;) {
+ if (ns_parserr(handle, section, rrnum, &rr)) {
+ if (errno != ENODEV)
+ fprintf(file, ";; ns_parserr: %s\n",
+ strerror(errno));
+ else if (rrnum > 0 && sflag != 0 &&
+ (statp->pfcode & RES_PRF_HEAD1))
+ putc('\n', file);
+ goto cleanup;
+ }
+ if (rrnum == 0 && sflag != 0 && (statp->pfcode & RES_PRF_HEAD1))
+ fprintf(file, ";; %s SECTION:\n",
+ p_section(section, opcode));
+ if (section == ns_s_qd)
+ fprintf(file, ";;\t%s, type = %s, class = %s\n",
+ ns_rr_name(rr),
+ p_type(ns_rr_type(rr)),
+ p_class(ns_rr_class(rr)));
+ else if (section == ns_s_ar && ns_rr_type(rr) == ns_t_opt) {
+ u_int16_t optcode, optlen, rdatalen = ns_rr_rdlen(rr);
+ u_int32_t ttl = ns_rr_ttl(rr);
+
+ fprintf(file,
+ "; EDNS: version: %u, udp=%u, flags=%04x\n",
+ (ttl>>16)&0xff, ns_rr_class(rr), ttl&0xffff);
+
+ while (rdatalen >= 4) {
+ const u_char *cp = ns_rr_rdata(rr);
+ int i;
+
+ GETSHORT(optcode, cp);
+ GETSHORT(optlen, cp);
+
+ if (optcode == NS_OPT_NSID) {
+ fputs("; NSID: ", file);
+ if (optlen == 0) {
+ fputs("; NSID\n", file);
+ } else {
+ fputs("; NSID: ", file);
+ for (i = 0; i < optlen; i++)
+ fprintf(file, "%02x ",
+ cp[i]);
+ fputs(" (",file);
+ for (i = 0; i < optlen; i++)
+ fprintf(file, "%c",
+ isprint(cp[i])?
+ cp[i] : '.');
+ fputs(")\n", file);
+ }
+ } else {
+ if (optlen == 0) {
+ fprintf(file, "; OPT=%u\n",
+ optcode);
+ } else {
+ fprintf(file, "; OPT=%u: ",
+ optcode);
+ for (i = 0; i < optlen; i++)
+ fprintf(file, "%02x ",
+ cp[i]);
+ fputs(" (",file);
+ for (i = 0; i < optlen; i++)
+ fprintf(file, "%c",
+ isprint(cp[i]) ?
+ cp[i] : '.');
+ fputs(")\n", file);
+ }
+ }
+ rdatalen -= 4 + optlen;
+ }
+ } else {
+ n = ns_sprintrr(handle, &rr, NULL, NULL,
+ buf, buflen);
+ if (n < 0) {
+ if (errno == ENOSPC) {
+ free(buf);
+ buf = NULL;
+ if (buflen < 131072)
+ buf = malloc(buflen += 1024);
+ if (buf == NULL) {
+ fprintf(file,
+ ";; memory allocation failure\n");
+ return;
+ }
+ continue;
+ }
+ fprintf(file, ";; ns_sprintrr: %s\n",
+ strerror(errno));
+ goto cleanup;
+ }
+ fputs(buf, file);
+ fputc('\n', file);
+ }
+ rrnum++;
+ }
+ cleanup:
+ if (buf != NULL)
+ free(buf);
+}
+
+/*%
+ * Print the contents of a query.
+ * This is intended to be primarily a debugging routine.
+ */
+void
+res_pquery(const res_state statp, const u_char *msg, int len, FILE *file) {
+ ns_msg handle;
+ int qdcount, ancount, nscount, arcount;
+ u_int opcode, rcode, id;
+
+ if (ns_initparse(msg, len, &handle) < 0) {
+ fprintf(file, ";; ns_initparse: %s\n", strerror(errno));
+ return;
+ }
+ opcode = ns_msg_getflag(handle, ns_f_opcode);
+ rcode = ns_msg_getflag(handle, ns_f_rcode);
+ id = ns_msg_id(handle);
+ qdcount = ns_msg_count(handle, ns_s_qd);
+ ancount = ns_msg_count(handle, ns_s_an);
+ nscount = ns_msg_count(handle, ns_s_ns);
+ arcount = ns_msg_count(handle, ns_s_ar);
+
+ /*
+ * Print header fields.
+ */
+ if ((!statp->pfcode) || (statp->pfcode & RES_PRF_HEADX) || rcode)
+ fprintf(file,
+ ";; ->>HEADER<<- opcode: %s, status: %s, id: %d\n",
+ _res_opcodes[opcode], p_rcode(rcode), id);
+ if ((!statp->pfcode) || (statp->pfcode & RES_PRF_HEADX))
+ putc(';', file);
+ if ((!statp->pfcode) || (statp->pfcode & RES_PRF_HEAD2)) {
+ fprintf(file, "; flags:");
+ if (ns_msg_getflag(handle, ns_f_qr))
+ fprintf(file, " qr");
+ if (ns_msg_getflag(handle, ns_f_aa))
+ fprintf(file, " aa");
+ if (ns_msg_getflag(handle, ns_f_tc))
+ fprintf(file, " tc");
+ if (ns_msg_getflag(handle, ns_f_rd))
+ fprintf(file, " rd");
+ if (ns_msg_getflag(handle, ns_f_ra))
+ fprintf(file, " ra");
+ if (ns_msg_getflag(handle, ns_f_z))
+ fprintf(file, " ??");
+ if (ns_msg_getflag(handle, ns_f_ad))
+ fprintf(file, " ad");
+ if (ns_msg_getflag(handle, ns_f_cd))
+ fprintf(file, " cd");
+ }
+ if ((!statp->pfcode) || (statp->pfcode & RES_PRF_HEAD1)) {
+ fprintf(file, "; %s: %d",
+ p_section(ns_s_qd, opcode), qdcount);
+ fprintf(file, ", %s: %d",
+ p_section(ns_s_an, opcode), ancount);
+ fprintf(file, ", %s: %d",
+ p_section(ns_s_ns, opcode), nscount);
+ fprintf(file, ", %s: %d",
+ p_section(ns_s_ar, opcode), arcount);
+ }
+ if ((!statp->pfcode) || (statp->pfcode &
+ (RES_PRF_HEADX | RES_PRF_HEAD2 | RES_PRF_HEAD1))) {
+ putc('\n',file);
+ }
+ /*
+ * Print the various sections.
+ */
+ do_section(statp, &handle, ns_s_qd, RES_PRF_QUES, file);
+ do_section(statp, &handle, ns_s_an, RES_PRF_ANS, file);
+ do_section(statp, &handle, ns_s_ns, RES_PRF_AUTH, file);
+ do_section(statp, &handle, ns_s_ar, RES_PRF_ADD, file);
+ if (qdcount == 0 && ancount == 0 &&
+ nscount == 0 && arcount == 0)
+ putc('\n', file);
+}
+
+const u_char *
+p_cdnname(const u_char *cp, const u_char *msg, int len, FILE *file) {
+ char name[MAXDNAME];
+ int n;
+
+ if ((n = dn_expand(msg, msg + len, cp, name, sizeof name)) < 0)
+ return (NULL);
+ if (name[0] == '\0')
+ putc('.', file);
+ else
+ fputs(name, file);
+ return (cp + n);
+}
+
+const u_char *
+p_cdname(const u_char *cp, const u_char *msg, FILE *file) {
+ return (p_cdnname(cp, msg, PACKETSZ, file));
+}
+
+/*%
+ * Return a fully-qualified domain name from a compressed name (with
+ length supplied). */
+
+const u_char *
+p_fqnname(cp, msg, msglen, name, namelen)
+ const u_char *cp, *msg;
+ int msglen;
+ char *name;
+ int namelen;
+{
+ int n, newlen;
+
+ if ((n = dn_expand(msg, cp + msglen, cp, name, namelen)) < 0)
+ return (NULL);
+ newlen = strlen(name);
+ if (newlen == 0 || name[newlen - 1] != '.') {
+ if (newlen + 1 >= namelen) /*%< Lack space for final dot */
+ return (NULL);
+ else
+ strcpy(name + newlen, ".");
+ }
+ return (cp + n);
+}
+
+/* XXX: the rest of these functions need to become length-limited, too. */
+
+const u_char *
+p_fqname(const u_char *cp, const u_char *msg, FILE *file) {
+ char name[MAXDNAME];
+ const u_char *n;
+
+ n = p_fqnname(cp, msg, MAXCDNAME, name, sizeof name);
+ if (n == NULL)
+ return (NULL);
+ fputs(name, file);
+ return (n);
+}
+
+/*%
+ * Names of RR classes and qclasses. Classes and qclasses are the same, except
+ * that C_ANY is a qclass but not a class. (You can ask for records of class
+ * C_ANY, but you can't have any records of that class in the database.)
+ */
+const struct res_sym __p_class_syms[] = {
+ {C_IN, "IN", (char *)0},
+ {C_CHAOS, "CH", (char *)0},
+ {C_CHAOS, "CHAOS", (char *)0},
+ {C_HS, "HS", (char *)0},
+ {C_HS, "HESIOD", (char *)0},
+ {C_ANY, "ANY", (char *)0},
+ {C_NONE, "NONE", (char *)0},
+ {C_IN, (char *)0, (char *)0}
+};
+
+/*%
+ * Names of message sections.
+ */
+const struct res_sym __p_default_section_syms[] = {
+ {ns_s_qd, "QUERY", (char *)0},
+ {ns_s_an, "ANSWER", (char *)0},
+ {ns_s_ns, "AUTHORITY", (char *)0},
+ {ns_s_ar, "ADDITIONAL", (char *)0},
+ {0, (char *)0, (char *)0}
+};
+
+const struct res_sym __p_update_section_syms[] = {
+ {S_ZONE, "ZONE", (char *)0},
+ {S_PREREQ, "PREREQUISITE", (char *)0},
+ {S_UPDATE, "UPDATE", (char *)0},
+ {S_ADDT, "ADDITIONAL", (char *)0},
+ {0, (char *)0, (char *)0}
+};
+
+const struct res_sym __p_key_syms[] = {
+ {NS_ALG_MD5RSA, "RSA", "RSA KEY with MD5 hash"},
+ {NS_ALG_DH, "DH", "Diffie Hellman"},
+ {NS_ALG_DSA, "DSA", "Digital Signature Algorithm"},
+ {NS_ALG_EXPIRE_ONLY, "EXPIREONLY", "No algorithm"},
+ {NS_ALG_PRIVATE_OID, "PRIVATE", "Algorithm obtained from OID"},
+ {0, NULL, NULL}
+};
+
+const struct res_sym __p_cert_syms[] = {
+ {cert_t_pkix, "PKIX", "PKIX (X.509v3) Certificate"},
+ {cert_t_spki, "SPKI", "SPKI certificate"},
+ {cert_t_pgp, "PGP", "PGP certificate"},
+ {cert_t_url, "URL", "URL Private"},
+ {cert_t_oid, "OID", "OID Private"},
+ {0, NULL, NULL}
+};
+
+/*%
+ * Names of RR types and qtypes. Types and qtypes are the same, except
+ * that T_ANY is a qtype but not a type. (You can ask for records of type
+ * T_ANY, but you can't have any records of that type in the database.)
+ */
+const struct res_sym __p_type_syms[] = {
+ {ns_t_a, "A", "address"},
+ {ns_t_ns, "NS", "name server"},
+ {ns_t_md, "MD", "mail destination (deprecated)"},
+ {ns_t_mf, "MF", "mail forwarder (deprecated)"},
+ {ns_t_cname, "CNAME", "canonical name"},
+ {ns_t_soa, "SOA", "start of authority"},
+ {ns_t_mb, "MB", "mailbox"},
+ {ns_t_mg, "MG", "mail group member"},
+ {ns_t_mr, "MR", "mail rename"},
+ {ns_t_null, "NULL", "null"},
+ {ns_t_wks, "WKS", "well-known service (deprecated)"},
+ {ns_t_ptr, "PTR", "domain name pointer"},
+ {ns_t_hinfo, "HINFO", "host information"},
+ {ns_t_minfo, "MINFO", "mailbox information"},
+ {ns_t_mx, "MX", "mail exchanger"},
+ {ns_t_txt, "TXT", "text"},
+ {ns_t_rp, "RP", "responsible person"},
+ {ns_t_afsdb, "AFSDB", "DCE or AFS server"},
+ {ns_t_x25, "X25", "X25 address"},
+ {ns_t_isdn, "ISDN", "ISDN address"},
+ {ns_t_rt, "RT", "router"},
+ {ns_t_nsap, "NSAP", "nsap address"},
+ {ns_t_nsap_ptr, "NSAP_PTR", "domain name pointer"},
+ {ns_t_sig, "SIG", "signature"},
+ {ns_t_key, "KEY", "key"},
+ {ns_t_px, "PX", "mapping information"},
+ {ns_t_gpos, "GPOS", "geographical position (withdrawn)"},
+ {ns_t_aaaa, "AAAA", "IPv6 address"},
+ {ns_t_loc, "LOC", "location"},
+ {ns_t_nxt, "NXT", "next valid name (unimplemented)"},
+ {ns_t_eid, "EID", "endpoint identifier (unimplemented)"},
+ {ns_t_nimloc, "NIMLOC", "NIMROD locator (unimplemented)"},
+ {ns_t_srv, "SRV", "server selection"},
+ {ns_t_atma, "ATMA", "ATM address (unimplemented)"},
+ {ns_t_naptr, "NAPTR", "naptr"},
+ {ns_t_kx, "KX", "key exchange"},
+ {ns_t_cert, "CERT", "certificate"},
+ {ns_t_a6, "A", "IPv6 address (experminental)"},
+ {ns_t_dname, "DNAME", "non-terminal redirection"},
+ {ns_t_opt, "OPT", "opt"},
+ {ns_t_apl, "apl", "apl"},
+ {ns_t_ds, "DS", "delegation signer"},
+ {ns_t_sshfp, "SSFP", "SSH fingerprint"},
+ {ns_t_ipseckey, "IPSECKEY", "IPSEC key"},
+ {ns_t_rrsig, "RRSIG", "rrsig"},
+ {ns_t_nsec, "NSEC", "nsec"},
+ {ns_t_dnskey, "DNSKEY", "DNS key"},
+ {ns_t_dhcid, "DHCID", "dynamic host configuration identifier"},
+ {ns_t_nsec3, "NSEC3", "nsec3"},
+ {ns_t_nsec3param, "NSEC3PARAM", "NSEC3 parameters"},
+ {ns_t_hip, "HIP", "host identity protocol"},
+ {ns_t_spf, "SPF", "sender policy framework"},
+ {ns_t_tkey, "TKEY", "tkey"},
+ {ns_t_tsig, "TSIG", "transaction signature"},
+ {ns_t_ixfr, "IXFR", "incremental zone transfer"},
+ {ns_t_axfr, "AXFR", "zone transfer"},
+ {ns_t_zxfr, "ZXFR", "compressed zone transfer"},
+ {ns_t_mailb, "MAILB", "mailbox-related data (deprecated)"},
+ {ns_t_maila, "MAILA", "mail agent (deprecated)"},
+ {ns_t_naptr, "NAPTR", "URN Naming Authority"},
+ {ns_t_kx, "KX", "Key Exchange"},
+ {ns_t_cert, "CERT", "Certificate"},
+ {ns_t_a6, "A6", "IPv6 Address"},
+ {ns_t_dname, "DNAME", "dname"},
+ {ns_t_sink, "SINK", "Kitchen Sink (experimental)"},
+ {ns_t_opt, "OPT", "EDNS Options"},
+ {ns_t_any, "ANY", "\"any\""},
+ {ns_t_dlv, "DLV", "DNSSEC look-aside validation"},
+ {0, NULL, NULL}
+};
+
+/*%
+ * Names of DNS rcodes.
+ */
+const struct res_sym __p_rcode_syms[] = {
+ {ns_r_noerror, "NOERROR", "no error"},
+ {ns_r_formerr, "FORMERR", "format error"},
+ {ns_r_servfail, "SERVFAIL", "server failed"},
+ {ns_r_nxdomain, "NXDOMAIN", "no such domain name"},
+ {ns_r_notimpl, "NOTIMP", "not implemented"},
+ {ns_r_refused, "REFUSED", "refused"},
+ {ns_r_yxdomain, "YXDOMAIN", "domain name exists"},
+ {ns_r_yxrrset, "YXRRSET", "rrset exists"},
+ {ns_r_nxrrset, "NXRRSET", "rrset doesn't exist"},
+ {ns_r_notauth, "NOTAUTH", "not authoritative"},
+ {ns_r_notzone, "NOTZONE", "Not in zone"},
+ {ns_r_max, "", ""},
+ {ns_r_badsig, "BADSIG", "bad signature"},
+ {ns_r_badkey, "BADKEY", "bad key"},
+ {ns_r_badtime, "BADTIME", "bad time"},
+ {0, NULL, NULL}
+};
+
+int
+sym_ston(const struct res_sym *syms, const char *name, int *success) {
+ for ((void)NULL; syms->name != 0; syms++) {
+ if (strcasecmp (name, syms->name) == 0) {
+ if (success)
+ *success = 1;
+ return (syms->number);
+ }
+ }
+ if (success)
+ *success = 0;
+ return (syms->number); /*%< The default value. */
+}
+
+const char *
+sym_ntos(const struct res_sym *syms, int number, int *success) {
+ char *unname = sym_ntos_unname;
+
+ for ((void)NULL; syms->name != 0; syms++) {
+ if (number == syms->number) {
+ if (success)
+ *success = 1;
+ return (syms->name);
+ }
+ }
+
+ sprintf(unname, "%d", number); /*%< XXX nonreentrant */
+ if (success)
+ *success = 0;
+ return (unname);
+}
+
+const char *
+sym_ntop(const struct res_sym *syms, int number, int *success) {
+ char *unname = sym_ntop_unname;
+
+ for ((void)NULL; syms->name != 0; syms++) {
+ if (number == syms->number) {
+ if (success)
+ *success = 1;
+ return (syms->humanname);
+ }
+ }
+ sprintf(unname, "%d", number); /*%< XXX nonreentrant */
+ if (success)
+ *success = 0;
+ return (unname);
+}
+
+/*%
+ * Return a string for the type.
+ */
+const char *
+p_type(int type) {
+ int success;
+ const char *result;
+ static char typebuf[20];
+
+ result = sym_ntos(__p_type_syms, type, &success);
+ if (success)
+ return (result);
+ if (type < 0 || type > 0xffff)
+ return ("BADTYPE");
+ sprintf(typebuf, "TYPE%d", type);
+ return (typebuf);
+}
+
+/*%
+ * Return a string for the type.
+ */
+const char *
+p_section(int section, int opcode) {
+ const struct res_sym *symbols;
+
+ switch (opcode) {
+ case ns_o_update:
+ symbols = __p_update_section_syms;
+ break;
+ default:
+ symbols = __p_default_section_syms;
+ break;
+ }
+ return (sym_ntos(symbols, section, (int *)0));
+}
+
+/*%
+ * Return a mnemonic for class.
+ */
+const char *
+p_class(int class) {
+ int success;
+ const char *result;
+ static char classbuf[20];
+
+ result = sym_ntos(__p_class_syms, class, &success);
+ if (success)
+ return (result);
+ if (class < 0 || class > 0xffff)
+ return ("BADCLASS");
+ sprintf(classbuf, "CLASS%d", class);
+ return (classbuf);
+}
+
+/*%
+ * Return a mnemonic for an option
+ */
+const char *
+p_option(u_long option) {
+ char *nbuf = p_option_nbuf;
+
+ switch (option) {
+ case RES_INIT: return "init";
+ case RES_DEBUG: return "debug";
+ case RES_AAONLY: return "aaonly(unimpl)";
+ case RES_USEVC: return "usevc";
+ case RES_PRIMARY: return "primry(unimpl)";
+ case RES_IGNTC: return "igntc";
+ case RES_RECURSE: return "recurs";
+ case RES_DEFNAMES: return "defnam";
+ case RES_STAYOPEN: return "styopn";
+ case RES_DNSRCH: return "dnsrch";
+ case RES_INSECURE1: return "insecure1";
+ case RES_INSECURE2: return "insecure2";
+ case RES_NOALIASES: return "noaliases";
+ case RES_USE_INET6: return "inet6";
+#ifdef RES_USE_EDNS0 /*%< KAME extension */
+ case RES_USE_EDNS0: return "edns0";
+ case RES_NSID: return "nsid";
+#endif
+#ifdef RES_USE_DNAME
+ case RES_USE_DNAME: return "dname";
+#endif
+#ifdef RES_USE_DNSSEC
+ case RES_USE_DNSSEC: return "dnssec";
+#endif
+#ifdef RES_NOTLDQUERY
+ case RES_NOTLDQUERY: return "no-tld-query";
+#endif
+#ifdef RES_NO_NIBBLE2
+ case RES_NO_NIBBLE2: return "no-nibble2";
+#endif
+ /* XXX nonreentrant */
+ default: sprintf(nbuf, "?0x%lx?", (u_long)option);
+ return (nbuf);
+ }
+}
+
+/*%
+ * Return a mnemonic for a time to live.
+ */
+const char *
+p_time(u_int32_t value) {
+ char *nbuf = p_time_nbuf;
+
+ if (ns_format_ttl(value, nbuf, sizeof nbuf) < 0)
+ sprintf(nbuf, "%u", value);
+ return (nbuf);
+}
+
+/*%
+ * Return a string for the rcode.
+ */
+const char *
+p_rcode(int rcode) {
+ return (sym_ntos(__p_rcode_syms, rcode, (int *)0));
+}
+
+/*%
+ * Return a string for a res_sockaddr_union.
+ */
+const char *
+p_sockun(union res_sockaddr_union u, char *buf, size_t size) {
+ char ret[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:123.123.123.123"];
+
+ switch (u.sin.sin_family) {
+ case AF_INET:
+ inet_ntop(AF_INET, &u.sin.sin_addr, ret, sizeof ret);
+ break;
+#ifdef HAS_INET6_STRUCTS
+ case AF_INET6:
+ inet_ntop(AF_INET6, &u.sin6.sin6_addr, ret, sizeof ret);
+ break;
+#endif
+ default:
+ sprintf(ret, "[af%d]", u.sin.sin_family);
+ break;
+ }
+ if (size > 0U) {
+ strncpy(buf, ret, size - 1);
+ buf[size - 1] = '0';
+ }
+ return (buf);
+}
+
+/*%
+ * routines to convert between on-the-wire RR format and zone file format.
+ * Does not contain conversion to/from decimal degrees; divide or multiply
+ * by 60*60*1000 for that.
+ */
+
+static unsigned int poweroften[10] = {1, 10, 100, 1000, 10000, 100000,
+ 1000000,10000000,100000000,1000000000};
+
+/*% takes an XeY precision/size value, returns a string representation. */
+static const char *
+precsize_ntoa(prec)
+ u_int8_t prec;
+{
+ char *retbuf = precsize_ntoa_retbuf;
+ unsigned long val;
+ int mantissa, exponent;
+
+ mantissa = (int)((prec >> 4) & 0x0f) % 10;
+ exponent = (int)((prec >> 0) & 0x0f) % 10;
+
+ val = mantissa * poweroften[exponent];
+
+ (void) sprintf(retbuf, "%lu.%.2lu", val/100, val%100);
+ return (retbuf);
+}
+
+/*% converts ascii size/precision X * 10**Y(cm) to 0xXY. moves pointer. */
+static u_int8_t
+precsize_aton(const char **strptr) {
+ unsigned int mval = 0, cmval = 0;
+ u_int8_t retval = 0;
+ const char *cp;
+ int exponent;
+ int mantissa;
+
+ cp = *strptr;
+
+ while (isdigit((unsigned char)*cp))
+ mval = mval * 10 + (*cp++ - '0');
+
+ if (*cp == '.') { /*%< centimeters */
+ cp++;
+ if (isdigit((unsigned char)*cp)) {
+ cmval = (*cp++ - '0') * 10;
+ if (isdigit((unsigned char)*cp)) {
+ cmval += (*cp++ - '0');
+ }
+ }
+ }
+ cmval = (mval * 100) + cmval;
+
+ for (exponent = 0; exponent < 9; exponent++)
+ if (cmval < poweroften[exponent+1])
+ break;
+
+ mantissa = cmval / poweroften[exponent];
+ if (mantissa > 9)
+ mantissa = 9;
+
+ retval = (mantissa << 4) | exponent;
+
+ *strptr = cp;
+
+ return (retval);
+}
+
+/*% converts ascii lat/lon to unsigned encoded 32-bit number. moves pointer. */
+static u_int32_t
+latlon2ul(const char **latlonstrptr, int *which) {
+ const char *cp;
+ u_int32_t retval;
+ int deg = 0, min = 0, secs = 0, secsfrac = 0;
+
+ cp = *latlonstrptr;
+
+ while (isdigit((unsigned char)*cp))
+ deg = deg * 10 + (*cp++ - '0');
+
+ while (isspace((unsigned char)*cp))
+ cp++;
+
+ if (!(isdigit((unsigned char)*cp)))
+ goto fndhemi;
+
+ while (isdigit((unsigned char)*cp))
+ min = min * 10 + (*cp++ - '0');
+
+ while (isspace((unsigned char)*cp))
+ cp++;
+
+ if (!(isdigit((unsigned char)*cp)))
+ goto fndhemi;
+
+ while (isdigit((unsigned char)*cp))
+ secs = secs * 10 + (*cp++ - '0');
+
+ if (*cp == '.') { /*%< decimal seconds */
+ cp++;
+ if (isdigit((unsigned char)*cp)) {
+ secsfrac = (*cp++ - '0') * 100;
+ if (isdigit((unsigned char)*cp)) {
+ secsfrac += (*cp++ - '0') * 10;
+ if (isdigit((unsigned char)*cp)) {
+ secsfrac += (*cp++ - '0');
+ }
+ }
+ }
+ }
+
+ while (!isspace((unsigned char)*cp)) /*%< if any trailing garbage */
+ cp++;
+
+ while (isspace((unsigned char)*cp))
+ cp++;
+
+ fndhemi:
+ switch (*cp) {
+ case 'N': case 'n':
+ case 'E': case 'e':
+ retval = ((unsigned)1<<31)
+ + (((((deg * 60) + min) * 60) + secs) * 1000)
+ + secsfrac;
+ break;
+ case 'S': case 's':
+ case 'W': case 'w':
+ retval = ((unsigned)1<<31)
+ - (((((deg * 60) + min) * 60) + secs) * 1000)
+ - secsfrac;
+ break;
+ default:
+ retval = 0; /*%< invalid value -- indicates error */
+ break;
+ }
+
+ switch (*cp) {
+ case 'N': case 'n':
+ case 'S': case 's':
+ *which = 1; /*%< latitude */
+ break;
+ case 'E': case 'e':
+ case 'W': case 'w':
+ *which = 2; /*%< longitude */
+ break;
+ default:
+ *which = 0; /*%< error */
+ break;
+ }
+
+ cp++; /*%< skip the hemisphere */
+ while (!isspace((unsigned char)*cp)) /*%< if any trailing garbage */
+ cp++;
+
+ while (isspace((unsigned char)*cp)) /*%< move to next field */
+ cp++;
+
+ *latlonstrptr = cp;
+
+ return (retval);
+}
+
+/*%
+ * converts a zone file representation in a string to an RDATA on-the-wire
+ * representation. */
+int
+loc_aton(ascii, binary)
+ const char *ascii;
+ u_char *binary;
+{
+ const char *cp, *maxcp;
+ u_char *bcp;
+
+ u_int32_t latit = 0, longit = 0, alt = 0;
+ u_int32_t lltemp1 = 0, lltemp2 = 0;
+ int altmeters = 0, altfrac = 0, altsign = 1;
+ u_int8_t hp = 0x16; /*%< default = 1e6 cm = 10000.00m = 10km */
+ u_int8_t vp = 0x13; /*%< default = 1e3 cm = 10.00m */
+ u_int8_t siz = 0x12; /*%< default = 1e2 cm = 1.00m */
+ int which1 = 0, which2 = 0;
+
+ cp = ascii;
+ maxcp = cp + strlen(ascii);
+
+ lltemp1 = latlon2ul(&cp, &which1);
+
+ lltemp2 = latlon2ul(&cp, &which2);
+
+ switch (which1 + which2) {
+ case 3: /*%< 1 + 2, the only valid combination */
+ if ((which1 == 1) && (which2 == 2)) { /*%< normal case */
+ latit = lltemp1;
+ longit = lltemp2;
+ } else if ((which1 == 2) && (which2 == 1)) { /*%< reversed */
+ longit = lltemp1;
+ latit = lltemp2;
+ } else { /*%< some kind of brokenness */
+ return (0);
+ }
+ break;
+ default: /*%< we didn't get one of each */
+ return (0);
+ }
+
+ /* altitude */
+ if (*cp == '-') {
+ altsign = -1;
+ cp++;
+ }
+
+ if (*cp == '+')
+ cp++;
+
+ while (isdigit((unsigned char)*cp))
+ altmeters = altmeters * 10 + (*cp++ - '0');
+
+ if (*cp == '.') { /*%< decimal meters */
+ cp++;
+ if (isdigit((unsigned char)*cp)) {
+ altfrac = (*cp++ - '0') * 10;
+ if (isdigit((unsigned char)*cp)) {
+ altfrac += (*cp++ - '0');
+ }
+ }
+ }
+
+ alt = (10000000 + (altsign * (altmeters * 100 + altfrac)));
+
+ while (!isspace((unsigned char)*cp) && (cp < maxcp)) /*%< if trailing garbage or m */
+ cp++;
+
+ while (isspace((unsigned char)*cp) && (cp < maxcp))
+ cp++;
+
+ if (cp >= maxcp)
+ goto defaults;
+
+ siz = precsize_aton(&cp);
+
+ while (!isspace((unsigned char)*cp) && (cp < maxcp)) /*%< if trailing garbage or m */
+ cp++;
+
+ while (isspace((unsigned char)*cp) && (cp < maxcp))
+ cp++;
+
+ if (cp >= maxcp)
+ goto defaults;
+
+ hp = precsize_aton(&cp);
+
+ while (!isspace((unsigned char)*cp) && (cp < maxcp)) /*%< if trailing garbage or m */
+ cp++;
+
+ while (isspace((unsigned char)*cp) && (cp < maxcp))
+ cp++;
+
+ if (cp >= maxcp)
+ goto defaults;
+
+ vp = precsize_aton(&cp);
+
+ defaults:
+
+ bcp = binary;
+ *bcp++ = (u_int8_t) 0; /*%< version byte */
+ *bcp++ = siz;
+ *bcp++ = hp;
+ *bcp++ = vp;
+ PUTLONG(latit,bcp);
+ PUTLONG(longit,bcp);
+ PUTLONG(alt,bcp);
+
+ return (16); /*%< size of RR in octets */
+}
+
+/*% takes an on-the-wire LOC RR and formats it in a human readable format. */
+const char *
+loc_ntoa(binary, ascii)
+ const u_char *binary;
+ char *ascii;
+{
+ static const char *error = "?";
+ static char tmpbuf[sizeof
+"1000 60 60.000 N 1000 60 60.000 W -12345678.00m 90000000.00m 90000000.00m 90000000.00m"];
+ const u_char *cp = binary;
+
+ int latdeg, latmin, latsec, latsecfrac;
+ int longdeg, longmin, longsec, longsecfrac;
+ char northsouth, eastwest;
+ const char *altsign;
+ int altmeters, altfrac;
+
+ const u_int32_t referencealt = 100000 * 100;
+
+ int32_t latval, longval, altval;
+ u_int32_t templ;
+ u_int8_t sizeval, hpval, vpval, versionval;
+
+ char *sizestr, *hpstr, *vpstr;
+
+ versionval = *cp++;
+
+ if (ascii == NULL)
+ ascii = tmpbuf;
+
+ if (versionval) {
+ (void) sprintf(ascii, "; error: unknown LOC RR version");
+ return (ascii);
+ }
+
+ sizeval = *cp++;
+
+ hpval = *cp++;
+ vpval = *cp++;
+
+ GETLONG(templ, cp);
+ latval = (templ - ((unsigned)1<<31));
+
+ GETLONG(templ, cp);
+ longval = (templ - ((unsigned)1<<31));
+
+ GETLONG(templ, cp);
+ if (templ < referencealt) { /*%< below WGS 84 spheroid */
+ altval = referencealt - templ;
+ altsign = "-";
+ } else {
+ altval = templ - referencealt;
+ altsign = "";
+ }
+
+ if (latval < 0) {
+ northsouth = 'S';
+ latval = -latval;
+ } else
+ northsouth = 'N';
+
+ latsecfrac = latval % 1000;
+ latval = latval / 1000;
+ latsec = latval % 60;
+ latval = latval / 60;
+ latmin = latval % 60;
+ latval = latval / 60;
+ latdeg = latval;
+
+ if (longval < 0) {
+ eastwest = 'W';
+ longval = -longval;
+ } else
+ eastwest = 'E';
+
+ longsecfrac = longval % 1000;
+ longval = longval / 1000;
+ longsec = longval % 60;
+ longval = longval / 60;
+ longmin = longval % 60;
+ longval = longval / 60;
+ longdeg = longval;
+
+ altfrac = altval % 100;
+ altmeters = (altval / 100);
+
+ sizestr = strdup(precsize_ntoa(sizeval));
+ hpstr = strdup(precsize_ntoa(hpval));
+ vpstr = strdup(precsize_ntoa(vpval));
+
+ sprintf(ascii,
+ "%d %.2d %.2d.%.3d %c %d %.2d %.2d.%.3d %c %s%d.%.2dm %sm %sm %sm",
+ latdeg, latmin, latsec, latsecfrac, northsouth,
+ longdeg, longmin, longsec, longsecfrac, eastwest,
+ altsign, altmeters, altfrac,
+ (sizestr != NULL) ? sizestr : error,
+ (hpstr != NULL) ? hpstr : error,
+ (vpstr != NULL) ? vpstr : error);
+
+ if (sizestr != NULL)
+ free(sizestr);
+ if (hpstr != NULL)
+ free(hpstr);
+ if (vpstr != NULL)
+ free(vpstr);
+
+ return (ascii);
+}
+
+
+/*% Return the number of DNS hierarchy levels in the name. */
+int
+dn_count_labels(const char *name) {
+ int i, len, count;
+
+ len = strlen(name);
+ for (i = 0, count = 0; i < len; i++) {
+ /* XXX need to check for \. or use named's nlabels(). */
+ if (name[i] == '.')
+ count++;
+ }
+
+ /* don't count initial wildcard */
+ if (name[0] == '*')
+ if (count)
+ count--;
+
+ /* don't count the null label for root. */
+ /* if terminating '.' not found, must adjust */
+ /* count to include last label */
+ if (len > 0 && name[len-1] != '.')
+ count++;
+ return (count);
+}
+
+/*%
+ * Make dates expressed in seconds-since-Jan-1-1970 easy to read.
+ * SIG records are required to be printed like this, by the Secure DNS RFC.
+ */
+char *
+p_secstodate (u_long secs) {
+ char *output = p_secstodate_output;
+ time_t clock = secs;
+ struct tm *time;
+#ifdef HAVE_TIME_R
+ struct tm res;
+
+ time = gmtime_r(&clock, &res);
+#else
+ time = gmtime(&clock);
+#endif
+ time->tm_year += 1900;
+ time->tm_mon += 1;
+ sprintf(output, "%04d%02d%02d%02d%02d%02d",
+ time->tm_year, time->tm_mon, time->tm_mday,
+ time->tm_hour, time->tm_min, time->tm_sec);
+ return (output);
+}
+
+u_int16_t
+res_nametoclass(const char *buf, int *successp) {
+ unsigned long result;
+ char *endptr;
+ int success;
+
+ result = sym_ston(__p_class_syms, buf, &success);
+ if (success)
+ goto done;
+
+ if (strncasecmp(buf, "CLASS", 5) != 0 ||
+ !isdigit((unsigned char)buf[5]))
+ goto done;
+ errno = 0;
+ result = strtoul(buf + 5, &endptr, 10);
+ if (errno == 0 && *endptr == '\0' && result <= 0xffffU)
+ success = 1;
+ done:
+ if (successp)
+ *successp = success;
+ return (result);
+}
+
+u_int16_t
+res_nametotype(const char *buf, int *successp) {
+ unsigned long result;
+ char *endptr;
+ int success;
+
+ result = sym_ston(__p_type_syms, buf, &success);
+ if (success)
+ goto done;
+
+ if (strncasecmp(buf, "type", 4) != 0 ||
+ !isdigit((unsigned char)buf[4]))
+ goto done;
+ errno = 0;
+ result = strtoul(buf + 4, &endptr, 10);
+ if (errno == 0 && *endptr == '\0' && result <= 0xffffU)
+ success = 1;
+ done:
+ if (successp)
+ *successp = success;
+ return (result);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/res_debug.h b/usr/src/lib/libresolv2_joy/common/resolv/res_debug.h
new file mode 100644
index 0000000000..c28171d7c8
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/res_debug.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _RES_DEBUG_H_
+#define _RES_DEBUG_H_
+
+#ifndef DEBUG
+# define Dprint(cond, args) /*empty*/
+# define DprintQ(cond, args, query, size) /*empty*/
+# define Aerror(statp, file, string, error, address) /*empty*/
+# define Perror(statp, file, string, error) /*empty*/
+#else
+# define Dprint(cond, args) if (cond) {fprintf args;} else {}
+# define DprintQ(cond, args, query, size) if (cond) {\
+ fprintf args;\
+ res_pquery(statp, query, size, stdout);\
+ } else {}
+#endif
+
+#endif /* _RES_DEBUG_H_ */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/res_findzonecut.c b/usr/src/lib/libresolv2_joy/common/resolv/res_findzonecut.c
new file mode 100644
index 0000000000..431c0262c1
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/res_findzonecut.c
@@ -0,0 +1,722 @@
+#if !defined(lint) && !defined(SABER)
+static const char rcsid[] = "$Id: res_findzonecut.c,v 1.10 2005/10/11 00:10:16 marka Exp $";
+#endif /* not lint */
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* Import. */
+
+#include "port_before.h"
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <limits.h>
+#include <netdb.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <isc/list.h>
+
+#include "port_after.h"
+
+#include <resolv_joy.h>
+
+/* Data structures. */
+
+typedef struct rr_a {
+ LINK(struct rr_a) link;
+ union res_sockaddr_union addr;
+} rr_a;
+typedef LIST(rr_a) rrset_a;
+
+typedef struct rr_ns {
+ LINK(struct rr_ns) link;
+ const char * name;
+ unsigned int flags;
+ rrset_a addrs;
+} rr_ns;
+typedef LIST(rr_ns) rrset_ns;
+
+#define RR_NS_HAVE_V4 0x01
+#define RR_NS_HAVE_V6 0x02
+
+/* Forward. */
+
+static int satisfy(res_state, const char *, rrset_ns *,
+ union res_sockaddr_union *, int);
+static int add_addrs(res_state, rr_ns *,
+ union res_sockaddr_union *, int);
+static int get_soa(res_state, const char *, ns_class, int,
+ char *, size_t, char *, size_t,
+ rrset_ns *);
+static int get_ns(res_state, const char *, ns_class, int, rrset_ns *);
+static int get_glue(res_state, ns_class, int, rrset_ns *);
+static int save_ns(res_state, ns_msg *, ns_sect,
+ const char *, ns_class, int, rrset_ns *);
+static int save_a(res_state, ns_msg *, ns_sect,
+ const char *, ns_class, int, rr_ns *);
+static void free_nsrrset(rrset_ns *);
+static void free_nsrr(rrset_ns *, rr_ns *);
+static rr_ns * find_ns(rrset_ns *, const char *);
+static int do_query(res_state, const char *, ns_class, ns_type,
+ u_char *, ns_msg *);
+static void res_dprintf(const char *, ...) ISC_FORMAT_PRINTF(1, 2);
+
+/* Macros. */
+
+#define DPRINTF(x) do {\
+ int save_errno = errno; \
+ if ((statp->options & RES_DEBUG) != 0U) res_dprintf x; \
+ errno = save_errno; \
+ } while (0)
+
+/* Public. */
+
+/*%
+ * find enclosing zone for a <dname,class>, and some server addresses
+ *
+ * parameters:
+ *\li res - resolver context to work within (is modified)
+ *\li dname - domain name whose enclosing zone is desired
+ *\li class - class of dname (and its enclosing zone)
+ *\li zname - found zone name
+ *\li zsize - allocated size of zname
+ *\li addrs - found server addresses
+ *\li naddrs - max number of addrs
+ *
+ * return values:
+ *\li < 0 - an error occurred (check errno)
+ *\li = 0 - zname is now valid, but addrs[] wasn't changed
+ *\li > 0 - zname is now valid, and return value is number of addrs[] found
+ *
+ * notes:
+ *\li this function calls res_nsend() which means it depends on correctly
+ * functioning recursive nameservers (usually defined in /etc/resolv.conf
+ * or its local equivilent).
+ *
+ *\li we start by asking for an SOA<dname,class>. if we get one as an
+ * answer, that just means <dname,class> is a zone top, which is fine.
+ * more than likely we'll be told to go pound sand, in the form of a
+ * negative answer.
+ *
+ *\li note that we are not prepared to deal with referrals since that would
+ * only come from authority servers and our correctly functioning local
+ * recursive server would have followed the referral and got us something
+ * more definite.
+ *
+ *\li if the authority section contains an SOA, this SOA should also be the
+ * closest enclosing zone, since any intermediary zone cuts would've been
+ * returned as referrals and dealt with by our correctly functioning local
+ * recursive name server. but an SOA in the authority section should NOT
+ * match our dname (since that would have been returned in the answer
+ * section). an authority section SOA has to be "above" our dname.
+ *
+ *\li however, since authority section SOA's were once optional, it's
+ * possible that we'll have to go hunting for the enclosing SOA by
+ * ripping labels off the front of our dname -- this is known as "doing
+ * it the hard way."
+ *
+ *\li ultimately we want some server addresses, which are ideally the ones
+ * pertaining to the SOA.MNAME, but only if there is a matching NS RR.
+ * so the second phase (after we find an SOA) is to go looking for the
+ * NS RRset for that SOA's zone.
+ *
+ *\li no answer section processed by this code is allowed to contain CNAME
+ * or DNAME RR's. for the SOA query this means we strip a label and
+ * keep going. for the NS and A queries this means we just give up.
+ */
+
+int
+res_findzonecut(res_state statp, const char *dname, ns_class class, int opts,
+ char *zname, size_t zsize, struct in_addr *addrs, int naddrs)
+{
+ int result, i;
+ union res_sockaddr_union *u;
+
+
+ opts |= RES_IPV4ONLY;
+ opts &= ~RES_IPV6ONLY;
+
+ u = calloc(naddrs, sizeof(*u));
+ if (u == NULL)
+ return(-1);
+
+ result = res_findzonecut2(statp, dname, class, opts, zname, zsize,
+ u, naddrs);
+
+ for (i = 0; i < result; i++) {
+ addrs[i] = u[i].sin.sin_addr;
+ }
+ free(u);
+ return (result);
+}
+
+int
+res_findzonecut2(res_state statp, const char *dname, ns_class class, int opts,
+ char *zname, size_t zsize, union res_sockaddr_union *addrs,
+ int naddrs)
+{
+ char mname[NS_MAXDNAME];
+ u_long save_pfcode;
+ rrset_ns nsrrs;
+ int n;
+
+ DPRINTF(("START dname='%s' class=%s, zsize=%ld, naddrs=%d",
+ dname, p_class(class), (long)zsize, naddrs));
+ save_pfcode = statp->pfcode;
+ statp->pfcode |= RES_PRF_HEAD2 | RES_PRF_HEAD1 | RES_PRF_HEADX |
+ RES_PRF_QUES | RES_PRF_ANS |
+ RES_PRF_AUTH | RES_PRF_ADD;
+ INIT_LIST(nsrrs);
+
+ DPRINTF(("get the soa, and see if it has enough glue"));
+ if ((n = get_soa(statp, dname, class, opts, zname, zsize,
+ mname, sizeof mname, &nsrrs)) < 0 ||
+ ((opts & RES_EXHAUSTIVE) == 0 &&
+ (n = satisfy(statp, mname, &nsrrs, addrs, naddrs)) > 0))
+ goto done;
+
+ DPRINTF(("get the ns rrset and see if it has enough glue"));
+ if ((n = get_ns(statp, zname, class, opts, &nsrrs)) < 0 ||
+ ((opts & RES_EXHAUSTIVE) == 0 &&
+ (n = satisfy(statp, mname, &nsrrs, addrs, naddrs)) > 0))
+ goto done;
+
+ DPRINTF(("get the missing glue and see if it's finally enough"));
+ if ((n = get_glue(statp, class, opts, &nsrrs)) >= 0)
+ n = satisfy(statp, mname, &nsrrs, addrs, naddrs);
+
+ done:
+ DPRINTF(("FINISH n=%d (%s)", n, (n < 0) ? strerror(errno) : "OK"));
+ free_nsrrset(&nsrrs);
+ statp->pfcode = save_pfcode;
+ return (n);
+}
+
+/* Private. */
+
+static int
+satisfy(res_state statp, const char *mname, rrset_ns *nsrrsp,
+ union res_sockaddr_union *addrs, int naddrs)
+{
+ rr_ns *nsrr;
+ int n, x;
+
+ n = 0;
+ nsrr = find_ns(nsrrsp, mname);
+ if (nsrr != NULL) {
+ x = add_addrs(statp, nsrr, addrs, naddrs);
+ addrs += x;
+ naddrs -= x;
+ n += x;
+ }
+ for (nsrr = HEAD(*nsrrsp);
+ nsrr != NULL && naddrs > 0;
+ nsrr = NEXT(nsrr, link))
+ if (ns_samename(nsrr->name, mname) != 1) {
+ x = add_addrs(statp, nsrr, addrs, naddrs);
+ addrs += x;
+ naddrs -= x;
+ n += x;
+ }
+ DPRINTF(("satisfy(%s): %d", mname, n));
+ return (n);
+}
+
+static int
+add_addrs(res_state statp, rr_ns *nsrr,
+ union res_sockaddr_union *addrs, int naddrs)
+{
+ rr_a *arr;
+ int n = 0;
+
+ for (arr = HEAD(nsrr->addrs); arr != NULL; arr = NEXT(arr, link)) {
+ if (naddrs <= 0)
+ return (0);
+ *addrs++ = arr->addr;
+ naddrs--;
+ n++;
+ }
+ DPRINTF(("add_addrs: %d", n));
+ return (n);
+}
+
+static int
+get_soa(res_state statp, const char *dname, ns_class class, int opts,
+ char *zname, size_t zsize, char *mname, size_t msize,
+ rrset_ns *nsrrsp)
+{
+ char tname[NS_MAXDNAME];
+ u_char *resp = NULL;
+ int n, i, ancount, nscount;
+ ns_sect sect;
+ ns_msg msg;
+ u_int rcode;
+
+ /*
+ * Find closest enclosing SOA, even if it's for the root zone.
+ */
+
+ /* First canonicalize dname (exactly one unescaped trailing "."). */
+ if (ns_makecanon(dname, tname, sizeof tname) < 0)
+ goto cleanup;
+ dname = tname;
+
+ resp = malloc(NS_MAXMSG);
+ if (resp == NULL)
+ goto cleanup;
+
+ /* Now grovel the subdomains, hunting for an SOA answer or auth. */
+ for (;;) {
+ /* Leading or inter-label '.' are skipped here. */
+ while (*dname == '.')
+ dname++;
+
+ /* Is there an SOA? */
+ n = do_query(statp, dname, class, ns_t_soa, resp, &msg);
+ if (n < 0) {
+ DPRINTF(("get_soa: do_query('%s', %s) failed (%d)",
+ dname, p_class(class), n));
+ goto cleanup;
+ }
+ if (n > 0) {
+ DPRINTF(("get_soa: CNAME or DNAME found"));
+ sect = ns_s_max, n = 0;
+ } else {
+ rcode = ns_msg_getflag(msg, ns_f_rcode);
+ ancount = ns_msg_count(msg, ns_s_an);
+ nscount = ns_msg_count(msg, ns_s_ns);
+ if (ancount > 0 && rcode == ns_r_noerror)
+ sect = ns_s_an, n = ancount;
+ else if (nscount > 0)
+ sect = ns_s_ns, n = nscount;
+ else
+ sect = ns_s_max, n = 0;
+ }
+ for (i = 0; i < n; i++) {
+ const char *t;
+ const u_char *rdata;
+ ns_rr rr;
+
+ if (ns_parserr(&msg, sect, i, &rr) < 0) {
+ DPRINTF(("get_soa: ns_parserr(%s, %d) failed",
+ p_section(sect, ns_o_query), i));
+ goto cleanup;
+ }
+ if (ns_rr_type(rr) == ns_t_cname ||
+ ns_rr_type(rr) == ns_t_dname)
+ break;
+ if (ns_rr_type(rr) != ns_t_soa ||
+ ns_rr_class(rr) != class)
+ continue;
+ t = ns_rr_name(rr);
+ switch (sect) {
+ case ns_s_an:
+ if (ns_samedomain(dname, t) == 0) {
+ DPRINTF(
+ ("get_soa: ns_samedomain('%s', '%s') == 0",
+ dname, t)
+ );
+ errno = EPROTOTYPE;
+ goto cleanup;
+ }
+ break;
+ case ns_s_ns:
+ if (ns_samename(dname, t) == 1 ||
+ ns_samedomain(dname, t) == 0) {
+ DPRINTF(
+ ("get_soa: ns_samename() || !ns_samedomain('%s', '%s')",
+ dname, t)
+ );
+ errno = EPROTOTYPE;
+ goto cleanup;
+ }
+ break;
+ default:
+ abort();
+ }
+ if (strlen(t) + 1 > zsize) {
+ DPRINTF(("get_soa: zname(%lu) too small (%lu)",
+ (unsigned long)zsize,
+ (unsigned long)strlen(t) + 1));
+ errno = EMSGSIZE;
+ goto cleanup;
+ }
+ strcpy(zname, t);
+ rdata = ns_rr_rdata(rr);
+ if (ns_name_uncompress(resp, ns_msg_end(msg), rdata,
+ mname, msize) < 0) {
+ DPRINTF(("get_soa: ns_name_uncompress failed")
+ );
+ goto cleanup;
+ }
+ if (save_ns(statp, &msg, ns_s_ns,
+ zname, class, opts, nsrrsp) < 0) {
+ DPRINTF(("get_soa: save_ns failed"));
+ goto cleanup;
+ }
+ free(resp);
+ return (0);
+ }
+
+ /* If we're out of labels, then not even "." has an SOA! */
+ if (*dname == '\0')
+ break;
+
+ /* Find label-terminating "."; top of loop will skip it. */
+ while (*dname != '.') {
+ if (*dname == '\\')
+ if (*++dname == '\0') {
+ errno = EMSGSIZE;
+ goto cleanup;
+ }
+ dname++;
+ }
+ }
+ DPRINTF(("get_soa: out of labels"));
+ errno = EDESTADDRREQ;
+ cleanup:
+ if (resp != NULL)
+ free(resp);
+ return (-1);
+}
+
+static int
+get_ns(res_state statp, const char *zname, ns_class class, int opts,
+ rrset_ns *nsrrsp)
+{
+ u_char *resp;
+ ns_msg msg;
+ int n;
+
+ resp = malloc(NS_MAXMSG);
+ if (resp == NULL)
+ return (-1);
+
+ /* Go and get the NS RRs for this zone. */
+ n = do_query(statp, zname, class, ns_t_ns, resp, &msg);
+ if (n != 0) {
+ DPRINTF(("get_ns: do_query('%s', %s) failed (%d)",
+ zname, p_class(class), n));
+ free(resp);
+ return (-1);
+ }
+
+ /* Remember the NS RRs and associated A RRs that came back. */
+ if (save_ns(statp, &msg, ns_s_an, zname, class, opts, nsrrsp) < 0) {
+ DPRINTF(("get_ns save_ns('%s', %s) failed",
+ zname, p_class(class)));
+ free(resp);
+ return (-1);
+ }
+
+ free(resp);
+ return (0);
+}
+
+static int
+get_glue(res_state statp, ns_class class, int opts, rrset_ns *nsrrsp) {
+ rr_ns *nsrr, *nsrr_n;
+ u_char *resp;
+
+ resp = malloc(NS_MAXMSG);
+ if (resp == NULL)
+ return(-1);
+
+ /* Go and get the A RRs for each empty NS RR on our list. */
+ for (nsrr = HEAD(*nsrrsp); nsrr != NULL; nsrr = nsrr_n) {
+ ns_msg msg;
+ int n;
+
+ nsrr_n = NEXT(nsrr, link);
+
+ if ((nsrr->flags & RR_NS_HAVE_V4) == 0) {
+ n = do_query(statp, nsrr->name, class, ns_t_a,
+ resp, &msg);
+ if (n < 0) {
+ DPRINTF(
+ ("get_glue: do_query('%s', %s') failed",
+ nsrr->name, p_class(class)));
+ goto cleanup;
+ }
+ if (n > 0) {
+ DPRINTF((
+ "get_glue: do_query('%s', %s') CNAME or DNAME found",
+ nsrr->name, p_class(class)));
+ }
+ if (save_a(statp, &msg, ns_s_an, nsrr->name, class,
+ opts, nsrr) < 0) {
+ DPRINTF(("get_glue: save_r('%s', %s) failed",
+ nsrr->name, p_class(class)));
+ goto cleanup;
+ }
+ }
+
+ if ((nsrr->flags & RR_NS_HAVE_V6) == 0) {
+ n = do_query(statp, nsrr->name, class, ns_t_aaaa,
+ resp, &msg);
+ if (n < 0) {
+ DPRINTF(
+ ("get_glue: do_query('%s', %s') failed",
+ nsrr->name, p_class(class)));
+ goto cleanup;
+ }
+ if (n > 0) {
+ DPRINTF((
+ "get_glue: do_query('%s', %s') CNAME or DNAME found",
+ nsrr->name, p_class(class)));
+ }
+ if (save_a(statp, &msg, ns_s_an, nsrr->name, class,
+ opts, nsrr) < 0) {
+ DPRINTF(("get_glue: save_r('%s', %s) failed",
+ nsrr->name, p_class(class)));
+ goto cleanup;
+ }
+ }
+
+ /* If it's still empty, it's just chaff. */
+ if (EMPTY(nsrr->addrs)) {
+ DPRINTF(("get_glue: removing empty '%s' NS",
+ nsrr->name));
+ free_nsrr(nsrrsp, nsrr);
+ }
+ }
+ free(resp);
+ return (0);
+
+ cleanup:
+ free(resp);
+ return (-1);
+}
+
+static int
+save_ns(res_state statp, ns_msg *msg, ns_sect sect,
+ const char *owner, ns_class class, int opts,
+ rrset_ns *nsrrsp)
+{
+ int i;
+
+ for (i = 0; i < ns_msg_count(*msg, sect); i++) {
+ char tname[MAXDNAME];
+ const u_char *rdata;
+ rr_ns *nsrr;
+ ns_rr rr;
+
+ if (ns_parserr(msg, sect, i, &rr) < 0) {
+ DPRINTF(("save_ns: ns_parserr(%s, %d) failed",
+ p_section(sect, ns_o_query), i));
+ return (-1);
+ }
+ if (ns_rr_type(rr) != ns_t_ns ||
+ ns_rr_class(rr) != class ||
+ ns_samename(ns_rr_name(rr), owner) != 1)
+ continue;
+ nsrr = find_ns(nsrrsp, ns_rr_name(rr));
+ if (nsrr == NULL) {
+ nsrr = malloc(sizeof *nsrr);
+ if (nsrr == NULL) {
+ DPRINTF(("save_ns: malloc failed"));
+ return (-1);
+ }
+ rdata = ns_rr_rdata(rr);
+ if (ns_name_uncompress(ns_msg_base(*msg),
+ ns_msg_end(*msg), rdata,
+ tname, sizeof tname) < 0) {
+ DPRINTF(("save_ns: ns_name_uncompress failed")
+ );
+ free(nsrr);
+ return (-1);
+ }
+ nsrr->name = strdup(tname);
+ if (nsrr->name == NULL) {
+ DPRINTF(("save_ns: strdup failed"));
+ free(nsrr);
+ return (-1);
+ }
+ INIT_LINK(nsrr, link);
+ INIT_LIST(nsrr->addrs);
+ nsrr->flags = 0;
+ APPEND(*nsrrsp, nsrr, link);
+ }
+ if (save_a(statp, msg, ns_s_ar,
+ nsrr->name, class, opts, nsrr) < 0) {
+ DPRINTF(("save_ns: save_r('%s', %s) failed",
+ nsrr->name, p_class(class)));
+ return (-1);
+ }
+ }
+ return (0);
+}
+
+static int
+save_a(res_state statp, ns_msg *msg, ns_sect sect,
+ const char *owner, ns_class class, int opts,
+ rr_ns *nsrr)
+{
+ int i;
+
+ for (i = 0; i < ns_msg_count(*msg, sect); i++) {
+ ns_rr rr;
+ rr_a *arr;
+
+ if (ns_parserr(msg, sect, i, &rr) < 0) {
+ DPRINTF(("save_a: ns_parserr(%s, %d) failed",
+ p_section(sect, ns_o_query), i));
+ return (-1);
+ }
+ if ((ns_rr_type(rr) != ns_t_a &&
+ ns_rr_type(rr) != ns_t_aaaa) ||
+ ns_rr_class(rr) != class ||
+ ns_samename(ns_rr_name(rr), owner) != 1 ||
+ ns_rr_rdlen(rr) != NS_INADDRSZ)
+ continue;
+ if ((opts & RES_IPV6ONLY) != 0 && ns_rr_type(rr) != ns_t_aaaa)
+ continue;
+ if ((opts & RES_IPV4ONLY) != 0 && ns_rr_type(rr) != ns_t_a)
+ continue;
+ arr = malloc(sizeof *arr);
+ if (arr == NULL) {
+ DPRINTF(("save_a: malloc failed"));
+ return (-1);
+ }
+ INIT_LINK(arr, link);
+ memset(&arr->addr, 0, sizeof(arr->addr));
+ switch (ns_rr_type(rr)) {
+ case ns_t_a:
+ arr->addr.sin.sin_family = AF_INET;
+#ifdef HAVE_SA_LEN
+ arr->addr.sin.sin_len = sizeof(arr->addr.sin);
+#endif
+ memcpy(&arr->addr.sin.sin_addr, ns_rr_rdata(rr),
+ NS_INADDRSZ);
+ arr->addr.sin.sin_port = htons(NAMESERVER_PORT);
+ nsrr->flags |= RR_NS_HAVE_V4;
+ break;
+ case ns_t_aaaa:
+ arr->addr.sin6.sin6_family = AF_INET6;
+#ifdef HAVE_SA_LEN
+ arr->addr.sin6.sin6_len = sizeof(arr->addr.sin6);
+#endif
+ memcpy(&arr->addr.sin6.sin6_addr, ns_rr_rdata(rr), 16);
+ arr->addr.sin.sin_port = htons(NAMESERVER_PORT);
+ nsrr->flags |= RR_NS_HAVE_V6;
+ break;
+ default:
+ abort();
+ }
+ APPEND(nsrr->addrs, arr, link);
+ }
+ return (0);
+}
+
+static void
+free_nsrrset(rrset_ns *nsrrsp) {
+ rr_ns *nsrr;
+
+ while ((nsrr = HEAD(*nsrrsp)) != NULL)
+ free_nsrr(nsrrsp, nsrr);
+}
+
+static void
+free_nsrr(rrset_ns *nsrrsp, rr_ns *nsrr) {
+ rr_a *arr;
+ char *tmp;
+
+ while ((arr = HEAD(nsrr->addrs)) != NULL) {
+ UNLINK(nsrr->addrs, arr, link);
+ free(arr);
+ }
+ DE_CONST(nsrr->name, tmp);
+ free(tmp);
+ UNLINK(*nsrrsp, nsrr, link);
+ free(nsrr);
+}
+
+static rr_ns *
+find_ns(rrset_ns *nsrrsp, const char *dname) {
+ rr_ns *nsrr;
+
+ for (nsrr = HEAD(*nsrrsp); nsrr != NULL; nsrr = NEXT(nsrr, link))
+ if (ns_samename(nsrr->name, dname) == 1)
+ return (nsrr);
+ return (NULL);
+}
+
+static int
+do_query(res_state statp, const char *dname, ns_class class, ns_type qtype,
+ u_char *resp, ns_msg *msg)
+{
+ u_char req[NS_PACKETSZ];
+ int i, n;
+
+ n = res_nmkquery(statp, ns_o_query, dname, class, qtype,
+ NULL, 0, NULL, req, NS_PACKETSZ);
+ if (n < 0) {
+ DPRINTF(("do_query: res_nmkquery failed"));
+ return (-1);
+ }
+ n = res_nsend(statp, req, n, resp, NS_MAXMSG);
+ if (n < 0) {
+ DPRINTF(("do_query: res_nsend failed"));
+ return (-1);
+ }
+ if (n == 0) {
+ DPRINTF(("do_query: res_nsend returned 0"));
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ if (ns_initparse(resp, n, msg) < 0) {
+ DPRINTF(("do_query: ns_initparse failed"));
+ return (-1);
+ }
+ n = 0;
+ for (i = 0; i < ns_msg_count(*msg, ns_s_an); i++) {
+ ns_rr rr;
+
+ if (ns_parserr(msg, ns_s_an, i, &rr) < 0) {
+ DPRINTF(("do_query: ns_parserr failed"));
+ return (-1);
+ }
+ n += (ns_rr_class(rr) == class &&
+ (ns_rr_type(rr) == ns_t_cname ||
+ ns_rr_type(rr) == ns_t_dname));
+ }
+ return (n);
+}
+
+static void
+res_dprintf(const char *fmt, ...) {
+ va_list ap;
+
+ va_start(ap, fmt);
+ fputs(";; res_findzonecut: ", stderr);
+ vfprintf(stderr, fmt, ap);
+ fputc('\n', stderr);
+ va_end(ap);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/res_init.c b/usr/src/lib/libresolv2_joy/common/resolv/res_init.c
new file mode 100644
index 0000000000..10254d1931
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/res_init.c
@@ -0,0 +1,958 @@
+/*
+ * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+
+/*
+ * Copyright (c) 1985, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Portions Copyright (c) 1993 by Digital Equipment Corporation.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies, and that
+ * the name of Digital Equipment Corporation not be used in advertising or
+ * publicity pertaining to distribution of the document or software without
+ * specific, written prior permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
+ * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)res_init.c 8.1 (Berkeley) 6/7/93";
+static const char rcsid[] = "$Id: res_init.c,v 1.26 2008/12/11 09:59:00 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <netdb.h>
+
+#ifndef HAVE_MD5
+# include "../dst/md5.h"
+#else
+# ifdef SOLARIS2
+# include <sys/md5.h>
+# endif
+#endif
+#ifndef _MD5_H_
+# define _MD5_H_ 1 /*%< make sure we do not include rsaref md5.h file */
+#endif
+
+
+#include "port_after.h"
+
+/* ensure that sockaddr_in6 and IN6ADDR_ANY_INIT are declared / defined */
+#include <resolv_joy.h>
+
+/* ISC purposely put port_after.h before <resolv.h> to force in6 stuff
+ * (above) so we explicitly include port_resolv.h here */
+#include "port_resolv.h"
+
+#include "res_private.h"
+
+/*% Options. Should all be left alone. */
+#define RESOLVSORT
+#define DEBUG
+
+#ifdef SUNW_INITCHKIF
+#include <net/if.h>
+#include <netinet/if_ether.h>
+#include <sys/sockio.h>
+#define MAXIFS 8192
+#endif /* SUNW_INITCHKIF */
+
+#ifdef SOLARIS2
+#include <sys/systeminfo.h>
+#endif
+
+static void res_setoptions __P((res_state, const char *, const char *));
+
+#ifdef RESOLVSORT
+static const char sort_mask[] = "/&";
+#define ISSORTMASK(ch) (strchr(sort_mask, ch) != NULL)
+static u_int32_t net_mask __P((struct in_addr));
+#endif
+
+#if !defined(isascii) /*%< XXX - could be a function */
+# define isascii(c) (!(c & 0200))
+#endif
+
+/*
+ * Resolver state default settings.
+ */
+
+/*%
+ * Set up default settings. If the configuration file exist, the values
+ * there will have precedence. Otherwise, the server address is set to
+ * INADDR_ANY and the default domain name comes from the gethostname().
+ *
+ * An interrim version of this code (BIND 4.9, pre-4.4BSD) used 127.0.0.1
+ * rather than INADDR_ANY ("0.0.0.0") as the default name server address
+ * since it was noted that INADDR_ANY actually meant ``the first interface
+ * you "ifconfig"'d at boot time'' and if this was a SLIP or PPP interface,
+ * it had to be "up" in order for you to reach your own name server. It
+ * was later decided that since the recommended practice is to always
+ * install local static routes through 127.0.0.1 for all your network
+ * interfaces, that we could solve this problem without a code change.
+ *
+ * The configuration file should always be used, since it is the only way
+ * to specify a default domain. If you are running a server on your local
+ * machine, you should say "nameserver 0.0.0.0" or "nameserver 127.0.0.1"
+ * in the configuration file.
+ *
+ * Return 0 if completes successfully, -1 on error
+ */
+int
+res_ninit(res_state statp) {
+ extern int __res_vinit(res_state, int);
+ return (__res_vinit(statp, 0));
+}
+
+/*% This function has to be reachable by res_data.c but not publically. */
+int
+__res_vinit(res_state statp, int preinit) {
+ register FILE *fp;
+ register char *cp, **pp;
+ register int n;
+ char buf[BUFSIZ];
+ int nserv = 0; /*%< number of nameserver records read from file */
+ int haveenv = 0;
+ int havesearch = 0;
+#ifdef RESOLVSORT
+ int nsort = 0;
+ char *net;
+#endif
+ int dots;
+ union res_sockaddr_union u[2];
+ int maxns = MAXNS;
+
+ RES_SET_H_ERRNO(statp, 0);
+ if (statp->_u._ext.ext != NULL)
+ res_ndestroy(statp);
+
+ if (!preinit) {
+ statp->retrans = RES_TIMEOUT;
+ statp->retry = RES_DFLRETRY;
+ statp->options = RES_DEFAULT;
+ res_rndinit(statp);
+ statp->id = res_nrandomid(statp);
+ }
+
+ memset(u, 0, sizeof(u));
+#ifdef USELOOPBACK
+ u[nserv].sin.sin_addr = inet_makeaddr(IN_LOOPBACKNET, 1);
+#else
+ u[nserv].sin.sin_addr.s_addr = INADDR_ANY;
+#endif
+ u[nserv].sin.sin_family = AF_INET;
+ u[nserv].sin.sin_port = htons(NAMESERVER_PORT);
+#ifdef HAVE_SA_LEN
+ u[nserv].sin.sin_len = sizeof(struct sockaddr_in);
+#endif
+ nserv++;
+#ifdef HAS_INET6_STRUCTS
+#ifdef USELOOPBACK
+ u[nserv].sin6.sin6_addr = in6addr_loopback;
+#else
+ u[nserv].sin6.sin6_addr = in6addr_any;
+#endif
+ u[nserv].sin6.sin6_family = AF_INET6;
+ u[nserv].sin6.sin6_port = htons(NAMESERVER_PORT);
+#ifdef HAVE_SA_LEN
+ u[nserv].sin6.sin6_len = sizeof(struct sockaddr_in6);
+#endif
+ nserv++;
+#endif
+ statp->nscount = 0;
+ statp->ndots = 1;
+ statp->pfcode = 0;
+ statp->_vcsock = -1;
+ statp->_flags = 0;
+ statp->qhook = NULL;
+ statp->rhook = NULL;
+ statp->_u._ext.nscount = 0;
+ statp->_u._ext.ext = malloc(sizeof(*statp->_u._ext.ext));
+ if (statp->_u._ext.ext != NULL) {
+ memset(statp->_u._ext.ext, 0, sizeof(*statp->_u._ext.ext));
+ statp->_u._ext.ext->nsaddrs[0].sin = statp->nsaddr;
+ strcpy(statp->_u._ext.ext->nsuffix, "ip6.arpa");
+ strcpy(statp->_u._ext.ext->nsuffix2, "ip6.int");
+ } else {
+ /*
+ * Historically res_init() rarely, if at all, failed.
+ * Examples and applications exist which do not check
+ * our return code. Furthermore several applications
+ * simply call us to get the systems domainname. So
+ * rather then immediately fail here we store the
+ * failure, which is returned later, in h_errno. And
+ * prevent the collection of 'nameserver' information
+ * by setting maxns to 0. Thus applications that fail
+ * to check our return code wont be able to make
+ * queries anyhow.
+ */
+ RES_SET_H_ERRNO(statp, NETDB_INTERNAL);
+ maxns = 0;
+ }
+#ifdef RESOLVSORT
+ statp->nsort = 0;
+#endif
+ res_setservers(statp, u, nserv);
+
+#ifdef SUNW_INITCHKIF
+/*
+ * Short circuit res_init() if no non-loopback interfaces are up. This is
+ * done to avoid boot delays if "dns" comes before "files" in nsswitch.conf.
+ * An additional fix has been added to this code, to count all external
+ * interfaces, which includes the IPv6 interfaces. If no external interfaces
+ * are found, an additional check is carried out to determine if any deprecated
+ * interfaces are up.
+ */
+ {
+ int s;
+ struct lifnum lifn;
+
+ if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
+ perror("res_init: socket");
+ goto freedata;
+ }
+ lifn.lifn_family = AF_UNSPEC;
+ lifn.lifn_flags = LIFC_EXTERNAL_SOURCE;
+ if (ioctl(s, SIOCGLIFNUM, (char *)&lifn) < 0) {
+ close(s);
+ goto freedata;
+ }
+ if (lifn.lifn_count == 0) {
+ /*
+ * Check if there are any deprecated interfaces up
+ */
+ struct lifconf lifc;
+ uchar_t *buf;
+ int buflen, i, int_up = 0;
+
+ lifn.lifn_flags = 0;
+ if ((ioctl(s, SIOCGLIFNUM, (char *)&lifn) < 0) ||
+ (lifn.lifn_count < 1)) {
+ close(s);
+ goto freedata;
+ }
+
+ buflen = lifn.lifn_count * sizeof (struct lifreq);
+ buf = (uchar_t *)malloc(buflen);
+ if (buf == NULL) {
+ close(s);
+ goto freedata;
+ }
+
+ lifc.lifc_family = AF_UNSPEC;
+ lifc.lifc_flags = 0;
+ lifc.lifc_len = buflen;
+ lifc.lifc_lifcu.lifcu_buf = (caddr_t)buf;
+ if (ioctl(s, SIOCGLIFCONF, (char *)&lifc) < 0) {
+ close(s);
+ free(buf);
+ goto freedata;
+ }
+
+ for (i = 0; i < lifn.lifn_count; ++i) {
+ struct lifreq *lreqp, lreq;
+
+ lreqp = (struct lifreq *)&lifc.lifc_req[i];
+ strlcpy(lreq.lifr_name, lreqp->lifr_name,
+ sizeof (lreq.lifr_name));
+ if (ioctl(s, SIOCGLIFFLAGS, &lreq) < 0) {
+ close(s);
+ free(buf);
+ goto freedata;
+ }
+ if ((lreq.lifr_flags & IFF_UP) &&
+ !(lreq.lifr_flags & IFF_NOLOCAL) &&
+ !(lreq.lifr_flags & IFF_NOXMIT) &&
+ !(lreq.lifr_flags & IFF_LOOPBACK)) {
+ int_up = 1;
+ break;
+ }
+ }
+ free(buf);
+
+ if (!int_up) {
+ close(s);
+ goto freedata;
+ }
+ }
+ close(s);
+ }
+#endif /* SUNW_INITCHKIF */
+
+#ifdef SOLARIS2
+ /*
+ * The old libresolv derived the defaultdomain from NIS/NIS+.
+ * We want to keep this behaviour
+ */
+ {
+ char buf[sizeof(statp->defdname)], *cp;
+ int ret;
+
+ if ((ret = sysinfo(SI_SRPC_DOMAIN, buf, sizeof(buf))) > 0 &&
+ (unsigned int)ret <= sizeof(buf)) {
+ if (buf[0] == '+')
+ buf[0] = '.';
+ cp = strchr(buf, '.');
+ cp = (cp == NULL) ? buf : (cp + 1);
+ strncpy(statp->defdname, cp,
+ sizeof(statp->defdname) - 1);
+ statp->defdname[sizeof(statp->defdname) - 1] = '\0';
+ }
+ }
+#endif /* SOLARIS2 */
+
+ /* Allow user to override the local domain definition */
+ if ((cp = getenv("LOCALDOMAIN")) != NULL) {
+ (void)strncpy(statp->defdname, cp, sizeof(statp->defdname) - 1);
+ statp->defdname[sizeof(statp->defdname) - 1] = '\0';
+ haveenv++;
+
+ /*
+ * Set search list to be blank-separated strings
+ * from rest of env value. Permits users of LOCALDOMAIN
+ * to still have a search list, and anyone to set the
+ * one that they want to use as an individual (even more
+ * important now that the rfc1535 stuff restricts searches)
+ */
+ cp = statp->defdname;
+ pp = statp->dnsrch;
+ *pp++ = cp;
+ for (n = 0; *cp && pp < statp->dnsrch + MAXDNSRCH; cp++) {
+ if (*cp == '\n') /*%< silly backwards compat */
+ break;
+ else if (*cp == ' ' || *cp == '\t') {
+ *cp = 0;
+ n = 1;
+ } else if (n) {
+ *pp++ = cp;
+ n = 0;
+ havesearch = 1;
+ }
+ }
+ /* null terminate last domain if there are excess */
+ while (*cp != '\0' && *cp != ' ' && *cp != '\t' && *cp != '\n')
+ cp++;
+ *cp = '\0';
+ *pp++ = 0;
+ }
+
+#define MATCH(line, name) \
+ (!strncmp(line, name, sizeof(name) - 1) && \
+ (line[sizeof(name) - 1] == ' ' || \
+ line[sizeof(name) - 1] == '\t'))
+
+ nserv = 0;
+ if ((fp = fopen(_PATH_RESCONF, "r")) != NULL) {
+ /* read the config file */
+ while (fgets(buf, sizeof(buf), fp) != NULL) {
+ /* skip comments */
+ if (*buf == ';' || *buf == '#')
+ continue;
+ /* read default domain name */
+ if (MATCH(buf, "domain")) {
+ if (haveenv) /*%< skip if have from environ */
+ continue;
+ cp = buf + sizeof("domain") - 1;
+ while (*cp == ' ' || *cp == '\t')
+ cp++;
+ if ((*cp == '\0') || (*cp == '\n'))
+ continue;
+ strncpy(statp->defdname, cp, sizeof(statp->defdname) - 1);
+ statp->defdname[sizeof(statp->defdname) - 1] = '\0';
+ if ((cp = strpbrk(statp->defdname, " \t\n")) != NULL)
+ *cp = '\0';
+ havesearch = 0;
+ continue;
+ }
+ /* set search list */
+ if (MATCH(buf, "search")) {
+ if (haveenv) /*%< skip if have from environ */
+ continue;
+ cp = buf + sizeof("search") - 1;
+ while (*cp == ' ' || *cp == '\t')
+ cp++;
+ if ((*cp == '\0') || (*cp == '\n'))
+ continue;
+ strncpy(statp->defdname, cp, sizeof(statp->defdname) - 1);
+ statp->defdname[sizeof(statp->defdname) - 1] = '\0';
+ if ((cp = strchr(statp->defdname, '\n')) != NULL)
+ *cp = '\0';
+ /*
+ * Set search list to be blank-separated strings
+ * on rest of line.
+ */
+ cp = statp->defdname;
+ pp = statp->dnsrch;
+ *pp++ = cp;
+ for (n = 0; *cp && pp < statp->dnsrch + MAXDNSRCH; cp++) {
+ if (*cp == ' ' || *cp == '\t') {
+ *cp = 0;
+ n = 1;
+ } else if (n) {
+ *pp++ = cp;
+ n = 0;
+ }
+ }
+ /* null terminate last domain if there are excess */
+ while (*cp != '\0' && *cp != ' ' && *cp != '\t')
+ cp++;
+ *cp = '\0';
+ *pp++ = 0;
+ havesearch = 1;
+ continue;
+ }
+ /* read nameservers to query */
+ if (MATCH(buf, "nameserver") && nserv < maxns) {
+ struct addrinfo hints, *ai;
+ char sbuf[NI_MAXSERV];
+ const size_t minsiz =
+ sizeof(statp->_u._ext.ext->nsaddrs[0]);
+
+ cp = buf + sizeof("nameserver") - 1;
+ while (*cp == ' ' || *cp == '\t')
+ cp++;
+ cp[strcspn(cp, ";# \t\n")] = '\0';
+ if ((*cp != '\0') && (*cp != '\n')) {
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = PF_UNSPEC;
+ hints.ai_socktype = SOCK_DGRAM; /*dummy*/
+ hints.ai_flags = AI_NUMERICHOST;
+ sprintf(sbuf, "%u", NAMESERVER_PORT);
+ if (getaddrinfo(cp, sbuf, &hints, &ai) == 0 &&
+ ai->ai_addrlen <= minsiz) {
+ if (statp->_u._ext.ext != NULL) {
+ memcpy(&statp->_u._ext.ext->nsaddrs[nserv],
+ ai->ai_addr, ai->ai_addrlen);
+ }
+ if (ai->ai_addrlen <=
+ sizeof(statp->nsaddr_list[nserv])) {
+ memcpy(&statp->nsaddr_list[nserv],
+ ai->ai_addr, ai->ai_addrlen);
+ } else
+ statp->nsaddr_list[nserv].sin_family = 0;
+ freeaddrinfo(ai);
+ nserv++;
+ }
+ }
+ continue;
+ }
+#ifdef RESOLVSORT
+ if (MATCH(buf, "sortlist")) {
+ struct in_addr a;
+
+ cp = buf + sizeof("sortlist") - 1;
+ while (nsort < MAXRESOLVSORT) {
+ while (*cp == ' ' || *cp == '\t')
+ cp++;
+ if (*cp == '\0' || *cp == '\n' || *cp == ';')
+ break;
+ net = cp;
+ while (*cp && !ISSORTMASK(*cp) && *cp != ';' &&
+ isascii(*cp) && !isspace((unsigned char)*cp))
+ cp++;
+ n = *cp;
+ *cp = 0;
+ if (inet_aton(net, &a)) {
+ statp->sort_list[nsort].addr = a;
+ if (ISSORTMASK(n)) {
+ *cp++ = n;
+ net = cp;
+ while (*cp && *cp != ';' &&
+ isascii(*cp) &&
+ !isspace((unsigned char)*cp))
+ cp++;
+ n = *cp;
+ *cp = 0;
+ if (inet_aton(net, &a)) {
+ statp->sort_list[nsort].mask = a.s_addr;
+ } else {
+ statp->sort_list[nsort].mask =
+ net_mask(statp->sort_list[nsort].addr);
+ }
+ } else {
+ statp->sort_list[nsort].mask =
+ net_mask(statp->sort_list[nsort].addr);
+ }
+ nsort++;
+ }
+ *cp = n;
+ }
+ continue;
+ }
+#endif
+ if (MATCH(buf, "options")) {
+ res_setoptions(statp, buf + sizeof("options") - 1, "conf");
+ continue;
+ }
+ }
+ if (nserv > 0)
+ statp->nscount = nserv;
+#ifdef RESOLVSORT
+ statp->nsort = nsort;
+#endif
+ (void) fclose(fp);
+ }
+/*
+ * Last chance to get a nameserver. This should not normally
+ * be necessary
+ */
+#ifdef NO_RESOLV_CONF
+ if(nserv == 0)
+ nserv = get_nameservers(statp);
+#endif
+
+ if (statp->defdname[0] == 0 &&
+ gethostname(buf, sizeof(statp->defdname) - 1) == 0 &&
+ (cp = strchr(buf, '.')) != NULL)
+ strcpy(statp->defdname, cp + 1);
+
+ /* find components of local domain that might be searched */
+ if (havesearch == 0) {
+ pp = statp->dnsrch;
+ *pp++ = statp->defdname;
+ *pp = NULL;
+
+ dots = 0;
+ for (cp = statp->defdname; *cp; cp++)
+ dots += (*cp == '.');
+
+ cp = statp->defdname;
+ while (pp < statp->dnsrch + MAXDFLSRCH) {
+ if (dots < LOCALDOMAINPARTS)
+ break;
+ cp = strchr(cp, '.') + 1; /*%< we know there is one */
+ *pp++ = cp;
+ dots--;
+ }
+ *pp = NULL;
+#ifdef DEBUG
+ if (statp->options & RES_DEBUG) {
+ printf(";; res_init()... default dnsrch list:\n");
+ for (pp = statp->dnsrch; *pp; pp++)
+ printf(";;\t%s\n", *pp);
+ printf(";;\t..END..\n");
+ }
+#endif
+ }
+
+ if ((cp = getenv("RES_OPTIONS")) != NULL)
+ res_setoptions(statp, cp, "env");
+ statp->options |= RES_INIT;
+ return (statp->res_h_errno);
+#ifdef SUNW_INITCHKIF
+freedata:
+ RES_SET_H_ERRNO(statp, NETDB_INTERNAL);
+ if (statp->_u._ext.ext != NULL) {
+ free(statp->_u._ext.ext);
+ statp->_u._ext.ext = NULL;
+ }
+ return (-1);
+#endif /* SUNW_INITCHKIF */
+
+}
+
+static void
+res_setoptions(res_state statp, const char *options, const char *source)
+{
+ const char *cp = options;
+ int i;
+ struct __res_state_ext *ext = statp->_u._ext.ext;
+
+#ifdef DEBUG
+ if (statp->options & RES_DEBUG)
+ printf(";; res_setoptions(\"%s\", \"%s\")...\n",
+ options, source);
+#endif
+ while (*cp) {
+ /* skip leading and inner runs of spaces */
+ while (*cp == ' ' || *cp == '\t')
+ cp++;
+ /* search for and process individual options */
+ if (!strncmp(cp, "ndots:", sizeof("ndots:") - 1)) {
+ i = atoi(cp + sizeof("ndots:") - 1);
+ if (i <= RES_MAXNDOTS)
+ statp->ndots = i;
+ else
+ statp->ndots = RES_MAXNDOTS;
+#ifdef DEBUG
+ if (statp->options & RES_DEBUG)
+ printf(";;\tndots=%d\n", statp->ndots);
+#endif
+ } else if (!strncmp(cp, "timeout:", sizeof("timeout:") - 1)) {
+ i = atoi(cp + sizeof("timeout:") - 1);
+ if (i <= RES_MAXRETRANS)
+ statp->retrans = i;
+ else
+ statp->retrans = RES_MAXRETRANS;
+#ifdef DEBUG
+ if (statp->options & RES_DEBUG)
+ printf(";;\ttimeout=%d\n", statp->retrans);
+#endif
+#ifdef SOLARIS2
+ } else if (!strncmp(cp, "retrans:", sizeof("retrans:") - 1)) {
+ /*
+ * For backward compatibility, 'retrans' is
+ * supported as an alias for 'timeout', though
+ * without an imposed maximum.
+ */
+ statp->retrans = atoi(cp + sizeof("retrans:") - 1);
+ } else if (!strncmp(cp, "retry:", sizeof("retry:") - 1)){
+ /*
+ * For backward compatibility, 'retry' is
+ * supported as an alias for 'attempts', though
+ * without an imposed maximum.
+ */
+ statp->retry = atoi(cp + sizeof("retry:") - 1);
+#endif /* SOLARIS2 */
+ } else if (!strncmp(cp, "attempts:", sizeof("attempts:") - 1)){
+ i = atoi(cp + sizeof("attempts:") - 1);
+ if (i <= RES_MAXRETRY)
+ statp->retry = i;
+ else
+ statp->retry = RES_MAXRETRY;
+#ifdef DEBUG
+ if (statp->options & RES_DEBUG)
+ printf(";;\tattempts=%d\n", statp->retry);
+#endif
+ } else if (!strncmp(cp, "debug", sizeof("debug") - 1)) {
+#ifdef DEBUG
+ if (!(statp->options & RES_DEBUG)) {
+ printf(";; res_setoptions(\"%s\", \"%s\")..\n",
+ options, source);
+ statp->options |= RES_DEBUG;
+ }
+ printf(";;\tdebug\n");
+#endif
+ } else if (!strncmp(cp, "no_tld_query",
+ sizeof("no_tld_query") - 1) ||
+ !strncmp(cp, "no-tld-query",
+ sizeof("no-tld-query") - 1)) {
+ statp->options |= RES_NOTLDQUERY;
+ } else if (!strncmp(cp, "inet6", sizeof("inet6") - 1)) {
+ statp->options |= RES_USE_INET6;
+ } else if (!strncmp(cp, "rotate", sizeof("rotate") - 1)) {
+ statp->options |= RES_ROTATE;
+ } else if (!strncmp(cp, "no-check-names",
+ sizeof("no-check-names") - 1)) {
+ statp->options |= RES_NOCHECKNAME;
+ }
+#ifdef RES_USE_EDNS0
+ else if (!strncmp(cp, "edns0", sizeof("edns0") - 1)) {
+ statp->options |= RES_USE_EDNS0;
+ }
+#endif
+ else if (!strncmp(cp, "dname", sizeof("dname") - 1)) {
+ statp->options |= RES_USE_DNAME;
+ }
+ else if (!strncmp(cp, "nibble:", sizeof("nibble:") - 1)) {
+ if (ext == NULL)
+ goto skip;
+ cp += sizeof("nibble:") - 1;
+ i = MIN(strcspn(cp, " \t"), sizeof(ext->nsuffix) - 1);
+ strncpy(ext->nsuffix, cp, i);
+ ext->nsuffix[i] = '\0';
+ }
+ else if (!strncmp(cp, "nibble2:", sizeof("nibble2:") - 1)) {
+ if (ext == NULL)
+ goto skip;
+ cp += sizeof("nibble2:") - 1;
+ i = MIN(strcspn(cp, " \t"), sizeof(ext->nsuffix2) - 1);
+ strncpy(ext->nsuffix2, cp, i);
+ ext->nsuffix2[i] = '\0';
+ }
+ else if (!strncmp(cp, "v6revmode:", sizeof("v6revmode:") - 1)) {
+ cp += sizeof("v6revmode:") - 1;
+ /* "nibble" and "bitstring" used to be valid */
+ if (!strncmp(cp, "single", sizeof("single") - 1)) {
+ statp->options |= RES_NO_NIBBLE2;
+ } else if (!strncmp(cp, "both", sizeof("both") - 1)) {
+ statp->options &=
+ ~RES_NO_NIBBLE2;
+ }
+ }
+ else {
+ /* XXX - print a warning here? */
+ }
+ skip:
+ /* skip to next run of spaces */
+ while (*cp && *cp != ' ' && *cp != '\t')
+ cp++;
+ }
+}
+
+#ifdef RESOLVSORT
+/* XXX - should really support CIDR which means explicit masks always. */
+static u_int32_t
+net_mask(in) /*!< XXX - should really use system's version of this */
+ struct in_addr in;
+{
+ register u_int32_t i = ntohl(in.s_addr);
+
+ if (IN_CLASSA(i))
+ return (htonl(IN_CLASSA_NET));
+ else if (IN_CLASSB(i))
+ return (htonl(IN_CLASSB_NET));
+ return (htonl(IN_CLASSC_NET));
+}
+#endif
+
+void
+res_rndinit(res_state statp)
+{
+ struct timeval now;
+ u_int32_t u32;
+ u_int16_t u16;
+
+ gettimeofday(&now, NULL);
+ u32 = now.tv_sec;
+ memcpy(statp->_u._ext._rnd, &u32, 4);
+ u32 = now.tv_usec;
+ memcpy(statp->_u._ext._rnd + 4, &u32, 4);
+ u32 += now.tv_sec;
+ memcpy(statp->_u._ext._rnd + 8, &u32, 4);
+ u16 = getpid();
+ memcpy(statp->_u._ext._rnd + 12, &u16, 2);
+
+}
+
+u_int
+res_nrandomid(res_state statp) {
+ struct timeval now;
+ u_int16_t u16;
+ MD5_CTX ctx;
+
+ gettimeofday(&now, NULL);
+ u16 = (u_int16_t) (now.tv_sec ^ now.tv_usec);
+
+ memcpy(statp->_u._ext._rnd + 14, &u16, 2);
+#ifndef HAVE_MD5
+ MD5_Init(&ctx);
+ MD5_Update(&ctx, statp->_u._ext._rnd, 16);
+ MD5_Final(statp->_u._ext._rnd, &ctx);
+#else
+ MD5Init(&ctx);
+ MD5Update(&ctx, statp->_u._ext._rnd, 16);
+ MD5Final(statp->_u._ext._rnd, &ctx);
+#endif
+ memcpy(&u16, statp->_u._ext._rnd + 14, 2);
+ return ((u_int) u16);
+}
+
+/*%
+ * This routine is for closing the socket if a virtual circuit is used and
+ * the program wants to close it. This provides support for endhostent()
+ * which expects to close the socket.
+ *
+ * This routine is not expected to be user visible.
+ */
+void
+res_nclose(res_state statp) {
+ int ns;
+
+ if (statp->_vcsock >= 0) {
+ (void) close(statp->_vcsock);
+ statp->_vcsock = -1;
+ statp->_flags &= ~(RES_F_VC | RES_F_CONN);
+ }
+ for (ns = 0; ns < statp->_u._ext.nscount; ns++) {
+ if (statp->_u._ext.nssocks[ns] != -1) {
+ (void) close(statp->_u._ext.nssocks[ns]);
+ statp->_u._ext.nssocks[ns] = -1;
+ }
+ }
+}
+
+void
+res_ndestroy(res_state statp) {
+ res_nclose(statp);
+ if (statp->_u._ext.ext != NULL)
+ free(statp->_u._ext.ext);
+ statp->options &= ~RES_INIT;
+ statp->_u._ext.ext = NULL;
+}
+
+const char *
+res_get_nibblesuffix(res_state statp) {
+ if (statp->_u._ext.ext)
+ return (statp->_u._ext.ext->nsuffix);
+ return ("ip6.arpa");
+}
+
+const char *
+res_get_nibblesuffix2(res_state statp) {
+ if (statp->_u._ext.ext)
+ return (statp->_u._ext.ext->nsuffix2);
+ return ("ip6.int");
+}
+
+void
+res_setservers(res_state statp, const union res_sockaddr_union *set, int cnt) {
+ int i, nserv;
+ size_t size;
+
+ /* close open servers */
+ res_nclose(statp);
+
+ /* cause rtt times to be forgotten */
+ statp->_u._ext.nscount = 0;
+
+ nserv = 0;
+ for (i = 0; i < cnt && nserv < MAXNS; i++) {
+ switch (set->sin.sin_family) {
+ case AF_INET:
+ size = sizeof(set->sin);
+ if (statp->_u._ext.ext)
+ memcpy(&statp->_u._ext.ext->nsaddrs[nserv],
+ &set->sin, size);
+ if (size <= sizeof(statp->nsaddr_list[nserv]))
+ memcpy(&statp->nsaddr_list[nserv],
+ &set->sin, size);
+ else
+ statp->nsaddr_list[nserv].sin_family = 0;
+ nserv++;
+ break;
+
+#ifdef HAS_INET6_STRUCTS
+ case AF_INET6:
+ size = sizeof(set->sin6);
+ if (statp->_u._ext.ext)
+ memcpy(&statp->_u._ext.ext->nsaddrs[nserv],
+ &set->sin6, size);
+ if (size <= sizeof(statp->nsaddr_list[nserv]))
+ memcpy(&statp->nsaddr_list[nserv],
+ &set->sin6, size);
+ else
+ statp->nsaddr_list[nserv].sin_family = 0;
+ nserv++;
+ break;
+#endif
+
+ default:
+ break;
+ }
+ set++;
+ }
+ statp->nscount = nserv;
+
+}
+
+int
+res_getservers(res_state statp, union res_sockaddr_union *set, int cnt) {
+ int i;
+ size_t size;
+ u_int16_t family;
+
+ for (i = 0; i < statp->nscount && i < cnt; i++) {
+ if (statp->_u._ext.ext)
+ family = statp->_u._ext.ext->nsaddrs[i].sin.sin_family;
+ else
+ family = statp->nsaddr_list[i].sin_family;
+
+ switch (family) {
+ case AF_INET:
+ size = sizeof(set->sin);
+ if (statp->_u._ext.ext)
+ memcpy(&set->sin,
+ &statp->_u._ext.ext->nsaddrs[i],
+ size);
+ else
+ memcpy(&set->sin, &statp->nsaddr_list[i],
+ size);
+ break;
+
+#ifdef HAS_INET6_STRUCTS
+ case AF_INET6:
+ size = sizeof(set->sin6);
+ if (statp->_u._ext.ext)
+ memcpy(&set->sin6,
+ &statp->_u._ext.ext->nsaddrs[i],
+ size);
+ else
+ memcpy(&set->sin6, &statp->nsaddr_list[i],
+ size);
+ break;
+#endif
+
+ default:
+ set->sin.sin_family = 0;
+ break;
+ }
+ set++;
+ }
+ return (statp->nscount);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/res_mkquery.c b/usr/src/lib/libresolv2_joy/common/resolv/res_mkquery.c
new file mode 100644
index 0000000000..cf36855e9d
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/res_mkquery.c
@@ -0,0 +1,386 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/*
+ * Portions Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (C) 1996, 1997, 1988, 1999, 2001, 2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Copyright (c) 1985, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Portions Copyright (c) 1993 by Digital Equipment Corporation.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies, and that
+ * the name of Digital Equipment Corporation not be used in advertising or
+ * publicity pertaining to distribution of the document or software without
+ * specific, written prior permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
+ * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)res_mkquery.c 8.1 (Berkeley) 6/4/93";
+static const char rcsid[] = "$Id: res_mkquery.c,v 1.10 2008/12/11 09:59:00 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include "port_before.h"
+#include <sys/types.h>
+#include <sys/param.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+
+#ifdef SUNW_CONFCHECK
+#include <sys/socket.h>
+#include <errno.h>
+#include <sys/stat.h>
+#endif /* SUNW_CONFCHECK */
+
+
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <string.h>
+#include "port_after.h"
+
+/* Options. Leave them on. */
+#define DEBUG
+
+extern const char *_res_opcodes[];
+
+#ifdef SUNW_CONFCHECK
+static int _confcheck(res_state statp);
+#endif /* SUNW_CONFCHECK */
+
+
+/*%
+ * Form all types of queries.
+ * Returns the size of the result or -1.
+ */
+int
+res_nmkquery(res_state statp,
+ int op, /*!< opcode of query */
+ const char *dname, /*!< domain name */
+ int class, int type, /*!< class and type of query */
+ const u_char *data, /*!< resource record data */
+ int datalen, /*!< length of data */
+ const u_char *newrr_in, /*!< new rr for modify or append */
+ u_char *buf, /*!< buffer to put query */
+ int buflen) /*!< size of buffer */
+{
+ register HEADER *hp;
+ register u_char *cp, *ep;
+ register int n;
+ u_char *dnptrs[20], **dpp, **lastdnptr;
+
+ UNUSED(newrr_in);
+
+#ifdef DEBUG
+ if (statp->options & RES_DEBUG)
+ printf(";; res_nmkquery(%s, %s, %s, %s)\n",
+ _res_opcodes[op], dname, p_class(class), p_type(type));
+#endif
+
+#ifdef SUNW_CONFCHECK
+ /*
+ * 1247019, 1265838, and 4034368: Check to see if we can
+ * bailout quickly.
+ */
+ if (_confcheck(statp) == -1) {
+ RES_SET_H_ERRNO(statp, NO_RECOVERY);
+ return(-1);
+ }
+#endif /* SUNW_CONFCHECK */
+
+ /*
+ * Initialize header fields.
+ */
+ if ((buf == NULL) || (buflen < HFIXEDSZ))
+ return (-1);
+ memset(buf, 0, HFIXEDSZ);
+ hp = (HEADER *) buf;
+ statp->id = res_nrandomid(statp);
+ hp->id = htons(statp->id);
+ hp->opcode = op;
+ hp->rd = (statp->options & RES_RECURSE) != 0U;
+ hp->rcode = NOERROR;
+ cp = buf + HFIXEDSZ;
+ ep = buf + buflen;
+ dpp = dnptrs;
+ *dpp++ = buf;
+ *dpp++ = NULL;
+ lastdnptr = dnptrs + sizeof dnptrs / sizeof dnptrs[0];
+ /*
+ * perform opcode specific processing
+ */
+ switch (op) {
+ case QUERY: /*FALLTHROUGH*/
+ case NS_NOTIFY_OP:
+ if (ep - cp < QFIXEDSZ)
+ return (-1);
+ if ((n = dn_comp(dname, cp, ep - cp - QFIXEDSZ, dnptrs,
+ lastdnptr)) < 0)
+ return (-1);
+ cp += n;
+ ns_put16(type, cp);
+ cp += INT16SZ;
+ ns_put16(class, cp);
+ cp += INT16SZ;
+ hp->qdcount = htons(1);
+ if (op == QUERY || data == NULL)
+ break;
+ /*
+ * Make an additional record for completion domain.
+ */
+ if ((ep - cp) < RRFIXEDSZ)
+ return (-1);
+ n = dn_comp((const char *)data, cp, ep - cp - RRFIXEDSZ,
+ dnptrs, lastdnptr);
+ if (n < 0)
+ return (-1);
+ cp += n;
+ ns_put16(T_NULL, cp);
+ cp += INT16SZ;
+ ns_put16(class, cp);
+ cp += INT16SZ;
+ ns_put32(0, cp);
+ cp += INT32SZ;
+ ns_put16(0, cp);
+ cp += INT16SZ;
+ hp->arcount = htons(1);
+ break;
+
+ case IQUERY:
+ /*
+ * Initialize answer section
+ */
+ if (ep - cp < 1 + RRFIXEDSZ + datalen)
+ return (-1);
+ *cp++ = '\0'; /*%< no domain name */
+ ns_put16(type, cp);
+ cp += INT16SZ;
+ ns_put16(class, cp);
+ cp += INT16SZ;
+ ns_put32(0, cp);
+ cp += INT32SZ;
+ ns_put16(datalen, cp);
+ cp += INT16SZ;
+ if (datalen) {
+ memcpy(cp, data, datalen);
+ cp += datalen;
+ }
+ hp->ancount = htons(1);
+ break;
+
+ default:
+ return (-1);
+ }
+ return (cp - buf);
+}
+
+#ifdef RES_USE_EDNS0
+/* attach OPT pseudo-RR, as documented in RFC2671 (EDNS0). */
+
+int
+res_nopt(res_state statp,
+ int n0, /*%< current offset in buffer */
+ u_char *buf, /*%< buffer to put query */
+ int buflen, /*%< size of buffer */
+ int anslen) /*%< UDP answer buffer size */
+{
+ register HEADER *hp;
+ register u_char *cp, *ep;
+ u_int16_t flags = 0;
+
+#ifdef DEBUG
+ if ((statp->options & RES_DEBUG) != 0U)
+ printf(";; res_nopt()\n");
+#endif
+
+ hp = (HEADER *) buf;
+ cp = buf + n0;
+ ep = buf + buflen;
+
+ if ((ep - cp) < 1 + RRFIXEDSZ)
+ return (-1);
+
+ *cp++ = 0; /*%< "." */
+ ns_put16(ns_t_opt, cp); /*%< TYPE */
+ cp += INT16SZ;
+ ns_put16(anslen & 0xffff, cp); /*%< CLASS = UDP payload size */
+ cp += INT16SZ;
+ *cp++ = NOERROR; /*%< extended RCODE */
+ *cp++ = 0; /*%< EDNS version */
+
+ if (statp->options & RES_USE_DNSSEC) {
+#ifdef DEBUG
+ if (statp->options & RES_DEBUG)
+ printf(";; res_opt()... ENDS0 DNSSEC\n");
+#endif
+ flags |= NS_OPT_DNSSEC_OK;
+ }
+ ns_put16(flags, cp);
+ cp += INT16SZ;
+
+ ns_put16(0U, cp); /*%< RDLEN */
+ cp += INT16SZ;
+
+ hp->arcount = htons(ntohs(hp->arcount) + 1);
+
+ return (cp - buf);
+}
+
+/*
+ * Construct variable data (RDATA) block for OPT psuedo-RR, append it
+ * to the buffer, then update the RDLEN field (previously set to zero by
+ * res_nopt()) with the new RDATA length.
+ */
+int
+res_nopt_rdata(res_state statp,
+ int n0, /*%< current offset in buffer */
+ u_char *buf, /*%< buffer to put query */
+ int buflen, /*%< size of buffer */
+ u_char *rdata, /*%< ptr to start of opt rdata */
+ u_short code, /*%< OPTION-CODE */
+ u_short len, /*%< OPTION-LENGTH */
+ u_char *data) /*%< OPTION_DATA */
+{
+ register u_char *cp, *ep;
+
+#ifdef DEBUG
+ if ((statp->options & RES_DEBUG) != 0U)
+ printf(";; res_nopt_rdata()\n");
+#endif
+
+ cp = buf + n0;
+ ep = buf + buflen;
+
+ if ((ep - cp) < (4 + len))
+ return (-1);
+
+ if (rdata < (buf + 2) || rdata >= ep)
+ return (-1);
+
+ ns_put16(code, cp);
+ cp += INT16SZ;
+
+ ns_put16(len, cp);
+ cp += INT16SZ;
+
+ memcpy(cp, data, len);
+ cp += len;
+
+ len = cp - rdata;
+ ns_put16(len, rdata - 2); /* Update RDLEN field */
+
+ return (cp - buf);
+}
+#endif
+
+#ifdef SUNW_CONFCHECK
+
+/*
+ * Time out quickly if there is no /etc/resolv.conf and a TCP connection
+ * to the local DNS server fails.
+ */
+static int _confcheck(res_state statp)
+{
+ int ns;
+ struct stat rc_stat;
+ struct sockaddr_in ns_sin;
+
+ /* First, we check to see if /etc/resolv.conf exists.
+ * If it doesn't, then it is likely that the localhost is
+ * the nameserver.
+ */
+ if (stat(_PATH_RESCONF, &rc_stat) == -1 && errno == ENOENT) {
+
+ /* Next, we check to see if _res.nsaddr is set to loopback.
+ * If it isn't, it has been altered by the application
+ * explicitly and we then want to bail with success.
+ */
+ if (statp->nsaddr.sin_addr.S_un.S_addr ==
+ htonl(INADDR_LOOPBACK)) {
+
+ /* Lastly, we try to connect to the TCP port of the
+ * nameserver. If this fails, then we know that
+ * DNS is misconfigured and we can quickly exit.
+ */
+ ns = socket(AF_INET, SOCK_STREAM, 0);
+ IN_SET_LOOPBACK_ADDR(&ns_sin);
+ ns_sin.sin_port = htons(NAMESERVER_PORT);
+ if (connect(ns, (struct sockaddr *) &ns_sin,
+ sizeof ns_sin) == -1) {
+ close(ns);
+ return(-1);
+ }
+ else {
+ close(ns);
+
+ return(0);
+ }
+ }
+
+ return(0);
+ }
+
+ return (0);
+}
+#endif /* SUNW_CONFCHECK */
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/res_mkupdate.c b/usr/src/lib/libresolv2_joy/common/resolv/res_mkupdate.c
new file mode 100644
index 0000000000..8f73e281d0
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/res_mkupdate.c
@@ -0,0 +1,1163 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*! \file
+ * \brief
+ * Based on the Dynamic DNS reference implementation by Viraj Bais
+ * &lt;viraj_bais@ccm.fm.intel.com>
+ */
+
+#if !defined(lint) && !defined(SABER)
+static const char rcsid[] = "$Id: res_mkupdate.c,v 1.10 2008/12/11 09:59:00 marka Exp $";
+#endif /* not lint */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <arpa/inet.h>
+
+#include <errno.h>
+#include <limits.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <res_update.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <ctype.h>
+
+#include "port_after.h"
+
+/* Options. Leave them on. */
+#define DEBUG
+#define MAXPORT 1024
+
+static int getnum_str(u_char **, u_char *);
+static int gethexnum_str(u_char **, u_char *);
+static int getword_str(char *, int, u_char **, u_char *);
+static int getstr_str(char *, int, u_char **, u_char *);
+
+#define ShrinkBuffer(x) if ((buflen -= x) < 0) return (-2);
+
+/* Forward. */
+
+int res_protocolnumber(const char *);
+int res_servicenumber(const char *);
+
+/*%
+ * Form update packets.
+ * Returns the size of the resulting packet if no error
+ *
+ * On error,
+ * returns
+ *\li -1 if error in reading a word/number in rdata
+ * portion for update packets
+ *\li -2 if length of buffer passed is insufficient
+ *\li -3 if zone section is not the first section in
+ * the linked list, or section order has a problem
+ *\li -4 on a number overflow
+ *\li -5 unknown operation or no records
+ */
+int
+res_nmkupdate(res_state statp, ns_updrec *rrecp_in, u_char *buf, int buflen) {
+ ns_updrec *rrecp_start = rrecp_in;
+ HEADER *hp;
+ u_char *cp, *sp2, *startp, *endp;
+ int n, i, soanum, multiline;
+ ns_updrec *rrecp;
+ struct in_addr ina;
+ struct in6_addr in6a;
+ char buf2[MAXDNAME];
+ u_char buf3[MAXDNAME];
+ int section, numrrs = 0, counts[ns_s_max];
+ u_int16_t rtype, rclass;
+ u_int32_t n1, rttl;
+ u_char *dnptrs[20], **dpp, **lastdnptr;
+ int siglen, keylen, certlen;
+
+ /*
+ * Initialize header fields.
+ */
+ if ((buf == NULL) || (buflen < HFIXEDSZ))
+ return (-1);
+ memset(buf, 0, HFIXEDSZ);
+ hp = (HEADER *) buf;
+ statp->id = res_nrandomid(statp);
+ hp->id = htons(statp->id);
+ hp->opcode = ns_o_update;
+ hp->rcode = NOERROR;
+ cp = buf + HFIXEDSZ;
+ buflen -= HFIXEDSZ;
+ dpp = dnptrs;
+ *dpp++ = buf;
+ *dpp++ = NULL;
+ lastdnptr = dnptrs + sizeof dnptrs / sizeof dnptrs[0];
+
+ if (rrecp_start == NULL)
+ return (-5);
+ else if (rrecp_start->r_section != S_ZONE)
+ return (-3);
+
+ memset(counts, 0, sizeof counts);
+ for (rrecp = rrecp_start; rrecp; rrecp = NEXT(rrecp, r_glink)) {
+ numrrs++;
+ section = rrecp->r_section;
+ if (section < 0 || section >= ns_s_max)
+ return (-1);
+ counts[section]++;
+ for (i = section + 1; i < ns_s_max; i++)
+ if (counts[i])
+ return (-3);
+ rtype = rrecp->r_type;
+ rclass = rrecp->r_class;
+ rttl = rrecp->r_ttl;
+ /* overload class and type */
+ if (section == S_PREREQ) {
+ rttl = 0;
+ switch (rrecp->r_opcode) {
+ case YXDOMAIN:
+ rclass = C_ANY;
+ rtype = T_ANY;
+ rrecp->r_size = 0;
+ break;
+ case NXDOMAIN:
+ rclass = C_NONE;
+ rtype = T_ANY;
+ rrecp->r_size = 0;
+ break;
+ case NXRRSET:
+ rclass = C_NONE;
+ rrecp->r_size = 0;
+ break;
+ case YXRRSET:
+ if (rrecp->r_size == 0)
+ rclass = C_ANY;
+ break;
+ default:
+ fprintf(stderr,
+ "res_mkupdate: incorrect opcode: %d\n",
+ rrecp->r_opcode);
+ fflush(stderr);
+ return (-1);
+ }
+ } else if (section == S_UPDATE) {
+ switch (rrecp->r_opcode) {
+ case DELETE:
+ rclass = rrecp->r_size == 0 ? C_ANY : C_NONE;
+ break;
+ case ADD:
+ break;
+ default:
+ fprintf(stderr,
+ "res_mkupdate: incorrect opcode: %d\n",
+ rrecp->r_opcode);
+ fflush(stderr);
+ return (-1);
+ }
+ }
+
+ /*
+ * XXX appending default domain to owner name is omitted,
+ * fqdn must be provided
+ */
+ if ((n = dn_comp(rrecp->r_dname, cp, buflen, dnptrs,
+ lastdnptr)) < 0)
+ return (-1);
+ cp += n;
+ ShrinkBuffer(n + 2*INT16SZ);
+ PUTSHORT(rtype, cp);
+ PUTSHORT(rclass, cp);
+ if (section == S_ZONE) {
+ if (numrrs != 1 || rrecp->r_type != T_SOA)
+ return (-3);
+ continue;
+ }
+ ShrinkBuffer(INT32SZ + INT16SZ);
+ PUTLONG(rttl, cp);
+ sp2 = cp; /*%< save pointer to length byte */
+ cp += INT16SZ;
+ if (rrecp->r_size == 0) {
+ if (section == S_UPDATE && rclass != C_ANY)
+ return (-1);
+ else {
+ PUTSHORT(0, sp2);
+ continue;
+ }
+ }
+ startp = rrecp->r_data;
+ endp = startp + rrecp->r_size - 1;
+ /* XXX this should be done centrally. */
+ switch (rrecp->r_type) {
+ case T_A:
+ if (!getword_str(buf2, sizeof buf2, &startp, endp))
+ return (-1);
+ if (!inet_aton(buf2, &ina))
+ return (-1);
+ n1 = ntohl(ina.s_addr);
+ ShrinkBuffer(INT32SZ);
+ PUTLONG(n1, cp);
+ break;
+ case T_CNAME:
+ case T_MB:
+ case T_MG:
+ case T_MR:
+ case T_NS:
+ case T_PTR:
+ case ns_t_dname:
+ if (!getword_str(buf2, sizeof buf2, &startp, endp))
+ return (-1);
+ n = dn_comp(buf2, cp, buflen, dnptrs, lastdnptr);
+ if (n < 0)
+ return (-1);
+ cp += n;
+ ShrinkBuffer(n);
+ break;
+ case T_MINFO:
+ case T_SOA:
+ case T_RP:
+ for (i = 0; i < 2; i++) {
+ if (!getword_str(buf2, sizeof buf2, &startp,
+ endp))
+ return (-1);
+ n = dn_comp(buf2, cp, buflen,
+ dnptrs, lastdnptr);
+ if (n < 0)
+ return (-1);
+ cp += n;
+ ShrinkBuffer(n);
+ }
+ if (rrecp->r_type == T_SOA) {
+ ShrinkBuffer(5 * INT32SZ);
+ while (isspace(*startp) || !*startp)
+ startp++;
+ if (*startp == '(') {
+ multiline = 1;
+ startp++;
+ } else
+ multiline = 0;
+ /* serial, refresh, retry, expire, minimum */
+ for (i = 0; i < 5; i++) {
+ soanum = getnum_str(&startp, endp);
+ if (soanum < 0)
+ return (-1);
+ PUTLONG(soanum, cp);
+ }
+ if (multiline) {
+ while (isspace(*startp) || !*startp)
+ startp++;
+ if (*startp != ')')
+ return (-1);
+ }
+ }
+ break;
+ case T_MX:
+ case T_AFSDB:
+ case T_RT:
+ n = getnum_str(&startp, endp);
+ if (n < 0)
+ return (-1);
+ ShrinkBuffer(INT16SZ);
+ PUTSHORT(n, cp);
+ if (!getword_str(buf2, sizeof buf2, &startp, endp))
+ return (-1);
+ n = dn_comp(buf2, cp, buflen, dnptrs, lastdnptr);
+ if (n < 0)
+ return (-1);
+ cp += n;
+ ShrinkBuffer(n);
+ break;
+ case T_SRV:
+ n = getnum_str(&startp, endp);
+ if (n < 0)
+ return (-1);
+ ShrinkBuffer(INT16SZ);
+ PUTSHORT(n, cp);
+
+ n = getnum_str(&startp, endp);
+ if (n < 0)
+ return (-1);
+ ShrinkBuffer(INT16SZ);
+ PUTSHORT(n, cp);
+
+ n = getnum_str(&startp, endp);
+ if (n < 0)
+ return (-1);
+ ShrinkBuffer(INT16SZ);
+ PUTSHORT(n, cp);
+
+ if (!getword_str(buf2, sizeof buf2, &startp, endp))
+ return (-1);
+ n = dn_comp(buf2, cp, buflen, NULL, NULL);
+ if (n < 0)
+ return (-1);
+ cp += n;
+ ShrinkBuffer(n);
+ break;
+ case T_PX:
+ n = getnum_str(&startp, endp);
+ if (n < 0)
+ return (-1);
+ PUTSHORT(n, cp);
+ ShrinkBuffer(INT16SZ);
+ for (i = 0; i < 2; i++) {
+ if (!getword_str(buf2, sizeof buf2, &startp,
+ endp))
+ return (-1);
+ n = dn_comp(buf2, cp, buflen, dnptrs,
+ lastdnptr);
+ if (n < 0)
+ return (-1);
+ cp += n;
+ ShrinkBuffer(n);
+ }
+ break;
+ case T_WKS: {
+ char bm[MAXPORT/8];
+ unsigned int maxbm = 0;
+
+ if (!getword_str(buf2, sizeof buf2, &startp, endp))
+ return (-1);
+ if (!inet_aton(buf2, &ina))
+ return (-1);
+ n1 = ntohl(ina.s_addr);
+ ShrinkBuffer(INT32SZ);
+ PUTLONG(n1, cp);
+
+ if (!getword_str(buf2, sizeof buf2, &startp, endp))
+ return (-1);
+ if ((i = res_protocolnumber(buf2)) < 0)
+ return (-1);
+ ShrinkBuffer(1);
+ *cp++ = i & 0xff;
+
+ for (i = 0; i < MAXPORT/8 ; i++)
+ bm[i] = 0;
+
+ while (getword_str(buf2, sizeof buf2, &startp, endp)) {
+ if ((n = res_servicenumber(buf2)) <= 0)
+ return (-1);
+
+ if (n < MAXPORT) {
+ bm[n/8] |= (0x80>>(n%8));
+ if ((unsigned)n > maxbm)
+ maxbm = n;
+ } else
+ return (-1);
+ }
+ maxbm = maxbm/8 + 1;
+ ShrinkBuffer(maxbm);
+ memcpy(cp, bm, maxbm);
+ cp += maxbm;
+ break;
+ }
+ case T_HINFO:
+ for (i = 0; i < 2; i++) {
+ if ((n = getstr_str(buf2, sizeof buf2,
+ &startp, endp)) < 0)
+ return (-1);
+ if (n > 255)
+ return (-1);
+ ShrinkBuffer(n+1);
+ *cp++ = n;
+ memcpy(cp, buf2, n);
+ cp += n;
+ }
+ break;
+ case T_TXT:
+ for (;;) {
+ if ((n = getstr_str(buf2, sizeof buf2,
+ &startp, endp)) < 0) {
+ if (cp != (sp2 + INT16SZ))
+ break;
+ return (-1);
+ }
+ if (n > 255)
+ return (-1);
+ ShrinkBuffer(n+1);
+ *cp++ = n;
+ memcpy(cp, buf2, n);
+ cp += n;
+ }
+ break;
+ case T_X25:
+ /* RFC1183 */
+ if ((n = getstr_str(buf2, sizeof buf2, &startp,
+ endp)) < 0)
+ return (-1);
+ if (n > 255)
+ return (-1);
+ ShrinkBuffer(n+1);
+ *cp++ = n;
+ memcpy(cp, buf2, n);
+ cp += n;
+ break;
+ case T_ISDN:
+ /* RFC1183 */
+ if ((n = getstr_str(buf2, sizeof buf2, &startp,
+ endp)) < 0)
+ return (-1);
+ if ((n > 255) || (n == 0))
+ return (-1);
+ ShrinkBuffer(n+1);
+ *cp++ = n;
+ memcpy(cp, buf2, n);
+ cp += n;
+ if ((n = getstr_str(buf2, sizeof buf2, &startp,
+ endp)) < 0)
+ n = 0;
+ if (n > 255)
+ return (-1);
+ ShrinkBuffer(n+1);
+ *cp++ = n;
+ memcpy(cp, buf2, n);
+ cp += n;
+ break;
+ case T_NSAP:
+ if ((n = inet_nsap_addr((char *)startp, (u_char *)buf2, sizeof(buf2))) != 0) {
+ ShrinkBuffer(n);
+ memcpy(cp, buf2, n);
+ cp += n;
+ } else {
+ return (-1);
+ }
+ break;
+ case T_LOC:
+ if ((n = loc_aton((char *)startp, (u_char *)buf2)) != 0) {
+ ShrinkBuffer(n);
+ memcpy(cp, buf2, n);
+ cp += n;
+ } else
+ return (-1);
+ break;
+ case ns_t_sig:
+ {
+ int sig_type, success, dateerror;
+ u_int32_t exptime, timesigned;
+
+ /* type */
+ if ((n = getword_str(buf2, sizeof buf2,
+ &startp, endp)) < 0)
+ return (-1);
+ sig_type = sym_ston(__p_type_syms, buf2, &success);
+ if (!success || sig_type == ns_t_any)
+ return (-1);
+ ShrinkBuffer(INT16SZ);
+ PUTSHORT(sig_type, cp);
+ /* alg */
+ n = getnum_str(&startp, endp);
+ if (n < 0)
+ return (-1);
+ ShrinkBuffer(1);
+ *cp++ = n;
+ /* labels */
+ n = getnum_str(&startp, endp);
+ if (n <= 0 || n > 255)
+ return (-1);
+ ShrinkBuffer(1);
+ *cp++ = n;
+ /* ottl & expire */
+ if (!getword_str(buf2, sizeof buf2, &startp, endp))
+ return (-1);
+ exptime = ns_datetosecs(buf2, &dateerror);
+ if (!dateerror) {
+ ShrinkBuffer(INT32SZ);
+ PUTLONG(rttl, cp);
+ }
+ else {
+ char *ulendp;
+ u_int32_t ottl;
+
+ errno = 0;
+ ottl = strtoul(buf2, &ulendp, 10);
+ if (errno != 0 ||
+ (ulendp != NULL && *ulendp != '\0'))
+ return (-1);
+ ShrinkBuffer(INT32SZ);
+ PUTLONG(ottl, cp);
+ if (!getword_str(buf2, sizeof buf2, &startp,
+ endp))
+ return (-1);
+ exptime = ns_datetosecs(buf2, &dateerror);
+ if (dateerror)
+ return (-1);
+ }
+ /* expire */
+ ShrinkBuffer(INT32SZ);
+ PUTLONG(exptime, cp);
+ /* timesigned */
+ if (!getword_str(buf2, sizeof buf2, &startp, endp))
+ return (-1);
+ timesigned = ns_datetosecs(buf2, &dateerror);
+ if (!dateerror) {
+ ShrinkBuffer(INT32SZ);
+ PUTLONG(timesigned, cp);
+ }
+ else
+ return (-1);
+ /* footprint */
+ n = getnum_str(&startp, endp);
+ if (n < 0)
+ return (-1);
+ ShrinkBuffer(INT16SZ);
+ PUTSHORT(n, cp);
+ /* signer name */
+ if (!getword_str(buf2, sizeof buf2, &startp, endp))
+ return (-1);
+ n = dn_comp(buf2, cp, buflen, dnptrs, lastdnptr);
+ if (n < 0)
+ return (-1);
+ cp += n;
+ ShrinkBuffer(n);
+ /* sig */
+ if ((n = getword_str(buf2, sizeof buf2,
+ &startp, endp)) < 0)
+ return (-1);
+ siglen = b64_pton(buf2, buf3, sizeof(buf3));
+ if (siglen < 0)
+ return (-1);
+ ShrinkBuffer(siglen);
+ memcpy(cp, buf3, siglen);
+ cp += siglen;
+ break;
+ }
+ case ns_t_key:
+ /* flags */
+ n = gethexnum_str(&startp, endp);
+ if (n < 0)
+ return (-1);
+ ShrinkBuffer(INT16SZ);
+ PUTSHORT(n, cp);
+ /* proto */
+ n = getnum_str(&startp, endp);
+ if (n < 0)
+ return (-1);
+ ShrinkBuffer(1);
+ *cp++ = n;
+ /* alg */
+ n = getnum_str(&startp, endp);
+ if (n < 0)
+ return (-1);
+ ShrinkBuffer(1);
+ *cp++ = n;
+ /* key */
+ if ((n = getword_str(buf2, sizeof buf2,
+ &startp, endp)) < 0)
+ return (-1);
+ keylen = b64_pton(buf2, buf3, sizeof(buf3));
+ if (keylen < 0)
+ return (-1);
+ ShrinkBuffer(keylen);
+ memcpy(cp, buf3, keylen);
+ cp += keylen;
+ break;
+ case ns_t_nxt:
+ {
+ int success, nxt_type;
+ u_char data[32];
+ int maxtype;
+
+ /* next name */
+ if (!getword_str(buf2, sizeof buf2, &startp, endp))
+ return (-1);
+ n = dn_comp(buf2, cp, buflen, NULL, NULL);
+ if (n < 0)
+ return (-1);
+ cp += n;
+ ShrinkBuffer(n);
+ maxtype = 0;
+ memset(data, 0, sizeof data);
+ for (;;) {
+ if (!getword_str(buf2, sizeof buf2, &startp,
+ endp))
+ break;
+ nxt_type = sym_ston(__p_type_syms, buf2,
+ &success);
+ if (!success || !ns_t_rr_p(nxt_type))
+ return (-1);
+ NS_NXT_BIT_SET(nxt_type, data);
+ if (nxt_type > maxtype)
+ maxtype = nxt_type;
+ }
+ n = maxtype/NS_NXT_BITS+1;
+ ShrinkBuffer(n);
+ memcpy(cp, data, n);
+ cp += n;
+ break;
+ }
+ case ns_t_cert:
+ /* type */
+ n = getnum_str(&startp, endp);
+ if (n < 0)
+ return (-1);
+ ShrinkBuffer(INT16SZ);
+ PUTSHORT(n, cp);
+ /* key tag */
+ n = getnum_str(&startp, endp);
+ if (n < 0)
+ return (-1);
+ ShrinkBuffer(INT16SZ);
+ PUTSHORT(n, cp);
+ /* alg */
+ n = getnum_str(&startp, endp);
+ if (n < 0)
+ return (-1);
+ ShrinkBuffer(1);
+ *cp++ = n;
+ /* cert */
+ if ((n = getword_str(buf2, sizeof buf2,
+ &startp, endp)) < 0)
+ return (-1);
+ certlen = b64_pton(buf2, buf3, sizeof(buf3));
+ if (certlen < 0)
+ return (-1);
+ ShrinkBuffer(certlen);
+ memcpy(cp, buf3, certlen);
+ cp += certlen;
+ break;
+ case ns_t_aaaa:
+ if (!getword_str(buf2, sizeof buf2, &startp, endp))
+ return (-1);
+ if (inet_pton(AF_INET6, buf2, &in6a) <= 0)
+ return (-1);
+ ShrinkBuffer(NS_IN6ADDRSZ);
+ memcpy(cp, &in6a, NS_IN6ADDRSZ);
+ cp += NS_IN6ADDRSZ;
+ break;
+ case ns_t_naptr:
+ /* Order Preference Flags Service Replacement Regexp */
+ /* Order */
+ n = getnum_str(&startp, endp);
+ if (n < 0 || n > 65535)
+ return (-1);
+ ShrinkBuffer(INT16SZ);
+ PUTSHORT(n, cp);
+ /* Preference */
+ n = getnum_str(&startp, endp);
+ if (n < 0 || n > 65535)
+ return (-1);
+ ShrinkBuffer(INT16SZ);
+ PUTSHORT(n, cp);
+ /* Flags */
+ if ((n = getstr_str(buf2, sizeof buf2,
+ &startp, endp)) < 0) {
+ return (-1);
+ }
+ if (n > 255)
+ return (-1);
+ ShrinkBuffer(n+1);
+ *cp++ = n;
+ memcpy(cp, buf2, n);
+ cp += n;
+ /* Service Classes */
+ if ((n = getstr_str(buf2, sizeof buf2,
+ &startp, endp)) < 0) {
+ return (-1);
+ }
+ if (n > 255)
+ return (-1);
+ ShrinkBuffer(n+1);
+ *cp++ = n;
+ memcpy(cp, buf2, n);
+ cp += n;
+ /* Pattern */
+ if ((n = getstr_str(buf2, sizeof buf2,
+ &startp, endp)) < 0) {
+ return (-1);
+ }
+ if (n > 255)
+ return (-1);
+ ShrinkBuffer(n+1);
+ *cp++ = n;
+ memcpy(cp, buf2, n);
+ cp += n;
+ /* Replacement */
+ if (!getword_str(buf2, sizeof buf2, &startp, endp))
+ return (-1);
+ n = dn_comp(buf2, cp, buflen, NULL, NULL);
+ if (n < 0)
+ return (-1);
+ cp += n;
+ ShrinkBuffer(n);
+ break;
+ default:
+ return (-1);
+ } /*switch*/
+ n = (u_int16_t)((cp - sp2) - INT16SZ);
+ PUTSHORT(n, sp2);
+ } /*for*/
+
+ hp->qdcount = htons(counts[0]);
+ hp->ancount = htons(counts[1]);
+ hp->nscount = htons(counts[2]);
+ hp->arcount = htons(counts[3]);
+ return (cp - buf);
+}
+
+/*%
+ * Get a whitespace delimited word from a string (not file)
+ * into buf. modify the start pointer to point after the
+ * word in the string.
+ */
+static int
+getword_str(char *buf, int size, u_char **startpp, u_char *endp) {
+ char *cp;
+ int c;
+
+ for (cp = buf; *startpp <= endp; ) {
+ c = **startpp;
+ if (isspace(c) || c == '\0') {
+ if (cp != buf) /*%< trailing whitespace */
+ break;
+ else { /*%< leading whitespace */
+ (*startpp)++;
+ continue;
+ }
+ }
+ (*startpp)++;
+ if (cp >= buf+size-1)
+ break;
+ *cp++ = (u_char)c;
+ }
+ *cp = '\0';
+ return (cp != buf);
+}
+
+/*%
+ * get a white spae delimited string from memory. Process quoted strings
+ * and \\DDD escapes. Return length or -1 on error. Returned string may
+ * contain nulls.
+ */
+static char digits[] = "0123456789";
+static int
+getstr_str(char *buf, int size, u_char **startpp, u_char *endp) {
+ char *cp;
+ int c, c1 = 0;
+ int inquote = 0;
+ int seen_quote = 0;
+ int escape = 0;
+ int dig = 0;
+
+ for (cp = buf; *startpp <= endp; ) {
+ if ((c = **startpp) == '\0')
+ break;
+ /* leading white space */
+ if ((cp == buf) && !seen_quote && isspace(c)) {
+ (*startpp)++;
+ continue;
+ }
+
+ switch (c) {
+ case '\\':
+ if (!escape) {
+ escape = 1;
+ dig = 0;
+ c1 = 0;
+ (*startpp)++;
+ continue;
+ }
+ goto do_escape;
+ case '"':
+ if (!escape) {
+ inquote = !inquote;
+ seen_quote = 1;
+ (*startpp)++;
+ continue;
+ }
+ /* fall through */
+ default:
+ do_escape:
+ if (escape) {
+ switch (c) {
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ c1 = c1 * 10 +
+ (strchr(digits, c) - digits);
+
+ if (++dig == 3) {
+ c = c1 &0xff;
+ break;
+ }
+ (*startpp)++;
+ continue;
+ }
+ escape = 0;
+ } else if (!inquote && isspace(c))
+ goto done;
+ if (cp >= buf+size-1)
+ goto done;
+ *cp++ = (u_char)c;
+ (*startpp)++;
+ }
+ }
+ done:
+ *cp = '\0';
+ return ((cp == buf)? (seen_quote? 0: -1): (cp - buf));
+}
+
+/*%
+ * Get a whitespace delimited base 16 number from a string (not file) into buf
+ * update the start pointer to point after the number in the string.
+ */
+static int
+gethexnum_str(u_char **startpp, u_char *endp) {
+ int c, n;
+ int seendigit = 0;
+ int m = 0;
+
+ if (*startpp + 2 >= endp || strncasecmp((char *)*startpp, "0x", 2) != 0)
+ return getnum_str(startpp, endp);
+ (*startpp)+=2;
+ for (n = 0; *startpp <= endp; ) {
+ c = **startpp;
+ if (isspace(c) || c == '\0') {
+ if (seendigit) /*%< trailing whitespace */
+ break;
+ else { /*%< leading whitespace */
+ (*startpp)++;
+ continue;
+ }
+ }
+ if (c == ';') {
+ while ((*startpp <= endp) &&
+ ((c = **startpp) != '\n'))
+ (*startpp)++;
+ if (seendigit)
+ break;
+ continue;
+ }
+ if (!isxdigit(c)) {
+ if (c == ')' && seendigit) {
+ (*startpp)--;
+ break;
+ }
+ return (-1);
+ }
+ (*startpp)++;
+ if (isdigit(c))
+ n = n * 16 + (c - '0');
+ else
+ n = n * 16 + (tolower(c) - 'a' + 10);
+ seendigit = 1;
+ }
+ return (n + m);
+}
+
+/*%
+ * Get a whitespace delimited base 10 number from a string (not file) into buf
+ * update the start pointer to point after the number in the string.
+ */
+static int
+getnum_str(u_char **startpp, u_char *endp) {
+ int c, n;
+ int seendigit = 0;
+ int m = 0;
+
+ for (n = 0; *startpp <= endp; ) {
+ c = **startpp;
+ if (isspace(c) || c == '\0') {
+ if (seendigit) /*%< trailing whitespace */
+ break;
+ else { /*%< leading whitespace */
+ (*startpp)++;
+ continue;
+ }
+ }
+ if (c == ';') {
+ while ((*startpp <= endp) &&
+ ((c = **startpp) != '\n'))
+ (*startpp)++;
+ if (seendigit)
+ break;
+ continue;
+ }
+ if (!isdigit(c)) {
+ if (c == ')' && seendigit) {
+ (*startpp)--;
+ break;
+ }
+ return (-1);
+ }
+ (*startpp)++;
+ n = n * 10 + (c - '0');
+ seendigit = 1;
+ }
+ return (n + m);
+}
+
+/*%
+ * Allocate a resource record buffer & save rr info.
+ */
+ns_updrec *
+res_mkupdrec(int section, const char *dname,
+ u_int class, u_int type, u_long ttl) {
+ ns_updrec *rrecp = (ns_updrec *)calloc(1, sizeof(ns_updrec));
+
+ if (!rrecp || !(rrecp->r_dname = strdup(dname))) {
+ if (rrecp)
+ free((char *)rrecp);
+ return (NULL);
+ }
+ INIT_LINK(rrecp, r_link);
+ INIT_LINK(rrecp, r_glink);
+ rrecp->r_class = (ns_class)class;
+ rrecp->r_type = (ns_type)type;
+ rrecp->r_ttl = ttl;
+ rrecp->r_section = (ns_sect)section;
+ return (rrecp);
+}
+
+/*%
+ * Free a resource record buffer created by res_mkupdrec.
+ */
+void
+res_freeupdrec(ns_updrec *rrecp) {
+ /* Note: freeing r_dp is the caller's responsibility. */
+ if (rrecp->r_dname != NULL)
+ free(rrecp->r_dname);
+ free(rrecp);
+}
+
+struct valuelist {
+ struct valuelist * next;
+ struct valuelist * prev;
+ char * name;
+ char * proto;
+ int port;
+};
+static struct valuelist *servicelist, *protolist;
+
+static void
+res_buildservicelist() {
+ struct servent *sp;
+ struct valuelist *slp;
+
+#ifdef MAYBE_HESIOD
+ setservent(0);
+#else
+ setservent(1);
+#endif
+ while ((sp = getservent()) != NULL) {
+ slp = (struct valuelist *)malloc(sizeof(struct valuelist));
+ if (!slp)
+ break;
+ slp->name = strdup(sp->s_name);
+ slp->proto = strdup(sp->s_proto);
+ if ((slp->name == NULL) || (slp->proto == NULL)) {
+ if (slp->name) free(slp->name);
+ if (slp->proto) free(slp->proto);
+ free(slp);
+ break;
+ }
+ slp->port = ntohs((u_int16_t)sp->s_port); /*%< host byt order */
+ slp->next = servicelist;
+ slp->prev = NULL;
+ if (servicelist)
+ servicelist->prev = slp;
+ servicelist = slp;
+ }
+ endservent();
+}
+
+void
+res_destroyservicelist() {
+ struct valuelist *slp, *slp_next;
+
+ for (slp = servicelist; slp != NULL; slp = slp_next) {
+ slp_next = slp->next;
+ free(slp->name);
+ free(slp->proto);
+ free(slp);
+ }
+ servicelist = (struct valuelist *)0;
+}
+
+void
+res_buildprotolist(void) {
+ struct protoent *pp;
+ struct valuelist *slp;
+
+#ifdef MAYBE_HESIOD
+ setprotoent(0);
+#else
+ setprotoent(1);
+#endif
+ while ((pp = getprotoent()) != NULL) {
+ slp = (struct valuelist *)malloc(sizeof(struct valuelist));
+ if (!slp)
+ break;
+ slp->name = strdup(pp->p_name);
+ if (slp->name == NULL) {
+ free(slp);
+ break;
+ }
+ slp->port = pp->p_proto; /*%< host byte order */
+ slp->next = protolist;
+ slp->prev = NULL;
+ if (protolist)
+ protolist->prev = slp;
+ protolist = slp;
+ }
+ endprotoent();
+}
+
+void
+res_destroyprotolist(void) {
+ struct valuelist *plp, *plp_next;
+
+ for (plp = protolist; plp != NULL; plp = plp_next) {
+ plp_next = plp->next;
+ free(plp->name);
+ free(plp);
+ }
+ protolist = (struct valuelist *)0;
+}
+
+static int
+findservice(const char *s, struct valuelist **list) {
+ struct valuelist *lp = *list;
+ int n;
+
+ for (; lp != NULL; lp = lp->next)
+ if (strcasecmp(lp->name, s) == 0) {
+ if (lp != *list) {
+ lp->prev->next = lp->next;
+ if (lp->next)
+ lp->next->prev = lp->prev;
+ (*list)->prev = lp;
+ lp->next = *list;
+ *list = lp;
+ }
+ return (lp->port); /*%< host byte order */
+ }
+ if (sscanf(s, "%d", &n) != 1 || n <= 0)
+ n = -1;
+ return (n);
+}
+
+/*%
+ * Convert service name or (ascii) number to int.
+ */
+int
+res_servicenumber(const char *p) {
+ if (servicelist == (struct valuelist *)0)
+ res_buildservicelist();
+ return (findservice(p, &servicelist));
+}
+
+/*%
+ * Convert protocol name or (ascii) number to int.
+ */
+int
+res_protocolnumber(const char *p) {
+ if (protolist == (struct valuelist *)0)
+ res_buildprotolist();
+ return (findservice(p, &protolist));
+}
+
+static struct servent *
+cgetservbyport(u_int16_t port, const char *proto) { /*%< Host byte order. */
+ struct valuelist **list = &servicelist;
+ struct valuelist *lp = *list;
+ static struct servent serv;
+
+ port = ntohs(port);
+ for (; lp != NULL; lp = lp->next) {
+ if (port != (u_int16_t)lp->port) /*%< Host byte order. */
+ continue;
+ if (strcasecmp(lp->proto, proto) == 0) {
+ if (lp != *list) {
+ lp->prev->next = lp->next;
+ if (lp->next)
+ lp->next->prev = lp->prev;
+ (*list)->prev = lp;
+ lp->next = *list;
+ *list = lp;
+ }
+ serv.s_name = lp->name;
+ serv.s_port = htons((u_int16_t)lp->port);
+ serv.s_proto = lp->proto;
+ return (&serv);
+ }
+ }
+ return (0);
+}
+
+static struct protoent *
+cgetprotobynumber(int proto) { /*%< Host byte order. */
+ struct valuelist **list = &protolist;
+ struct valuelist *lp = *list;
+ static struct protoent prot;
+
+ for (; lp != NULL; lp = lp->next)
+ if (lp->port == proto) { /*%< Host byte order. */
+ if (lp != *list) {
+ lp->prev->next = lp->next;
+ if (lp->next)
+ lp->next->prev = lp->prev;
+ (*list)->prev = lp;
+ lp->next = *list;
+ *list = lp;
+ }
+ prot.p_name = lp->name;
+ prot.p_proto = lp->port; /*%< Host byte order. */
+ return (&prot);
+ }
+ return (0);
+}
+
+const char *
+res_protocolname(int num) {
+ static char number[8];
+ struct protoent *pp;
+
+ if (protolist == (struct valuelist *)0)
+ res_buildprotolist();
+ pp = cgetprotobynumber(num);
+ if (pp == 0) {
+ (void) sprintf(number, "%d", num);
+ return (number);
+ }
+ return (pp->p_name);
+}
+
+const char *
+res_servicename(u_int16_t port, const char *proto) { /*%< Host byte order. */
+ static char number[8];
+ struct servent *ss;
+
+ if (servicelist == (struct valuelist *)0)
+ res_buildservicelist();
+ ss = cgetservbyport(htons(port), proto);
+ if (ss == 0) {
+ (void) sprintf(number, "%d", port);
+ return (number);
+ }
+ return (ss->s_name);
+}
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/res_mkupdate.h b/usr/src/lib/libresolv2_joy/common/resolv/res_mkupdate.h
new file mode 100644
index 0000000000..96c452d89e
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/res_mkupdate.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1998,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _RES_MKUPDATE_H_
+#define _RES_MKUPDATE_H_
+
+__BEGIN_DECLS
+__END_DECLS
+
+#endif /* _RES_MKUPDATE_H_ */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/res_private.h b/usr/src/lib/libresolv2_joy/common/resolv/res_private.h
new file mode 100644
index 0000000000..4e98157ced
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/res_private.h
@@ -0,0 +1,22 @@
+#ifndef res_private_h
+#define res_private_h
+
+struct __res_state_ext {
+ union res_sockaddr_union nsaddrs[MAXNS];
+ struct sort_list {
+ int af;
+ union {
+ struct in_addr ina;
+ struct in6_addr in6a;
+ } addr, mask;
+ } sort_list[MAXRESOLVSORT];
+ char nsuffix[64];
+ char nsuffix2[64];
+};
+
+extern int
+res_ourserver_p(const res_state statp, const struct sockaddr *sa);
+
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/res_query.c b/usr/src/lib/libresolv2_joy/common/resolv/res_query.c
new file mode 100644
index 0000000000..09df3da1fc
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/res_query.c
@@ -0,0 +1,440 @@
+/*
+ * Portions Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (C) 1996-2001, 2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Copyright (c) 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Portions Copyright (c) 1993 by Digital Equipment Corporation.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies, and that
+ * the name of Digital Equipment Corporation not be used in advertising or
+ * publicity pertaining to distribution of the document or software without
+ * specific, written prior permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
+ * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)res_query.c 8.1 (Berkeley) 6/4/93";
+static const char rcsid[] = "$Id: res_query.c,v 1.11 2008/11/14 02:36:51 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include "port_before.h"
+#include <sys/types.h>
+#include <sys/param.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+#include <ctype.h>
+#include <errno.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "port_after.h"
+
+/* Options. Leave them on. */
+#define DEBUG
+
+#if PACKETSZ > 1024
+#define MAXPACKET PACKETSZ
+#else
+#define MAXPACKET 1024
+#endif
+
+/*%
+ * Formulate a normal query, send, and await answer.
+ * Returned answer is placed in supplied buffer "answer".
+ * Perform preliminary check of answer, returning success only
+ * if no error is indicated and the answer count is nonzero.
+ * Return the size of the response on success, -1 on error.
+ * Error number is left in H_ERRNO.
+ *
+ * Caller must parse answer and determine whether it answers the question.
+ */
+int
+res_nquery(res_state statp,
+ const char *name, /*%< domain name */
+ int class, int type, /*%< class and type of query */
+ u_char *answer, /*%< buffer to put answer */
+ int anslen) /*%< size of answer buffer */
+{
+ u_char buf[MAXPACKET];
+ HEADER *hp = (HEADER *) answer;
+ u_int oflags;
+ u_char *rdata;
+ int n;
+
+ oflags = statp->_flags;
+
+again:
+ hp->rcode = NOERROR; /*%< default */
+#ifdef DEBUG
+ if (statp->options & RES_DEBUG)
+ printf(";; res_query(%s, %d, %d)\n", name, class, type);
+#endif
+
+ n = res_nmkquery(statp, QUERY, name, class, type, NULL, 0, NULL,
+ buf, sizeof(buf));
+#ifdef RES_USE_EDNS0
+ if (n > 0 && (statp->_flags & RES_F_EDNS0ERR) == 0 &&
+ (statp->options & (RES_USE_EDNS0|RES_USE_DNSSEC|RES_NSID))) {
+ n = res_nopt(statp, n, buf, sizeof(buf), anslen);
+ rdata = &buf[n];
+ if (n > 0 && (statp->options & RES_NSID) != 0U) {
+ n = res_nopt_rdata(statp, n, buf, sizeof(buf), rdata,
+ NS_OPT_NSID, 0, NULL);
+ }
+ }
+#endif
+ if (n <= 0) {
+#ifdef DEBUG
+ if (statp->options & RES_DEBUG)
+ printf(";; res_query: mkquery failed\n");
+#endif
+ RES_SET_H_ERRNO(statp, NO_RECOVERY);
+ return (n);
+ }
+
+ n = res_nsend(statp, buf, n, answer, anslen);
+ if (n < 0) {
+#ifdef RES_USE_EDNS0
+ /* if the query choked with EDNS0, retry without EDNS0 */
+ if ((statp->options & (RES_USE_EDNS0|RES_USE_DNSSEC)) != 0U &&
+ ((oflags ^ statp->_flags) & RES_F_EDNS0ERR) != 0) {
+ statp->_flags |= RES_F_EDNS0ERR;
+ if (statp->options & RES_DEBUG)
+ printf(";; res_nquery: retry without EDNS0\n");
+ goto again;
+ }
+#endif
+#ifdef DEBUG
+ if (statp->options & RES_DEBUG)
+ printf(";; res_query: send error\n");
+#endif
+ RES_SET_H_ERRNO(statp, TRY_AGAIN);
+ return (n);
+ }
+
+ if (hp->rcode != NOERROR || ntohs(hp->ancount) == 0) {
+#ifdef DEBUG
+ if (statp->options & RES_DEBUG)
+ printf(";; rcode = (%s), counts = an:%d ns:%d ar:%d\n",
+ p_rcode(hp->rcode),
+ ntohs(hp->ancount),
+ ntohs(hp->nscount),
+ ntohs(hp->arcount));
+#endif
+ switch (hp->rcode) {
+ case NXDOMAIN:
+ RES_SET_H_ERRNO(statp, HOST_NOT_FOUND);
+ break;
+ case SERVFAIL:
+ RES_SET_H_ERRNO(statp, TRY_AGAIN);
+ break;
+ case NOERROR:
+ RES_SET_H_ERRNO(statp, NO_DATA);
+ break;
+ case FORMERR:
+ case NOTIMP:
+ case REFUSED:
+ default:
+ RES_SET_H_ERRNO(statp, NO_RECOVERY);
+ break;
+ }
+ return (-1);
+ }
+ return (n);
+}
+
+/*%
+ * Formulate a normal query, send, and retrieve answer in supplied buffer.
+ * Return the size of the response on success, -1 on error.
+ * If enabled, implement search rules until answer or unrecoverable failure
+ * is detected. Error code, if any, is left in H_ERRNO.
+ */
+int
+res_nsearch(res_state statp,
+ const char *name, /*%< domain name */
+ int class, int type, /*%< class and type of query */
+ u_char *answer, /*%< buffer to put answer */
+ int anslen) /*%< size of answer */
+{
+ const char *cp, * const *domain;
+ HEADER *hp = (HEADER *) answer;
+ char tmp[NS_MAXDNAME];
+ u_int dots;
+ int trailing_dot, ret, saved_herrno;
+ int got_nodata = 0, got_servfail = 0, root_on_list = 0;
+ int tried_as_is = 0;
+ int searched = 0;
+
+ errno = 0;
+ RES_SET_H_ERRNO(statp, HOST_NOT_FOUND); /*%< True if we never query. */
+ dots = 0;
+ for (cp = name; *cp != '\0'; cp++)
+ dots += (*cp == '.');
+ trailing_dot = 0;
+ if (cp > name && *--cp == '.')
+ trailing_dot++;
+
+ /* If there aren't any dots, it could be a user-level alias. */
+ if (!dots && (cp = res_hostalias(statp, name, tmp, sizeof tmp))!= NULL)
+ return (res_nquery(statp, cp, class, type, answer, anslen));
+
+ /*
+ * If there are enough dots in the name, let's just give it a
+ * try 'as is'. The threshold can be set with the "ndots" option.
+ * Also, query 'as is', if there is a trailing dot in the name.
+ */
+ saved_herrno = -1;
+ if (dots >= statp->ndots || trailing_dot) {
+ ret = res_nquerydomain(statp, name, NULL, class, type,
+ answer, anslen);
+ if (ret > 0 || trailing_dot)
+ return (ret);
+ saved_herrno = statp->res_h_errno;
+ tried_as_is++;
+ }
+
+ /*
+ * We do at least one level of search if
+ * - there is no dot and RES_DEFNAME is set, or
+ * - there is at least one dot, there is no trailing dot,
+ * and RES_DNSRCH is set.
+ */
+ if ((!dots && (statp->options & RES_DEFNAMES) != 0U) ||
+ (dots && !trailing_dot && (statp->options & RES_DNSRCH) != 0U)) {
+ int done = 0;
+
+ for (domain = (const char * const *)statp->dnsrch;
+ *domain && !done;
+ domain++) {
+ searched = 1;
+
+ if (domain[0][0] == '\0' ||
+ (domain[0][0] == '.' && domain[0][1] == '\0'))
+ root_on_list++;
+
+ ret = res_nquerydomain(statp, name, *domain,
+ class, type,
+ answer, anslen);
+ if (ret > 0)
+ return (ret);
+
+ /*
+ * If no server present, give up.
+ * If name isn't found in this domain,
+ * keep trying higher domains in the search list
+ * (if that's enabled).
+ * On a NO_DATA error, keep trying, otherwise
+ * a wildcard entry of another type could keep us
+ * from finding this entry higher in the domain.
+ * If we get some other error (negative answer or
+ * server failure), then stop searching up,
+ * but try the input name below in case it's
+ * fully-qualified.
+ */
+ if (errno == ECONNREFUSED) {
+ RES_SET_H_ERRNO(statp, TRY_AGAIN);
+ return (-1);
+ }
+
+ switch (statp->res_h_errno) {
+ case NO_DATA:
+ got_nodata++;
+ /* FALLTHROUGH */
+ case HOST_NOT_FOUND:
+ /* keep trying */
+ break;
+ case TRY_AGAIN:
+ if (hp->rcode == SERVFAIL) {
+ /* try next search element, if any */
+ got_servfail++;
+ break;
+ }
+ /* FALLTHROUGH */
+ default:
+ /* anything else implies that we're done */
+ done++;
+ }
+
+ /* if we got here for some reason other than DNSRCH,
+ * we only wanted one iteration of the loop, so stop.
+ */
+ if ((statp->options & RES_DNSRCH) == 0U)
+ done++;
+ }
+ }
+
+ /*
+ * If the query has not already been tried as is then try it
+ * unless RES_NOTLDQUERY is set and there were no dots.
+ */
+ if ((dots || !searched || (statp->options & RES_NOTLDQUERY) == 0U) &&
+ !(tried_as_is || root_on_list)) {
+ ret = res_nquerydomain(statp, name, NULL, class, type,
+ answer, anslen);
+ if (ret > 0)
+ return (ret);
+ }
+
+ /* if we got here, we didn't satisfy the search.
+ * if we did an initial full query, return that query's H_ERRNO
+ * (note that we wouldn't be here if that query had succeeded).
+ * else if we ever got a nodata, send that back as the reason.
+ * else send back meaningless H_ERRNO, that being the one from
+ * the last DNSRCH we did.
+ */
+ if (saved_herrno != -1)
+ RES_SET_H_ERRNO(statp, saved_herrno);
+ else if (got_nodata)
+ RES_SET_H_ERRNO(statp, NO_DATA);
+ else if (got_servfail)
+ RES_SET_H_ERRNO(statp, TRY_AGAIN);
+ return (-1);
+}
+
+/*%
+ * Perform a call on res_query on the concatenation of name and domain,
+ * removing a trailing dot from name if domain is NULL.
+ */
+int
+res_nquerydomain(res_state statp,
+ const char *name,
+ const char *domain,
+ int class, int type, /*%< class and type of query */
+ u_char *answer, /*%< buffer to put answer */
+ int anslen) /*%< size of answer */
+{
+ char nbuf[MAXDNAME];
+ const char *longname = nbuf;
+ int n, d;
+
+#ifdef DEBUG
+ if (statp->options & RES_DEBUG)
+ printf(";; res_nquerydomain(%s, %s, %d, %d)\n",
+ name, domain?domain:"<Nil>", class, type);
+#endif
+ if (domain == NULL) {
+ /*
+ * Check for trailing '.';
+ * copy without '.' if present.
+ */
+ n = strlen(name);
+ if (n >= MAXDNAME) {
+ RES_SET_H_ERRNO(statp, NO_RECOVERY);
+ return (-1);
+ }
+ n--;
+ if (n >= 0 && name[n] == '.') {
+ strncpy(nbuf, name, n);
+ nbuf[n] = '\0';
+ } else
+ longname = name;
+ } else {
+ n = strlen(name);
+ d = strlen(domain);
+ if (n + d + 1 >= MAXDNAME) {
+ RES_SET_H_ERRNO(statp, NO_RECOVERY);
+ return (-1);
+ }
+ sprintf(nbuf, "%s.%s", name, domain);
+ }
+ return (res_nquery(statp, longname, class, type, answer, anslen));
+}
+
+const char *
+res_hostalias(const res_state statp, const char *name, char *dst, size_t siz) {
+ char *file, *cp1, *cp2;
+ char buf[BUFSIZ];
+ FILE *fp;
+
+ if (statp->options & RES_NOALIASES)
+ return (NULL);
+ file = getenv("HOSTALIASES");
+ if (file == NULL || (fp = fopen(file, "r")) == NULL)
+ return (NULL);
+ setbuf(fp, NULL);
+ buf[sizeof(buf) - 1] = '\0';
+ while (fgets(buf, sizeof(buf), fp)) {
+ for (cp1 = buf; *cp1 && !isspace((unsigned char)*cp1); ++cp1)
+ ;
+ if (!*cp1)
+ break;
+ *cp1 = '\0';
+ if (ns_samename(buf, name) == 1) {
+ while (isspace((unsigned char)*++cp1))
+ ;
+ if (!*cp1)
+ break;
+ for (cp2 = cp1 + 1; *cp2 &&
+ !isspace((unsigned char)*cp2); ++cp2)
+ ;
+ *cp2 = '\0';
+ strncpy(dst, cp1, siz - 1);
+ dst[siz - 1] = '\0';
+ fclose(fp);
+ return (dst);
+ }
+ }
+ fclose(fp);
+ return (NULL);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/res_send.c b/usr/src/lib/libresolv2_joy/common/resolv/res_send.c
new file mode 100644
index 0000000000..289db4e77d
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/res_send.c
@@ -0,0 +1,1120 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/*
+ * Portions Copyright (C) 2004-2009 Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (C) 1996-2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Copyright (c) 1985, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Portions Copyright (c) 1993 by Digital Equipment Corporation.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies, and that
+ * the name of Digital Equipment Corporation not be used in advertising or
+ * publicity pertaining to distribution of the document or software without
+ * specific, written prior permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
+ * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)res_send.c 8.1 (Berkeley) 6/4/93";
+static const char rcsid[] = "$Id: res_send.c,v 1.22 2009/01/22 23:49:23 tbox Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/*! \file
+ * \brief
+ * Send query to name server and wait for reply.
+ */
+
+#include "port_before.h"
+#include "fd_setsize.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <sys/uio.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <arpa/inet.h>
+
+#include <errno.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <isc/eventlib.h>
+
+#include "port_after.h"
+
+#ifdef USE_POLL
+#ifdef HAVE_STROPTS_H
+#include <stropts.h>
+#endif
+#include <poll.h>
+#endif /* USE_POLL */
+
+/* Options. Leave them on. */
+#define DEBUG
+#include "res_debug.h"
+#include "res_private.h"
+
+#define EXT(res) ((res)->_u._ext)
+
+#ifndef USE_POLL
+static const int highestFD = FD_SETSIZE - 1;
+#else
+static int highestFD = 0;
+#endif
+
+/* Forward. */
+
+static int get_salen __P((const struct sockaddr *));
+static struct sockaddr * get_nsaddr __P((res_state, size_t));
+static int send_vc(res_state, const u_char *, int,
+ u_char *, int, int *, int);
+static int send_dg(res_state, const u_char *, int,
+ u_char *, int, int *, int, int,
+ int *, int *);
+static void Aerror(const res_state, FILE *, const char *, int,
+ const struct sockaddr *, int);
+static void Perror(const res_state, FILE *, const char *, int);
+static int sock_eq(struct sockaddr *, struct sockaddr *);
+#if defined(NEED_PSELECT) && !defined(USE_POLL)
+static int pselect(int, void *, void *, void *,
+ struct timespec *,
+ const sigset_t *);
+#endif
+void res_pquery(const res_state, const u_char *, int, FILE *);
+
+#ifndef ORIGINAL_ISC_CODE
+#pragma weak __res_nameinquery = res_nameinquery
+#pragma weak __res_queriesmatch = res_queriesmatch
+#pragma weak res_nisourserver = res_ourserver_p
+#endif /* ORIGINAL_ISC_CODE */
+
+static const int niflags = NI_NUMERICHOST | NI_NUMERICSERV;
+
+/* Public. */
+
+/*%
+ * looks up "ina" in _res.ns_addr_list[]
+ *
+ * returns:
+ *\li 0 : not found
+ *\li >0 : found
+ *
+ * author:
+ *\li paul vixie, 29may94
+ */
+int
+res_ourserver_p(const res_state statp, const struct sockaddr *sa) {
+ const struct sockaddr_in *inp, *srv;
+ const struct sockaddr_in6 *in6p, *srv6;
+ int ns;
+
+ switch (sa->sa_family) {
+ case AF_INET:
+ inp = (const struct sockaddr_in *)sa;
+ for (ns = 0; ns < statp->nscount; ns++) {
+ srv = (struct sockaddr_in *)get_nsaddr(statp, ns);
+ if (srv->sin_family == inp->sin_family &&
+ srv->sin_port == inp->sin_port &&
+ (srv->sin_addr.s_addr == INADDR_ANY ||
+ srv->sin_addr.s_addr == inp->sin_addr.s_addr))
+ return (1);
+ }
+ break;
+ case AF_INET6:
+ if (EXT(statp).ext == NULL)
+ break;
+ in6p = (const struct sockaddr_in6 *)sa;
+ for (ns = 0; ns < statp->nscount; ns++) {
+ srv6 = (struct sockaddr_in6 *)get_nsaddr(statp, ns);
+ if (srv6->sin6_family == in6p->sin6_family &&
+ srv6->sin6_port == in6p->sin6_port &&
+#ifdef HAVE_SIN6_SCOPE_ID
+ (srv6->sin6_scope_id == 0 ||
+ srv6->sin6_scope_id == in6p->sin6_scope_id) &&
+#endif
+ (IN6_IS_ADDR_UNSPECIFIED(&srv6->sin6_addr) ||
+ IN6_ARE_ADDR_EQUAL(&srv6->sin6_addr, &in6p->sin6_addr)))
+ return (1);
+ }
+ break;
+ default:
+ break;
+ }
+ return (0);
+}
+
+/*%
+ * look for (name,type,class) in the query section of packet (buf,eom)
+ *
+ * requires:
+ *\li buf + HFIXEDSZ <= eom
+ *
+ * returns:
+ *\li -1 : format error
+ *\li 0 : not found
+ *\li >0 : found
+ *
+ * author:
+ *\li paul vixie, 29may94
+ */
+int
+res_nameinquery(const char *name, int type, int class,
+ const u_char *buf, const u_char *eom)
+{
+ const u_char *cp = buf + HFIXEDSZ;
+ int qdcount = ntohs(((const HEADER*)buf)->qdcount);
+
+ while (qdcount-- > 0) {
+ char tname[MAXDNAME+1];
+ int n, ttype, tclass;
+
+ n = dn_expand(buf, eom, cp, tname, sizeof tname);
+ if (n < 0)
+ return (-1);
+ cp += n;
+ if (cp + 2 * INT16SZ > eom)
+ return (-1);
+ ttype = ns_get16(cp); cp += INT16SZ;
+ tclass = ns_get16(cp); cp += INT16SZ;
+ if (ttype == type && tclass == class &&
+ ns_samename(tname, name) == 1)
+ return (1);
+ }
+ return (0);
+}
+
+/*%
+ * is there a 1:1 mapping of (name,type,class)
+ * in (buf1,eom1) and (buf2,eom2)?
+ *
+ * returns:
+ *\li -1 : format error
+ *\li 0 : not a 1:1 mapping
+ *\li >0 : is a 1:1 mapping
+ *
+ * author:
+ *\li paul vixie, 29may94
+ */
+int
+res_queriesmatch(const u_char *buf1, const u_char *eom1,
+ const u_char *buf2, const u_char *eom2)
+{
+ const u_char *cp = buf1 + HFIXEDSZ;
+ int qdcount = ntohs(((const HEADER*)buf1)->qdcount);
+
+ if (buf1 + HFIXEDSZ > eom1 || buf2 + HFIXEDSZ > eom2)
+ return (-1);
+
+ /*
+ * Only header section present in replies to
+ * dynamic update packets.
+ */
+ if ((((const HEADER *)buf1)->opcode == ns_o_update) &&
+ (((const HEADER *)buf2)->opcode == ns_o_update))
+ return (1);
+
+ if (qdcount != ntohs(((const HEADER*)buf2)->qdcount))
+ return (0);
+ while (qdcount-- > 0) {
+ char tname[MAXDNAME+1];
+ int n, ttype, tclass;
+
+ n = dn_expand(buf1, eom1, cp, tname, sizeof tname);
+ if (n < 0)
+ return (-1);
+ cp += n;
+ if (cp + 2 * INT16SZ > eom1)
+ return (-1);
+ ttype = ns_get16(cp); cp += INT16SZ;
+ tclass = ns_get16(cp); cp += INT16SZ;
+ if (!res_nameinquery(tname, ttype, tclass, buf2, eom2))
+ return (0);
+ }
+ return (1);
+}
+
+int
+res_nsend(res_state statp,
+ const u_char *buf, int buflen, u_char *ans, int anssiz)
+{
+ int gotsomewhere, terrno, tries, v_circuit, resplen, ns, n;
+ char abuf[NI_MAXHOST];
+
+#ifdef USE_POLL
+ highestFD = sysconf(_SC_OPEN_MAX) - 1;
+#endif
+
+ /* No name servers or res_init() failure */
+ if (statp->nscount == 0 || EXT(statp).ext == NULL) {
+ errno = ESRCH;
+ return (-1);
+ }
+ if (anssiz < HFIXEDSZ) {
+ errno = EINVAL;
+ return (-1);
+ }
+ DprintQ((statp->options & RES_DEBUG) || (statp->pfcode & RES_PRF_QUERY),
+ (stdout, ";; res_send()\n"), buf, buflen);
+ v_circuit = (statp->options & RES_USEVC) || buflen > PACKETSZ;
+ gotsomewhere = 0;
+ terrno = ETIMEDOUT;
+
+ /*
+ * If the ns_addr_list in the resolver context has changed, then
+ * invalidate our cached copy and the associated timing data.
+ */
+ if (EXT(statp).nscount != 0) {
+ int needclose = 0;
+ struct sockaddr_storage peer;
+ ISC_SOCKLEN_T peerlen;
+
+ if (EXT(statp).nscount != statp->nscount)
+ needclose++;
+ else
+ for (ns = 0; ns < statp->nscount; ns++) {
+ if (statp->nsaddr_list[ns].sin_family &&
+ !sock_eq((struct sockaddr *)&statp->nsaddr_list[ns],
+ (struct sockaddr *)&EXT(statp).ext->nsaddrs[ns])) {
+ needclose++;
+ break;
+ }
+
+ if (EXT(statp).nssocks[ns] == -1)
+ continue;
+ peerlen = sizeof(peer);
+ if (getpeername(EXT(statp).nssocks[ns],
+ (struct sockaddr *)&peer, &peerlen) < 0) {
+ needclose++;
+ break;
+ }
+ if (!sock_eq((struct sockaddr *)&peer,
+ get_nsaddr(statp, ns))) {
+ needclose++;
+ break;
+ }
+ }
+ if (needclose) {
+ res_nclose(statp);
+ EXT(statp).nscount = 0;
+ }
+ }
+
+ /*
+ * Maybe initialize our private copy of the ns_addr_list.
+ */
+ if (EXT(statp).nscount == 0) {
+ for (ns = 0; ns < statp->nscount; ns++) {
+ EXT(statp).nstimes[ns] = RES_MAXTIME;
+ EXT(statp).nssocks[ns] = -1;
+ if (!statp->nsaddr_list[ns].sin_family)
+ continue;
+ EXT(statp).ext->nsaddrs[ns].sin =
+ statp->nsaddr_list[ns];
+ }
+ EXT(statp).nscount = statp->nscount;
+ }
+
+ /*
+ * Some resolvers want to even out the load on their nameservers.
+ * Note that RES_BLAST overrides RES_ROTATE.
+ */
+ if ((statp->options & RES_ROTATE) != 0U &&
+ (statp->options & RES_BLAST) == 0U) {
+ union res_sockaddr_union inu;
+ struct sockaddr_in ina;
+ int lastns = statp->nscount - 1;
+ int fd;
+ u_int16_t nstime;
+
+ if (EXT(statp).ext != NULL)
+ inu = EXT(statp).ext->nsaddrs[0];
+ ina = statp->nsaddr_list[0];
+ fd = EXT(statp).nssocks[0];
+ nstime = EXT(statp).nstimes[0];
+ for (ns = 0; ns < lastns; ns++) {
+ if (EXT(statp).ext != NULL)
+ EXT(statp).ext->nsaddrs[ns] =
+ EXT(statp).ext->nsaddrs[ns + 1];
+ statp->nsaddr_list[ns] = statp->nsaddr_list[ns + 1];
+ EXT(statp).nssocks[ns] = EXT(statp).nssocks[ns + 1];
+ EXT(statp).nstimes[ns] = EXT(statp).nstimes[ns + 1];
+ }
+ if (EXT(statp).ext != NULL)
+ EXT(statp).ext->nsaddrs[lastns] = inu;
+ statp->nsaddr_list[lastns] = ina;
+ EXT(statp).nssocks[lastns] = fd;
+ EXT(statp).nstimes[lastns] = nstime;
+ }
+
+ /*
+ * Send request, RETRY times, or until successful.
+ */
+ for (tries = 0; tries < statp->retry; tries++) {
+ for (ns = 0; ns < statp->nscount; ns++) {
+ struct sockaddr *nsap;
+ int nsaplen;
+ nsap = get_nsaddr(statp, ns);
+ nsaplen = get_salen(nsap);
+ statp->_flags &= ~RES_F_LASTMASK;
+ statp->_flags |= (ns << RES_F_LASTSHIFT);
+ same_ns:
+ if (statp->qhook) {
+ int done = 0, loops = 0;
+
+ do {
+ res_sendhookact act;
+
+ act = (*statp->qhook)(&nsap, &buf, &buflen,
+ ans, anssiz, &resplen);
+ switch (act) {
+ case res_goahead:
+ done = 1;
+ break;
+ case res_nextns:
+ res_nclose(statp);
+ goto next_ns;
+ case res_done:
+ return (resplen);
+ case res_modified:
+ /* give the hook another try */
+ if (++loops < 42) /*doug adams*/
+ break;
+ /*FALLTHROUGH*/
+ case res_error:
+ /*FALLTHROUGH*/
+ default:
+ goto fail;
+ }
+ } while (!done);
+ }
+
+ Dprint(((statp->options & RES_DEBUG) &&
+ getnameinfo(nsap, nsaplen, abuf, sizeof(abuf),
+ NULL, 0, niflags) == 0),
+ (stdout, ";; Querying server (# %d) address = %s\n",
+ ns + 1, abuf));
+
+
+ if (v_circuit) {
+ /* Use VC; at most one attempt per server. */
+ tries = statp->retry;
+ n = send_vc(statp, buf, buflen, ans, anssiz, &terrno,
+ ns);
+ if (n < 0)
+ goto fail;
+ if (n == 0)
+ goto next_ns;
+ resplen = n;
+ } else {
+ /* Use datagrams. */
+ n = send_dg(statp, buf, buflen, ans, anssiz, &terrno,
+ ns, tries, &v_circuit, &gotsomewhere);
+ if (n < 0)
+ goto fail;
+ if (n == 0)
+ goto next_ns;
+ if (v_circuit)
+ goto same_ns;
+ resplen = n;
+ }
+
+ Dprint((statp->options & RES_DEBUG) ||
+ ((statp->pfcode & RES_PRF_REPLY) &&
+ (statp->pfcode & RES_PRF_HEAD1)),
+ (stdout, ";; got answer:\n"));
+
+ DprintQ((statp->options & RES_DEBUG) ||
+ (statp->pfcode & RES_PRF_REPLY),
+ (stdout, "%s", ""),
+ ans, (resplen > anssiz) ? anssiz : resplen);
+
+ /*
+ * If we have temporarily opened a virtual circuit,
+ * or if we haven't been asked to keep a socket open,
+ * close the socket.
+ */
+ if ((v_circuit && (statp->options & RES_USEVC) == 0U) ||
+ (statp->options & RES_STAYOPEN) == 0U) {
+ res_nclose(statp);
+ }
+ if (statp->rhook) {
+ int done = 0, loops = 0;
+
+ do {
+ res_sendhookact act;
+
+ act = (*statp->rhook)(nsap, buf, buflen,
+ ans, anssiz, &resplen);
+ switch (act) {
+ case res_goahead:
+ case res_done:
+ done = 1;
+ break;
+ case res_nextns:
+ res_nclose(statp);
+ goto next_ns;
+ case res_modified:
+ /* give the hook another try */
+ if (++loops < 42) /*doug adams*/
+ break;
+ /*FALLTHROUGH*/
+ case res_error:
+ /*FALLTHROUGH*/
+ default:
+ goto fail;
+ }
+ } while (!done);
+
+ }
+ return (resplen);
+ next_ns: ;
+ } /*foreach ns*/
+ } /*foreach retry*/
+ res_nclose(statp);
+ if (!v_circuit) {
+ if (!gotsomewhere)
+ errno = ECONNREFUSED; /*%< no nameservers found */
+ else
+ errno = ETIMEDOUT; /*%< no answer obtained */
+ } else
+ errno = terrno;
+ return (-1);
+ fail:
+ res_nclose(statp);
+ return (-1);
+}
+
+/* Private */
+
+static int
+get_salen(sa)
+ const struct sockaddr *sa;
+{
+
+#ifdef HAVE_SA_LEN
+ /* There are people do not set sa_len. Be forgiving to them. */
+ if (sa->sa_len)
+ return (sa->sa_len);
+#endif
+
+ if (sa->sa_family == AF_INET)
+ return (sizeof(struct sockaddr_in));
+ else if (sa->sa_family == AF_INET6)
+ return (sizeof(struct sockaddr_in6));
+ else
+ return (0); /*%< unknown, die on connect */
+}
+
+/*%
+ * pick appropriate nsaddr_list for use. see res_init() for initialization.
+ */
+static struct sockaddr *
+get_nsaddr(statp, n)
+ res_state statp;
+ size_t n;
+{
+
+ if (!statp->nsaddr_list[n].sin_family && EXT(statp).ext) {
+ /*
+ * - EXT(statp).ext->nsaddrs[n] holds an address that is larger
+ * than struct sockaddr, and
+ * - user code did not update statp->nsaddr_list[n].
+ */
+ return (struct sockaddr *)(void *)&EXT(statp).ext->nsaddrs[n];
+ } else {
+ /*
+ * - user code updated statp->nsaddr_list[n], or
+ * - statp->nsaddr_list[n] has the same content as
+ * EXT(statp).ext->nsaddrs[n].
+ */
+ return (struct sockaddr *)(void *)&statp->nsaddr_list[n];
+ }
+}
+
+static int
+send_vc(res_state statp,
+ const u_char *buf, int buflen, u_char *ans, int anssiz,
+ int *terrno, int ns)
+{
+ const HEADER *hp = (const HEADER *) buf;
+ HEADER *anhp = (HEADER *) ans;
+ struct sockaddr *nsap;
+ int nsaplen;
+ int truncating, connreset, resplen, n;
+ struct iovec iov[2];
+ u_short len;
+ u_char *cp;
+ void *tmp;
+#ifdef SO_NOSIGPIPE
+ int on = 1;
+#endif
+
+ nsap = get_nsaddr(statp, ns);
+ nsaplen = get_salen(nsap);
+
+ connreset = 0;
+ same_ns:
+ truncating = 0;
+
+ /* Are we still talking to whom we want to talk to? */
+ if (statp->_vcsock >= 0 && (statp->_flags & RES_F_VC) != 0) {
+ struct sockaddr_storage peer;
+ ISC_SOCKLEN_T size = sizeof peer;
+
+ if (getpeername(statp->_vcsock,
+ (struct sockaddr *)&peer, &size) < 0 ||
+ !sock_eq((struct sockaddr *)&peer, nsap)) {
+ res_nclose(statp);
+ statp->_flags &= ~RES_F_VC;
+ }
+ }
+
+ if (statp->_vcsock < 0 || (statp->_flags & RES_F_VC) == 0) {
+ if (statp->_vcsock >= 0)
+ res_nclose(statp);
+
+ statp->_vcsock = socket(nsap->sa_family, SOCK_STREAM, 0);
+ if (statp->_vcsock > highestFD) {
+ res_nclose(statp);
+ errno = ENOTSOCK;
+ }
+ if (statp->_vcsock < 0) {
+ switch (errno) {
+ case EPROTONOSUPPORT:
+#ifdef EPFNOSUPPORT
+ case EPFNOSUPPORT:
+#endif
+ case EAFNOSUPPORT:
+ Perror(statp, stderr, "socket(vc)", errno);
+ return (0);
+ default:
+ *terrno = errno;
+ Perror(statp, stderr, "socket(vc)", errno);
+ return (-1);
+ }
+ }
+#ifdef SO_NOSIGPIPE
+ /*
+ * Disable generation of SIGPIPE when writing to a closed
+ * socket. Write should return -1 and set errno to EPIPE
+ * instead.
+ *
+ * Push on even if setsockopt(SO_NOSIGPIPE) fails.
+ */
+ (void)setsockopt(statp->_vcsock, SOL_SOCKET, SO_NOSIGPIPE, &on,
+ sizeof(on));
+#endif
+ errno = 0;
+ if (connect(statp->_vcsock, nsap, nsaplen) < 0) {
+ *terrno = errno;
+ Aerror(statp, stderr, "connect/vc", errno, nsap,
+ nsaplen);
+ res_nclose(statp);
+ return (0);
+ }
+ statp->_flags |= RES_F_VC;
+ }
+
+ /*
+ * Send length & message
+ */
+ ns_put16((u_short)buflen, (u_char*)&len);
+ iov[0] = evConsIovec(&len, INT16SZ);
+ DE_CONST(buf, tmp);
+ iov[1] = evConsIovec(tmp, buflen);
+ if (writev(statp->_vcsock, iov, 2) != (INT16SZ + buflen)) {
+ *terrno = errno;
+ Perror(statp, stderr, "write failed", errno);
+ res_nclose(statp);
+ return (0);
+ }
+ /*
+ * Receive length & response
+ */
+ read_len:
+ cp = ans;
+ len = INT16SZ;
+ while ((n = read(statp->_vcsock, (char *)cp, (int)len)) > 0) {
+ cp += n;
+ if ((len -= n) == 0)
+ break;
+ }
+ if (n <= 0) {
+ *terrno = errno;
+ Perror(statp, stderr, "read failed", errno);
+ res_nclose(statp);
+ /*
+ * A long running process might get its TCP
+ * connection reset if the remote server was
+ * restarted. Requery the server instead of
+ * trying a new one. When there is only one
+ * server, this means that a query might work
+ * instead of failing. We only allow one reset
+ * per query to prevent looping.
+ */
+ if (*terrno == ECONNRESET && !connreset) {
+ connreset = 1;
+ res_nclose(statp);
+ goto same_ns;
+ }
+ res_nclose(statp);
+ return (0);
+ }
+ resplen = ns_get16(ans);
+ if (resplen > anssiz) {
+ Dprint(statp->options & RES_DEBUG,
+ (stdout, ";; response truncated\n")
+ );
+ truncating = 1;
+ len = anssiz;
+ } else
+ len = resplen;
+ if (len < HFIXEDSZ) {
+ /*
+ * Undersized message.
+ */
+ Dprint(statp->options & RES_DEBUG,
+ (stdout, ";; undersized: %d\n", len));
+ *terrno = EMSGSIZE;
+ res_nclose(statp);
+ return (0);
+ }
+ cp = ans;
+ while (len != 0 && (n = read(statp->_vcsock, (char *)cp, (int)len)) > 0){
+ cp += n;
+ len -= n;
+ }
+ if (n <= 0) {
+ *terrno = errno;
+ Perror(statp, stderr, "read(vc)", errno);
+ res_nclose(statp);
+ return (0);
+ }
+ if (truncating) {
+ /*
+ * Flush rest of answer so connection stays in synch.
+ */
+ anhp->tc = 1;
+ len = resplen - anssiz;
+ while (len != 0) {
+ char junk[PACKETSZ];
+
+ n = read(statp->_vcsock, junk,
+ (len > sizeof junk) ? sizeof junk : len);
+ if (n > 0)
+ len -= n;
+ else
+ break;
+ }
+ }
+ /*
+ * If the calling applicating has bailed out of
+ * a previous call and failed to arrange to have
+ * the circuit closed or the server has got
+ * itself confused, then drop the packet and
+ * wait for the correct one.
+ */
+ if (hp->id != anhp->id) {
+ DprintQ((statp->options & RES_DEBUG) ||
+ (statp->pfcode & RES_PRF_REPLY),
+ (stdout, ";; old answer (unexpected):\n"),
+ ans, (resplen > anssiz) ? anssiz: resplen);
+ goto read_len;
+ }
+
+ /*
+ * All is well, or the error is fatal. Signal that the
+ * next nameserver ought not be tried.
+ */
+ return (resplen);
+}
+
+static int
+send_dg(res_state statp, const u_char *buf, int buflen, u_char *ans,
+ int anssiz, int *terrno, int ns, int tries, int *v_circuit,
+ int *gotsomewhere)
+{
+ const HEADER *hp = (const HEADER *) buf;
+ HEADER *anhp = (HEADER *) ans;
+ const struct sockaddr *nsap;
+ int nsaplen;
+ struct timespec now, timeout, finish;
+ struct sockaddr_storage from;
+ ISC_SOCKLEN_T fromlen;
+ int resplen, seconds, n, s;
+#ifdef USE_POLL
+ int polltimeout;
+ struct pollfd pollfd;
+#else
+ fd_set dsmask;
+#endif
+
+ nsap = get_nsaddr(statp, ns);
+ nsaplen = get_salen(nsap);
+ if (EXT(statp).nssocks[ns] == -1) {
+ EXT(statp).nssocks[ns] = socket(nsap->sa_family, SOCK_DGRAM, 0);
+ if (EXT(statp).nssocks[ns] > highestFD) {
+ res_nclose(statp);
+ errno = ENOTSOCK;
+ }
+ if (EXT(statp).nssocks[ns] < 0) {
+ switch (errno) {
+ case EPROTONOSUPPORT:
+#ifdef EPFNOSUPPORT
+ case EPFNOSUPPORT:
+#endif
+ case EAFNOSUPPORT:
+ Perror(statp, stderr, "socket(dg)", errno);
+ return (0);
+ default:
+ *terrno = errno;
+ Perror(statp, stderr, "socket(dg)", errno);
+ return (-1);
+ }
+ }
+#ifndef CANNOT_CONNECT_DGRAM
+ /*
+ * On a 4.3BSD+ machine (client and server,
+ * actually), sending to a nameserver datagram
+ * port with no nameserver will cause an
+ * ICMP port unreachable message to be returned.
+ * If our datagram socket is "connected" to the
+ * server, we get an ECONNREFUSED error on the next
+ * socket operation, and select returns if the
+ * error message is received. We can thus detect
+ * the absence of a nameserver without timing out.
+ */
+ if (connect(EXT(statp).nssocks[ns], nsap, nsaplen) < 0) {
+ Aerror(statp, stderr, "connect(dg)", errno, nsap,
+ nsaplen);
+ res_nclose(statp);
+ return (0);
+ }
+#endif /* !CANNOT_CONNECT_DGRAM */
+ Dprint(statp->options & RES_DEBUG,
+ (stdout, ";; new DG socket\n"))
+ }
+ s = EXT(statp).nssocks[ns];
+#ifndef CANNOT_CONNECT_DGRAM
+ if (send(s, (const char*)buf, buflen, 0) != buflen) {
+ Perror(statp, stderr, "send", errno);
+ res_nclose(statp);
+ return (0);
+ }
+#else /* !CANNOT_CONNECT_DGRAM */
+ if (sendto(s, (const char*)buf, buflen, 0, nsap, nsaplen) != buflen)
+ {
+ Aerror(statp, stderr, "sendto", errno, nsap, nsaplen);
+ res_nclose(statp);
+ return (0);
+ }
+#endif /* !CANNOT_CONNECT_DGRAM */
+
+ /*
+ * Wait for reply.
+ */
+ seconds = (statp->retrans << tries);
+ if (ns > 0)
+ seconds /= statp->nscount;
+ if (seconds <= 0)
+ seconds = 1;
+ now = evNowTime();
+ timeout = evConsTime(seconds, 0);
+ finish = evAddTime(now, timeout);
+ goto nonow;
+ wait:
+ now = evNowTime();
+ nonow:
+#ifndef USE_POLL
+ FD_ZERO(&dsmask);
+ FD_SET(s, &dsmask);
+ if (evCmpTime(finish, now) > 0)
+ timeout = evSubTime(finish, now);
+ else
+ timeout = evConsTime(0, 0);
+ n = pselect(s + 1, &dsmask, NULL, NULL, &timeout, NULL);
+#else
+ timeout = evSubTime(finish, now);
+ if (timeout.tv_sec < 0)
+ timeout = evConsTime(0, 0);
+ polltimeout = 1000*timeout.tv_sec +
+ timeout.tv_nsec/1000000;
+ pollfd.fd = s;
+ pollfd.events = POLLRDNORM;
+ n = poll(&pollfd, 1, polltimeout);
+#endif /* USE_POLL */
+
+ if (n == 0) {
+ Dprint(statp->options & RES_DEBUG, (stdout, ";; timeout\n"));
+ *gotsomewhere = 1;
+ return (0);
+ }
+ if (n < 0) {
+ if (errno == EINTR)
+ goto wait;
+#ifndef USE_POLL
+ Perror(statp, stderr, "select", errno);
+#else
+ Perror(statp, stderr, "poll", errno);
+#endif /* USE_POLL */
+ res_nclose(statp);
+ return (0);
+ }
+ errno = 0;
+ fromlen = sizeof(from);
+ resplen = recvfrom(s, (char*)ans, anssiz,0,
+ (struct sockaddr *)&from, &fromlen);
+ if (resplen <= 0) {
+ Perror(statp, stderr, "recvfrom", errno);
+ res_nclose(statp);
+ return (0);
+ }
+ *gotsomewhere = 1;
+ if (resplen < HFIXEDSZ) {
+ /*
+ * Undersized message.
+ */
+ Dprint(statp->options & RES_DEBUG,
+ (stdout, ";; undersized: %d\n",
+ resplen));
+ *terrno = EMSGSIZE;
+ res_nclose(statp);
+ return (0);
+ }
+ if (hp->id != anhp->id) {
+ /*
+ * response from old query, ignore it.
+ * XXX - potential security hazard could
+ * be detected here.
+ */
+ DprintQ((statp->options & RES_DEBUG) ||
+ (statp->pfcode & RES_PRF_REPLY),
+ (stdout, ";; old answer:\n"),
+ ans, (resplen > anssiz) ? anssiz : resplen);
+ goto wait;
+ }
+ if (!(statp->options & RES_INSECURE1) &&
+ !res_ourserver_p(statp, (struct sockaddr *)&from)) {
+ /*
+ * response from wrong server? ignore it.
+ * XXX - potential security hazard could
+ * be detected here.
+ */
+ DprintQ((statp->options & RES_DEBUG) ||
+ (statp->pfcode & RES_PRF_REPLY),
+ (stdout, ";; not our server:\n"),
+ ans, (resplen > anssiz) ? anssiz : resplen);
+ goto wait;
+ }
+#ifdef RES_USE_EDNS0
+ if (anhp->rcode == FORMERR && (statp->options & RES_USE_EDNS0) != 0U) {
+ /*
+ * Do not retry if the server do not understand EDNS0.
+ * The case has to be captured here, as FORMERR packet do not
+ * carry query section, hence res_queriesmatch() returns 0.
+ */
+ DprintQ(statp->options & RES_DEBUG,
+ (stdout, "server rejected query with EDNS0:\n"),
+ ans, (resplen > anssiz) ? anssiz : resplen);
+ /* record the error */
+ statp->_flags |= RES_F_EDNS0ERR;
+ res_nclose(statp);
+ return (0);
+ }
+#endif
+ if (!(statp->options & RES_INSECURE2) &&
+ !res_queriesmatch(buf, buf + buflen,
+ ans, ans + anssiz)) {
+ /*
+ * response contains wrong query? ignore it.
+ * XXX - potential security hazard could
+ * be detected here.
+ */
+ DprintQ((statp->options & RES_DEBUG) ||
+ (statp->pfcode & RES_PRF_REPLY),
+ (stdout, ";; wrong query name:\n"),
+ ans, (resplen > anssiz) ? anssiz : resplen);
+ goto wait;
+ }
+ if (anhp->rcode == SERVFAIL ||
+ anhp->rcode == NOTIMP ||
+ anhp->rcode == REFUSED) {
+ DprintQ(statp->options & RES_DEBUG,
+ (stdout, "server rejected query:\n"),
+ ans, (resplen > anssiz) ? anssiz : resplen);
+ res_nclose(statp);
+ /* don't retry if called from dig */
+ if (!statp->pfcode)
+ return (0);
+ }
+ if (!(statp->options & RES_IGNTC) && anhp->tc) {
+ /*
+ * To get the rest of answer,
+ * use TCP with same server.
+ */
+ Dprint(statp->options & RES_DEBUG,
+ (stdout, ";; truncated answer\n"));
+ *v_circuit = 1;
+ res_nclose(statp);
+ return (1);
+ }
+ /*
+ * All is well, or the error is fatal. Signal that the
+ * next nameserver ought not be tried.
+ */
+ return (resplen);
+}
+
+static void
+Aerror(const res_state statp, FILE *file, const char *string, int error,
+ const struct sockaddr *address, int alen)
+{
+ int save = errno;
+ char hbuf[NI_MAXHOST];
+ char sbuf[NI_MAXSERV];
+
+ alen = alen;
+
+ if ((statp->options & RES_DEBUG) != 0U) {
+ if (getnameinfo(address, alen, hbuf, sizeof(hbuf),
+ sbuf, sizeof(sbuf), niflags)) {
+ strncpy(hbuf, "?", sizeof(hbuf) - 1);
+ hbuf[sizeof(hbuf) - 1] = '\0';
+ strncpy(sbuf, "?", sizeof(sbuf) - 1);
+ sbuf[sizeof(sbuf) - 1] = '\0';
+ }
+ fprintf(file, "res_send: %s ([%s].%s): %s\n",
+ string, hbuf, sbuf, strerror(error));
+ }
+ errno = save;
+}
+
+static void
+Perror(const res_state statp, FILE *file, const char *string, int error) {
+ int save = errno;
+
+ if ((statp->options & RES_DEBUG) != 0U)
+ fprintf(file, "res_send: %s: %s\n",
+ string, strerror(error));
+ errno = save;
+}
+
+static int
+sock_eq(struct sockaddr *a, struct sockaddr *b) {
+ struct sockaddr_in *a4, *b4;
+ struct sockaddr_in6 *a6, *b6;
+
+ if (a->sa_family != b->sa_family)
+ return 0;
+ switch (a->sa_family) {
+ case AF_INET:
+ a4 = (struct sockaddr_in *)a;
+ b4 = (struct sockaddr_in *)b;
+ return a4->sin_port == b4->sin_port &&
+ a4->sin_addr.s_addr == b4->sin_addr.s_addr;
+ case AF_INET6:
+ a6 = (struct sockaddr_in6 *)a;
+ b6 = (struct sockaddr_in6 *)b;
+ return a6->sin6_port == b6->sin6_port &&
+#ifdef HAVE_SIN6_SCOPE_ID
+ a6->sin6_scope_id == b6->sin6_scope_id &&
+#endif
+ IN6_ARE_ADDR_EQUAL(&a6->sin6_addr, &b6->sin6_addr);
+ default:
+ return 0;
+ }
+}
+
+#if defined(NEED_PSELECT) && !defined(USE_POLL)
+/* XXX needs to move to the porting library. */
+static int
+pselect(int nfds, void *rfds, void *wfds, void *efds,
+ struct timespec *tsp, const sigset_t *sigmask)
+{
+ struct timeval tv, *tvp;
+ sigset_t sigs;
+ int n;
+
+ if (tsp) {
+ tvp = &tv;
+ tv = evTimeVal(*tsp);
+ } else
+ tvp = NULL;
+ if (sigmask)
+ sigprocmask(SIG_SETMASK, sigmask, &sigs);
+ n = select(nfds, rfds, wfds, efds, tvp);
+ if (sigmask)
+ sigprocmask(SIG_SETMASK, &sigs, NULL);
+ if (tsp)
+ *tsp = evTimeSpec(tv);
+ return (n);
+}
+#endif
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/res_sendsigned.c b/usr/src/lib/libresolv2_joy/common/resolv/res_sendsigned.c
new file mode 100644
index 0000000000..5ebc1a70eb
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/res_sendsigned.c
@@ -0,0 +1,170 @@
+#include "port_before.h"
+#include "fd_setsize.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <arpa/inet.h>
+
+#include <isc/dst.h>
+
+#include <errno.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "port_after.h"
+
+#define DEBUG
+#include "res_debug.h"
+
+
+/*% res_nsendsigned */
+int
+res_nsendsigned(res_state statp, const u_char *msg, int msglen,
+ ns_tsig_key *key, u_char *answer, int anslen)
+{
+ res_state nstatp;
+ DST_KEY *dstkey;
+ int usingTCP = 0;
+ u_char *newmsg;
+ int newmsglen, bufsize, siglen;
+ u_char sig[64];
+ HEADER *hp;
+ time_t tsig_time;
+ int ret;
+ int len;
+
+ dst_init();
+
+ nstatp = (res_state) malloc(sizeof(*statp));
+ if (nstatp == NULL) {
+ errno = ENOMEM;
+ return (-1);
+ }
+ memcpy(nstatp, statp, sizeof(*statp));
+
+ bufsize = msglen + 1024;
+ newmsg = (u_char *) malloc(bufsize);
+ if (newmsg == NULL) {
+ free(nstatp);
+ errno = ENOMEM;
+ return (-1);
+ }
+ memcpy(newmsg, msg, msglen);
+ newmsglen = msglen;
+
+ if (ns_samename(key->alg, NS_TSIG_ALG_HMAC_MD5) != 1)
+ dstkey = NULL;
+ else
+ dstkey = dst_buffer_to_key(key->name, KEY_HMAC_MD5,
+ NS_KEY_TYPE_AUTH_ONLY,
+ NS_KEY_PROT_ANY,
+ key->data, key->len);
+ if (dstkey == NULL) {
+ errno = EINVAL;
+ free(nstatp);
+ free(newmsg);
+ return (-1);
+ }
+
+ nstatp->nscount = 1;
+ siglen = sizeof(sig);
+ ret = ns_sign(newmsg, &newmsglen, bufsize, NOERROR, dstkey, NULL, 0,
+ sig, &siglen, 0);
+ if (ret < 0) {
+ free (nstatp);
+ free (newmsg);
+ dst_free_key(dstkey);
+ if (ret == NS_TSIG_ERROR_NO_SPACE)
+ errno = EMSGSIZE;
+ else if (ret == -1)
+ errno = EINVAL;
+ return (ret);
+ }
+
+ if (newmsglen > PACKETSZ || nstatp->options & RES_USEVC)
+ usingTCP = 1;
+ if (usingTCP == 0)
+ nstatp->options |= RES_IGNTC;
+ else
+ nstatp->options |= RES_USEVC;
+ /*
+ * Stop res_send printing the answer.
+ */
+ nstatp->options &= ~RES_DEBUG;
+ nstatp->pfcode &= ~RES_PRF_REPLY;
+
+retry:
+
+ len = res_nsend(nstatp, newmsg, newmsglen, answer, anslen);
+ if (len < 0) {
+ free (nstatp);
+ free (newmsg);
+ dst_free_key(dstkey);
+ return (len);
+ }
+
+ ret = ns_verify(answer, &len, dstkey, sig, siglen,
+ NULL, NULL, &tsig_time, nstatp->options & RES_KEEPTSIG);
+ if (ret != 0) {
+ Dprint((statp->options & RES_DEBUG) ||
+ ((statp->pfcode & RES_PRF_REPLY) &&
+ (statp->pfcode & RES_PRF_HEAD1)),
+ (stdout, ";; got answer:\n"));
+
+ DprintQ((statp->options & RES_DEBUG) ||
+ (statp->pfcode & RES_PRF_REPLY),
+ (stdout, "%s", ""),
+ answer, (anslen > len) ? len : anslen);
+
+ if (ret > 0) {
+ Dprint(statp->pfcode & RES_PRF_REPLY,
+ (stdout, ";; server rejected TSIG (%s)\n",
+ p_rcode(ret)));
+ } else {
+ Dprint(statp->pfcode & RES_PRF_REPLY,
+ (stdout, ";; TSIG invalid (%s)\n",
+ p_rcode(-ret)));
+ }
+
+ free (nstatp);
+ free (newmsg);
+ dst_free_key(dstkey);
+ if (ret == -1)
+ errno = EINVAL;
+ else
+ errno = ENOTTY;
+ return (-1);
+ }
+
+ hp = (HEADER *) answer;
+ if (hp->tc && !usingTCP && (statp->options & RES_IGNTC) == 0U) {
+ nstatp->options &= ~RES_IGNTC;
+ usingTCP = 1;
+ goto retry;
+ }
+ Dprint((statp->options & RES_DEBUG) ||
+ ((statp->pfcode & RES_PRF_REPLY) &&
+ (statp->pfcode & RES_PRF_HEAD1)),
+ (stdout, ";; got answer:\n"));
+
+ DprintQ((statp->options & RES_DEBUG) ||
+ (statp->pfcode & RES_PRF_REPLY),
+ (stdout, "%s", ""),
+ answer, (anslen > len) ? len : anslen);
+
+ Dprint(statp->pfcode & RES_PRF_REPLY, (stdout, ";; TSIG ok\n"));
+
+ free (nstatp);
+ free (newmsg);
+ dst_free_key(dstkey);
+ return (len);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/res_update.c b/usr/src/lib/libresolv2_joy/common/resolv/res_update.c
new file mode 100644
index 0000000000..df24aee3bd
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/res_update.c
@@ -0,0 +1,213 @@
+#if !defined(lint) && !defined(SABER)
+static const char rcsid[] = "$Id: res_update.c,v 1.13 2005/04/27 04:56:43 sra Exp $";
+#endif /* not lint */
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*! \file
+ * \brief
+ * Based on the Dynamic DNS reference implementation by Viraj Bais
+ * &lt;viraj_bais@ccm.fm.intel.com>
+ */
+
+#include "port_before.h"
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <limits.h>
+#include <netdb.h>
+#include <res_update.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <isc/list.h>
+#include <resolv_joy.h>
+
+#include "port_after.h"
+#include "res_private.h"
+
+/*%
+ * Separate a linked list of records into groups so that all records
+ * in a group will belong to a single zone on the nameserver.
+ * Create a dynamic update packet for each zone and send it to the
+ * nameservers for that zone, and await answer.
+ * Abort if error occurs in updating any zone.
+ * Return the number of zones updated on success, < 0 on error.
+ *
+ * On error, caller must deal with the unsynchronized zones
+ * eg. an A record might have been successfully added to the forward
+ * zone but the corresponding PTR record would be missing if error
+ * was encountered while updating the reverse zone.
+ */
+
+struct zonegrp {
+ char z_origin[MAXDNAME];
+ ns_class z_class;
+ union res_sockaddr_union z_nsaddrs[MAXNS];
+ int z_nscount;
+ int z_flags;
+ LIST(ns_updrec) z_rrlist;
+ LINK(struct zonegrp) z_link;
+};
+
+#define ZG_F_ZONESECTADDED 0x0001
+
+/* Forward. */
+
+static void res_dprintf(const char *, ...) ISC_FORMAT_PRINTF(1, 2);
+
+/* Macros. */
+
+#define DPRINTF(x) do {\
+ int save_errno = errno; \
+ if ((statp->options & RES_DEBUG) != 0U) res_dprintf x; \
+ errno = save_errno; \
+ } while (0)
+
+/* Public. */
+
+int
+res_nupdate(res_state statp, ns_updrec *rrecp_in, ns_tsig_key *key) {
+ ns_updrec *rrecp;
+ u_char answer[PACKETSZ];
+ u_char *packet;
+ struct zonegrp *zptr, tgrp;
+ LIST(struct zonegrp) zgrps;
+ int nzones = 0, nscount = 0, n;
+ union res_sockaddr_union nsaddrs[MAXNS];
+
+ packet = malloc(NS_MAXMSG);
+ if (packet == NULL) {
+ DPRINTF(("malloc failed"));
+ return (0);
+ }
+ /* Thread all of the updates onto a list of groups. */
+ INIT_LIST(zgrps);
+ memset(&tgrp, 0, sizeof (tgrp));
+ for (rrecp = rrecp_in; rrecp;
+ rrecp = LINKED(rrecp, r_link) ? NEXT(rrecp, r_link) : NULL) {
+ int nscnt;
+ /* Find the origin for it if there is one. */
+ tgrp.z_class = rrecp->r_class;
+ nscnt = res_findzonecut2(statp, rrecp->r_dname, tgrp.z_class,
+ RES_EXHAUSTIVE, tgrp.z_origin,
+ sizeof tgrp.z_origin,
+ tgrp.z_nsaddrs, MAXNS);
+ if (nscnt <= 0) {
+ DPRINTF(("res_findzonecut failed (%d)", nscnt));
+ goto done;
+ }
+ tgrp.z_nscount = nscnt;
+ /* Find the group for it if there is one. */
+ for (zptr = HEAD(zgrps); zptr != NULL; zptr = NEXT(zptr, z_link))
+ if (ns_samename(tgrp.z_origin, zptr->z_origin) == 1 &&
+ tgrp.z_class == zptr->z_class)
+ break;
+ /* Make a group for it if there isn't one. */
+ if (zptr == NULL) {
+ zptr = malloc(sizeof *zptr);
+ if (zptr == NULL) {
+ DPRINTF(("malloc failed"));
+ goto done;
+ }
+ *zptr = tgrp;
+ zptr->z_flags = 0;
+ INIT_LINK(zptr, z_link);
+ INIT_LIST(zptr->z_rrlist);
+ APPEND(zgrps, zptr, z_link);
+ }
+ /* Thread this rrecp onto the right group. */
+ APPEND(zptr->z_rrlist, rrecp, r_glink);
+ }
+
+ for (zptr = HEAD(zgrps); zptr != NULL; zptr = NEXT(zptr, z_link)) {
+ /* Construct zone section and prepend it. */
+ rrecp = res_mkupdrec(ns_s_zn, zptr->z_origin,
+ zptr->z_class, ns_t_soa, 0);
+ if (rrecp == NULL) {
+ DPRINTF(("res_mkupdrec failed"));
+ goto done;
+ }
+ PREPEND(zptr->z_rrlist, rrecp, r_glink);
+ zptr->z_flags |= ZG_F_ZONESECTADDED;
+
+ /* Marshall the update message. */
+ n = res_nmkupdate(statp, HEAD(zptr->z_rrlist),
+ packet, NS_MAXMSG);
+ DPRINTF(("res_mkupdate -> %d", n));
+ if (n < 0)
+ goto done;
+
+ /* Temporarily replace the resolver's nameserver set. */
+ nscount = res_getservers(statp, nsaddrs, MAXNS);
+ res_setservers(statp, zptr->z_nsaddrs, zptr->z_nscount);
+
+ /* Send the update and remember the result. */
+ if (key != NULL)
+ n = res_nsendsigned(statp, packet, n, key,
+ answer, sizeof answer);
+ else
+ n = res_nsend(statp, packet, n, answer, sizeof answer);
+ if (n < 0) {
+ DPRINTF(("res_nsend: send error, n=%d (%s)\n",
+ n, strerror(errno)));
+ goto done;
+ }
+ if (((HEADER *)answer)->rcode == NOERROR)
+ nzones++;
+
+ /* Restore resolver's nameserver set. */
+ res_setservers(statp, nsaddrs, nscount);
+ nscount = 0;
+ }
+ done:
+ while (!EMPTY(zgrps)) {
+ zptr = HEAD(zgrps);
+ if ((zptr->z_flags & ZG_F_ZONESECTADDED) != 0)
+ res_freeupdrec(HEAD(zptr->z_rrlist));
+ UNLINK(zgrps, zptr, z_link);
+ free(zptr);
+ }
+ if (nscount != 0)
+ res_setservers(statp, nsaddrs, nscount);
+
+ free(packet);
+ return (nzones);
+}
+
+/* Private. */
+
+static void
+res_dprintf(const char *fmt, ...) {
+ va_list ap;
+
+ va_start(ap, fmt);
+ fputs(";; res_nupdate: ", stderr);
+ vfprintf(stderr, fmt, ap);
+ fputc('\n', stderr);
+ va_end(ap);
+}
diff --git a/usr/src/lib/libresolv2_joy/common/sunw/sunw_mtctxres.c b/usr/src/lib/libresolv2_joy/common/sunw/sunw_mtctxres.c
new file mode 100644
index 0000000000..cc2a485ede
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/sunw/sunw_mtctxres.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <port_before.h>
+#include <thread.h>
+#include <errno.h>
+#include <netdb.h>
+#include <malloc.h>
+#include <string.h>
+#include <resolv_mt.h>
+#include <irs.h>
+#include <port_after.h>
+
+#pragma redefine_extname __h_errno __joy_h_errno
+
+/*
+ * much of the original version of sunw_mtxtxres.c was incorporated into
+ * ISC libbind as resolv/mtctxres.c. The following bits have not yet made
+ * it into ISC libbind.
+ */
+
+/*
+ * There used to be a private, MT-safe resolver interface that used TSD
+ * to store per-thread _res, h_errno, etc. We continue to provide the
+ * access functions __res_get_res() and __res_get_h_errno() so that binaries
+ * that used the private interface will continue to work.
+ */
+
+#ifdef _res
+#undef _res
+#endif
+
+extern struct __res_state *__res_state(void);
+
+struct __res_state *
+__res_get_res(void) {
+ return (__res_state());
+}
+
+
+#ifdef h_errno
+#undef h_errno
+#endif
+
+extern int *__h_errno(void);
+
+int *
+__res_get_h_errno(void) {
+ return (__h_errno());
+}
+
+
+#ifdef SUNW_HOSTS_FALLBACK
+
+/*
+ * When the name service switch calls libresolv, it doesn't want fallback
+ * to /etc/hosts, so we provide a method to turn it off.
+ */
+
+void
+__joy_res_set_no_hosts_fallback(void) {
+ ___mtctxres()->no_hosts_fallback_private = 1;
+}
+
+void
+__joy_res_unset_no_hosts_fallback(void) {
+ ___mtctxres()->no_hosts_fallback_private = 0;
+}
+
+int
+__res_no_hosts_fallback(void) {
+ return (___mtctxres()->no_hosts_fallback_private);
+}
+
+#endif /* SUNW_HOSTS_FALLBACK */
+
+#ifdef SUNW_OVERRIDE_RETRY
+
+/*
+ * The NS switch wants to be able to override the number of retries.
+ */
+
+int
+__joy_res_override_retry(int retry) {
+ ___mtctxres()->retry_private = retry;
+ /*
+ * This function doesn't really need a return value; saving the
+ * old retry setting, and restoring it, is handled by __res_retry()
+ * and __res_retry_reset() below. However, the nss_dns library
+ * must have a private version of this function to be used when
+ * running with an old libresolv. That private nss_dns function
+ * needs a return value, and a function pointer is used to select
+ * the right function at runtime. Thus, __res_override_retry
+ * must have a function prototype consistent with the private
+ * nss_dns function, i.e., one that returns an int.
+ *
+ * Given that we do have a return value, that value must be zero.
+ * That's because retry_private == 0 is used to indicate that
+ * no override retry value is in effect, and the way we expect
+ * nss_dns to call us is:
+ *
+ * int oldretry = __res_override_retry(N);
+ * <whatever>
+ * (void)__res_override_retry(old_retry);
+ */
+ return (0);
+}
+
+int
+__res_retry(int retry) {
+ mtctxres_t *mt = ___mtctxres();
+
+ mt->retry_save = retry;
+ return ((mt->retry_private != 0) ? mt->retry_private : retry);
+}
+
+int
+__res_retry_reset(void) {
+ mtctxres_t *mt = ___mtctxres();
+
+ return (mt->retry_save);
+}
+
+#endif /* SUNW_OVERRIDE_RETRY */
diff --git a/usr/src/lib/libresolv2_joy/common/sunw/sunw_updrec.c b/usr/src/lib/libresolv2_joy/common/sunw/sunw_updrec.c
new file mode 100644
index 0000000000..3b0ffc49df
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/sunw/sunw_updrec.c
@@ -0,0 +1,249 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * As of BIND 8.2.2, ISC (a) removed res_mkupdate(), res_update(), and
+ * res_mkupdrec() from what they consider the supported interface. The
+ * functions still exist, but their calling interface has changed, since
+ * the ns_updrec structure has changed.
+ *
+ * It seems probable that res_mkupdate() etc. will return, though possibly
+ * with other changes, in some future BIND release. In order to avoid
+ * going to PSARC twice (once to remove the functions, and then again to
+ * add them back), we retain the old interface as a wrapper around the
+ * new one.
+ */
+
+#include <port_before.h>
+
+#include <malloc.h>
+#include <strings.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+
+/* get the Solaris ns_updrec before any renaming happens */
+#include <arpa/nameser.h>
+
+/* get the __ISC_ns_updrec */
+#include <res_update.h>
+
+#include <port_after.h>
+
+/* un-rename ns_updrec and res_* functions so we can wrap them */
+#undef ns_updrec
+#undef res_mkupdate
+#undef res_update
+#undef res_mkupdrec
+#undef res_freeupdrec
+#undef res_nmkupdate
+#undef res_nupdate
+
+void res_freeupdrec(ns_updrec *);
+
+static int
+old2new(ns_updrec *old, __ISC_ns_updrec *new) {
+
+ if (old->r_dname != 0) {
+ if ((new->r_dname = strdup(old->r_dname)) == 0)
+ return (-1);
+ } else {
+ new->r_dname = 0;
+ }
+
+ new->r_glink.prev =
+ new->r_glink.next =
+ new->r_link.prev =
+ new->r_link.next = 0;
+
+ new->r_section = old->r_section;
+ new->r_class = old->r_class;
+ new->r_type = old->r_type;
+ new->r_ttl = old->r_ttl;
+ new->r_data = old->r_data;
+ new->r_size = old->r_size;
+ new->r_opcode = old->r_opcode;
+ new->r_dp = old->r_dp;
+ new->r_deldp = old->r_deldp;
+ new->r_zone = old->r_zone;
+
+ return (0);
+}
+
+
+static int
+new2old(__ISC_ns_updrec *new, ns_updrec *old) {
+ /* XXX r_prev and r_next unchanged */
+ if (new->r_dname != 0) {
+ if ((old->r_dname = strdup(new->r_dname)) == 0)
+ return (-1);
+ } else {
+ old->r_dname = 0;
+ }
+ old->r_section = new->r_section;
+ old->r_class = new->r_class;
+ old->r_type = new->r_type;
+ old->r_ttl = new->r_ttl;
+ old->r_data = new->r_data;
+ old->r_size = new->r_size;
+ old->r_opcode = new->r_opcode;
+ old->r_grpnext = 0; /* XXX */
+ old->r_dp = new->r_dp;
+ old->r_deldp = new->r_deldp;
+ old->r_zone = new->r_zone;
+
+ return (0);
+}
+
+
+static void
+delete_list(__ISC_ns_updrec *list) {
+
+ __ISC_ns_updrec *next;
+
+ for (; list != 0; list = next) {
+ next = list->r_link.next;
+ __ISC_res_freeupdrec(list);
+ }
+}
+
+
+static __ISC_ns_updrec *
+copy_list(ns_updrec *old, int do_glink) {
+
+ __ISC_ns_updrec *list = 0, *r, *p;
+
+ if (old == 0)
+ return (0);
+
+ for (p = 0; old != 0; old = old->r_next, p = r) {
+ if ((r = calloc(1, sizeof (*r))) == 0 ||
+ old2new(old, r) != 0) {
+ free(r);
+ delete_list(list);
+ return (0);
+ }
+ r->r_link.prev = p;
+ r->r_link.next = 0;
+ /* res_update and res_nupdate want r_glink set up like this */
+ if (do_glink) {
+ r->r_glink.prev = p;
+ r->r_glink.next = 0;
+ } else {
+ r->r_glink.prev = (void *)-1;
+ r->r_glink.next = (void *)-1;
+ }
+ if (p != 0) {
+ p->r_link.next = r;
+ if (do_glink) {
+ p->r_glink.next = r;
+ }
+ } else {
+ list = r;
+ }
+ }
+ return (list);
+}
+
+
+int
+res_mkupdate(ns_updrec *rrecp_in, uchar_t *buf, int length) {
+
+ __ISC_ns_updrec *r;
+ int ret;
+
+ if ((r = copy_list(rrecp_in, 1)) == 0)
+ return (-1);
+
+ ret = __ISC_res_mkupdate(r, buf, length);
+
+ delete_list(r);
+
+ return (ret);
+}
+
+int
+res_nmkupdate(res_state statp, ns_updrec *rrecp_in, uchar_t *buf, int length) {
+
+ __ISC_ns_updrec *r;
+ int ret;
+
+ if ((r = copy_list(rrecp_in, 1)) == 0)
+ return (-1);
+
+ ret = __ISC_res_nmkupdate(statp, r, buf, length);
+
+ delete_list(r);
+
+ return (ret);
+}
+
+
+int
+res_update(ns_updrec *rrecp_in) {
+
+ __ISC_ns_updrec *r;
+ int ret;
+
+ if ((r = copy_list(rrecp_in, 0)) == 0)
+ return (-1);
+
+ ret = __ISC_res_update(r);
+
+ delete_list(r);
+
+ return (ret);
+}
+
+int
+res_nupdate(res_state statp, ns_updrec *rrecp_in, ns_tsig_key *key) {
+
+ __ISC_ns_updrec *r;
+ int ret;
+
+ if ((r = copy_list(rrecp_in, 0)) == 0)
+ return (-1);
+
+ ret = __ISC_res_nupdate(statp, r, key);
+
+ delete_list(r);
+
+ return (ret);
+}
+
+
+
+ns_updrec *
+res_mkupdrec(int section, const char *dname, uint_t class, uint_t type,
+ uint_t ttl) {
+
+ __ISC_ns_updrec *n;
+ ns_updrec *o;
+
+ n = __ISC_res_mkupdrec(section, dname, class, type, ttl);
+ if (n == 0)
+ return (0);
+
+ if ((o = calloc(1, sizeof (*o))) != 0) {
+ if (new2old(n, o) != 0) {
+ res_freeupdrec(o);
+ o = 0;
+ }
+ }
+
+ __ISC_res_freeupdrec(n);
+
+ return (o);
+}
+
+
+void
+res_freeupdrec(ns_updrec *rrecp) {
+ if (rrecp == 0)
+ return;
+ /* Note: freeing r_dp is the caller's responsibility. */
+ if (rrecp->r_dname != NULL)
+ free(rrecp->r_dname);
+ free(rrecp);
+}
diff --git a/usr/src/lib/libresolv2_joy/common/sunw/sunw_wrappers.c b/usr/src/lib/libresolv2_joy/common/sunw/sunw_wrappers.c
new file mode 100644
index 0000000000..55bbe07024
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/sunw/sunw_wrappers.c
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <port_before.h>
+#include <resolv_joy.h>
+#include <arpa/inet.h>
+#include <port_after.h>
+
+#undef p_option
+/* extern const char * isc_p_option(); */
+const char *p_option(uint_t option) {
+ return (isc_p_option((ulong_t)option));
+}
+#pragma weak __p_option = p_option
+
+#undef p_secstodate
+/* extern char * isc_p_secstodate (); */
+char *p_secstodate(uint_t secs) {
+ return (isc_p_secstodate((ulong_t)secs));
+}
+#pragma weak __p_secstodate = p_secstodate
diff --git a/usr/src/lib/libresolv2_joy/i386/Makefile b/usr/src/lib/libresolv2_joy/i386/Makefile
new file mode 100644
index 0000000000..a333224278
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/i386/Makefile
@@ -0,0 +1,30 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/libresolv2_joy/include/Makefile b/usr/src/lib/libresolv2_joy/include/Makefile
new file mode 100644
index 0000000000..8bff3c3188
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/Makefile
@@ -0,0 +1,60 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../../../Makefile.master
+
+HDRS= os_version.h port_ipv6.h
+TMPHDRS= new_os_version.h new_port_ipv6.h
+
+all := TARGET= all
+clean := TARGET= clean
+clobber := TARGET= clobber
+install := TARGET= install
+
+.KEEP_STATE:
+
+all lint: $(HDRS)
+
+install: all
+
+clean:
+ $(RM) $(HDRS) $(TMPHDRS)
+
+clobber: clean
+
+# os_version.h and port_ipv6.h should be rebuilt when you change OS
+# revision. Since that's not easily expressed as a dependency, we
+# rebuild them every time.
+
+os_version.h: make_os_version FRC
+ ./make_os_version
+
+port_ipv6.h: probe_ipv6 FRC
+ CC="$(CC)" ./probe_ipv6
+
+FRC:
diff --git a/usr/src/lib/libresolv2_joy/include/arpa/port_inet.h b/usr/src/lib/libresolv2_joy/include/arpa/port_inet.h
new file mode 100644
index 0000000000..5eb1787f56
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/arpa/port_inet.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ *
+ * All rights reserved.
+ */
+
+#ifndef _ARPA_PORT_INET_H
+#define _ARPA_PORT_INET_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * these are libresolv2 functions that were made local in previous versions
+ * we rename them res_* because they conflict with libnsl or libsocket
+ */
+
+#define inet_lnaof res_inet_lnaof /* libsocket */
+ulong_t inet_lnaof(struct in_addr in);
+
+#define inet_makeaddr res_inet_makeaddr /* libsocket */
+struct in_addr inet_makeaddr(ulong_t net, ulong_t host);
+
+#define inet_netof res_inet_netof /* libnsl */
+ulong_t inet_netof(struct in_addr in);
+
+#define inet_network res_inet_network /* libsocket */
+ulong_t inet_network(register const char *cp);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+
+#endif /* _ARPA_PORT_INET_H */
diff --git a/usr/src/lib/libresolv2_joy/include/arpa/port_nameser.h b/usr/src/lib/libresolv2_joy/include/arpa/port_nameser.h
new file mode 100644
index 0000000000..b40ea0d163
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/arpa/port_nameser.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _ARPA_PORT_NAMESER_H
+#define _ARPA_PORT_NAMESER_H
+
+/*
+ * ISC changed the ns_updrec structure. However, it's a public interface
+ * in Solaris, so we rename it here and wrap in sunw_updrec.c
+ */
+#define ns_updrec __ISC_ns_updrec
+
+
+/*
+ * Due to the above, the following functions need to be renamed and
+ * wrapped in sunw_updrec.c.
+ *
+ * For BIND 8.2.2, ISC removed the dynamic update functions, and the
+ * definition of the ns_updrec structure, from the public include files
+ * (<resolv.h>, <arpa/nameser.h>. However, res_update(), res_mkupdate(),
+ * and res_mkupdrec() are in the public libresolv interface in Solaris,
+ * so we can't easily remove them. Thus, ISC's new versions of res_mkupdate()
+ * etc. can't be exposed under their original names.
+ *
+ * res_nmkupdate() and res_nupdate are new. We could either change them
+ * to accept the <arpa/nameser.h> ns_updrec, or leave them unchanged and
+ * undocumented. Since ISC may change ns_updrec again, we pick the latter
+ * solution for now.
+ */
+#define res_mkupdate __ISC_res_mkupdate
+#define res_update __ISC_res_update
+#define res_mkupdrec __ISC_res_mkupdrec
+#define res_freeupdrec __ISC_res_freeupdrec
+#define res_nmkupdate __ISC_res_nmkupdate
+#define res_nupdate __ISC_res_nupdate
+
+
+#endif /* _ARPA_PORT_NAMESER_H */
diff --git a/usr/src/lib/libresolv2_joy/include/conf/sunoptions.h b/usr/src/lib/libresolv2_joy/include/conf/sunoptions.h
new file mode 100644
index 0000000000..b75ff9d878
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/conf/sunoptions.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SUNOPTIONS_H
+#define _SUNOPTIONS_H
+
+#define USELOOPBACK /* Resolver library defaults to 127.0.0.1 */
+
+/* Additions for Solaris 2 */
+
+#define SUNW_INITCHKIF /* Check if any non-loopback interface is up */
+#define SUNW_CONFCHECK /* Abort quickly if no /etc/resolv.conf or */
+ /* local named */
+#define SUNW_HOSTS_FALLBACK /* Configurable /etc/hosts fallback */
+#define SUNW_HNOK_UNDERSCORE /* Allow underscore in hostnames (libresolv) */
+#define SUNW_MT_RESOLVER /* MT hot extensions (libresolv) */
+#define SUNW_SETHERRNO /* ISC does not set h_errno in gethostbyname */
+#define SUNW_OVERRIDE_RETRY /* Allow NS switch to override res->retry */
+#define SUNW_LIBMD5 /* Use md5(3EXT) instead of internal implementation */
+
+/* If compiling an MT warm libresolv, we also need reentrancy */
+#if defined(SUNW_MT_RESOLVER) && !defined(_REENTRANT)
+#define _REENTRANT
+#endif
+
+/* End additions for Solaris 2 */
+
+#endif /* _SUNOPTIONS_H */
diff --git a/usr/src/lib/libresolv2_joy/include/config.h b/usr/src/lib/libresolv2_joy/include/config.h
new file mode 100644
index 0000000000..35fb115a0f
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/config.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/* config.h. Generated from config.h.in by configure. */
+/* #undef _SOCKADDR_LEN */
+#define HAVE_FCNTL_H 1
+/* #undef HAVE_PATHS_H */
+#define HAVE_INTTYPES_H 1
+#define HAVE_STROPTS_H 1
+/* #undef HAVE_SYS_TIMERS_H */
+#define HAVE_SYS_SELECT_H 1
+#define HAVE_MEMORY_H 1
+/* #undef SYS_CDEFS_H */
+#define _POSIX_PTHREAD_SEMANTICS 1
+#define POSIX_GETPWUID_R 1
+#define POSIX_GETPWNAM_R 1
+#define POSIX_GETGRGID_R 1
+#define POSIX_GETGRNAM_R 1
+#define HAVE_MEMMOVE 1
+#define HAVE_MEMCHR 1
+/* #undef SPRINTF_CHAR */
+/* #undef VSPRINTF_CHAR */
+#define USE_SYSERROR_LIST 1
+/* #undef NEED_STRTOUL */
+/* #undef NEED_SUN4PROTOS */
+/* #undef REENABLE_SEND */
+
+#define NEED_SETGROUPENT 1
+#define NEED_GETGROUPLIST 1
+
+/* define if prototype for getgrnam_r() is required */
+/* #undef NEED_GETGRNAM_R */
+/* #undef NEED_GETGRGID_R */
+/* #undef NEED_GETGRENT_R */
+#define NEED_SETGRENT_R 1
+#define NEED_ENDGRENT_R 1
+
+#define NEED_INNETGR_R 1
+/* #undef NEED_SETNETGRENT_R */
+#define NEED_ENDNETGRENT_R 1
+
+/* #undef NEED_GETPWNAM_R */
+/* #undef NEED_GETPWUID_R */
+#define NEED_SETPWENT_R 1
+#define NEED_SETPASSENT_R 1
+#define NEED_SETPWENT_R 1
+/* #undef NEED_GETPWENT_R */
+#define NEED_ENDPWENT_R 1
+
+#define NEED_SETPASSENT 1
+
+/* #undef HAS_PW_CLASS */
+
+/* #undef ssize_t */
+/* #undef uintptr_t */
+
+/* Shut up warnings about sputaux in stdio.h on BSD/OS pre-4.1 */
+/* #undef SHUTUP_SPUTAUX */
+#ifdef SHUTUP_SPUTAUX
+struct __sFILE;
+extern __inline int __sputaux(int _c, struct __sFILE *_p);
+#endif
+#define BROKEN_IN6ADDR_INIT_MACROS 1
+#define HAVE_STRLCAT 1
+/* Shut up warnings about missing braces */
+/* #undef SHUTUP_MUTEX_INITIALIZER */
+#ifdef SHUTUP_MUTEX_INITIALIZER
+#define LIBBIND_MUTEX_INITIALIZER { PTHREAD_MUTEX_INITIALIZER }
+#else
+#define LIBBIND_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
+#endif
+
diff --git a/usr/src/lib/libresolv2_joy/include/err.h b/usr/src/lib/libresolv2_joy/include/err.h
new file mode 100644
index 0000000000..45992ea336
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/err.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2001 by Sun Microsystems, Inc.
+ * All rights reserved.
+ */
+
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)err.h 8.1 (Berkeley) 6/2/93
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifndef _ERR_H_
+#define _ERR_H_
+
+#include <sys/cdefs.h>
+#include <stdarg.h>
+
+__BEGIN_DECLS
+__dead void err __P((int, const char *, ...)) __attribute__((__volatile));
+__dead void verr __P((int, const char *, va_list))
+ __attribute__((__volatile));
+__dead void errx __P((int, const char *, ...)) __attribute__((__volatile));
+__dead void verrx __P((int, const char *, va_list))
+ __attribute__((__volatile));
+void warn __P((const char *, ...));
+void vwarn __P((const char *, va_list));
+void warnx __P((const char *, ...));
+void vwarnx __P((const char *, va_list));
+__END_DECLS
+
+#endif /* !_ERR_H_ */
diff --git a/usr/src/lib/libresolv2_joy/include/fd_setsize.h b/usr/src/lib/libresolv2_joy/include/fd_setsize.h
new file mode 100644
index 0000000000..0e21049742
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/fd_setsize.h
@@ -0,0 +1,10 @@
+#ifndef _FD_SETSIZE_H
+#define _FD_SETSIZE_H
+
+/*%
+ * If you need a bigger FD_SETSIZE, this is NOT the place to set it.
+ * This file is a fallback for BIND ports which don't specify their own.
+ */
+
+#endif /* _FD_SETSIZE_H */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/hesiod.h b/usr/src/lib/libresolv2_joy/include/hesiod.h
new file mode 100644
index 0000000000..d64c0c5e80
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/hesiod.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*! \file
+ * \brief
+ * This file is primarily maintained by <tytso@mit.edu> and <ghudson@mit.edu>.
+ */
+
+/*
+ * $Id: hesiod.h,v 1.4 2005/04/27 04:56:14 sra Exp $
+ */
+
+#ifndef _HESIOD_H_INCLUDED
+#define _HESIOD_H_INCLUDED
+
+int hesiod_init __P((void **));
+void hesiod_end __P((void *));
+char * hesiod_to_bind __P((void *, const char *, const char *));
+char ** hesiod_resolve __P((void *, const char *, const char *));
+void hesiod_free_list __P((void *, char **));
+struct __res_state * __hesiod_res_get __P((void *));
+void __hesiod_res_set __P((void *, struct __res_state *,
+ void (*)(void *)));
+
+#endif /*_HESIOD_H_INCLUDED*/
diff --git a/usr/src/lib/libresolv2_joy/include/irp.h b/usr/src/lib/libresolv2_joy/include/irp.h
new file mode 100644
index 0000000000..1290bd068f
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/irp.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: irp.h,v 1.4 2005/04/27 04:56:15 sra Exp $
+ */
+
+#ifndef _IRP_H_INCLUDED
+#define _IRP_H_INCLUDED
+
+/*! \file */
+
+#define IRPD_TIMEOUT 30 /*%< seconds */
+#define IRPD_MAXSESS 50 /*%< number of simultaneous sessions. */
+#define IRPD_PORT 6660 /*%< 10 times the number of the beast. */
+#define IRPD_PATH "/var/run/irpd" /*%< af_unix socket path */
+
+/* If sets the environment variable IRPDSERVER to an IP address
+ (e.g. "192.5.5.1"), then that's the host the client expects irpd to be
+ running on. */
+#define IRPD_HOST_ENV "IRPDSERVER"
+
+/* Protocol response codes. */
+#define IRPD_WELCOME_CODE 200
+#define IRPD_NOT_WELCOME_CODE 500
+
+#define IRPD_GETHOST_ERROR 510
+#define IRPD_GETHOST_NONE 210
+#define IRPD_GETHOST_OK 211
+#define IRPD_GETHOST_SETOK 212
+
+#define IRPD_GETNET_ERROR 520
+#define IRPD_GETNET_NONE 220
+#define IRPD_GETNET_OK 221
+#define IRPD_GETNET_SETOK 222
+
+#define IRPD_GETUSER_ERROR 530
+#define IRPD_GETUSER_NONE 230
+#define IRPD_GETUSER_OK 231
+#define IRPD_GETUSER_SETOK 232
+
+#define IRPD_GETGROUP_ERROR 540
+#define IRPD_GETGROUP_NONE 240
+#define IRPD_GETGROUP_OK 241
+#define IRPD_GETGROUP_SETOK 242
+
+#define IRPD_GETSERVICE_ERROR 550
+#define IRPD_GETSERVICE_NONE 250
+#define IRPD_GETSERVICE_OK 251
+#define IRPD_GETSERVICE_SETOK 252
+
+#define IRPD_GETPROTO_ERROR 560
+#define IRPD_GETPROTO_NONE 260
+#define IRPD_GETPROTO_OK 261
+#define IRPD_GETPROTO_SETOK 262
+
+#define IRPD_GETNETGR_ERROR 570
+#define IRPD_GETNETGR_NONE 270
+#define IRPD_GETNETGR_OK 271
+#define IRPD_GETNETGR_NOMORE 272
+#define IRPD_GETNETGR_MATCHES 273
+#define IRPD_GETNETGR_NOMATCH 274
+#define IRPD_GETNETGR_SETOK 275
+#define IRPD_GETNETGR_SETERR 276
+
+#define irs_irp_read_body __irs_irp_read_body
+#define irs_irp_read_response __irs_irp_read_response
+#define irs_irp_disconnect __irs_irp_disconnect
+#define irs_irp_connect __irs_irp_connect
+#define irs_irp_connection_setup __irs_irp_connection_setup
+#define irs_irp_send_command __irs_irp_send_command
+
+struct irp_p;
+
+char *irs_irp_read_body(struct irp_p *, size_t *);
+int irs_irp_read_response(struct irp_p *, char *, size_t);
+void irs_irp_disconnect(struct irp_p *);
+int irs_irp_connect(struct irp_p *);
+int irs_irp_is_connected(struct irp_p *);
+int irs_irp_connection_setup(struct irp_p *, int *);
+#ifdef __GNUC__
+int irs_irp_send_command(struct irp_p *, const char *, ...)
+ __attribute__((__format__(__printf__, 2, 3)));
+#else
+int irs_irp_send_command(struct irp_p *, const char *, ...);
+#endif
+int irs_irp_get_full_response(struct irp_p *, int *, char *, size_t,
+ char **, size_t *);
+int irs_irp_read_line(struct irp_p *, char *, int);
+
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/irs.h b/usr/src/lib/libresolv2_joy/include/irs.h
new file mode 100644
index 0000000000..386e3cb3f6
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/irs.h
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: irs.h,v 1.5 2005/04/27 04:56:15 sra Exp $
+ */
+
+#ifndef _IRS_H_INCLUDED
+#define _IRS_H_INCLUDED
+
+/*! \file */
+
+#include <sys/types.h>
+
+#include <arpa/nameser.h>
+
+#include <grp.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <pwd.h>
+
+/*%
+ * This is the group map class.
+ */
+struct irs_gr {
+ void * private;
+ void (*close) __P((struct irs_gr *));
+ struct group * (*next) __P((struct irs_gr *));
+ struct group * (*byname) __P((struct irs_gr *, const char *));
+ struct group * (*bygid) __P((struct irs_gr *, gid_t));
+ int (*list) __P((struct irs_gr *, const char *,
+ gid_t, gid_t *, int *));
+ void (*rewind) __P((struct irs_gr *));
+ void (*minimize) __P((struct irs_gr *));
+ struct __res_state * (*res_get) __P((struct irs_gr *));
+ void (*res_set) __P((struct irs_gr *, res_state,
+ void (*)(void *)));
+};
+
+/*%
+ * This is the password map class.
+ */
+struct irs_pw {
+ void * private;
+ void (*close) __P((struct irs_pw *));
+ struct passwd * (*next) __P((struct irs_pw *));
+ struct passwd * (*byname) __P((struct irs_pw *, const char *));
+ struct passwd * (*byuid) __P((struct irs_pw *, uid_t));
+ void (*rewind) __P((struct irs_pw *));
+ void (*minimize) __P((struct irs_pw *));
+ struct __res_state * (*res_get) __P((struct irs_pw *));
+ void (*res_set) __P((struct irs_pw *, res_state,
+ void (*)(void *)));
+};
+
+/*%
+ * This is the service map class.
+ */
+struct irs_sv {
+ void * private;
+ void (*close) __P((struct irs_sv *));
+ struct servent *(*byname) __P((struct irs_sv *,
+ const char *, const char *));
+ struct servent *(*byport) __P((struct irs_sv *, int, const char *));
+ struct servent *(*next) __P((struct irs_sv *));
+ void (*rewind) __P((struct irs_sv *));
+ void (*minimize) __P((struct irs_sv *));
+ struct __res_state * (*res_get) __P((struct irs_sv *));
+ void (*res_set) __P((struct irs_sv *, res_state,
+ void (*)(void *)));
+};
+
+/*%
+ * This is the protocols map class.
+ */
+struct irs_pr {
+ void * private;
+ void (*close) __P((struct irs_pr *));
+ struct protoent *(*byname) __P((struct irs_pr *, const char *));
+ struct protoent *(*bynumber) __P((struct irs_pr *, int));
+ struct protoent *(*next) __P((struct irs_pr *));
+ void (*rewind) __P((struct irs_pr *));
+ void (*minimize) __P((struct irs_pr *));
+ struct __res_state * (*res_get) __P((struct irs_pr *));
+ void (*res_set) __P((struct irs_pr *, res_state,
+ void (*)(void *)));
+};
+
+/*%
+ * This is the hosts map class.
+ */
+struct irs_ho {
+ void * private;
+ void (*close) __P((struct irs_ho *));
+ struct hostent *(*byname) __P((struct irs_ho *, const char *));
+ struct hostent *(*byname2) __P((struct irs_ho *, const char *, int));
+ struct hostent *(*byaddr) __P((struct irs_ho *,
+ const void *, int, int));
+ struct hostent *(*next) __P((struct irs_ho *));
+ void (*rewind) __P((struct irs_ho *));
+ void (*minimize) __P((struct irs_ho *));
+ struct __res_state * (*res_get) __P((struct irs_ho *));
+ void (*res_set) __P((struct irs_ho *, res_state,
+ void (*)(void *)));
+ struct addrinfo *(*addrinfo) __P((struct irs_ho *, const char *,
+ const struct addrinfo *));
+};
+
+/*%
+ * This is the networks map class.
+ */
+struct irs_nw {
+ void * private;
+ void (*close) __P((struct irs_nw *));
+ struct nwent * (*byname) __P((struct irs_nw *, const char *, int));
+ struct nwent * (*byaddr) __P((struct irs_nw *, void *, int, int));
+ struct nwent * (*next) __P((struct irs_nw *));
+ void (*rewind) __P((struct irs_nw *));
+ void (*minimize) __P((struct irs_nw *));
+ struct __res_state * (*res_get) __P((struct irs_nw *));
+ void (*res_set) __P((struct irs_nw *, res_state,
+ void (*)(void *)));
+};
+
+/*%
+ * This is the netgroups map class.
+ */
+struct irs_ng {
+ void * private;
+ void (*close) __P((struct irs_ng *));
+ int (*next) __P((struct irs_ng *, const char **,
+ const char **, const char **));
+ int (*test) __P((struct irs_ng *, const char *,
+ const char *, const char *,
+ const char *));
+ void (*rewind) __P((struct irs_ng *, const char *));
+ void (*minimize) __P((struct irs_ng *));
+};
+
+/*%
+ * This is the generic map class, which copies the front of all others.
+ */
+struct irs_map {
+ void * private;
+ void (*close) __P((void *));
+};
+
+/*%
+ * This is the accessor class. It contains pointers to all of the
+ * initializers for the map classes for a particular accessor.
+ */
+struct irs_acc {
+ void * private;
+ void (*close) __P((struct irs_acc *));
+ struct irs_gr * (*gr_map) __P((struct irs_acc *));
+ struct irs_pw * (*pw_map) __P((struct irs_acc *));
+ struct irs_sv * (*sv_map) __P((struct irs_acc *));
+ struct irs_pr * (*pr_map) __P((struct irs_acc *));
+ struct irs_ho * (*ho_map) __P((struct irs_acc *));
+ struct irs_nw * (*nw_map) __P((struct irs_acc *));
+ struct irs_ng * (*ng_map) __P((struct irs_acc *));
+ struct __res_state * (*res_get) __P((struct irs_acc *));
+ void (*res_set) __P((struct irs_acc *, res_state,
+ void (*)(void *)));
+};
+
+/*%
+ * This is because the official definition of "struct netent" has no
+ * concept of CIDR even though it allows variant address families (on
+ * output but not input). The compatibility stubs convert the structs
+ * below into "struct netent"'s.
+ */
+struct nwent {
+ char *n_name; /*%< official name of net */
+ char **n_aliases; /*%< alias list */
+ int n_addrtype; /*%< net address type */
+ void *n_addr; /*%< network address */
+ int n_length; /*%< address length, in bits */
+};
+
+/*%
+ * Hide external function names from POSIX.
+ */
+#define irs_gen_acc __irs_gen_acc
+#define irs_lcl_acc __irs_lcl_acc
+#define irs_dns_acc __irs_dns_acc
+#define irs_nis_acc __irs_nis_acc
+#define irs_irp_acc __irs_irp_acc
+#define irs_destroy __irs_destroy
+#define irs_dns_gr __irs_dns_gr
+#define irs_dns_ho __irs_dns_ho
+#define irs_dns_nw __irs_dns_nw
+#define irs_dns_pr __irs_dns_pr
+#define irs_dns_pw __irs_dns_pw
+#define irs_dns_sv __irs_dns_sv
+#define irs_gen_gr __irs_gen_gr
+#define irs_gen_ho __irs_gen_ho
+#define irs_gen_ng __irs_gen_ng
+#define irs_gen_nw __irs_gen_nw
+#define irs_gen_pr __irs_gen_pr
+#define irs_gen_pw __irs_gen_pw
+#define irs_gen_sv __irs_gen_sv
+#define irs_irp_get_full_response __irs_irp_get_full_response
+#define irs_irp_gr __irs_irp_gr
+#define irs_irp_ho __irs_irp_ho
+#define irs_irp_is_connected __irs_irp_is_connected
+#define irs_irp_ng __irs_irp_ng
+#define irs_irp_nw __irs_irp_nw
+#define irs_irp_pr __irs_irp_pr
+#define irs_irp_pw __irs_irp_pw
+#define irs_irp_read_line __irs_irp_read_line
+#define irs_irp_sv __irs_irp_sv
+#define irs_lcl_gr __irs_lcl_gr
+#define irs_lcl_ho __irs_lcl_ho
+#define irs_lcl_ng __irs_lcl_ng
+#define irs_lcl_nw __irs_lcl_nw
+#define irs_lcl_pr __irs_lcl_pr
+#define irs_lcl_pw __irs_lcl_pw
+#define irs_lcl_sv __irs_lcl_sv
+#define irs_nis_gr __irs_nis_gr
+#define irs_nis_ho __irs_nis_ho
+#define irs_nis_ng __irs_nis_ng
+#define irs_nis_nw __irs_nis_nw
+#define irs_nis_pr __irs_nis_pr
+#define irs_nis_pw __irs_nis_pw
+#define irs_nis_sv __irs_nis_sv
+#define net_data_create __net_data_create
+#define net_data_destroy __net_data_destroy
+#define net_data_minimize __net_data_minimize
+
+/*%
+ * Externs.
+ */
+extern struct irs_acc * irs_gen_acc __P((const char *, const char *));
+extern struct irs_acc * irs_lcl_acc __P((const char *));
+extern struct irs_acc * irs_dns_acc __P((const char *));
+extern struct irs_acc * irs_nis_acc __P((const char *));
+extern struct irs_acc * irs_irp_acc __P((const char *));
+
+extern void irs_destroy __P((void));
+
+/*%
+ * These forward declarations are for the semi-private functions in
+ * the get*.c files. Each of these funcs implements the real get*
+ * functionality and the standard versions are just wrappers that
+ * call these. Apart from the wrappers, only irpd is expected to
+ * call these directly, hence these decls are put here and not in
+ * the /usr/include replacements.
+ */
+
+struct net_data; /*%< forward */
+/*
+ * net_data_create gets a singleton net_data object. net_data_init
+ * creates as many net_data objects as times it is called. Clients using
+ * the default interface will use net_data_create by default. Servers will
+ * probably want net_data_init (one call per client)
+ */
+struct net_data *net_data_create __P((const char *));
+struct net_data *net_data_init __P((const char *));
+void net_data_destroy __P((void *));
+
+extern struct group *getgrent_p __P((struct net_data *));
+extern struct group *getgrnam_p __P((const char *, struct net_data *));
+extern struct group *getgrgid_p __P((gid_t, struct net_data *));
+extern int setgroupent_p __P((int, struct net_data *));
+extern void endgrent_p __P((struct net_data *));
+extern int getgrouplist_p __P((const char *, gid_t, gid_t *, int *,
+ struct net_data *));
+
+#ifdef SETGRENT_VOID
+extern void setgrent_p __P((struct net_data *));
+#else
+extern int setgrent_p __P((struct net_data *));
+#endif
+
+extern struct hostent *gethostbyname_p __P((const char *,
+ struct net_data *));
+extern struct hostent *gethostbyname2_p __P((const char *, int,
+ struct net_data *));
+extern struct hostent *gethostbyaddr_p __P((const char *, int, int,
+ struct net_data *));
+extern struct hostent *gethostent_p __P((struct net_data *));
+extern void sethostent_p __P((int, struct net_data *));
+extern void endhostent_p __P((struct net_data *));
+extern struct hostent *getipnodebyname_p __P((const char *, int, int, int *,
+ struct net_data *));
+extern struct hostent *getipnodebyaddr_p __P((const void *, size_t,
+ int, int *, struct net_data *));
+
+extern struct netent *getnetent_p __P((struct net_data *));
+extern struct netent *getnetbyname_p __P((const char *, struct net_data *));
+extern struct netent *getnetbyaddr_p __P((unsigned long, int,
+ struct net_data *));
+extern void setnetent_p __P((int, struct net_data *));
+extern void endnetent_p __P((struct net_data *));
+
+extern void setnetgrent_p __P((const char *, struct net_data *));
+extern void endnetgrent_p __P((struct net_data *));
+extern int innetgr_p __P((const char *, const char *, const char *,
+ const char *, struct net_data *));
+extern int getnetgrent_p __P((const char **, const char **,
+ const char **, struct net_data *));
+
+extern struct protoent *getprotoent_p __P((struct net_data *));
+extern struct protoent *getprotobyname_p __P((const char *,
+ struct net_data *));
+extern struct protoent *getprotobynumber_p __P((int, struct net_data *));
+extern void setprotoent_p __P((int, struct net_data *));
+extern void endprotoent_p __P((struct net_data *));
+
+
+extern struct passwd *getpwent_p __P((struct net_data *));
+extern struct passwd *getpwnam_p __P((const char *, struct net_data *));
+extern struct passwd *getpwuid_p __P((uid_t, struct net_data *));
+extern int setpassent_p __P((int, struct net_data *));
+extern void endpwent_p __P((struct net_data *));
+
+#ifdef SETPWENT_VOID
+extern void setpwent_p __P((struct net_data *));
+#else
+extern int setpwent_p __P((struct net_data *));
+#endif
+
+extern struct servent *getservent_p __P((struct net_data *));
+extern struct servent *getservbyname_p __P((const char *, const char *,
+ struct net_data *));
+extern struct servent *getservbyport_p __P((int, const char *,
+ struct net_data *));
+extern void setservent_p __P((int, struct net_data *));
+extern void endservent_p __P((struct net_data *));
+
+#endif /*_IRS_H_INCLUDED*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/isc/assertions.h b/usr/src/lib/libresolv2_joy/include/isc/assertions.h
new file mode 100644
index 0000000000..68925e73b3
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/isc/assertions.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 1997-2001 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: assertions.h,v 1.5 2008/11/14 02:36:51 marka Exp $
+ */
+
+#ifndef ASSERTIONS_H
+#define ASSERTIONS_H 1
+
+typedef enum {
+ assert_require, assert_ensure, assert_insist, assert_invariant
+} assertion_type;
+
+typedef void (*assertion_failure_callback)(const char *, int, assertion_type,
+ const char *, int);
+
+/* coverity[+kill] */
+extern assertion_failure_callback __assertion_failed;
+void set_assertion_failure_callback(assertion_failure_callback f);
+const char *assertion_type_to_text(assertion_type type);
+
+#if defined(CHECK_ALL) || defined(__COVERITY__)
+#define CHECK_REQUIRE 1
+#define CHECK_ENSURE 1
+#define CHECK_INSIST 1
+#define CHECK_INVARIANT 1
+#endif
+
+#if defined(CHECK_NONE) && !defined(__COVERITY__)
+#define CHECK_REQUIRE 0
+#define CHECK_ENSURE 0
+#define CHECK_INSIST 0
+#define CHECK_INVARIANT 0
+#endif
+
+#ifndef CHECK_REQUIRE
+#define CHECK_REQUIRE 1
+#endif
+
+#ifndef CHECK_ENSURE
+#define CHECK_ENSURE 1
+#endif
+
+#ifndef CHECK_INSIST
+#define CHECK_INSIST 1
+#endif
+
+#ifndef CHECK_INVARIANT
+#define CHECK_INVARIANT 1
+#endif
+
+#if CHECK_REQUIRE != 0
+#define REQUIRE(cond) \
+ ((void) ((cond) || \
+ ((__assertion_failed)(__FILE__, __LINE__, assert_require, \
+ #cond, 0), 0)))
+#define REQUIRE_ERR(cond) \
+ ((void) ((cond) || \
+ ((__assertion_failed)(__FILE__, __LINE__, assert_require, \
+ #cond, 1), 0)))
+#else
+#define REQUIRE(cond) ((void) (cond))
+#define REQUIRE_ERR(cond) ((void) (cond))
+#endif /* CHECK_REQUIRE */
+
+#if CHECK_ENSURE != 0
+#define ENSURE(cond) \
+ ((void) ((cond) || \
+ ((__assertion_failed)(__FILE__, __LINE__, assert_ensure, \
+ #cond, 0), 0)))
+#define ENSURE_ERR(cond) \
+ ((void) ((cond) || \
+ ((__assertion_failed)(__FILE__, __LINE__, assert_ensure, \
+ #cond, 1), 0)))
+#else
+#define ENSURE(cond) ((void) (cond))
+#define ENSURE_ERR(cond) ((void) (cond))
+#endif /* CHECK_ENSURE */
+
+#if CHECK_INSIST != 0
+#define INSIST(cond) \
+ ((void) ((cond) || \
+ ((__assertion_failed)(__FILE__, __LINE__, assert_insist, \
+ #cond, 0), 0)))
+#define INSIST_ERR(cond) \
+ ((void) ((cond) || \
+ ((__assertion_failed)(__FILE__, __LINE__, assert_insist, \
+ #cond, 1), 0)))
+#else
+#define INSIST(cond) ((void) (cond))
+#define INSIST_ERR(cond) ((void) (cond))
+#endif /* CHECK_INSIST */
+
+#if CHECK_INVARIANT != 0
+#define INVARIANT(cond) \
+ ((void) ((cond) || \
+ ((__assertion_failed)(__FILE__, __LINE__, assert_invariant, \
+ #cond, 0), 0)))
+#define INVARIANT_ERR(cond) \
+ ((void) ((cond) || \
+ ((__assertion_failed)(__FILE__, __LINE__, assert_invariant, \
+ #cond, 1), 0)))
+#else
+#define INVARIANT(cond) ((void) (cond))
+#define INVARIANT_ERR(cond) ((void) (cond))
+#endif /* CHECK_INVARIANT */
+#endif /* ASSERTIONS_H */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/isc/ctl.h b/usr/src/lib/libresolv2_joy/include/isc/ctl.h
new file mode 100644
index 0000000000..e2ba20201d
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/isc/ctl.h
@@ -0,0 +1,112 @@
+#ifndef ISC_CTL_H
+#define ISC_CTL_H
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1998,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: ctl.h,v 1.5 2005/04/27 04:56:17 sra Exp $
+ */
+
+/*! \file */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <isc/eventlib.h>
+
+/* Macros. */
+
+#define CTL_MORE 0x0001 /*%< More will be / should be sent. */
+#define CTL_EXIT 0x0002 /*%< Close connection after this. */
+#define CTL_DATA 0x0004 /*%< Go into / this is DATA mode. */
+/* Types. */
+
+struct ctl_cctx;
+struct ctl_sctx;
+struct ctl_sess;
+struct ctl_verb;
+
+enum ctl_severity { ctl_debug, ctl_warning, ctl_error };
+
+typedef void (*ctl_logfunc)(enum ctl_severity, const char *, ...);
+
+typedef void (*ctl_verbfunc)(struct ctl_sctx *, struct ctl_sess *,
+ const struct ctl_verb *, const char *,
+ u_int, const void *, void *);
+
+typedef void (*ctl_srvrdone)(struct ctl_sctx *, struct ctl_sess *, void *);
+
+typedef void (*ctl_clntdone)(struct ctl_cctx *, void *, const char *, u_int);
+
+struct ctl_verb {
+ const char * name;
+ ctl_verbfunc func;
+ const char * help;
+};
+
+/* General symbols. */
+
+#define ctl_logger __ctl_logger
+
+#ifdef __GNUC__
+void ctl_logger(enum ctl_severity, const char *, ...)
+ __attribute__((__format__(__printf__, 2, 3)));
+#else
+void ctl_logger(enum ctl_severity, const char *, ...);
+#endif
+
+/* Client symbols. */
+
+#define ctl_client __ctl_client
+#define ctl_endclient __ctl_endclient
+#define ctl_command __ctl_command
+
+struct ctl_cctx * ctl_client(evContext, const struct sockaddr *, size_t,
+ const struct sockaddr *, size_t,
+ ctl_clntdone, void *,
+ u_int, ctl_logfunc);
+void ctl_endclient(struct ctl_cctx *);
+int ctl_command(struct ctl_cctx *, const char *, size_t,
+ ctl_clntdone, void *);
+
+/* Server symbols. */
+
+#define ctl_server __ctl_server
+#define ctl_endserver __ctl_endserver
+#define ctl_response __ctl_response
+#define ctl_sendhelp __ctl_sendhelp
+#define ctl_getcsctx __ctl_getcsctx
+#define ctl_setcsctx __ctl_setcsctx
+
+struct ctl_sctx * ctl_server(evContext, const struct sockaddr *, size_t,
+ const struct ctl_verb *,
+ u_int, u_int,
+ u_int, int, int,
+ ctl_logfunc, void *);
+void ctl_endserver(struct ctl_sctx *);
+void ctl_response(struct ctl_sess *, u_int,
+ const char *, u_int, const void *,
+ ctl_srvrdone, void *,
+ const char *, size_t);
+void ctl_sendhelp(struct ctl_sess *, u_int);
+void * ctl_getcsctx(struct ctl_sess *);
+void * ctl_setcsctx(struct ctl_sess *, void *);
+
+#endif /*ISC_CTL_H*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/isc/dst.h b/usr/src/lib/libresolv2_joy/include/isc/dst.h
new file mode 100644
index 0000000000..90a9e67468
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/isc/dst.h
@@ -0,0 +1,168 @@
+#ifndef DST_H
+#define DST_H
+
+#ifndef HAS_DST_KEY
+typedef struct dst_key {
+ char *dk_key_name; /*%< name of the key */
+ int dk_key_size; /*%< this is the size of the key in bits */
+ int dk_proto; /*%< what protocols this key can be used for */
+ int dk_alg; /*%< algorithm number from key record */
+ u_int32_t dk_flags; /*%< and the flags of the public key */
+ u_int16_t dk_id; /*%< identifier of the key */
+} DST_KEY;
+#endif /* HAS_DST_KEY */
+/*
+ * do not taint namespace
+ */
+#define dst_bsafe_init __dst_bsafe_init
+#define dst_buffer_to_key __dst_buffer_to_key
+#define dst_check_algorithm __dst_check_algorithm
+#define dst_compare_keys __dst_compare_keys
+#define dst_cylink_init __dst_cylink_init
+#define dst_dnskey_to_key __dst_dnskey_to_key
+#define dst_eay_dss_init __dst_eay_dss_init
+#define dst_free_key __dst_free_key
+#define dst_generate_key __dst_generate_key
+#define dst_hmac_md5_init __dst_hmac_md5_init
+#define dst_init __dst_init
+#define dst_key_to_buffer __dst_key_to_buffer
+#define dst_key_to_dnskey __dst_key_to_dnskey
+#define dst_read_key __dst_read_key
+#define dst_rsaref_init __dst_rsaref_init
+#define dst_s_build_filename __dst_s_build_filename
+#define dst_s_calculate_bits __dst_s_calculate_bits
+#define dst_s_conv_bignum_b64_to_u8 __dst_s_conv_bignum_b64_to_u8
+#define dst_s_conv_bignum_u8_to_b64 __dst_s_conv_bignum_u8_to_b64
+#define dst_s_dns_key_id __dst_s_dns_key_id
+#define dst_s_dump __dst_s_dump
+#define dst_s_filename_length __dst_s_filename_length
+#define dst_s_fopen __dst_s_fopen
+#define dst_s_get_int16 __dst_s_get_int16
+#define dst_s_get_int32 __dst_s_get_int32
+#define dst_s_id_calc __dst_s_id_calc
+#define dst_s_put_int16 __dst_s_put_int16
+#define dst_s_put_int32 __dst_s_put_int32
+#define dst_s_quick_random __dst_s_quick_random
+#define dst_s_quick_random_set __dst_s_quick_random_set
+#define dst_s_random __dst_s_random
+#define dst_s_semi_random __dst_s_semi_random
+#define dst_s_verify_str __dst_s_verify_str
+#define dst_sig_size __dst_sig_size
+#define dst_sign_data __dst_sign_data
+#define dst_verify_data __dst_verify_data
+#define dst_write_key __dst_write_key
+
+/*
+ * DST Crypto API defintions
+ */
+void dst_init(void);
+int dst_check_algorithm(const int);
+
+
+int dst_sign_data(const int, /*!< specifies INIT/UPDATE/FINAL/ALL */
+ DST_KEY *, /*!< the key to use */
+ void **, /*!< pointer to state structure */
+ const u_char *, /*!< data to be signed */
+ const int, /*!< length of input data */
+ u_char *, /*!< buffer to write signature to */
+ const int); /*!< size of output buffer */
+int dst_verify_data(const int, /*!< specifies INIT/UPDATE/FINAL/ALL */
+ DST_KEY *, /*!< the key to use */
+ void **, /*!< pointer to state structure */
+ const u_char *, /*!< data to be verified */
+ const int, /*!< length of input data */
+ const u_char *, /*!< buffer containing signature */
+ const int); /*!< length of signature */
+DST_KEY *dst_read_key(const char *, /*!< name of key */
+ const u_int16_t, /*!< key tag identifier */
+ const int, /*!< key algorithm */
+ const int); /*!< Private/PublicKey wanted */
+int dst_write_key(const DST_KEY *, /*!< key to write out */
+ const int); /*!< Public/Private */
+DST_KEY *dst_dnskey_to_key(const char *, /*!< KEY record name */
+ const u_char *, /*!< KEY RDATA */
+ const int); /*!< size of input buffer */
+int dst_key_to_dnskey(const DST_KEY *, /*!< key to translate */
+ u_char *, /*!< output buffer */
+ const int); /*!< size of out_storage */
+DST_KEY *dst_buffer_to_key(const char *, /*!< name of the key */
+ const int, /*!< algorithm */
+ const int, /*!< dns flags */
+ const int, /*!< dns protocol */
+ const u_char *, /*!< key in dns wire fmt */
+ const int); /*!< size of key */
+int dst_key_to_buffer(DST_KEY *, u_char *, int);
+
+DST_KEY *dst_generate_key(const char *, /*!< name of new key */
+ const int, /*!< key algorithm to generate */
+ const int, /*!< size of new key */
+ const int, /*!< alg dependent parameter */
+ const int, /*!< key DNS flags */
+ const int); /*!< key DNS protocol */
+DST_KEY *dst_free_key(DST_KEY *);
+int dst_compare_keys(const DST_KEY *, const DST_KEY *);
+
+int dst_sig_size(DST_KEY *);
+
+
+/* support for dns key tags/ids */
+u_int16_t dst_s_dns_key_id(const u_char *, const int);
+u_int16_t dst_s_id_calc(const u_char *, const int);
+
+/* Used by callers as well as by the library. */
+#define RAW_KEY_SIZE 8192 /*%< large enough to store any key */
+/* DST_API control flags */
+/* These are used used in functions dst_sign_data and dst_verify_data */
+#define SIG_MODE_INIT 1 /*%< initialize digest */
+#define SIG_MODE_UPDATE 2 /*%< add data to digest */
+#define SIG_MODE_FINAL 4 /*%< generate/verify signature */
+#define SIG_MODE_ALL (SIG_MODE_INIT|SIG_MODE_UPDATE|SIG_MODE_FINAL)
+
+/* Flags for dst_read_private_key() */
+#define DST_FORCE_READ 0x1000000
+#define DST_CAN_SIGN 0x010F
+#define DST_NO_AUTHEN 0x8000
+#define DST_EXTEND_FLAG 0x1000
+#define DST_STANDARD 0
+#define DST_PRIVATE 0x2000000
+#define DST_PUBLIC 0x4000000
+#define DST_RAND_SEMI 1
+#define DST_RAND_STD 2
+#define DST_RAND_KEY 3
+#define DST_RAND_DSS 4
+
+
+/* DST algorithm codes */
+#define KEY_RSA 1
+#define KEY_DH 2
+#define KEY_DSA 3
+#define KEY_PRIVATE 254
+#define KEY_EXPAND 255
+#define KEY_HMAC_MD5 157
+#define KEY_HMAC_SHA1 158
+#define UNKNOWN_KEYALG 0
+#define DST_MAX_ALGS KEY_HMAC_SHA1
+
+/* DST constants to locations in KEY record changes in new KEY record */
+#define DST_FLAGS_SIZE 2
+#define DST_KEY_PROT 2
+#define DST_KEY_ALG 3
+#define DST_EXT_FLAG 4
+#define DST_KEY_START 4
+
+#ifndef SIGN_F_NOKEY
+#define SIGN_F_NOKEY 0xC000
+#endif
+
+/* error codes from dst routines */
+#define SIGN_INIT_FAILURE (-23)
+#define SIGN_UPDATE_FAILURE (-24)
+#define SIGN_FINAL_FAILURE (-25)
+#define VERIFY_INIT_FAILURE (-26)
+#define VERIFY_UPDATE_FAILURE (-27)
+#define VERIFY_FINAL_FAILURE (-28)
+#define MISSING_KEY_OR_SIGNATURE (-30)
+#define UNSUPPORTED_KEYALG (-31)
+
+#endif /* DST_H */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/isc/eventlib.h b/usr/src/lib/libresolv2_joy/include/isc/eventlib.h
new file mode 100644
index 0000000000..a4cfdf9092
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/isc/eventlib.h
@@ -0,0 +1,206 @@
+/*
+ * Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 1995-1999, 2001, 2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* eventlib.h - exported interfaces for eventlib
+ * vix 09sep95 [initial]
+ *
+ * $Id: eventlib.h,v 1.7 2008/11/14 02:36:51 marka Exp $
+ */
+
+#ifndef _EVENTLIB_H
+#define _EVENTLIB_H
+
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/time.h>
+#include <stdio.h>
+
+#include <isc/platform.h>
+
+#ifndef __P
+# define __EVENTLIB_P_DEFINED
+# ifdef __STDC__
+# define __P(x) x
+# else
+# define __P(x) ()
+# endif
+#endif
+
+/* In the absence of branded types... */
+typedef struct { void *opaque; } evConnID;
+typedef struct { void *opaque; } evFileID;
+typedef struct { void *opaque; } evStreamID;
+typedef struct { void *opaque; } evTimerID;
+typedef struct { void *opaque; } evWaitID;
+typedef struct { void *opaque; } evContext;
+typedef struct { void *opaque; } evEvent;
+
+#define evInitID(id) ((id)->opaque = NULL)
+#define evTestID(id) ((id).opaque != NULL)
+
+typedef void (*evConnFunc)__P((evContext, void *, int, const void *, int,
+ const void *, int));
+typedef void (*evFileFunc)__P((evContext, void *, int, int));
+typedef void (*evStreamFunc)__P((evContext, void *, int, int));
+typedef void (*evTimerFunc)__P((evContext, void *,
+ struct timespec, struct timespec));
+typedef void (*evWaitFunc)__P((evContext, void *, const void *));
+
+typedef struct { unsigned char mask[256/8]; } evByteMask;
+#define EV_BYTEMASK_BYTE(b) ((b) / 8)
+#define EV_BYTEMASK_MASK(b) (1 << ((b) % 8))
+#define EV_BYTEMASK_SET(bm, b) \
+ ((bm).mask[EV_BYTEMASK_BYTE(b)] |= EV_BYTEMASK_MASK(b))
+#define EV_BYTEMASK_CLR(bm, b) \
+ ((bm).mask[EV_BYTEMASK_BYTE(b)] &= ~EV_BYTEMASK_MASK(b))
+#define EV_BYTEMASK_TST(bm, b) \
+ ((bm).mask[EV_BYTEMASK_BYTE(b)] & EV_BYTEMASK_MASK(b))
+
+#define EV_POLL 1
+#define EV_WAIT 2
+#define EV_NULL 4
+
+#define EV_READ 1
+#define EV_WRITE 2
+#define EV_EXCEPT 4
+
+#define EV_WASNONBLOCKING 8 /* Internal library use. */
+
+/* eventlib.c */
+#define evCreate __evCreate
+#define evSetDebug __evSetDebug
+#define evDestroy __evDestroy
+#define evGetNext __evGetNext
+#define evDispatch __evDispatch
+#define evDrop __evDrop
+#define evMainLoop __evMainLoop
+#define evHighestFD __evHighestFD
+#define evGetOption __evGetOption
+#define evSetOption __evSetOption
+
+int evCreate __P((evContext *));
+void evSetDebug __P((evContext, int, FILE *));
+int evDestroy __P((evContext));
+int evGetNext __P((evContext, evEvent *, int));
+int evDispatch __P((evContext, evEvent));
+void evDrop __P((evContext, evEvent));
+int evMainLoop __P((evContext));
+int evHighestFD __P((evContext));
+int evGetOption __P((evContext *, const char *, int *));
+int evSetOption __P((evContext *, const char *, int));
+
+/* ev_connects.c */
+#define evListen __evListen
+#define evConnect __evConnect
+#define evCancelConn __evCancelConn
+#define evHold __evHold
+#define evUnhold __evUnhold
+#define evTryAccept __evTryAccept
+
+int evListen __P((evContext, int, int, evConnFunc, void *, evConnID *));
+int evConnect __P((evContext, int, const void *, int,
+ evConnFunc, void *, evConnID *));
+int evCancelConn __P((evContext, evConnID));
+int evHold __P((evContext, evConnID));
+int evUnhold __P((evContext, evConnID));
+int evTryAccept __P((evContext, evConnID, int *));
+
+/* ev_files.c */
+#define evSelectFD __evSelectFD
+#define evDeselectFD __evDeselectFD
+
+int evSelectFD __P((evContext, int, int, evFileFunc, void *, evFileID *));
+int evDeselectFD __P((evContext, evFileID));
+
+/* ev_streams.c */
+#define evConsIovec __evConsIovec
+#define evWrite __evWrite
+#define evRead __evRead
+#define evTimeRW __evTimeRW
+#define evUntimeRW __evUntimeRW
+#define evCancelRW __evCancelRW
+
+struct iovec evConsIovec __P((void *, size_t));
+int evWrite __P((evContext, int, const struct iovec *, int,
+ evStreamFunc func, void *, evStreamID *));
+int evRead __P((evContext, int, const struct iovec *, int,
+ evStreamFunc func, void *, evStreamID *));
+int evTimeRW __P((evContext, evStreamID, evTimerID timer));
+int evUntimeRW __P((evContext, evStreamID));
+int evCancelRW __P((evContext, evStreamID));
+
+/* ev_timers.c */
+#define evConsTime __evConsTime
+#define evAddTime __evAddTime
+#define evSubTime __evSubTime
+#define evCmpTime __evCmpTime
+#define evTimeSpec __evTimeSpec
+#define evTimeVal __evTimeVal
+
+#define evNowTime __evNowTime
+#define evUTCTime __evUTCTime
+#define evLastEventTime __evLastEventTime
+#define evSetTimer __evSetTimer
+#define evClearTimer __evClearTimer
+#define evConfigTimer __evConfigTimer
+#define evResetTimer __evResetTimer
+#define evSetIdleTimer __evSetIdleTimer
+#define evClearIdleTimer __evClearIdleTimer
+#define evResetIdleTimer __evResetIdleTimer
+#define evTouchIdleTimer __evTouchIdleTimer
+
+struct timespec evConsTime __P((time_t sec, long nsec));
+struct timespec evAddTime __P((struct timespec, struct timespec));
+struct timespec evSubTime __P((struct timespec, struct timespec));
+struct timespec evNowTime __P((void));
+struct timespec evUTCTime __P((void));
+struct timespec evLastEventTime __P((evContext));
+struct timespec evTimeSpec __P((struct timeval));
+struct timeval evTimeVal __P((struct timespec));
+int evCmpTime __P((struct timespec, struct timespec));
+int evSetTimer __P((evContext, evTimerFunc, void *, struct timespec,
+ struct timespec, evTimerID *));
+int evClearTimer __P((evContext, evTimerID));
+int evConfigTimer __P((evContext, evTimerID, const char *param,
+ int value));
+int evResetTimer __P((evContext, evTimerID, evTimerFunc, void *,
+ struct timespec, struct timespec));
+int evSetIdleTimer __P((evContext, evTimerFunc, void *, struct timespec,
+ evTimerID *));
+int evClearIdleTimer __P((evContext, evTimerID));
+int evResetIdleTimer __P((evContext, evTimerID, evTimerFunc, void *,
+ struct timespec));
+int evTouchIdleTimer __P((evContext, evTimerID));
+
+/* ev_waits.c */
+#define evWaitFor __evWaitFor
+#define evDo __evDo
+#define evUnwait __evUnwait
+#define evDefer __evDefer
+
+int evWaitFor __P((evContext, const void *, evWaitFunc, void *, evWaitID *));
+int evDo __P((evContext, const void *));
+int evUnwait __P((evContext, evWaitID));
+int evDefer __P((evContext, evWaitFunc, void *));
+
+#ifdef __EVENTLIB_P_DEFINED
+# undef __P
+#endif
+
+#endif /*_EVENTLIB_H*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/isc/heap.h b/usr/src/lib/libresolv2_joy/include/isc/heap.h
new file mode 100644
index 0000000000..384d507cf5
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/isc/heap.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1997,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+typedef int (*heap_higher_priority_func)(void *, void *);
+typedef void (*heap_index_func)(void *, int);
+typedef void (*heap_for_each_func)(void *, void *);
+
+typedef struct heap_context {
+ int array_size;
+ int array_size_increment;
+ int heap_size;
+ void **heap;
+ heap_higher_priority_func higher_priority;
+ heap_index_func index;
+} *heap_context;
+
+#define heap_new __heap_new
+#define heap_free __heap_free
+#define heap_insert __heap_insert
+#define heap_delete __heap_delete
+#define heap_increased __heap_increased
+#define heap_decreased __heap_decreased
+#define heap_element __heap_element
+#define heap_for_each __heap_for_each
+
+heap_context heap_new(heap_higher_priority_func, heap_index_func, int);
+int heap_free(heap_context);
+int heap_insert(heap_context, void *);
+int heap_delete(heap_context, int);
+int heap_increased(heap_context, int);
+int heap_decreased(heap_context, int);
+void * heap_element(heap_context, int);
+int heap_for_each(heap_context, heap_for_each_func, void *);
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/isc/irpmarshall.h b/usr/src/lib/libresolv2_joy/include/isc/irpmarshall.h
new file mode 100644
index 0000000000..244b3e3460
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/isc/irpmarshall.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: irpmarshall.h,v 1.4 2005/04/27 04:56:17 sra Exp $
+ */
+
+#ifndef _IRPMARSHALL_H_INCLUDED
+#define _IRPMARSHALL_H_INCLUDED
+
+/* Hide function names */
+#define irp_marshall_gr __irp_marshall_gr
+#define irp_marshall_ho __irp_marshall_ho
+#define irp_marshall_ne __irp_marshall_ne
+#define irp_marshall_ng __irp_marshall_ng
+#define irp_marshall_nw __irp_marshall_nw
+#define irp_marshall_pr __irp_marshall_pr
+#define irp_marshall_pw __irp_marshall_pw
+#define irp_marshall_sv __irp_marshall_sv
+#define irp_unmarshall_gr __irp_unmarshall_gr
+#define irp_unmarshall_ho __irp_unmarshall_ho
+#define irp_unmarshall_ne __irp_unmarshall_ne
+#define irp_unmarshall_ng __irp_unmarshall_ng
+#define irp_unmarshall_nw __irp_unmarshall_nw
+#define irp_unmarshall_pr __irp_unmarshall_pr
+#define irp_unmarshall_pw __irp_unmarshall_pw
+#define irp_unmarshall_sv __irp_unmarshall_sv
+
+#define MAXPADDRSIZE (sizeof "255.255.255.255" + 1)
+#define ADDR_T_STR(x) (x == AF_INET ? "AF_INET" :\
+ (x == AF_INET6 ? "AF_INET6" : "UNKNOWN"))
+
+/* See comment below on usage */
+int irp_marshall_pw(const struct passwd *, char **, size_t *);
+int irp_unmarshall_pw(struct passwd *, char *);
+int irp_marshall_gr(const struct group *, char **, size_t *);
+int irp_unmarshall_gr(struct group *, char *);
+int irp_marshall_sv(const struct servent *, char **, size_t *);
+int irp_unmarshall_sv(struct servent *, char *);
+int irp_marshall_pr(struct protoent *, char **, size_t *);
+int irp_unmarshall_pr(struct protoent *, char *);
+int irp_marshall_ho(struct hostent *, char **, size_t *);
+int irp_unmarshall_ho(struct hostent *, char *);
+int irp_marshall_ng(const char *, const char *, const char *,
+ char **, size_t *);
+int irp_unmarshall_ng(const char **, const char **, const char **, char *);
+int irp_marshall_nw(struct nwent *, char **, size_t *);
+int irp_unmarshall_nw(struct nwent *, char *);
+int irp_marshall_ne(struct netent *, char **, size_t *);
+int irp_unmarshall_ne(struct netent *, char *);
+
+/*! \file
+ * \brief
+ * Functions to marshall and unmarshall various system data structures. We
+ * use a printable ascii format that is as close to various system config
+ * files as reasonable (e.g. /etc/passwd format).
+ *
+ * We are not forgiving with unmarhsalling misformatted buffers. In
+ * particular whitespace in fields is not ignored. So a formatted password
+ * entry "brister :1364:100:...." will yield a username of "brister "
+ *
+ * We potentially do a lot of mallocs to fill fields that are of type
+ * (char **) like a hostent h_addr field. Building (for example) the
+ * h_addr field and its associated addresses all in one buffer is
+ * certainly possible, but not done here.
+ *
+ * The following description is true for all the marshalling functions:
+ *
+ * int irp_marshall_XX(struct yyyy *XX, char **buffer, size_t *len);
+ *
+ * The argument XX (of type struct passwd for example) is marshalled in the
+ * buffer pointed at by *BUFFER, which is of length *LEN. Returns 0
+ * on success and -1 on failure. Failure will occur if *LEN is
+ * smaller than needed.
+ *
+ * If BUFFER is NULL, then *LEN is set to the size of the buffer
+ * needed to marshall the data and no marshalling is actually done.
+ *
+ * If *BUFFER is NULL, then a buffer large enough will be allocated
+ * with memget() and the size allocated will be stored in *LEN. An extra 2
+ * bytes will be allocated for the client to append CRLF if wanted. The
+ * value of *LEN will include these two bytes.
+ *
+ * All the marshalling functions produce a buffer with the fields
+ * separated by colons (except for the hostent marshalling, which uses '@'
+ * to separate fields). Fields that have multiple subfields (like the
+ * gr_mem field in struct group) have their subparts separated by
+ * commas.
+ *
+ * int irp_unmarshall_XX(struct YYYYY *XX, char *buffer);
+ *
+ * The unmashalling functions break apart the buffer and store the
+ * values in the struct pointed to by XX. All pointer values inside
+ * XX are allocated with malloc. All arrays of pointers have a NULL
+ * as the last element.
+ */
+
+#endif
diff --git a/usr/src/lib/libresolv2_joy/include/isc/list.h b/usr/src/lib/libresolv2_joy/include/isc/list.h
new file mode 100644
index 0000000000..5fe9031141
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/isc/list.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1997,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef LIST_H
+#define LIST_H 1
+#include <isc/assertions.h>
+
+#define LIST(type) struct { type *head, *tail; }
+#define INIT_LIST(list) \
+ do { (list).head = NULL; (list).tail = NULL; } while (0)
+
+#define LINK(type) struct { type *prev, *next; }
+#define INIT_LINK_TYPE(elt, link, type) \
+ do { \
+ (elt)->link.prev = (type *)(-1); \
+ (elt)->link.next = (type *)(-1); \
+ } while (0)
+#define INIT_LINK(elt, link) \
+ INIT_LINK_TYPE(elt, link, void)
+#define LINKED(elt, link) ((void *)((elt)->link.prev) != (void *)(-1) && \
+ (void *)((elt)->link.next) != (void *)(-1))
+
+#define HEAD(list) ((list).head)
+#define TAIL(list) ((list).tail)
+#define EMPTY(list) ((list).head == NULL)
+
+#define PREPEND(list, elt, link) \
+ do { \
+ INSIST(!LINKED(elt, link));\
+ if ((list).head != NULL) \
+ (list).head->link.prev = (elt); \
+ else \
+ (list).tail = (elt); \
+ (elt)->link.prev = NULL; \
+ (elt)->link.next = (list).head; \
+ (list).head = (elt); \
+ } while (0)
+
+#define APPEND(list, elt, link) \
+ do { \
+ INSIST(!LINKED(elt, link));\
+ if ((list).tail != NULL) \
+ (list).tail->link.next = (elt); \
+ else \
+ (list).head = (elt); \
+ (elt)->link.prev = (list).tail; \
+ (elt)->link.next = NULL; \
+ (list).tail = (elt); \
+ } while (0)
+
+#define UNLINK_TYPE(list, elt, link, type) \
+ do { \
+ INSIST(LINKED(elt, link));\
+ if ((elt)->link.next != NULL) \
+ (elt)->link.next->link.prev = (elt)->link.prev; \
+ else { \
+ INSIST((list).tail == (elt)); \
+ (list).tail = (elt)->link.prev; \
+ } \
+ if ((elt)->link.prev != NULL) \
+ (elt)->link.prev->link.next = (elt)->link.next; \
+ else { \
+ INSIST((list).head == (elt)); \
+ (list).head = (elt)->link.next; \
+ } \
+ INIT_LINK_TYPE(elt, link, type); \
+ } while (0)
+#define UNLINK(list, elt, link) \
+ UNLINK_TYPE(list, elt, link, void)
+
+#define PREV(elt, link) ((elt)->link.prev)
+#define NEXT(elt, link) ((elt)->link.next)
+
+#define INSERT_BEFORE(list, before, elt, link) \
+ do { \
+ INSIST(!LINKED(elt, link));\
+ if ((before)->link.prev == NULL) \
+ PREPEND(list, elt, link); \
+ else { \
+ (elt)->link.prev = (before)->link.prev; \
+ (before)->link.prev = (elt); \
+ (elt)->link.prev->link.next = (elt); \
+ (elt)->link.next = (before); \
+ } \
+ } while (0)
+
+#define INSERT_AFTER(list, after, elt, link) \
+ do { \
+ INSIST(!LINKED(elt, link));\
+ if ((after)->link.next == NULL) \
+ APPEND(list, elt, link); \
+ else { \
+ (elt)->link.next = (after)->link.next; \
+ (after)->link.next = (elt); \
+ (elt)->link.next->link.prev = (elt); \
+ (elt)->link.prev = (after); \
+ } \
+ } while (0)
+
+#define ENQUEUE(list, elt, link) APPEND(list, elt, link)
+#define DEQUEUE(list, elt, link) UNLINK(list, elt, link)
+
+#endif /* LIST_H */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/isc/logging.h b/usr/src/lib/libresolv2_joy/include/isc/logging.h
new file mode 100644
index 0000000000..c539443ff8
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/isc/logging.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef LOGGING_H
+#define LOGGING_H
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <unistd.h>
+
+#define log_critical (-5)
+#define log_error (-4)
+#define log_warning (-3)
+#define log_notice (-2)
+#define log_info (-1)
+#define log_debug(level) (level)
+
+typedef enum { log_syslog, log_file, log_null } log_channel_type;
+
+#define LOG_MAX_VERSIONS 99
+
+#define LOG_CLOSE_STREAM 0x0001
+#define LOG_TIMESTAMP 0x0002
+#define LOG_TRUNCATE 0x0004
+#define LOG_USE_CONTEXT_LEVEL 0x0008
+#define LOG_PRINT_LEVEL 0x0010
+#define LOG_REQUIRE_DEBUG 0x0020
+#define LOG_CHANNEL_BROKEN 0x0040
+#define LOG_PRINT_CATEGORY 0x0080
+#define LOG_CHANNEL_OFF 0x0100
+
+typedef struct log_context *log_context;
+typedef struct log_channel *log_channel;
+
+#define LOG_OPTION_DEBUG 0x01
+#define LOG_OPTION_LEVEL 0x02
+
+#define log_open_stream __log_open_stream
+#define log_close_stream __log_close_stream
+#define log_get_stream __log_get_stream
+#define log_get_filename __log_get_filename
+#define log_check_channel __log_check_channel
+#define log_check __log_check
+#define log_vwrite __log_vwrite
+#define log_write __log_write
+#define log_new_context __log_new_context
+#define log_free_context __log_free_context
+#define log_add_channel __log_add_channel
+#define log_remove_channel __log_remove_channel
+#define log_option __log_option
+#define log_category_is_active __log_category_is_active
+#define log_new_syslog_channel __log_new_syslog_channel
+#define log_new_file_channel __log_new_file_channel
+#define log_set_file_owner __log_set_file_owner
+#define log_new_null_channel __log_new_null_channel
+#define log_inc_references __log_inc_references
+#define log_dec_references __log_dec_references
+#define log_get_channel_type __log_get_channel_type
+#define log_free_channel __log_free_channel
+#define log_close_debug_channels __log_close_debug_channels
+
+FILE * log_open_stream(log_channel);
+int log_close_stream(log_channel);
+FILE * log_get_stream(log_channel);
+char * log_get_filename(log_channel);
+int log_check_channel(log_context, int, log_channel);
+int log_check(log_context, int, int);
+#ifdef __GNUC__
+void log_vwrite(log_context, int, int, const char *,
+ va_list args)
+ __attribute__((__format__(__printf__, 4, 0)));
+void log_write(log_context, int, int, const char *, ...)
+ __attribute__((__format__(__printf__, 4, 5)));
+#else
+void log_vwrite(log_context, int, int, const char *,
+ va_list args);
+void log_write(log_context, int, int, const char *, ...);
+#endif
+int log_new_context(int, char **, log_context *);
+void log_free_context(log_context);
+int log_add_channel(log_context, int, log_channel);
+int log_remove_channel(log_context, int, log_channel);
+int log_option(log_context, int, int);
+int log_category_is_active(log_context, int);
+log_channel log_new_syslog_channel(unsigned int, int, int);
+log_channel log_new_file_channel(unsigned int, int, const char *,
+ FILE *, unsigned int,
+ unsigned long);
+int log_set_file_owner(log_channel, uid_t, gid_t);
+log_channel log_new_null_channel(void);
+int log_inc_references(log_channel);
+int log_dec_references(log_channel);
+log_channel_type log_get_channel_type(log_channel);
+int log_free_channel(log_channel);
+void log_close_debug_channels(log_context);
+
+#endif /* !LOGGING_H */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/isc/memcluster.h b/usr/src/lib/libresolv2_joy/include/isc/memcluster.h
new file mode 100644
index 0000000000..0923deb5e7
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/isc/memcluster.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1997,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef MEMCLUSTER_H
+#define MEMCLUSTER_H
+
+#include <stdio.h>
+
+#define meminit __meminit
+#ifdef MEMCLUSTER_DEBUG
+#define memget(s) __memget_debug(s, __FILE__, __LINE__)
+#define memput(p, s) __memput_debug(p, s, __FILE__, __LINE__)
+#else /*MEMCLUSTER_DEBUG*/
+#ifdef MEMCLUSTER_RECORD
+#define memget(s) __memget_record(s, __FILE__, __LINE__)
+#define memput(p, s) __memput_record(p, s, __FILE__, __LINE__)
+#else /*MEMCLUSTER_RECORD*/
+#define memget __memget
+#define memput __memput
+#endif /*MEMCLUSTER_RECORD*/
+#endif /*MEMCLUSTER_DEBUG*/
+#define memstats __memstats
+#define memactive __memactive
+
+int meminit(size_t, size_t);
+void * __memget(size_t);
+void __memput(void *, size_t);
+void * __memget_debug(size_t, const char *, int);
+void __memput_debug(void *, size_t, const char *, int);
+void * __memget_record(size_t, const char *, int);
+void __memput_record(void *, size_t, const char *, int);
+void memstats(FILE *);
+int memactive(void);
+
+#endif /* MEMCLUSTER_H */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/isc/misc.h b/usr/src/lib/libresolv2_joy/include/isc/misc.h
new file mode 100644
index 0000000000..b54f4ee6ed
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/isc/misc.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 1995-2001, 2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: misc.h,v 1.7 2008/11/14 02:36:51 marka Exp $
+ */
+
+#ifndef _ISC_MISC_H
+#define _ISC_MISC_H
+
+/*! \file */
+
+#include <stdio.h>
+#include <sys/types.h>
+
+#define bitncmp __bitncmp
+/*#define isc_movefile __isc_movefile */
+
+extern int bitncmp(const void *, const void *, int);
+extern int isc_movefile(const char *, const char *);
+
+extern int isc_gethexstring(unsigned char *, size_t, int, FILE *,
+ int *);
+extern void isc_puthexstring(FILE *, const unsigned char *, size_t,
+ size_t, size_t, const char *);
+extern void isc_tohex(const unsigned char *, size_t, char *);
+
+#endif /*_ISC_MISC_H*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/isc/platform.h b/usr/src/lib/libresolv2_joy/include/isc/platform.h
new file mode 100644
index 0000000000..2fc59b61a8
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/isc/platform.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/*
+ * Copyright (C) 2008 Internet Systems Consortium, Inc. ("ISC")
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* $Id: platform.h.in,v 1.3 2008/01/23 02:15:56 tbox Exp $ */
+
+/*! \file */
+
+#ifndef ISC_PLATFORM_H
+#define ISC_PLATFORM_H
+
+/*
+ * Define if the OS does not define struct timespec.
+ */
+#undef ISC_PLATFORM_NEEDTIMESPEC
+#ifdef ISC_PLATFORM_NEEDTIMESPEC
+#include <time.h> /* For time_t */
+struct timespec {
+ time_t tv_sec; /* seconds */
+ long tv_nsec; /* nanoseconds */
+};
+#endif
+
+#endif
diff --git a/usr/src/lib/libresolv2_joy/include/isc/tree.h b/usr/src/lib/libresolv2_joy/include/isc/tree.h
new file mode 100644
index 0000000000..96feaca68d
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/isc/tree.h
@@ -0,0 +1,59 @@
+/* tree.h - declare structures used by tree library
+ *
+ * vix 22jan93 [revisited; uses RCS, ANSI, POSIX; has bug fixes]
+ * vix 27jun86 [broken out of tree.c]
+ *
+ * $Id: tree.h,v 1.3 2005/04/27 04:56:18 sra Exp $
+ */
+
+
+#ifndef _TREE_H_INCLUDED
+#define _TREE_H_INCLUDED
+
+
+#ifndef __P
+# if defined(__STDC__) || defined(__GNUC__)
+# define __P(x) x
+# else
+# define __P(x) ()
+# endif
+#endif
+
+/*%
+ * tree_t is our package-specific anonymous pointer.
+ */
+#if defined(__STDC__) || defined(__GNUC__)
+typedef void *tree_t;
+#else
+typedef char *tree_t;
+#endif
+
+/*%
+ * Do not taint namespace
+ */
+#define tree_add __tree_add
+#define tree_delete __tree_delete
+#define tree_init __tree_init
+#define tree_mung __tree_mung
+#define tree_srch __tree_srch
+#define tree_trav __tree_trav
+
+
+typedef struct tree_s {
+ tree_t data;
+ struct tree_s *left, *right;
+ short bal;
+ }
+ tree;
+
+
+void tree_init __P((tree **));
+tree_t tree_srch __P((tree **, int (*)(), tree_t));
+tree_t tree_add __P((tree **, int (*)(), tree_t, void (*)()));
+int tree_delete __P((tree **, int (*)(), tree_t, void (*)()));
+int tree_trav __P((tree **, int (*)()));
+void tree_mung __P((tree **, void (*)()));
+
+
+#endif /* _TREE_H_INCLUDED */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/make_os_version b/usr/src/lib/libresolv2_joy/include/make_os_version
new file mode 100755
index 0000000000..3654490fee
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/make_os_version
@@ -0,0 +1,34 @@
+#!/bin/sh
+
+# Copyright (c) 1999 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+UNAME_R=`/usr/bin/uname -r`
+
+OS_MAJOR=`echo $UNAME_R | /usr/bin/sed -e 's/^\([^.]*\).*/\1/'`
+OS_MINOR=`echo $UNAME_R | /usr/bin/sed -e 's/^[^.]*\.\([^.]*\).*/\1/'`
+OS_VERSION=`echo $UNAME_R | tr '.' '_'`
+
+cat <<EOF > new_os_version.h
+#ifndef OS_VERSION_H
+#define OS_VERSION_H
+
+#define SUNOS_$OS_VERSION
+#define OS_MAJOR $OS_MAJOR
+#define OS_MINOR $OS_MINOR
+
+#endif
+EOF
+
+if [ -f os_version.h ]; then
+ if /usr/bin/cmp -s new_os_version.h os_version.h; then
+ /usr/bin/rm -f new_os_version.h
+ else
+ /usr/bin/rm -f os_version.h
+ /usr/bin/mv new_os_version.h os_version.h
+ fi
+else
+ /usr/bin/mv new_os_version.h os_version.h
+fi
diff --git a/usr/src/lib/libresolv2_joy/include/make_os_version.sh b/usr/src/lib/libresolv2_joy/include/make_os_version.sh
new file mode 100644
index 0000000000..3654490fee
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/make_os_version.sh
@@ -0,0 +1,34 @@
+#!/bin/sh
+
+# Copyright (c) 1999 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+UNAME_R=`/usr/bin/uname -r`
+
+OS_MAJOR=`echo $UNAME_R | /usr/bin/sed -e 's/^\([^.]*\).*/\1/'`
+OS_MINOR=`echo $UNAME_R | /usr/bin/sed -e 's/^[^.]*\.\([^.]*\).*/\1/'`
+OS_VERSION=`echo $UNAME_R | tr '.' '_'`
+
+cat <<EOF > new_os_version.h
+#ifndef OS_VERSION_H
+#define OS_VERSION_H
+
+#define SUNOS_$OS_VERSION
+#define OS_MAJOR $OS_MAJOR
+#define OS_MINOR $OS_MINOR
+
+#endif
+EOF
+
+if [ -f os_version.h ]; then
+ if /usr/bin/cmp -s new_os_version.h os_version.h; then
+ /usr/bin/rm -f new_os_version.h
+ else
+ /usr/bin/rm -f os_version.h
+ /usr/bin/mv new_os_version.h os_version.h
+ fi
+else
+ /usr/bin/mv new_os_version.h os_version.h
+fi
diff --git a/usr/src/lib/libresolv2_joy/include/port_after.h b/usr/src/lib/libresolv2_joy/include/port_after.h
new file mode 100644
index 0000000000..c3abf4b334
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/port_after.h
@@ -0,0 +1,539 @@
+/*
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
+ * Copyright (C) 2004-2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 2001-2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* $Id: port_after.h.in,v 1.60 2008/02/28 05:34:17 marka Exp $ */
+
+#ifndef port_after_h
+#define port_after_h
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#if (!defined(BSD)) || (BSD < 199306)
+#include <sys/bitypes.h>
+#endif
+#ifdef HAVE_INTTYPES_H
+#include <inttypes.h>
+#endif
+#ifdef HAVE_SYS_SELECT_H
+#include <sys/select.h>
+#endif /* HAVE_SYS_SELECT_H */
+
+#ifdef REENABLE_SEND
+#undef send
+#endif
+
+#undef NEED_PSELECT
+#undef HAVE_SA_LEN
+#undef HAVE_MINIMUM_IFREQ
+#define NEED_DAEMON 1
+#undef NEED_STRSEP
+#undef NEED_STRERROR
+#ifdef NEED_STRERROR
+const char *isc_strerror(int);
+#define strerror isc_strerror
+#endif
+/* HAS_INET6_STRUCTS and HAVE_SIN6_SCOPE_ID are defined by port_ipv6.h
+ * #define HAS_INET6_STRUCTS 1
+ * #define HAVE_SIN6_SCOPE_ID 1
+ */
+#include <port_ipv6.h>
+
+#undef NEED_IN6ADDR_ANY
+#undef HAS_IN_ADDR6
+#define HAVE_SOCKADDR_STORAGE 1
+#undef NEED_GETTIMEOFDAY
+#define HAVE_STRNDUP
+#undef USE_FIONBIO_IOCTL
+#undef INNETGR_ARGS
+
+#undef USE_IFNAMELINKID
+#define PORT_NONBLOCK O_NONBLOCK
+
+#ifndef _POSIX_PATH_MAX
+#define _POSIX_PATH_MAX 255
+#endif
+#ifndef PATH_MAX
+#define PATH_MAX _POSIX_PATH_MAX
+#endif
+
+/*
+ * We need to know the IPv6 address family number even on IPv4-only systems.
+ * Note that this is NOT a protocol constant, and that if the system has its
+ * own AF_INET6, different from ours below, all of BIND's libraries and
+ * executables will need to be recompiled after the system <sys/socket.h>
+ * has had this type added. The type number below is correct on most BSD-
+ * derived systems for which AF_INET6 is defined.
+ */
+#ifndef AF_INET6
+#define AF_INET6 24
+#endif
+
+#ifndef PF_INET6
+#define PF_INET6 AF_INET6
+#endif
+
+#ifdef HAS_IN_ADDR6
+/* Map to pre-RFC structure. */
+#define in6_addr in_addr6
+#endif
+
+#ifndef HAS_INET6_STRUCTS
+/* Replace with structure from later rev of O/S if known. */
+struct in6_addr {
+ u_int8_t s6_addr[16];
+};
+
+#define IN6ADDR_ANY_INIT \
+ {{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}
+
+#define IN6ADDR_LOOPBACK_INIT \
+ {{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}
+
+/* Replace with structure from later rev of O/S if known. */
+struct sockaddr_in6 {
+#ifdef HAVE_SA_LEN
+ u_int8_t sin6_len; /* length of this struct */
+ u_int8_t sin6_family; /* AF_INET6 */
+#else
+ u_int16_t sin6_family; /* AF_INET6 */
+#endif
+ u_int16_t sin6_port; /* transport layer port # */
+ u_int32_t sin6_flowinfo; /* IPv6 flow information */
+ struct in6_addr sin6_addr; /* IPv6 address */
+ u_int32_t sin6_scope_id; /* set of interfaces for a scope */
+};
+#endif /* HAS_INET6_STRUCTS */
+
+#ifdef BROKEN_IN6ADDR_INIT_MACROS
+#undef IN6ADDR_ANY_INIT
+#undef IN6ADDR_LOOPBACK_INIT
+#endif
+
+#ifdef _AIX
+#ifndef IN6ADDR_ANY_INIT
+#define IN6ADDR_ANY_INIT {{{ 0, 0, 0, 0 }}}
+#endif
+#ifndef IN6ADDR_LOOPBACK_INIT
+#if BYTE_ORDER == BIG_ENDIAN
+#define IN6ADDR_LOOPBACK_INIT {{{ 0, 0, 0, 1 }}}
+#else
+#define IN6ADDR_LOOPBACK_INIT {{{0, 0, 0, 0x01000000}}}
+#endif
+#endif
+#endif
+
+#ifndef IN6ADDR_ANY_INIT
+#ifdef s6_addr
+#define IN6ADDR_ANY_INIT \
+ {{{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}}
+#else
+#define IN6ADDR_ANY_INIT \
+ {{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}
+#endif
+
+#endif
+#ifndef IN6ADDR_LOOPBACK_INIT
+#ifdef s6_addr
+#define IN6ADDR_LOOPBACK_INIT \
+ {{{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}}
+#else
+#define IN6ADDR_LOOPBACK_INIT \
+ {{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}
+#endif
+#endif
+
+#ifndef HAVE_SOCKADDR_STORAGE
+#define __SS_MAXSIZE 128
+#define __SS_ALLIGSIZE (sizeof (long))
+
+struct sockaddr_storage {
+#ifdef HAVE_SA_LEN
+ u_int8_t ss_len; /* address length */
+ u_int8_t ss_family; /* address family */
+ char __ss_pad1[__SS_ALLIGSIZE - 2 * sizeof(u_int8_t)];
+ long __ss_align;
+ char __ss_pad2[__SS_MAXSIZE - 2 * __SS_ALLIGSIZE];
+#else
+ u_int16_t ss_family; /* address family */
+ char __ss_pad1[__SS_ALLIGSIZE - sizeof(u_int16_t)];
+ long __ss_align;
+ char __ss_pad2[__SS_MAXSIZE - 2 * __SS_ALLIGSIZE];
+#endif
+};
+#endif
+
+
+#if !defined(HAS_INET6_STRUCTS) || defined(NEED_IN6ADDR_ANY)
+#define in6addr_any isc_in6addr_any
+extern const struct in6_addr in6addr_any;
+#endif
+
+/*
+ * IN6_ARE_ADDR_EQUAL, IN6_IS_ADDR_UNSPECIFIED, IN6_IS_ADDR_V4COMPAT and
+ * IN6_IS_ADDR_V4MAPPED are broken in glibc 2.1.
+ */
+#ifdef __GLIBC__
+#if __GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ < 2)
+#undef IN6_ARE_ADDR_EQUAL
+#undef IN6_IS_ADDR_UNSPECIFIED
+#undef IN6_IS_ADDR_V4COMPAT
+#undef IN6_IS_ADDR_V4MAPPED
+#endif
+#endif
+
+#ifndef IN6_ARE_ADDR_EQUAL
+#define IN6_ARE_ADDR_EQUAL(a,b) \
+ (memcmp(&(a)->s6_addr[0], &(b)->s6_addr[0], sizeof(struct in6_addr)) == 0)
+#endif
+
+#ifndef IN6_IS_ADDR_UNSPECIFIED
+#define IN6_IS_ADDR_UNSPECIFIED(a) \
+ IN6_ARE_ADDR_EQUAL(a, &in6addr_any)
+#endif
+
+#ifndef IN6_IS_ADDR_LOOPBACK
+extern const struct in6_addr isc_in6addr_loopback;
+#define IN6_IS_ADDR_LOOPBACK(a) \
+ IN6_ARE_ADDR_EQUAL(a, &isc_in6addr_loopback)
+#endif
+
+#ifndef IN6_IS_ADDR_V4MAPPED
+#define IN6_IS_ADDR_V4MAPPED(a) \
+ ((a)->s6_addr[0] == 0x00 && (a)->s6_addr[1] == 0x00 && \
+ (a)->s6_addr[2] == 0x00 && (a)->s6_addr[3] == 0x00 && \
+ (a)->s6_addr[4] == 0x00 && (a)->s6_addr[5] == 0x00 && \
+ (a)->s6_addr[6] == 0x00 && (a)->s6_addr[9] == 0x00 && \
+ (a)->s6_addr[8] == 0x00 && (a)->s6_addr[9] == 0x00 && \
+ (a)->s6_addr[10] == 0xff && (a)->s6_addr[11] == 0xff)
+#endif
+
+#ifndef IN6_IS_ADDR_SITELOCAL
+#define IN6_IS_ADDR_SITELOCAL(a) \
+ (((a)->s6_addr[0] == 0xfe) && (((a)->s6_addr[1] & 0xc0) == 0xc0))
+#endif
+
+#ifndef IN6_IS_ADDR_LINKLOCAL
+#define IN6_IS_ADDR_LINKLOCAL(a) \
+ (((a)->s6_addr[0] == 0xfe) && (((a)->s6_addr[1] & 0xc0) == 0x80))
+#endif
+
+#ifndef IN6_IS_ADDR_MULTICAST
+#define IN6_IS_ADDR_MULTICAST(a) ((a)->s6_addr[0] == 0xff)
+#endif
+
+#ifndef __IPV6_ADDR_MC_SCOPE
+#define __IPV6_ADDR_MC_SCOPE(a) ((a)->s6_addr[1] & 0x0f)
+#endif
+
+#ifndef __IPV6_ADDR_SCOPE_SITELOCAL
+#define __IPV6_ADDR_SCOPE_SITELOCAL 0x05
+#endif
+#ifndef __IPV6_ADDR_SCOPE_ORGLOCAL
+#define __IPV6_ADDR_SCOPE_ORGLOCAL 0x08
+#endif
+
+#ifndef IN6_IS_ADDR_MC_SITELOCAL
+#define IN6_IS_ADDR_MC_SITELOCAL(a) \
+ (IN6_IS_ADDR_MULTICAST(a) && \
+ (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_SITELOCAL))
+#endif
+
+#ifndef IN6_IS_ADDR_MC_ORGLOCAL
+#define IN6_IS_ADDR_MC_ORGLOCAL(a) \
+ (IN6_IS_ADDR_MULTICAST(a) && \
+ (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_ORGLOCAL))
+#endif
+
+#ifndef INADDR_NONE
+#define INADDR_NONE 0xffffffff
+#endif
+
+#ifndef MAXHOSTNAMELEN
+#define MAXHOSTNAMELEN 256
+#endif
+
+#ifndef INET6_ADDRSTRLEN
+/* sizeof("aaaa:bbbb:cccc:dddd:eeee:ffff:123.123.123.123") */
+#define INET6_ADDRSTRLEN 46
+#endif
+
+#ifndef MIN
+#define MIN(x,y) (((x) <= (y)) ? (x) : (y))
+#endif
+
+#ifndef MAX
+#define MAX(x,y) (((x) >= (y)) ? (x) : (y))
+#endif
+
+#ifdef NEED_DAEMON
+int daemon(int nochdir, int noclose);
+#endif
+
+#ifdef NEED_STRSEP
+char * strsep(char **stringp, const char *delim);
+#endif
+
+#ifndef ALIGN
+#define ALIGN(p) (((uintptr_t)(p) + (sizeof(long) - 1)) & ~(sizeof(long) - 1))
+#endif
+
+#ifdef NEED_SETGROUPENT
+int setgroupent(int stayopen);
+#endif
+
+#ifdef NEED_GETGROUPLIST
+int getgrouplist(GETGROUPLIST_ARGS);
+#endif
+
+#ifdef POSIX_GETGRNAM_R
+int
+__posix_getgrnam_r(const char *, struct group *, char *, int, struct group **);
+#endif
+
+#ifdef NEED_GETGRNAM_R
+int
+getgrnam_r(const char *, struct group *, char *, size_t, struct group **);
+#endif
+
+#ifdef POSIX_GETGRGID_R
+int
+__posix_getgrgid_r(gid_t, struct group *, char *, int, struct group **) ;
+#endif
+
+#ifdef NEED_GETGRGID_R
+int
+getgrgid_r(gid_t, struct group *, char *, size_t, struct group **);
+#endif
+
+#ifdef NEED_GETGRENT_R
+GROUP_R_RETURN getgrent_r(struct group *gptr, GROUP_R_ARGS);
+#endif
+
+#ifdef NEED_SETGRENT_R
+GROUP_R_SET_RETURN setgrent_r(GROUP_R_ENT_ARGS);
+#endif
+
+#ifdef NEED_ENDGRENT_R
+GROUP_R_END_RETURN endgrent_r(GROUP_R_ENT_ARGS);
+#endif
+
+#if defined(NEED_INNETGR_R) && defined(NGR_R_RETURN)
+NGR_R_RETURN
+innetgr_r(const char *, const char *, const char *, const char *);
+#endif
+
+#ifdef NEED_SETNETGRENT_R
+#ifdef NGR_R_SET_ARGS
+NGR_R_SET_RETURN setnetgrent_r(NGR_R_SET_CONST char *netgroup, NGR_R_SET_ARGS);
+#else
+NGR_R_SET_RETURN setnetgrent_r(NGR_R_SET_CONST char *netgroup);
+#endif
+#endif
+
+#ifdef NEED_ENDNETGRENT_R
+#ifdef NGR_R_END_ARGS
+NGR_R_END_RETURN endnetgrent_r(NGR_R_END_ARGS);
+#else
+NGR_R_END_RETURN endnetgrent_r(void);
+#endif
+#endif
+
+#ifdef POSIX_GETPWNAM_R
+int
+__posix_getpwnam_r(const char *login, struct passwd *pwptr,
+ char *buf, size_t buflen, struct passwd **result);
+#endif
+
+#ifdef NEED_GETPWNAM_R
+int
+getpwnam_r(const char *login, struct passwd *pwptr,
+ char *buf, size_t buflen, struct passwd **result);
+#endif
+
+#ifdef POSIX_GETPWUID_R
+int
+__posix_getpwuid_r(uid_t uid, struct passwd *pwptr,
+ char *buf, int buflen, struct passwd **result);
+#endif
+
+#ifdef NEED_GETPWUID_R
+int
+getpwuid_r(uid_t uid, struct passwd *pwptr,
+ char *buf, size_t buflen, struct passwd **result);
+#endif
+
+#ifdef NEED_SETPWENT_R
+#ifdef PASS_R_ENT_ARGS
+PASS_R_SET_RETURN setpwent_r(PASS_R_ENT_ARGS);
+#else
+PASS_R_SET_RETURN setpwent_r(void);
+#endif
+
+#endif
+
+#ifdef NEED_SETPASSENT_R
+#ifdef PASS_R_ENT_ARGS
+PASS_R_SET_RETURN setpassent_r(int stayopen, PASS_R_ENT_ARGS);
+#else
+PASS_R_SET_RETURN setpassent_r(int stayopen);
+#endif
+#endif
+
+#ifdef NEED_GETPWENT_R
+PASS_R_RETURN getpwent_r(struct passwd *pwptr, PASS_R_ARGS);
+#endif
+
+#ifdef NEED_ENDPWENT_R
+void endpwent_r(void);
+#endif
+
+#ifdef NEED_SETPASSENT
+int setpassent(int stayopen);
+#endif
+
+#define gettimeofday isc__gettimeofday
+#ifdef NEED_GETTIMEOFDAY
+int isc__gettimeofday(struct timeval *tvp, struct _TIMEZONE *tzp);
+#else
+int isc__gettimeofday(struct timeval *tp, struct timezone *tzp);
+#endif
+
+int getnetgrent(NGR_R_CONST char **machinep, NGR_R_CONST char **userp,
+ NGR_R_CONST char **domainp);
+
+#ifdef NGR_R_ARGS
+int getnetgrent_r(NGR_R_CONST char **machinep, NGR_R_CONST char **userp,
+ NGR_R_CONST char **domainp, NGR_R_ARGS);
+#endif
+
+/* setnetgrent and endnetgrent are defined in sunw_port_after.h
+#ifdef SETNETGRENT_ARGS
+void setnetgrent(SETNETGRENT_ARGS);
+#else
+void setnetgrent(const char *netgroup);
+#endif
+
+void endnetgrent(void);
+*/
+
+#ifdef INNETGR_ARGS
+int innetgr(INNETGR_ARGS);
+#else
+int innetgr(const char *netgroup, const char *machine,
+ const char *user, const char *domain);
+#endif
+
+#ifdef NGR_R_SET_ARGS
+NGR_R_SET_RETURN
+setnetgrent_r(NGR_R_SET_CONST char *netgroup, NGR_R_SET_ARGS);
+#else
+NGR_R_SET_RETURN
+setnetgrent_r(NGR_R_SET_CONST char *netgroup);
+#endif
+
+#ifdef NEED_STRTOUL
+unsigned long strtoul(const char *, char **, int);
+#endif
+
+#ifdef NEED_SUN4PROTOS
+#include <stdarg.h>
+#ifndef __SIZE_TYPE__
+#define __SIZE_TYPE__ int
+#endif
+struct sockaddr;
+struct iovec;
+struct timeval;
+struct timezone;
+int fprintf(FILE *, const char *, ...);
+int getsockname(int, struct sockaddr *, int *);
+int getpeername(int, struct sockaddr *, int *);
+int socket(int, int, int);
+int connect(int, const struct sockaddr *, int);
+int writev(int, struct iovec *, int);
+int readv(int, struct iovec *, int);
+int send(int, const char *, int, int);
+void bzero(char *, int);
+int recvfrom(int, char *, int, int, struct sockaddr *, int *);
+int syslog(int, const char *, ... );
+int printf(const char *, ...);
+__SIZE_TYPE__ fread(void *, __SIZE_TYPE__, __SIZE_TYPE__, FILE *);
+__SIZE_TYPE__ fwrite(const void *, __SIZE_TYPE__, __SIZE_TYPE__, FILE *);
+int fclose(FILE *);
+int ungetc(int, FILE *);
+int scanf(const char *, ...);
+int sscanf(const char *, const char *, ... );
+int tolower(int);
+int toupper(int);
+int strcasecmp(const char *, const char *);
+int strncasecmp(const char *, const char *, int);
+int select(int, fd_set *, fd_set *, fd_set *, struct timeval *);
+#ifdef gettimeofday
+#undef gettimeofday
+int gettimeofday(struct timeval *, struct timezone *);
+#define gettimeofday isc__gettimeofday
+#else
+int gettimeofday(struct timeval *, struct timezone *);
+#endif
+long strtol(const char*, char **, int);
+int fseek(FILE *, long, int);
+int setsockopt(int, int, int, const char *, int);
+int bind(int, const struct sockaddr *, int);
+void bcopy(char *, char *, int);
+int fputc(char, FILE *);
+int listen(int, int);
+int accept(int, struct sockaddr *, int *);
+int getsockopt(int, int, int, char *, int *);
+int vfprintf(FILE *, const char *, va_list);
+int fflush(FILE *);
+int fgetc(FILE *);
+int fputs(const char *, FILE *);
+int fchown(int, int, int);
+void setbuf(FILE *, char *);
+int gethostname(char *, int);
+int rename(const char *, const char *);
+time_t time(time_t *);
+int fscanf(FILE *, const char *, ...);
+int sscanf(const char *, const char *, ...);
+int ioctl(int, int, caddr_t);
+void perror(const char *);
+
+#if !defined(__USE_FIXED_PROTOTYPES__) && !defined(__cplusplus) && !defined(__STRICT_ANSI__)
+/*
+ * 'gcc -ansi' changes the prototype for vsprintf().
+ * Use this prototype when 'gcc -ansi' is not in effect.
+ */
+char *vsprintf(char *, const char *, va_list);
+#endif
+#endif
+
+/* Solaris-specific changes */
+#include "sunw_port_after.h"
+
+#endif /* port_after_h */
diff --git a/usr/src/lib/libresolv2_joy/include/port_before.h b/usr/src/lib/libresolv2_joy/include/port_before.h
new file mode 100644
index 0000000000..2801139223
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/port_before.h
@@ -0,0 +1,201 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/*
+ * Copyright (C) 2005-2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 2001 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* $Id: port_before.h.in,v 1.31 2008/02/28 05:36:10 marka Exp $ */
+
+#ifndef port_before_h
+#define port_before_h
+/* Solaris-specific changes */
+#include "sunw_port_before.h"
+#include <config.h>
+
+#ifdef NEED_SUN4PROTOS
+#define _PARAMS(x) x
+#endif
+
+struct group; /* silence warning */
+struct passwd; /* silence warning */
+struct timeval; /* silence warning */
+struct timezone; /* silence warning */
+
+#ifdef HAVE_SYS_TIMERS_H
+#include <sys/timers.h>
+#endif
+#include <limits.h>
+
+#ifdef ISC_PLATFORM_NEEDTIMESPEC
+#include <time.h> /* For time_t */
+struct timespec {
+ time_t tv_sec; /* seconds */
+ long tv_nsec; /* nanoseconds */
+};
+#endif
+#ifndef HAVE_MEMMOVE
+#define memmove(a,b,c) bcopy(b,a,c)
+#endif
+
+#undef WANT_IRS_GR
+#undef WANT_IRS_NIS
+#undef WANT_IRS_PW
+
+#define BSD_COMP 1
+#define USE_POLL 1
+#define HAVE_MD5 1
+#define SOLARIS2 1
+
+/* DO_PTHREADS is conditionally defined in sunw_port_before.h
+ * #define DO_PTHREADS 1 */
+#define GETGROUPLIST_ARGS const char *name, gid_t basegid, gid_t *groups, int *ngroups
+#define GETNETBYADDR_ADDR_T long
+#define SETPWENT_VOID 1
+#define SETGRENT_VOID 1
+
+#define NET_R_ARGS char *buf, int buflen
+#define NET_R_BAD NULL
+#define NET_R_COPY buf, buflen
+#define NET_R_COPY_ARGS NET_R_ARGS
+#define NET_R_END_RESULT(x) /*empty*/
+#define NET_R_END_RETURN void
+#undef NET_R_ENT_ARGS /*empty*/
+#define NET_R_OK nptr
+#define NET_R_RETURN struct netent *
+#undef NET_R_SET_RESULT /*empty*/
+#undef NET_R_SETANSWER
+#define NET_R_SET_RETURN void
+#undef NETENT_DATA
+
+#define GROUP_R_RETURN struct group *
+#define GROUP_R_SET_RETURN void
+#undef GROUP_R_SET_RESULT /*empty*/
+#define GROUP_R_END_RETURN void
+#define GROUP_R_END_RESULT(x) /*empty*/
+#define GROUP_R_ARGS char *buf, int buflen
+#define GROUP_R_ENT_ARGS void
+#define GROUP_R_OK gptr
+#define GROUP_R_BAD NULL
+
+#define HOST_R_ARGS char *buf, int buflen, int *h_errnop
+#define HOST_R_BAD NULL
+#define HOST_R_COPY buf, buflen
+#define HOST_R_COPY_ARGS char *buf, int buflen
+#define HOST_R_END_RESULT(x) /*empty*/
+#define HOST_R_END_RETURN void
+#undef HOST_R_ENT_ARGS /*empty*/
+#define HOST_R_ERRNO *h_errnop = h_errno
+#define HOST_R_OK hptr
+#define HOST_R_RETURN struct hostent *
+#undef HOST_R_SETANSWER
+#undef HOST_R_SET_RESULT
+#define HOST_R_SET_RETURN void
+#undef HOSTENT_DATA
+
+#define NGR_R_ARGS char *buf, int buflen
+#define NGR_R_BAD (0)
+#define NGR_R_COPY buf, buflen
+#define NGR_R_COPY_ARGS NGR_R_ARGS
+#define NGR_R_CONST
+#define NGR_R_END_RESULT(x) /*empty*/
+#define NGR_R_END_RETURN void
+#undef NGR_R_END_ARGS /*empty*/
+#define NGR_R_OK 1
+#define NGR_R_RETURN int
+#define NGR_R_SET_CONST const
+#undef NGR_R_SET_RESULT /*empty*/
+#define NGR_R_SET_RETURN void
+#undef NGR_R_SET_ARGS
+
+
+#if !defined(NGR_R_SET_ARGS) && defined(NGR_R_END_ARGS)
+#define NGR_R_SET_ARGS NGR_R_END_ARGS
+#endif
+
+#define PROTO_R_ARGS char *buf, int buflen
+#define PROTO_R_BAD NULL
+#define PROTO_R_COPY buf, buflen
+#define PROTO_R_COPY_ARGS PROTO_R_ARGS
+#define PROTO_R_END_RESULT(x) /*empty*/
+#define PROTO_R_END_RETURN void
+#undef PROTO_R_ENT_ARGS /*empty*/
+#undef PROTO_R_ENT_UNUSED
+#define PROTO_R_OK pptr
+#undef PROTO_R_SETANSWER
+#define PROTO_R_RETURN struct protoent *
+#undef PROTO_R_SET_RESULT
+#define PROTO_R_SET_RETURN void
+#undef PROTOENT_DATA
+
+#define PASS_R_ARGS char *buf, int buflen
+#define PASS_R_BAD NULL
+#define PASS_R_COPY buf, buflen
+#define PASS_R_COPY_ARGS PASS_R_ARGS
+#define PASS_R_END_RESULT(x) /*empty*/
+#define PASS_R_END_RETURN void
+#undef PASS_R_ENT_ARGS
+#define PASS_R_OK pwptr
+#define PASS_R_RETURN struct passwd *
+#undef PASS_R_SET_RESULT /*empty*/
+#define PASS_R_SET_RETURN void
+
+#define SERV_R_ARGS char *buf, int buflen
+#define SERV_R_BAD NULL
+#define SERV_R_COPY buf, buflen
+#define SERV_R_COPY_ARGS SERV_R_ARGS
+#define SERV_R_END_RESULT(x) /*empty*/
+#define SERV_R_END_RETURN void
+#undef SERV_R_ENT_ARGS /*empty*/
+#undef SERV_R_ENT_UNUSED /*empty*/
+#define SERV_R_OK sptr
+#undef SERV_R_SETANSWER
+#define SERV_R_RETURN struct servent *
+#undef SERV_R_SET_RESULT
+#define SERV_R_SET_RETURN void
+
+
+
+#define DE_CONST(konst, var) \
+ do { \
+ union { const void *k; void *v; } _u; \
+ _u.k = konst; \
+ var = _u.v; \
+ } while (0)
+
+#define UNUSED(x) (x) = (x)
+
+#undef NEED_SOLARIS_BITTYPES
+#define ISC_SOCKLEN_T int
+
+#ifdef __GNUC__
+#define ISC_FORMAT_PRINTF(fmt, args) \
+ __attribute__((__format__(__printf__, fmt, args)))
+#else
+#define ISC_FORMAT_PRINTF(fmt, args)
+#endif
+
+/* Pull in host order macros when _XOPEN_SOURCE_EXTENDED is defined. */
+#if defined(__hpux) && defined(_XOPEN_SOURCE_EXTENDED)
+#include <sys/byteorder.h>
+#endif
+
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/port_netdb.h b/usr/src/lib/libresolv2_joy/include/port_netdb.h
new file mode 100644
index 0000000000..a308cc7efa
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/port_netdb.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#ifndef _PORT_NETDB_H
+#define _PORT_NETDB_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* AI_NUMERICSERV is not a valid flag for getaddrinfo */
+#define AI_MASK 0x0038 /* mask of valid flags */
+
+/* EAI_OVERFLOW was removed from ISC */
+#define EAI_BADHINTS 12
+
+/*
+ * these are libresolv2 functions that were renamed in previous versions to
+ * res_* because they conflict with libnsl or libsocket
+ */
+
+#define endhostent joy_res_endhostent /* libnsl */
+void endhostent __P((void));
+#define endnetent res_endnetent /* libsocket */
+void endnetent __P((void));
+#define freeaddrinfo res_freeaddrinfo /* libsocket */
+void freeaddrinfo __P((struct addrinfo *));
+#define freehostent res_freehostent /* libsocket and libnsl */
+void freehostent __P((struct hostent *));
+#define getaddrinfo res_getaddrinfo /* libsocket */
+int getaddrinfo __P((const char *, const char *,
+ const struct addrinfo *, struct addrinfo **));
+#define gethostbyaddr joy_res_gethostbyaddr /* libnsl */
+struct hostent *gethostbyaddr __P((const char *, int, int));
+#define gethostbyname joy_res_gethostbyname /* libnsl */
+struct hostent *gethostbyname __P((const char *));
+#define gethostbyname2 joy_res_gethostbyname2 /* lib/nsswitch/dns */
+struct hostent *gethostbyname2 __P((const char *, int));
+#define gethostent res_gethostent /* libnsl */
+struct hostent *gethostent __P((void));
+#define getipnodebyaddr res_getipnodebyaddr /* libnsl and libsocket */
+struct hostent *getipnodebyaddr __P((const void *, size_t, int, int *));
+#define getipnodebyname res_getipnodebyname /* libnsl and libsocket */
+struct hostent *getipnodebyname __P((const char *, int, int, int *));
+
+#define getnetbyaddr res_getnetbyaddr /* libsocket */
+struct netent *getnetbyaddr __P((unsigned long, int));
+#define getnetbyname res_getnetbyname /* libsocket */
+struct netent *getnetbyname __P((const char *));
+#define getnetent res_getnetent /* libsocket */
+struct netent *getnetent __P((void));
+#define sethostent joy_res_sethostent /* libnsl */
+void sethostent __P((int));
+#define setnetent res_setnetent /* libsocket */
+void setnetent __P((int));
+
+/*
+ * these are other irs functions now included in libresolv.so.2. We rename the
+ * ones that overlap with libsocket or libnsl
+ */
+
+/* endprotoent is in libsocket.so.1 */
+#define endprotoent res_endprotoent
+void endprotoent __P((void));
+
+/* endservent is in libsocket.so.1 */
+#define endservent res_endservent
+void endservent __P((void));
+
+/* note: the next two symbols are variables, not functions */
+
+/* gai_errlist is in libsocket.so.1 */
+#define gai_errlist res_gai_errlist
+
+/* gai_nerr is in libsocket.so.1 */
+#define gai_nerr res_gai_nerr
+
+/* gai_strerror is in libsocket.so.1 */
+#define gai_strerror res_gai_strerror
+const char *gai_strerror __P((int ecode));
+
+/* gethostbyaddr_r is in libnsl.so.1 */
+#define gethostbyaddr_r res_gethostbyaddr_r
+struct hostent *gethostbyaddr_r __P((const char *addr, int len, int type,
+ struct hostent *hptr, char *buf,
+ int buflen, int *h_errnop));
+
+/* gethostbyname_r is in libnsl.so.1 */
+#define gethostbyname_r res_gethostbyname_r
+struct hostent *gethostbyname_r __P((const char *name, struct hostent *hptr,
+ char *buf, int buflen, int *h_errnop));
+
+/* gethostent_r is in libnsl.so.1 */
+#define gethostent_r res_gethostent_r
+struct hostent *gethostent_r __P((struct hostent *hptr, char *buf, int buflen,
+ int *h_errnop));
+
+/* getnameinfo is in libsocket.so.1 */
+#define getnameinfo res_getnameinfo
+int getnameinfo __P((const struct sockaddr *, size_t, char *,
+ size_t, char *, size_t, int));
+
+/* getnetbyaddr_r is in libsocket.so.1 */
+#define getnetbyaddr_r res_getnetbyaddr_r
+struct netent *getnetbyaddr_r __P((long, int, struct netent *, char *, int));
+
+/* getnetbyname_r is in libsocket.so.1 */
+#define getnetbyname_r res_getnetbyname_r
+struct netent *getnetbyname_r __P((const char *, struct netent *, char *, int));
+
+/* getnetent_r is in libsocket.so.1 */
+#define getnetent_r res_getnetent_r
+struct netent *getnetent_r __P((struct netent *, char *, int));
+
+/* getprotobyname is in libsocket.so.1 */
+#define getprotobyname res_getprotobyname
+struct protoent *getprotobyname __P((const char *));
+
+/* getprotobyname_r is in libsocket.so.1 */
+#define getprotobyname_r res_getprotobyname_r
+struct protoent *getprotobyname_r __P((const char *, struct protoent *,
+ char *, int));
+
+/* getprotobynumber is in libsocket.so.1 */
+#define getprotobynumber res_getprotobynumber
+struct protoent *getprotobynumber __P((int));
+
+/* getprotobynumber_r is in libsocket.so.1 */
+#define getprotobynumber_r res_getprotobynumber_r
+struct protoent *getprotobynumber_r __P((int,
+ struct protoent *, char *, int));
+
+/* getprotoent is in libsocket.so.1 */
+#define getprotoent res_getprotoent
+struct protoent *getprotoent __P((void));
+
+/* getprotoent_r is in libsocket.so.1 */
+#define getprotoent_r res_getprotoent_r
+struct protoent *getprotoent_r __P((struct protoent *, char *, int));
+
+/* getservbyname is in libsocket.so.1 and libnsl.so.1 */
+#define getservbyname res_getservbyname
+struct servent *getservbyname __P((const char *, const char *));
+
+/* getservbyname_r is in libsocket.so.1 and libnsl.so.1 */
+#define getservbyname_r res_getservbyname_r
+struct servent *getservbyname_r __P((const char *name, const char *,
+ struct servent *, char *, int));
+
+/* getservbyport is in libsocket.so.1 and libnsl.so.1 */
+#define getservbyport res_getservbyport
+struct servent *getservbyport __P((int, const char *));
+
+/* getservbyport_r is in libsocket.so.1 and libnsl.so.1 */
+#define getservbyport_r res_getservbyport_r
+struct servent *getservbyport_r __P((int port, const char *,
+ struct servent *, char *, int));
+
+/* getservent is in libsocket.so.1 */
+#define getservent res_getservent
+struct servent *getservent __P((void));
+
+/* getservent_r is in libsocket.so.1 */
+#define getservent_r res_getservent_r
+struct servent *getservent_r __P((struct servent *, char *, int));
+
+/* innetgr is in libsocket.so.1 */
+#define innetgr res_innetgr
+int innetgr __P((const char *, const char *, const char *, const char *));
+
+/* setprotoent is in libsocket.so.1 */
+#define setprotoent res_setprotoent
+void setprotoent __P((int));
+
+/* setservent is in libsocket.so.1 */
+#define setservent res_setservent
+void setservent __P((int));
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _PORT_NETDB_H */
diff --git a/usr/src/lib/libresolv2_joy/include/port_resolv.h b/usr/src/lib/libresolv2_joy/include/port_resolv.h
new file mode 100644
index 0000000000..cd1a97d40c
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/port_resolv.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _PORT_RESOLV_H
+#define _PORT_RESOLV_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* RES_NSID has the same value as RES_NO_NIBBLE, which has been deleted */
+#define RES_NSID 0x00040000 /* request name server ID */
+
+/* RES_DEFAULT has a new value in libbind-6.0 */
+#undef RES_DEFAULT
+#define RES_DEFAULT (RES_RECURSE | RES_DEFNAMES | \
+ RES_DNSRCH | RES_NO_NIBBLE2)
+
+#ifndef __ultrix__
+u_int16_t _getshort __P((const uchar_t *));
+u_int32_t _getlong __P((const uchar_t *));
+#endif
+
+/* rename functions so they can be wrapped (see sunw/sunw_wrappers.c */
+#define p_option isc_p_option
+const char *p_option(ulong_t option);
+#define p_secstodate isc_p_secstodate
+char *p_secstodate(ulong_t secs);
+
+/* prevent namespace pollution */
+#define res_protocolnumber __res_protocolnumber
+#define res_servicenumber __res_servicenumber
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _PORT_RESOLV_H */
diff --git a/usr/src/lib/libresolv2_joy/include/probe_ipv6 b/usr/src/lib/libresolv2_joy/include/probe_ipv6
new file mode 100755
index 0000000000..371ac96c55
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/probe_ipv6
@@ -0,0 +1,73 @@
+#!/bin/sh
+
+# Copyright 2003 by Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+set -e
+PATH=/bin:/usr/bin:$PATH; export PATH
+trap "rm -f tmp$$[abc].[oc]" 0
+target=port_ipv6
+new=new_${target}.h
+old=${target}.h
+
+cat > tmp$$a.c <<EOF
+#include <sys/types.h>
+#include <netinet/in.h>
+struct sockaddr_in6 xx;
+EOF
+
+cat > tmp$$b.c <<EOF
+#include <sys/types.h>
+#include <netinet/in.h>
+struct in6_addr xx;
+EOF
+
+cat > tmp$$c.c <<EOF
+#include <sys/types.h>
+#include <netinet/in.h>
+struct sockaddr_in6 xx;
+main() { xx.sin6_scope_id = 0; }
+EOF
+
+cat > ${new} <<EOF
+
+/* This file is automatically generated. Do Not Edit. */
+
+#ifndef ${target}_h
+#define ${target}_h
+
+EOF
+
+if ${CC} -c tmp$$a.c > /dev/null 2>&1
+then
+ echo "#define HAS_INET6_STRUCTS" >> ${new}
+ if ${CC} -c tmp$$b.c > /dev/null 2>&1
+ then
+ :
+ else
+ echo "#define in6_addr in_addr6" >> ${new}
+ fi
+ if ${CC} -c tmp$$c.c > /dev/null 2>&1
+ then
+ echo "#define HAVE_SIN6_SCOPE_ID" >> ${new}
+ else
+ echo "#undef HAVE_SIN6_SCOPE_ID" >> ${new}
+ fi
+else
+ echo "#undef HAS_INET6_STRUCTS" >> ${new}
+fi
+echo >> ${new}
+echo "#endif" >> ${new}
+if [ -f ${old} ]; then
+ if cmp -s ${new} ${old} ; then
+ rm -f ${new}
+ else
+ rm -f ${old}
+ mv ${new} ${old}
+ fi
+else
+ mv ${new} ${old}
+fi
+exit 0
diff --git a/usr/src/lib/libresolv2_joy/include/probe_ipv6.sh b/usr/src/lib/libresolv2_joy/include/probe_ipv6.sh
new file mode 100644
index 0000000000..371ac96c55
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/probe_ipv6.sh
@@ -0,0 +1,73 @@
+#!/bin/sh
+
+# Copyright 2003 by Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+set -e
+PATH=/bin:/usr/bin:$PATH; export PATH
+trap "rm -f tmp$$[abc].[oc]" 0
+target=port_ipv6
+new=new_${target}.h
+old=${target}.h
+
+cat > tmp$$a.c <<EOF
+#include <sys/types.h>
+#include <netinet/in.h>
+struct sockaddr_in6 xx;
+EOF
+
+cat > tmp$$b.c <<EOF
+#include <sys/types.h>
+#include <netinet/in.h>
+struct in6_addr xx;
+EOF
+
+cat > tmp$$c.c <<EOF
+#include <sys/types.h>
+#include <netinet/in.h>
+struct sockaddr_in6 xx;
+main() { xx.sin6_scope_id = 0; }
+EOF
+
+cat > ${new} <<EOF
+
+/* This file is automatically generated. Do Not Edit. */
+
+#ifndef ${target}_h
+#define ${target}_h
+
+EOF
+
+if ${CC} -c tmp$$a.c > /dev/null 2>&1
+then
+ echo "#define HAS_INET6_STRUCTS" >> ${new}
+ if ${CC} -c tmp$$b.c > /dev/null 2>&1
+ then
+ :
+ else
+ echo "#define in6_addr in_addr6" >> ${new}
+ fi
+ if ${CC} -c tmp$$c.c > /dev/null 2>&1
+ then
+ echo "#define HAVE_SIN6_SCOPE_ID" >> ${new}
+ else
+ echo "#undef HAVE_SIN6_SCOPE_ID" >> ${new}
+ fi
+else
+ echo "#undef HAS_INET6_STRUCTS" >> ${new}
+fi
+echo >> ${new}
+echo "#endif" >> ${new}
+if [ -f ${old} ]; then
+ if cmp -s ${new} ${old} ; then
+ rm -f ${new}
+ else
+ rm -f ${old}
+ mv ${new} ${old}
+ fi
+else
+ mv ${new} ${old}
+fi
+exit 0
diff --git a/usr/src/lib/libresolv2_joy/include/res_update.h b/usr/src/lib/libresolv2_joy/include/res_update.h
new file mode 100644
index 0000000000..0c6967db56
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/res_update.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1999 by Internet Software Consortium, Inc.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: res_update.h,v 1.3 2005/04/27 04:56:15 sra Exp $
+ */
+
+#ifndef __RES_UPDATE_H
+#define __RES_UPDATE_H
+
+/*! \file */
+
+#include <sys/types.h>
+#include <arpa/nameser.h>
+#include <isc/list.h>
+#include <resolv_joy.h>
+
+#ifndef ORIGINAL_ISC_CODE
+/* definition of u_int32_t needed on Solaris */
+#include <sys/bitypes.h>
+/* need to rename ns_updrec before we define it here */
+#include "arpa/port_nameser.h"
+#endif /* ORIGINAL_ISC_CODE */
+
+
+/*%
+ * This RR-like structure is particular to UPDATE.
+ */
+struct ns_updrec {
+ LINK(struct ns_updrec) r_link, r_glink;
+ ns_sect r_section; /*%< ZONE/PREREQUISITE/UPDATE */
+ char * r_dname; /*%< owner of the RR */
+ ns_class r_class; /*%< class number */
+ ns_type r_type; /*%< type number */
+ u_int32_t r_ttl; /*%< time to live */
+ u_char * r_data; /*%< rdata fields as text string */
+ u_int r_size; /*%< size of r_data field */
+ int r_opcode; /*%< type of operation */
+ /* following fields for private use by the resolver/server routines */
+ struct databuf *r_dp; /*%< databuf to process */
+ struct databuf *r_deldp; /*%< databuf's deleted/overwritten */
+ u_int r_zone; /*%< zone number on server */
+};
+typedef struct ns_updrec ns_updrec;
+typedef LIST(ns_updrec) ns_updque;
+
+#ifdef ORIGINAL_ISC_CODE
+#define res_mkupdate __res_mkupdate
+#define res_update __res_update
+#define res_mkupdrec __res_mkupdrec
+#define res_freeupdrec __res_freeupdrec
+#define res_nmkupdate __res_nmkupdate
+#define res_nupdate __res_nupdate
+#else
+/* these are renamed in "port_nameser.h" */
+#endif /* ORIGINAL_ISC_CODE */
+
+
+int res_mkupdate __P((ns_updrec *, u_char *, int));
+int res_update __P((ns_updrec *));
+ns_updrec * res_mkupdrec __P((int, const char *, u_int, u_int, u_long));
+void res_freeupdrec __P((ns_updrec *));
+int res_nmkupdate __P((res_state, ns_updrec *, u_char *, int));
+int res_nupdate __P((res_state, ns_updrec *, ns_tsig_key *));
+
+#endif /*__RES_UPDATE_H*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/resolv_mt.h b/usr/src/lib/libresolv2_joy/include/resolv_mt.h
new file mode 100644
index 0000000000..500d4d764c
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/resolv_mt.h
@@ -0,0 +1,47 @@
+#ifndef _RESOLV_MT_H
+#define _RESOLV_MT_H
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+/* Access functions for the libresolv private interface */
+
+int __res_enable_mt(void);
+int __res_disable_mt(void);
+
+/* Per-thread context */
+
+typedef struct {
+int no_hosts_fallback_private;
+int retry_save;
+int retry_private;
+char inet_nsap_ntoa_tmpbuf[255*3];
+char sym_ntos_unname[20];
+char sym_ntop_unname[20];
+char p_option_nbuf[40];
+char p_time_nbuf[40];
+char precsize_ntoa_retbuf[sizeof "90000000.00"];
+char loc_ntoa_tmpbuf[sizeof
+"1000 60 60.000 N 1000 60 60.000 W -12345678.00m 90000000.00m 90000000.00m 90000000.00m"];
+char p_secstodate_output[15];
+} mtctxres_t;
+
+/* Thread-specific data (TSD) */
+
+mtctxres_t *___mtctxres(void);
+#define mtctxres (___mtctxres())
+
+/* Various static data that should be TSD */
+
+#define sym_ntos_unname (mtctxres->sym_ntos_unname)
+#define sym_ntop_unname (mtctxres->sym_ntop_unname)
+#define inet_nsap_ntoa_tmpbuf (mtctxres->inet_nsap_ntoa_tmpbuf)
+#define p_option_nbuf (mtctxres->p_option_nbuf)
+#define p_time_nbuf (mtctxres->p_time_nbuf)
+#define precsize_ntoa_retbuf (mtctxres->precsize_ntoa_retbuf)
+#define loc_ntoa_tmpbuf (mtctxres->loc_ntoa_tmpbuf)
+#define p_secstodate_output (mtctxres->p_secstodate_output)
+
+#endif /* _RESOLV_MT_H */
diff --git a/usr/src/lib/libresolv2_joy/include/sunw_port_after.h b/usr/src/lib/libresolv2_joy/include/sunw_port_after.h
new file mode 100644
index 0000000000..bff64a74c1
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/sunw_port_after.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SUNW_PORT_AFTER_H
+#define _SUNW_PORT_AFTER_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * rename setnetgrent and endnetgrent which were formerly in a separate irs
+ * shared library. These functions should come from libc.so
+ */
+#define setnetgrent res_setnetgrent
+#ifdef SETNETGRENT_ARGS
+void setnetgrent(SETNETGRENT_ARGS);
+#else
+void setnetgrent(const char *netgroup);
+#endif
+
+#define endnetgrent res_endnetgrent
+void endnetgrent(void);
+
+
+/*
+ * include ports for the public header files. ISC's versions are quite different
+ * from those currently in OpenSolaris.
+ */
+
+#ifdef _RESOLV_JOY_H
+#include <port_resolv.h>
+#endif /* _RESOLV_JOY_H */
+
+#ifdef _NETDB_H
+#include <port_netdb.h>
+#endif /* _NETDB_H */
+
+#ifdef _ARPA_INET_H
+#include <arpa/port_inet.h>
+#endif /* _ARPA_INET_H */
+
+#ifdef _ARPA_NAMESER_H
+#include <arpa/port_nameser.h>
+#endif /* _ARPA_NAMESER_H */
+
+
+#ifdef _ARPA_NAMESER_COMPAT_H
+/* no changes */
+#endif /* _ARPA_NAMESER_COMPAT_H */
+
+/* version-specific defines */
+#include <os_version.h>
+
+/*
+ * Prior to 2.6, Solaris needs a prototype for gethostname().
+ */
+#if (OS_MAJOR == 5 && OS_MINOR < 6)
+extern int gethostname(char *, size_t);
+#endif
+/*
+ * gethostid() was not available until 2.5
+ * setsockopt(SO_REUSEADDR) fails on unix domain sockets before 2.5
+ * use ioctl(FIONBIO) rather than fcntl() calls to set/clear non-blocking i/o.
+ */
+#if (OS_MAJOR == 5 && OS_MINOR < 5)
+#define GET_HOST_ID_MISSING
+#define NO_UNIX_REUSEADDR
+#define USE_FIONBIO_IOCTL
+#endif
+
+#if (OS_MAJOR == 5 && OS_MINOR < 11)
+#define NEED_STRSEP
+extern char *strsep(char **, const char *);
+#endif
+
+
+/*
+ * Solaris 2.5 and later have getrlimit(), setrlimit() and getrusage().
+ */
+#if (OS_MAJOR > 5 || (OS_MAJOR == 5 && OS_MINOR >= 5))
+#include <sys/resource.h>
+#define HAVE_GETRUSAGE
+#define RLIMIT_TYPE rlim_t
+#define RLIMIT_FILE_INFINITY
+#endif
+
+/* the default syslog facility of named/lwresd. */
+#ifndef ISC_FACILITY
+#define ISC_FACILITY LOG_DAEMON
+#endif
+
+
+/*
+ * Solaris 8 has if_nametoindex().
+ */
+#if (OS_MAJOR > 5 || (OS_MAJOR == 5 && OS_MINOR >= 8))
+#define USE_IFNAMELINKID
+#endif
+
+#undef ALIGN
+#if (OS_MAJOR == 5 && OS_MINOR > 8)
+#define ALIGN(x) (((uintptr_t)(x) + (sizeof (char *) - 1UL)) & \
+ ~(sizeof (char *) - 1UL))
+#else
+#define ALIGN(x) (((unsigned long)(x) + (sizeof (char *) - 1UL)) & \
+ ~(sizeof (char *) - 1UL))
+#endif
+
+#if (OS_MAJOR == 5 && OS_MINOR < 5)
+#ifndef USE_FIONBIO_IOCTL
+#define USE_FIONBIO_IOCTL 1
+#endif
+#endif
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SUNW_PORT_AFTER_H */
diff --git a/usr/src/lib/libresolv2_joy/include/sunw_port_before.h b/usr/src/lib/libresolv2_joy/include/sunw_port_before.h
new file mode 100644
index 0000000000..776e311fcc
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/sunw_port_before.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SUNW_PORT_BEFORE_H
+#define _SUNW_PORT_BEFORE_H
+
+#ifdef SUNW_OPTIONS
+#include <conf/sunoptions.h>
+#endif
+
+/* version-specific defines */
+#include <os_version.h>
+#if (OS_MAJOR == 5 && OS_MINOR < 6)
+#ifndef SOLARIS_BITTYPES
+#define NEED_SOLARIS_BITTYPES 1
+#endif
+#endif
+
+#if (OS_MAJOR == 5 && OS_MINOR < 5)
+#undef HAS_PTHREADS
+#else
+#define HAS_PTHREADS
+#endif
+
+#if defined(HAS_PTHREADS) && defined(_REENTRANT)
+#define DO_PTHREADS
+#endif
+
+/*
+ * need these if we are using public versions of nameser.h, resolv.h, and
+ * inet.h
+ */
+#include <sys/param.h>
+#if (!defined(BSD)) || (BSD < 199306)
+#include <sys/bitypes.h>
+#else
+#include <sys/types.h>
+#endif
+#include <sys/cdefs.h>
+
+#endif /* _SUNW_PORT_BEFORE_H */
diff --git a/usr/src/lib/libresolv2_joy/include/sys/bitypes.h b/usr/src/lib/libresolv2_joy/include/sys/bitypes.h
new file mode 100644
index 0000000000..54fb42bad7
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/sys/bitypes.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2004, 2007, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 1998, 1999, 2001 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* $Id: bitypes.h,v 1.7 2008/11/14 02:54:35 tbox Exp $ */
+
+#ifndef __BIT_TYPES_DEFINED__
+#define __BIT_TYPES_DEFINED__
+
+ /*
+ * Basic integral types. Omit the typedef if
+ * not possible for a machine/compiler combination.
+ */
+
+#ifdef NEED_SOLARIS_BITTYPES
+ typedef /*signed*/ char int8_t;
+ typedef short int16_t;
+ typedef int int32_t;
+#endif
+ typedef unsigned char u_int8_t;
+ typedef unsigned short u_int16_t;
+ typedef unsigned int u_int32_t;
+
+#endif /* __BIT_TYPES_DEFINED__ */
diff --git a/usr/src/lib/libresolv2_joy/include/sys/cdefs.h b/usr/src/lib/libresolv2_joy/include/sys/cdefs.h
new file mode 100644
index 0000000000..67aac00cc7
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/sys/cdefs.h
@@ -0,0 +1,144 @@
+/*
+ * ++Copyright++ 1991, 1993
+ * -
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * -
+ * Portions Copyright (c) 1993 by Digital Equipment Corporation.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies, and that
+ * the name of Digital Equipment Corporation not be used in advertising or
+ * publicity pertaining to distribution of the document or software without
+ * specific, written prior permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
+ * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ * -
+ * --Copyright--
+ */
+
+/*
+ * @(#)cdefs.h 8.1 (Berkeley) 6/2/93
+ * $Id: cdefs.h,v 1.2 2004/07/19 05:54:07 marka Exp $
+ */
+
+#ifndef _CDEFS_H_
+#define _CDEFS_H_
+
+#if defined(__cplusplus)
+#define __BEGIN_DECLS extern "C" {
+#define __END_DECLS };
+#else
+#define __BEGIN_DECLS
+#define __END_DECLS
+#endif
+
+/*
+ * The __CONCAT macro is used to concatenate parts of symbol names, e.g.
+ * with "#define OLD(foo) __CONCAT(old,foo)", OLD(foo) produces oldfoo.
+ * The __CONCAT macro is a bit tricky -- make sure you don't put spaces
+ * in between its arguments. __CONCAT can also concatenate double-quoted
+ * strings produced by the __STRING macro, but this only works with ANSI C.
+ */
+#if defined(__STDC__) || defined(__cplusplus)
+#define __P(protos) protos /* full-blown ANSI C */
+#define __CONCAT(x,y) x ## y
+#define __STRING(x) #x
+
+#define __const const /* define reserved names to standard */
+#define __signed signed
+#define __volatile volatile
+#if defined(__cplusplus)
+#define __inline inline /* convert to C++ keyword */
+#else
+#ifndef __GNUC__
+#define __inline /* delete GCC keyword */
+#endif /* !__GNUC__ */
+#endif /* !__cplusplus */
+
+#else /* !(__STDC__ || __cplusplus) */
+#define __P(protos) () /* traditional C preprocessor */
+#define __CONCAT(x,y) x/**/y
+#define __STRING(x) "x"
+
+#ifndef __GNUC__
+#define __const /* delete pseudo-ANSI C keywords */
+#define __inline
+#define __signed
+#define __volatile
+/*
+ * In non-ANSI C environments, new programs will want ANSI-only C keywords
+ * deleted from the program and old programs will want them left alone.
+ * When using a compiler other than gcc, programs using the ANSI C keywords
+ * const, inline etc. as normal identifiers should define -DNO_ANSI_KEYWORDS.
+ * When using "gcc -traditional", we assume that this is the intent; if
+ * __GNUC__ is defined but __STDC__ is not, we leave the new keywords alone.
+ */
+#ifndef NO_ANSI_KEYWORDS
+#define const /* delete ANSI C keywords */
+#define inline
+#define signed
+#define volatile
+#endif
+#endif /* !__GNUC__ */
+#endif /* !(__STDC__ || __cplusplus) */
+
+/*
+ * GCC1 and some versions of GCC2 declare dead (non-returning) and
+ * pure (no side effects) functions using "volatile" and "const";
+ * unfortunately, these then cause warnings under "-ansi -pedantic".
+ * GCC2 uses a new, peculiar __attribute__((attrs)) style. All of
+ * these work for GNU C++ (modulo a slight glitch in the C++ grammar
+ * in the distribution version of 2.5.5).
+ */
+#if !defined(__GNUC__) || __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 5)
+#define __attribute__(x) /* delete __attribute__ if non-gcc or gcc1 */
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+#define __dead __volatile
+#define __pure __const
+#endif
+#endif
+
+/* Delete pseudo-keywords wherever they are not available or needed. */
+#ifndef __dead
+#define __dead
+#define __pure
+#endif
+
+#endif /* !_CDEFS_H_ */
diff --git a/usr/src/lib/libresolv2_joy/sparc/Makefile b/usr/src/lib/libresolv2_joy/sparc/Makefile
new file mode 100644
index 0000000000..a333224278
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/sparc/Makefile
@@ -0,0 +1,30 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/libresolv2_joy/sparcv9/Makefile b/usr/src/lib/libresolv2_joy/sparcv9/Makefile
new file mode 100644
index 0000000000..ceed393e0d
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/sparcv9/Makefile
@@ -0,0 +1,38 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../Makefile.com
+include ../../Makefile.lib.64
+
+# With the adition of BIND 8.3.3, the symbol table for 64 bit went over
+# the limit for Kpic, so we've added -KPIC here, for just the 64 bit
+# library. This avoids compiling the 32-bit library with PIC unnecessarily.
+
+sparcv9_C_PICFLAGS = -K PIC
+sparcv9_CC_PICFLAGS = -KPIC
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64)
diff --git a/usr/src/lib/libscf/inc/libscf.h b/usr/src/lib/libscf/inc/libscf.h
index c00a59dc5d..1940308f92 100644
--- a/usr/src/lib/libscf/inc/libscf.h
+++ b/usr/src/lib/libscf/inc/libscf.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#ifndef _LIBSCF_H
@@ -832,8 +833,13 @@ int smf_notify_del_params(const char *, const char *, int32_t);
/*
* SMF exit status definitions
+ *
+ * The SMF_EXIT_NODAEMON exit status should be used when a method does not
+ * need to run any persistent process. This indicates success, abandons the
+ * contract, and allows dependencies to be met.
*/
#define SMF_EXIT_OK 0
+#define SMF_EXIT_NODAEMON 94
#define SMF_EXIT_ERR_FATAL 95
#define SMF_EXIT_ERR_CONFIG 96
#define SMF_EXIT_MON_DEGRADE 97
diff --git a/usr/src/lib/libshare/nfs/libshare_nfs.c b/usr/src/lib/libshare/nfs/libshare_nfs.c
index 9e6124286d..e897596e72 100644
--- a/usr/src/lib/libshare/nfs/libshare_nfs.c
+++ b/usr/src/lib/libshare/nfs/libshare_nfs.c
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
/*
@@ -2827,9 +2828,16 @@ nfs_init()
ret = initprotofromsmf();
if (ret != SA_OK) {
- (void) printf(dgettext(TEXT_DOMAIN,
- "NFS plugin problem with SMF repository: %s\n"),
- sa_errorstr(ret));
+ /*
+ * This is a workaround. See the comment in
+ * cmd/fs.d/nfs/lib/smfcfg.c for an explanation.
+ */
+ if (getzoneid() == GLOBAL_ZONEID ||
+ ret != SCF_ERROR_NOT_FOUND) {
+ (void) printf(dgettext(TEXT_DOMAIN,
+ "NFS plugin problem with SMF repository: %s\n"),
+ sa_errorstr(ret));
+ }
ret = SA_OK;
}
add_defaults();
diff --git a/usr/src/lib/libshell/Makefile b/usr/src/lib/libshell/Makefile
index b584728d5f..1cce2316f0 100644
--- a/usr/src/lib/libshell/Makefile
+++ b/usr/src/lib/libshell/Makefile
@@ -64,6 +64,5 @@ $(SUBDIRS): FRC
FRC:
include Makefile.demo
-include Makefile.doc
include ../Makefile.targ
diff --git a/usr/src/lib/libshell/Makefile.doc b/usr/src/lib/libshell/Makefile.doc
deleted file mode 100644
index c822b0bf5c..0000000000
--- a/usr/src/lib/libshell/Makefile.doc
+++ /dev/null
@@ -1,93 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-
-#
-# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
-#
-
-ROOTDOCDIRBASE= $(ROOT)/usr/share/doc/ksh
-
-DOCFILES= \
- RELEASE \
- README \
- TYPES \
- DESIGN \
- COMPATIBILITY \
- OBSOLETE \
- shell_styleguide.docbook \
- shell_styleguide.html \
- images/tag_bourne.png \
- images/tag_i18n.png \
- images/tag_ksh88.png \
- images/tag_ksh93.png \
- images/tag_ksh.png \
- images/tag_l10n.png \
- images/tag_perf.png \
- images/callouts/1.png \
- images/callouts/2.png \
- images/callouts/3.png \
- images/callouts/4.png \
- images/callouts/5.png \
- images/callouts/6.png \
- images/callouts/7.png \
- images/callouts/8.png \
- images/callouts/9.png \
- images/callouts/10.png
-
-# Documentation rules
-$(ROOTDOCDIRBASE)/%: common/%
- $(INS.file)
-
-$(ROOTDOCDIRBASE)/%: misc/%
- $(INS.file)
-
-ROOTDOCDIRS= \
- $(ROOTDOCDIRBASE) .WAIT \
- $(ROOTDOCDIRBASE)/images .WAIT \
- $(ROOTDOCDIRBASE)/images/callouts
-
-$(ROOTDOCDIRBASE)/%.html: misc/%.docbook
- /usr/bin/xsltproc \
- --nonet \
- --stringparam generate.section.toc.level 0 \
- --stringparam toc.max.depth 3 \
- --stringparam toc.section.depth 12 \
- --xinclude \
- -o "$(@F)" \
- $(DOCBOOK_XSL_ROOT)/html/docbook.xsl \
- "$<" >xsltproc.log 2>&1
- $(INS) -s -m $(FILEMODE) -f "$(@D)" "$(@F)"
- $(RM) "$(@F)"
-
-CLOBBERFILES += xsltproc.log
-
-# Generic documentation rules
-DOCFILESRCDIR= common
-ROOTDOCFILES= $(DOCFILES:%=$(ROOTDOCDIRBASE)/%)
-$(ROOTDOCDIRS) := OWNER = root
-$(ROOTDOCDIRS) := GROUP = bin
-$(ROOTDOCDIRS) := DIRMODE = 755
-
-$(ROOTDOCDIRS):
- $(INS.dir)
-
-install: $(ROOTDOCDIRS) .WAIT $(ROOTDOCFILES)
diff --git a/usr/src/lib/libshell/common/COMPATIBILITY b/usr/src/lib/libshell/common/COMPATIBILITY
deleted file mode 100644
index d4d645a99a..0000000000
--- a/usr/src/lib/libshell/common/COMPATIBILITY
+++ /dev/null
@@ -1,134 +0,0 @@
-
- KSH-93 VS. KSH-88
-
-
-The following is a list of known incompatibilities between ksh-93 and ksh-88.
-I have not include cases that are clearly bugs in ksh-88. I also have
-omitted features that are completely upward compatible.
-
-1. Functions, defined with name() with ksh-93 are compatible with
- the POSIX standard, not with ksh-88. No local variables are
- permitted, and there is no separate scope. Functions defined
- with the function name syntax, maintain compatibility.
- This also affects function traces.
-
-2. ! is now a reserved word. As a result, any command by that
- name will no longer work with ksh-93.
-
-3. The -x attribute of alias and typeset -f is no longer
- effective and the ENV file is only read for interactive
- shells. You need to use FPATH to make function definitions
- visible to scripts.
-
-4. A built-in command named command has been added which is
- always found before the PATH search. Any script which uses
- this name as the name of a command (or function) will not
- be compatible.
-
-5. The output format for some built-ins has changed. In particular
- the output format for set, typeset and alias now have single
- quotes around values that have special characters. The output
- for trap without arguments has a format that can be used as input.
-
-6. With ksh-88, a dollar sign ($') followed by a single quote was
- interpreted literally. Now it is an ANSI-C string. You
- must quote the dollar sign to get the previous behavior.
- Also, a $ in front of a " indicates that the string needs
- to be translated for locales other than C or POSIX. The $
- is ignored in the C and POSIX locale.
-
-7. With ksh-88, tilde expansion did not take place inside ${...}.
- with ksh-93, ${foo-~} will cause tilde expansion if foo is
- not set. You need to escape the ~ for the previous behavior.
-
-8. Some changes in the tokenizing rules where made that might
- cause some scripts with previously ambiguous use of quoting
- to produce syntax errors.
-
-9. Programs that rely on specific exit values for the shell,
- (rather than 0 or non-zero) may not be compatible. The
- exit status for many shell failures has been changed.
-
-10. Built-ins in ksh-88 were always executed before looking for
- the command in the PATH variable. This is no longer true.
- Thus, with ksh-93, if you have the current directory first
- in your PATH, and you have a program named test in your
- directory, it will be executed when you type test; the
- built-in version will be run at the point /bin is found
- in your PATH.
-
-11. Some undocumented combinations of argument passing to ksh
- builtins no longer works since ksh-93 is getopts conforming
- with respect to its built-ins. For example, typeset -8i
- previously would work as a synonym for typeset -i8.
-
-12. Command substitution and arithmetic expansion are now performed
- on PS1, PS3, and ENV when they are expanded. Thus, ` and $(
- as part of the value of these variables must be preceded by a \
- to preserve their previous behavior.
-
-13. The ERRNO variable has been dropped.
-
-14. If the file name following a redirection symbol contain pattern
- characters they will only be expanded for interactive shells.
-
-15. The arguments to a dot script will be restored when it completes.
-
-16. The list of tracked aliases is not displayed with alias unless
- the -t option is specified.
-
-17. The POSIX standard requires that test "$arg" have exit status
- of 0, if and only if $arg is null. However, since this breaks
- programs that use test -t, ksh93 treats an explicit test -t
- as if the user had entered test -t 1.
-
-18. The ^T directive of emacs mode has been changed to work the
- way it does in gnu-emacs.
-
-19. ksh-88 allowed unbalanced parenthes within ${name op val} whereas
- ksh-93 does not. Thus, ${foo-(} needs to be written as ${foo-\(}
- which works with both versions.
-
-20. kill -l in ksh-93 lists only the signal names, not their numerical
- values.
-
-21. Local variables defined by typeset are statically scoped in
- ksh93. In ksh88 they were dynamically scoped although this
- behavior was never documented.
-
-22. The value of the variable given to getopts is set to ? when
- the end-of-options is reached to conform to the POSIX standard.
-
-23. Since the POSIX standard requires that octal constants be
- recongnized, doing arithmetic on typeset -Z variables can
- yield different results that with ksh88. Most of these
- differences were eliminated in ksh93o.
-
-24. Starting after ksh93l, If you run ksh name, where name does
- not contain a /, the current directory will be searched
- before doing a path search on name as required by the POSIX
- shell standard.
-
-25. In ksh93, cd - will output the directory that it changes
- to on standard output as required by X/Open. With ksh88,
- this only happened for interactive shells.
-
-26. As an undocumented feature of ksh-88, a leading 0 to an
- assignment of an integer variable caused that variable
- to be treated as unsigned. This behavior was removed
- starting in ksh93p.
-
-27. The getopts builtin in ksh93 requires that optstring contain
- a leading + to allow options to begin with a +.
-
-28. In emacs/gmacs mode, control-v will not display the version when
- the stty lnext character is set to control-v or is unset.
- The sequence escape control-v will display the shell version.
-
-29. In ksh88, DEBUG traps were executed. after each command. In ksh93
- DEBUG traps are exeucted before each command.
-
-30. In ksh88, a redirection to a file name given by an empty string was
- ignored. In ksh93, this is an error.
-I am interested in expanding this list so please let me know if you
-uncover any others.
diff --git a/usr/src/lib/libshell/common/DESIGN b/usr/src/lib/libshell/common/DESIGN
deleted file mode 100644
index c11c0aff1e..0000000000
--- a/usr/src/lib/libshell/common/DESIGN
+++ /dev/null
@@ -1,170 +0,0 @@
-Here is an overview of the source code organization for ksh93.
-
-Directory layout:
-
- The directory include contains header files for ksh93.
- The files nval.h and shell.h are intended to be public
- headers and can be used to add runtime builtin command.
- The remainder are private.
-
- The directory data contains readonly data files for ksh93.
- The man pages for built-ins are in builtins.c rather
- than included as statics with the implementations in the
- bltins directory because some systems don't make static const
- data readonly and we want these to be shared by all running
- shells.
-
- The directory edit contains the code for command line
- editing and history.
-
- The fun directory contains some shell function such as
- pushd, popd, and dirs.
-
- The directory features contains files that are used to generate
- header files in the FEATURE directory. Most of these files
- are in a format that is processed by iffe.
-
- The directory bltins contains code for most of the built-in
- commands. Additional built-in commands are part of libcmd.
-
- The directory sh contains most of the code for ksh93.
-
- The directory tests contains a number of regression tests.
- In most cases, when a bug gets fixed, a test is added to
- one of these files. The regression tests can be run by
- going to this directory and running
- SHELL=shell_path shell_path shtests
- where shell_path is an absolute pathname for the shell to
- be tested.
-
- The top level directory contains the nmake Makefile, a README,
- and several documentation files. The RELEASE file contains
- the list of bug fixes and new features since the original
- ksh93 release. The file COMPATIBILITY is a list of all
- known incompatibilities with ksh88.
-
- The bash_pre_rc.sh is a startup script used when emulating
- bash if the shell is compiled with SHOPT_BASH and the shell
- is invoked as bash. The bash emulation is not complete.
-
-Include directory:
- 1. argnod.h contains the type definitions for command
- nodes, io nodes, argument nodes, and for positional
- parameters.a It defines the prototypes for
- all the positional parameters functions.
- 2. builtins.h contains prototypes for builtins as well
- as symbolic constants that refer to the name-pairs
- that are associated with some of the built-ins.
- It also contains prototypes for many of the strings.
- 3. defs.h is the catch all for all definitions that
- don't fit elsewhere and it includes several other
- headers. It defines a strucuture that contains ksh
- global data, sh, and a structure that contains per
- function data, sh.st.
- 4. edit.h contains definitions that are common to both
- vi and emacs edit modes.
- 5. env.h contains interfaces for creating and modifying
- environment variables.
- 6. fault.h contains prototypes for signal related
- functions and trap and fault handling.
- 7. fcin.h contains macro and function definitions for
- reading from a file or string.
- 8. history.h contains macros and functions definitions
- related to history file processing.
- 9. jobs.h contains the definitions relating to job
- processing and control.
- 10. lexstates.h contains the states associated with
- lexical processing.
- 11. name.h contains the internal definitions related
- to name-value pair processing.
- 12. national.h contains a few I18N definitions, mostly
- obsolete.
- 13. nval.h is the public interface to the name-value
- pair library that is documented with nval.3.
- 14. path.h contains the interface for pathname processing
- and pathname searching.
- 15. shell.h is the public interface for shell functions
- that are documented int shell.3.
- 16. shlex.h contains the lexical token definitions and
- interfaces for lexical analysis.
- 17. shnodes.h contains the definition of the structures
- for each of the parse nodes and flags for the attributes.
- 18. shtable.h contains some interfaces and functions for
- table lookup.
- 19. streval.h contains the interface to the arithmetic
- functions.
- 20. terminal.h is a header file that includes the appropriate
- terminal include.
- 21. test.h contains the definitions for the test and [[...]]
- commands.
- 22. timeout.h contains the define constant for the maximum
- shell timeout.
- 23. ulimit.h includes the appropriate resource header.
- 24. variables.h contains symbolic constants for the built-in
- shell variables.
-
-sh directory:
- 1. args.c contains functions for parsing shell options
- and for processing positional parameters.
- 2. arith.c contains callback functions for the streval.c
- library and the interface to shell arithmetic.
- 3. array.c contains the code for indexed and associative
- arrays.
- 4. bash.h contains code used when compiling with SHOPT_BASH
- to add bash specific features such as shopt.
- 5. defs.c contains the data definitions for global symbols.
- 6. deparse.c contains code to generate shell script from
- a parse tree.
- 7. env.c contains code to add and delete environment variables
- to an environment list.
- 8. expand.c contains code for file name expansion and
- file name generation.
- 9. fault.c contains code for signal processing, trap
- handling and termination.
- 10. fcin.c contains code for reading and writing a character
- at a time from a file or string.
- 11. init.c contains initialization code and callbacks
- for get and set functions for built-in variables.
- 12. io.o contains code for redirections and managing file
- descriptors and file streams.
- 13. jobs.c contains the code for job management.
- 14. lex.c contains the code for the lexical analyzer.
- 15. macro.c contains code for the $ macro expansions, including
- here-documents.
- 16. main.c contains the calls to initialization, profile
- processing and the main evaluation loop as well as
- mail processing.
- 17. name.c contains the name-value pair routines that are
- built on the hash library in libast.
- 18. nvdisc.c contains code related to name-value pair disciplines.
- 19. nvtree.c contains code for compound variables and for
- walking the namespace.
- 20. nvtype.c contains most of the code related to types that
- are created with typeset -T.
- 21. parse.c contains the code for the shell parser.
- 22. path.c contains the code for pathname lookup and
- some path functions. It also contains the code
- that executes commands and scripts.
- 23. pmain.c is just a calls sh_main() so that all of the
- rest of the shell can be in a shared library.
- 24. shcomp.c contains the main program to the shell
- compiler. This program parses a script and creates
- a file that the shell can read containing the parse tree.
- 25. streval.c is an C arithmetic evaluator.
- 26. string.c contains some string related functions.
- 27. subshell.c contains the code to save and restore
- environments so that subshells can run without creating
- a new process.
- 28. suid_exec.c contains the program from running execute
- only and/or setuid/setgid scripts.
- 29. tdump.c contains the code to dump a parse tree into
- a file.
- 30. timers.c contains code for multiple event timeouts.
- 31. trestore contians the code for restoring the parse
- tree from the file created by tdump.
- 32. userinit.c contains a dummy userinit() function.
- This is now obsolete with the new version of sh_main().
- 33. waitevent.c contains the sh_waitnotify function so
- that builtins can handle processing events when the
- shell is waiting for input or for process completion.
- 34. xec.c is the main shell executuion loop.
diff --git a/usr/src/lib/libshell/common/OBSOLETE b/usr/src/lib/libshell/common/OBSOLETE
deleted file mode 100644
index c29cb8bb63..0000000000
--- a/usr/src/lib/libshell/common/OBSOLETE
+++ /dev/null
@@ -1,152 +0,0 @@
-.sp 3
-.tl ''Ksh Features That Are Obsolete in Ksh93''
-.sp 2
-.AL 1
-.LI
-Using a pair of grave accents \^\fB\(ga\fR ... \fB\(ga\fR\^
-for command substition. Use \fB$(\fR ... \fB)\fR instead.
-.LI
-.B FCEDIT
-is an obsolete name for
-the default editor name for the
-.B hist
-command.
-.B FCEDIT
-is not used when
-.B HISTEDIT
-is set. Use
-.B HISTEDIT
-instead.
-.LI
-The newtest (\fB[[\fR ... \fB]]\fR) operator
-\fB\-a\fP \fIfile\fP
-is obsolete. Use
-\fB\-e\fP instead.
-.LI
-The newtest (\fB[[\fR ... \fB]]\fR) operator
-.BR = ,
-as used in
-\fIstring\fP \fB=\fP \fIpattern\fP
-is obsolete. Use
-\fB==\fP instead.
-.LI
-The following obsolete arithmetic comparisons are also permitted:
-.in +5
-.VL 20
-.LI "\fIexp1\fP \fB\-eq\fP \fIexp2\fP"
-True, if
-.I exp1
-is equal to
-.IR exp2 .
-.LI "\fIexp1\fP \fB\-ne\fP \fIexp2\fP"
-True, if
-.I exp1
-is not equal to
-.IR exp2 .
-.LI "\fIexp1\fP \fB\-lt\fP \fIexp2\fP"
-True, if
-.I exp1
-is less than
-.IR exp2 .
-.LI "\fIexp1\fP \fB\-gt\fP \fIexp2\fP"
-True, if
-.I exp1
-is greater than
-.IR exp2 .
-.LI "\fIexp1\fP \fB\-le\fP \fIexp2\fP"
-True, if
-.I exp1
-is less than or equal to
-.IR exp2 .
-.LI "\fIexp1\fP \fB\-ge\fP \fIexp2\fP"
-True, if
-.I exp1
-is greater than or equal to
-.IR exp2 .
-.LE \" End .VL
-.in -5
-.LI
-Using test -t or [ -t ] without specifying the file unit number.
-.LI
-The
-.B \-k
-option to the \fBset\fR builtin is obsolete. It causes
-.I all\^
-variable assignment arguments are placed in the environment,
-even if they occur after the command name.
-The following
-first prints
-.B "a=b c"
-and then
-.BR c :
-There is no alternative.
-.LI
-The obsolete
-.B \-xf
-option of the
-.B typeset
-command allows a function to be exported
-to scripts that are executed without a separate
-invocation of the shell.
-Functions that need to be defined across separate
-invocations of the shell should
-be placed in a directory and the
-.B FPATH
-variable should contains the name of this directory.
-They may also
-be specified in the
-.B ENV
-file with the
-.B \-xf
-option of
-.BR typeset .
-.LI
-The shell environment variable
-.B FCEDIT
-is obsolete. Use
-.B HISTEDIT
-instead.
-.LI
-In the
-.B \-s
-option
-(to \fBfc\fR or \fBhist\fR command???)
-(
-and in obsolete versions, the editor name
-.B \-
-)
-is used to skip the editing phase and
-to re-execute the command.
-.LI
-The
-.B \-t
-option to \fBalias\fR builtin is is obsolete. It
-is used to set and list tracked aliases.
-There is no replacement.
-.LI
-The shell command line option
-.B \-t
-is obsolete. This option cause the shell to exit after reading
-and executing one command. The is no replacement (although ending
-\&"command" with the exit builtin should have the same effect).
-.LI
-As an obsolete feature of the "set" builtin,
-if the first
-.I arg\^
-is
-.B \-
-then the
-.B \-x
-and
-.B \-v
-options are turned off and the next
-.I arg
-is treated as the first argument.
-Using
-.B \+
-rather than
-.B \-
-causes these options to be turned off.
-These options can also be used upon invocation of the shell.
-.LE
-
diff --git a/usr/src/lib/libshell/common/README b/usr/src/lib/libshell/common/README
deleted file mode 100644
index 1feeec8a90..0000000000
--- a/usr/src/lib/libshell/common/README
+++ /dev/null
@@ -1,232 +0,0 @@
-This directory, and its subdirectories contain the source code
-for ksh-93; the language described in the second addition of
-the book, "The KornShell Command and Programming Language," by
-Morris Bolsky and David Korn which is published by Prentice Hall.
-ksh-93 has been compiled and run on several machines with several
-operating systems. The end of this file contains a partial list of
-operating systems and machines that ksh-93 has been known to run on.
-
-The layout of files for ksh-93 has changed somewhat since ksh-88,
-the last major release. Most of the source code for ksh remains in
-the sh directory. However, the shell editing and history routines
-are in the edit sub-directory. The code for shell built-ins is
-in the bltins directory. The data directory contains read-only
-data tables and messages that are used by the shell. The include
-files remain in the include directory and the shlib directory
-is gone. The features directory replaces the older install
-directory. The method for generating systems specific feature
-information has changed substantially.
-
-The Makefile file contains several compilation options that can be set
-before compiling ksh. Options are of the form SHOPT_option and become
-#define inside the code. These options are set to their recommended
-value and some of these may disappear as options in future releases.
-A value of 0, or no value represents off, 1 represents on.
-Note that == is needed, not =, because these are nmake state variables
-and changing their value will cause all modules that could be affected
-by this change to be recompiled.
-The options have the following defaults and meanings:
- ACCT off Shell accounting.
- ACCTFILE off Enable per user accounting info.
- AUDIT off For auditing specific users
- AUDITFILE "/etc/ksh_audit"
- APPEND on Allows var+=val string and array append.
- BASH off Bash compatibility mode. It is not fully implemented
- and is experimental.
- BRACEPAT on C-shell type abc{d,e}f style file generation
- CMDLIB_BLTIN off Makes all commands in libcmd.a builtins. The
- SH_CMDLIB_DIR nmake state variable can be used to
- specify a directory.
- CMDLIB_DIR off Sets CMDLIB_BLTIN=1 and provides a default value
- of "/opt/ast/bin" for SH_CMDLIB_DIR.
- COMPOUND_ARRAY
- on Allows all components of compound variables except the
- first to be any string by enclosing in [...]. It also
- allows components other than the last to be arrays.
- This is experimental and only partially complete.
- CRNL off <cr><nl> treated as <nl> in shell grammar.
- DYNAMIC on Dynamic loading of builtins. (Requires dlopen() interface.)
- ECHOPRINT off Make echo equivalent to print.
- ESH on Compile with emacs command line editing. The original
- emacs line editor code was provided by Mike Veach at IH.
- FILESCAN on Experimental option that allows fast reading of files
- using while < file;do ...; done and allowing fields in
- each line to be accessed as positional parameters.
- FS_3D off For use with 3-D file system. Enabled automatically for
- sytems with dynamic linking.
- KIA off Allow generation of shell cross reference database with -I.
- MULTIBYTE on Multibyte character handling. Requires mblen() and
- mbctowc().
- NAMESPACE on Allows namespaces. This is experimental, incomplete
- and undocumented.
- OLDTERMIO off Use either termios or termio at runtime.
- OO on Experimental object oriented extension. This option
- should disappear soon.
- OPTIMIZE on Optimize loop invariants for with for and while loops.
- P_SUID off If set, all real uids, greater than or equal to this
- value will require the -p flag to run suid/sgid scripts.
- PFSH off Compile with support for profile shell.
- RAWONLY off Turn on if the vi line mode doesn't work right unless
- you do a set -o viraw.
- SEVENBIT off Strip the eigth bit from characters.
- SPAWN off Use spawn as combined fork/exec. May improve speed on
- some systems.
- STATS on Add .sh.stats compound variable.
- SUID_EXEC on Execute /etc/suid_exec for setuid, setgid script.
- TIMEOUT off Set this to the number of seconds for timing out and
- exiting the shell when you don't enter a command. If
- non-zero, TMOUT can not be set larger than this value.
- TYPEDEF on Enable typeset type definitions.
- VSH on Compile with vi command line editing. The original vi
- line editor code was provided by Pat Sullivan at CB.
-
-The following compile options are set automatically by the feature testing:
- DEVFD Set when /dev/fd is a directory that names open files.
- SHELLMAGIC
- Set on systems that recognize script beginning with #! specially.
- VPIX Set on systems the have /usr/bin/vpix program for running MS-DOS.
-
-
-In most instances, you will generate ksh from a higher level directory
-which also generates libcmd and libast libraries on which ksh depends.
-However, it is possible to generate ksh, with by running make -f ksh.mk
-in this directory. The ksh.mk file was generated from the nmake Makefile.
-If you do not have make or nmake, but do have a Version 7 UNIX compatible
-shell, then you can run the script mamexec < Mamfile to build ksh.
-If you have nmake, version 2.3 or later, you can use it without the -f ksh.mk.
-In either case, ksh relies on libraries libast and libcmd which must be
-built first. The binary for ksh becomes the file named ./ksh which can
-be copied to where ever you install it.
-
-If you use old make or the Mamfile, and you system has dynamic shared
-libraries, then you should define the variables mam_cc_static and
-mam_cc_dynanamic as the compiler options that request static linking
-and dynamic linking respectively. This will decrease the number of
-shared libraries that ksh need and cut startup time substantially.
-
-The makefile should also generate shcomp, a program that will precompile
-a script. ksh93 is able to recognize files in this format and process
-them as scripts. You can use shcomp to send out scripts when you
-don't want to give away the original script source.
-
-It is advisable that you put the line PWD=$HOME;export PWD into the
-/etc/profile file to reduce initialization time for ksh.
-
-To be able to run setuid/setgid shell scripts, or scripts without read
-permission, the SUID_EXEC compile option must be on, and ksh must be installed
-in the /bin directory, the /usr/bin directory, the /usr/lbin directory,
-or the /usr/local/bin directory and the name must end in sh. The program
-suid_exec must be installed in the /etc directory, must be owned by root,
-and must be a suid program. If you must install ksh in some other directory
-and want to be able to run setuid/setgid and execute only scripts, then
-you will have to change the source code file sh/suid_exec.c explicitly.
-If you do not have ksh in one of these secure locations, /bin/sh will
-be invoked with the -p options and will fail when you execute a setuid/setgid
-and/or execute only script. Note, that ksh does not read the .profile
-or $ENV file when it the real and effective user/group id's are not
-equal.
-
-The tests sub-directory contains a number of regression tests for ksh.
-To run all these tests with the shell you just built, go to the tests
-directory and run the command
- SHELL=$dir/ksh $dir/ksh shtests
-where dir is the directory of the ksh you want to test.
-
-The file PROMO.mm is an advertisement that extolls the virtues of ksh.
-The file sh.1 contains the troff (man) description of this Shell.
-The file nval.3 contains the troff (man) description of the name-value
-pair library that is needed for writing built-ins that need to
-access shell variables.
-
-The file sh.memo contains a draft troff (mm) memo describing ksh. The
-file RELEASE88 contains the changes made for ksh88. The file RELEASE93
-contains the changes made in this release since ksh-88. The file
-RELEASE contains bug fixes made in this release since ksh-88. The file
-COMPATIBILITY contains a list of incompatibilities with ksh-88. The
-file bltins.mm is a draft troff (mm) memo describing how to write
-built-in commands that can be loaded at run time.
-
-Most of the work for internationalization has been done with ksh93.
-The file ksh.msg is a generated file that contains error messages
-that need to be translated. In addition, the function translate()
-in sh/init.c has to be completed to interface with the dictionary
-lookup. The translate function takes two argument, the string
-that is to be translated and a type which is
- 0 when a library string needs translation.
- 1 when one of the error messages in ksh.msg needs translation.
- 2 when a string in a script needs translation. You use a $ in front
- of a double quoted string in a script to indicate that it
- needs translation. The -D option for ksh builds the dictionary.
-The translate routine needs to return the translated message.
-For dictionaries that need to use a numeric key, it should be
-possible to use the strhash() function to generate numbers to
-go along with each of the messages and to use this number both
-when generating the dictionary and when converting strings.
-If you encounter error messages of type 1 that are not be translated via
-this translate() function send mail to the address below.
-
-Please report any problems or suggestions to:
-
-dgk@research.att.com
-
-
-ksh93 has been compiled and alpha tested on the following. An asterisk
-signifies that ksh has been installed as /bin/sh on this machine.
-
-* Sun OS 4.1.[123] on sparc.
- Sun OS 4.1.1 on sun.
- Solaris 2.[1-9] on sparc.
- Solaris 2.[4-8] on X86.
- HP/UX 8 on HP-9000/730.
- HP/UX 9 on HP-9000/730.
- HP/UX 10 on HP-9000/857.
- HP/UX 11 on pa-risc.
- System V Release 3 on Counterpoint C19
- System V Release 4 on AT&T Intel 486.
- System V Release 4 on NCR 4850 Intel 486.
- IRIX Release 4.0.? System V on SGI-MIPS.
- IRIX Release 5.1 System V on SGI-MIPS.
- IRIX Release 6.[1-5] System V on SGI-MIPS.
- System V Release 3.2 on 3B2.
- UTS 5.2.6 on Amdahl 3090,5990,580.
- System V Release 3.2 on i386.
- SMP_DC.OSx olivetti dcosx MIServer-S 2/128.
- SMP_DC.OSx Pyramid dcosx MIServer-S 2/160 r3000.
- 4.3BSD on Vax 8650.
- AIX release 2 on RS6000.
- AIX 3.2 on RS6000.
- Linux 1.X on Intel
- Linux 2.X on Intel
- Linux 2.X on Alpha
- Linux 2.X on Alpha
- Linux 2.X on OS/390
- Linux 2.X on sparc
- Linux 2.4 on intel itanium 64
- Linux Slackware on sparc64
-* Linux ARM on i-PAQ
- OSF1 on DEC alpha.
- OSF4 on DEC alpha.
- UMIPS 4.52 on mips.
- BSD-i [2-4] on X86.
- OpenBSD on X86
- NetBSD on X86
- FreeBSD on X86
- NeXT on Intel X86.
- NeXT on HP.
-* Windows NT using UWIN on X86
-* Windows NT using UWIN on alpha
- Windows NT using Cygwin on X86
- Windows NT with NutCracker libraries.
- Windows NT with Portage libraries.
- Windows 3.1 using custom C library.
- OpenEdition on MVS
- Darwin OS X on PPC
- MVS on OS 390
- SCO Openserver 3.2 on X86
- Unixware 7 on X86
-
-Good luck!!
-
-David Korn
-dgk@research.att.com
-
diff --git a/usr/src/lib/libshell/common/RELEASE b/usr/src/lib/libshell/common/RELEASE
deleted file mode 100644
index b8fc92511a..0000000000
--- a/usr/src/lib/libshell/common/RELEASE
+++ /dev/null
@@ -1,2204 +0,0 @@
-10-03-05 --- Release ksh93t+ ---
-10-03-05 A varibale unset memory leak has been fixed and tests/leaks.sh
- has been added to verify the fix.
-10-03-04 Documentation, comment, and disgnostic spelling typos corrected.
-10-02-14 Fix sh_getenv() initialization to cooperate with the 3d fs.
-10-02-12 A bug in which the get discipline function was not invoked for
- associative array subscripts for unset array elements has been fixed.
-10-02-12 A bug which could occur if the last line of a script was an eval
- that executed multiple commands has been fixed.
-10-02-02 A buffer overflow in read and another in binary type base64
- encoding were fixed.
-10-01-20 A bug in the evaluation of arithmetic expression in which the
- subscript was evaluated twice for $((foo[x++]++)) has been fixed.
-10-01-19 A workaround for a double-free of a trap in both a subshell and its
- parent has been added.
-10-01-18 A bug in type handling of typeset -H has been fixed.
-10-01-15 The "adding empty subscript" warning now only emitted with -x set.
-10-01-01 A bug in the parser in which '$((case i in i):;esac);:))' was not
- parsed correctly was fixed.
-10-01-01 A bug in the parser in which '$(( 2 , 3.6 ))' dumped core for locales
- with radix char , and thousands separator . has been fixed.
-09-12-28 A bug in the handling of SIGCLD on systems that generated SIGCLD
- while blocked waiting for process to complete has been fixed.
-09-12-24 ast setlocale() reworked to differentiate env var changes from user
- override.
-09-12-18 A bug with the SHOPT_BGX option set which disabled traps for signals
- < SIGCHLD when a trap for a signal > SIGCHLD was set has been fixed.
-09-12-18 A bug where [[ -v var ]] was incorrect for some variables (including
- LC_* vars) has been fixed.
-09-12-15 A bug that produced a syntax error when a multibyte character
- straddled a buffer boundary has been fixed.
-09-12-11 A bug where the subscript of an unset variable was not evaluated has
- been fixed.
-09-12-09 A bug where shcomp dumped core on certain syntax errors has been fixed.
-09-12-07 A bug where a parent shell environment var reset in a subshell removed
- the value in subsequent children of the parent shell has been fixed.
-09-12-04 A bug in which in some cases a trap in a function executed in
- a subshell could trigger twice has been fixed.
-09-12-03 A bug in which SHLVL exported with some attributes could cause
- the shell to abort at startup has been fixed.
-09-12-02 A bug with pipefail in which the shell could hang waiting for the
- writer to complete before the last reader command has been fixed.
-09-11-30 A bug in which a trap could be inherited by the first element of
- a pipeline when the command had more than 63 arguments that did
- not contain any macro expansions has been fixed.
-09-11-19 When read from a terminal was called from with a while or foo loop,
- and an edit mode was on, a backspace or erase no longer will
- overwrite the prompt.
-09-11-17 Change .paths parse to handle BUILTIN_LIB=foo BUILTIN_LIB=foo-1.2.
-09-11-17 Inside a function, typeset foo.bar will bind foo to global variable
- foo if local variable foo does not exist, instead of creating a
- local variable.
-09-11-17 "read -n1" from the terminal has been fixed to read exactly one character.
-09-11-11 Job control now works for subshell commands, (...).
-09-11-11 If set -e is on for an interactive shell errors in special builtins
- now cause the shell to exit.
-09-11-11 A bug in which an interrupt handler processed during the read builtin
- when IFS did not contain a new line has been fixed.
-09-11-09 A bug in which a variable that has been unset in a subshell and then
- exported from that subshell does not show up in the environment
- has been fixed.
-09-11-02 ``,2'' is now a valid numeric constant for locales with
- decimal_point=','.
-09-11-02 A bug where "return" in .profile did not restore the shell state
- has been fixed.
-09-10-31 A bug that corrupted saved exit status when pids wrapped around has
- been fixed.
-09-10-26 A bug in { LANG LC_ALL LC_category } ordering has been fixed in -last.
-09-10-16 A bug where notification to libast that the environment has changed
- has been fixed.
-09-10-12 A bug in which a function loaded in a subshell could leave side
- effects in the parent shell has been fixed.
-09-10-12 A bug in converting a printf %d operand to a number when the operand
- contains multiple subscripts for the same variable has been fixed.
-09-10-09 A bug in the handling of the escape character \ in directory prefixes
- in command completion has been fixed.
-09-10-09 $PATH processing has been changed to delay dir stat() and .paths
- lookup until the directory is needed in the path search.
-09-09-28 Call the ast setlocale() intercept on unset too.
-09-09-24 A bug in which LANG=foo; LC_ALL=foo; unset LC_ALL; did not revert
- LC_CTYPE etc. to the LANG value has been fixed.
-09-09-17 A bug in which unsetting SVLVL could cause a script invoked by
- name without #! to core dump has been fixed.
-09-09-16 A bug in which a pipeline in a here-document could hang when the
- pipefail option was on has been fixed.
-09-09-09 A bug in the processing of line joining in here documents which
- occurred when a buffer began with <escape><new-line> has been fixed.
-09-09-09 A leading ; with commands in a brace group or parenthesis group
- no longer causes an error. It now is used for the "showme" option.
-09-09-09 A bug in which a subshell containing a background process could
- block until the background process completed has been fixed.
-09-09-04 A bug in handing ${var[sub]}, where var is a nameref has been fixed.
-09-09-03 A bug which caused an index array to have the wrong number of elements
- when it was converted from a compound variable by adding an another
- element has been fixed.
-09-09-03 Specifying export for a compound variable now generates an error.
-09-09-02 $"..." localizations strings are no longer recognized inside `...`.
-09-09-01 A bug in the for loop optimizer in the handling of type static
- variables has been fixed.
-09-09-01 An error message is not displayed when * and @ are used as subscripts.
-09-09-01 Several bugs in the processing for types that included an associative
- array of another type has been fixed.
-09-09-01 A bug in the tracing of [[ a < b ]] and [[ a > b ]] has been fixed.
-09-08-26 The .sh.file variable was not being set for a script that was run
- by name and didn't start with #! and this has been fixed.
-09-08-25 A bug in which a function called to deeply from command substitution
- did not display an error message has been fixed.
-09-08-24 When processing profiles, ksh93 now violates the POSIX standard and
- treats &> as a redirection operator similar to bash.
-09-08-23 A bug in the handling of the trap on SIGPIPE that could lead to a
- memory fault has been fixed.
-09-08-21 A bug in the handling of the comma operator in arithmetic expressions
- that could cause a core dump on some systems has been fixed.
-09-08-20 A bug in which a compound variable containing an array of a type
- that doesn't have any elements now expands correctly.
-09-08-19 A bug which disabled function tracing inside a function after
- a call to another function has been fixed.
-09-08-19 A bug in which initializing a compound variable instance to another
- compound variable by name has been fixed.
-09-08-18 A bug in which compound variable instances could be lost after
- an instance that invoked a type method discipline has been fixed.
-09-08-18 A bug in which a discipline function for a type applied to an
- array instance when invoked in a function ignored the subscript
- has been fixed.
-09-08-18 A scoping error with variables in arithmetic expression with
- type variables when reference with a name reference has been fixed.
-09-08-10 Several memory leaks were fixed primarily related to subshells.
-09-08-06 A bug in which setting the trap on CHLD to ignore could cause
- a script to hang has been fixed.
-09-07-08 A bug in the processing of name reference assignments when it
- contained pattern expansions with quoting has been fixed.
-09-06-22 The default width for typeset -X has been changed so that there
- should be no loss of precision when converting to a string.
-09-06-19 A bug in the printing of array elements for binary variables with
- printf %B has been fixed.
-09-06-19 A bug which caused a core dump with trap DEBUG set with an array
- assignment with no elements has been fixed.
-09-06-19 A bug with read with typeset -b -Z<num> has been fixed.
-09-06-19 Two bugs related to read -b for array variables has been fixed.
-09-06-19 A bug with typeset for compound variables containing arrays of
- compound variables has been fixed.
-09-06-18 A bug in appending a compound variable to a an indexed array of
- compound variables has been fixed.
-09-06-18 A bug which occurs when appending a compound variable to an indexed
- array element has been fixed.
-09-06-18 Setting VISUAL to a value other than one ending in vi or emacs will
- no longer unset the edit mode.
-09-06-17 A bug in typeset -m when moving a local compound variable to a
- global compound variable via a name reference has been fixed.
-09-06-17 A bug in appending to nodes of an array of compound variables when
- addressing them via nameref has been fixed.
-09-06-17 A bug in typeset -p var, when var is an array of compound variables
- in which the output only contained on array element has been fixed.
-09-06-17 The prefix expansion ${!y.@} now works when y is a name
- reference to an element of an array.
-09-06-16 Traps on signals that are ignored when the shell is invoked
- no longer display. Previously they were ignored as required but
- would be listed with trap -p.
-09-06-12 A bug in vi edit mode in which hitting the up arrow key at the
- end of a line longer than 40 characters which caused a core dump
- has been fixed.
-09-06-11 A bug in which "eval non-builtin &" would create two processes,
- one for the & and another for non-builtin has been fixed.
-09-06-08 When var is an identifier and is unset, ${var} no longer tries to
- run command substitution on the command var.
-09-06-08 Process substitution arguments of the form <(command) can now be
- used following the < redirection operator to redirect from command.
-09-05-13 A bug in which redirections of the form 2>&1 1>&5 inside command
- substitution could cause the command substitution to hang has been
- fixed.
-09-05-12 To conform with POSIX, the -u option only checks for unset variables
- and subscript elements rather than checking for all parameters.
-09-05-12 A bug which could cause a core dump when a variable whose name
- begins with a . was referenced as part of a name reference inside
- a function has been fixed.
-09-05-01 A bug that caused a core dump when SIGWINCH was received and
- both vi and emacs mode were off has been fixed.
-09-04-22 Default alias compound='typeset -C' added.
-09-04-15 A bug that caused ${...;} to hang for large files has been fixed.
-09-04-08 A change was made in the -n option which printed out an incorrect
- warning with <>.
-09-04-07 The emacs edit command M-_ and M_. and the vi command _ was fixed
- to handle the case there there is no history file.
-09-04-05 A bug in handling new-lines with read -n has been fixed.
-09-04-05 The ENV variable defaults the the file named by $HOME/.kshrc rather
- then to the string $HOME/.kshrc.
-09-03-31 A bug in which a nested command substitution with redirections could
- leave a file descriptor open has been fixed.
-09-03-24 ksh now only uses the value of the _ variable on startup if it can
- verify that it was set by the invoking process rather than being
- inherited by some other ancestor.
-09-03-24 When ksh is invoked without -p and ruid!=euid, and the shell is
- compiled without SHOPT_P_UID or ruid<SHOPT_P_UID, the shell now
- enables the -p option. The previous version instead set the
- euid to the ruid as it does for set +p.
-09-03-24 When SHOPT_P_UID is defined at compile time and the shell is started
- without -p and ruid!=euid and ruid>=SHOPT_P_UID then euid is set
- to ruid. A bug that did the wrong test (ruid<SHOPT_P_UID) was fixed.
-09-03-17 The sleep(1) builtin now accept and ISO 8601 PnYnMnDTnHnMnS
- duration or date(1) compatible date/time operand.
-09-03-10 If a variable that was left or right justified or zero-filled was
- changed with a typeset statement that was left or right justified
- or zero-filled, then the original justification no longer affects
- the result.
-09-03-10 A bug in the handling of traps when the last command in a script
- is a subshell grouping command has been fixed.
-09-03-03 A bug in which an expansion of the form ${!prefix@} could generate
- an exception after the return from a function has been fixed.
-09-02-02 A bug in restricted mode in which the value of ENV could be
- changed from within a function has been fixed.
-09-02-02 A bug in which an erroneous message indicating that a process
- terminated with a coredump has been fixed.
-09-02-02 The exit status when exit was called without an argument from
- a signal handler was incorrect and has been fixed.
-09-02-02 A bug in which a function autoloaded in a subshell could cause
- a core dump when the subshell completed has been fixed.
-09-02-02 A bug in which 2>&1 inside a command substitution wasn't working
- correctly has been fixed.
-09-02-02 A bug in the call stack of arithmetic function with 2 args
- returning int has been fixed.
-09-01-30 A bug in which 'eval print \$0' inside a function was giving the
- wrong value for $0 has been fixed.
-09-01-28 A bug in which a command substitution could return an exit status
- of 127 when the pipefail option is enabled has been fixed.
-09-01-26 ksh93 now generates an error message if you attempt to create
- a global name reference to a local variable.
-09-01-26 The [[ -v var ]] operator was modified to test for array elements.
-09-01-23 The redirection operator <>; was added. It is similar to <>
- except that if the command it is applied to succeeds, the file
- is truncated to the offset at the command completion.
-09-01-23 The default file descriptor for <> was changed to 1.
-09-01-20 A bug in which the exit status specified in an exit trap was
- not used when a process terminated with a signal has been fixed.
-09-01-19 A bug in which a signal whose default action is to terminate
- a process could be ignored when the process is running a sub-shell
- has been fixed.
-09-01-19 A bug in which sending SIGWINCH to a process that reads from a pipe
- could cause a memory fault has been fixed.
-09-01-16 The -R unary operator was added to [[...]] and test to check whether
- a variable is a name reference.
-09-01-16 The -v unary operator was added to [[...]] and test to check whether
- a variable is set.
-09-01-14 The unset built-in was modified to return 0 exit status when
- unsetting a variable that was unset to conform with the POSIX
- standard.
-09-01-14 The unset built-in was modified to continue to unset variables
- after encountering a variable that it could not unset to
- conform to the POSIX standard.
-09-01-14 The parameter expansion ${x+value} no longer expands the value of
- the variable x when determining whether x is set or not.
-09-01-13 A bug in which background jobs and pipelines that were not waited
- for could, in rare instances, cause the shell to go into an infinite
- loop or fail has been fixed.
-09-01-06 A bug in indexed arrays of compound variables in which referencing
- non-existent sub-variable in an arithmetic expression could cause
- the sub-variable to be created has been fixed.
-09-01-05 A bug in which the \ character did not escape extended regular
- expression pattern characters has been fixed.
-08-12-24 A bug in which killing the last element of a pipe did not cause
- a write to the pipe to generate a SIGPIPE has been fixed.
-08-12-19 A bug which could cause command substitution to hang when the
- last element of a pipeline in a command substitution was a built-in
- and the output was more that PIPE_BUFF.
-08-12-18 A bug which occurs when a here documented marker embedded in a
- command substitution occurs on a buffer boundary has been fixed.
-08-12-17 A bug in the output of typeset -p for variables that had attributes
- but did not have a value has been fixed.
-08-12-16 A bug in which a name reference to a name reference variable that
- references an array element has been fixed.
-08-12-16 A bug in which a variable given both the -A and -C attribute along
- with an initial assignment didn't work correctly has been fixed.
-08-12-10 The [[ -t fd ]] test was fixed to handle fd>9.
-08-12-10 A bug where function stack misalignment could cause a bus error
- has been fixed.
-08-12-09 Command completion was changed to use \ to quote special characters
- instead of quoting the argument in single quotes.
-08-12-07 A bug in typeset -m which occurred when the target node was an
- associative array element has been fixed.
-08-12-07 A timing bug on some systems (for example darwin), that could
- cause the last process of a pipeline entered interactively to fail
- with an "Exec format error" has been fixed.
-08-12-04 SHOPT_BGX enables background job extensions. Noted by "J" in
- the version string when enabled. (1) JOBMAX=n limits the number
- of concurrent & jobs to n; the n+1 & job will block until a
- running background job completes. (2) SIGCHLD traps are queued
- so that each completing background job gets its own trap; $! is
- set to the job pid and $? is set to the job exit status at the
- beginning of the trap. (3) sleep -s added to sleep until the time
- expires or until a signal is delivered.
-08-12-04 The sign of floating point zero is preserved across arithmetic
- function calls.
-08-12-04 A bug that caused print(1) to produce garbled stdout/stderr
- output has been fixed.
-08-12-04 A bug in which printf "%d\n" "'<euro>'" did not output the
- numerical value of the EURO symbol, 8354, has been fixed.
-08-11-24 /dev/fd* and /dev/std* redirections are first attempted with
- open() to preserve seek semantics; failing that the corresponding
- file descriptors are dup()'d.
-08-11-20 A bug which could cause a core dump if a function compiled with
- shcomp was found has been fixed.
-08-11-20 A bug in which jobs were not cleared from the jobs table for
- interactive shells when the pipefail option is on has been fixed.
-08-11-11 A bug in which a field that was unset in a type definition and later
- set for an instance could appear twice when displaying the variable
- has been fixed.
-08-11-11 A bug in which running a simple command & inside a function would
- not return the correct process id has been fixed.
-08=11-10 A bug in which the exit status of a command could be lost if the pid
- was that of the most recent command substitution that had completed
- has been fixed.
-08-11-10 The maximum depth for subshells has been increased from 256 to 65536.
-08-11-06 A bug which could cause a core dump when the _ reference variable was
- used as an embedded type with a compound assignment has been fixed.
-
-08-10-31 --- Release ksh93t ---
-08-10-31 Variable scoping/initialization bugs that could dump core were fixed.
-08-10-24 The lexer now accepts all RE characters for patterns prefixed
- with a ksh ~(...) option expression.
-08-10-24 For ${var/pat/sub} \0 in sub expands to the text matched by pat.
-08-10-18 A bug in array scoping that could dump core has been fixed.
-08-10-10 read -n and -N fixed to count characters in multibyte locales.
-08-10-10 A bug that mishandled _.array[] type references has been fixed.
-08-10-09 ${.sh.version} now contains a concatenation of the following (after
- 'Version') denoting compile time features:
- A SHOPT_AUDIT
- B SHOPT_BASH
- L SHOPT_ACCT
- M SHOPT_MULTIBYTE
-08-10-09 A bug that caused subshell command substitution with redirection
- to hang has been fixed.
-08-10-08 Output errors, other than to stderr, now result in a diagnostic.
-08-10-08 ksh93 now supports types that contain arrays of other types as
- members. Earlier versions core dumped in this case.
-08-10-05 A bug which caused the shell to emit a syntax error for an arithmetic
- statement of the form (( var.name[sub] = value)) has been fixed.
-08-10-01 A bug that caused subshell command substitution to hang has
- been fixed.
-08-09-29 When the -p export option of typeset is used with other options,
- only those variables matching the specified options are displayed.
-08-09-29 When the shell reads the environment and finds variables that are
- not valid shell assignments, it now passes these on to subsequent
- commands rather than deleting them.
-08-09-29 A bug in the display of compound variables containing an indexed
- array of compound variables has been fixed.
-08-09-29 A bug in the display of compound variables containing an associative
- array with a subscript containing a . in the name has been fixed.
-08-09-26 A core dump in the subshell environment restore has been fixed.
-08-09-24 $(...) has been fixed to properly set the exit status in $?.
-08-09-23 $(<...) with IFS=$'\n\n' has been fixed to retain all but the last
- of multiple trailing newlines.
-08-09-23 The -p option to typeset when used with other attributes, restricts
- the output to variables with the specified attributes.
-08-09-22 A bug that sometimes lost the exit status of a job has been fixed.
-08-09-21 A bug that retained trailing command substitution newlines in
- cases where the command caused the shell to fork has been fixed.
-08-09-19 type, whence -v, and command -v were fixed to comply with POSIX
- by writing 'not found' diagnostics to the standard error.
-08-09-18 test and [...] were fixed to comply with POSIX in the case
- of test '(' binop ')' where binop is a valid binary test operator.
-08-09-16 If a method discipline named create is specified when defining a
- type, this function will be called when an instance is created.
-08-09-15 The variable _ is now set as a reference to the compound variable
- when defining a compound variable or a type.
-08-09-10 The shell now prints an error message when the type name specified
- for an indexed array subscript is not an enumeration type.
-08-09-10 A bug in which a subshell that spawned a background process could
- loose output that was produced after the foreground completed
- has been fixed.
-08-09-10 A timing bug on some systems that could cause coprocesses started by a
- subshell to not clean up and prevent other coprocesses has been fixed.
-08-09-09 The typeset -m option is now able to rename array elements from
- the same array.
-08-09-09 The exit status of 2 from the DEBUG trap causes the next command
- to be skipped. An exit value of 255 from a DEBUG trap called from
- a function causes the function to return.
-08-09-08 A bug in which a coprocess created in a subshell that did not
- complete when the subshell terminated could prevent a coprocess
- from being created in the parent shell has been fixed.
-08-09-05 An assignment of the form name1=name2 where name1 and name2
- are both compound variables causes name1 to get a copy of name2.
- name1+=name2 causes name2 sub-variables to be appended to name1.
-08-09-05 A bug in which unsetting a compound variable did not unset all
- the sub-variables has been fixed.
-08-09-01 A bug in the subshell cleanup code that could cause SIGSEGV has
- been fixed.
-06-08-26 The SHLVL variable which is an environment variable used by bash
- and zsh that gets incremented when the shell starts.
-08-08-25 For an indexed array, a negative subscript now refers to offsets
- from the end so that -1 refers to the last element.
-08-08-24 An alignment error for shorts on 64 bit architectures has been fixed.
-08-08-22 If oldvar is a compound variable, typeset -C newvar=oldvar creates
- newvar as a copy of oldvar.
-08-08-19 The ALRM signal no longer cause the sleep builtin to terminate.
-08-08-13 When used in an arithmetic expression, the .sh.version variable
- now produces a number that will be increasing for each release.
-08-08-11 A bug in which type instantiation with a compound assignment in
- a dot script in which the type is defined has been fixed.
-08-08-07 The -m option has been added to typeset to move or rename a
- variable. Not documented yet.
-08-08-06 A bug in read when used in a loop when a prompt was specified
- when reading from a terminal has been fixed.
-08-08-01 A bug with the pipefail option in which a nested pipeline could
- cause an asynchronous command to block has been fixed.
-08-08-01 A for loop optimizer bug that treats .sh.lineno as an invariant
- has been fixed.
-08-07-30 A bug in which expanding compound variable that had a get discipline
- from with a here document could cause a syntax error has been fixed.
-08-07-18 A bug in which a nameref caused a local variable to be created
- rather than binding to an existing variable in the global scope
- has been fixed.
-08-07-17 A bug which occurred when a nameref was created from within a
- function that was part of a pipeline has been fixed.
-08-07-14 The compile option SHOPT_STATS was added. With this option the
- compound variable .sh.stats keeps usage statistics that could help
- with performance tuning.
-08-07-10 The output of set now always uses a single line for each variable.
- For array variables, the complete set of values is now displayed.
-08-07-09 The typeset -C option can be used with arrays to indicate that
- each element should default to a compound variable.
-08-07-08 The %B format now outputs compound variables and arrays. The
- alternate flag # can be used to cause output into a single line.
-08-07-03 When the window change signal, WINCH, is received, the current
- edit line is redrawn in place.
-08-07-01 A bug in the handling of shared variables when inside an embedded
- type has been fixed.
-08-06-29 A bug in multiline edit mode which occurred when the prompt length
- was three characters or less has been fixed.
-08-06-23 A bug in which the SIGCLD was not be triggered when background
- jobs completed has been fixed.
-08-06-23 KSH_VERSION added as a name reference to .sh.version.
-08-06-20 type now outputs 'special builtin' for special builtins.
-08-06-19 A couple of bugs in multi-dimensional arrays have been fixed.
-08-06-19 A bug in which a syntax error in a dot script could generated
- a syntax error in the next subsequent command has been fixed.
-08-06-17 Reduced the maximum function call depth to 2048 to avoid exceptions
- on some architectures.
-08-06-16 A bug in which printf "%B" could generate an exception when the
- specified variable was not set has been fixed.
-08-06-16 When typeset -p is followed by variable names, it now displays
- the attributes names and values for the specific names.
-08-06-14 A bug that could effect the drawing of the screen from multiline
- emacs or gmacs mode when walking up the history file has been fixed.
-08-06-13 A bug in which a compound variable defined in a subshell could
- have side effects into the parent shell has been fixed.
-08-06-13 A number of bugs related to using .sh.level to set the stack from
- for DEBUG traps have been fixed.
-08-06-13 The .sh.lineno variable has been added. When .sh.level is changed
- inside a DEBUG trap, the .sh.lineno contains the calling line number
- for the specified stack frame.
-08-06-13 The .sh.level variable has been documented and now works.
-08-06-11 The -C option has been added to read for reading compound command
- definitions from a file.
-08-06-11 The . command is now permitted inside a compound command definition.
- The dot script can contain declaration commands and dot commands.
-08-06-09 Add -C option to typeset so that typeset -C foo, is equivalent
- to foo=().
-08-06-09 Added -n warning message for typeset option orderings that are valid
- with ksh88 but not valid with ksh93, for example Lx5.
-08-06-09 A bug in which the return value for an assignment command containing
- a command substitution with that failed was zero when the assignment
- contained redirections has been fixed.
-08-06-09 A bug in the quoting of $ inside a ERE pattern ~(E)(pattern)
- has been fixed.
-08-06-06 A bug when processing `` command substitution with the character
- sequence \$' has been fixed.
-08-06-02 When defining a type, the typeset -r attribute causes this field
- to be required to be specified for each instance of the type and
- does not allow a default value.
-08-06-02 Several bugs in which compound variables were modified by
- subshells have been fixed.
-08-05-22 The ceil function has been added to the math functions.
-08-05-21 A bug in which a name reference defined in a function and passed
- as an argument to another function could cause an incorrect binding.
-08-05-21 A bug in freeing compound variables that are local to functions
- has been fixed.
-08-05-19 The array expansions ${array[sub1..sub2]} and ${!array[sub1..sub2]}
- to expand to the value (or subscripts) for array between sub1 and
- sub2 inclusive. For associative arrays, the range is based on
- location in the POSIX locale. The .. must be explicit and cannot
- result from an expansion.
-08-05-15 The trap on SIGCLD is no longer triggered by the completion of
- the foreground job as with ksh88.
-08-05-14 A bug in the implementation of the editing feature added on
- 07-09-19 in emacs mode has been fixed.
-08-05-12 A bug in processing the test built-in with parenthesis has been
- fixed.
-08-05-12 The unset built-in now returns non-zero when deleting an array
- subscript that is not set.
-08-05-08 Changing the value of HISTFILE or HISTSIZE will cause the old
- history file to be close and reopened with the new name or size.
-08-05-08 When FPATH is changed functions that were found via a path search
- will be searched for again.
-08-05-08 A parser bug in which reserved words and labels were recognized
- inside compound indexed array assignment after a new-line has
- been fixed.
-08-05-07 A bug in getopts when handling numerical option arguments has
- been fixed.
-08-05-07 The typeset -S option was added for variables outside type
- definitions to provide a storage class similar to C static
- inside a function defined with function name. When outside
- type definitions and outside a function, the -S option cause
- the specified variable so be unset before the assignment and
- before the remaining attributes are supplied.
-08-05-07 A bug that affected the cursor movement in multiline mode when
- a character was deleted from near the beginning of the any
- line other than the first.
-08-05-01 In multiline edit mode, the refresh operation will now clear
- the remaining portion of the last line.
-08-05-01 A bug in computing prompt widths for the edit modes for prompts
- with multibyte characters has been fixed.
-08-05-01 A bug in the multiline edit mode which could cause the current
- line to be displayed incorrectly when moving backwards from third
- or higher line to the previous line has been fixed.
-08-05-01 A bug in which options set in functions declared with the function
- name syntax were carried across into functions invoked by these
- functions has been fixed.
-08-04-30 A bug which could cause a coprocess to hang when the read end
- is a builtin command has been fixed.
-08-04-30 The emacs and vi editors have been modified to handle window
- change commands as soon as they happen rather than waiting for
- the next command.
-08-04-28 A bug in which ${!x} did not expand to x when x was unset has been
- fixed.
-08-04-27 A bug in which the assignment x=(typeset -a foo=([0]=abc)) created
- x.foo as an associative array has been fixed.
-08-04-25 A bug in which $# did not report correctly when there were more
- than 32K positional parameters has been fixed.
-08-04-04 Choose the name _ as the sub-variable that holds type or instance
- specific data used by discipline functions.
-08-03-27 A bug in which the terminal group was not given back to the parent
- shell when the last part of a pipeline was handled by the parent shell
- and the other parts of the pipeline complete has been fixed.
- The symptom was that the pipeline became uninterruptable.
-08-03-25 A bug in restricted mode introduced in ksh93s that caused scripts
- that did not use #! to executed in restricted mode has been fixed.
-08-03-25 A bug in which the pipefail option did not work for a pipeline
- within a pipeline has been fixed.
-08-03-24 A bug in which OPTIND was not set correctly in subshells has
- been fixed.
-08-03-24 A bug which could cause a memory exception when a compound variable
- containing an indexed array with only element 0 defined was expanded.
-08-03-20 A bug in which ${!var[sub].*} was treated as an error has been fixed.
-08-03-20 Associative array assignments of the form ([name]=value ...)
- now allow ; as well as space tab and new line to separate elements.
-08-03-18 A buffering problem in which standard error was sometimes
- not flushed before sleep has been fixed.
-08-03-17 A bug in which a signal sent to $$ while in a subshell would be
- sent to the subshell rather than the parent has been fixed.
-08-03-17 --default option added to set(1) to handle set +o POSIX semantics.
- set --state added as a long name alias for set +o.
-08-03-14 A bug in which using monitor mode from within a script could
- cause the terminal group to change has been fixed.
-08-03-10 The new ${...} command substitution will treat the trailing }
- as a reserved word even if it is not at the beginning of a command,
- for example, ${ date }.
-08-03-10 If the name of the ENV begins with /./ or ././ then the
- /etc/ksh.kshrc file will not be executed on systems that support
- this interactive initialization file.
-08-03-07 A bug in which ksh -i did not run the ENV file has been fixed.
-08-03-07 A bug in which ulimit did not always produce the same output as
- ulimit -fS has been fixed.
-08-03-04 A bug in multiline mode in emacs and vi mode which could cause the
- cursor to be on the wrong line when interrupt was hit has been fixed.
-08-03-03 The change made in ksh93s+ on 07-06-18 in which braces became
- optional for ${a[i]} inside [[...]] was restored in the case
- where the argument can be a pattern.
-08-03-03 A bug in which creating a name reference to an associative array
- instance would fail when the subscript contained characters [ or
- ] has been fixed.
-08-02-29 The redirection operator >; has been added which for non-special
- files, generates the output in a temporary file and writes the
- specified file only of the command has completed successfully.
-08-02-15 A bug in ${var/pattern/string} for patterns of the form ?(*) and +(*)
- has bee fixed.
-08-02-07 A bug in which test \( ! -e \) produced an error has been fixed.
-08-02-14 The typeset -a option can now optionally be followed by the name
- of an enumeration type which allows subscripts to be enumerations.
-08-02-14 The enum builtin which creates enumeration types has been added.
-08-02-12 The backoff logic when there are no more processes has been fixed.
-08-02-07 The -X option has been added to typeset. The -X option creates
- a double precision number that gets displayed using the C99 %a
- format. It can be used along with -l for long double.
-08-01-31 The -T option to typeset has been added for creating typed
- variables. Also the -h and -S options have been added to
- typeset that are only applicable when defining a type.
-08-01-31 The prefix expansion operator @ has been added. ${@name}
- expands to the type of name or yields the attributes.
-07-11-15 A bug in the macro expander for multibyte characters in which
- part of the character contains a file pattern byte has been fixed.
-07-10-03 A bug in which : was not allowed as part of an alias name has been
- fixed.
-07-09-26 A bug in which appending a compound variable to a compound variable
- or to an index array didn't work has been fixed.
-07-09-19 In both emacs and vi edit mode, the escape sequence \E[A (usually
- cursor up, when the cursor is at the end of the line will fetch
- the most recent line starting with the current line.
-07-09-18 The value of ${!var} was correct when var was a reference to an
- array instance.
-07-09-18 The value of ${!var[sub]} was not expanding to var[sub] and this
- was fixed. It also fixed ${name} where name is a name reference
- to var[sub].
-07-09-18 It is now legal to create a name reference without an initialization.
- It will be bound to a variable on the first assignment.
-07-08-30 A discipline function can be invoked as ${x.foo} and is equivalent
- to ${ x.foo;} and can be invoked as x.foo inside ((...)).
-07-07-09 A bug in which typeset -a did not list indexed arrays has been
- fixed.
-07-07-03 The command substitution ${ command;} has been added. It behaves
- like $(command) except that command is executed in the current
- shell environment. The ${ must be followed by a blank or an
- operator.
-
-08-04-17 --- Release ksh93s+ ---
-08-04-17 A bug in which umask was not being restored correctly after a
- subshell has been fixed.
-08-04-15 A bug in which sending a STOP signal to a job control shell started
- from within a shell function caused cause the invoking shell to
- terminate has been fixed.
-08-04-11 A bug which caused $(exec > /dev/null) to go into an infinite loop
- has been fixed.
-08-03-27 A bug in which typeset -LZ was being treated as -RZ has been fixed.
-08-03-06 A bug with ksh -P on systems that support the the profile shell,
- in which it would exit after running a non-builtin has been fixed.
-08-01-31 A bug in which command substitution inside ((...)) could cause
- syntax errors or lead to core dumps has been fixed.
-08-01-17 A bug in which discipline functions could be deleted when invoked
- from a subshell has been fixed.
-08-01-17 A bug in which a command substitution consisting only of
- assignments was treated as a noop has been fixed.
-08-01-17 A bug in which discipline functions invoked from withing a
- compound assignment could fail has been fixed.
-08-01-16 Incomplete arithmetic assignments, for example ((x += )), now
- generate an error message.
-08-01-16 A bug in which a set discipline defined for a variable before
- an array assignment could cause a core dump has been fixed.
-08-01-03 A bug in on some systems in which exit status 0 is incorrectly
- returned by a process that catches the SIGCONT signal is stopped
- and then continued.
-07-12-13 A race condition in which a program that has been stopped and then
- continued could loose the exit status has been fixed.
-07-12-12 Code to check for file system out of space write errors for all
- writes has been added.
-07-12-11 A bug in the macro expander for multibyte characters in which
- part of the character contains a file pattern byte has been fixed.
-07-12-06 A bug in the emacs edit mode when multiline was set that output
- a backspace before the newline to the screen has been fixed.
-07-12-04 A bug in which using <n>TAB after a variable name listing expansion
- in the edit modes would cause the $ to disappear has been fixed.
-07-11-28 A bug in which setting IFS to readonly could cause a subsequent
- command substitution to fail has been fixed.
-07-11-27 A work around for a gcc 4.* C99 "feature" that could cause a job
- control shell to go into an infinite loop by adding the volatile
- attribute to some auto vars in functions that call setjmp().
-07-11-27 A bug in which the shell could read ahead on a pipe causing the
- standard input to be incorrectly positioned has been fixed.
-07-11-27 A bug in which compound variable UTF-8 multibyte values were not
- expanded or traced properly has been fixed.
-07-11-21 A bug where an unbalanced '[' in a command argument was not treated
- properly has been fixed.
-07-11-15 A bug in which compatibility mode (no long option names) getopts(1)
- incorrectly set the value of OPTARG for flag options has been fixed.
-07-11-15 A bug in which "hash -- name" treated "--" as an invalid name operand
- has been fixed.
-07-11-15 typeset now handles "-t -- [-r] [--]" for s5r4 hash(1) compatibility.
-07-11-15 A bug in which the umask builtin mis-handled symbolic mode operands
- has been fixed.
-07-11-15 Bugs in which shell arithmetic and the printf builtin mis-handled the
- signs of { -NaN -Inf -0.0 } have been fixed.
-07-11-15 The full { SIGRTMIN SIGRTMIN+1 ... SIGRTMAX-1 SIGRTMAX } range
- of signals, determined at runtime, are now supported.
-07-11-15 A bug in which creating an index array with only subscript 0 created
- only a simple variable has been fixed.
-07-11-14 A bug in which appending to an indexed array using the form
- name+=([sub]=value) could cause the array to become an associative
- array has been fixed.
-07-11-14 A bug in which typeset without arguments could coredump if a
- variable is declared as in indexed array and has no elements has
- been fixed.
-07-11-14 A bug in which creating a local SECONDS variable with typeset in
- a function could corrupt memory has been fixed.
-07-11-14 A bug which could cause a core dump when a script invoked by name
- from a function used compound variables has been fixed.
-07-11-05 A bug in which printf %d "'AB" did not diagnose unconverted characters
- has been fixed.
-07-11-05 printf %g "'A" support added for all floating point formats.
-07-11-01 A bug in which typeset -f fun did not display the function definition
- when invoked in a subshell has been fixed.
-07-10-29 The sleep builtin was fixed so that all floating point constants
- are valid operands.
-07-10-10 A bug in which the locale was not being restored after
- LANG=value command has been fixed.
-07-09-20 A bug in which a nameref to a compound variable that was local
- to the calling function would not expand correctly when displaying
- is value has been fixed.
-07-09-19 A bug which cause cause a core dump if .sh.edchar returned
- 80 characters or more from a keyboard trap has been fixed.
-07-09-14 A bug in which could cause a core dump when more than 8 file
- descriptors were in use has been fixed.
-07-09-10 A bug in which creating a name reference to an instance of
- an array when the array name is itself a reference has been fixed.
-07-09-10 The file completion code has been modified so that after an = in
- any word, each : will be considered a path delimiter.
-07-09-06 A bug in which subprocess cleanup could corrupt the malloc() heap
- has been fixed.
-07-08-26 A bug in which a name reference to an associative array instance
- could cause the subscript to be evaluated as an arithmetic expression
- has been fixed.
-07-08-22 A bug in which the value of an array instance was of a compound
- variable was not expanded correctly has been fixed.
-07-08-14 A bug which could cause a core dump when a compound assignment was
- made to a compound variable element with a typeset -a attribute
- has been fixed.
-07-08-08 A bug in which a trap ignored in a subshell caused it to be
- ignored by the parent has been fixed.
-07-08-07 A bug in which the set command would generated erroneous output
- for a variable with the -RZ attribute if the variable name had been
- passed to a function has been fixed.
-07-08-02 A bug in which read x[1] could core dump has been fixed.
-07-08-02 A second bug in which after read x[sub] into an associative array
- of an element that hasn't been assigned could lead to a core dump
- has been fixed.
-07-07-31 A bug in which a pipeline that completed correctly could have
- an exit status of 127 when pipefail was enabled has been fixed.
-07-07-09 The SHOPT_AUDIT compile option has been added for keyboard logging.
-07-06-25 In vi insert mode, ksh no longer emits a backspace character
- before the carriage return when the newline is entered.
-07-06-25 A bug in which pipefail would cause a command to return 0
- when the pipeline was the last command and the failure happened
- on a component other than the last has been fixed.
-07-06-25 A bug in the expansion of ${var/pattern/rep} when pattern or rep
- contained a left parenthesis in single quotes has been fixed.
-07-06-18 The braces for a subscripted variable with ${var[sub]} are now
- optional when inside [[...]], ((...)) or as a subscript.
-07-05-28 A bug in brace expansion in which single and double quotes did
- not treat the comma as a literal character has been fixed.
-07-05-24 The -p option of whence now disables -v.
-07-05-23 Several bug fixes in compound variables and arrays of arrays
- have been made.
-07-05-15 A bug in which the %B format of printf was affected by the
- locale has been fixed.
-07-05-14 A bug in which \ was not removed in the replacement pattern with
- ${var/pattern/rep} when it was not followed by \ or a digit has
- been fixed.
-07-05-10 A bug in which ksh -R file core dumped if no script was specified
- has been fixed. it not displays an error message.
-07-05-07 Added additional Solaris signals to signal table.
-07-04-30 A bug in which a pipeline with command substitution inside a
- function could cause a pipeline that invokes this function to
- hang when the pipefail option is on has been fixed.
-07-04-30 Added -q to whence.
-07-04-18 A small memory leak with each redirection of a non-builtin has
- been fixed.
-07-03-08 A bug in which set +o output command line options has been fixed.
-07-03-08 A bug in which an error in read (for example, an invalid variable
- name), could leave the terminal in raw mode has been fixed.
-07-03-06 A bug in which read could core dump when specified with an array
- variable with a subscript that is an arithmetic expression has
- been fixed.
-07-03-06 Several serious bugs with the restricted shell were reported and
- fixed.
-07-03-02 If a job is stopped, and subsequently restarted with a CONT
- signal and exits normally, ksh93 was incorrectly exiting with
- the exit status of the stop signal number.
-07-02-26 M-^L added to emacs mode to clear the screen.
-07-02-26 A bug in which setting a variable readonly in a subshell would
- cause an unset error when the subshell completed has been fixed.
-07-02-19 The format with printf uses the new = flag to center the output.
-07-02-19 A bug in which ksh93 did not allow multibyte characters in
- identifier names has been fixed.
-07-02-19 A bug introduced in ksh93 that causes global compound variable
- definitions inside functions to exit with "no parent" has been fixed.
-07-02-19 A bug in which using compound commands in process redirection
- arguments would give syntax errors <(...) and >(...) has been fixed.
-07-01-29 A bug which caused the shell to core dump which can occur when a
- built-in exits without closing files that it opens has been fixed.
-07-01-26 A bug in which ~(E) in patterns containing \ that are not inside ()
- has been fixed.
-
-06-12-29 --- Release ksh93s ---
-06-12-29 A bug in which the value of IFS could be changed after a command
- substitution has been fixed.
-06-12-22 /dev/(tcp|udp|sctp)/HOST/SEVRICE now handles IPv6 addresses on
- systems that provide getaddrinfo(3).
-06-12-19 A -v option was added to read. With this option the value of
- the first variable name argument will become the default value
- when read from a terminal device.
-06-11-20 A bug in which "${foo[@]:1}}" expands a null argument (instead of
- no argument), when foo[0] is not empty has been fixed.
-06-11-16 The discipline functions have been modified to allow each subscript
- to act independently. Currently the discipline function will not
- be called when called from a discipline function of the same variable.
-06-11-14 A bug which could cause a core dump if a file descriptor for
- an internal file was closed from with a subshell has been fixed.
-06-10-30 The redirections <# pattern, and <## pattern have been added.
- Both seek forward to the beginning of the next line that contains
- the pattern. The <## form copies the skipped portion to standard
- output.
-06-10-26 On systems that support stream control transport, the virtual file
- name /dev/sctp/host/port can now be used to establish connections.
-06-10-26 The printf modifier # when used with d produces units in thousands
- with a single letter suffix added. The modifier # when used with
- the i specification provides units of 1024 with a two letter suffix.
-06-10-24 The value of $! is now set to the process id of a job put
- into the background with the bg command as required by POSIX.
-06-10-23 A bug in which the value of $! was affected by a background
- job started from a subshell has been fixed.
-06-10-23 A bug in ${var:offset:len} in multibyte locales has been fixed.
-06-10-15 The remaining math functions from C99 were added for any system
- that supports them.
-06-10-13 The klockwork.com software detected a few coding errors that
- have been fixed.
-06-10-12 A bug when skipping over `...` with ${x:=`...`} when x is set
- has been fixed.
-06-10-11 A bug in process floating constants produced by the %a format
- of printf has been fixed.
-06-10-06 A bug in which IFS was not being restored correctly in some
- cases after a subshell has been fixed.
-06-10-06 A bug in which pipefail was not detecting some failures in
- pipelines with 3 or more states has been fixed.
-06-10-03 A bug in the processing of >(...) with builtins which could
- cause the builtin to hang has been fixed.
-06-10-03 A bug in the for loop optimizer which causes >(...) process
- substitution to be ignored has been fixed.
-06-09-17 The -a option was added to typeset for indexed arrays. This
- is only needed when using the ([subscript]=value ...) form.
-06-09-06 The showme option was added. Each simple command not beginning
- with a redirection and not occurring with in the while, until, if,
- select condition can be preceded by a semi-colon which will
- be ignored when showme is off. When showme is on, any command
- preceded by a colon will be traced but not executed.
-06-08-16 As a new feature, a leading ~(N) on a pattern has no effect
- except when used for file expansion. In this case if not
- matches are found, the pattern is replaced by nothing rather
- than itself.
-06-08-11 A bug in the expansion of ${.sh.match[i]:${#.shmatch[i]}} has
- been fixed.
-06-08-10 The read builtin options -n and -N have been modified to treat
- the size as characters rather than bytes unless storing into a
- binary (typeset -B) variable.
-06-07-27 When the here document operator << is followed directly by a #
- rather than a -, the first line of the here-document determines
- how much whitespace is removed for each line.
-06-07-26 A bug in the C-shell history (enabled with set -H) in which the
- history event !$ was not processed has been fixed.
-06-07-21 A bug on some systems in which assigning PATH on a command line
- would not take effect has been fixed.
-06-07-20 Add ksh93 and rksh93 as allowable names for ksh binaries.
-06-07-20 Removed the SHOPT_OO compilation option which was only partially
- implemented.
-06-07-20 The ability to use egrep, grep, and fgrep expressions within
- shell patterns has been documented.
-06-07-17 A bug with arithmetic command expressions for locales in which
- the comma is a thousands separator has been fixed.
-06-07-13 The default HISTSIZE was increased from 128 to 512.
-06-07-13 A multibyte problem with locales that use shift codes has been fixed.
-06-06-23 A number of bug fixes for command, file, and variable completion
- have been mode.
-06-06-20 Floating point division by zero now yields the constant Inf or -Inf
- and floating functions with invalid arguments yield NaN.
-06-06-20 The floating point constants Inf and NaN can be used in arithmetic
- expressions.
-06-06-20 The functions isinf(), isnan(), tanhl() have been added for
- arithmetic expressions.
-06-06-13 Internal change to use ordering for variables instead of hashing
- to speed up prefix matching.
-06-06-13 A window between fork/exec in which a signal could get lost
- and cause a program to hang has been eliminated
-06-06-13 A bug in edit completion with quoted strings has been fixed.
-06-06-07 The restricted options can now be enabled by set as well as on
- the command line. Once set, it can not be disabled.
-06-06-04 Modified built-in binding so that for systems for which /bin
- and /usr/bin are the same, a builtin bound to /bin will get
- selected when either /bin or /usr/bin is scanned.
-06-06-04 Added literal-next character processing for emacs/gmacs mode.
- This change is not compatible with earlier versions of ksh93
- and ksh88 when the stty lnext is control-v. The sequence
- escape-control-v will display the shell version.
-06-05-31 Modified emacs and vi mode so that entering a TAB after a partial
- TAB completion, generates a listing of possible completions.
- After the second TAB, a number followed by a TAB will perform
- the completion with the corresponding item.
-06-05-19 Modified arithmetic so that conversions to strings default to
- the maximum number of precision digits.
-06-05-16 Bug fixes for multibyte locales.
-06-05-10 The =~ operator was added to [[...]] and [[ string ~= ERE ]]
- is equivalent to [[ string == ~(E)ERE ]].
-06-04-25 A bug in the vi edit mode which could cause the shell to core dump
- when switching from emacs mode.
-06-04-17 A bug in which using LANG or LC_ in assignment lists with builtins
- did not restore the localed correctly has been fixed.
-06-04-04 A bug in which discipline functions could not be added to variables
- whose names started with .sh has been fixed.
-06-03-28 The -s option to typeset was added to modify -i to indicate short
- integers.
-06-03-28 A bug in which variables assignment lists before functions
- defined with function name were not passed on the functions
- invoked by this function has been fixed.
-06-03-28 A bug in which name references defined within a function defined
- with function name could not be used with compound variables has
- been fixed.
-06-03-27 A bug in which read <&p (print >&p) would cause the coprocess input
- (output) pipe to close before reading from (after writing to)
- it has been fixed.
-06-02-28 A bug in which stopping a job created with the hist builtin command
- would create a job that could not be restarted has been fixed.
-
-06-01-24 --- Release ksh93r ---
-06-01-24 A bug in which running commands with standard output closed would
- not work as expected has been fixed.
-06-01-23 A bug in which print -u<n> could fail when file descriptor <n> was
- open for writing has been fixed.
-06-01-19 The ?: arithmetic operator fixed to work when the operation after
- the colon was an assignment.
-05-12-24 A bug which could lead to a core dump when elements of a compound
- variable were array elements, i.e. foo=(bar=(1 2)), has been fixed.
-05-12-13 An arithmetic bug in which x+=y+=z was not working has been fixed.
-05-12-13 An arithmetic bug in which x||y was returning x when x was non-zero
- rather than 1 has been fixed.
-05-12-07 The aliases for integer and float have been changed to use attributes
- -li and -lE to handle long long and long double types.
-05-12-07 The histexpand (-H) option has been added which allows C-shell
- style history expansions using the history character !.
-05-12-07 The multiline option was added which changes that way the edit
- modes handle lines longer than the window width. Instead of
- horizontal scrolling, multiple lines on the screen are used.
-05-12-05 The whence builtin now returns an absolute pathname when the
- command is found in the current directory.
-05-11-29 A bug which caused ksh -c '[[ ! ((' to core dump rather than
- report a syntax error has been fixed.
-05-11-29 A bug when reading fixed length records into typeset -b variables
- which caused a zero byte to terminate the value has been fixed.
-05-11-22 The ability to seek to an offset within a file has been added
- with the new I/O redirection operators, <# and >#. Currently,
- these redirection operators must be followed by ((expr))
- but in a future release, it should be able to used to seek forward
- to the specified shell pattern. In addition $(n<#) expands to the
- current byte offset for file descriptor n.
-05-11-22 The .sh.match array variable is now set after each [[ ... ]]
- pattern match. Previously it was only set for substring matches.
-05-10-17 A bug in which the library path variable could be prefixed
- with a directory when a .path file was not encountered in
- the directory of the executable has been fixed.
-05-09-15 A for/while loop optimizer bug in which $OPTIND was not
- correctly expanded has been fixed.
-05-09-05 A bug in which a history command that invoked a history
- command could go into an infinite loop has been fixed.
-05-08-31 In the case that IFS contains to adjacent new-lines so that
- new-line is not treated as a space delimiter, only a single
- new-line is deleted at the end of a command substitution.
-05-08-19 When a tilde substitution expands to the / directory and is
- followed by a /, it is replaced by the empty string.
-05-08-16 A bug in which n<&m did not synchronize m has been fixed.
-05-08-16 A bug in which process substitution ( <() and >() ) was not
- working within for and while loops has been fixed.
-05-07-24 A bug in which the pattern ~(E)(foo|bar) was treated as a syntax
- error has been fixed.
-05-07-24 A bug in completion with <n>=, where n was the one of the
- previous selection choices has been fixed.
-05-07-21 A bug with multibyte input when no edit mode was specified which
- caused the input line to shift left/right has been fixed.
-05-06-24 A race condition which could cause the exit status to get lost
- on some fast systems has been fixed.
-05-06-21 A bug in which nested patterns of the form {m,n}(pat) would cause
- syntax errors has been fixed.
-05-06-21 A bug in the macro expander has been fixed which could cause a
- syntax error for an expansion of the form ${x-$(...)} when
- x is set and the command substitution contained certain strings.
-05-06-08 On systems for which echo does not do System V style \ expansions,
- the -e option was added to enable these expansion.
-05-06-08 A bug in which ${var op pattern} to not work when inside an
- arithmetic expression has been fixed.
-05-05-23 An extension to shell patterns that allows matching of nested
- groups while skipping over quoted strings has been added.
-05-05-18 A bug in which the line number for errors was not correct for
- functions loaded from FPATH has been fixed.
-05-04-18 A bug in which the exit status $? is not set when a trap triggered
- by the [[...]] command is executed has been fixed.
-05-04-08 Redirection operators can be directly preceded with {varname}
- with no intervening space, where varname is a variable name which
- allows the shell to select a file descriptor > 10 and store it
- into varname.
-05-04-08 SHOPT_CMDLIB_BLTIN=1 now includes <cmdlist.h> generated table.
-05-04-07 [[ -o ?option ]] is true if "option" is a supported option.
-05-04-05 A bug in handling file completion with spaces in the names
- has been fixed.
-05-03-25 The SIGWINCH signal is caught by default to keeps the LINES and
- COLUMNS variables in sync with the actual window size.
-05-03-25 Building ksh with SHOPT_REMOTE=1 causes ksh to set --rc if stdin is
- a socket (presumably part of a remote shell invocation.)
-05-03-25 Building ksh with SHOPT_SYSRC=1 causes interactive ksh to source
- /etc/ksh.kshrc (if it exists) before sourcing the $ENV file.
-05-03-25 {first..last[..incr][%fmt]} sequences added to brace expansions
- when braceexpand is enabled.
-05-03-03 A bug where a SIGCHLD interrupt could cause a fifo open to fail has
- been fixed.
-05-02-25 A bug in which a builtin command run in the background could
- keep a file descriptor open which could cause a foreground
- process to hang has been fixed.
-05-02-24 A bug where builtin library commands (e.g., date and TZ) failed to
- detect environment variable changes has been fixed.
-05-02-22 The read builtin and word splitting are now consistent with respect
- to IFS -- both treat IFS as field delimiters.
-05-02-22 The read builtin no longer strips off trailing delimiters that
- are not space characters when there are fewer variables than fields.
-05-02-17 A builtin bug on systems where dlsym(libcmd) returns link-time
- bindings has been fixed.
-05-02-12 A bug in which the lib_init() function for .paths BUILTIN_LIB
- libraries was not called has been fixed.
-05-02-06 A bug on some systems in which moving the write end of a co-process
- to a numbered file descriptor could cause it to close has been fixed.
-05-02-06 A bug in the vi-edit mode in which the character under the cursor
- was not deleted in some cases with the d% directive has been fixed.
-05-02-06 A bug where external builtin stdout/stderr redirection corrupted
- stdout has been fixed.
-05-02-04 A bug where times formatting assumed CLK_TCK==60 has been fixed.
-
-05-01-11 --- Release ksh93q ---
-05-01-11 A bug in the integral divide by zero check has been fixed.
-05-01-11 The -l option has been added to read /etc/profile and
- $HOME/.profile, if they exist, before the first command.
-05-01-11 An argument parsing bug that caused `kill -s x -- n' to fail has
- been fixed.
-05-01-11 The .paths file, introduced in ksh93m, which can appear in
- any directory in PATH, now allows a line of the form 'BUILTIN_LIB=.'
- When a command is searched for this directory, and the full path
- matches the path of the built-in version of the command (listed
- by the 'builtin' command) then the built-in version of the command
- is used. When ksh is built with SHOPT_CMDLIB_DIR=1 then all libcmd
- functions become builtins with the '/opt/ast/bin/' directory prefix.
-05-01-10 A bug in which a nameref to a compound name caused a core dump has
- been fixed.
-05-01-09 A bug in which some SIGCHLD interrupts (from child processes exiting)
- caused a fatal print/echo error diagnostic has been fixed.
-04-12-24 A bug in which some SIGCHLD interrupts (from child processes exiting)
- corrupted the internal process/job list, sometimes causing the shell
- to hang, has been fixed.
-04-12-01 A bug in which typeset -Fn truncated less than n digits for large
- numbers has been fixed.
-04-11-25 A bug in which standard error could be closed after a redirection
- to /dev/stderr has been fixed.
-04-11-17 A bug in which an expansion of the form ${array[@]:3} could expand
- to ${array[0]} when ${array[3]} was not set has been fixed.
-04-10-22 The -E or -orc command line option reads ${ENV-$HOME/.kshrc} file.
-04-10-22 `-o foo' equivalent to `+o nofoo', `-o nobar' equivalent to `+o bar'.
- `--foo' equivalent to `-o foo', `--nofoo' equivalent to `+o foo'
-04-10-05 The .paths file, introduced in ksh93m, which can appear in
- any directory in PATH, now allows a line of the form
- 'BUILTIN_LIB=libname'. When a command is searched for this directory,
- the shared library named by libname will first be searched for a
- built-in version of the command.
-04-09-03 <<< here documents now handle quotes in the word token correctly.
-04-08-08 The maximum size for read -n and and read -N was increased from
- 4095 to 32M.
-04-08-04 printf %q was modified so that if an no operand was supplied, no
- no output would be generated rather than a quoted empty string.
-04-08-01 The -n and -N options of the read builtin has been modified
- when reading variables with the binary attribute so that the
- data is stored directly rather than through assignment.
-04-08-01 The shcomp command has been modified to process alias commands
- under some conditions.
-04-07-31 The .sh.match variable added in ksh93l, now works like other
- indexed arrays.
-04-07-08 A loop optimizer bug which occurs when typeset is used in
- a for or while loop inside a function has been fixed.
-04-06-24 The number of subexpressions in a pattern was increased to 64
- from the current number of 20.
-04-06-17 The -t option to read was modified to allow seconds to be
- specified as any arithmetic expression rather than just
- an integral number of seconds, for example even -t 'sin(.5)'
- is now valid.
-04-06-16 Two small memory leak problems were fixed.
-04-06-15 A bug in ${var/pattern/"string"} which occurred when string
- contained pattern matching characters has been fixed.
-04-05-08 printf $'%d\n' produced an erroneous error message and has
- been fixed.
-04-05-24 A bug in which an associative array without any elements could
- cause a core dump when a script with an associative array with
- the same name was declared in a script invoked by name has
- been fixed.
-04-05-11 A bug in which an exec statement could close the script that
- is being processed in a script that is run by name causing
- a failure has been fixed.
-04-04-28 If the first character of assignment to an integer variable was 0,
- the variable had been treated as unsigned. This behavior was
- undocumented and has been removed.
-04-04-05 A bug in which the positioning of standard input could be incorrect
- after reading from standard input from a subshell has been fixed.
-04-03-30 A bug in the for loop optimizer which in rare cases could cause
- memory corruption has been fixed.
-04-03-29 The preset alias source='command .' has been added.
-04-03-29 A bug introduced in ksh93p on some systems in which invoked by
- name with #! on the first line would not get the signals handling
- initialized correctly has been fixed.
-04-03-29 A bug introduced in ksh93p in which a HUP signal received by
- a shell that is a session group leader was not passed down to
- its children has been fixed.
-
-04-02-28 --- Release ksh93p ---
-04-02-28 The ability to apply an append discipline to any variable has
- been added.
-04-02-14 A bug in which the exportall option (set -a) would cause incorrect
- results for arrays has been fixed.
-04-02-02 A bug in which an exported array would pass more than
- the first element to a script invoked by name has been fixed.
-04-02-02 A bug on some systems in which name=value pairs preceding a script
- invoked by name was not getting passed to the script has been fixed.
-04-01-20 A bug in which an unset discipline function could cause a core
- dump on some systems has been fixed.
-04-01-12 A bug in which a continue or break called outside a loop from
- inside a function defined with name() syntax could affect
- the invoking function has been fixed.
-04-01-08 If a command name begins with ~, only filename completion will be
- attempted rather than pathname completion using the builtin editors.
-04-01-08 A bug in the vi edit mode in which the wrong repeat count on
- multiple word replacements with the . directive has been fixed.
-04-01-06 Backspace characters are now handled correctly in prompt strings.
-04-01-06 The getopts builtin has been modified to accept numerical
- arguments of size long long on systems that support this.
-04-01-06 A bug in which unsetting all elements of an associative array
- would cause it to be treated as an indexed array has been fixed.
-03-12-15 A bug in which a quoted string ending with an unescaped $ would
- delete the ending $ in certain cases has been fixed.
-03-12-05 A bug in which the shell could hang when set -x tracing a command
- when an invalid multibyte character is encountered has been fixed.
-03-12-05 On some systems, if the KEYBD trap is set, then commands that use
- the meta key were not processed until return was hit. This
- has been fixed.
-03-12-05 A problem which occurred when the login shell was not a group
- leader that could cause it to fail has been fixed.
-03-12-05 A problem in which a shell could core dump after receiving a signal
- that should cause it to terminate while it was in the process
- of acquiring more space has been fixed.
-03-12-05 If ENV is not specified, the shell will default to $HOME/.kshrc
- for interactive shells.
-03-11-21 A bug introduced in ksh93o in which the DEBUG trap could get
- disabled after it triggered has been fixed.
-03-11-04 A bug in which using arithmetic prefix operators ++ or -- on a
- non-lvalue could cause a core dump has been fixed.
-03-11-04 A bug in which leading zeros were stripped from variable
- expansions within arithmetic computation to avoid being treated
- as octal constants when they should not have, has been fixed.
-03-10-08 A bug introduced in ksh93o in which a large here document inside
- a function definition could get corrupted has been fixed.
-03-09-22 A bug in which the .get discipline function was not being
- called when a string variable was implicitly referenced from
- within a numerical expression has been fixed.
-03-09-22 A bug in which a script without a leading #! could get executed
- by /bin/sh rather than the current shell on some systems has
- been fixed.
-03-09-12 To improve conformance with ksh88, leading zeros will be ignored
- when getting the numerical value of a string variable so that
- they will not be treated as octal constants.
-03-09-03 The builtin kill command now processes obsolete invocations
- such as kill -1 -pid.
-03-09-02 The restriction on modifying FPATH in a restricted shell (sh -r)
- has been documented.
-03-09-02 The restricted shell (sh -r) has been modified to disallow
- executing command -p.
-03-08-07 A bug in which the KEYBD trap was not being invoked when
- characters with the 8th bit set has been fixed.
-03-08-02 A parser bug introduced in ksh93o which caused the character
- after () in a Posix function definition to be skipped
- when reading from standard input has been fixed.
-03-08-01 A bug in which "${foo#pattern}(x)" treated (x) as if it were
- part of the pattern has been fixed.
-03-08-01 The command -x option has been modified so that any trailing
- arguments that do expand to a single word will be included
- on each invocation, so that commands like command -x mv * dir
- work as expected.
-
-03-07-20 --- Release ksh93o+ ---
-03-07-20 A bug in which could cause memory corruption when a posix
- function invoked another one has been fixed.
-03-07-15 A bug in which a file descriptor>2 could be closed before
- executing a script has been fixed.
-03-07-15 A parsing error for <() and >() process substitutions inside
- command substitution has been fixed.
-03-07-15 A parsing error for patterns of the form {...}(...) when
- used inside ${...} has been fixed.
-03-07-15 An error in which expanding an indexed array inside a compound
- variable could cause a core dump has been fixed.
-03-07-15 A bug in which on rare occasions a job completion interrupt
- could cause to core dump has been fixed.
-03-06-26 A bug in which process substitution embedded within command
- substitution would generate a syntax error has been fixed.
-03-96-23 A bug in which ${@:offset:len} could core dump when there
- were no arguments has been fixed.
-03-96-23 A bug in which ${X[@]:offset:len} could core dump when X
- was unset has been fixed.
-03-06-22 The -x option was added to the command builtin. If this
- option is on, and the number of arguments would exceed ARG_MAX,
- the command will be invoked multiple times with a subset of
- the arguments. For example, with alias grep='command -x grep,
- any number of arguments can be specified.
-03-06-14 A bug in which could cause a core dump on some systems with
- vi and emacs editors with the MULTIBYTE option has been fixed.
-03-06-06 A bug in which the shell could core dump when a script was
- run from its directory, and the script name a symlink to a file
- beginning with .., has been fixed.
-03-06-05 A bug in which the shell could core dump when a child process
- that it is unaware of terminates while it is calling malloc()
- has been fixed.
-03-06-02 An option named globstar (set -G) has been added. When enabled,
- during pathname expansion, any component that consists only of ** is
- matches all files and any number of directory levels.
-03-05-30 A bug in which the PATH search could give incorrect results when
- run from directory foo and PATH contained .:foo:xxx has been fixed.
-03-05-29 Some changes were made to the code that displays the prompt in edit
- mode to better handle escape sequences in the prompt.
-03-05-27 I added = to the list of characters that mark the beginning of
- a word for edit completion so that filenames in assignments
- can be completed.
-03-05-20 A bug in which read -N could hang on some systems when reading
- from a terminal or a pipe has been fixed.
-03-05-19 A bug in which the output of uname from a command substitution
- would go to the standard output of the invoking command when
- uname was invoked with a non-standard option has been fixed.
-03-05-19 A job control bug which would cause the shell to exit because
- it hadn't take back the terminal has been fixed. The bug
- could occur when running a function that contained a pipeline
- whose last element was a function.
-03-05-19 A job control timing bug introduced in ksh93o on some systems
- which could cause a pipeline to hang if the first component
- completed quickly has been fixed.
-03-05-13 The read builtin has been modified so that the builtin editors
- will not overwrite output from a previous incomplete line.
-03-05-13 A bug in which the name of an identifier could have the string
- .sh. prefixed to it after expanding a variable whose name begins
- with .sh. has been fixed.
-03-05-13 A bug in the expansion of $var for compound variables in which
- some elements would not be output when the name was a prefix
- of another name in the compound variable has been fixed.
-03-05-08 The last item in the ksh93o release on 03-01-02 has been
- altered slightly to preserve the leading 0's when the
- preceding character is a digit. Thus, with typeset -LZ3 x=10,
- $(( 1$x)) will be 1010 whereas $(( $x) will be 10.
-03-04-25 A bug in which if x is a name reference, then nameref y=x.foo
- did not follow x has been fixed.
-
-03-03-18 --- Release ksh93o ---
-03-03-18 A -N unary operator was added to test and [[...]] which returns
- true if the file exists and the file has been modified since it
- was last read.
-03-03-18 The TIMEFORMAT variable was added to control the format for
- the time compound command. The formatting description is
- described in the man page.
-03-03-06 A -N n option was added to read which causes exactly n bytes
- to be read unlike -n n which causes at most n bytes to be read.
-03-03-03 Three new shell variables were added. The variable .sh.file
- stores the full pathname of the file that the current command
- was found in. The variable .sh.fun names the current function
- that is running. The variable .sh.subshell contains the depth
- of the current subshell or command substitution.
-03-03-03 When the DEBUG trap is executed, the current command line after
- expansions is placed in the variable .sh.command. The trap
- is also now triggered before each iteration of a for, select,
- and case command and before each assignment and redirection.
-03-02-28 Function definitions are no longer stored in the history file so
- that set -o nolog no longer has any meaning.
-03-02-28 All function definitions can be displayed with typeset -f not
- just those stored in the history file. In addition, typeset +f
- displays the function name followed by a comment containing the
- line number and the path name for the file that defined this function.
-03-02-28 A bug in which the value of $LINENO was not correct when executing
- command contained inside mult-line command substitutions has been
- fixed.
-03-02-19 Since some existing ksh88 scripts use the undocumented and
- unintended ability to insert a : in front of the % and # parameter
- expansion operators, ksh93 was modified to accept :% as equivalent
- to % and :# as equivalent to # with ${name op word}.
-03-02-14 A bug which could cause a core dump when reading from standard
- error when standard error was a pty has been fixed.
-03-02-14 The shell arithmetic was modified to use long double on systems
- that provide this data type.
-03-02-09 A bug in which a function located in the first directory in FPATH
- would not be found when the last component of PATH was . and the
- current directory was one of the directories in PATH has been fixed.
-03-02-07 The trap and kill builtin commands now accept a leading SIG prefix
- on the signal names as documented.
-03-02-05 A bug in the expansion of ${var/$pattern}, when pattern contained
- \[ has been fixed.
-03-02-05 A bug in which .sh.match[n], n>0, was not being set for substring
- matches with % and %% has been fixed.
-03-01-15 A bug in which getopts did not work for numerical arguments specified
- as n#var in the getopts string has been fixed.
-03-01-09 A bug in which using ${.sh.match} multiple times could lead to
- a memory exception has been fixed.
-03-01-06 A bug in the expansion of ${var/pattern/$string} in the case that
- $string contains \digit has been fixed.
-03-01-02 A -P option was added for systems such as Solaris 8 that support
- profile shell.
-03-01-02 For backward compatibility with ksh88, arithmetic expansion
- with ((...)) and let has been modified so that if x is a zero-filled
- variable, $x will not be treated as an octal constant.
-
-02-12-05 --- Release ksh93n+ ---
-02-11-30 A bug that can show up in evaluating arithmetic statements that
- are in an autoloaded function when the function is autoload from
- another function has been fixed.
-02-11-30 An optimization bug in which an expansion of the form ${!name.@},
- which occurred inside a for or a while loop, when name is a name
- reference, has been fixed.
-02-11-18 A bug in which modifying array variables in a subshell could leave
- side effects in the parent shell environment has been fixed.
-02-11-18 A memory leak when unsetting an associative array has been fixed.
-02-11-14 The code to display compound objects was rewritten to make
- it easier for runtime extensions to reuse this code.
-02-11-14 A change was made to allow runtime builtins to be notified when
- a signal is received so that cleanup can be performed.
-02-10-31 User applications can now trap the ALRM signal. Previously,
- the ALRM signal was used internally and could not be used
- by applications.
-02-10-31 A bug in which signals received while reading from a coprocess
- for which traps were set was not handled correctly has been fixed.
-02-10-31 A bug in which a file opened with exec inside a subshell could
- be closed before the subshell completed has been fixed.
-02-10-21 A bug in which setting PATH or FPATH inside a function might not
- take effect has been fixed.
-02-10-21 A bug which could cause a core dump when a local SECONDS variable
- is defined in a function has been fixed.
-02-10-15 A bug in which the associate array name operator ${!array[@]}
- could return the same name multiple times has been fixed.
-02-10-15 A bug in which the zero'th element of an associative array was
- not getting set when an assignment was made without a subscript
- specified has been fixed.
-
-02-09-30 --- Release ksh93n ---
-02-09-30 The maximum indexed array size was increased to 16Megs.
-02-09-30 A bug which could cause a core dump when changing attributes
- of associative array has been fixed.
-02-09-30 A bug in which exporting an array variable would not export the
- 0-th element has been fixed.
-02-09-30 A bug in which an array assignment of the form a=($a ...) would unset
- 'a' before the right hand side was evaluated has been fixed.
-02-09-27 A bug in which the error message for ${var?message} when var was
- null or unset did not contain the variable name var has been fixed.
-02-09-27 A bug in which closing file descriptors 0 through 2 could
- cause a subsequent here document to fail has been fixed.
-02-09-14 A bug in whence which occurs when the specified name contained
- a / has been fixed.
-02-09-14 A bug in the parser for strings of the form name$((expr))=value
- has been fixed.
-02-09-14 A for loop optimization bug in which the number of elements in
- an array was treated as an invariant has been fixed.
-02-09-09 A bug in which redirection or closing of a file descriptor between
- 3 and 9 could cause a subsequent here document to fail has been
- fixed.
-02-09-09 A bug in which a background job was not removed from the job list
- when a subshell completed has been fixed, for example (prog&).
-02-09-03 A bug in which an assignment of the form name=(integer x=3)
- could be interpretted as an array assignment rather than a
- compound variable assignment has been fixed.
-02-08-19 A command completion bug which occurred on file systems that
- are case insensitive has been fixed.
-02-08-19 A bug which could lead to an exception on some systems (for
- example FREEBSD) which occurred when setting PATH has been fixed.
-02-08-11 A bug in arithmetic rounding in which a value input as a decimal
- string would output as a rounded version of the string has
- been fixed.
-02-08-11 A bug in which the last character could be deleted from shell
- traces and from whence when called from a multibyte locale
- has been fixed.
-02-08-01 A bug which could cause a core dump to occur when a shell script
- is executed while a coprocess is running that has closed the
- output pipe has been fixed.
-02-08-01 A bug in which command completion in multibyte mode could
- corrupt memory for long command lines has been fixed.
-
-02-06-17 --- Release ksh93n- ---
-02-06-17 A bug in which user defined macros could cause a core dump in
- with MULTIBYTE mode has been fixed.
-02-06-17 A bug in which printf format specifiers of the form %2$s were causing
- a core dump has been fixed.
-02-06-17 A bug in which setting stty to noecho mode did not prevent the
- echoing of characters by ksh when emacs or viraw mode
- was enabled has been fixed.
-02-06-17 A bug in which background job completion could cause the sleep
- builtin to terminate prematurely has been fixed.
-02-06-17 A bug in which the shell could core dump if getopts was called
- when the OPTIND variable contained a negative value has been fixed.
-02-06-10 The edit mode prompt has been modified to handle escape sequences.
-02-06-10 A bug which occurred for interactive shells in which the builtin
- cat command was used in command substitution on a file whose
- size was larger than PIPE_BUF has been fixed.
-02-06-10 A bug in which the trap on ERR was not being processed when
- set inside a function has been fixed.
-02-06-07 A bug in which function definitions could cause the history count
- to be decremented by one (and even become negative) has been fixed.
-02-06-05 A bug in read in which share mode could be enabled has been fixed.
-02-05-28 A bug which could occur when the last command of a script was
- a case statement and the action selected ended in ;& instead of ;;
- has been fixed.
-02-05-23 A bug with unary + introduced in ksh93k has been fixed.
-02-05-07 A bug in substitutions of the form ${var/pattern/string} in which
- a backslash was inserted in the replacement string when it contained
- a special pattern character has been fixed.
-02-05-01 A bug in the emacs edit mode which occurred in versions compiled
- for multibyte character sets which occurred when a repeated search
- was requested after a long line had been returned for the previous
- search has been fixed.
-02-04-02 vi and emacs edit modes were modified so that tab completion is
- disabled when invoked from the read built-in.
-
-02-03-26 --- Release ksh93m+ ---
-02-03-26 A bug in which \ was not handled correctly when used in file
- expansion has been fixed.
-02-02-18 A bug in which lines beginning with a # were deleted from here
- documents when the here-document delimiter was followed by
- a comment has been fixed.
-02-12-06 An optimization bug in which ${!x[@]) was treated as invariant in
- a for loop has been fixed.
-02-02-06 A bug in which the ERR trap is not cleared for a script invoked
- by name from within a function has been fixed.
-02-01-08 A bug in which a shell script executed from within a subshell
- could cause this script to have an invalid pointer leading
- to a memory fault has been fixed.
-02-01-07 Added here documents of the form <<< word (as per zsh) which
- is equivalent to << delim\nword\ndelim.
-02-01-07 A bug in which the first word of a compound assignment,
- x=(word ...), was treated as a reserved word has been fixed.
-02-01-07 A bug in the handling of \ when noglob was enabled and a
- substitution of the form ${word op pattern} occurred in the
- same word has been fixed.
-02-01-07 A compilation option, CMDLIB_BLTIN in the file OPTION, has
- been added. When this options is set, all commands implemented
- in libcmd become shell builtin commands by default.
-02-01-07 A bug in which builtin foo, where foo is already a builtin
- would result in the builtin foo getting removed has been fixed.
-02-01-07 A bug which the shell executed a command found in the current
- directory when PATH have no valid directories has been fixed.
-01-11-28 The value of $? was not being set when called with exit.
-01-11-28 If the last command was of the form (...) and a trap on EXIT or
- ERR was set, and the command inside () modified the trap, then
- the original trap wasn't executed.
-01-11-26 The value for 0 is now preceded by the base number when
- the base was not 10.
-01-11-26 The default has compilation mode has been changes so that
- viraw mode will always be on.
-
-01-10-31 --- Release ksh93m ---
-01-10-31 A for loop optimizer bug for subshells contained withing for
- loops has been fixed.
-01-10-16 typeset without arguments no longer outputs variable names
- that do not have any attributes that are set.
-01-10-16 A bug introduced in ksh93l in which assignments specified with
- the exec built-in were not being expanded properly has been
- fixed.
-01-10-11 An optimization bug in which ${!x) was treated as invariant in
- a for loop has been fixed.
-01-10-11 Unsigned integer variables in bases other than 10 are printed now
- expand in that base with the base prefix.
-01-10-10 A number of typos in the self generating man pages for shell
- built-ins have been fixed.
-01-10-04 The self generated man pages for hist and fc were not working
- correctly and have been fixed.
-01-10-03 Yet another optimizer bug in which shell patterns were
- treated as invariants has been fixed.
-01-09-27 Two bugs relating to multibyte history searches and to find
- have been fixed.
-01-09-27 A bug introduced in ksh93k in which the PATH searching was
- not restored after running a command with an assignment list
- has been fixed.
-01-09-26 A bug in which a zero filled field was treated as octal when
- converted to integer has been fixed.
-01-09-26 Yet another bug in the optimization of for loops related to
- recursive functions with break or continue statements has been fixed.
-01-09-25 The exponentiation operator ** was added to the shell arithmetic
- evaluation. It has higher precedence than * and is left
- associative.
-01-09-25 The code was modified to use the ast multibyte macros
- and functions for handing multibyte locales.
-01-09-25 The expansion ${parameter:offset:length} now handles negative
- offsets which cause offsets to be measured from the end.
-01-09-25 Some spelling errors in the documentation were corrected.
-01-09-24 The /dev/tcp/host/port and /dev/udp/host/port now allow
- the ports to be specified by service name.
-01-09-24 The change staring with ksh93g in which the the appropriate
- library path variable is prepended with a corresponding library
- directory has been modified. With the new method, only the
- library path defined in the file named .paths in the directory
- where the executable is found will be modified. See the
- man page for more details.
-01-09-23 The .fpath file (see ksh93h) is no longer looked for in each
- directory on the path to locate function directories. The
- file named .paths is used instead.
-01-09-23 A bug in which IFS was not being restored after being changed in
- a subshell has been fixed.
-01-09-16 With the vi and emacs edit modes, after a list of command
- or functions is generated with = or M-= respectively,
- any element from the list can be pasted on the command line
- by preceding the = or M-= with a numeric parameter specifying
- the position on the list.
-01-09-16 A bug in ksh93l caused command completion not to find aliases
- and functions. Command listing from the edit mode was presented
- in reverse order. This has been fixed.
-01-09-13 Another bug in the optimization of for loops related to subshells
- when traps were set has been fixed.
-01-09-07 A change in ksh93l caused brace expansion to stop working
- and this has been fixed.
-01-09-04 A bug introduced in ksh93k in which an arithmetic statement
- within a function that used name references did not follow the
- reference has been fixed.
-01-09-04 A bug introduced in ksh93l in which export -p did not prefix
- each export with the word export has been fixed.
-01-08-29 A bug in multibyte input which occurred when a partial multibyte
- character was received has been fixed.
-01-08-29 A bug introduced in ksh93l which could cause a core dump
- when an assignment list containing PATH is specified inside
- command substitution has been fixed.
-01-08-09 Another bug in the optimization of for loops in ksh93l caused
- errors in recursive functions using local variables that
- contained for loops has been fixed.
-01-07-27 A bug in which IFS would be unset after a command substitution
- inside a here document has been fixed.
-01-07-26 To conform to the POSIX standard, if you invoked ksh name,
- and name does not contain a /, it will first try to run
- one in the current directory whether it is executable or not
- before doing a path search for an executable script. Earlier
- versions first checked for an executable script using the
- PATH variable.
-01-07-23 A bug in which unset -f invoked in a subshell could unset a
- function defined in the parent has been fixed.
-01-07-16 A bug in the optimization of for loops in ksh93l caused
- name references to be treated as invariants has been fixed.
-01-07-09 A bug in which a discipline function applied to a local variable
- could cause a shell exception has been fixed. Discipline
- functions can only be specified for global variables.
-
-01-06-18 --- Release ksh93l ---
-01-06-18 A bug in assigning integers larger than can be represented as
- long integers to floating point variables has been fixed.
-01-06-18 A bug in the handling of unsigned integers (typeset -ui) has
- been fixed.
-01-06-04 The evaluation of the PS1 prompt no longer effects the value
- of the $? variable.
-01-06-01 A small memory leak from subshells has been fixed.
-01-05-22 A bug in which attributes for variables that did not have
- values would be lost after a subshell has been fixed.
-01-05-22 The %R format has been added to convert a shell pattern into
- an extended regular expression.
-01-05-22 The escape sequences \e, \cX, \C[.collating-element.], and
- \x{hex} have been added to ASCII-C strings and to printf format
- strings.
-01-05-20 Patterns of the form {n}(pattern) and {m,n}(pattern) are now
- recognized. The first form matches exactly n of pattern whereas,
- the second form matches from m to n instances of pattern.
-01-05-20 The shell allows *-(pattern), +-(pattern), ?-(pattern),
- {m,n}-(pattern}, and @-(pattern) to cause the minimal
- match of pattern to be selected whenever possible rather
- than the maximal (greedy) match.
-01-05-20 The character class [:word:] has been added to patterns.
- The word class is the union of [:alnum:] and the character _.
-01-05-20 Inside (...) pattern groups, the \ character is now treated
- specially even when in an enclosing character class. The
- sequences, \w, \d, \s are equivalent to the character classes
- word, digit, and space respectively. The sequences \W, \D,
- and \S are their complement sets.
-01-05-20 The shell now recognizes pattern groups of the form
- ~(options:pattern) where options or :pattern can be omitted.
- Options use the letters + and - to enable and disable options
- respectively. The option letters g (greedy), i (ignore case)
- are used to cause maximal matching and to cause case
- insensitive matching respectively. If :pattern is also
- specified, these options are only in effect while this
- pattern is being processed. Otherwise, these options remain
- in effect until the end of the pattern group that they are contained
- in or until another ~(...) is encountered. These pattern groups
- are not counted with respect to group numbering.
-01-05-14 When edit completion, expansion, or listing occurs in the
- middle of a quoted string, the leading quote is ignored when
- performing the completion, expansion, or listing.
-01-05-14 A small memory leak from subshells has been fixed.
-01-05-10 A bug in which open files were not restored after a subshell
- that had used exec to replace a file has been fixed.
-01-05-10 Redirection to a null file name now generates an error message.
-01-05-09 The shell now rejects some invalid parameter substitutions that
- were previously processed in undefined ways.
-01-05-09 A bug in which the output of select was not flushed before the
- read when input did not come from the terminal has been fixed.
-01-05-08 A bug in which job ids would not be freed for interactive shells
- when subshells ran built-ins in the background has been fixed.
-01-05-08 The FPATH variable now requires an explicit . to cause the
- current directory to be treated as a function directory.
-01-05-08 A bug in read -n when echo mode was disabled has been fixed.
-01-05-07 A bug in which function definitions could be listed as part
- of the history has been fixed.
-01-04-30 This release uses a new and often much faster pattern matcher than
- earlier releases.
-01-04-30 An optimizer now eliminates invariant parameter expansions from
- for while and until loops.
-01-04-30 The variable .sh.match is set after each pattern match (# % or /)
- in a variable substitution. The variable .sh.match is an
- indexed array with element 0 being the complete match.
- The array is only valid until the next subsequent pattern
- match or until the value of the variable changes which ever
- comes first.
-01-04-30 A self generating man page has been added to shcomp. Also,
- shcomp now stops compiling when it finds an exit or exec
- command and copies the remainder so that it can be used
- for standard input.
-01-04-30 The shcomp command was modified so that it can work in an
- EBCIDIC environment and that binary scripts are portable
- across environments.
-01-04-30 A bug in the handling of a trailing : in PATH has been fixed.
-01-04-30 A bug in which the builtin version of a command would get invoked
- even though the full pathname for the command was specified
- has been fixed.
-01-04-30 A bug in which read would loose the last character when
- reading the last line of a file that did not contain a new-line
- character has been fixed.
-01-04-23 A bug on some systems in which in vi mode the end of file
- character and end of line character could be swapped has
- been fixed.
-01-04-23 A bug on some systems in which invoking a shell script that
- did not have execute permission could set the exit value to
- 127 rather than 126 has been fixed.
-01-04-20 A bug in which read -n from a pipe would block if fewer than
- n characters was received has been fixed.
-01-04-09 A bug in which invalid patterns, for example, ) by itself,
- was not treated as a string has been fixed so that if i=')',
- then [[ $i == $i ]] is true.
-01-04-09 The shell arithmetic now interprets C character constants.
-01-04-09 A bug in which a non-zero return from a function defined
- with the function reserved word did not trigger the ERR
- trap or exit with set -e has been fixed.
-01-04-02 A bug on some systems, in which characters above 127 were
- not displayed correctly in vi or emacs edit mode has been fixed.
-01-04-02 A bug on some systems, introduced in the 'k' point release, in
- which the erase character in viraw mode was moving the cursor
- to the left without erasing the character has been fixed.
-01-04-02 On some systems the wcwith() function was returning a wrong
- value for characters and caused characters to be displayed
- incorrectly from the shell edit modes. A work around for
- this problem has been added.
-01-03-26 A bug in which valid scripts could produce syntax errors
- when run with locales that considered characters such as "'"
- to be space characters has been fixed.
-01-03-20 A bug in which an syntax error in an arithmetic expression
- entered interactively could cause the shell to go into
- an infinite loop outputting the error message has been fixed.
-01-03-10 ksh93 accepts -l as a synonym for -L in test on systems for
- which /bin/test -l tests for symbolic links.
-01-03-10 A bug in parsing scripts in which { and } are used in place of
- in and esac in case statements embedded in compound commands
- has been fixed. Use of { and } for in and esac is obsolete.
-01-03-06 A bug in which an argument of the form foo=bar was not
- being passed correctly to a traced function whose name
- was foo has been fixed.
-01-03-02 Using $(trap -p name) did not print the name of the current
- trap setting for trap name.
-01-02-26 Exported floating point variables gave incorrect results
- when passing them to ksh88. This has been fixed.
-01-02-25 A race condition in which a coprocess which completed too quickly
- would not allow subsequent coprocesses to start has been fixed.
-01-02-25 The 'g' format specifier is now handled by printf. It had
- inadvertently been omitted.
-01-02-20 The + was not being displayed during an execution trace
- with the += assignment operator.
-01-02-19 The error message which occurs when the interpreter name
- defined on the #! line does not exist is more informative.
-01-02-19 A bug in which $0 would not be set correctly when a
- script with #! was invoked by full pathname from the
- directory of the script has been fixed.
-01-02-19 A shell script did not always pick up tty mode changes
- made by external commands such as stty which could
- effect the behavior of read.
-01-02-19 The -u, -g, and -k unary tests did not give the correct
- results when used with negation and this has been fixed.
-
-01-02-05 --- Release ksh93k+ ---
-01-02-05 The sequence \<newline> inside $'...' was not incrementing
- the line count and this has been fixed.
-01-02-05 Modified expansion of "${@-}" so that if no arguments are set
- it results in null string rather than nothing.
-01-02-02 memory leak problem with local variables in functions fixed.
-01-01-25 allow arithmetic expressions with float%int and treat them
- as ((int)float)%int rather than as an error.
-01-01-19 read -n1 was not working and has been fixed.
-01-01-17 ksh now handles the case in which a here document in command
- substitution $() is terminated by the trailing ). Previously,
- a new-line was needed at the end of the delimiter word.
-01-01-02 A bug in which a KEYBD trap would cause a multi-line token
- to be processed incorrectly has been fixed.
-00-12-10 Arithmetic integer constants can now have L and U suffices.
-00-12-10 A bug in the processing of arithmetic expressions with compound
- variables when the -n option is on has been fixed.
-00-12-08 A bug in M-f and M-b from emacs mode has been fixed. This
- bug only occurs when ksh93 is compiled without MULTIBYTE enabled.
-00-11-29 A bug in which jobs -p would yield 0 for background
- jobs run in a script has been fixed.
-00-11-21 A bug in integer arrays in which the number of elements is
- incorrect when the ++ operator is applied to a non-existing
- element has been fixed. For example, integer x; ((x[3]++)).
-00-11-20 A timing bug in which the shell could reset the terminal
- group to the wrong value in the case that the a new process
- changes the terminal group during startup has been fixed.
-
-00-10-27 --- Release ksh93k ---
-00-10-27 Using tab for completion now works only when applied
- after a non-blank character at the end of the current line.
- In other case a tab is inserted.
-00-10-27 A bug in the emacs edit mode for ^X^E has been fixed.
- The ^X^E sequence is supposed to invoke the full editor
- on the current command.
-00-10-18 A bug in which expansions of the form ${var//pattern/string}
- did not work correctly when pattern was '/' or "/" has
- been fixed.
-00-10-18 The output format for indexed arrays in compound variables
- has been modified so that it can be used as input.
-00-10-18 Assignments with name references (typeset -n) will now
- implicitly unreference an existing name reference.
-00-10-17 A bug the += append operator when a single array element
- is appended to a variable that is not an array has been fixed.
-00-10-16 A bug in which the SIGCONT signal was being sent to
- each process will kill -0 or kill -n 0 has been fixed.
-00-10-12 The arithmetic evaluation portion has been rewritten to
- perform a number of optimizations.
-00-10-10 A bug in which name prefix matching ${!name.*} was not
- checking name to see if it was a name reference has been fixed.
-00-09-26 A bug in the multibyte version in which the width of for
- non-printing characters was not correct has been fixed.
-00-09-12 Made changes to get multibyte editing work on UWIN for windows
-00-09-12 A bug in which multibyte characters would be displayed incorrectly
- has been fixed.
-00-08-08 Removed build dependency on iswprint() and iswalph().
-00-07-20 In some cases the read builtin would read more than a single
- line from a pipe on standard input and therefore leave the seek
- position in the wrong location.
-00-07-05 If the directory / is on the path, a / will not be inserted
- between the directory and the file name during path searching
- to avoid searching // for systems that treat this specially.
-00-06-26 A bug in which on rare occasions wait could return before all
- jobs have completed has been fixed.
-00-06-21 A bug in which backspace did not work correctly during the
- R replace directive in vi-mode has been fixed.
-00-06-12 Added variable name completion/expansion/listing to the set of
- completions. Variable name completions begin with $ or "$ followed
- by a letter.
-00-05-09 --- Release ksh93j ---
-00-05-09 Modified command substitution to avoid using /tmp files when
- run on read-only file systems.
-00-04-17 Modified printf to handle '%..Xc' and '%..Xs' options where X
- is not an alpha character. Previous versions core dumped with this.
-00-04-10 Changes to multibyte editing code were made to use standard
- ISO C functions rather than methods devised before the standard.
-00-04-09 Add %H options to printf to output strings with <"'&\t> properly
- converted for use in HTML and XML documents.
-00-04-07 Modified getopts builtin to handle \f...\f in usage string
- by invoking specified function.
-00-04-04 Added self generating man pages for bg, fc, fg, disown, jobs,
- hist, let, ., and ulimit.
-00-03-30 The append operator += has been added and can be used
- for all assignments, strings, arrays, and compound variables.
-00-03-30 Code was modified in several places to support automatic
- generation of C locale dictionaries.
-00-03-28 A bug in which the set and trap commands invoked with --name
- type arguments would terminate the invoking script has
- been fixed.
-00-03-27 A bug in which the library path variable was not updated
- correctly on some systems as described in the 'g' point
- release has been fixed.
-00-03-07 printf now returns a non-zero exit status when one of
- its arguments cannot be converted to the given type.
-00-03-05 The return value and error message for a command that
- was found on the path but was not executable was set
- incorrectly.
-00-03-05 A prototype for ioctl() was removed from the vi edit mode.
-
-00-01-28 --- Release ksh93i ---
-00-01-28 Most of the built-in commands and ksh itself are now
- self documenting. Running command --man will produce
- screen output. Running command --html produces the
- man page in html format.
-00-01-28 The getopts builtin can process command description
- strings to produce man pages.
-00-01-28 A bug in which a script could terminate when getopts
- encountered an error when invoked inside a function
- has been fixed.
-00-01-28 When a symbolic link was specified as the name of
- the script to invoke by name, the value of $0 was
- set to the real file name rather than the link name
- in some cases and this has been fixed.
-00-01-28 A bug in which the precision given as an argument
- to printf was not working has been fixed.
-
-99-03-31 --- Release ksh93h ---
-99-03-31 The PATH search algorithm has been modified to look
- for a file named .fpath in each bin directory and if
- found, to search for functions in this directory if
- it cannot find the command in that directory.
-99-03-31 When performing pathname expansion, the shell checks
- to see whether each directory it reads is case sensitive
- or not, and performs the matching accordingly.
-99-03-31 The %T format for printing formatted date/time.
-99-03-31 The emacs and vi modes now handle arrow keys when
- they use standard ANSI escape sequences.
-99-03-31 The TAB key can be used for completion in emacs and viraw mode.
-99-03-31 A bug in setting .sh.editchar during the KEYBD trap
- for the MULTIBYTE option was fixed in release ksh93h.
-99-03-31 A bug in shcomp for compilation of unary operators with [[...]]
- has been fixed.
-99-03-31 A bug in which the value of $? was changed when executing
- a keyboard trap has been fixed.
-99-03-31 The handling of SIGCHLD has been changed so that the
- trap is not triggered while executing trap commands
- to avoid recursive trap calls.
-99-03-31 A bug in which a local variable in a function declared readonly
- would generated an error when the function went out of
- scope has been fixed.
-99-03-31 A bug in which \<new_line> entered from the keyboard
- with the KEYBD trap enabled has been fixed.
-99-03-31 The error message for a misplaced ((, for example print ((3),
- was often garbled and has been fixed.
-99-03-31 A bug in the KEYBD trap in which escape sequences of the form
- <ESC>[#~ were not being handled as a unit has been fixed.
-99-03-31 A bug in which ksh would consider expressions like [[ (a) ]]
- as syntax errors has been fixed.
-99-03-31 A function defined as foo() without a function body
- was not reported as a syntax error.
-99-03-31 A bug in which ksh could run out of file descriptors when
- a stream was repeatedly opened with exec and read from
- has been fixed.
-
-98-04-30 --- Release ksh93g ---
-98-04-30 The pipefail option has been added. With pipefail
- enabled, a pipeline will not complete until all
- commands are complete, and the return value will
- be that of the last command to fail, or zero if
- all complete successfully.
-98-04-30 The name-value pair library uses the cdt library rather
- than the hash library. This change should be transparent
- to applications.
-98-04-30 On the U/WIN version for Window 95 and Windows NT,
- when a directory beginning with a letter followed by
- a colon is given to cd, it is assumed to be an absolute
- directory
-98-04-30 When an executable is found on a given path,
- the appropriate library path variable is prepended
- with a corresponding library directory.
-98-04-30 A bug in which a name reference could be created to
- itself and later cause the shell to get into an infinite
- loop has been fixed.
-98-04-30 A bug in shcomp relating to compound variables was fixed.
-98-04-30 A bug introduced in ksh93e in which leading 0's in -Z
- fields caused the value to be treated as octal for arithmetic
- evaluation has been fixed.
-98-04-30 A bug when a name reference with a shorter name than
- the variable it references was the subject of a compound
- assignment has been fixed.
-98-04-30 A bug which in which assignment to array variables in
- a subshell could effect the parent shell has been
- fixed.
-98-04-30 read name?prompt was putting a 0 byte at the end of the
- prompt on standard error.
-98-04-30 A bug in [[ string1 > string2 ]] when ksh was run with -x
- has been fixed.
-98-04-30 A bug in which the escape character was not processed
- correctly inside {...} when brace expansion is enabled
- has been fixed, for example {\$foo}.
-98-04-30 A bug in line continuation in here-documents has been
- fixed.
-98-04-30 The default base when not specified with typeset -i is
- 10 in accordance with the documentation. Previously,
- the value was determined by the first assignment.
-98-04-30 A parsing bug in which a # preceded alphanumeric
- characters inside a command substitution caused
- a syntax error to be reported has been fixed.
-98-04-30 A bug in which a decimal constant represented as 10#ddd
- where ddd was more than five digits generated a syntax
- error has been fixed.
-98-04-30 A bug in here document expansion in which ${...} expansions
- were split across buffer boundaries has been fixed.
-98-04-30 The sh_fun() function now takes third argument which
- is an argument list for the invoked discipline function
- or built-in.
-98-04-30 A callback function can be installed which will give
- notification of file duplications and file closes.
-98-04-30 When ksh is compiled on systems that do not use fork()
- current option settings where not propagated to sub-shells.
-
-97-06-30 --- Release ksh93f ---
-97-06-30 Hostnames in addition to host addresses can be given in
- /dev/tcp/host/port virtual file names.
-97-06-30 File name completion and expansion now quotes special
- characters in file names from both emacs and vi edit modes.
-97-06-30 An empty for list behave like a for list with null expansions.
- It produces a warning message with sh -n.
-97-06-30 The code has been modified to work with EBCDIC as well as ASCII.
-97-06-30 A bug which would cause the secondary prompt to be
- displayed when a user entered a literal carriage
- return has been fixed.
-97-06-30 A bug which caused ksh read -s name to core dump was
- fixed.
-97-06-30 A bug with the expansion of \} and \] inside double
- quoted strings that also contained variable expansions
- has been fixed
-97-06-30 Changes in the ksh93e point release caused autoload
- functions invoked from within command substitution
- to fail. This has been fixed.
-97-06-30 A bug in the processing of here-documents that could
- prevent variable substitution to occur after $(...) command
- substitution for long here documents has been fixed.
-97-06-30 A bug caused by a race condition that could cause SIGTERM
- to be ignored by a child process has been fixed.
-97-06-30 A bug which prevented the startup of a coprocess immediately
- after killing a running coprocess has been fixed.
-97-06-30 ulimit foobar, where foobar is not an arithmetic
- expression, now gives an error message as it did with ksh88
- instead of setting the file size limit to 0.
-97-06-30 A bug which could cause an interactive shell to terminate when
- the last process of a pipeline was a POSIX function was fixed.
-97-06-30 A bug which could cause command substitution of a shell script
- to core dump has been fixed.
-97-06-30 A security hole was fixed in suid_exec.
-97-06-30 Arithmetic functions such as pow() that take more than
- one argument, did not work if arguments other than the
- first contained parenthesized sub-expression.
-97-06-30 The error message from a script containing an incomplete
- arithmetic expression has been corrected.
-97-06-30 A bug which caused a core dump on some machines when
- the value of a name reference contained a positional
- parameter and the name reference was not defined inside
- a function has been fixed.
-97-06-30 Arithmetic expressions now correctly handle hexadecimal
- constants.
-97-06-30 A bug in which integer variables could be expanded
- with a leading 10# when declared with typeset -i
- multiple times has been corrected.
-97-06-30 A bug in which IFS wasn't correctly restored when
- set within command substitution has been fixed.
-97-06-30 The _ character is now considered as part of a word
- with the M-f and M-b emacs directives as it was in ksh88.
-97-06-30 A bug in brace pattern expansions that caused expressions
- such as {foo\,bar,bam} to expand incorrectly have been fixed.
-
-
-96-07-31 --- Release ksh93e ---
-96-07-31 The math functions, atan2, hypot, fmod, and pow were added.
-96-07-31 When a shared library is loaded, if the function lib_init()
- is defined in the library, it is invoked the first time that
- the library is loaded with builtin -f library.
-96-07-31 The k-shell information abstraction database option, KIA,
- has been revamped.
-96-07-31 Empty command substitutions of the form $() now work.
- whence -v foo now gives the correct result after calling
- builtin -d foo.
-96-07-31 A bug in right to left arithmetic assignment for which
- the arithmetic expression (( y = x = 1.5 )) did not
- yield 1 for y when x was declared typeset -i was fixed.
-96-07-31 printf has been fixed to handle format containing \0
- and/or \0145 correctly. In addition, characters following
- %b in the format string are no longer displayed when
- the operand contains \c.
-96-07-31 A bug in printf that could cause the %E format to
- produce unnormalized results has been fixed.
-96-07-31 A bug which causes some arithmetic expressions to be
- incorrectly evaluated as integer expressions rather
- that floating point has been fixed.
-96-07-31 Functions defined inside a subshell no longer remain
- defined when the subshell completes.
-96-07-31 The error message from sh -c ';echo foo' has been
- corrected.
-96-07-31 The format for umask -S has been changed to agree
- with the specification in the POSIX standard.
-96-07-31 A bug that caused side effects in subscript evaluation
- when tracing was enabled for subscripts using ++ or --
- has been fixed.
-96-07-31 To conform to the Posix standard getopts has been changed
- so that the option char is set to ? when it returns with
- a non-zero exit status.
-96-07-31 The handling of \} inside ${name...} has been fixed so
- that the \ quotes the }.
-96-07-31 A bug that caused the read builtin to resume execution
- after processing a trap has been fixed.
-96-07-31 [[ -s file ]] has been fixed so that if file is open
- by ksh, it is flushed first.
-96-07-31 In some cases attributes and sizes for non exported
- variables weren't being reset before running a script.
-96-07-31 The value of TMOUT was affected by changes make to
- it in a subshell.
-96-07-31 The jobs command did not reflect changes make by
- sending the CONT signal to a command.
-96-07-31 The error message for ksh -o unknown was incorrect.
-96-07-31 Functions invoked as name=value name, did not use
- values from the calling scope when evaluating value.
-96-07-31 A bug in which the shell would reexecute previously
- executed code when a shell script or coprocess was
- run in the background has been fixed.
-96-07-31 A bug in which an empty here-document would leave
- a file descriptor open has been fixed.
-96-07-31 A bug in which $(set -A array ...) would leave a
- side effect has been fixed.
-96-07-31 A discipline function for a global variable defined
- within a function defined with the function keyword,
- incorrectly created a local variable of the same name
- and applied the discipline to it.
-
-95-08-28 --- Release ksh93d ---
-95-08-28 The \ character was not handled correctly in replacement
- patterns with ${x/pattern/replace}.
-95-08-28 A bug with read in which the line did not end with
- a new-line has been fixed.
-95-08-28 A bug in file name generation which sometimes
- appended a . for filenames that ended in / has
- been fixed.
-95-08-28 If a process is waited for after a status has
- been returned by a previous wait, wait now
- returns 127.
-95-08-28 A bug with hist (fc) -e which prevented a command
- to re-executed after it had been edited has been fixed.
-95-08-28 A bug which prevented quoting from removing the meaning
- of unary test operators has been fixed.
-95-08-28 A bug with typeahead and KEYBOARD traps with the
- MULTIBYTE option set has been fixed.
-95-08-28 Builtin functions can take a third argument which is
- a void*.
-95-08-28 The nv_scan() function can restrict the scope of a walk
- to the top scope.
-
-95-04-31 --- Release ksh93c ---
-95-04-31 The expansion of "$@" was incorrect when $1 was the null
- string.
-95-04-31 A bug which could incorrectly report a syntax error in
- a backquoted expression when a $ was preceded by \\
- has been fixed.
-95-04-31 A bug which prevented the shell from exiting after
- reporting an error when failing to open a script
- has been fixed.
-95-04-31 A bug that could lead to memory corruption when a
- large here document that required parameter or command
- substitution was expanded has been fixed.
-95-04-31 A bug that could cause a core dump on some systems
- after ksh detected an error when reading a function
- has been fixed.
-95-04-31 A bug which could cause a coprocess to hang when
- reading from a process that has terminated has been fixed.
-95-04-31 A bug which caused a script to terminate when set -e
- was on and the first command of and && or || list
- failed has been fixed.
-95-04-31 A bug with here documents inside $(...) when the delimiter
- word is an identifier has been fixed.
-95-04-31 A bug which caused $0 to display the wrong value when
- a script was invoked as an argument to the . command
- and the eval command has been fixed.
-95-04-31 A bug that could cause the built-in sleep to hang
- has been fixed.
-95-04-31 A bug introduces in 12/28/93b which caused the backslash
- to be removed when it was followed by digit inside double
- quotes in some instances has been fixed.
-95-04-31 A bug which could cause a core dump if ksh was invoked with
- standard input closed has been fixed.
-95-04-31 A bug which could cause a core dump if typeset -A was
- specified for an existing variable has been fixed.
-95-04-31 Variables that were unset but had attributes such as readonly
- and export were not listed with readonly, export and typeset.
-95-04-31 Several problems with signals have been fixed.
-95-04-31 A bug which prevented ulimit -t from working has been fixed.
- Also, a bug in which failed ulimits could cause a core dump
- has also been fixed.
-95-04-31 A bug in expansion of the form ${name/#pattern/string} and
- ${name/%pattern/string} has been fixed.
-95-04-31 A bug which caused read -r on a line that contained only
- blanks to get a non-null value has been fixed.
-95-04-31 A bug introduced in the 'a' point release in which
- ${x='\\'} expanded to \ when x was unset has been fixed.
-95-04-31 A bug which prevented a trap on EXIT from being executed
- when the last command in a script was a function invocation
- has been fixed.
-95-04-31 A bug which caused an interactive shell ignore input when
- standard error was redirected to a file with exec,
- and then restored with exec 2>&1 has been fixed.
-95-04-31 An interactive shell turns on monitor mode even when
- standard error has been redirected to a file.
-95-04-31 A bug which could cause standard input to be incorrectly
- positioned for the last command of a script has been fixed.
-95-04-31 A bug in the edit modes which allowed walking back in
- the history file for more than HISTSIZE commands has
- been fixed.
-95-04-31 A bug which could cause a core dump if variable TMPDIR was
- changed between two command substitutions has been fixed.
-95-04-31. A bug which prevented a trap on EXIT from being cleared
- has been fixed.
-95-04-31 A bug fixed for the v directive in vi MULTIBYTE has been
- fixed.
-95-04-31 Code to for IFS handling of multibyte characters has
- been added.
-95-04-31 The displaying of multibyte strings in export, readonly,
- typeset, and execution traces has been fixed.
-95-04-31 Variables inside functions are now statically scoped.
- The previous behavior was never documented.
-95-04-31 Variables inside functions are now statically scoped.
- The previous behavior was never documented.
-95-04-31 A few changes have been made to the name-value library
- that affect built-ins that use disciplines. The
- changes allow disciplines to be shared by variables
- and should make it possible to add new disciplines
- without recompilation.
-95-04-31 The name-value library interface has undergone significant
- change for this revision. See the new nval.3 man page.
-
-94-12-31 --- Release ksh93b ---
-94-12-31 Variables inside functions are now statically scoped.
- The previous behavior was never documented.
-94-12-31 If IFS contains two consecutive identical characters belonging
- to the [:space:] class, then this character is treated as
- a non-space delimiter so that each instance will delimit
- a field. For example, IFS=$'\t\t' will cause two consecutive
- tabs to delimit a null field.
-94-12-31 The getopts command has a -a name option that specifies a
- name that will be used for usage messages.
-94-12-31 A bug which caused unset RANDOM to dump core has been
- fixed.
-94-12-31 A bug which prevented return for terminating a profile
- or ENV file has been fixed.
-94-12-31 A bug which prevented standard input from being
- directed to /dev/null for background jobs when
- monitor mode was turned off has been fixed.
-94-12-31 Statements of the form typeset -options var[expr]=value
- did not perform substitutions on expr as expected.
-94-12-31 A bug which prevented the shell from sending a HUP
- signal to some background jobs that were not disowned
- has been fixed.
-94-12-31 A bug which allowed a script to trap signals that are
- ignored at the time that the shell was invoked by exec
- has been fixed.
-94-12-31 A bug which could cause a core dump when a discipline
- function was unset within a discipline was fixed.
-94-12-31 The typeset builtin now accepts a first argument of
- + or - for compatibility with ksh88.
-94-12-31 For compatibility with ksh88, the results of expansions
- of command arguments will treat the extended character
- match characters ()|& as ordinary characters.
-94-12-31 A bug which caused read to fail on a file that was
- open for read/write with <> when the first operation
- was print or printf has been fixed.
-94-12-31 When a job is suspended, it is put on the top of
- the job list as required by the POSIX standard.
-94-12-31 The value of OPTARG when an option that required
- an argument but didn't have one was incorrect in the
- case the the option string began with a :.
-94-12-31 A bug which caused the terminal to get into a bad
- state with some KEYBD traps in vi-mode has been fixed.
-94-12-31 A bug which caused an invalid trap to cause a script
- to terminate, rather than just return an error, has
- been fixed.
-94-12-31 Backreferencing sub-expressions in patterns and replacement
- strings now works.
-94-12-31 A bug in chmod which caused the -R option to fail has
- been fixed.
-94-12-31 More signal names have been added for Solaris
-
-94-06-30 --- Release ksh93a ---
-94-06-30 An expansion bug which causes portions of a word after
- a $((...)) expansion that contains a nested $var expansion
- to be lost has been fixed.
-94-06-30 A bug that caused a core dump when a script that did not
- have PWD set and did a cd inside command substitution
- has been fixed.
-94-06-30 A bug which caused a core dump on some machines when
- the LANG variable was assigned to has been fixed.
-94-06-30 A bug which incorrectly handled set disciplines that
- performed arithmetic evaluation when the discipline
- was called from the arithmetic evaluator has been fixed.
-94-06-30 A bug caused by an EXIT trap inside a function that
- was executed in a subshell was fixed.
-94-06-30 If foo is a function, and not a program, then command foo
- now reports that foo isn't found rather than invoking foo.
-94-06-30 The previous version incorrectly listed -A as an
- invocation option. The -A option is only for set.
-94-06-30 A bug was fixed which caused ksh to loop when execution trace
- was enabled and the PS4 prompt required command substitution.
-94-06-30 A bug which could cause the job control switch character
- to be disabled when a script that enabled monitor mode
- terminated was fixed.
-94-06-30 A bug in the macro expansion global replacement operator //,
- when the pattern began with a [ or +( has been fixed.
-94-06-30 A bug which prevented ~ expansion from occurring when
- it was terminated with a colon inside an assignment
- has been fixed.
-94-06-30 A bug in the dot command which prevented autoload functions
- from working has been fixed.
-94-06-30 A bug which caused a variable to be unset if the
- its value were expanded inside a set discipline has
- been fixed.
-94-06-30 Whence -a now longer reports that a defined function
- is undefined.
-94-06-30 A bug on some systems in which $0 would be incorrect
- in scripts invoked by name has been fixed.
-94-06-30 Here documents with an empty body now work.
-94-06-30 A bug which disabled argument passing and resetting
- of options for a script invoked by name inside a
- function has been fixed.
-94-06-30 A bug in which an EXIT trap set the caller of a function
- would be executed if a command called inside a function
- was not found has been fixed.
-94-06-30 A bug which allowed a script to trap signals that are
- ignored at the time that the shell was invoked has
- been fixed.
-94-06-30 A bug which caused 2<&1- when applied to a shell built-in
- to leave standard input closed has been fixed.
-94-06-30 A bug which caused the shell to incorrectly parse
- $() command substitutions with nested case statements
- has been fixed.
-
diff --git a/usr/src/lib/libshell/common/TYPES b/usr/src/lib/libshell/common/TYPES
deleted file mode 100644
index 6eb6f41b5e..0000000000
--- a/usr/src/lib/libshell/common/TYPES
+++ /dev/null
@@ -1,182 +0,0 @@
-
-The ability for users to define types has been added to ksh93t.
-Here is a quick summary of how types are defined and used in ksh93t.
-This is still a work in progress so some changes and additions
-are likely.
-
-A type can be defined either by a shared library or by using the new
-typeset -T option to the shell. The method for defining types via
-a shared library is not described here. However, the source file
-bltins/enum.c is an example of a builtin that creates enumeration types.
-
-By convention, typenames begin with a capitol letter and end in _t.
-To define a type, use
- typeset -T Type_t=(
- definition
- )
-where definition contains assignment commands, declaration commands,
-and function definitions. A declaration command (for example typeset,
-readonly, and export), is a built-in that differs from other builtins in
-that tilde substitution is performed on arguments after an =, assignments
-do not have to precede the command name, and field splitting and pathname
-expansion is not performed on the arguments.
-For example,
- typeset -T Pt_t=(
- float -h 'length in inches' x=1
- float -h 'width in inches' y=0
- integer -S count=0
- len()
- {
- print -r $((sqrt(_.x*_.x + _.y*_.y)))
- }
- set()
- {
- (( _.count++))
- }
- )
-
-defines a type Pt_t that has three variables x, y, and count defined as well
-as the discipline functions len and set. The variable x has an initial value
-of 1 and the variable y has an initial value of 0. The new -h option argument,
-is used for documentations purposes as described later and is ignored outside
-of a type definition.
-
-
-The variable count has the new -S attribute which means that it is shared
-between all instances of the type. The -S option to typeset is ignored
-outside of a type definition. Note the variable named _ that is used inside
-the function definition for len and set. It will be a reference to the
-instance of Pt_t that invoked the function. The functions len and set
-could also have been defined with function len and function set, but
-since there are no local variables, the len() and set() form are more
-efficient since they don't need to set up a context for local variables
-and for saving and restoring traps.
-
-If the discipline function named create is defined it will be
-invoked when creating each instance for that type. A function named
-create cannot be defined by any instance.
-
-When a type is defined, a declaration built-in command by this name
-is added to ksh. As with other shell builtins, you can get the man page
-for this newly added command by invoking Pt_t --man. The information from
-the -h options will be embedded in this man page. Any functions that
-use getopts to process arguments will be cross referenced on the generated
-man page.
-
-Since Pt_t is now a declaration command it can be used in the definition
-of other types, for example
- typeset -T Rect_t=( Pt_t ur ll)
-
-Because a type definition is a command, it can be loaded on first reference
-by putting the definition into a file that is found on FPATH.
-Thus, if this definition is in a file named Pt_t on FPATH, then
-a program can create instances of Pt_t without first including
-the definition.
-
-A type definition is readonly and cannot be unset. Unsetting non-shared
-elements of a type restores them to their default value. Unsetting a
-shared element has no effect.
-
-The Pt_t command is used to create an instance of Pt_t.
- Pt_t p1
-creates an instance named p1 with the initial value for p1.x set to 1
-and the initial value of p1.y set to 0.
- Pt_t p2=(x=3 y=4)
-creates an instance with the specified initial values. The len function
-gives the distance of the point to the origin. Thus, p1.len will output
-1 and p2.len will output 5.
-
-ksh93t also introduces a more efficient command substitution mechanism.
-Instead of $(command), the new command substitution ${ command;}
-can be used. Unlike (and ) which are always special, the { and } are
-reserved words and require the space after { and a newline or ; before }.
-Unlike $(), the ${ ;} command substitution executes the command in
-the current shell context saving the need to save and restore
-changes, therefore also allowing side effects.
-
-When trying to expand an element of a type, if the element does not exist,
-ksh will look for a discipline function with that name and treat this as if
-it were the ${ ;} command substitution. Thus, ${p1.len} is equivalent to
-${ p1.len;} and within an arithmetic expression, p1.len will be expanded
-via the new command substitution method.
-
-The type of any variable can be obtained from the new prefix
-operator @. Thus, ${@p1} will output Pt_t.
-
-By default, each instance inherits all the discipline functions defined
-by the type definition other than create. However, each instance can define
-a function by the same name that will override this definition.
-However, only discipline functions with the same name as those defined
-by the type or the standard get, set, append, and unset disciplines
-can be defined by each instance.
-
-Each instance of the type Pt_t behaves like a compound variable except
-that only the variables defined by the type can be referenced or set.
-Thus, p2.x=9 is valid, but p2.z=9 is not. Unless a set discipline function
-does otherwise, the value of $p1 will be expanded to the form of a compound
-variable that can be used for reinput into ksh.
-
-If the variables var1 and var2 are of the same type, then the assignment
- var2=var1
-will create a copy of the variable var1 into var2. This is equivalent to
- eval var2="$var1"
-but is faster since the variable does not need to get expanded or reparsed.
-
-The type Pt_t can be referenced as if it were a variable using the name
-.sh.type.Pt_t. To change the default point location for subsequent
-instances of Pt_t, you can do
- .sh.type.Pt_t=(x=5 y=12)
-so that
- Pt_t p3
- p3.len
-would be 13.
-
-Types can be defined for simple variables as well as for compound
-objects such as Pt_t. In this case, the variable named . inside
-the definition refers to the real value for the variable. For example,
-the type definition
- typeset -T Time_t=(
- integer .=0
- _='%H:%M:%S'
- get()
- {
- .sh.value=$(printf "%(${_._})T" "#$((_))" )
- }
- set()
- {
- .sh.value=$(printf "%(%#)T" "${.sh.value}")
-
- }
- )
-
-The sub-variable name _ is reserved for data used by discipline functions
-and will not be included with data written with the %B option to printf.
-In this case it is used to specify a date format.
-
-In this case
- Time_t t1 t2=now
-will define t1 as the time at the beginning of the epoch and t2
-as the current time. Unlike the previous case, $t2 will output
-the current time in the date format specified by the value t2._.
-However, the value of ${t2.} will expand the instance to a form
-that can be used as input to the shell.
-
-Finally, types can be derived from an existing type. If the first
-element in a type definition is named _, then the new type
-consists of all the elements and discipline functions from the
-type of _ extended by elements and discipline functions defined
-by new type definition. For example,
-
- typeset -T Pq_t=(
- Pt_t _
- float z=0.
- len()
- {
- print -r $((sqrt(_.x*_.x + _.y*_.y + _.z*_.z)))
- }
- )
-
-defines a new type Pq_t which is based on Pq_t and contains an additional
-field z and a different len discipline function. It is also possible
-to create a new type Pt_t based on the original Pt_t. In this case
-the original Pt_t is no longer accessible.
diff --git a/usr/src/lib/libshell/misc/images/callouts/1.png b/usr/src/lib/libshell/misc/images/callouts/1.png
deleted file mode 100644
index 608fad3596..0000000000
--- a/usr/src/lib/libshell/misc/images/callouts/1.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/callouts/10.png b/usr/src/lib/libshell/misc/images/callouts/10.png
deleted file mode 100644
index 39e55197cf..0000000000
--- a/usr/src/lib/libshell/misc/images/callouts/10.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/callouts/2.png b/usr/src/lib/libshell/misc/images/callouts/2.png
deleted file mode 100644
index 5444738841..0000000000
--- a/usr/src/lib/libshell/misc/images/callouts/2.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/callouts/3.png b/usr/src/lib/libshell/misc/images/callouts/3.png
deleted file mode 100644
index 64b87c7151..0000000000
--- a/usr/src/lib/libshell/misc/images/callouts/3.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/callouts/4.png b/usr/src/lib/libshell/misc/images/callouts/4.png
deleted file mode 100644
index c308193ac4..0000000000
--- a/usr/src/lib/libshell/misc/images/callouts/4.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/callouts/5.png b/usr/src/lib/libshell/misc/images/callouts/5.png
deleted file mode 100644
index 24799f0a43..0000000000
--- a/usr/src/lib/libshell/misc/images/callouts/5.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/callouts/6.png b/usr/src/lib/libshell/misc/images/callouts/6.png
deleted file mode 100644
index 8919a670cd..0000000000
--- a/usr/src/lib/libshell/misc/images/callouts/6.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/callouts/7.png b/usr/src/lib/libshell/misc/images/callouts/7.png
deleted file mode 100644
index e30e8a70cb..0000000000
--- a/usr/src/lib/libshell/misc/images/callouts/7.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/callouts/8.png b/usr/src/lib/libshell/misc/images/callouts/8.png
deleted file mode 100644
index 3e35c8827c..0000000000
--- a/usr/src/lib/libshell/misc/images/callouts/8.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/callouts/9.png b/usr/src/lib/libshell/misc/images/callouts/9.png
deleted file mode 100644
index ed2f14b4eb..0000000000
--- a/usr/src/lib/libshell/misc/images/callouts/9.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/tag_bourne.png b/usr/src/lib/libshell/misc/images/tag_bourne.png
deleted file mode 100644
index f1f78e3a25..0000000000
--- a/usr/src/lib/libshell/misc/images/tag_bourne.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/tag_i18n.png b/usr/src/lib/libshell/misc/images/tag_i18n.png
deleted file mode 100644
index 559929df2a..0000000000
--- a/usr/src/lib/libshell/misc/images/tag_i18n.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/tag_ksh.png b/usr/src/lib/libshell/misc/images/tag_ksh.png
deleted file mode 100644
index b33d5a7aa1..0000000000
--- a/usr/src/lib/libshell/misc/images/tag_ksh.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/tag_ksh88.png b/usr/src/lib/libshell/misc/images/tag_ksh88.png
deleted file mode 100644
index d36dc0f5f5..0000000000
--- a/usr/src/lib/libshell/misc/images/tag_ksh88.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/tag_ksh93.png b/usr/src/lib/libshell/misc/images/tag_ksh93.png
deleted file mode 100644
index 357ee3c50a..0000000000
--- a/usr/src/lib/libshell/misc/images/tag_ksh93.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/tag_l10n.png b/usr/src/lib/libshell/misc/images/tag_l10n.png
deleted file mode 100644
index be89f7a163..0000000000
--- a/usr/src/lib/libshell/misc/images/tag_l10n.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/tag_perf.png b/usr/src/lib/libshell/misc/images/tag_perf.png
deleted file mode 100644
index fcb2960852..0000000000
--- a/usr/src/lib/libshell/misc/images/tag_perf.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/shell_styleguide.docbook b/usr/src/lib/libshell/misc/shell_styleguide.docbook
deleted file mode 100644
index 0376912d1f..0000000000
--- a/usr/src/lib/libshell/misc/shell_styleguide.docbook
+++ /dev/null
@@ -1,1464 +0,0 @@
-<?xml version="1.0"?>
-<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V5.0//EN" "http://www.oasis-open.org/docbook/xml/5.0b5/dtd/docbook.dtd" [
- <!ENTITY tag_bourneonly '<inlinemediaobject><imageobject><imagedata fileref="images/tag_bourne.png"></imagedata></imageobject><textobject><phrase>[Bourne]</phrase></textobject></inlinemediaobject> '>
- <!ENTITY tag_kshonly '<inlinemediaobject><imageobject><imagedata fileref="images/tag_ksh.png"></imagedata></imageobject><textobject><phrase>[ksh]</phrase></textobject></inlinemediaobject> '>
- <!ENTITY tag_ksh88only '<inlinemediaobject><imageobject><imagedata fileref="images/tag_ksh88.png"></imagedata></imageobject><textobject><phrase>[ksh88]</phrase></textobject></inlinemediaobject> '>
- <!ENTITY tag_ksh93only '<inlinemediaobject><imageobject><imagedata fileref="images/tag_ksh93.png"></imagedata></imageobject><textobject><phrase>[ksh93]</phrase></textobject></inlinemediaobject> '>
- <!ENTITY tag_performance '<inlinemediaobject><imageobject><imagedata fileref="images/tag_perf.png"></imagedata></imageobject><textobject><phrase>[perf]</phrase></textobject></inlinemediaobject> '>
- <!ENTITY tag_i18n '<inlinemediaobject><imageobject><imagedata fileref="images/tag_i18n.png"></imagedata></imageobject><textobject><phrase>[i18n]</phrase></textobject></inlinemediaobject> '>
- <!ENTITY tag_l10n '<inlinemediaobject><imageobject><imagedata fileref="images/tag_l10n.png"></imagedata></imageobject><textobject><phrase>[l10n]</phrase></textobject></inlinemediaobject> '>
-]>
-<!--
-
- CDDL HEADER START
-
- The contents of this file are subject to the terms of the
- Common Development and Distribution License (the "License").
- You may not use this file except in compliance with the License.
-
- You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- or http://www.opensolaris.org/os/licensing.
- See the License for the specific language governing permissions
- and limitations under the License.
-
- When distributing Covered Code, include this CDDL HEADER in each
- file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- If applicable, add the following below this CDDL HEADER, with the
- fields enclosed by brackets "[]" replaced with your own identifying
- information: Portions Copyright [yyyy] [name of copyright owner]
-
- CDDL HEADER END
-
--->
-
-<!--
-
- Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- Use is subject to license terms.
-
--->
-
-<!-- tag images were created like this:
-$ (text="perf" ;
- pbmtext -nomargins -lspace 0 -builtin fixed "${text}" |
- pbmtopgm 1 1 |
- pgmtoppm 1.0,1.0,1.0-0,0,0 /dev/stdin |
- ppmtogif |
- giftopnm |
- pnmtopng >"tag_${text}.png")
--->
-
-<!-- compile with:
-xsltproc &minus;&minus;stringparam generate.section.toc.level 0 \
- &minus;&minus;stringparam toc.max.depth 3 \
- &minus;&minus;stringparam toc.section.depth 12 \
- &minus;&minus;xinclude -o opensolaris_shell_styleguide.html /usr/share/sgml/docbook/docbook-xsl-stylesheets-1.69.1/html/docbook.xsl opensolaris_shell_styleguide.docbook
--->
-
-<article
- xmlns:xlink="http://www.w3.org/1999/xlink"
- xmlns="http://docbook.org/ns/docbook"
- xml:lang="en">
- <!-- xmlns:xi="http://www.w3.org/2001/XInclude" -->
-
- <info>
- <title><emphasis>[DRAFT]</emphasis> Bourne/Korn Shell Coding Conventions</title>
-
- <!-- subtitle abuse -->
- <subtitle>
- This page is currently work-in-progress until it is approved by the OS/Net community. Please send any comments to
- <email>shell-discuss@opensolaris.org</email>.
- </subtitle>
-
-
- <authorgroup>
-<!--
- <author><personname>David G. Korn</personname><email>dgk@research.att.com</email></author>
- <author><personname>Roland Mainz</personname><email>roland.mainz@nrubsig.org</email></author>
- <author><personname>Mike Shapiro</personname><email>mike.shapiro@sun.com</email></author>
--->
- <author><orgname>OpenSolaris.org</orgname></author>
- </authorgroup>
- </info>
-
-<section xml:id="intro">
- <title>Intro</title>
- <para>This document describes the shell coding style used for all the SMF script changes integrated into (Open)Solaris.</para>
- <para>All new SMF shell code should conform to this coding standard, which is intended to match our existing C coding standard.</para>
- <para>When in doubt, think "what would be the C-Style equivalent ?" and "What does the POSIX (shell) standard say ?"</para>
-</section><!-- end of intro -->
-
-
-<section xml:id="rules">
- <title>Rules</title>
-
-
-
- <section xml:id="general">
- <title>General</title>
-
- <section xml:id="basic_format">
- <title>Basic Format</title>
- <para>Similar to <literal>cstyle</literal>, the basic format is that all
- lines are indented by TABs or eight spaces, and continuation lines (which
- in the shell end with "\") are indented by an equivalent number of TABs
- and then an additional four spaces, e.g.
-<programlisting>
-cp foo bar
-cp some_realllllllllllllllly_realllllllllllllly_long_path \
- to_another_really_long_path
-</programlisting>
- </para>
- <para>The encoding used for the shell scripts is either <literal>ASCII</literal>
- or <literal>UTF-8</literal>, alternative encodings are only allowed when the
- application requires this.</para>
- </section>
-
-
- <section xml:id="commenting">
- <title>Commenting</title>
- <para>Shell comments are preceded by the '<literal>#</literal>' character. Place
- single-line comments in the right-hand margin. Use an extra '<literal>#</literal>'
- above and below the comment in the case of multi-line comments:
-<programlisting>
-cp foo bar # Copy foo to bar
-
-#
-# Modify the permissions on bar. We need to set them to root/sys
-# in order to match the package prototype.
-#
-chown root bar
-chgrp sys bar
-</programlisting>
- </para>
- </section>
-
-
- <section xml:id="interpreter_magic">
- <title>Interpreter magic</title>
- <para>The proper interpreter magic for your shell script should be one of these:
-<programlisting>
-#!/bin/sh Standard Bourne shell script
-#!/bin/ksh -p Standard Korn shell 88 script. You should always write ksh
- scripts with -p so that ${ENV} (if set by the user) is not
- sourced into your script by the shell.
-#!/bin/ksh93 Standard Korn shell 93 script (-p is not needed since ${ENV} is
- only used for interactive shell sessions).
-</programlisting>
- </para>
- </section>
-
-
- <section xml:id="harden_your_script_against_unexpected_input">
- <title>Harden the script against unexpected (user) input</title>
- <para>Harden your script against unexpected (user) input, including
- command line options, filenames with blanks (or other special
- characters) in the name, or file input</para>
- </section>
-
-
- <section xml:id="use_builtin_commands">
- <title>&tag_kshonly;&tag_performance;Use builtin commands if the shell provides them</title>
- <para>
- Use builtin commands if the shell provides them. For example ksh93s+
- (ksh93, version 's+') delivered with Solaris (as defined by PSARC 2006/550)
- supports the following builtins:
- <simplelist type="inline">
- <member>basename</member>
- <member>cat</member>
- <member>chgrp</member>
- <member>chmod</member>
- <member>chown</member>
- <member>cmp</member>
- <member>comm</member>
- <member>cp</member>
- <member>cut</member>
- <member>date</member>
- <member>dirname</member>
- <member>expr</member>
- <member>fds</member>
- <member>fmt</member>
- <member>fold</member>
- <member>getconf</member>
- <member>head</member>
- <member>id</member>
- <member>join</member>
- <member>ln</member>
- <member>logname</member>
- <member>mkdir</member>
- <member>mkfifo</member>
- <member>mv</member>
- <member>paste</member>
- <member>pathchk</member>
- <member>rev</member>
- <member>rm</member>
- <member>rmdir</member>
- <member>stty</member>
- <member>tail</member>
- <member>tee</member>
- <member>tty</member>
- <member>uname</member>
- <member>uniq</member>
- <member>wc</member>
- <member>sync</member>
- </simplelist>
- Those builtins can be enabled via <literal>$ builtin name_of_builtin #</literal> in shell
- scripts (note that ksh93 builtins implement exact POSIX behaviour - some
- commands in Solaris <filename>/usr/bin/</filename> directory implement pre-POSIX behaviour.
- Add <literal>/usr/xpg6/bin/:/usr/xpg4/bin</literal> before
- <filename>/usr/bin/</filename> in <envar>${PATH}</envar> to test whether your script works with
- the XPG6/POSIX versions)
- </para>
- </section>
-
-
- <section xml:id="use_blocks_not_subshells">
- <title>&tag_performance;Use blocks and not subshells if possible</title>
- <para>Use blocks and not subshells if possible, e.g. use
- <literal>$ { print "foo" ; print "bar" ; }</literal> instead of
- <literal>$ (print "foo" ; print "bar") #</literal> - blocks are
- faster since they do not require to save the subshell context (ksh93) or
- trigger a shell child process (Bourne shell, bash, ksh88 etc.)
- </para>
- </section>
-
-
- <section xml:id="use_long_options_for_set_builtin">
- <title>&tag_kshonly; use long options for "<literal>set</literal>"</title>
- <para>use long options for "<literal>set</literal>", for example instead of <literal>$ set -x #</literal>
- use <literal>$ set -o xtrace #</literal> to make the code more readable.</para>
- </section>
-
-
- <section xml:id="use_posix_command_substitutions_syntax">
- <title>&tag_kshonly; Use <literal>$(...)</literal> instead of <literal>`...`</literal> command substitutions</title>
- <para>Use <literal>$(...)</literal> instead of <literal>`...`</literal> - <literal>`...`</literal>
- is an obsolete construct in ksh+POSIX sh scripts and <literal>$(...)</literal>.is a cleaner design,
- requires no escaping rules, allows easy nesting etc.</para>
-
- <note><title>&tag_ksh93only; <literal>${ ...;}</literal>-style command substitutions</title>
- <para>ksh93 has support for an alternative version of command substitutions with the
- syntax <literal>${ ...;}</literal> which do not run in a subshell.
- </para></note>
- </section>
-
-
- <section xml:id="put_command_substitution_result_in_quotes">
- <title>&tag_kshonly; Always put the result of a <literal>$(...)</literal> or
- <literal>$( ...;)</literal> command substitution in quotes</title>
- <para>Always put the result of <literal>$( ... )</literal> or <literal>$( ...;)</literal> in
- quotes (e.g. <literal>foo="$( ... )"</literal> or <literal>foo="$( ...;)"</literal>) unless
- there is a very good reason for not doing it</para>
- </section>
-
-
- <section xml:id="always_set_path">
- <title>Scripts should always set their <envar>PATH</envar></title>
- <para>Scripts should always set their <envar>PATH</envar> to make sure they do not use
- alternative commands by accident (unless the value of <envar>PATH</envar> is well-known
- and guaranteed to be set by the caller)</para>
- </section>
-
-
- <section xml:id="make_sure_commands_are_available">
- <title>Make sure that commands from other packages/applications are really installed on the machine</title>
- <para>Scripts should make sure that commands in optional packages are really
- there, e.g. add a "precheck" block in scipts to avoid later failure when
- doing the main job</para>
- </section>
-
-
- <section xml:id="check_usage_of_boolean_variables">
- <title>Check how boolean values are used/implemented in your application</title>
- <para>Check how boolean values are used in your application.</para>
- <para>For example:
-<programlisting>
-mybool=0
-# do something
-if [ $mybool -eq 1 ] ; then do_something_1 ; fi
-</programlisting>
-could be rewritten like this:
-<programlisting>
-mybool=false # (valid values are "true" or "false", pointing
-# to the builtin equivalents of /bin/true or /bin/false)
-# do something
-if ${mybool} ; then do_something_1 ; fi
-</programlisting>
-or
-<programlisting>
-integer mybool=0 # values are 0 or 1
-# do something
-if (( mybool==1 )) ; then do_something_1 ; fi
-</programlisting>
- </para>
- </section>
-
- <section xml:id="shell_uses_characters_not_bytes">
- <title>&tag_i18n;The shell always operates on <emphasis>characters</emphasis> not bytes</title>
- <para>Shell scripts operate on characters and <emphasis>not</emphasis> bytes.
- Some locales use multiple bytes (called "multibyte locales") to represent one character</para>
-
- <note><para>ksh93 has support for binary variables which explicitly
- operate on bytes, not characters. This is the <emphasis>only</emphasis> allowed
- exception.</para></note>
- </section>
-
-
- <section xml:id="multibyte_locale_input">
- <title>&tag_i18n;Multibyte locales and input</title>
- <para>Think about whether your application has to handle file names or
- variables in multibyte locales and make sure all commands used in your
- script can handle such characters (e.g. lots of commands in Solaris's
- <filename>/usr/bin/</filename> are <emphasis>not</emphasis> able to handle such values - either use ksh93
- builtin constructs (which are guaranteed to be multibyte-aware) or
- commands from <filename>/usr/xpg4/bin/</filename> and/or <filename>/usr/xpg6/bin</filename>)
- </para>
- </section>
-
-
- <section xml:id="use_external_filters_only_for_large_datasets">
- <title>&tag_performance;Only use external filters like <literal>grep</literal>/<literal>sed</literal>/<literal>awk</literal>/etc.
- if you want to process lots of data with them</title>
- <para>Only use external filters like <literal>grep</literal>/<literal>sed</literal>/<literal>awk</literal>/etc.
- if a significant amount of data is processed by the filter or if
- benchmarking shows that the use of builtin commands is significantly slower
- (otherwise the time and resources needed to start the filter are
- far greater then the amount of data being processed,
- creating a performance problem).</para>
- <para>For example:
-<programlisting>
-if [ "$(echo "$x" | egrep '.*foo.*')" != "" ] ; then
- do_something ;
-done
-</programlisting>
-can be re-written using ksh93 builtin constructs, saving several
-<literal>|fork()|+|exec()|</literal>'s:
-<programlisting>
-if [[ "${x}" == ~(E).*foo.* ]] ; then
- do_something ;
-done
-</programlisting>
- </para>
- </section>
-
-
- <section xml:id="use_dashdash_if_first_arg_is_variable">
- <title>If the first operand of a command is a variable, use <literal>--</literal></title>
- <para>If the first operand of a command is a variable, use <literal>--</literal>
- for any command that accepts this as end of argument to
- avoid problems if the variable expands to a value starting with <literal>-</literal>.
- </para>
- <note><para>
- At least
- <simplelist type="inline">
- <member>print</member>
- <member>/usr/bin/fgrep</member><member>/usr/xpg4/bin/fgrep</member>
- <member>/usr/bin/grep</member> <member>/usr/xpg4/bin/grep</member>
- <member>/usr/bin/egrep</member><member>/usr/xpg4/bin/egrep</member>
- </simplelist>
- support <literal>--</literal> as "end of arguments"-terminator.
- </para></note>
- </section>
-
- <section xml:id="use_export">
- <title>&tag_kshonly;&tag_performance;Use <literal>$ export FOOBAR=val #</literal> instead of
- <literal>$ FOOBAR=val ; export FOOBAR #</literal></title>
- <para>Use <literal>$ export FOOBAR=val # instead of $ FOOBAR=val ; export FOOBAR #</literal> -
- this is much faster.</para>
- </section>
-
-
- <section xml:id="use_subshell_around_set_dashdash_usage">
- <title>Use a subshell (e.g. <literal>$ ( mycmd ) #</literal>) around places which use
- <literal>set -- $(mycmd)</literal> and/or <literal>shift</literal></title>
- <para>Use a subshell (e.g. <literal>$ ( mycmd ) #</literal>) around places which use
- <literal>set -- $(mycmd)</literal> and/or <literal>shift</literal> unless the variable
- affected is either a local one or if it's guaranteed that this variable will no longer be used
- (be careful for loadable functions, e.g. ksh/ksh93's <literal>autoload</literal> !!!!)
- </para>
- </section>
-
-
- <section xml:id="be_careful_with_tabs_in_script_code">
- <title>Be careful with using TABS in script code, they are not portable
- between editors or platforms</title>
- <para>Be careful with using TABS in script code, they are not portable
- between editors or platforms.</para>
- <para>If you use ksh93 use <literal>$'\t'</literal> to include TABs in sources, not the TAB character itself.</para>
- </section>
-
-
- <section xml:id="centralise_error_exit">
- <title>If you have multiple points where your application exits with an error
- message create a central function for this purpose</title>
- <para>If you have multiple points where your application exits with an error
- message create a central function for this, e.g.
-<programlisting>
-if [ -z "$tmpdir" ] ; then
- print -u2 "mktemp failed to produce output; aborting."
- exit 1
-fi
-if [ ! -d $tmpdir ] ; then
- print -u2 "mktemp failed to create a directory; aborting."
- exit 1
-fi
-</programlisting>
-should be replaced with
-<programlisting>
-function fatal_error
-{
- print -u2 "${progname}: $*"
- exit 1
-}
-# do something (and save ARGV[0] to variable "progname")
-if [ -z "$tmpdir" ] ; then
- fatal_error "mktemp failed to produce output; aborting."
-fi
-if [ ! -d "$tmpdir" ] ; then
- fatal_error "mktemp failed to create a directory; aborting."
-fi
-</programlisting>
- </para>
- </section>
-
-
- <section xml:id="use_set_o_nounset">
- <title>&tag_kshonly; Think about using <literal>$ set -o nounset #</literal> by default</title>
- <para>Think about using <literal>$ set -o nounset #</literal> by default (or at least during the
- script's development phase) to catch errors where variables are used
- when they are not set (yet), e.g.
-<screen>
-$ <userinput>(set -o nounset ; print ${foonotset})</userinput>
-<computeroutput>/bin/ksh93: foonotset: parameter not set</computeroutput>
-</screen>
- </para>
- </section>
-
-
- <section xml:id="avoid_eval_builtin">
- <title>Avoid using <literal>eval</literal> unless absolutely necessary</title>
- <para>Avoid using <literal>eval</literal> unless absolutely necessary. Subtle things
- can happen when a string is passed back through the shell
- parser. You can use name references to avoid uses such as
- <literal>eval $name="$value"</literal>.
- </para>
- </section>
-
-
- <section xml:id="use_concatenation_operator">
- <title>&tag_ksh93only;Use the string/array concatenation operator <literal>+=</literal></title>
- <para>Use <literal>+=</literal> instead of manually adding strings/array elements, e.g.
-<programlisting>
-foo=""
-foo="${foo}a"
-foo="${foo}b"
-foo="${foo}c"
-</programlisting>
-should be replaced with
-<programlisting>
-foo=""
-foo+="a"
-foo+="b"
-foo+="c"
-</programlisting>
- </para>
- </section>
-
- <section xml:id="use_source_not_dot">
- <title>&tag_ksh93only;Use <literal>source</literal> instead of '<literal>.</literal> '(dot)
- to include other shell script fragments</title>
- <para>Use <literal>source</literal> instead of '<literal>.</literal>'
- (dot) to include other shell script fragments - the new form is much
- more readable than the tiny dot and a failure can be caught within the script.</para>
- </section>
-
-
- <section xml:id="use_builtin_localisation_support">
- <title>&tag_ksh93only;&tag_performance;&tag_l10n;Use <literal>$"..."</literal> instead of
- <literal>gettext ... "..."</literal> for strings that need to be localized for different locales</title>
- <para>Use $"..." instead of <literal>gettext ... "..."</literal> for strings that need to be
- localized for different locales. <literal>gettext</literal> will require a
- <literal>fork()+exec()</literal> and
- reads the whole catalog each time it's called, creating a huge overhead for localisation
- (and the <literal>$"..."</literal> is easier to use, e.g. you only have to put a
- <literal>$</literal> in front of the catalog and the string will be localised).
- </para>
- </section>
-
-
- <section xml:id="use_set_o_noglob">
- <title>&tag_kshonly;&tag_performance;Use <literal>set -o noglob</literal> if you do not need to expand files</title>
- <para>If you don't expect to expand files, you can do set <literal>-f</literal>
- (<literal>set -o noglob</literal>) as well. This way the need to use <literal>""</literal> is
- greatly reduced.</para>
- </section>
-
-
- <section xml:id="use_empty_ifs_to_handle_spaces">
- <title>&tag_ksh93only;Use <literal>IFS=</literal> to avoid problems with spaces in filenames</title>
- <para>Unless you want to do word splitting, put <literal>IFS=</literal>
- at the beginning of a command. This way spaces in
- file names won't be a problem. You can do
- <literal>IFS='delims' read -r</literal> line
- to override <envar>IFS</envar> just for the <literal>read</literal> command. However,
- you can't do this for the <literal>set</literal> builtin.</para>
- </section>
-
-
- <section xml:id="set_locale_when_comparing_against_localised_output">
- <title>Set the message locale if you process output of tools which may be localised</title>
- <para>Set the message locale (<envar>LC_MESSAGES</envar>) if you process output of tools which may be localised</para>
- <example><title>Set <envar>LC_MESSAGES</envar> when testing for specific outout of the <filename>/usr/bin/file</filename> utility:</title>
-<programlisting>
-# set french as default message locale
-export LC_MESSAGES=fr_FR.UTF-8
-
-...
-
-# test whether the file "/tmp" has the filetype "directory" or not
-# we set LC_MESSAGES to "C" to ensure the returned message is in english
-if [[ "$(LC_MESSAGES=C file /tmp)" = *directory ]] ; then
- print "is a directory"
-fi
-</programlisting>
- <note><para>The environment variable <envar>LC_ALL</envar> always
- overrides any other <envar>LC_*</envar> environment variables
- (and <envar>LANG</envar>, too),
- including <envar>LC_MESSAGES</envar>.
- if there is the chance that <envar>LC_ALL</envar> may be set
- replace <envar>LC_MESSAGES</envar> with <envar>LC_ALL</envar>
- in the example above.</para></note>
- </example>
- </section>
-
- <section xml:id="cleanup_after_yourself">
- <title>Cleanup after yourself.</title>
- <para>Cleanup after yourself. For example ksh/ksh93 have an <literal>EXIT</literal> trap which
- is very useful for this.
- </para>
- <note><para>
- Note that the <literal>EXIT</literal> trap is executed for a subshell and each subshell
- level can run it's own <literal>EXIT</literal> trap, for example
-<screen>
-$ <userinput>(trap "print bam" EXIT ; (trap "print snap" EXIT ; print "foo"))</userinput>
-<computeroutput>foo
-snap
-bam</computeroutput>
-</screen>
- </para></note>
- </section>
-
- <section xml:id="use_proper_exit_code">
- <title>Use a proper <literal>exit</literal> code</title>
- <para>Explicitly set the exit code of a script, otherwise the exit code
- from the last command executed will be used which may trigger problems
- if the value is unexpected.</para>
- </section>
-
-
- <section xml:id="shell_lint">
- <title>&tag_ksh93only;Use <literal>shcomp -n scriptname.sh /dev/null</literal> to check for common errors</title>
- <para>Use <literal>shcomp -n scriptname.sh /dev/null</literal> to
- check for common problems (such as insecure, depreciated or ambiguous constructs) in shell scripts.</para>
- </section>
- </section><!-- end of general -->
-
-
-
-
-
- <section xml:id="functions">
- <title>Functions</title>
-
- <section xml:id="use_functions">
- <title>Use functions to break up your code</title>
- <para>Use functions to break up your code into smaller, logical blocks.</para>
- </section>
-
- <section xml:id="do_not_reserved_keywords_for_function_names">
- <title>Do not use function names which are reserved keywords in C/C++/JAVA or the POSIX shell standard</title>
- <para>Do not use function names which are reserved keywords (or function names) in C/C++/JAVA or the POSIX shell standard
- (to avoid confusion and/or future changes/updates to the shell language).
- </para>
- </section>
-
- <section xml:id="use_ksh_style_function_syntax">
- <title>&tag_kshonly;&tag_performance;Use ksh-style <literal>function</literal></title>
- <para>It is <emphasis>highly</emphasis> recommended to use ksh style functions
- (<literal>function foo { ... }</literal>) instead
- of Bourne-style functions (<literal>foo() { ... }</literal>) if possible
- (and local variables instead of spamming the global namespace).</para>
-
- <warning><para>
- The difference between old-style Bourne functions and ksh functions is one of the major differences
- between ksh88 and ksh93 - ksh88 allowed variables to be local for Bourne-style functions while ksh93
- conforms to the POSIX standard and will use a function-local scope for variables declared in
- Bourne-style functions.</para>
- <para>Example (note that "<literal>integer</literal>" is an alias for "<literal>typeset -li</literal>"):
-<programlisting>
-# new style function with local variable
-$ ksh93 -c 'integer x=2 ; function foo { integer x=5 ; } ; print "x=$x"
-; foo ; print "x=$x" ;'
-x=2
-x=2
-# old style function with an attempt to create a local variable
-$ ksh93 -c 'integer x=2 ; foo() { integer x=5 ; } ; print "x=$x" ; foo ;
-print "x=$x" ;'
-x=2
-x=5
-</programlisting>
-
- <uri xlink:href="http://www.opensolaris.org/os/project/ksh93-integration/docs/ksh93r/general/compatibility/">usr/src/lib/libshell/common/COMPATIBILITY</uri>
- says about this issue:
-<blockquote><para>
-Functions, defined with name() with ksh-93 are compatible with
-the POSIX standard, not with ksh-88. No local variables are
-permitted, and there is no separate scope. Functions defined
-with the function name syntax, maintain compatibility.
-This also affects function traces.
-</para></blockquote>
-(this issue also affects <filename>/usr/xpg4/bin/sh</filename> in Solaris 10 because it is based on ksh88. This is a bug.).
- </para></warning>
-
- </section>
-
-
- <section xml:id="use_proper_return_code">
- <title>Use a proper <literal>return</literal> code</title>
- <para>Explicitly set the return code of a function - otherwise the exit code
- from the last command executed will be used which may trigger problems
- if the value is unexpected.</para>
- <para>The only allowed exception is if a function uses the shell's <literal>errexit</literal> mode to leave
- a function, subshell or the script if a command returns a non-zero exit code.
- </para>
- </section>
-
- <section xml:id="use_fpath_to_load_common_code">
- <title>&tag_kshonly;Use <envar>FPATH</envar> to load common functions, not <literal>source</literal></title>
- <para>
- Use the ksh <envar>FPATH</envar> (function path) feature to load functions which are shared between scripts
- and not <literal>source</literal> - this allows to load such a function on demand and not all at once.</para>
- </section>
-
- </section><!-- end of functions -->
-
-
-
-
- <section xml:id="if_for_while">
- <title><literal>if</literal>, <literal>for</literal> and <literal>while</literal></title>
-
- <section xml:id="if_for_while_format">
- <title>Format</title>
- <para>To match <literal>cstyle</literal>, the shell token equivalent to the <literal>C</literal>
- "<literal>{</literal>" should appear on the same line, separated by a
- "<literal>;</literal>", as in:
-<programlisting>
-if [ "$x" = "hello" ] ; then
- echo $x
-fi
-
-if [[ "$x" = "hello" ]] ; then
- print $x
-fi
-
-for i in 1 2 3; do
- echo $i
-done
-
-for ((i=0 ; i &lt; 3 ; i++)); do
- print $i
-done
-
-while [ $# -gt 0 ]; do
- echo $1
- shift
-done
-
-while (( $# &gt; 0 )); do
- print $1
- shift
-done
-</programlisting>
- </para>
- </section>
-
-
- <section xml:id="test_builtin">
- <title><literal>test</literal> Builtin</title>
- <para>DO NOT use the test builtin. Sorry, executive decision.</para>
- <para>In our Bourne shell, the <literal>test</literal> built-in is the same as the "["
- builtin (if you don't believe me, try "type test" or refer to <filename>usr/src/cmd/sh/msg.c</filename>).</para>
- <para>
- So please do not write:
-<programlisting>
-if test $# -gt 0 ; then
-</programlisting>
-instead use:
-<programlisting>
-if [ $# -gt 0 ] ; then
-</programlisting>
- </para>
- </section>
-
-
- <section xml:id="use_ksh_test_syntax">
- <title>&tag_kshonly;&tag_performance;Use "<literal>[[ expr ]]</literal>" instead of "<literal>[ expr ]</literal>"</title>
- <para>Use "<literal>[[ expr ]]</literal>" instead of "<literal>[ expr ]</literal>" if possible
- since it avoids going through the whole pattern expansion/etc. machinery and
- adds additional operators not available in the Bourne shell, such as short-circuit
- <literal>&amp;&amp;</literal> and <literal>||</literal>.
- </para>
- </section>
-
-
- <section xml:id="use_posix_arithmetic_expressions">
- <title>&tag_kshonly; Use "<literal>(( ... ))</literal>" for arithmetic expressions</title>
- <para>Use "<literal>(( ... ))</literal>" instead of "<literal>[ expr ]</literal>"
- or "<literal>[[ expr ]]</literal>" expressions.
- </para>
- <para>
- Example: Replace
-<programlisting>
-i=5
-# do something
-if [ $i -gt 5 ] ; then
-</programlisting>
-with
-<programlisting>
-i=5
-# do something
-if (( i &gt; 5 )) ; then
-</programlisting>
- </para>
- </section>
-
-
- <section xml:id="compare_exit_code_using_math">
- <title>&tag_kshonly;&tag_performance;Compare exit code using arithmetic expressions expressions</title>
- <para>Use POSIX arithmetic expressions to test for exit/return codes of commands and functions.
- For example turn
-<programlisting>
-if [ $? -gt 0 ] ; then
-</programlisting>
-into
-<programlisting>
-if (( $? &gt; 0 )) ; then
-</programlisting>
- </para>
- </section>
-
-
- <section xml:id="use_builtin_commands_in_loops">
- <title>&tag_bourneonly; Use builtin commands in conditions for <literal>while</literal> endless loops</title>
- <para>Make sure that your shell has a "<literal>true</literal>" builtin (like ksh93) when
- executing endless loops like <literal>$ while true ; do do_something ; done #</literal> -
- otherwise each loop cycle runs a <literal>|fork()|+|exec()|</literal>-cycle to run
- <filename>/bin/true</filename>
- </para>
- </section>
-
-
- <section xml:id="single_line_if_statements">
- <title>Single-line if-statements</title>
- <para>It is permissible to use <literal>&amp;&amp;</literal> and <literal>||</literal> to construct
- shorthand for an "<literal>if</literal>" statement in the case where the if statement has a
- single consequent line:
-<programlisting>
-[ $# -eq 0 ] &amp;&amp; exit 0
-</programlisting>
-instead of the longer:
-<programlisting>
-if [ $# -eq 0 ]; then
- exit 0
-fi
-</programlisting>
- </para>
- </section>
-
-
- <section xml:id="exit_status_and_if_for_while">
- <title>Exit Status and <literal>if</literal>/<literal>while</literal> statements</title>
- <para>Recall that "<literal>if</literal>" and "<literal>while</literal>"
- operate on the exit status of the statement
- to be executed. In the shell, zero (0) means true and non-zero means false.
- The exit status of the last command which was executed is available in the $?
- variable. When using "<literal>if</literal>" and "<literal>while</literal>",
- it is typically not necessary to use
- <literal>$?</literal> explicitly, as in:
-<programlisting>
-grep foo /etc/passwd &gt;/dev/null 2>&amp;1
-if [ $? -eq 0 ]; then
- echo "found"
-fi
-</programlisting>
-Instead, you can more concisely write:
-<programlisting>
-if grep foo /etc/passwd &gt;/dev/null 2>&amp;1; then
- echo "found"
-fi
-</programlisting>
-Or, when appropriate:
-<programlisting>
-grep foo /etc/passwd &gt;/dev/null 2>&amp;1 &amp;&amp; echo "found"
-</programlisting>
- </para>
- </section>
-
- </section><!-- end of if/for/while -->
-
-
-
-
-
-
- <section xml:id="variables">
- <title>Variable types, naming and usage</title>
-
- <section xml:id="names_should_be_lowercase">
- <title>Names of local, non-environment, non-constant variables should be lowercase</title>
- <para>Names of variables local to the current script which are not exported to the environment
- should be lowercase while variable names which are exported to the
- environment should be uppercase.</para>
- <para>The only exception are global constants (=global readonly variables,
- e.g. <literal>$ float -r M_PI=3.14159265358979323846 #</literal> (taken from &lt;math.h&gt;))
- which may be allowed to use uppercase names, too.
- </para>
-
- <warning><para>
- Uppercase variable names should be avoided because there is a good chance
- of naming collisions with either special variable names used by the shell
- (e.g. <literal>PWD</literal>, <literal>SECONDS</literal> etc.).
- </para></warning>
- </section>
-
- <section xml:id="do_not_reserved_keywords_for_variable_names">
- <title>Do not use variable names which are reserved keywords/variable names in C/C++/JAVA or the POSIX shell standard</title>
- <para>Do not use variable names which are reserved keywords in C/C++/JAVA or the POSIX shell standard
- (to avoid confusion and/or future changes/updates to the shell language).
- </para>
- <note>
- <para>The Korn Shell and the POSIX shell standard have many more
- reserved variable names than the original Bourne shell. All
- these reserved variable names are spelled uppercase.
- </para>
- </note>
- </section>
-
- <section xml:id="use_brackets_around_long_names">
- <title>Always use <literal>'{'</literal>+<literal>'}'</literal> when using variable
- names longer than one character</title>
- <para>Always use <literal>'{'</literal>+<literal>'}'</literal> when using
- variable names longer than one character unless a simple variable name is
- followed by a blank, <literal>/</literal>, <literal>;</literal>, or <literal>$</literal>
- character (to avoid problems with array,
- compound variables or accidental misinterpretation by users/shell)
-<programlisting>
-print "$foo=info"
-</programlisting>
-should be rewritten to
-<programlisting>
-print "${foo}=info"
-</programlisting>
- </para>
- </section>
-
-
- <section xml:id="quote_variables_containing_filenames_or_userinput">
- <title><emphasis>Always</emphasis> put variables into quotes when handling filenames or user input</title>
- <para><emphasis>Always</emphasis> put variables into quotes when handling filenames or user input, even if
- the values are hardcoded or the values appear to be fixed. Otherwise at
- least two things may go wrong:
- <itemizedlist>
- <listitem><para>a malicious user may be able to exploit a script's inner working to
- infect his/her own code</para></listitem>
- <listitem><para>a script may (fatally) misbehave for unexpected input (e.g. file names
- with blanks and/or special symbols which are interpreted by the shell)</para></listitem>
- </itemizedlist>
- </para>
-
- <note><para>
- As alternative a script may set <literal>IFS='' ; set -o noglob</literal> to turn off the
- interpretation of any field seperators and the pattern globbing.
- </para></note>
- </section>
-
-
-
- <section xml:id="use_typed_variables">
- <title>&tag_kshonly;&tag_performance;Use typed variables if possible.</title>
- <para>For example the following is very
- inefficient since it transforms the integer values to strings and back
- several times:
-<programlisting>
-a=0
-b=1
-c=2
-# more code
-if [ $a -lt 5 -o $b -gt c ] ; then do_something ; fi
-</programlisting>
-This could be rewritten using ksh constructs:
-<programlisting>
-integer a=0
-integer b=1
-integer c=2
-# more code
-if (( a &lt; 5 || b &gt; c )) ; then do_something ; fi
-</programlisting>
- </para>
- </section>
-
-
- <section xml:id="store_lists_in_arrays">
- <title>&tag_ksh93only; Store lists in arrays or associative arrays</title>
- <para>Store lists in arrays or associative arrays - this is usually easier
- to manage.</para>
- <para>
- For example:
-<programlisting>
-x="
-/etc/foo
-/etc/bar
-/etc/baz
-"
-echo $x
-</programlisting>
-can be replaced with
-<programlisting>
-typeset -a mylist
-mylist[0]="/etc/foo"
-mylist[1]="/etc/bar"
-mylist[2]="/etc/baz"
-print "${mylist[@]}"
-</programlisting>
-or (ksh93-style append entries to a normal (non-associative) array)
-<programlisting>
-typeset -a mylist
-mylist+=( "/etc/foo" )
-mylist+=( "/etc/bar" )
-mylist+=( "/etc/baz" )
-print "${mylist[@]}"
-</programlisting>
- </para>
- <note>
- <title>Difference between expanding arrays with mylist[@] and mylist[*] subscript operators</title>
- <para>
- Arrays may be expanded using two similar subscript operators, @ and *. These subscripts
- differ only when the variable expansion appears within double quotes. If the variable expansion
- is between double-quotes, "${mylist[*]}" expands to a single string with the value of each array
- member separated by the first character of the <envar>IFS</envar> variable, and "${mylist[@]}"
- expands each element of name to a separate string.
- </para>
- <example><title>Difference between [@] and [*] when expanding arrays</title>
-<programlisting>
-typeset -a mylist
-mylist+=( "/etc/foo" )
-mylist+=( "/etc/bar" )
-mylist+=( "/etc/baz" )
-IFS=","
-printf "mylist[*]={ 0=|%s| 1=|%s| 2=|%s| 3=|%s| }\n" "${mylist[*]}"
-printf "mylist[@]={ 0=|%s| 1=|%s| 2=|%s| 3=|%s| }\n" "${mylist[@]}"
-</programlisting>
-<para>will print:</para>
-<screen>
-<computeroutput>mylist[*]={ 0=|/etc/foo,/etc/bar,/etc/baz| 1=|| 2=|| 3=|| }
-mylist[@]={ 0=|/etc/foo| 1=|/etc/bar| 2=|/etc/baz| 3=|| }
-</computeroutput>
-</screen>
- </example>
- </note>
- </section>
-
-
- <section xml:id="use_compound_variables_or_lists_for_grouping">
- <title>&tag_ksh93only; Use compound variables or associative arrays to group similar variables together</title>
- <para>Use compound variables or associative arrays to group similar variables together.</para>
- <para>
- For example:
-<programlisting>
-box_width=56
-box_height=10
-box_depth=19
-echo "${box_width} ${box_height} ${box_depth}"
-</programlisting>
-could be rewritten to ("associative array"-style)
-<programlisting>
-typeset -A -E box=( [width]=56 [height]=10 [depth]=19 )
-print -- "${box[width]} ${box[height]} ${box[depth]}"
-</programlisting>
-or ("compound variable"-style
-<programlisting>
-box=(
- float width=56
- float height=10
- float depth=19
- )
-print -- "${box.width} ${box.height} ${box.depth}"
-</programlisting>
- </para>
- </section>
- </section><!-- end of variables -->
-
-
-
-
-
-
-
- <section xml:id="io">
- <title>I/O</title>
-
- <section xml:id="avoid_echo">
- <title>Avoid using the "<literal>echo</literal>" command for output</title>
- <para>The behaviour of "<literal>echo</literal>" is not portable
- (e.g. System V, BSD, UCB and ksh93/bash shell builtin versions all
- slightly differ in functionality) and should be avoided if possible.
- POSIX defines the "<literal>printf</literal>" command as replacement
- which provides more flexible and portable behaviour.</para>
-
- <note>
- <title>&tag_kshonly;Use "<literal>print</literal>" and not "<literal>echo</literal>" in Korn Shell scripts</title>
- <para>Korn shell scripts should prefer the "<literal>print</literal>"
- builtin which was introduced as replacement for "<literal>echo</literal>".</para>
- <caution>
- <para>Use <literal>$ print -- ${varname}" #</literal> when there is the slightest chance that the
- variable "<literal>varname</literal>" may contain symbols like "-". Or better use "<literal>printf</literal>"
- instead, for example
-<programlisting>
-integer fx
-# do something
-print $fx
-</programlisting>
-may fail if "f" contains a negative value. A better way may be to use
-<programlisting>
-integer fx
-# do something
-printf "%d\n" fx
-</programlisting>
- </para>
- </caution>
- </note>
- </section>
-
- <section xml:id="use_redirect_not_exec_to_open_files">
- <title>&tag_ksh93only;Use <literal>redirect</literal> and not <literal>exec</literal> to open files</title>
- <para>Use <literal>redirect</literal> and not <literal>exec</literal> to open files - <literal>exec</literal>
- will terminate the current function or script if an error occurs while <literal>redirect</literal>
- just returns a non-zero exit code which can be caught.</para>
-<para>Example:
-<programlisting>
-if redirect 5&lt;/etc/profile ; then
- print "file open ok"
- head &lt;&amp;5
-else
- print "could not open file"
-fi
-</programlisting>
- </para>
- </section>
-
- <section xml:id="group_identical_redirections_together">
- <title>&tag_performance;Avoid redirections per command when the output goes into the same file,
- e.g. <literal>$ echo "foo" &gt;xxx ; echo "bar" &gt;&gt;xxx ; echo "baz" &gt;&gt;xxx #</literal></title>
- <para>Each of the redirections above trigger an
- <literal>|open()|,|write()|,|close()|</literal>-sequence. It is much
- more efficient (and faster) to group the rediction into a block,
- e.g. <literal>{ echo "foo" ; echo "bar" ; echo "baz" } &gt;xxx #</literal></para>
- </section>
-
-
- <section xml:id="avoid_using_temporary_files">
- <title>&tag_performance;Avoid the creation of temporary files and store the values in variables instead</title>
- <para>Avoid the creation of temporary files and store the values in variables instead if possible</para>
- <para>
- Example:
-<programlisting>
-ls -1 &gt;xxx
-for i in $(cat xxx) ; do
- do_something ;
-done
-</programlisting>
-can be replaced with
-<programlisting>
-x="$(ls -1)"
-for i in ${x} ; do
- do_something ;
-done
-</programlisting>
- </para>
- <note><para>ksh93 supports binary variables (e.g. <literal>typeset -b varname</literal>) which can hold any value.</para></note>
- </section>
-
-
- <section xml:id="create_subdirs_for_multiple_temporary_files">
- <title>If you create more than one temporary file create an unique subdir</title>
- <para>If you create more than one temporary file create an unique subdir for
- these files and make sure the dir is writable. Make sure you cleanup
- after yourself (unless you are debugging).
- </para>
- </section>
-
-
- <section xml:id="use_dynamic_file_descriptors">
- <title>&tag_ksh93only;Use {n}&lt;file instead of fixed file descriptor numbers</title>
- <para>When opening a file use {n}&lt;file, where <envar>n</envar> is an
- integer variable rather than specifying a fixed descriptor number.</para>
- <para>This is highly recommended in functions to avoid that fixed file
- descriptor numbers interfere with the calling script.</para>
-<example><title>Open a network connection and store the file descriptor number in a variable</title>
-<programlisting>
-function cat_http
-{
- integer netfd
-
-...
-
- # open TCP channel
- redirect {netfd}&lt;&gt;"/dev/tcp/${host}/${port}"
-
- # send HTTP request
- request="GET /${path} HTTP/1.1\n"
- request+="Host: ${host}\n"
- request+="User-Agent: demo code/ksh93 (2007-08-30; $(uname -s -r -p))\n"
- request+="Connection: close\n"
- print "${request}\n" &gt;&amp;${netfd}
-
- # collect response and send it to stdout
- cat &lt;&amp;${netfd}
-
- # close connection
- exec {netfd}&lt;&amp;-
-
-...
-
-}
-</programlisting>
-</example>
- </section>
-
-
- <section xml:id="use_inline_here_documents">
- <title>&tag_ksh93only;&tag_performance;Use inline here documents
- instead of <literal>echo "$x" | command</literal></title>
- <para>Use inline here documents, for example
-<programlisting>
-command &lt;&lt;&lt; $x
-</programlisting>
- rather than
-<programlisting>
-print -r -- "$x" | command
-</programlisting>
- </para>
- </section>
-
-
- <section xml:id="use_read_r">
- <title>&tag_ksh93only;Use the <literal>-r</literal> option of <literal>read</literal> to read a line</title>
- <para>Use the <literal>-r</literal> option of <literal>read</literal> to read a line.
- You never know when a line will end in <literal>\</literal> and without a
- <literal>-r</literal> multiple
- lines can be read.</para>
- </section>
-
-
- <section xml:id="print_compound_variables_using_print_C">
- <title>&tag_ksh93only;Print compound variables using <literal>print -C varname</literal> or <literal>print -v varname</literal></title>
- <para>Print compound variables using <literal>print -C varname</literal> or
- <literal>print -v varname</literal> to make sure that non-printable characters
- are correctly encoded.</para>
-<example><title>Print compound variable with non-printable characters</title>
-<programlisting>
-compound x=(
- a=5
- b="hello"
- c=(
- d=9
- e="$(printf "1\v3")" <co xml:id="co.vertical_tab1" />
- )
-)
-print -v x
-</programlisting>
-<para>will print:</para>
-<screen>
-<computeroutput>(
- a=5
- b=hello
- c=(
- d=9
- e=$'1\0133' <co xml:id="co.vertical_tab2" />
- )
-)</computeroutput>
-</screen>
-<calloutlist>
- <callout arearefs="co.vertical_tab1 co.vertical_tab2">
- <para>vertical tab, <literal>\v</literal>, octal=<literal>\013</literal>.</para>
- </callout>
-</calloutlist>
-</example>
- </section>
-
- <section xml:id="command_name_before_redirections">
- <title>Put the command name and arguments before redirections</title>
- <para>Put the command name and arguments before redirections.
- You can legally do <literal>$ &gt; file date</literal> instead of <literal>date &gt; file</literal>
- but don't do it.</para>
- </section>
-
- <section xml:id="enable_gmacs_editor_mode_for_user_prompts">
- <title>&tag_ksh93only;Enable the <literal>gmacs</literal> editor
- mode when reading user input using the <literal>read</literal> builtin</title>
- <para>Enable the <literal>gmacs</literal>editor mode before reading user
- input using the <literal>read</literal> builtin to enable the use of
- cursor+backspace+delete keys in the edit line</para>
-<example><title>Prompt user for a string with gmacs editor mode enabled</title>
-<programlisting>
-set -o gmacs <co xml:id="co.enable_gmacs" />
-typeset inputstring="default value"
-...
-read -v<co xml:id="co.read_v" /> inputstring<co xml:id="co.readvar" />?"Please enter a string: "<co xml:id="co.prompt" />
-...
-printf "The user entered the following string: '%s'\n" "${inputstring}"
-
-...
-</programlisting>
-<calloutlist>
- <callout arearefs="co.enable_gmacs">
- <para>Enable gmacs editor mode.</para>
- </callout>
- <callout arearefs="co.read_v">
- <para>The value of the variable is displayed and used as a default value.</para>
- </callout>
- <callout arearefs="co.readvar">
- <para>Variable used to store the result.</para>
- </callout>
- <callout arearefs="co.prompt">
- <para>Prompt string which is displayed in stderr.</para>
- </callout>
-</calloutlist>
-</example>
- </section>
- </section><!-- end of I/O -->
-
-
-
-
-
-
- <section xml:id="math">
- <title>Math</title>
-
- <section xml:id="use_builtin_arithmetic_expressions">
- <title>&tag_kshonly;&tag_performance;Use builtin arithmetic expressions instead of external applications</title>
- <para>Use builtin (POSIX shell) arithmetic expressions instead of
- <filename>expr</filename>,
- <filename>bc</filename>,
- <filename>dc</filename>,
- <filename>awk</filename>,
- <filename>nawk</filename> or
- <filename>perl</filename>.
- </para>
- <note>
- <para>ksh93 supports C99-like floating-point arithmetic including special values
- such as
- <simplelist type="inline">
- <member>+Inf</member>
- <member>-Inf</member>
- <member>+NaN</member>
- <member>-NaN</member>
- </simplelist>.
- </para>
- </note>
- </section>
-
-
- <section xml:id="use_floating_point_arithmetic_expressions">
- <title>&tag_ksh93only; Use floating-point arithmetic expressions if
- calculations may trigger a division by zero or other exceptions</title>
- <para>Use floating-point arithmetic expressions if calculations may
- trigger a division by zero or other exceptions - floating point arithmetic expressions in
- ksh93 support special values such as <literal>+Inf</literal>/<literal>-Inf</literal> and
- <literal>+NaN</literal>/<literal>-NaN</literal> which can greatly simplify testing for
- error conditions, e.g. instead of a <literal>trap</literal> or explicit
- <literal>if ... then... else</literal> checks for every sub-expression
- you can check the results for such special values.
- </para>
- <para>Example:
-<screen>
-$ <userinput>ksh93 -c 'integer i=0 j=5 ; print -- "x=$((j/i)) "'</userinput>
-<computeroutput>ksh93: line 1: j/i: divide by zero</computeroutput>
-$ <userinput>ksh93 -c 'float i=0 j=-5 ; print -- "x=$((j/i)) "'</userinput>
-<computeroutput>x=-Inf</computeroutput>
-</screen>
- </para>
- </section>
-
-
- <section xml:id="use_printf_a_for_passing_float_values">
- <title>&tag_ksh93only; Use <literal>printf "%a"</literal> when passing floating-point values</title>
- <para>Use <literal>printf "%a"</literal> when passing floating-point values between scripts or
- as output of a function to avoid rounding errors when converting between
- bases.</para>
- <para>
- Example:
-<programlisting>
-function xxx
-{
- float val
-
- (( val=sin(5.) ))
- printf "%a\n" val
-}
-float out
-(( out=$(xxx) ))
-xxx
-print -- $out
-</programlisting>
-This will print:
-<programlisting>
--0.9589242747
--0x1.eaf81f5e09933226af13e5563bc6p-01
-</programlisting>
- </para>
- </section>
-
-
- <section xml:id="put_constants_into_readonly_variables">
- <title>&tag_kshonly;&tag_performance;Put constant values into readonly variables</title>
- <para>Put constant values into readonly variables</para>
- <para>For example:
-<programlisting>
-float -r M_PI=3.14159265358979323846
-</programlisting>
-or
-<programlisting>
-float M_PI=3.14159265358979323846
-readonly M_PI
-</programlisting>
- </para>
- </section>
-
-
- <section xml:id="avoid_unnecessary_string_number_conversions">
- <title>&tag_kshonly;&tag_performance;Avoid string to number
- (and/or number to string) conversions in arithmetic expressions
- expressions</title>
- <para>Avoid string to number and/or number to string conversions in
- arithmetic expressions expressions to avoid performance degradation
- and rounding errors.</para>
- <example><title>(( x=$x*2 )) vs. (( x=x*2 ))</title>
-<programlisting>
-float x
-...
-(( x=$x*2 ))
-</programlisting>
-<para>
-will convert the variable "x" (stored in the machine's native
-<literal>|long double|</literal> datatype) to a string value in base10 format,
-apply pattern expansion (globbing), then insert this string into the
-arithmetic expressions and parse the value which converts it into the internal |long double| datatype format again.
-This is both slow and generates rounding errors when converting the floating-point value between
-the internal base2 and the base10 representation of the string.
-</para>
-<para>
-The correct usage would be:
-</para>
-<programlisting>
-float x
-...
-(( x=x*2 ))
-</programlisting>
-<para>
-e.g. omit the '$' because it's (at least) redundant within arithmetic expressions.
-</para>
- </example>
-
-
- <example><title>x=$(( y+5.5 )) vs. (( x=y+5.5 ))</title>
-<programlisting>
-float x
-float y=7.1
-...
-x=$(( y+5.5 ))
-</programlisting>
-<para>
-will calculate the value of <literal>y+5.5</literal>, convert it to a
-base-10 string value amd assign the value to the floating-point variable
-<literal>x</literal> again which will convert the string value back to the
-internal |long double| datatype format again.
-</para>
-<para>
-The correct usage would be:
-</para>
-<programlisting>
-float x
-float y=7.1
-...
-(( x=y+5.5 ))
-</programlisting>
-<para>
-i.e. this will save the string conversions and avoid any base2--&gt;base10--&gt;base2-conversions.
-</para>
- </example>
- </section>
-
-
- <section xml:id="set_lc_numeric_when_using_floating_point">
- <title>&tag_ksh93only;Set <envar>LC_NUMERIC</envar> when using floating-point constants</title>
- <para>Set <envar>LC_NUMERIC</envar> when using floating-point constants to avoid problems with radix-point
- representations which differ from the representation used in the script, for example the <literal>de_DE.*</literal> locale
- use ',' instead of '.' as default radix point symbol.</para>
- <para>For example:
-<programlisting>
-# Make sure all math stuff runs in the "C" locale to avoid problems with alternative
-# radix point representations (e.g. ',' instead of '.' in de_DE.*-locales). This
-# needs to be set _before_ any floating-point constants are defined in this script)
-if [[ "${LC_ALL}" != "" ]] ; then
- export \
- LC_MONETARY="${LC_ALL}" \
- LC_MESSAGES="${LC_ALL}" \
- LC_COLLATE="${LC_ALL}" \
- LC_CTYPE="${LC_ALL}"
- unset LC_ALL
-fi
-export LC_NUMERIC=C
-...
-float -r M_PI=3.14159265358979323846
-</programlisting>
- </para>
-
- <note><para>The environment variable <envar>LC_ALL</envar> always overrides all other <envar>LC_*</envar> variables,
- including <envar>LC_NUMERIC</envar>. The script should always protect itself against custom <envar>LC_NUMERIC</envar> and
- <envar>LC_ALL</envar> values as shown in the example above.
- </para></note>
- </section>
-
-
-
- </section><!-- end of math -->
-
-
-
-
-
-
- <section xml:id="misc">
- <title>Misc</title>
-
- <section xml:id="debug_use_lineno_in_ps4">
- <title>Put <literal>[${LINENO}]</literal> in your <envar>PS4</envar></title>
- <para>Put <literal>[${LINENO}]</literal> in your <envar>PS4</envar> prompt so that you will get line
- numbers with you run with <literal>-x</literal>. If you are looking at performance
- issues put <literal>$SECONDS</literal> in the <envar>PS4</envar> prompt as well.</para>
- </section>
-
- </section><!-- end of misc -->
-
-
-
-
-</section><!-- end of RULES -->
-
-
-
-
-</article>
diff --git a/usr/src/lib/libsmartsshd/Makefile b/usr/src/lib/libsmartsshd/Makefile
new file mode 100644
index 0000000000..50d9545ef1
--- /dev/null
+++ b/usr/src/lib/libsmartsshd/Makefile
@@ -0,0 +1,48 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2011 Joyent, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+include ../Makefile.lib
+
+SUBDIRS= $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+install := TARGET = install
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber install lint: $(SUBDIRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+_msg:
+
+FRC:
+
+include ../Makefile.targ
diff --git a/usr/src/lib/libsmartsshd/Makefile.com b/usr/src/lib/libsmartsshd/Makefile.com
new file mode 100644
index 0000000000..914aab055b
--- /dev/null
+++ b/usr/src/lib/libsmartsshd/Makefile.com
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2011 Joyent, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+LIBRARY= libsmartsshd.a
+VERS= .1
+OBJECTS= sshd-plugin.o
+
+include ../../Makefile.lib
+include ../../Makefile.rootfs
+
+SRCDIR = ../common
+SRCS = $(OBJECTS:%.o=$(SRCDIR)/%.c)
+
+CPPFLAGS += -I$(SRCDIR) -D_REENTRANT -D_FILE_OFFSET_BITS=64
+LIBS = $(DYNLIB) $(LINTLIB)
+LDLIBS += -lc -ldoor
+
+$(LINTLIB) := SRCS= $(SRCDIR)/$(LINTSRC)
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+include ../../Makefile.targ
diff --git a/usr/src/lib/libsmartsshd/amd64/Makefile b/usr/src/lib/libsmartsshd/amd64/Makefile
new file mode 100644
index 0000000000..31be0ef7e6
--- /dev/null
+++ b/usr/src/lib/libsmartsshd/amd64/Makefile
@@ -0,0 +1,32 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2011 Joyent, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../Makefile.com
+include ../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/libsmartsshd/common/llib-lsmartsshd b/usr/src/lib/libsmartsshd/common/llib-lsmartsshd
new file mode 100644
index 0000000000..ace7017d41
--- /dev/null
+++ b/usr/src/lib/libsmartsshd/common/llib-lsmartsshd
@@ -0,0 +1,32 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*LINTLIBRARY*/
+/*PROTOLIB1*/
+/*
+ *
+ * Copyright 2011 Joyent, Inc. All rights reserved.
+ * Use is subject to license terms.
+ *
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
diff --git a/usr/src/lib/libsmartsshd/common/mapfile-vers b/usr/src/lib/libsmartsshd/common/mapfile-vers
new file mode 100644
index 0000000000..ce5fc7a9c0
--- /dev/null
+++ b/usr/src/lib/libsmartsshd/common/mapfile-vers
@@ -0,0 +1,45 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2011, Joyent Inc. All rights reserved.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_VERSION SUNWprivate_1.1 {
+ global:
+ sshd_user_rsa_key_allowed;
+ local:
+ *;
+};
diff --git a/usr/src/lib/libsmartsshd/common/sshd-plugin.c b/usr/src/lib/libsmartsshd/common/sshd-plugin.c
new file mode 100644
index 0000000000..4f0f0bc1ad
--- /dev/null
+++ b/usr/src/lib/libsmartsshd/common/sshd-plugin.c
@@ -0,0 +1,123 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Joyent, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <alloca.h>
+#include <door.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <pwd.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <openssl/rsa.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LOG_OOM(SZ) fprintf(stderr, "Unable to alloca %d bytes\n", SZ)
+
+static const char *DOOR = "/var/tmp/._joyent_sshd_key_is_authorized";
+static const char *REQ_FMT_STR = "%s %d %s"; /* name uid fp */
+static const int RETURN_SZ = 2;
+
+static const int MAX_ATTEMPTS = 2;
+static const int SLEEP_PERIOD = 1;
+
+static int
+sshd_allowed_in_capi(struct passwd *pw, const char *fp)
+{
+ int allowed = 0;
+ int fd = -1;
+ int blen = 0;
+ int attempts = 0;
+ char *buf = NULL;
+ door_arg_t door_args = {0};
+
+ if (pw == NULL || fp == NULL)
+ return (0);
+
+ blen = snprintf(NULL, 0, REQ_FMT_STR, pw->pw_name, pw->pw_uid, fp) + 1;
+
+ buf = (char *)alloca(blen);
+ if (buf == NULL) {
+ LOG_OOM(blen);
+ return (0);
+ }
+
+ (void) snprintf(buf, blen, REQ_FMT_STR, pw->pw_name, pw->pw_uid, fp);
+ door_args.data_ptr = buf;
+ door_args.data_size = blen;
+
+ door_args.rsize = RETURN_SZ;
+ door_args.rbuf = alloca(RETURN_SZ);
+ if (door_args.rbuf == NULL) {
+ LOG_OOM(RETURN_SZ);
+ return (0);
+ }
+ memset(door_args.rbuf, 0, RETURN_SZ);
+
+ do {
+ fd = open(DOOR, O_RDWR);
+ if (fd < 0)
+ perror("smartplugin: open (of door FD) failed");
+
+ if (door_call(fd, &door_args) < 0) {
+ perror("smartplugin: door_call failed");
+ } else {
+ allowed = atoi(door_args.rbuf);
+ munmap(door_args.rbuf, door_args.rsize);
+ return (allowed);
+ }
+ if (++attempts < MAX_ATTEMPTS)
+ sleep(SLEEP_PERIOD);
+ } while (attempts < MAX_ATTEMPTS);
+
+ return (0);
+}
+
+int
+sshd_user_rsa_key_allowed(struct passwd *pw, RSA *key, const char *fp)
+{
+ return sshd_allowed_in_capi(pw, fp);
+}
+
+int
+sshd_user_dsa_key_allowed(struct passwd *pw, DSA *key, const char *fp)
+{
+ return sshd_allowed_in_capi(pw, fp);
+}
+
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/usr/src/lib/libsmartsshd/i386/Makefile b/usr/src/lib/libsmartsshd/i386/Makefile
new file mode 100644
index 0000000000..2bfe8001d9
--- /dev/null
+++ b/usr/src/lib/libsmartsshd/i386/Makefile
@@ -0,0 +1,31 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2011 Joyent, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/libsocket/inet/getaddrinfo.c b/usr/src/lib/libsocket/inet/getaddrinfo.c
index ec72a9974e..99b769e41e 100644
--- a/usr/src/lib/libsocket/inet/getaddrinfo.c
+++ b/usr/src/lib/libsocket/inet/getaddrinfo.c
@@ -69,6 +69,17 @@ const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
#define AI_MASK (AI_PASSIVE | AI_CANONNAME | AI_NUMERICHOST \
| AI_ADDRCONFIG | AI_NUMERICSERV | AI_V4MAPPED | AI_ALL)
#define ANY 0
+
+/*
+ * This is a private, undocumented, flag that getaddrinfo() uses for
+ * getipnodebyname(). In the case of AI_ADDRCONFIG && AI_V4MAPPED, if there are
+ * no IPv6 addresses, getaddrinfo() should return non-IPv4 mapped addresses. On
+ * the flip side, getipnodebyname() is defined by RFC 2553 to explicitly do so.
+ * Therefore this private flag indicates to getaddrinfo that we shouldn't do
+ * this.
+ */
+#define AI_ADDRINFO 0x8000
+
/* function prototypes for used by getaddrinfo() routine */
static int get_addr(int family, const char *hostname, struct addrinfo *aip,
struct addrinfo *cur, ushort_t port, int version);
@@ -602,7 +613,7 @@ get_addr(int family, const char *hostname, struct addrinfo *aip, struct
/* if hostname argument is literal, name service doesn't get called */
if (family == PF_UNSPEC) {
hp = getipnodebyname(_hostname, AF_INET6, AI_ALL |
- aip->ai_flags | AI_V4MAPPED, &errnum);
+ aip->ai_flags | AI_V4MAPPED | AI_ADDRINFO, &errnum);
} else {
hp = getipnodebyname(_hostname, family, aip->ai_flags, &errnum);
}
diff --git a/usr/src/lib/libumem/amd64/umem_genasm.c b/usr/src/lib/libumem/amd64/umem_genasm.c
index 00cc18ab67..ba68cb2d37 100644
--- a/usr/src/lib/libumem/amd64/umem_genasm.c
+++ b/usr/src/lib/libumem/amd64/umem_genasm.c
@@ -69,8 +69,6 @@
#include <umem_impl.h>
#include "umem_base.h"
-#include <stdio.h>
-
const int umem_genasm_supported = 1;
static uintptr_t umem_genasm_mptr = (uintptr_t)&_malloc;
static size_t umem_genasm_msize = 576;
diff --git a/usr/src/lib/libvnd/Makefile b/usr/src/lib/libvnd/Makefile
new file mode 100644
index 0000000000..1352adb8e4
--- /dev/null
+++ b/usr/src/lib/libvnd/Makefile
@@ -0,0 +1,50 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.lib
+
+HDRS = libvnd.h
+HDRDIR = common
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+install := TARGET = install
+lint := TARGET = lint
+
+TYPECHECK_LIB = libvnd.so.1
+TYPELIST = \
+ vnd_ioc_attach_t \
+ vnd_ioc_link_t \
+ vnd_ioc_unlink_t \
+ vnd_ioc_buf_t \
+ vnd_ioc_info_t
+
+.KEEP_STATE:
+
+all clean clobber install lint: $(SUBDIRS)
+
+install_h: $(ROOTHDRS)
+
+check: $(CHECKHDRS) $(TYPECHECK)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../Makefile.targ
diff --git a/usr/src/lib/libvnd/Makefile.com b/usr/src/lib/libvnd/Makefile.com
new file mode 100644
index 0000000000..3c6896de11
--- /dev/null
+++ b/usr/src/lib/libvnd/Makefile.com
@@ -0,0 +1,39 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../../Makefile.lib
+
+LIBRARY = libvnd.a
+VERS = .1
+OBJECTS = libvnd.o
+
+include ../../Makefile.lib
+
+LIBS = $(DYNLIB) $(LINTLIB)
+LDLIBS += -lc
+CPPFLAGS += -I../common
+
+SRCDIR = ../common
+
+C99MODE= -xc99=%all
+C99LMODE= -Xc99=%all
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+include ../../Makefile.targ
diff --git a/usr/src/lib/libvnd/amd64/Makefile b/usr/src/lib/libvnd/amd64/Makefile
new file mode 100644
index 0000000000..15d904c616
--- /dev/null
+++ b/usr/src/lib/libvnd/amd64/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+include ../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/libvnd/common/libvnd.c b/usr/src/lib/libvnd/common/libvnd.c
new file mode 100644
index 0000000000..8972f6cf5a
--- /dev/null
+++ b/usr/src/lib/libvnd/common/libvnd.c
@@ -0,0 +1,550 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <stdio.h>
+#include <zone.h>
+#include <assert.h>
+#include <sys/sysmacros.h>
+
+#include <sys/vnd.h>
+#include <libvnd.h>
+
+struct vnd_handle {
+ int vh_fd;
+ uint32_t vh_errno;
+ int vh_syserr;
+};
+
+static const char *vnd_strerror_tbl[] = {
+ "no error", /* VND_E_SUCCESS */
+ "not enough memory available", /* VND_E_NOMEM */
+ "no such datalink", /* VND_E_NODATALINK */
+ "datalink not of type DL_ETHER", /* VND_E_NOTETHER */
+ "unknown dlpi failure", /* VND_E_DLPIINVAL */
+ "DL_ATTACH_REQ failed", /* VND_E_ATTACHFAIL */
+ "DL_BIND_REQ failed", /* VND_E_PROMISCFAIL */
+ "DL_PROMISCON_REQ failed", /* VND_E_PROMISCFAIL */
+ "DLD_CAPAB_DIRECT enable failed", /* VND_E_DIRECTFAIL */
+ "bad datalink capability", /* VND_E_CAPACKINVAL */
+ "bad datalink subcapability", /* VND_E_SUBCAPINVAL */
+ "bad dld version", /* VND_E_DLDBADVERS */
+ "failed to create kstats", /* VND_E_KSTATCREATE */
+ "no such vnd link", /* VND_E_NODEV */
+ "netstack doesn't exist", /* VND_E_NONETSTACK */
+ "device already associated", /* VND_E_ASSOCIATED */
+ "device already attached", /* VND_E_ATTACHED */
+ "device already linked", /* VND_E_LINKED */
+ "invalid name", /* VND_E_BADNAME */
+ "permission denied", /* VND_E_PERM */
+ "no such zone", /* VND_E_NOZONE */
+ "failed to initialize vnd stream module", /* VND_E_STRINIT */
+ "device not attached", /* VND_E_NOTATTACHED */
+ "device not linked", /* VND_E_NOTLINKED */
+ "another device has the same link name", /* VND_E_LINKEXISTS */
+ "failed to create minor node", /* VND_E_MINORNODE */
+ "requested buffer size is too large", /* VND_E_BUFTOOBIG */
+ "requested buffer size is too small", /* VND_E_TOOSMALL */
+ "unable to obtain exclusive access to dlpi link, link busy",
+ /* VND_E_DLEXCL */
+ "DLD direct capability not supported over data link",
+ /* VND_E_DIRECTNOTSUP */
+ "invalid property size", /* VND_E_BADPROPSIZE */
+ "invalid property", /* VND_E_BADPROP */
+ "property is read only", /* VND_E_PROPRDONLY */
+ "unexpected system error", /* VND_E_SYS */
+ "capabilities invalid, pass-through module detected",
+ /* VND_E_CAPABPASS */
+ "unknown error" /* VND_E_UNKNOWN */
+};
+
+vnd_errno_t
+vnd_errno(vnd_handle_t *vhp)
+{
+ return (vhp->vh_errno);
+}
+
+const char *
+vnd_strerror(vnd_errno_t err)
+{
+ if (err >= VND_E_UNKNOWN)
+ err = VND_E_UNKNOWN;
+ return (vnd_strerror_tbl[err]);
+}
+
+int
+vnd_syserrno(vnd_handle_t *vhp)
+{
+ return (vhp->vh_syserr);
+}
+
+const char *
+vnd_strsyserror(int err)
+{
+ return (strerror(err));
+}
+
+static int
+vnd_ioc_return(vnd_handle_t *vhp, uint32_t err)
+{
+ if (err != VND_E_SUCCESS) {
+ vhp->vh_errno = err;
+ vhp->vh_syserr = 0;
+ } else {
+ if (errno == EFAULT)
+ abort();
+ vhp->vh_errno = VND_E_SYS;
+ vhp->vh_syserr = errno;
+ }
+ return (-1);
+}
+
+void
+vnd_close(vnd_handle_t *vhp)
+{
+ int ret;
+
+ if (vhp->vh_fd >= 0) {
+ ret = close(vhp->vh_fd);
+ assert(ret == 0);
+ }
+ free(vhp);
+}
+
+static int
+vnd_link(vnd_handle_t *vhp, const char *name)
+{
+ vnd_ioc_link_t vil;
+
+ if (strlen(name) >= VND_NAMELEN) {
+ errno = ENAMETOOLONG;
+ return (-1);
+ }
+
+ (void) strlcpy(vil.vil_name, name, sizeof (vil.vil_name));
+ vil.vil_errno = VND_E_SUCCESS;
+ if (ioctl(vhp->vh_fd, VND_IOC_LINK, &vil) != 0)
+ return (vnd_ioc_return(vhp, vil.vil_errno));
+
+ return (0);
+}
+
+static vnd_handle_t *
+vnd_open_ctl(vnd_errno_t *vnderr, int *syserr)
+{
+ int fd;
+ vnd_handle_t *vhp;
+
+ vhp = malloc(sizeof (vnd_handle_t));
+ if (vhp == NULL) {
+ if (vnderr != NULL)
+ *vnderr = VND_E_NOMEM;
+ if (syserr != NULL)
+ *syserr = 0;
+ return (NULL);
+ }
+ bzero(vhp, sizeof (vnd_handle_t));
+
+ fd = open("/dev/vnd/ctl", O_RDWR);
+ if (fd < 0) {
+ if (vnderr != NULL)
+ *vnderr = VND_E_SYS;
+ if (syserr != NULL)
+ *syserr = errno;
+ free(vhp);
+ return (NULL);
+ }
+
+ vhp->vh_fd = fd;
+ return (vhp);
+}
+
+vnd_handle_t *
+vnd_create(const char *zonename, const char *datalink, const char *linkname,
+ vnd_errno_t *vnderr, int *syserr)
+{
+ int ret;
+ vnd_handle_t *vhp;
+ vnd_ioc_attach_t via;
+ zoneid_t zid;
+
+ if (strlen(datalink) >= VND_NAMELEN) {
+ if (vnderr != NULL)
+ *vnderr = VND_E_BADNAME;
+ if (syserr != NULL)
+ *syserr = 0;
+ return (NULL);
+ }
+
+ vhp = vnd_open_ctl(vnderr, syserr);
+ if (vhp == NULL)
+ return (NULL); /* errno set for us */
+
+ if (zonename != NULL) {
+ zid = getzoneidbyname(zonename);
+ if (zid == -1) {
+ vnd_close(vhp);
+ if (vnderr != NULL)
+ *vnderr = VND_E_NOZONE;
+ if (syserr != NULL)
+ *syserr = 0;
+ return (NULL);
+ }
+ via.via_zoneid = zid;
+ } else {
+ via.via_zoneid = -1;
+ }
+
+ (void) strlcpy(via.via_name, datalink, sizeof (via.via_name));
+ via.via_errno = VND_E_SUCCESS;
+ if (ioctl(vhp->vh_fd, VND_IOC_ATTACH, &via) != 0) {
+ if (via.via_errno != VND_E_SUCCESS) {
+ if (vnderr != NULL)
+ *vnderr = via.via_errno;
+ if (syserr != NULL)
+ *syserr = 0;
+ } else {
+ if (vnderr != NULL)
+ *vnderr = VND_E_SYS;
+ if (syserr != NULL)
+ *syserr = errno;
+ }
+ vnd_close(vhp);
+ return (NULL);
+ }
+
+ ret = vnd_link(vhp, linkname);
+ if (ret != 0) {
+ if (vnderr != NULL)
+ *vnderr = vhp->vh_errno;
+ if (syserr != NULL)
+ *syserr = vhp->vh_syserr;
+ vnd_close(vhp);
+ return (NULL);
+ }
+
+ if (vnderr != NULL)
+ *vnderr = VND_E_SUCCESS;
+ if (syserr != NULL)
+ *syserr = 0;
+
+ return (vhp);
+}
+
+vnd_handle_t *
+vnd_open(const char *zone, const char *link, vnd_errno_t *vnderr, int *syserr)
+{
+ int fd, ret;
+ char path[MAXPATHLEN];
+ vnd_handle_t *vhp;
+
+ if (zone != NULL)
+ ret = snprintf(path, sizeof (path), "/dev/vnd/zone/%s/%s",
+ zone, link);
+ else
+ ret = snprintf(path, sizeof (path), "/dev/vnd/%s", link);
+
+ if (ret >= sizeof (path)) {
+ if (vnderr != NULL)
+ *vnderr = VND_E_BADNAME;
+ if (syserr != NULL)
+ *syserr = 0;
+ return (NULL);
+ }
+
+ fd = open(path, O_RDWR);
+ if (fd < 0) {
+ if (vnderr != NULL)
+ *vnderr = VND_E_SYS;
+ if (syserr != NULL)
+ *syserr = errno;
+ return (NULL);
+ }
+
+ vhp = malloc(sizeof (vnd_handle_t));
+ if (vhp == NULL) {
+ if (vnderr != NULL)
+ *vnderr = VND_E_NOMEM;
+ if (syserr != NULL)
+ *syserr = 0;
+ ret = close(fd);
+ assert(ret == 0);
+ return (NULL);
+ }
+
+ bzero(vhp, sizeof (vnd_handle_t));
+ vhp->vh_fd = fd;
+
+ return (vhp);
+}
+
+int
+vnd_unlink(vnd_handle_t *vhp)
+{
+ vnd_ioc_unlink_t viu;
+ viu.viu_errno = VND_E_SUCCESS;
+
+ if (ioctl(vhp->vh_fd, VND_IOC_UNLINK, &viu) != 0)
+ return (vnd_ioc_return(vhp, viu.viu_errno));
+
+ return (0);
+}
+
+int
+vnd_pollfd(vnd_handle_t *vhp)
+{
+ return (vhp->vh_fd);
+}
+
+int
+vnd_walk(vnd_walk_cb_t func, void *arg, vnd_errno_t *vnderr, int *syserr)
+{
+ vnd_handle_t *vhp;
+ vnd_ioc_list_t vl;
+ vnd_ioc_info_t *viip;
+ int i, ret;
+
+ vl.vl_nents = 0;
+ vl.vl_ents = NULL;
+
+ vhp = vnd_open_ctl(vnderr, syserr);
+ if (vhp == NULL)
+ return (-1); /* errno is set for us */
+
+ /* VND_IOC_LIST only returns generic errnos */
+ if (ioctl(vhp->vh_fd, VND_IOC_LIST, &vl) != 0) {
+ if (vnderr != NULL)
+ *vnderr = VND_E_SYS;
+ if (syserr != NULL)
+ *syserr = errno;
+ (void) vnd_ioc_return(vhp, VND_E_SUCCESS);
+ vnd_close(vhp);
+
+ return (-1);
+ }
+
+ if (vl.vl_actents == 0) {
+ vnd_close(vhp);
+ return (0);
+ }
+
+ viip = malloc(sizeof (vnd_ioc_info_t) * vl.vl_actents);
+ if (viip == NULL) {
+ if (vnderr != NULL)
+ *vnderr = VND_E_NOMEM;
+ if (syserr != NULL)
+ *syserr = 0;
+ vnd_close(vhp);
+ return (-1);
+ }
+
+ vl.vl_nents = vl.vl_actents;
+ vl.vl_ents = viip;
+
+ if (ioctl(vhp->vh_fd, VND_IOC_LIST, &vl) != 0) {
+ if (vnderr != NULL)
+ *vnderr = VND_E_SYS;
+ if (syserr != NULL)
+ *syserr = errno;
+ (void) vnd_ioc_return(vhp, VND_E_SUCCESS);
+ free(viip);
+ vnd_close(vhp);
+ return (-1);
+ }
+
+ ret = 0;
+ for (i = 0; i < MIN(vl.vl_nents, vl.vl_actents); i++) {
+ if (func((vnd_info_t *)(viip + i), arg) != 0) {
+ ret = 1;
+ break;
+ }
+ }
+
+ free(viip);
+ vnd_close(vhp);
+
+ return (ret);
+}
+
+static int
+vnd_prop_readonly(vnd_handle_t *vhp)
+{
+ vhp->vh_syserr = 0;
+ vhp->vh_errno = VND_E_PROPRDONLY;
+ return (-1);
+}
+
+/*ARGSUSED*/
+static int
+vnd_prop_getbuf(vnd_handle_t *vhp, int cmd, void *buf, size_t len)
+{
+ vnd_ioc_buf_t vib;
+ vnd_prop_buf_t *vpbp = (vnd_prop_buf_t *)buf;
+ vib.vib_errno = 0;
+
+ if (ioctl(vhp->vh_fd, cmd, &vib) != 0)
+ return (vnd_ioc_return(vhp, vib.vib_errno));
+
+ vpbp->vpb_size = vib.vib_size;
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+vnd_prop_setbuf(vnd_handle_t *vhp, int cmd, void *buf, size_t len)
+{
+ vnd_ioc_buf_t vib;
+ vnd_prop_buf_t *vpbp = (vnd_prop_buf_t *)buf;
+
+ vib.vib_errno = 0;
+ vib.vib_size = vpbp->vpb_size;
+ if (ioctl(vhp->vh_fd, cmd, &vib) != 0)
+ return (vnd_ioc_return(vhp, vib.vib_errno));
+
+ return (0);
+}
+
+typedef int (*vpt_prop_f)(vnd_handle_t *, int, void *, size_t);
+typedef struct vnd_prop_tab {
+ vnd_prop_t vpt_prop;
+ size_t vpt_size;
+ int vpt_ioctl_get;
+ int vpt_ioctl_set;
+ vpt_prop_f vpt_get;
+ vpt_prop_f vpt_set;
+} vnd_prop_tab_t;
+
+static vnd_prop_tab_t vnd_props[] = {
+ { VND_PROP_RXBUF, sizeof (vnd_prop_buf_t), VND_IOC_GETRXBUF,
+ VND_IOC_SETRXBUF, vnd_prop_getbuf, vnd_prop_setbuf},
+ { VND_PROP_TXBUF, sizeof (vnd_prop_buf_t), VND_IOC_GETTXBUF,
+ VND_IOC_SETTXBUF, vnd_prop_getbuf, vnd_prop_setbuf },
+ { VND_PROP_MAXBUF, sizeof (vnd_prop_buf_t), VND_IOC_GETMAXBUF,
+ -1, vnd_prop_getbuf, NULL },
+ { VND_PROP_MINTU, sizeof (vnd_prop_buf_t), VND_IOC_GETMINTU,
+ -1, vnd_prop_getbuf, NULL },
+ { VND_PROP_MAXTU, sizeof (vnd_prop_buf_t), VND_IOC_GETMAXTU,
+ -1, vnd_prop_getbuf, NULL },
+ { VND_PROP_MAX }
+};
+
+static int
+vnd_prop(vnd_handle_t *vhp, vnd_prop_t prop, void *buf, size_t len,
+ boolean_t get)
+{
+ vnd_prop_tab_t *vpt;
+
+ for (vpt = vnd_props; vpt->vpt_prop != VND_PROP_MAX; vpt++) {
+ if (vpt->vpt_prop != prop)
+ continue;
+
+ if (len != vpt->vpt_size) {
+ vhp->vh_errno = VND_E_BADPROPSIZE;
+ vhp->vh_syserr = 0;
+ return (-1);
+ }
+
+ if (get == B_TRUE) {
+ return (vpt->vpt_get(vhp, vpt->vpt_ioctl_get, buf,
+ len));
+ } else {
+ if (vpt->vpt_set == NULL)
+ return (vnd_prop_readonly(vhp));
+ return (vpt->vpt_set(vhp, vpt->vpt_ioctl_set, buf,
+ len));
+ }
+ }
+
+ vhp->vh_errno = VND_E_BADPROP;
+ vhp->vh_syserr = 0;
+ return (-1);
+}
+
+int
+vnd_prop_get(vnd_handle_t *vhp, vnd_prop_t prop, void *buf, size_t len)
+{
+ return (vnd_prop(vhp, prop, buf, len, B_TRUE));
+}
+
+int
+vnd_prop_set(vnd_handle_t *vhp, vnd_prop_t prop, void *buf, size_t len)
+{
+ return (vnd_prop(vhp, prop, buf, len, B_FALSE));
+}
+
+int
+vnd_prop_writeable(vnd_prop_t prop, boolean_t *write)
+{
+ vnd_prop_tab_t *vpt;
+
+ for (vpt = vnd_props; vpt->vpt_prop != VND_PROP_MAX; vpt++) {
+ if (vpt->vpt_prop != prop)
+ continue;
+
+ *write = (vpt->vpt_set != NULL);
+ return (0);
+ }
+
+ return (-1);
+}
+
+int
+vnd_prop_iter(vnd_handle_t *vhp, vnd_prop_iter_f func, void *arg)
+{
+ int i;
+
+ for (i = 0; i < VND_PROP_MAX; i++) {
+ if (func(vhp, i, arg) != 0)
+ return (1);
+ }
+
+ return (0);
+}
+
+int
+vnd_frameio_read(vnd_handle_t *vhp, frameio_t *fiop)
+{
+ int ret;
+
+ ret = ioctl(vhp->vh_fd, VND_IOC_FRAMEIO_READ, fiop);
+ if (ret == -1) {
+ vhp->vh_errno = VND_E_SYS;
+ vhp->vh_syserr = errno;
+ }
+
+ return (ret);
+}
+
+int
+vnd_frameio_write(vnd_handle_t *vhp, frameio_t *fiop)
+{
+ int ret;
+
+ ret = ioctl(vhp->vh_fd, VND_IOC_FRAMEIO_WRITE, fiop);
+ if (ret == -1) {
+ vhp->vh_errno = VND_E_SYS;
+ vhp->vh_syserr = errno;
+ }
+
+ return (ret);
+}
diff --git a/usr/src/lib/libvnd/common/libvnd.h b/usr/src/lib/libvnd/common/libvnd.h
new file mode 100644
index 0000000000..ea92f113b6
--- /dev/null
+++ b/usr/src/lib/libvnd/common/libvnd.h
@@ -0,0 +1,84 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _LIBVND_H
+#define _LIBVND_H
+
+/*
+ * libvnd interfaces
+ */
+
+#include <stdint.h>
+#include <sys/vnd_errno.h>
+#include <sys/frameio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LIBVND_NAMELEN 32
+
+typedef struct vnd_handle vnd_handle_t;
+
+extern vnd_handle_t *vnd_create(const char *, const char *, const char *,
+ vnd_errno_t *, int *);
+extern vnd_handle_t *vnd_open(const char *, const char *, vnd_errno_t *, int *);
+extern int vnd_unlink(vnd_handle_t *);
+extern void vnd_close(vnd_handle_t *);
+extern vnd_errno_t vnd_errno(vnd_handle_t *);
+extern int vnd_syserrno(vnd_handle_t *);
+extern const char *vnd_strerror(vnd_errno_t);
+extern const char *vnd_strsyserror(int);
+
+extern int vnd_pollfd(vnd_handle_t *);
+
+typedef struct vnd_info {
+ uint32_t vi_version;
+ zoneid_t vi_zone;
+ char vi_name[LIBVND_NAMELEN];
+ char vi_datalink[LIBVND_NAMELEN];
+} vnd_info_t;
+
+typedef int (*vnd_walk_cb_t)(vnd_info_t *, void *);
+extern int vnd_walk(vnd_walk_cb_t, void *, vnd_errno_t *, int *);
+
+typedef enum vnd_prop {
+ VND_PROP_RXBUF = 0,
+ VND_PROP_TXBUF,
+ VND_PROP_MAXBUF,
+ VND_PROP_MINTU,
+ VND_PROP_MAXTU,
+ VND_PROP_MAX
+} vnd_prop_t;
+
+typedef struct vnd_prop_buf {
+ uint64_t vpb_size;
+} vnd_prop_buf_t;
+
+extern int vnd_prop_get(vnd_handle_t *, vnd_prop_t, void *, size_t);
+extern int vnd_prop_set(vnd_handle_t *, vnd_prop_t, void *, size_t);
+extern int vnd_prop_writeable(vnd_prop_t, boolean_t *);
+
+typedef int (*vnd_prop_iter_f)(vnd_handle_t *, vnd_prop_t, void *);
+extern int vnd_prop_iter(vnd_handle_t *, vnd_prop_iter_f, void *);
+
+extern int vnd_frameio_read(vnd_handle_t *, frameio_t *);
+extern int vnd_frameio_write(vnd_handle_t *, frameio_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBVND_H */
diff --git a/usr/src/lib/libvnd/common/llib-lvnd b/usr/src/lib/libvnd/common/llib-lvnd
new file mode 100644
index 0000000000..80a4229e32
--- /dev/null
+++ b/usr/src/lib/libvnd/common/llib-lvnd
@@ -0,0 +1,19 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/* LINTLIBRARY */
+/* PROTOLIB1 */
+
+#include <libvnd.h>
diff --git a/usr/src/lib/libvnd/common/mapfile-vers b/usr/src/lib/libvnd/common/mapfile-vers
new file mode 100644
index 0000000000..0eb862ab60
--- /dev/null
+++ b/usr/src/lib/libvnd/common/mapfile-vers
@@ -0,0 +1,55 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+#
+# TODO When this makes it into illumos we should make it a public interface
+#
+SYMBOL_VERSION ILLUMOSprivate {
+ global:
+ vnd_create;
+ vnd_close;
+ vnd_errno;
+ vnd_frameio_read;
+ vnd_frameio_write;
+ vnd_open;
+ vnd_pollfd;
+ vnd_prop_get;
+ vnd_prop_iter;
+ vnd_prop_set;
+ vnd_prop_writeable;
+ vnd_strerror;
+ vnd_strsyserror;
+ vnd_syserrno;
+ vnd_unlink;
+ vnd_walk;
+ local:
+ *;
+};
diff --git a/usr/src/lib/libvnd/i386/Makefile b/usr/src/lib/libvnd/i386/Makefile
new file mode 100644
index 0000000000..41e699e8f8
--- /dev/null
+++ b/usr/src/lib/libvnd/i386/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/libwanboot/Makefile.com b/usr/src/lib/libwanboot/Makefile.com
index ed69ec0b8a..1e6310f9ba 100644
--- a/usr/src/lib/libwanboot/Makefile.com
+++ b/usr/src/lib/libwanboot/Makefile.com
@@ -60,7 +60,7 @@ include ../../Makefile.lib
LIBS += $(LINTLIB)
LDLIBS += -lnvpair -lresolv -lnsl -lsocket -ldevinfo -ldhcputil \
- -linetutil -lc -lcrypto -lssl
+ -linetutil -lc -lsunw_crypto -lsunw_ssl
CPPFLAGS = -I$(SRC)/common/net/wanboot/crypt $(CPPFLAGS.master)
CERRWARN += -_gcc=-Wno-switch
CERRWARN += -_gcc=-Wno-parentheses
diff --git a/usr/src/lib/libzdoor/Makefile b/usr/src/lib/libzdoor/Makefile
new file mode 100644
index 0000000000..50d9545ef1
--- /dev/null
+++ b/usr/src/lib/libzdoor/Makefile
@@ -0,0 +1,48 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2011 Joyent, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+include ../Makefile.lib
+
+SUBDIRS= $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+install := TARGET = install
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber install lint: $(SUBDIRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+_msg:
+
+FRC:
+
+include ../Makefile.targ
diff --git a/usr/src/lib/libzdoor/Makefile.com b/usr/src/lib/libzdoor/Makefile.com
new file mode 100644
index 0000000000..09bde0f1b4
--- /dev/null
+++ b/usr/src/lib/libzdoor/Makefile.com
@@ -0,0 +1,50 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2011 Joyent, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+LIBRARY= libzdoor.a
+VERS= .1
+OBJECTS= zdoor.o \
+ zdoor-int.o \
+ ztree.o \
+ zerror.o
+
+include ../../Makefile.lib
+include ../../Makefile.rootfs
+
+SRCDIR = ../common
+SRCS = $(OBJECTS:%.o=$(SRCDIR)/%.c)
+
+CPPFLAGS += -I$(SRCDIR) -D_REENTRANT -D_FILE_OFFSET_BITS=64
+LIBS = $(DYNLIB) $(LINTLIB)
+LDLIBS += -lc -lzonecfg -lcontract
+
+$(LINTLIB) := SRCS= $(SRCDIR)/$(LINTSRC)
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+include ../../Makefile.targ
diff --git a/usr/src/lib/libzdoor/amd64/Makefile b/usr/src/lib/libzdoor/amd64/Makefile
new file mode 100644
index 0000000000..31be0ef7e6
--- /dev/null
+++ b/usr/src/lib/libzdoor/amd64/Makefile
@@ -0,0 +1,32 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2011 Joyent, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../Makefile.com
+include ../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/libzdoor/common/llib-lzdoor b/usr/src/lib/libzdoor/common/llib-lzdoor
new file mode 100644
index 0000000000..a21a904b11
--- /dev/null
+++ b/usr/src/lib/libzdoor/common/llib-lzdoor
@@ -0,0 +1,34 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*LINTLIBRARY*/
+/*PROTOLIB1*/
+/*
+ *
+ * Copyright 2011 Joyent, Inc. All rights reserved.
+ * Use is subject to license terms.
+ *
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <zdoor.h>
diff --git a/usr/src/lib/libzdoor/common/mapfile-vers b/usr/src/lib/libzdoor/common/mapfile-vers
new file mode 100644
index 0000000000..38eba57ee2
--- /dev/null
+++ b/usr/src/lib/libzdoor/common/mapfile-vers
@@ -0,0 +1,48 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2011, Joyent Inc. All rights reserved.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_VERSION SUNWprivate_1.1 {
+ global:
+ zdoor_handle_init;
+ zdoor_handle_destroy;
+ zdoor_open;
+ zdoor_close;
+ local:
+ *;
+};
diff --git a/usr/src/lib/libzdoor/common/zdoor-int.c b/usr/src/lib/libzdoor/common/zdoor-int.c
new file mode 100644
index 0000000000..f77c1453d4
--- /dev/null
+++ b/usr/src/lib/libzdoor/common/zdoor-int.c
@@ -0,0 +1,324 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2012 Joyent, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/fork.h>
+#include <libcontract.h>
+#include <libzonecfg.h>
+#include <sys/contract/process.h>
+#include <sys/ctfs.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "zdoor-int.h"
+#include "zerror.h"
+
+#define ZDOOR_FMT_STR "/var/tmp/.%s"
+
+
+static int
+init_template(void)
+{
+ int fd = 0;
+ int err = 0;
+
+ fd = open64(CTFS_ROOT "/process/template", O_RDWR);
+ if (fd == -1)
+ return (-1);
+
+ err |= ct_tmpl_set_critical(fd, 0);
+ err |= ct_tmpl_set_informative(fd, 0);
+ err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
+ err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
+ if (err || ct_tmpl_activate(fd)) {
+ (void) close(fd);
+ return (-1);
+ }
+
+ return (fd);
+}
+
+static int
+contract_latest(ctid_t *id)
+{
+ int cfd = 0;
+ int r = 0;
+ ct_stathdl_t st = {0};
+ ctid_t result = {0};
+
+ if ((cfd = open64(CTFS_ROOT "/process/latest", O_RDONLY)) == -1)
+ return (errno);
+ if ((r = ct_status_read(cfd, CTD_COMMON, &st)) != 0) {
+ (void) close(cfd);
+ return (r);
+ }
+
+ result = ct_status_get_id(st);
+ ct_status_free(st);
+ (void) close(cfd);
+
+ *id = result;
+ return (0);
+}
+
+static int
+close_on_exec(int fd)
+{
+ int flags = fcntl(fd, F_GETFD, 0);
+ if ((flags != -1) && (fcntl(fd, F_SETFD, flags | FD_CLOEXEC) != -1))
+ return (0);
+ return (-1);
+}
+
+static int
+contract_open(ctid_t ctid, const char *type, const char *file, int oflag)
+{
+ char path[PATH_MAX];
+ int n = 0;
+ int fd = 0;
+
+ if (type == NULL)
+ type = "all";
+
+ n = snprintf(path, PATH_MAX, CTFS_ROOT "/%s/%ld/%s", type, ctid, file);
+ if (n >= sizeof (path)) {
+ errno = ENAMETOOLONG;
+ return (-1);
+ }
+
+ fd = open64(path, oflag);
+ if (fd != -1) {
+ if (close_on_exec(fd) == -1) {
+ int err = errno;
+ (void) close(fd);
+ errno = err;
+ return (-1);
+ }
+ }
+ return (fd);
+}
+
+static int
+contract_abandon_id(ctid_t ctid)
+{
+ int fd = 0;
+ int err = 0;
+
+ fd = contract_open(ctid, "all", "ctl", O_WRONLY);
+ if (fd == -1)
+ return (errno);
+
+ err = ct_ctl_abandon(fd);
+ (void) close(fd);
+
+ return (err);
+}
+
+/*
+ * zdoor_fattach(zone,service,door,detach_only) is heavily borrowed from
+ * zonestatd. Basically this forks, zone_enter's the targeted zone,
+ * fattaches to /var/tmp/.<service> with the door you've opened.
+ * detach_only gets passed in on door_stop to fdetach in the targeted zone.
+ * Note that this code really does require all the contract calls, which are
+ * all the static functions preceding this (have a look at zone_enter; without
+ * that code zone_enter will kick back EINVAL).
+ */
+int
+zdoor_fattach(zoneid_t zoneid, const char *service, int door, int detach_only)
+{
+ int fd = 0;
+ int len = 0;
+ int pid = 0;
+ int stat = 0;
+ int tmpl_fd = 0;
+ char path[MAXPATHLEN] = {0};
+ ctid_t ct = -1;
+
+ if (zoneid < 0) {
+ zdoor_debug("zdoor_fattach: zoneid < 0");
+ return (ZDOOR_ARGS_ERROR);
+ }
+
+ if (service == NULL) {
+ zdoor_debug("zdoor_fattach: NULL service");
+ return (ZDOOR_ARGS_ERROR);
+ }
+
+ if ((tmpl_fd = init_template()) < 0) {
+ zdoor_warn("zdoor_fattach: init contract for %d:%s failed",
+ zoneid, service);
+ return (ZDOOR_ERROR);
+ }
+
+ len = snprintf(NULL, 0, ZDOOR_FMT_STR, service) + 1;
+ if (len > MAXPATHLEN)
+ return (ZDOOR_ARGS_ERROR);
+ (void) snprintf(path, len, ZDOOR_FMT_STR, service);
+
+ zdoor_info("zdoor_fattach: ensuring %s", path);
+
+ pid = fork();
+ if (pid < 0) {
+ (void) ct_tmpl_clear(tmpl_fd);
+ zdoor_error("zdoor_fattach: unable to fork for zone_enter: %s",
+ strerror(errno));
+ return (ZDOOR_OK);
+ }
+
+ if (pid == 0) {
+ zdoor_debug("zdoor_fattach(CHILD): starting");
+ (void) ct_tmpl_clear(tmpl_fd);
+ (void) close(tmpl_fd);
+ if (zone_enter(zoneid) != 0) {
+ zdoor_debug("zdoor_fattach(CHILD): zone_enter fail %s",
+ strerror(errno));
+ if (errno == EINVAL) {
+ _exit(0);
+ }
+ _exit(1);
+ }
+ (void) fdetach(path);
+ (void) unlink(path);
+ if (detach_only) {
+ zdoor_debug("zdoor_fattach(CHILD): detach only, done");
+ _exit(0);
+ }
+ fd = open(path, O_CREAT|O_RDWR, 0644);
+ if (fd < 0) {
+ zdoor_debug("zdoor_fattach(CHILD): open failed: %s",
+ strerror(errno));
+ _exit(2);
+ }
+ if (fattach(door, path) != 0) {
+ zdoor_debug("zdoor_fattach(CHILD): fattach failed: %s",
+ strerror(errno));
+ _exit(3);
+ }
+ _exit(0);
+ }
+ if (contract_latest(&ct) == -1)
+ ct = -1;
+ (void) ct_tmpl_clear(tmpl_fd);
+ (void) close(tmpl_fd);
+ (void) contract_abandon_id(ct);
+
+ zdoor_debug("zdoor_fattach: waiting for child...");
+ while (waitpid(pid, &stat, 0) != pid)
+ ;
+ if (WIFEXITED(stat) && WEXITSTATUS(stat) == 0) {
+ zdoor_debug(" child exited with success");
+ zdoor_debug("zdoor_fattach: returning ZDOOR_OK");
+ return (ZDOOR_OK);
+ }
+
+ zdoor_debug(" child exited with %d", WEXITSTATUS(stat));
+ zdoor_debug("zdoor_fattach: returning ZDOOR_ERROR");
+ return (ZDOOR_ERROR);
+}
+
+/*
+ * zdoor_zone_is_running(zone) returns 1 if the specified zone is running, or 0
+ * if it is any other state. It additionally eats any other errors it
+ * encounters and returns 0 upon encountering them.
+ */
+boolean_t
+zdoor_zone_is_running(zoneid_t zoneid)
+{
+ zone_state_t state;
+ char zone[ZONENAME_MAX];
+ if (zoneid < 0)
+ return (B_FALSE);
+
+ if (getzonenamebyid(zoneid, zone, ZONENAME_MAX) < 0)
+ return (B_FALSE);
+
+ if (!zone_get_state((char *)zone, &state) == Z_OK)
+ return (B_FALSE);
+
+ return (state == ZONE_STATE_RUNNING);
+}
+
+/*
+ * zdoor_cookie_create simply allocates and initializes
+ * memory. Returns NULL on any error.
+ */
+zdoor_cookie_t *
+zdoor_cookie_create(const char *zonename, const char *service,
+const void *biscuit)
+{
+ zdoor_cookie_t *cookie = NULL;
+
+ if (zonename == NULL || service == NULL)
+ return (NULL);
+
+ cookie = (zdoor_cookie_t *)calloc(1, sizeof (zdoor_cookie_t));
+ if (cookie == NULL) {
+ OUT_OF_MEMORY();
+ return (NULL);
+ }
+ cookie->zdc_biscuit = (void *)biscuit;
+ cookie->zdc_zonename = strdup((char *)zonename);
+ if (cookie->zdc_zonename == NULL) {
+ zdoor_cookie_free(cookie);
+ OUT_OF_MEMORY();
+ return (NULL);
+ }
+ cookie->zdc_service = strdup((char *)service);
+ if (cookie->zdc_service == NULL) {
+ zdoor_cookie_free(cookie);
+ OUT_OF_MEMORY();
+ return (NULL);
+ }
+
+ return (cookie);
+}
+
+/*
+ * zdoor_cookie_free(cookie) cleans up any memory associated with the
+ * specified cookie.
+ */
+void
+zdoor_cookie_free(zdoor_cookie_t *cookie)
+{
+ if (cookie == NULL)
+ return;
+
+ if (cookie->zdc_zonename != NULL) {
+ free(cookie->zdc_zonename);
+ cookie->zdc_zonename = NULL;
+ }
+
+ if (cookie->zdc_service != NULL) {
+ free(cookie->zdc_service);
+ cookie->zdc_service = NULL;
+ }
+
+ free(cookie);
+}
diff --git a/usr/src/lib/libzdoor/common/zdoor-int.h b/usr/src/lib/libzdoor/common/zdoor-int.h
new file mode 100644
index 0000000000..782452c426
--- /dev/null
+++ b/usr/src/lib/libzdoor/common/zdoor-int.h
@@ -0,0 +1,64 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Joyent, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _ZDOOR_INT_H
+#define _ZDOOR_INT_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <pthread.h>
+#include <zdoor.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum zdoor_action_t {
+ ZDOOR_ACTION_NOOP,
+ ZDOOR_ACTION_STOP,
+ ZDOOR_ACTION_START
+} zdoor_action_t;
+
+struct zdoor_handle {
+ pthread_mutex_t zdh_lock;
+ void *zdh_zonecfg_handle;
+ void *zdh_ztree;
+};
+
+zdoor_cookie_t *zdoor_cookie_create(const char *zonename, const char *service,
+ const void *biscuit);
+
+void zdoor_cookie_free(zdoor_cookie_t *cookie);
+
+boolean_t zdoor_zone_is_running(zoneid_t zoneid);
+
+int zdoor_fattach(zoneid_t zoneid, const char *service, int door,
+ int detach_only);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZDOOR_INT_H */
diff --git a/usr/src/lib/libzdoor/common/zdoor.c b/usr/src/lib/libzdoor/common/zdoor.c
new file mode 100644
index 0000000000..f2996b4e2d
--- /dev/null
+++ b/usr/src/lib/libzdoor/common/zdoor.c
@@ -0,0 +1,437 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Joyent, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <alloca.h>
+#include <door.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libzonecfg.h>
+#include <pthread.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <syslog.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stropts.h>
+#include <unistd.h>
+#include <zdoor.h>
+#include <zone.h>
+
+#include "zdoor-int.h"
+#include "zerror.h"
+#include "ztree.h"
+
+extern void *
+zonecfg_notify_bind(int(*func)(const char *zonename, zoneid_t zid,
+ const char *newstate, const char *oldstate,
+ hrtime_t when, void *p), void *p);
+
+extern void
+zonecfg_notify_unbind(void *handle);
+
+/*
+ * _callback(cookie, door_args...) is our private function that we tell
+ * the Solaris door API about. This function does some sanity checking on
+ * arguments and issues a callback to the owner of this door. That API
+ * will return us memory that needs to be sent back to the client on the
+ * other end of the door, but since the door_return API never gives you
+ * back control of the function, this does a simple alloca/memcpy and
+ * frees up the memory pointed to by the parent. While this really doesn't
+ * let a client do much other than pass a simple struct of primitives (or
+ * more likely more common a char *), that's the way the door API works,
+ * and so this isn't really imposing any restriction that didn't already
+ * need to be dealt with by someone. This is why the zdoor_result structure
+ * takes a char *, rather than a void * for the data pointer.
+ */
+static void
+_callback(void *cookie, char *argp, size_t arg_size, door_desc_t *dp,
+ uint_t n_desc)
+{
+ zdoor_result_t *result = NULL;
+ void *door_response = NULL;
+ int size = 0;
+ dtree_entry_t *entry = (dtree_entry_t *)cookie;
+
+ if (entry == NULL) {
+ zdoor_warn("_callback: NULL cookie? door_returning");
+ (void) door_return(NULL, 0, NULL, 0);
+ }
+
+ (void) pthread_mutex_lock(&entry->dte_parent->zte_parent->zdh_lock);
+ zdoor_debug("_callback: calling back with %p", entry->dte_cookie);
+ result = entry->dte_callback(entry->dte_cookie, argp, arg_size);
+ zdoor_debug("_callback: app callback returned %p", result);
+ (void) pthread_mutex_unlock(&entry->dte_parent->zte_parent->zdh_lock);
+
+ if (result == NULL) {
+ zdoor_debug("_callback: door_returning NULL");
+ (void) door_return(NULL, 0, NULL, 0);
+ }
+
+ if (result->zdr_data != NULL && result->zdr_size > 0) {
+ door_response = alloca(result->zdr_size);
+ if (door_response != NULL) {
+ size = result->zdr_size;
+ (void) memcpy(door_response,
+ (void *) result->zdr_data, size);
+ }
+ }
+
+ if (result->zdr_data != NULL)
+ free(result->zdr_data);
+ free(result);
+
+ zdoor_debug("_callback: door_returning %p, %d", door_response, size);
+ (void) door_return(door_response, size, NULL, 0);
+}
+
+static void
+zdoor_stop(dtree_entry_t *entry)
+{
+ zoneid_t zid = -1;
+
+ if (entry == NULL) {
+ zdoor_debug("zdoor_stop: NULL arguments");
+ return;
+ }
+
+ zdoor_debug("zdoor_stop: entry=%p, zone=%s, service=%s",
+ entry, entry->dte_parent->zte_zonename, entry->dte_service);
+
+ zid = getzoneidbyname(entry->dte_parent->zte_zonename);
+ (void) zdoor_fattach(zid, entry->dte_service, entry->dte_door, 1);
+ (void) door_revoke(entry->dte_door);
+ entry->dte_door = -1;
+
+ zdoor_debug("zdoor_stop returning");
+}
+
+/*
+ * zdoor_create is called both by the main API
+ * call zdoor_open, as well as by the zone_monitor code upon a zone restart
+ * (assuming it already has a door in it). This code assumes that the
+ * permissions were correct (e.g., the target door is not a GZ, that this
+ * program is being run out of the GZ), but does not assume that the target
+ * door file has not changed out from under us, so that is explicitly rechecked.
+ *
+ * This also assumes the parent has already locked handle.
+ */
+static int
+zdoor_create(dtree_entry_t *entry)
+{
+ int status = ZDOOR_OK;
+ zoneid_t zid = -1;
+ zdoor_handle_t handle = NULL;
+
+ if (entry == NULL) {
+ zdoor_debug("zdoor_create: NULL arguments");
+ return (ZDOOR_ARGS_ERROR);
+ }
+
+ zdoor_debug("zdoor_create: entry=%p, zone=%s, service=%s",
+ entry, entry->dte_parent->zte_zonename, entry->dte_service);
+
+ handle = entry->dte_parent->zte_parent;
+
+ zid = getzoneidbyname(entry->dte_parent->zte_zonename);
+ if (zid < 0) {
+ zdoor_info("zdoor_create: %s is a non-existient zone",
+ entry->dte_parent->zte_zonename);
+ return (ZDOOR_ERROR);
+ }
+ if (!zdoor_zone_is_running(zid)) {
+ zdoor_debug("zdoor_create: %s is not running",
+ entry->dte_parent->zte_zonename);
+ return (ZDOOR_ZONE_NOT_RUNNING);
+ }
+
+ entry->dte_door = door_create(_callback, entry, 0);
+ zdoor_info("zdoor_create: door_create returned %d", entry->dte_door);
+ if (entry->dte_door < 0) {
+ zdoor_stop(entry);
+ return (ZDOOR_ERROR);
+ }
+
+ status = zdoor_fattach(zid, entry->dte_service, entry->dte_door, 0);
+
+ zdoor_debug("zdoor_create: returning %d", status);
+ return (status);
+}
+
+
+/*
+ * door_visitor(entry) is a callback from the ztree code that checks whether
+ * or not we should be taking some action on a given door. Note that the
+ * callpath to this API is:
+ * SYSTEM ->
+ * zone_monitor ->
+ * ztree_walk ->
+ * door_visitor
+ *
+ * Which is important to note that this API assumes that all things needing
+ * locking are locked by a parent caller (which is the zone_monitor).
+ */
+static void
+zdoor_visitor(dtree_entry_t *entry)
+{
+ if (entry == NULL) {
+ zdoor_info("zdoor_visitor: entered with NULL entry");
+ return;
+ }
+
+ zdoor_debug("zdoor_visitor: entered for entry=%p, service=%s",
+ entry, entry->dte_service);
+
+ if (entry->dte_parent->zte_action == ZDOOR_ACTION_STOP) {
+ zdoor_debug(" stopping zdoor");
+ zdoor_stop(entry);
+ } else if (entry->dte_parent->zte_action == ZDOOR_ACTION_START) {
+ zdoor_debug(" starting zdoor");
+ if (zdoor_create(entry) != ZDOOR_OK) {
+ zdoor_error("door_visitor: Unable to restart zdoor\n");
+ }
+ }
+}
+
+/*
+ * zone_monitor(zonename, zid, newstate, oldstate, when, cookie) is our
+ * registered callback with libzonecfg to notify us of any changes to a
+ * given zone. This activates a walk on all doors for a zone iff the state
+ * is changing from running or into running.
+ */
+static int
+zone_monitor(const char *zonename, zoneid_t zid, const char *newstate,
+ const char *oldstate, hrtime_t when, void *p)
+{
+ zdoor_handle_t handle = (zdoor_handle_t)p;
+ ztree_entry_t *entry = NULL;
+
+ if (handle == NULL) {
+ zdoor_warn("zone_monitor: entered with NULL handle?");
+ return (-1);
+ }
+
+ zdoor_info("zone_monitor: zone=%s, zid=%d, newst=%s, oldst=%s, p=%p",
+ zonename, zid, newstate, oldstate, p);
+
+ (void) pthread_mutex_lock(&(handle->zdh_lock));
+ entry = ztree_zone_find(handle, zonename);
+ if (entry != NULL) {
+ zdoor_debug(" found entry in ztree");
+ entry->zte_action = ZDOOR_ACTION_NOOP;
+ if (strcmp("running", newstate) == 0) {
+ if (strcmp("ready", oldstate) == 0)
+ entry->zte_action = ZDOOR_ACTION_START;
+ } else if (strcmp("shutting_down", newstate) == 0) {
+ if (strcmp("running", oldstate) == 0)
+ entry->zte_action = ZDOOR_ACTION_STOP;
+ }
+ zdoor_debug(" set state to: %d", entry->zte_action);
+ if (entry->zte_action != ZDOOR_ACTION_NOOP)
+ ztree_walk_doors(handle, zonename);
+ }
+ (void) pthread_mutex_unlock(&(handle->zdh_lock));
+
+ zdoor_info("zone_monitor: returning");
+ return (0);
+}
+
+zdoor_handle_t
+zdoor_handle_init()
+{
+ zdoor_handle_t handle = NULL;
+
+ zdoor_debug("zdoor_handle_init entered");
+
+ handle = (zdoor_handle_t)calloc(1, sizeof (struct zdoor_handle));
+ if (handle == NULL) {
+ OUT_OF_MEMORY();
+ return (NULL);
+ }
+
+ (void) pthread_mutex_init(&(handle->zdh_lock), NULL);
+ handle->zdh_zonecfg_handle = zonecfg_notify_bind(zone_monitor, handle);
+ if (handle->zdh_zonecfg_handle == NULL) {
+ zdoor_error("zonecfg_notify_bind failure: %s", strerror(errno));
+ return (NULL);
+ }
+
+ zdoor_debug("zdoor_handle_init returning %p", handle);
+ return (handle);
+}
+
+void
+zdoor_handle_destroy(zdoor_handle_t handle)
+{
+ if (handle == NULL) {
+ zdoor_debug("zdoor_handle_destroy: NULL arguments");
+ return;
+ }
+
+ zdoor_debug("zdoor_handle_destroy: handle=%p", handle);
+
+ (void) pthread_mutex_lock(&(handle->zdh_lock));
+ zonecfg_notify_unbind(handle->zdh_zonecfg_handle);
+ (void) pthread_mutex_unlock(&(handle->zdh_lock));
+ (void) pthread_mutex_destroy(&(handle->zdh_lock));
+ free(handle);
+}
+
+/*
+ * zdoor_open(zone, service, biscuit, callback) is the main public facing API in
+ * libzdoor. It will open a door with the name .[service] under
+ * [zonepath]/root/var/tmp, where [zonepath] is resolved on the fly. Note this
+ * API can only be invoked from the global zone, and will not allow you to open
+ * a zdoor in the global zone.
+ */
+int
+zdoor_open(zdoor_handle_t handle, const char *zonename, const char *service,
+ void *biscuit, zdoor_callback callback)
+{
+ zdoor_cookie_t *zdoor_cookie = NULL;
+ int rc = -1;
+ int status = ZDOOR_OK;
+ zoneid_t zid = -1;
+ dtree_entry_t *entry = NULL;
+
+ if (handle == NULL || zonename == NULL ||
+ service == NULL || callback == NULL) {
+ zdoor_debug("zdoor_open: NULL arguments");
+ return (ZDOOR_ARGS_ERROR);
+ }
+ zdoor_debug("zdoor_open: entered: handle=%p, zone=%s, service=%s",
+ handle, zonename, service);
+
+ if (getzoneid() != GLOBAL_ZONEID) {
+ zdoor_warn("zdoor_open: not invoked from global zone");
+ return (ZDOOR_NOT_GLOBAL_ZONE);
+ }
+
+
+ zid = getzoneidbyname(zonename);
+ if (zid < 0) {
+ zdoor_info("zdoor_open: %s is a non-existent zone", zonename);
+ return (ZDOOR_ARGS_ERROR);
+ }
+
+ if (zid == GLOBAL_ZONEID) {
+ zdoor_warn("zdoor_open: zdoors not allowed in global zone");
+ return (ZDOOR_ZONE_FORBIDDEN);
+ }
+
+ if (!zdoor_zone_is_running(zid)) {
+ zdoor_info("zdoor_open: %s is not running", zonename);
+ return (ZDOOR_ZONE_NOT_RUNNING);
+ }
+
+ zdoor_cookie = zdoor_cookie_create(zonename, service, biscuit);
+ if (zdoor_cookie == NULL) {
+ OUT_OF_MEMORY();
+ return (ZDOOR_OUT_OF_MEMORY);
+ }
+
+ (void) pthread_mutex_lock(&(handle->zdh_lock));
+ rc = ztree_zone_add(handle, zonename, zdoor_visitor);
+ if (rc != ZTREE_SUCCESS && rc != ZTREE_ALREADY_EXISTS) {
+ zdoor_debug("zdoor_open: unable to add zone to ztree: %d", rc);
+ status = ZDOOR_ERROR;
+ goto out;
+ }
+ rc = ztree_door_add(handle, zonename, service, callback,
+ zdoor_cookie);
+ if (rc != ZTREE_SUCCESS) {
+ zdoor_debug("zdoor_open: unable to add door to ztree: %d", rc);
+ if (rc == ZTREE_ALREADY_EXISTS) {
+ zdoor_warn("service %s already has a zdoor", service);
+ }
+ status = ZDOOR_ERROR;
+ goto out;
+ }
+
+ entry = ztree_door_find(handle, zonename, service);
+ if (entry == NULL) {
+ zdoor_debug("zdoor_open: unable to find door in ztree?");
+ status = ZDOOR_ERROR;
+ goto out;
+ }
+ if (zdoor_create(entry) != ZDOOR_OK) {
+ zdoor_info("zdoor_open: zdoor_create failed.");
+ status = ZDOOR_ERROR;
+ goto out;
+ }
+out:
+ if (status != ZDOOR_OK) {
+ zdoor_debug("zdoor_open: status not ok, stopping and cleaning");
+ zdoor_stop(entry);
+ ztree_door_remove(handle, entry);
+ zdoor_cookie_free(zdoor_cookie);
+ }
+ (void) pthread_mutex_unlock(&(handle->zdh_lock));
+ zdoor_debug("zdoor_open: returning %d", status);
+ return (status);
+}
+
+/*
+ * zdoor_close(zone, service) unregisters a previously created zdoor, and
+ * returns the biscuit provided at creation time, so the caller can free it.
+ * Returns NULL on any error.
+ */
+void *
+zdoor_close(zdoor_handle_t handle, const char *zonename, const char *service)
+{
+ dtree_entry_t *entry = NULL;
+ zdoor_cookie_t *cookie = NULL;
+ void *biscuit = NULL;
+
+ if (handle == NULL || zonename == NULL || service == NULL) {
+ zdoor_debug("zdoor_close: NULL arguments");
+ return (NULL);
+ }
+
+ zdoor_debug("zdoor_close: entered handle=%p, zone=%s, service=%s",
+ handle, zonename, service);
+
+ (void) pthread_mutex_lock(&(handle->zdh_lock));
+
+ entry = ztree_door_find(handle, zonename, service);
+ if (entry != NULL) {
+ zdoor_debug("zdoor_close: found door in ztree, stopping");
+ zdoor_stop(entry);
+ cookie = ztree_door_remove(handle, entry);
+ if (cookie != NULL) {
+ biscuit = cookie->zdc_biscuit;
+ zdoor_cookie_free(cookie);
+ }
+ } else {
+ zdoor_debug("zdoor_close: didn't find door in ztree");
+ }
+
+ (void) pthread_mutex_unlock(&(handle->zdh_lock));
+
+ zdoor_debug("zdoor_close: returning %p", biscuit);
+ return (biscuit);
+}
diff --git a/usr/src/lib/libzdoor/common/zerror.c b/usr/src/lib/libzdoor/common/zerror.c
new file mode 100644
index 0000000000..5ccb449e1b
--- /dev/null
+++ b/usr/src/lib/libzdoor/common/zerror.c
@@ -0,0 +1,117 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Joyent, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <pthread.h>
+#include <sys/types.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "zerror.h"
+
+static const char *PREFIX = "%s ZDOOR:%s:T(%d): ";
+
+static const char *DEBUG_ENV_VAR = "ZDOOR_TRACE";
+
+static boolean_t
+is_debug_enabled()
+{
+ boolean_t enabled = B_FALSE;
+ const char *_envp = getenv(DEBUG_ENV_VAR);
+ if (_envp != NULL && atoi(_envp) >= 2)
+ enabled = B_TRUE;
+
+ return (enabled);
+}
+
+static boolean_t
+is_info_enabled()
+{
+ boolean_t enabled = B_FALSE;
+ const char *_envp = getenv(DEBUG_ENV_VAR);
+ if (_envp != NULL && atoi(_envp) >= 1)
+ enabled = B_TRUE;
+
+ return (enabled);
+}
+
+void
+zdoor_debug(const char *fmt, ...)
+{
+ va_list alist;
+
+ if (!is_debug_enabled())
+ return;
+
+ va_start(alist, fmt);
+
+ (void) fprintf(stderr, PREFIX, __TIME__, "DEBUG", pthread_self());
+ (void) vfprintf(stderr, fmt, alist);
+ (void) fprintf(stderr, "\n");
+ va_end(alist);
+}
+
+void
+zdoor_info(const char *fmt, ...)
+{
+ va_list alist;
+
+ if (!is_info_enabled())
+ return;
+
+ va_start(alist, fmt);
+
+ (void) fprintf(stderr, PREFIX, __TIME__, "INFO", pthread_self());
+ (void) vfprintf(stderr, fmt, alist);
+ (void) fprintf(stderr, "\n");
+ va_end(alist);
+}
+
+void
+zdoor_warn(const char *fmt, ...)
+{
+ va_list alist;
+
+ va_start(alist, fmt);
+
+ (void) fprintf(stderr, PREFIX, __TIME__, "WARN", pthread_self());
+ (void) vfprintf(stderr, fmt, alist);
+ (void) fprintf(stderr, "\n");
+ va_end(alist);
+}
+
+void
+zdoor_error(const char *fmt, ...)
+{
+ va_list alist;
+
+ va_start(alist, fmt);
+
+ (void) fprintf(stderr, PREFIX, __TIME__, "ERROR", pthread_self());
+ (void) vfprintf(stderr, fmt, alist);
+ (void) fprintf(stderr, "\n");
+ va_end(alist);
+}
diff --git a/usr/src/lib/libzdoor/common/zerror.h b/usr/src/lib/libzdoor/common/zerror.h
new file mode 100644
index 0000000000..afc848fcea
--- /dev/null
+++ b/usr/src/lib/libzdoor/common/zerror.h
@@ -0,0 +1,48 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Joyent, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _ZERROR_H
+#define _ZERROR_H
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern void zdoor_debug(const char *fmt, ...);
+extern void zdoor_info(const char *fmt, ...);
+extern void zdoor_warn(const char *fmt, ...);
+extern void zdoor_error(const char *fmt, ...);
+
+#define OUT_OF_MEMORY() \
+ zdoor_error("Out of Memory at %s:%d", __FILE__, __LINE__)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZERROR_H */
diff --git a/usr/src/lib/libzdoor/common/ztree.c b/usr/src/lib/libzdoor/common/ztree.c
new file mode 100644
index 0000000000..25c67f62fb
--- /dev/null
+++ b/usr/src/lib/libzdoor/common/ztree.c
@@ -0,0 +1,344 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Joyent, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <search.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "zerror.h"
+#include "ztree.h"
+
+
+/*
+ * ztree is just a helpful wrapper over a tsearch binary tree that deals with
+ * all of the libzdoor types.
+ *
+ * So what this ztree actually is is a tree of trees. The outer tree is a tree
+ * of zones, and each node holds a tree of doors.
+ */
+
+/*
+ * _ztree_compare(p1, p2) is the tsearch callback for comparing the "outer"
+ * tree (e.g., the one of zones).
+ */
+static int
+_ztree_compare(const void *p1, const void *p2)
+{
+ ztree_entry_t *z1 = (ztree_entry_t *)p1;
+ ztree_entry_t *z2 = (ztree_entry_t *)p2;
+
+ if (z1 == NULL && z2 == NULL)
+ return (0);
+ if (z1 == NULL && z2 != NULL)
+ return (-1);
+ if (z1 != NULL && z2 == NULL)
+ return (1);
+
+ return (strcmp(z1->zte_zonename, z2->zte_zonename));
+}
+
+/*
+ * _dtree_compare(p1, p2) is the tsearch callback for comparing the "inner"
+ * tree (e.g., the one of doors).
+ */
+static int
+_dtree_compare(const void *p1, const void *p2)
+{
+ dtree_entry_t *d1 = (dtree_entry_t *)p1;
+ dtree_entry_t *d2 = (dtree_entry_t *)p2;
+
+ if (d1 == NULL && d2 == NULL)
+ return (0);
+ if (d1 == NULL && d2 != NULL)
+ return (-1);
+ if (d1 != NULL && d2 == NULL)
+ return (1);
+
+ return (strcmp(d1->dte_service, d2->dte_service));
+}
+
+static void
+ztree_entry_free(ztree_entry_t *entry)
+{
+ if (entry == NULL)
+ return;
+
+ if (entry->zte_zonename != NULL)
+ free(entry->zte_zonename);
+
+ free(entry);
+}
+
+static void
+dtree_entry_free(dtree_entry_t *entry)
+{
+ if (entry == NULL)
+ return;
+
+ if (entry->dte_service)
+ free(entry->dte_service);
+
+ free(entry);
+}
+
+
+/*
+ * ztree_zone_add inserts a new zone into the tree iff
+ * there is not already an entry for that zone. This method returns one of
+ * four possible return codes, ZTREE_SUCCESS on :), ZTREE_ARGUMENT_ERROR if
+ * zone is NULL, ZTREE_ERROR if there is internal failure (e.g., OOM), and
+ * ZTREE_ALREADY_EXISTS if the zone is already in the tree.
+ */
+int
+ztree_zone_add(struct zdoor_handle *handle, const char *zonename,
+ztree_door_visitor visitor)
+{
+ ztree_entry_t *entry = NULL;
+ void *ret = NULL;
+ int status = ZTREE_SUCCESS;
+
+ if (handle == NULL || zonename == NULL)
+ return (ZTREE_ARGUMENT_ERROR);
+
+ entry = (ztree_entry_t *)calloc(1, sizeof (ztree_entry_t));
+ if (entry == NULL) {
+ OUT_OF_MEMORY();
+ return (ZTREE_ERROR);
+ }
+ entry->zte_zonename = strdup(zonename);
+ if (entry->zte_zonename == NULL) {
+ ztree_entry_free(entry);
+ OUT_OF_MEMORY();
+ return (ZTREE_ERROR);
+ }
+ entry->zte_action = ZDOOR_ACTION_NOOP;
+ entry->zte_parent = handle;
+ entry->zte_visitor = visitor;
+
+ ret = tsearch(entry, &(handle->zdh_ztree), _ztree_compare);
+ if (ret == NULL) {
+ ztree_entry_free(entry);
+ status = ZTREE_ERROR;
+ OUT_OF_MEMORY();
+ } else if ((*(ztree_entry_t **)ret) != entry) {
+ ztree_entry_free(entry);
+ status = ZTREE_ALREADY_EXISTS;
+ }
+
+ return (status);
+}
+
+
+/*
+ * ztree_zone_find returns the entry in the "outer" tree representing
+ * this zone, if it exists, NULL otherwise.
+ */
+ztree_entry_t *
+ztree_zone_find(struct zdoor_handle *handle, const char *zonename)
+{
+ ztree_entry_t key = {0};
+ void *ret = NULL;
+
+ if (handle == NULL || zonename == NULL)
+ return (NULL);
+
+ key.zte_zonename = (char *)zonename;
+ ret = tfind(&key, &(handle->zdh_ztree), _ztree_compare);
+
+ return (ret != NULL ? *(ztree_entry_t **)ret : NULL);
+}
+
+
+/*
+ * ztree_zone_remove removes an entry from the "outer" zone iff the
+ * zone exists. The cookie set by the creator is returned.
+ */
+void
+ztree_zone_remove(struct zdoor_handle *handle, ztree_entry_t *entry)
+{
+ if (handle == NULL || entry == NULL)
+ return;
+
+ tdelete(entry, &(handle->zdh_ztree), _ztree_compare);
+ ztree_entry_free(entry);
+}
+
+
+/*
+ * ztree_door_add inserts a new door into the inner tree iff
+ * there is not already an entry for that door. This method returns one of
+ * four possible return codes, ZTREE_SUCCESS on :), ZTREE_ARGUMENT_ERROR if
+ * zone is NULL, ZTREE_ERROR if there is internal failure (e.g., OOM), and
+ * ZTREE_ALREADY_EXISTS if the door is already in the tree.
+ */
+int
+ztree_door_add(struct zdoor_handle *handle, const char *zonename,
+const char *service, zdoor_callback callback, zdoor_cookie_t *cookie)
+{
+ dtree_entry_t *entry = NULL;
+ ztree_entry_t *znode = NULL;
+ void *ret = NULL;
+ int status = ZTREE_SUCCESS;
+
+ if (handle == NULL || zonename == NULL || service == NULL)
+ return (ZTREE_ARGUMENT_ERROR);
+
+ znode = ztree_zone_find(handle, zonename);
+ if (znode == NULL)
+ return (ZTREE_NOT_FOUND);
+
+ entry = (dtree_entry_t *)calloc(1, sizeof (dtree_entry_t));
+ if (entry == NULL) {
+ OUT_OF_MEMORY();
+ return (ZTREE_ERROR);
+ }
+ entry->dte_parent = znode;
+ entry->dte_callback = callback;
+ entry->dte_cookie = cookie;
+ entry->dte_service = strdup(service);
+ if (entry->dte_service == NULL) {
+ free(entry);
+ OUT_OF_MEMORY();
+ return (ZTREE_ERROR);
+ }
+
+ ret = tsearch(entry, &(znode->zte_door_tree), _dtree_compare);
+ if (ret == NULL) {
+ dtree_entry_free(entry);
+ OUT_OF_MEMORY();
+ status = ZTREE_ERROR;
+ } else if ((*(dtree_entry_t **)ret) != entry) {
+ dtree_entry_free(entry);
+ status = ZTREE_ALREADY_EXISTS;
+ } else {
+ znode->zte_num_doors++;
+ }
+
+ return (status);
+}
+
+
+/*
+ * ztree_door_find returns the entry in the "inner" tree
+ * representing this zone, if it exists, NULL otherwise.
+ */
+dtree_entry_t *
+ztree_door_find(struct zdoor_handle *handle, const char *zonename,
+const char *service)
+{
+ dtree_entry_t key = {0};
+ ztree_entry_t *znode = NULL;
+ void *ret = NULL;
+
+ if (handle == NULL || zonename == NULL || service == NULL)
+ return (NULL);
+
+ znode = ztree_zone_find(handle, zonename);
+ if (znode == NULL)
+ return (NULL);
+
+ key.dte_service = (char *)service;
+ ret = tfind(&key, &(znode->zte_door_tree), _dtree_compare);
+
+ return (ret != NULL ? *(dtree_entry_t **)ret : NULL);
+}
+
+
+/*
+ * ztree_door_remove(zone, door) removes a node from the inner tree iff
+ * both the door and zone exist. Note this frees the node as well. The
+ * cookie set by the creator is returned.
+ */
+zdoor_cookie_t *
+ztree_door_remove(struct zdoor_handle *handle, dtree_entry_t *entry)
+{
+ zdoor_cookie_t *cookie = NULL;
+ ztree_entry_t *znode = NULL;
+
+ if (handle == NULL || entry == NULL)
+ return (NULL);
+
+ znode = entry->dte_parent;
+ cookie = entry->dte_cookie;
+
+ tdelete(entry, &(znode->zte_door_tree), _dtree_compare);
+ dtree_entry_free(entry);
+
+ znode->zte_num_doors--;
+ if (znode->zte_num_doors == 0) {
+ zdoor_debug("ztree: zone %s has no doors left, removing",
+ znode->zte_zonename);
+ ztree_zone_remove(handle, znode);
+ }
+
+ return (cookie);
+}
+
+
+/*
+ * _ztree_door_visitor(nodep, which, depth) is the private function we use
+ * to wrap up tsearch's goofy API. We're really just using this to ensure
+ * zdoor doesn't get called > 1 times for a given entity in the ztree.
+ */
+static void
+_ztree_door_visitor(const void *nodep, const VISIT which, const int depth)
+{
+ dtree_entry_t *entry = *(dtree_entry_t **)nodep;
+
+ if (entry == NULL)
+ return;
+
+ switch (which) {
+ case preorder:
+ case endorder:
+ break;
+ case postorder:
+ case leaf:
+ if (entry->dte_parent->zte_visitor != NULL)
+ entry->dte_parent->zte_visitor(entry);
+ break;
+ }
+}
+
+
+/*
+ * ztree_walk_doors(zone) will proceed to visit every node in the "inner" tree
+ * for this zone, and callback the visitor that was registered on tree creation.
+ */
+void
+ztree_walk_doors(struct zdoor_handle *handle, const char *zonename)
+{
+ ztree_entry_t *znode = NULL;
+
+ if (handle == NULL || zonename == NULL)
+ return;
+
+ znode = ztree_zone_find(handle, zonename);
+ if (znode == NULL)
+ return;
+
+ twalk(znode->zte_door_tree, _ztree_door_visitor);
+}
diff --git a/usr/src/lib/libzdoor/common/ztree.h b/usr/src/lib/libzdoor/common/ztree.h
new file mode 100644
index 0000000000..b46dace287
--- /dev/null
+++ b/usr/src/lib/libzdoor/common/ztree.h
@@ -0,0 +1,88 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Joyent, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _ZTREE_H
+#define _ZTREE_H
+
+#include <zdoor.h>
+#include <zone.h>
+#include "zdoor-int.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct dtree_entry;
+
+typedef void (*ztree_door_visitor)(struct dtree_entry *entry);
+
+typedef struct ztree_entry {
+ char *zte_zonename;
+ zdoor_action_t zte_action;
+ int zte_num_doors;
+ void *zte_door_tree;
+ ztree_door_visitor zte_visitor;
+ struct zdoor_handle *zte_parent;
+} ztree_entry_t;
+
+typedef struct dtree_entry {
+ char *dte_service;
+ int dte_door;
+ zdoor_callback dte_callback;
+ zdoor_cookie_t *dte_cookie;
+ ztree_entry_t *dte_parent;
+} dtree_entry_t;
+
+#define ZTREE_SUCCESS 0
+#define ZTREE_ERROR -1
+#define ZTREE_ARGUMENT_ERROR -2
+#define ZTREE_ALREADY_EXISTS -3
+#define ZTREE_NOT_FOUND -4
+
+extern int ztree_zone_add(struct zdoor_handle *handle,
+ const char *zonename, ztree_door_visitor visitor);
+
+extern ztree_entry_t *ztree_zone_find(struct zdoor_handle *handle,
+ const char *zonename);
+
+extern void ztree_zone_remove(struct zdoor_handle *handle,
+ ztree_entry_t *entry);
+
+extern int ztree_door_add(struct zdoor_handle *handle, const char *zonename,
+ const char *service, zdoor_callback callback, zdoor_cookie_t *cookie);
+
+extern dtree_entry_t *ztree_door_find(struct zdoor_handle *handle,
+ const char *zonename, const char *service);
+
+extern zdoor_cookie_t *ztree_door_remove(struct zdoor_handle *handle,
+ dtree_entry_t *entry);
+
+extern void ztree_walk_doors(struct zdoor_handle *handle, const char *zonename);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZTREE_H */
diff --git a/usr/src/lib/libzdoor/i386/Makefile b/usr/src/lib/libzdoor/i386/Makefile
new file mode 100644
index 0000000000..2bfe8001d9
--- /dev/null
+++ b/usr/src/lib/libzdoor/i386/Makefile
@@ -0,0 +1,31 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2011 Joyent, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/libzfs/common/libzfs.h b/usr/src/lib/libzfs/common/libzfs.h
index 9aa5ba5905..eb8ae6ceb7 100644
--- a/usr/src/lib/libzfs/common/libzfs.h
+++ b/usr/src/lib/libzfs/common/libzfs.h
@@ -64,6 +64,7 @@ typedef enum zfs_error {
EZFS_PROPTYPE, /* property does not apply to dataset type */
EZFS_PROPNONINHERIT, /* property is not inheritable */
EZFS_PROPSPACE, /* bad quota or reservation */
+ EZFS_PROPCACHED, /* prop unavail since cachedprops flag set */
EZFS_BADTYPE, /* dataset is not of appropriate type */
EZFS_BUSY, /* pool or dataset is busy */
EZFS_EXISTS, /* pool or dataset already exists */
@@ -184,6 +185,7 @@ extern libzfs_handle_t *zpool_get_handle(zpool_handle_t *);
extern libzfs_handle_t *zfs_get_handle(zfs_handle_t *);
extern void libzfs_print_on_error(libzfs_handle_t *, boolean_t);
+extern void libzfs_set_cachedprops(libzfs_handle_t *, boolean_t);
extern void zfs_save_arguments(int argc, char **, char *, int);
extern int zpool_log_history(libzfs_handle_t *, const char *);
diff --git a/usr/src/lib/libzfs/common/libzfs_dataset.c b/usr/src/lib/libzfs/common/libzfs_dataset.c
index 05a2fdc9be..f2dd13f1af 100644
--- a/usr/src/lib/libzfs/common/libzfs_dataset.c
+++ b/usr/src/lib/libzfs/common/libzfs_dataset.c
@@ -329,6 +329,10 @@ get_stats_ioctl(zfs_handle_t *zhp, zfs_cmd_t *zc)
{
libzfs_handle_t *hdl = zhp->zfs_hdl;
+ if (hdl->libzfs_cachedprops &&
+ libzfs_cmd_set_cachedprops(hdl, zc) != 0)
+ return (-1);
+
(void) strlcpy(zc->zc_name, zhp->zfs_name, sizeof (zc->zc_name));
while (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, zc) != 0) {
@@ -1909,6 +1913,11 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src,
case ZFS_PROP_NORMALIZE:
case ZFS_PROP_UTF8ONLY:
case ZFS_PROP_CASE:
+ if (zhp->zfs_hdl->libzfs_cachedprops) {
+ return (zfs_error(zhp->zfs_hdl, EZFS_PROPCACHED,
+ "property unavailable since not cached"));
+ }
+
if (!zfs_prop_valid_for_type(prop, zhp->zfs_head_type) ||
zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0)
return (-1);
@@ -2149,6 +2158,7 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
char *str;
const char *strval;
boolean_t received = zfs_is_recvd_props_mode(zhp);
+ boolean_t printerr;
/*
* Check to see if this property applies to our object
@@ -2159,6 +2169,16 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
if (received && zfs_prop_readonly(prop))
return (-1);
+ if (zhp->zfs_hdl->libzfs_cachedprops &&
+ zfs_prop_cacheable(prop)) {
+ printerr = zhp->zfs_hdl->libzfs_printerr;
+ libzfs_print_on_error(zhp->zfs_hdl, B_FALSE);
+ (void) zfs_error(zhp->zfs_hdl, EZFS_PROPCACHED,
+ "property unavailable since not cached");
+ libzfs_print_on_error(zhp->zfs_hdl, printerr);
+ return (-1);
+ }
+
if (src)
*src = ZPROP_SRC_NONE;
diff --git a/usr/src/lib/libzfs/common/libzfs_impl.h b/usr/src/lib/libzfs/common/libzfs_impl.h
index ad4142da3f..9d98718dcb 100644
--- a/usr/src/lib/libzfs/common/libzfs_impl.h
+++ b/usr/src/lib/libzfs/common/libzfs_impl.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
*/
@@ -79,6 +80,7 @@ struct libzfs_handle {
libzfs_fru_t **libzfs_fru_hash;
libzfs_fru_t *libzfs_fru_list;
char libzfs_chassis_id[256];
+ boolean_t libzfs_cachedprops;
};
#define ZFSSHARE_MISS 0x01 /* Didn't find entry in cache */
@@ -149,6 +151,7 @@ int zpool_standard_error_fmt(libzfs_handle_t *, int, const char *, ...);
int get_dependents(libzfs_handle_t *, boolean_t, const char *, char ***,
size_t *);
zfs_handle_t *make_dataset_handle_zc(libzfs_handle_t *, zfs_cmd_t *);
+int libzfs_cmd_set_cachedprops(libzfs_handle_t *, zfs_cmd_t *);
int zprop_parse_value(libzfs_handle_t *, nvpair_t *, int, zfs_type_t,
diff --git a/usr/src/lib/libzfs/common/libzfs_iter.c b/usr/src/lib/libzfs/common/libzfs_iter.c
index f8ad5014cb..19ac0b2625 100644
--- a/usr/src/lib/libzfs/common/libzfs_iter.c
+++ b/usr/src/lib/libzfs/common/libzfs_iter.c
@@ -23,6 +23,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#include <stdio.h>
@@ -110,6 +111,10 @@ zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data)
if (zhp->zfs_type != ZFS_TYPE_FILESYSTEM)
return (0);
+ if (zhp->zfs_hdl->libzfs_cachedprops &&
+ libzfs_cmd_set_cachedprops(zhp->zfs_hdl, &zc) != 0)
+ return (-1);
+
if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0)
return (-1);
@@ -120,9 +125,8 @@ zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data)
* that the pool has since been removed.
*/
if ((nzhp = make_dataset_handle_zc(zhp->zfs_hdl,
- &zc)) == NULL) {
+ &zc)) == NULL)
continue;
- }
if ((ret = func(nzhp, data)) != 0) {
zcmd_free_nvlists(&zc);
@@ -147,15 +151,19 @@ zfs_iter_snapshots(zfs_handle_t *zhp, zfs_iter_f func, void *data)
zhp->zfs_type == ZFS_TYPE_BOOKMARK)
return (0);
+ if (zhp->zfs_hdl->libzfs_cachedprops &&
+ libzfs_cmd_set_cachedprops(zhp->zfs_hdl, &zc) != 0)
+ return (-1);
+
if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0)
return (-1);
+
while ((ret = zfs_do_list_ioctl(zhp, ZFS_IOC_SNAPSHOT_LIST_NEXT,
&zc)) == 0) {
if ((nzhp = make_dataset_handle_zc(zhp->zfs_hdl,
- &zc)) == NULL) {
+ &zc)) == NULL)
continue;
- }
if ((ret = func(nzhp, data)) != 0) {
zcmd_free_nvlists(&zc);
diff --git a/usr/src/lib/libzfs/common/libzfs_pool.c b/usr/src/lib/libzfs/common/libzfs_pool.c
index 675286c756..52141f6611 100644
--- a/usr/src/lib/libzfs/common/libzfs_pool.c
+++ b/usr/src/lib/libzfs/common/libzfs_pool.c
@@ -270,6 +270,12 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
case PROP_TYPE_NUMBER:
intval = zpool_get_prop_int(zhp, prop, &src);
+ if (literal && prop != ZPOOL_PROP_HEALTH) {
+ (void) snprintf(buf, len, "%llu",
+ (u_longlong_t)intval);
+ break;
+ }
+
switch (prop) {
case ZPOOL_PROP_SIZE:
case ZPOOL_PROP_ALLOCATED:
@@ -334,6 +340,7 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
default:
(void) snprintf(buf, len, "%llu", intval);
}
+
break;
case PROP_TYPE_INDEX:
diff --git a/usr/src/lib/libzfs/common/libzfs_util.c b/usr/src/lib/libzfs/common/libzfs_util.c
index 414d74ee20..044045853f 100644
--- a/usr/src/lib/libzfs/common/libzfs_util.c
+++ b/usr/src/lib/libzfs/common/libzfs_util.c
@@ -82,6 +82,9 @@ libzfs_error_description(libzfs_handle_t *hdl)
return (dgettext(TEXT_DOMAIN, "property cannot be inherited"));
case EZFS_PROPSPACE:
return (dgettext(TEXT_DOMAIN, "invalid quota or reservation"));
+ case EZFS_PROPCACHED:
+ return (dgettext(TEXT_DOMAIN, "property unavailable since "
+ "cachedprops flag set"));
case EZFS_BADTYPE:
return (dgettext(TEXT_DOMAIN, "operation not applicable to "
"datasets of this type"));
@@ -610,6 +613,42 @@ libzfs_print_on_error(libzfs_handle_t *hdl, boolean_t printerr)
hdl->libzfs_printerr = printerr;
}
+/*
+ * Set the value of the cachedprops flag. If the cachedprops flag is set,
+ * operations which get ZFS properties will only retrieve a property if the
+ * property is cached somewhere in memory.
+ *
+ * Consumers of libzfs should take care when setting this flag, as they will
+ * prevent themselves from listing the full set of ZFS properties.
+ *
+ * ZFS properties which always require disk I/O are ZPL properties (utf8only,
+ * normalization, etc.) and the volsize and volblocksize properties for volumes.
+ */
+void
+libzfs_set_cachedprops(libzfs_handle_t *hdl, boolean_t cachedprops)
+{
+ hdl->libzfs_cachedprops = cachedprops;
+}
+
+/*
+ * Adds a src nvlist to a zfs_cmd_t which specifies that only cached (i.e., will
+ * not require a disk access) properties should be retrieved.
+ */
+int
+libzfs_cmd_set_cachedprops(libzfs_handle_t *hdl, zfs_cmd_t *zc)
+{
+ nvlist_t *nvl;
+ int ret;
+
+ if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0 ||
+ nvlist_add_boolean_value(nvl, "cachedpropsonly", B_TRUE) != 0)
+ return (no_memory(hdl));
+
+ ret = zcmd_write_src_nvlist(hdl, zc, nvl);
+ nvlist_free(nvl);
+ return (ret);
+}
+
libzfs_handle_t *
libzfs_init(void)
{
@@ -645,6 +684,8 @@ libzfs_init(void)
zpool_feature_init();
libzfs_mnttab_init(hdl);
+ hdl->libzfs_cachedprops = B_FALSE;
+
return (hdl);
}
diff --git a/usr/src/lib/libzfs/common/mapfile-vers b/usr/src/lib/libzfs/common/mapfile-vers
index 65f266996e..0e10528f9e 100644
--- a/usr/src/lib/libzfs/common/mapfile-vers
+++ b/usr/src/lib/libzfs/common/mapfile-vers
@@ -21,6 +21,7 @@
# Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright 2011 Nexenta Systems, Inc. All rights reserved.
# Copyright (c) 2013 by Delphix. All rights reserved.
+# Copyright (c) 2012, Joyent, Inc. All rights reserved.
#
# MAPFILE HEADER START
#
@@ -59,6 +60,7 @@ SYMBOL_VERSION SUNWprivate_1.1 {
libzfs_init;
libzfs_mnttab_cache;
libzfs_print_on_error;
+ libzfs_set_cachedprops;
spa_feature_table;
zfs_allocatable_devs;
zfs_asprintf;
@@ -103,6 +105,7 @@ SYMBOL_VERSION SUNWprivate_1.1 {
zfs_path_to_zhandle;
zfs_promote;
zfs_prop_align_right;
+ zfs_prop_cacheable;
zfs_prop_column_name;
zfs_prop_default_numeric;
zfs_prop_default_string;
diff --git a/usr/src/lib/libzonecfg/common/getzoneent.c b/usr/src/lib/libzonecfg/common/getzoneent.c
index 8155f7272a..76664fcc92 100644
--- a/usr/src/lib/libzonecfg/common/getzoneent.c
+++ b/usr/src/lib/libzonecfg/common/getzoneent.c
@@ -403,14 +403,6 @@ putzoneent(struct zoneent *ze, zoneent_op_t operation)
*/
if (ze->zone_state >= 0) {
zone_state = zone_state_str(ze->zone_state);
-
- /*
- * If the caller is uninstalling this zone,
- * then wipe out the uuid. The zone's contents
- * are no longer known.
- */
- if (ze->zone_state < ZONE_STATE_INSTALLED)
- zone_uuid = "";
}
/* If a new name is supplied, use it. */
@@ -419,6 +411,12 @@ putzoneent(struct zoneent *ze, zoneent_op_t operation)
if (ze->zone_path[0] != '\0')
zone_path = ze->zone_path;
+
+ /* If new UUID provided, replace it */
+ if (!uuid_is_null(ze->zone_uuid)) {
+ uuid_unparse(ze->zone_uuid, uuidstr);
+ zone_uuid = uuidstr;
+ }
break;
case PZE_REMOVE:
diff --git a/usr/src/lib/libzonecfg/common/libzonecfg.c b/usr/src/lib/libzonecfg/common/libzonecfg.c
index 6ae73b9f3b..6ae93b142e 100644
--- a/usr/src/lib/libzonecfg/common/libzonecfg.c
+++ b/usr/src/lib/libzonecfg/common/libzonecfg.c
@@ -22,6 +22,7 @@
/*
* Copyright 2014 Gary Mills
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, Joyent Inc. All rights reserved.
*/
#include <libsysevent.h>
@@ -78,6 +79,8 @@
#define ZONE_EVENT_PING_SUBCLASS "ping"
#define ZONE_EVENT_PING_PUBLISHER "solaris"
+#define DEBUGID_FILE "/etc/zones/did.txt"
+
/* Hard-code the DTD element/attribute/entity names just once, here. */
#define DTD_ELEM_ATTR (const xmlChar *) "attr"
#define DTD_ELEM_COMMENT (const xmlChar *) "comment"
@@ -85,6 +88,7 @@
#define DTD_ELEM_FS (const xmlChar *) "filesystem"
#define DTD_ELEM_FSOPTION (const xmlChar *) "fsoption"
#define DTD_ELEM_NET (const xmlChar *) "network"
+#define DTD_ELEM_NETATTR (const xmlChar *) "net-attr"
#define DTD_ELEM_RCTL (const xmlChar *) "rctl"
#define DTD_ELEM_RCTLVALUE (const xmlChar *) "rctl-value"
#define DTD_ELEM_ZONE (const xmlChar *) "zone"
@@ -104,10 +108,12 @@
#define DTD_ATTR_IPTYPE (const xmlChar *) "ip-type"
#define DTD_ATTR_DEFROUTER (const xmlChar *) "defrouter"
#define DTD_ATTR_DIR (const xmlChar *) "directory"
+#define DTD_ATTR_GNIC (const xmlChar *) "global-nic"
#define DTD_ATTR_LIMIT (const xmlChar *) "limit"
#define DTD_ATTR_LIMITPRIV (const xmlChar *) "limitpriv"
#define DTD_ATTR_BOOTARGS (const xmlChar *) "bootargs"
#define DTD_ATTR_SCHED (const xmlChar *) "scheduling-class"
+#define DTD_ATTR_MAC (const xmlChar *) "mac-addr"
#define DTD_ATTR_MATCH (const xmlChar *) "match"
#define DTD_ATTR_NAME (const xmlChar *) "name"
#define DTD_ATTR_PHYSICAL (const xmlChar *) "physical"
@@ -117,6 +123,7 @@
#define DTD_ATTR_SPECIAL (const xmlChar *) "special"
#define DTD_ATTR_TYPE (const xmlChar *) "type"
#define DTD_ATTR_VALUE (const xmlChar *) "value"
+#define DTD_ATTR_VLANID (const xmlChar *) "vlan-id"
#define DTD_ATTR_ZONEPATH (const xmlChar *) "zonepath"
#define DTD_ATTR_NCPU_MIN (const xmlChar *) "ncpu_min"
#define DTD_ATTR_NCPU_MAX (const xmlChar *) "ncpu_max"
@@ -129,6 +136,7 @@
#define DTD_ATTR_MODE (const xmlChar *) "mode"
#define DTD_ATTR_ACL (const xmlChar *) "acl"
#define DTD_ATTR_BRAND (const xmlChar *) "brand"
+#define DTD_ATTR_DID (const xmlChar *) "debugid"
#define DTD_ATTR_HOSTID (const xmlChar *) "hostid"
#define DTD_ATTR_USER (const xmlChar *) "user"
#define DTD_ATTR_AUTHS (const xmlChar *) "auths"
@@ -175,9 +183,12 @@ static struct alias {
{ALIAS_MAXSEMIDS, "zone.max-sem-ids", "privileged", "deny", 0},
{ALIAS_MAXLOCKEDMEM, "zone.max-locked-memory", "privileged", "deny", 0},
{ALIAS_MAXSWAP, "zone.max-swap", "privileged", "deny", 0},
+ {ALIAS_MAXPHYSMEM, "zone.max-physical-memory", "privileged", "deny",
+ 1048576},
{ALIAS_SHARES, "zone.cpu-shares", "privileged", "none", 0},
{ALIAS_CPUCAP, "zone.cpu-cap", "privileged", "deny", 0},
{ALIAS_MAXPROCS, "zone.max-processes", "privileged", "deny", 100},
+ {ALIAS_ZFSPRI, "zone.zfs-io-priority", "privileged", "none", 0},
{NULL, NULL, NULL, NULL, 0}
};
@@ -2065,6 +2076,32 @@ zonecfg_ifname_exists(sa_family_t af, char *ifname)
}
/*
+ * Turn an addr that looks like f:2:0:44:5:6C into 0f:02:00:44:05:6c
+ * We're expecting a dst of at least MAXMACADDRLEN size here.
+ */
+static void
+normalize_mac_addr(char *dst, const char *src, int len)
+{
+ char *p, *e, *sep = "";
+ long n;
+ char buf[MAXMACADDRLEN], tmp[4];
+
+ *dst = '\0';
+ (void) strlcpy(buf, src, sizeof (buf));
+ p = strtok(buf, ":");
+ while (p != NULL) {
+ n = strtol(p, &e, 16);
+ if (*e != NULL || n > 0xff)
+ return;
+ (void) snprintf(tmp, sizeof (tmp), "%s%02x", sep, n);
+ (void) strlcat(dst, tmp, len);
+
+ sep = ":";
+ p = strtok(NULL, ":");
+ }
+}
+
+/*
* Determines whether there is a net resource with the physical interface, IP
* address, and default router specified by 'tabptr' in the zone configuration
* to which 'handle' refers. 'tabptr' must have an interface, an address, a
@@ -2083,13 +2120,18 @@ zonecfg_ifname_exists(sa_family_t af, char *ifname)
int
zonecfg_lookup_nwif(zone_dochandle_t handle, struct zone_nwiftab *tabptr)
{
- xmlNodePtr cur;
+ xmlNodePtr cur, val;
xmlNodePtr firstmatch;
int err;
char address[INET6_ADDRSTRLEN];
char physical[LIFNAMSIZ];
+ char mac[MAXMACADDRLEN];
+ char norm_mac[MAXMACADDRLEN];
+ char gnic[LIFNAMSIZ];
size_t addrspec; /* nonzero if tabptr has IP addr */
size_t physspec; /* nonzero if tabptr has interface */
+ size_t macspec; /* nonzero if tabptr has mac addr */
+ size_t gnicspec; /* nonzero if tabptr has gnic */
size_t defrouterspec; /* nonzero if tabptr has def. router */
size_t allowed_addrspec;
zone_iptype_t iptype;
@@ -2101,17 +2143,20 @@ zonecfg_lookup_nwif(zone_dochandle_t handle, struct zone_nwiftab *tabptr)
* Determine the fields that will be searched. There must be at least
* one.
*
- * zone_nwif_address, zone_nwif_physical, and zone_nwif_defrouter are
+ * zone_nwif_address, zone_nwif_physical, zone_nwif_defrouter,
+ * zone_nwif_mac, zone_nwif_vlan_id and zone_nwif_gnic are
* arrays, so no NULL checks are necessary.
*/
addrspec = strlen(tabptr->zone_nwif_address);
physspec = strlen(tabptr->zone_nwif_physical);
+ macspec = strlen(tabptr->zone_nwif_mac);
+ gnicspec = strlen(tabptr->zone_nwif_gnic);
defrouterspec = strlen(tabptr->zone_nwif_defrouter);
allowed_addrspec = strlen(tabptr->zone_nwif_allowed_address);
if (addrspec != 0 && allowed_addrspec != 0)
return (Z_INVAL); /* can't specify both */
if (addrspec == 0 && physspec == 0 && defrouterspec == 0 &&
- allowed_addrspec == 0)
+ allowed_addrspec == 0 && macspec == 0 && gnicspec == 0)
return (Z_INSUFFICIENT_SPEC);
if ((err = operation_prep(handle)) != Z_OK)
@@ -2138,6 +2183,19 @@ zonecfg_lookup_nwif(zone_dochandle_t handle, struct zone_nwiftab *tabptr)
physical, sizeof (physical)) != Z_OK ||
strcmp(tabptr->zone_nwif_physical, physical) != 0))
continue;
+ if (iptype == ZS_EXCLUSIVE && macspec != 0) {
+ if (fetchprop(cur, DTD_ATTR_MAC, mac, sizeof (mac)) !=
+ Z_OK)
+ continue;
+ normalize_mac_addr(norm_mac, mac, sizeof (norm_mac));
+ if (strcmp(tabptr->zone_nwif_mac, norm_mac) != 0)
+ continue;
+ }
+ if (iptype == ZS_EXCLUSIVE && gnicspec != 0 &&
+ (fetchprop(cur, DTD_ATTR_GNIC, gnic,
+ sizeof (gnic)) != Z_OK ||
+ strcmp(tabptr->zone_nwif_gnic, gnic) != 0))
+ continue;
if (iptype == ZS_SHARED && addrspec != 0 &&
(fetchprop(cur, DTD_ATTR_ADDRESS, address,
sizeof (address)) != Z_OK ||
@@ -2180,6 +2238,21 @@ zonecfg_lookup_nwif(zone_dochandle_t handle, struct zone_nwiftab *tabptr)
return (err);
if (iptype == ZS_EXCLUSIVE &&
+ (err = fetchprop(cur, DTD_ATTR_MAC, tabptr->zone_nwif_mac,
+ sizeof (tabptr->zone_nwif_mac))) != Z_OK)
+ return (err);
+
+ if (iptype == ZS_EXCLUSIVE &&
+ (err = fetchprop(cur, DTD_ATTR_VLANID, tabptr->zone_nwif_vlan_id,
+ sizeof (tabptr->zone_nwif_vlan_id))) != Z_OK)
+ return (err);
+
+ if (iptype == ZS_EXCLUSIVE &&
+ (err = fetchprop(cur, DTD_ATTR_GNIC, tabptr->zone_nwif_gnic,
+ sizeof (tabptr->zone_nwif_gnic))) != Z_OK)
+ return (err);
+
+ if (iptype == ZS_EXCLUSIVE &&
(err = fetchprop(cur, DTD_ATTR_ALLOWED_ADDRESS,
tabptr->zone_nwif_allowed_address,
sizeof (tabptr->zone_nwif_allowed_address))) != Z_OK)
@@ -2190,13 +2263,40 @@ zonecfg_lookup_nwif(zone_dochandle_t handle, struct zone_nwiftab *tabptr)
sizeof (tabptr->zone_nwif_defrouter))) != Z_OK)
return (err);
+ tabptr->zone_nwif_attrp = NULL;
+ for (val = cur->xmlChildrenNode; val != NULL; val = val->next) {
+ struct zone_res_attrtab *valptr;
+
+ valptr = (struct zone_res_attrtab *)malloc(
+ sizeof (struct zone_res_attrtab));
+ if (valptr == NULL)
+ return (Z_NOMEM);
+
+ valptr->zone_res_attr_name[0] =
+ valptr->zone_res_attr_value[0] = '\0';
+ if (zonecfg_add_res_attr(&(tabptr->zone_nwif_attrp), valptr)
+ != Z_OK) {
+ free(valptr);
+ break;
+ }
+
+ if ((fetchprop(val, DTD_ATTR_NAME, valptr->zone_res_attr_name,
+ sizeof (valptr->zone_res_attr_name)) != Z_OK))
+ break;
+ if ((fetchprop(val, DTD_ATTR_VALUE,
+ valptr->zone_res_attr_value,
+ sizeof (valptr->zone_res_attr_value)) != Z_OK))
+ break;
+ }
+
return (Z_OK);
}
static int
zonecfg_add_nwif_core(zone_dochandle_t handle, struct zone_nwiftab *tabptr)
{
- xmlNodePtr newnode, cur = handle->zone_dh_cur;
+ xmlNodePtr newnode, cur = handle->zone_dh_cur, valnode;
+ struct zone_res_attrtab *valptr;
int err;
newnode = xmlNewTextChild(cur, NULL, DTD_ELEM_NET, NULL);
@@ -2212,13 +2312,40 @@ zonecfg_add_nwif_core(zone_dochandle_t handle, struct zone_nwiftab *tabptr)
tabptr->zone_nwif_physical)) != Z_OK)
return (err);
/*
- * Do not add this property when it is not set, for backwards
- * compatibility and because it is optional.
+ * Do not add these properties when they are not set, for backwards
+ * compatibility and because they are optional.
*/
if ((strlen(tabptr->zone_nwif_defrouter) > 0) &&
((err = newprop(newnode, DTD_ATTR_DEFROUTER,
tabptr->zone_nwif_defrouter)) != Z_OK))
return (err);
+ if (strlen(tabptr->zone_nwif_mac) > 0 &&
+ (err = newprop(newnode, DTD_ATTR_MAC,
+ tabptr->zone_nwif_mac)) != Z_OK)
+ return (err);
+ if (strlen(tabptr->zone_nwif_vlan_id) > 0 &&
+ (err = newprop(newnode, DTD_ATTR_VLANID,
+ tabptr->zone_nwif_vlan_id)) != Z_OK)
+ return (err);
+ if (strlen(tabptr->zone_nwif_gnic) > 0 &&
+ (err = newprop(newnode, DTD_ATTR_GNIC,
+ tabptr->zone_nwif_gnic)) != Z_OK)
+ return (err);
+
+ for (valptr = tabptr->zone_nwif_attrp; valptr != NULL;
+ valptr = valptr->zone_res_attr_next) {
+ valnode = xmlNewTextChild(newnode, NULL, DTD_ELEM_NETATTR,
+ NULL);
+ err = newprop(valnode, DTD_ATTR_NAME,
+ valptr->zone_res_attr_name);
+ if (err != Z_OK)
+ return (err);
+ err = newprop(valnode, DTD_ATTR_VALUE,
+ valptr->zone_res_attr_value);
+ if (err != Z_OK)
+ return (err);
+ }
+
return (Z_OK);
}
@@ -2243,7 +2370,8 @@ static int
zonecfg_delete_nwif_core(zone_dochandle_t handle, struct zone_nwiftab *tabptr)
{
xmlNodePtr cur = handle->zone_dh_cur;
- boolean_t addr_match, phys_match, allowed_addr_match;
+ boolean_t addr_match, phys_match, allowed_addr_match, mac_match,
+ gnic_match;
for (cur = cur->xmlChildrenNode; cur != NULL; cur = cur->next) {
if (xmlStrcmp(cur->name, DTD_ELEM_NET))
@@ -2255,8 +2383,13 @@ zonecfg_delete_nwif_core(zone_dochandle_t handle, struct zone_nwiftab *tabptr)
tabptr->zone_nwif_allowed_address);
phys_match = match_prop(cur, DTD_ATTR_PHYSICAL,
tabptr->zone_nwif_physical);
+ mac_match = match_prop(cur, DTD_ATTR_MAC,
+ tabptr->zone_nwif_mac);
+ gnic_match = match_prop(cur, DTD_ATTR_GNIC,
+ tabptr->zone_nwif_gnic);
- if (addr_match && allowed_addr_match && phys_match) {
+ if (((addr_match && allowed_addr_match) || mac_match ||
+ gnic_match) && phys_match) {
xmlUnlinkNode(cur);
xmlFreeNode(cur);
return (Z_OK);
@@ -2305,6 +2438,58 @@ zonecfg_modify_nwif(
return (Z_OK);
}
+void
+zonecfg_free_res_attr_list(struct zone_res_attrtab *valtab)
+{
+ if (valtab == NULL)
+ return;
+ zonecfg_free_res_attr_list(valtab->zone_res_attr_next);
+ free(valtab);
+}
+
+int
+zonecfg_add_res_attr(struct zone_res_attrtab **headptr,
+ struct zone_res_attrtab *valtabptr)
+{
+ struct zone_res_attrtab *last, *old, *new;
+
+ last = *headptr;
+ for (old = last; old != NULL; old = old->zone_res_attr_next)
+ last = old; /* walk to the end of the list */
+ new = valtabptr; /* alloc'd by caller */
+ new->zone_res_attr_next = NULL;
+ if (last == NULL)
+ *headptr = new;
+ else
+ last->zone_res_attr_next = new;
+ return (Z_OK);
+}
+
+int
+zonecfg_remove_res_attr(struct zone_res_attrtab **headptr,
+ struct zone_res_attrtab *valtabptr)
+{
+ struct zone_res_attrtab *last, *this, *next;
+
+ last = *headptr;
+ for (this = last; this != NULL; this = this->zone_res_attr_next) {
+ if (strcmp(this->zone_res_attr_name,
+ valtabptr->zone_res_attr_name) == 0 &&
+ strcmp(this->zone_res_attr_value,
+ valtabptr->zone_res_attr_value) == 0) {
+ next = this->zone_res_attr_next;
+ if (this == *headptr)
+ *headptr = next;
+ else
+ last->zone_res_attr_next = next;
+ free(this);
+ return (Z_OK);
+ } else
+ last = this;
+ }
+ return (Z_NO_PROPERTY_ID);
+}
+
/*
* Must be a comma-separated list of alpha-numeric file system names.
*/
@@ -2454,7 +2639,7 @@ zonecfg_set_hostid(zone_dochandle_t handle, const char *hostidp)
int
zonecfg_lookup_dev(zone_dochandle_t handle, struct zone_devtab *tabptr)
{
- xmlNodePtr cur, firstmatch;
+ xmlNodePtr cur, val, firstmatch;
int err;
char match[MAXPATHLEN];
@@ -2499,13 +2684,40 @@ zonecfg_lookup_dev(zone_dochandle_t handle, struct zone_devtab *tabptr)
sizeof (tabptr->zone_dev_match))) != Z_OK)
return (err);
+ tabptr->zone_dev_attrp = NULL;
+ for (val = cur->xmlChildrenNode; val != NULL; val = val->next) {
+ struct zone_res_attrtab *valptr;
+
+ valptr = (struct zone_res_attrtab *)malloc(
+ sizeof (struct zone_res_attrtab));
+ if (valptr == NULL)
+ return (Z_NOMEM);
+
+ valptr->zone_res_attr_name[0] =
+ valptr->zone_res_attr_value[0] = '\0';
+ if (zonecfg_add_res_attr(&(tabptr->zone_dev_attrp), valptr)
+ != Z_OK) {
+ free(valptr);
+ break;
+ }
+
+ if ((fetchprop(val, DTD_ATTR_NAME, valptr->zone_res_attr_name,
+ sizeof (valptr->zone_res_attr_name)) != Z_OK))
+ break;
+ if ((fetchprop(val, DTD_ATTR_VALUE,
+ valptr->zone_res_attr_value,
+ sizeof (valptr->zone_res_attr_value)) != Z_OK))
+ break;
+ }
+
return (Z_OK);
}
static int
zonecfg_add_dev_core(zone_dochandle_t handle, struct zone_devtab *tabptr)
{
- xmlNodePtr newnode, cur = handle->zone_dh_cur;
+ xmlNodePtr newnode, cur = handle->zone_dh_cur, valnode;
+ struct zone_res_attrtab *valptr;
int err;
newnode = xmlNewTextChild(cur, NULL, DTD_ELEM_DEVICE, NULL);
@@ -2514,6 +2726,21 @@ zonecfg_add_dev_core(zone_dochandle_t handle, struct zone_devtab *tabptr)
tabptr->zone_dev_match)) != Z_OK)
return (err);
+ for (valptr = tabptr->zone_dev_attrp; valptr != NULL;
+ valptr = valptr->zone_res_attr_next) {
+ valnode = xmlNewTextChild(newnode, NULL, DTD_ELEM_NETATTR,
+ NULL);
+ err = newprop(valnode, DTD_ATTR_NAME,
+ valptr->zone_res_attr_name);
+ if (err != Z_OK)
+ return (err);
+ err = newprop(valnode, DTD_ATTR_VALUE,
+ valptr->zone_res_attr_value);
+ if (err != Z_OK)
+ return (err);
+ }
+
+
return (Z_OK);
}
@@ -4573,7 +4800,7 @@ get_pool_sched_class(char *poolname, char *class, int clsize)
pool_conf_t *poolconf;
pool_t *pool;
pool_elem_t *pe;
- pool_value_t *pv = pool_value_alloc();
+ pool_value_t *pv;
const char *sched_str;
if (pool_get_status(&status) != PO_SUCCESS || status != POOL_ENABLED)
@@ -4594,15 +4821,23 @@ get_pool_sched_class(char *poolname, char *class, int clsize)
return (Z_NO_POOL);
}
+ if ((pv = pool_value_alloc()) == NULL) {
+ (void) pool_conf_close(poolconf);
+ pool_conf_free(poolconf);
+ return (Z_NO_POOL);
+ }
+
pe = pool_to_elem(poolconf, pool);
if (pool_get_property(poolconf, pe, "pool.scheduler", pv) !=
POC_STRING) {
(void) pool_conf_close(poolconf);
+ pool_value_free(pv);
pool_conf_free(poolconf);
return (Z_NO_ENTRY);
}
(void) pool_value_get_string(pv, &sched_str);
(void) pool_conf_close(poolconf);
+ pool_value_free(pv);
pool_conf_free(poolconf);
if (strlcpy(class, sched_str, clsize) >= clsize)
return (Z_TOO_BIG);
@@ -4711,7 +4946,8 @@ zonecfg_setnwifent(zone_dochandle_t handle)
int
zonecfg_getnwifent(zone_dochandle_t handle, struct zone_nwiftab *tabptr)
{
- xmlNodePtr cur;
+ xmlNodePtr cur, val;
+ struct zone_res_attrtab *valptr;
int err;
if (handle == NULL)
@@ -4747,6 +4983,24 @@ zonecfg_getnwifent(zone_dochandle_t handle, struct zone_nwiftab *tabptr)
return (err);
}
+ if ((err = fetchprop(cur, DTD_ATTR_MAC, tabptr->zone_nwif_mac,
+ sizeof (tabptr->zone_nwif_mac))) != Z_OK) {
+ handle->zone_dh_cur = handle->zone_dh_top;
+ return (err);
+ }
+
+ if ((err = fetchprop(cur, DTD_ATTR_VLANID, tabptr->zone_nwif_vlan_id,
+ sizeof (tabptr->zone_nwif_vlan_id))) != Z_OK) {
+ handle->zone_dh_cur = handle->zone_dh_top;
+ return (err);
+ }
+
+ if ((err = fetchprop(cur, DTD_ATTR_GNIC, tabptr->zone_nwif_gnic,
+ sizeof (tabptr->zone_nwif_gnic))) != Z_OK) {
+ handle->zone_dh_cur = handle->zone_dh_top;
+ return (err);
+ }
+
if ((err = fetchprop(cur, DTD_ATTR_DEFROUTER,
tabptr->zone_nwif_defrouter,
sizeof (tabptr->zone_nwif_defrouter))) != Z_OK) {
@@ -4754,6 +5008,29 @@ zonecfg_getnwifent(zone_dochandle_t handle, struct zone_nwiftab *tabptr)
return (err);
}
+ tabptr->zone_nwif_attrp = NULL;
+ for (val = cur->xmlChildrenNode; val != NULL; val = val->next) {
+ valptr = (struct zone_res_attrtab *)malloc(
+ sizeof (struct zone_res_attrtab));
+ if (valptr == NULL)
+ return (Z_NOMEM);
+
+ valptr->zone_res_attr_name[0] =
+ valptr->zone_res_attr_value[0] = '\0';
+ if (zonecfg_add_res_attr(&(tabptr->zone_nwif_attrp), valptr)
+ != Z_OK) {
+ free(valptr);
+ break;
+ }
+
+ if (fetchprop(val, DTD_ATTR_NAME, valptr->zone_res_attr_name,
+ sizeof (valptr->zone_res_attr_name)) != Z_OK)
+ break;
+ if (fetchprop(val, DTD_ATTR_VALUE, valptr->zone_res_attr_value,
+ sizeof (valptr->zone_res_attr_value)) != Z_OK)
+ break;
+ }
+
handle->zone_dh_cur = cur->next;
return (Z_OK);
}
@@ -4773,7 +5050,7 @@ zonecfg_setdevent(zone_dochandle_t handle)
int
zonecfg_getdevent(zone_dochandle_t handle, struct zone_devtab *tabptr)
{
- xmlNodePtr cur;
+ xmlNodePtr cur, val;
int err;
if (handle == NULL)
@@ -4796,6 +5073,31 @@ zonecfg_getdevent(zone_dochandle_t handle, struct zone_devtab *tabptr)
return (err);
}
+ tabptr->zone_dev_attrp = NULL;
+ for (val = cur->xmlChildrenNode; val != NULL; val = val->next) {
+ struct zone_res_attrtab *valptr;
+
+ valptr = (struct zone_res_attrtab *)malloc(
+ sizeof (struct zone_res_attrtab));
+ if (valptr == NULL)
+ return (Z_NOMEM);
+
+ valptr->zone_res_attr_name[0] =
+ valptr->zone_res_attr_value[0] = '\0';
+ if (zonecfg_add_res_attr(&(tabptr->zone_dev_attrp), valptr)
+ != Z_OK) {
+ free(valptr);
+ break;
+ }
+
+ if ((fetchprop(val, DTD_ATTR_NAME, valptr->zone_res_attr_name,
+ sizeof (valptr->zone_res_attr_name)) != Z_OK))
+ break;
+ if ((fetchprop(val, DTD_ATTR_VALUE, valptr->zone_res_attr_value,
+ sizeof (valptr->zone_res_attr_value)) != Z_OK))
+ break;
+ }
+
handle->zone_dh_cur = cur->next;
return (Z_OK);
}
@@ -5524,6 +5826,164 @@ zone_get_brand(char *zone_name, char *brandname, size_t rp_sz)
}
/*
+ * Atomically get a new zone_did value. The currently allocated value
+ * is stored in /etc/zones/did.txt. Lock the file, read the current value,
+ * increment, save the new value and unlock the file. Return the new value
+ * or -1 if there was an error. The ID namespace is large enough that we
+ * don't worry about recycling an ID when a zone is deleted.
+ */
+static zoneid_t
+new_zone_did()
+{
+ int fd;
+ int len;
+ int val;
+ struct flock lck;
+ char buf[80];
+
+ if ((fd = open(DEBUGID_FILE, O_RDWR | O_CREAT,
+ S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) < 0) {
+ perror("new_zone_did open failed");
+ return (-1);
+ }
+
+ /* Initialize the lock. */
+ lck.l_whence = SEEK_SET;
+ lck.l_start = 0;
+ lck.l_len = 0;
+
+ /* Wait until we acquire an exclusive lock on the file. */
+ lck.l_type = F_WRLCK;
+ if (fcntl(fd, F_SETLKW, &lck) == -1) {
+ perror("new_zone_did lock failed");
+ (void) close(fd);
+ return (-1);
+ }
+
+ /* Get currently allocated value */
+ len = read(fd, buf, sizeof (buf));
+ if (len == -1) {
+ perror("new_zone_did read failed");
+ val = -1;
+ } else {
+ if (lseek(fd, 0L, SEEK_SET) == -1) {
+ perror("new_zone_did seek failed");
+ val = -1;
+ } else {
+ if (len == 0) {
+ /* Just created the file, initialize at 1 */
+ val = 1;
+ } else {
+ val = atoi(buf);
+ val++;
+ }
+
+ (void) snprintf(buf, sizeof (buf), "%d\n", val);
+ len = strlen(buf);
+
+ /* Save newly allocated value */
+ if (write(fd, buf, len) == -1) {
+ perror("new_zone_did write failed");
+ val = -1;
+ }
+ }
+ }
+
+ /* Release the file lock. */
+ lck.l_type = F_UNLCK;
+ if (fcntl(fd, F_SETLK, &lck) == -1) {
+ perror("new_zone_did unlock failed");
+ val = -1;
+ }
+
+ if (close(fd) != 0)
+ perror("new_zone_did close failed");
+
+ return (val);
+}
+
+/*
+ * Called by zoneadmd to get the zone's debug ID.
+ * If the zone doesn't already have an ID, a new one is generated and
+ * persistently saved onto the zone. Normally either zoneadm or zonecfg
+ * will assign a new ID for the zone, so zoneadmd should never have to
+ * generate one, but we also handle that here just to be paranoid.
+ */
+zoneid_t
+zone_get_did(char *zone_name)
+{
+ int res;
+ zoneid_t new_did;
+ zone_dochandle_t handle;
+ char did_str[80];
+
+ if ((handle = zonecfg_init_handle()) == NULL)
+ return (getpid());
+
+ if (zonecfg_get_handle((char *)zone_name, handle) != Z_OK)
+ return (getpid());
+
+ res = getrootattr(handle, DTD_ATTR_DID, did_str, sizeof (did_str));
+
+ /* If the zone already has an assigned debug ID, return it. */
+ if (res == Z_OK && did_str[0] != '\0') {
+ zonecfg_fini_handle(handle);
+ return (atoi(did_str));
+ }
+
+ /*
+ * The zone doesn't have an assigned debug ID yet, generate one and
+ * save it as part of the zone definition.
+ */
+ if ((new_did = new_zone_did()) == -1) {
+ /*
+ * We should really never hit this block of code.
+ * Generating a new ID failed for some reason. Use the current
+ * pid as a temporary ID so that the zone can continue to boot
+ * but we don't persistently save this temporary ID on the zone.
+ */
+ zonecfg_fini_handle(handle);
+ return (getpid());
+ }
+
+ /* Now persistently save this new ID onto the zone. */
+ (void) snprintf(did_str, sizeof (did_str), "%d", new_did);
+ (void) setrootattr(handle, DTD_ATTR_DID, did_str);
+ (void) zonecfg_save(handle);
+
+ zonecfg_fini_handle(handle);
+ return (new_did);
+}
+
+zoneid_t
+zonecfg_get_did(zone_dochandle_t handle)
+{
+ char did_str[80];
+ int err;
+ zoneid_t did;
+
+ err = getrootattr(handle, DTD_ATTR_DID, did_str, sizeof (did_str));
+ if (err == Z_OK && did_str[0] != '\0')
+ did = atoi(did_str);
+ else
+ did = -1;
+
+ return (did);
+}
+
+void
+zonecfg_set_did(zone_dochandle_t handle)
+{
+ zoneid_t new_did;
+ char did_str[80];
+
+ if ((new_did = new_zone_did()) == -1)
+ return;
+ (void) snprintf(did_str, sizeof (did_str), "%d", new_did);
+ (void) setrootattr(handle, DTD_ATTR_DID, did_str);
+}
+
+/*
* Return the appropriate root for the active /dev.
* For normal zone, the path is $ZONEPATH/root;
* for scratch zone, the dev path is $ZONEPATH/lu.
@@ -5806,6 +6266,30 @@ zonecfg_get_uuid(const char *zonename, uuid_t uuid)
}
/*
+ * Changes a zone's UUID to the given value. Returns an error if the UUID is
+ * malformed or if the zone cannot be located.
+ */
+int
+zonecfg_set_uuid(const char *zonename, const char *zonepath,
+ const char *uuid)
+{
+ int err;
+ struct zoneent ze;
+
+ bzero(&ze, sizeof (ze));
+ ze.zone_state = -1; /* Preserve existing state in index */
+ (void) strlcpy(ze.zone_name, zonename, sizeof (ze.zone_name));
+ (void) strlcpy(ze.zone_path, zonepath, sizeof (ze.zone_path));
+ if (uuid_parse((char *)uuid, ze.zone_uuid) == -1)
+ return (Z_INVALID_PROPERTY);
+
+ if ((err = putzoneent(&ze, PZE_MODIFY)) != Z_OK)
+ return (err);
+
+ return (Z_OK);
+}
+
+/*
* File-system convenience functions.
*/
boolean_t
@@ -6839,131 +7323,49 @@ zonecfg_getpsetent(zone_dochandle_t handle, struct zone_psettab *tabptr)
return (err);
}
-static int
-add_mcap(zone_dochandle_t handle, struct zone_mcaptab *tabptr)
-{
- xmlNodePtr newnode, cur = handle->zone_dh_cur;
- int err;
-
- newnode = xmlNewTextChild(cur, NULL, DTD_ELEM_MCAP, NULL);
- if ((err = newprop(newnode, DTD_ATTR_PHYSCAP, tabptr->zone_physmem_cap))
- != Z_OK)
- return (err);
-
- return (Z_OK);
-}
-
-int
-zonecfg_delete_mcap(zone_dochandle_t handle)
-{
- int err;
- xmlNodePtr cur = handle->zone_dh_cur;
-
- if ((err = operation_prep(handle)) != Z_OK)
- return (err);
-
- for (cur = cur->xmlChildrenNode; cur != NULL; cur = cur->next) {
- if (xmlStrcmp(cur->name, DTD_ELEM_MCAP) != 0)
- continue;
-
- xmlUnlinkNode(cur);
- xmlFreeNode(cur);
- return (Z_OK);
- }
- return (Z_NO_RESOURCE_ID);
-}
-
-int
-zonecfg_modify_mcap(zone_dochandle_t handle, struct zone_mcaptab *tabptr)
-{
- int err;
-
- if (tabptr == NULL)
- return (Z_INVAL);
-
- err = zonecfg_delete_mcap(handle);
- /* it is ok if there is no mcap entry */
- if (err != Z_OK && err != Z_NO_RESOURCE_ID)
- return (err);
-
- if ((err = add_mcap(handle, tabptr)) != Z_OK)
- return (err);
-
- return (Z_OK);
-}
-
+/*
+ * Cleanup obsolete constructs in the configuration.
+ * Return true of the config has been updated and must be commited.
+ */
int
-zonecfg_lookup_mcap(zone_dochandle_t handle, struct zone_mcaptab *tabptr)
+zonecfg_fix_obsolete(zone_dochandle_t handle)
{
+ int res = 0;
+ int add_physmem_rctl = 0;
xmlNodePtr cur;
- int err;
-
- if (tabptr == NULL)
- return (Z_INVAL);
+ char zone_physmem_cap[MAXNAMELEN];
- if ((err = operation_prep(handle)) != Z_OK)
- return (err);
+ if (operation_prep(handle) != Z_OK)
+ return (res);
+ /*
+ * If an obsolete mcap entry exists, convert it to the rctl.
+ */
cur = handle->zone_dh_cur;
for (cur = cur->xmlChildrenNode; cur != NULL; cur = cur->next) {
if (xmlStrcmp(cur->name, DTD_ELEM_MCAP) != 0)
continue;
- if ((err = fetchprop(cur, DTD_ATTR_PHYSCAP,
- tabptr->zone_physmem_cap,
- sizeof (tabptr->zone_physmem_cap))) != Z_OK) {
- handle->zone_dh_cur = handle->zone_dh_top;
- return (err);
+
+ if (fetchprop(cur, DTD_ATTR_PHYSCAP,
+ zone_physmem_cap, sizeof (zone_physmem_cap)) == Z_OK) {
+ res = 1;
+ add_physmem_rctl = 1;
}
- return (Z_OK);
+ xmlUnlinkNode(cur);
+ xmlFreeNode(cur);
+ break;
}
- return (Z_NO_ENTRY);
-}
-
-static int
-getmcapent_core(zone_dochandle_t handle, struct zone_mcaptab *tabptr)
-{
- xmlNodePtr cur;
- int err;
-
- if (handle == NULL)
- return (Z_INVAL);
-
- if ((cur = handle->zone_dh_cur) == NULL)
- return (Z_NO_ENTRY);
-
- for (; cur != NULL; cur = cur->next)
- if (xmlStrcmp(cur->name, DTD_ELEM_MCAP) == 0)
- break;
- if (cur == NULL) {
- handle->zone_dh_cur = handle->zone_dh_top;
- return (Z_NO_ENTRY);
- }
+ if (add_physmem_rctl) {
+ uint64_t cap;
+ char *endp;
- if ((err = fetchprop(cur, DTD_ATTR_PHYSCAP, tabptr->zone_physmem_cap,
- sizeof (tabptr->zone_physmem_cap))) != Z_OK) {
- handle->zone_dh_cur = handle->zone_dh_top;
- return (err);
+ cap = strtoull(zone_physmem_cap, &endp, 10);
+ (void) zonecfg_set_aliased_rctl(handle, ALIAS_MAXPHYSMEM, cap);
}
- handle->zone_dh_cur = cur->next;
- return (Z_OK);
-}
-
-int
-zonecfg_getmcapent(zone_dochandle_t handle, struct zone_mcaptab *tabptr)
-{
- int err;
-
- if ((err = zonecfg_setent(handle)) != Z_OK)
- return (err);
-
- err = getmcapent_core(handle, tabptr);
-
- (void) zonecfg_endent(handle);
-
- return (err);
+ return (res);
}
/*
diff --git a/usr/src/lib/libzonecfg/common/mapfile-vers b/usr/src/lib/libzonecfg/common/mapfile-vers
index b908a28174..7265b06a1f 100644
--- a/usr/src/lib/libzonecfg/common/mapfile-vers
+++ b/usr/src/lib/libzonecfg/common/mapfile-vers
@@ -20,6 +20,7 @@
#
#
# Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, Joyent Inc. All rights reserved.
#
#
@@ -53,6 +54,7 @@ SYMBOL_VERSION SUNWprivate_1.1 {
zonecfg_add_fs_option;
zonecfg_add_admin;
zonecfg_add_nwif;
+ zonecfg_add_res_attr;
zonecfg_add_pkg;
zonecfg_add_pset;
zonecfg_add_rctl;
@@ -79,7 +81,6 @@ SYMBOL_VERSION SUNWprivate_1.1 {
zonecfg_delete_dev;
zonecfg_delete_ds;
zonecfg_delete_filesystem;
- zonecfg_delete_mcap;
zonecfg_delete_nwif;
zonecfg_delete_pset;
zonecfg_delete_rctl;
@@ -104,7 +105,9 @@ SYMBOL_VERSION SUNWprivate_1.1 {
zonecfg_find_mounts;
zonecfg_find_scratch;
zonecfg_fini_handle;
+ zonecfg_fix_obsolete;
zonecfg_free_fs_option_list;
+ zonecfg_free_res_attr_list;
zonecfg_free_rctl_value_list;
zonecfg_get_aliased_rctl;
zonecfg_get_attach_handle;
@@ -118,6 +121,7 @@ SYMBOL_VERSION SUNWprivate_1.1 {
zonecfg_get_bootargs;
zonecfg_get_brand;
zonecfg_get_dflt_sched_class;
+ zonecfg_get_did;
zonecfg_getdevent;
zonecfg_getdevperment;
zonecfg_getdsent;
@@ -127,7 +131,6 @@ SYMBOL_VERSION SUNWprivate_1.1 {
zonecfg_get_hostid;
zonecfg_get_iptype;
zonecfg_get_limitpriv;
- zonecfg_getmcapent;
zonecfg_get_name;
zonecfg_get_name_by_uuid;
zonecfg_getnwifent;
@@ -160,7 +163,6 @@ SYMBOL_VERSION SUNWprivate_1.1 {
zonecfg_lookup_dev;
zonecfg_lookup_ds;
zonecfg_lookup_filesystem;
- zonecfg_lookup_mcap;
zonecfg_lookup_nwif;
zonecfg_lookup_pset;
zonecfg_lookup_rctl;
@@ -169,7 +171,6 @@ SYMBOL_VERSION SUNWprivate_1.1 {
zonecfg_modify_dev;
zonecfg_modify_ds;
zonecfg_modify_filesystem;
- zonecfg_modify_mcap;
zonecfg_modify_nwif;
zonecfg_modify_pset;
zonecfg_modify_rctl;
@@ -183,6 +184,7 @@ SYMBOL_VERSION SUNWprivate_1.1 {
zonecfg_ping_zoneadmd;
zonecfg_release_lock_file;
zonecfg_remove_fs_option;
+ zonecfg_remove_res_attr;
zonecfg_remove_rctl_value;
zonecfg_remove_userauths;
zonecfg_reverse_scratch;
@@ -196,6 +198,7 @@ SYMBOL_VERSION SUNWprivate_1.1 {
zonecfg_set_autoboot;
zonecfg_set_bootargs;
zonecfg_set_brand;
+ zonecfg_set_did;
zonecfg_setdevent;
zonecfg_setdevperment;
zonecfg_setdsent;
@@ -211,6 +214,7 @@ SYMBOL_VERSION SUNWprivate_1.1 {
zonecfg_set_root;
zonecfg_set_sched;
zonecfg_set_swinv;
+ zonecfg_set_uuid;
zonecfg_set_zonepath;
zonecfg_strerror;
zonecfg_str_to_bytes;
@@ -229,6 +233,7 @@ SYMBOL_VERSION SUNWprivate_1.1 {
zonecfg_verify_save;
zonecfg_warn_poold;
zone_get_brand;
+ zone_get_did;
zone_get_devroot;
zone_get_id;
zone_get_rootpath;
diff --git a/usr/src/lib/libzonecfg/dtd/zonecfg.dtd.1 b/usr/src/lib/libzonecfg/dtd/zonecfg.dtd.1
index d94bb09c5f..3c7198efef 100644
--- a/usr/src/lib/libzonecfg/dtd/zonecfg.dtd.1
+++ b/usr/src/lib/libzonecfg/dtd/zonecfg.dtd.1
@@ -21,6 +21,7 @@
CDDL HEADER END
Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ Copyright (c) 2011, Joyent Inc. All rights reserved.
-->
@@ -46,14 +47,21 @@
<!ATTLIST inherited-pkg-dir directory CDATA #REQUIRED>
-<!ELEMENT network EMPTY>
+<!ELEMENT net-attr EMPTY>
+<!ATTLIST net-attr name CDATA #REQUIRED
+ value CDATA #REQUIRED>
+
+<!ELEMENT network (net-attr)*>
<!ATTLIST network address CDATA ""
allowed-address CDATA ""
defrouter CDATA ""
- physical CDATA #REQUIRED>
+ global-nic CDATA ""
+ mac-addr CDATA ""
+ physical CDATA #REQUIRED
+ vlan-id CDATA "">
-<!ELEMENT device EMPTY>
+<!ELEMENT device (net-attr)*>
<!ATTLIST device match CDATA #REQUIRED>
@@ -156,6 +164,7 @@
limitpriv CDATA ""
bootargs CDATA ""
brand CDATA ""
+ debugid CDATA ""
scheduling-class CDATA ""
fs-allowed CDATA ""
version NMTOKEN #FIXED '1'>
diff --git a/usr/src/lib/libzpool/common/sys/zfs_context.h b/usr/src/lib/libzpool/common/sys/zfs_context.h
index 5a19542b8f..8ee04e7009 100644
--- a/usr/src/lib/libzpool/common/sys/zfs_context.h
+++ b/usr/src/lib/libzpool/common/sys/zfs_context.h
@@ -22,7 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_ZFS_CONTEXT_H
@@ -305,6 +305,7 @@ extern void kstat_runq_back_to_waitq(kstat_io_t *);
#define KM_SLEEP UMEM_NOFAIL
#define KM_PUSHPAGE KM_SLEEP
#define KM_NOSLEEP UMEM_DEFAULT
+#define KM_NORMALPRI 0
#define KMC_NODEBUG UMC_NODEBUG
#define KMC_NOTOUCH 0 /* not needed for userland caches */
#define kmem_alloc(_s, _f) umem_alloc(_s, _f)
diff --git a/usr/src/lib/nsswitch/dns/Makefile.com b/usr/src/lib/nsswitch/dns/Makefile.com
index 23c89c32f8..0366633c0c 100644
--- a/usr/src/lib/nsswitch/dns/Makefile.com
+++ b/usr/src/lib/nsswitch/dns/Makefile.com
@@ -22,8 +22,6 @@
# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
-#
# lib/nsswitch/dns/Makefile.com
LIBRARY = libnss_dns.a
@@ -46,5 +44,5 @@ CPPFLAGS += -DNSS_DNS_LIBRESOLV=\"libresolv.so.2\"
LINTFLAGS += -erroff=E_GLOBAL_COULD_BE_STATIC2
-LDLIBS += -lnsl
+LDLIBS += -lnsl -lresolv_joy -lsocket
DYNLIB1 = nss_dns.so$(VERS)
diff --git a/usr/src/lib/nsswitch/dns/common/dns_common.c b/usr/src/lib/nsswitch/dns/common/dns_common.c
index a9195f9f68..0195b1847c 100644
--- a/usr/src/lib/nsswitch/dns/common/dns_common.c
+++ b/usr/src/lib/nsswitch/dns/common/dns_common.c
@@ -22,22 +22,19 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ */
/*
* dns_common.c
*/
#include "dns_common.h"
-
-#pragma weak dn_expand
-#pragma weak res_ninit
-#pragma weak res_ndestroy
-#pragma weak res_nsearch
-#pragma weak res_nclose
-#pragma weak ns_get16
-#pragma weak ns_get32
-#pragma weak __ns_get16
-#pragma weak __ns_get32
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <ifaddrs.h>
+#include <net/if.h>
#define DNS_ALIASES 0
#define DNS_ADDRLIST 1
@@ -321,6 +318,52 @@ name_is_alias(char *aliases_ptr, char *name_ptr) {
return (NSS_NOTFOUND);
}
+
+static int
+_nss_has_interfaces(int *v4, int *v6)
+{
+ struct ifaddrs *ifp, *i;
+ struct in_addr in4;
+ struct in6_addr in6;
+ const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
+
+ *v4 = *v6 = 0;
+
+ if (getifaddrs(&ifp) != 0)
+ return (-1);
+
+ for (i = ifp; i != NULL; i = i->ifa_next) {
+ if (i->ifa_flags & IFF_LOOPBACK)
+ continue;
+ if ((i->ifa_flags & IFF_UP) == 0)
+ continue;
+
+ if (i->ifa_addr->sa_family == AF_INET) {
+ if (*v4 != 0)
+ continue;
+
+ if (((struct sockaddr_in *)i->ifa_addr)->
+ sin_addr.s_addr == INADDR_ANY)
+ continue;
+ *v4 = 1;
+ }
+
+ if (i->ifa_addr->sa_family == AF_INET6) {
+ if (*v6 != 0)
+ continue;
+
+ if (memcmp(&in6addr_any,
+ &((struct sockaddr_in6 *)i->ifa_addr)->sin6_addr,
+ sizeof (struct in6_addr)) == 0)
+ continue;
+ *v6 = 1;
+ }
+ }
+
+ freeifaddrs(ifp);
+ return (0);
+}
+
/*
* nss_dns_gethost_withttl(void *buffer, size_t bufsize, int ipnode)
* nss2 get hosts/ipnodes with ttl backend DNS search engine.
@@ -331,13 +374,6 @@ name_is_alias(char *aliases_ptr, char *name_ptr) {
* Additionally in the extended results a nssuint_t ttl is placed.
* This ttl is the lessor of the ttl's extracted from the result.
*
- * ***Currently the first version of this API only performs simple
- * single res_nsearch lookups for with T_A or T_AAAA results.
- * Other searches are deferred to the generic API w/t ttls.
- *
- * This function is not a generic res_* operation. It only performs
- * a single T_A or T_AAAA lookups***
- *
* RETURNS: NSS_SUCCESS or NSS_ERROR
* If an NSS_ERROR result is returned, nscd is expected
* to resubmit the gethosts request using the old style
@@ -384,11 +420,14 @@ _nss_dns_gethost_withttl(void *buffer, size_t bufsize, int ipnode)
int af;
char *ap, *apc;
int hlen = 0, alen, iplen, len, isans;
+ int has_v4 = 0, has_v6 = 0;
+ int flags, family, pass2 = 0;
statp = &stat;
(void) memset(statp, '\0', sizeof (struct __res_state));
- if (res_ninit(statp) == -1)
+ if (res_ninit(statp) == -1) {
return (NSS_ERROR);
+ }
ap = apc = (char *)aliases;
alen = 0;
@@ -404,33 +443,139 @@ _nss_dns_gethost_withttl(void *buffer, size_t bufsize, int ipnode)
return (NSS_ERROR);
}
+ /*
+ * There may be flags set when we are handling ipnode. There are three
+ * different values for flags:
+ *
+ * o AI_V4MAPPED
+ * o AI_ALL
+ * o AI_ADDRCONFIG
+ *
+ * The first two only have a meaning when af_family is ipv6. The latter
+ * means something in both cases. These flags are documented in
+ * getipnodebyname(3SOCKET), though the combinations leave a little
+ * something to be desired. It would be great if we could actually use
+ * getipnodebyname directly here since it already knows how to handle
+ * this kind of logic; however, we're not quite so lucky. Ideally we
+ * would add such an interface to libresolv.so.2 to handle this kind of
+ * thing, but that's rather painful as well. We'll summarize what has to
+ * happen below:
+ *
+ * AI_ALL is only meaningful when AI_V4MAPPED is also specified. Both
+ * are ignored if the family is not AF_INET6
+ *
+ * family == AF_INET, flags | AI_ADDRCONFIG
+ * - lookup A records iff we have v4 plumbed
+ * family == AF_INET, !(flags | AI_ADDRCONFIG)
+ * - lookup A records
+ * family == AF_INET6, flags == 0 || flags == AI_ALL
+ * - lookup AAAA records
+ * family == AF_INET6, flags | AI_V4MAPPED
+ * - lookup AAAA, if none, lookup A
+ * family == AF_INET6, flags | AI_ADDRCONFIG
+ * - lookup AAAA records if ipv6
+ * family == AF_INET6, flags | AI_V4MAPPED && flags | AI_ALL
+ * - lookup AAAA records, lookup A records
+ * family == AF_INET6, flags | AI_V4MAPPED && flags | AI_ADDRCONFIG
+ * - lookup AAAA records if ipv6
+ * - If no AAAA && ipv4 exists, lookup A
+ * family == AF_INET6, flags | AI_V4MAPPED && flags | AI_ADDRCONFIG &&
+ * flags | AI_ALL
+ * - lookup AAAA records if ipv6
+ * - loookup A records if ipv4
+ */
if (ipnode) {
/* initially only handle the simple cases */
- if (arg.key.ipnode.flags != 0) {
- res_ndestroy(statp);
- return (NSS_ERROR);
- }
name = arg.key.ipnode.name;
- if (arg.key.ipnode.af_family == AF_INET6)
- qtype = T_AAAA;
- else
- qtype = T_A;
+ flags = arg.key.ipnode.flags;
+ family = arg.key.ipnode.af_family;
+ if (flags != 0) {
+ /*
+ * Figure out our first pass. We'll determine if we need
+ * to do a second pass afterwards once we successfully
+ * finish our first pass.
+ */
+ if ((flags & AI_ADDRCONFIG) != 0) {
+ if (_nss_has_interfaces(&has_v4, &has_v6) !=
+ 0) {
+ res_ndestroy(statp);
+ return (NSS_ERROR);
+ }
+ /* Impossible situations... */
+ if (family == AF_INET && has_v4 == 0) {
+ res_ndestroy(statp);
+ return (NSS_NOTFOUND);
+ }
+ if (family == AF_INET6 && has_v6 == 0 &&
+ !(flags & AI_V4MAPPED)) {
+ res_ndestroy(statp);
+ return (NSS_NOTFOUND);
+ }
+ if (family == AF_INET6 && has_v6)
+ qtype = T_AAAA;
+ if (family == AF_INET || (family == AF_INET6 &&
+ has_v6 == 0 && flags & AI_V4MAPPED))
+ qtype = T_A;
+ } else {
+ has_v4 = has_v6 = 1;
+ if (family == AF_INET6)
+ qtype = T_AAAA;
+ else
+ qtype = T_A;
+ }
+ } else {
+ if (family == AF_INET6)
+ qtype = T_AAAA;
+ else
+ qtype = T_A;
+ }
} else {
name = arg.key.name;
qtype = T_A;
}
+
+searchagain:
ret = res_nsearch(statp, name, C_IN, qtype, resbuf.buf, NS_MAXMSG);
if (ret == -1) {
- if (statp->res_h_errno == HOST_NOT_FOUND) {
+ /*
+ * We want to continue on unless we got NO_RECOVERY. Otherwise,
+ * HOST_NOT_FOUND, TRY_AGAIN, and NO_DATA all suggest to me that
+ * we should keep going.
+ */
+ if (statp->res_h_errno == NO_RECOVERY) {
+ /* else lookup error - handle in general code */
+ res_ndestroy(statp);
+ return (NSS_ERROR);
+ }
+
+ /*
+ * We found something on our first pass. Make sure that we do
+ * not clobber this information. This ultimately means that we
+ * were successful.
+ */
+ if (pass2 == 2)
+ goto out;
+
+ if (pass2 == 1 || flags == 0 || family == AF_INET ||
+ (family == AF_INET6 && !(flags & AI_V4MAPPED))) {
pbuf->p_herrno = HOST_NOT_FOUND;
pbuf->p_status = NSS_NOTFOUND;
pbuf->data_len = 0;
res_ndestroy(statp);
return (NSS_NOTFOUND);
}
- /* else lookup error - handle in general code */
- res_ndestroy(statp);
- return (NSS_ERROR);
+ if ((flags & AI_ADDRCONFIG) && !(flags & AI_ALL) &&
+ has_v4 == 0) {
+ pbuf->p_herrno = HOST_NOT_FOUND;
+ pbuf->p_status = NSS_NOTFOUND;
+ pbuf->data_len = 0;
+ res_ndestroy(statp);
+ return (NSS_NOTFOUND);
+ }
+ qtype = T_A;
+ flags = 0;
+ pass2 = 1;
+ goto searchagain;
}
cp = resbuf.buf;
@@ -579,6 +724,15 @@ _nss_dns_gethost_withttl(void *buffer, size_t bufsize, int ipnode)
*bptr++ = '\n';
blen++;
}
+
+ /* Depending on our flags we may need to go back another time. */
+ if (qtype == T_AAAA && family == AF_INET6 &&
+ ((flags & AI_V4MAPPED) != 0) && ((flags & AI_ALL) != 0) && has_v4) {
+ qtype = T_A;
+ pass2 = 2; /* Indicate that we found data this pass */
+ goto searchagain;
+ }
+
/* Presumably the buffer is now filled. */
len = ROUND_UP(blen, sizeof (nssuint_t));
/* still room? */
@@ -587,6 +741,7 @@ _nss_dns_gethost_withttl(void *buffer, size_t bufsize, int ipnode)
res_ndestroy(statp);
return (NSS_ERROR);
}
+out:
pbuf->ext_off = pbuf->data_off + len;
pbuf->ext_len = sizeof (nssuint_t);
pbuf->data_len = blen;
diff --git a/usr/src/lib/nsswitch/dns/common/dns_common.h b/usr/src/lib/nsswitch/dns/common/dns_common.h
index 717f56d70f..83d486cf57 100644
--- a/usr/src/lib/nsswitch/dns/common/dns_common.h
+++ b/usr/src/lib/nsswitch/dns/common/dns_common.h
@@ -31,8 +31,6 @@
#ifndef _DNS_COMMON_H
#define _DNS_COMMON_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <stdio.h>
#include <ctype.h>
#include <sys/types.h>
@@ -43,7 +41,7 @@
#include <thread.h>
#include <arpa/inet.h>
#include <arpa/nameser.h>
-#include <resolv.h>
+#include <resolv_joy.h>
#include <syslog.h>
#include <nsswitch.h>
#include <nss_common.h>
diff --git a/usr/src/lib/nsswitch/dns/common/dns_mt.c b/usr/src/lib/nsswitch/dns/common/dns_mt.c
index 128b1bde75..4af3f671c0 100644
--- a/usr/src/lib/nsswitch/dns/common/dns_mt.c
+++ b/usr/src/lib/nsswitch/dns/common/dns_mt.c
@@ -23,8 +23,9 @@
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ */
/*
* dns_mt.c
@@ -49,52 +50,41 @@
static void _nss_dns_init(void);
extern struct hostent *res_gethostbyname(const char *);
-#pragma weak res_gethostbyname
-#define RES_SET_NO_HOSTS_FALLBACK "__res_set_no_hosts_fallback"
-extern void __res_set_no_hosts_fallback(void);
-#pragma weak __res_set_no_hosts_fallback
+#define RES_SET_NO_HOSTS_FALLBACK "__joy_res_set_no_hosts_fallback"
+extern void __joy_res_set_no_hosts_fallback(void);
-#define RES_UNSET_NO_HOSTS_FALLBACK "__res_unset_no_hosts_fallback"
-extern void __res_unset_no_hosts_fallback(void);
-#pragma weak __res_unset_no_hosts_fallback
+#define RES_UNSET_NO_HOSTS_FALLBACK "__joy_res_unset_no_hosts_fallback"
+extern void __joy_res_unset_no_hosts_fallback(void);
#define RES_GET_RES "__res_get_res"
extern struct __res_state *__res_get_res(void);
-#pragma weak __res_get_res
#define RES_ENABLE_MT "__res_enable_mt"
extern int __res_enable_mt(void);
-#pragma weak __res_enable_mt
#define RES_DISABLE_MT "__res_disable_mt"
extern int __res_disable_mt(void);
-#pragma weak __res_disable_mt
#define RES_GET_H_ERRNO "__res_get_h_errno"
extern int *__res_get_h_errno();
-#pragma weak __res_get_h_errno
-#define __H_ERRNO "__h_errno"
-extern int *__h_errno(void);
-#pragma weak __h_errno
+#define __H_ERRNO "__joy_h_errno"
+extern int *__joy_h_errno(void);
-#define RES_OVERRIDE_RETRY "__res_override_retry"
-extern int __res_override_retry(int);
-#pragma weak __res_override_retry
+#define RES_OVERRIDE_RETRY "__joy_res_override_retry"
+extern int __joy_res_override_retry(int);
static void __fallback_set_no_hosts(void);
-static int *__fallback_h_errno(void);
-static int __fallback_override_retry(int);
static int __is_mt_safe(void);
void (*set_no_hosts_fallback)(void) = __fallback_set_no_hosts;
void (*unset_no_hosts_fallback)(void) = __fallback_set_no_hosts;
struct __res_state *(*set_res_retry)() = 0;
-int (*enable_mt)() = 0;
-int (*disable_mt)() = 0;
-int *(*get_h_errno)(void) = 0;
-int (*override_retry)(int) = 0;
+int (*enable_mt)() = __is_mt_safe;
+int (*disable_mt)() = __is_mt_safe;
+int *(*get_h_errno)(void) = __joy_h_errno;
+int (*override_retry)(int) = __joy_res_override_retry;
/* Usually set from the Makefile */
#ifndef NSS_DNS_LIBRESOLV
@@ -106,91 +96,12 @@ extern int h_errno;
mutex_t one_lane = DEFAULTMUTEX;
+/* Because we link against libresolv_joy.so.2, this is relatively easy. */
void
_nss_dns_init(void)
{
- void *reslib, (*f_void_ptr)();
-
- /* If no libresolv library, then load one */
- if (res_gethostbyname == 0) {
- if ((reslib =
- dlopen(NSS_DNS_LIBRESOLV, RTLD_LAZY|RTLD_GLOBAL)) != 0) {
- /* Turn off /etc/hosts fall back in libresolv */
- if ((f_void_ptr = (void (*)(void))dlsym(reslib,
- RES_SET_NO_HOSTS_FALLBACK)) != 0) {
- set_no_hosts_fallback = f_void_ptr;
- }
- if ((f_void_ptr = (void (*)(void))dlsym(reslib,
- RES_SET_NO_HOSTS_FALLBACK)) != 0) {
- unset_no_hosts_fallback = f_void_ptr;
- }
- /* Set number of resolver retries */
- if ((override_retry = (int (*)(int))dlsym(reslib,
- RES_OVERRIDE_RETRY)) == 0) {
- set_res_retry =
- (struct __res_state *(*)(void))dlsym(reslib,
- RES_GET_RES);
- override_retry = __fallback_override_retry;
- }
- /*
- * Select h_errno retrieval function. A BIND 8.2.2
- * libresolv.so.2 will have __h_errno, a BIND 8.1.2
- * one will have __res_get_h_errno, and other
- * versions may have nothing at all.
- *
- * Also try to bind to the relevant MT enable/disable
- * functions which are also dependent on the version
- * of the BIND libresolv.so.2 being used.
- */
- if ((get_h_errno = (int *(*)(void))dlsym(reslib,
- __H_ERRNO)) != 0) {
- /* BIND 8.2.2 libresolv.so.2 is MT safe. */
- enable_mt = __is_mt_safe;
- disable_mt = __is_mt_safe;
- } else {
- if ((get_h_errno =
- (int *(*)(void))dlsym(reslib,
- RES_GET_H_ERRNO)) == 0) {
- get_h_errno = __fallback_h_errno;
- }
- /*
- * Pre-BIND 8.2.2 was not MT safe. Try to
- * bind the MT enable/disable functions.
- */
- if ((enable_mt = (int (*)(void))dlsym(reslib,
- RES_ENABLE_MT)) != 0 &&
- (disable_mt = (int (*)(void))dlsym(reslib,
- RES_DISABLE_MT)) == 0) {
- enable_mt = 0;
- }
- }
- }
- } else {
- /* Libresolv already loaded */
- if ((f_void_ptr = __res_set_no_hosts_fallback) != 0) {
- set_no_hosts_fallback = f_void_ptr;
- }
- if ((f_void_ptr = __res_unset_no_hosts_fallback) != 0) {
- unset_no_hosts_fallback = f_void_ptr;
- }
- if ((override_retry = __res_override_retry) == 0) {
- set_res_retry = __res_get_res;
- override_retry = __fallback_override_retry;
- }
- if ((get_h_errno = __h_errno) == 0 &&
- (get_h_errno = __res_get_h_errno) == 0) {
- get_h_errno = __fallback_h_errno;
- }
- if (get_h_errno == __h_errno) {
- enable_mt = __is_mt_safe;
- disable_mt = __is_mt_safe;
- } else {
- if ((enable_mt = __res_enable_mt) != 0 &&
- (disable_mt = __res_disable_mt) == 0) {
- enable_mt = 0;
- }
- }
- }
+ enable_mt = __is_mt_safe;
+ disable_mt = __is_mt_safe;
}
@@ -228,36 +139,6 @@ __is_mt_safe(void) {
}
-/*
- * Return pointer to the global h_errno variable
- */
-static int *
-__fallback_h_errno(void) {
- return (&h_errno);
-}
-
-
-/*
- * This function is called when the resolver library doesn't provide its
- * own function to establish an override retry. If we can get a pointer
- * to the per-thread _res (i.e., set_res_retry != 0), we set the retries
- * directly, and return the previous number of retries. Otherwise, there's
- * nothing to do.
- */
-static int
-__fallback_override_retry(int retry) {
- struct __res_state *res;
- int old_retry = 0;
-
- if (set_res_retry != 0) {
- res = set_res_retry();
- old_retry = res->retry;
- res->retry = retry;
- }
- return (old_retry);
-}
-
-
static void
__fallback_set_no_hosts(void) {
}
diff --git a/usr/src/lib/nsswitch/dns/common/gethostent.c b/usr/src/lib/nsswitch/dns/common/gethostent.c
index 648ea8ba01..d321dd24c6 100644
--- a/usr/src/lib/nsswitch/dns/common/gethostent.c
+++ b/usr/src/lib/nsswitch/dns/common/gethostent.c
@@ -24,8 +24,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* gethostent.c
*
@@ -53,12 +51,6 @@ static struct hostent *_gethostbyaddr(int *h_errnop, const char *addr,
int len, int type);
struct hostent *_nss_dns_gethostbyname2(int *h_errnop, const char *name);
-#pragma weak res_gethostbyname
-#pragma weak res_gethostbyname2
-#pragma weak res_gethostbyaddr
-#pragma weak res_sethostent
-#pragma weak res_endhostent
-
nss_backend_t *_nss_dns_constr(dns_backend_op_t ops[], int n_ops);
nss_status_t __nss_dns_getbyaddr(dns_backend_ptr_t, void *);
diff --git a/usr/src/lib/nsswitch/dns/common/gethostent6.c b/usr/src/lib/nsswitch/dns/common/gethostent6.c
index ee85832073..f3efc4eae6 100644
--- a/usr/src/lib/nsswitch/dns/common/gethostent6.c
+++ b/usr/src/lib/nsswitch/dns/common/gethostent6.c
@@ -24,8 +24,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* This is the DNS backend for IPv6 addresses.
* getbyname() is a local routine, but getbyaddr() actually shares the
@@ -50,8 +48,6 @@
*/
-#pragma weak res_endhostent
-
extern struct hostent *_gethostbyname(int *, const char *);
extern struct hostent *_nss_dns_gethostbyname2(int *, const char *);
diff --git a/usr/src/lib/pkcs11/pkcs11_tpm/Makefile.com b/usr/src/lib/pkcs11/pkcs11_tpm/Makefile.com
index 020051c977..7345ddc892 100644
--- a/usr/src/lib/pkcs11/pkcs11_tpm/Makefile.com
+++ b/usr/src/lib/pkcs11/pkcs11_tpm/Makefile.com
@@ -73,7 +73,7 @@ TSSLIB=-L$(TSPILIBDIR)
TSSLIB64=-L$(TSPILIBDIR)/$(MACH64)
TSSINC=-I$(TSPIINCDIR)
-LDLIBS += $(TSSLIB) -L$(ADJUNCT_PROTO)/lib -lc -luuid -lmd -ltspi -lcrypto
+LDLIBS += $(TSSLIB) -L$(ADJUNCT_PROTO)/lib -lc -luuid -lmd -ltspi -lsunw_crypto
CPPFLAGS += -xCC -D_POSIX_PTHREAD_SEMANTICS $(TSSINC)
CPPFLAGS64 += $(CPPFLAGS)
C99MODE= $(C99_ENABLE)
diff --git a/usr/src/lib/print/mod_ipp/Makefile b/usr/src/lib/print/mod_ipp/Makefile
index 8d6500fb41..137e88bbf8 100644
--- a/usr/src/lib/print/mod_ipp/Makefile
+++ b/usr/src/lib/print/mod_ipp/Makefile
@@ -48,7 +48,7 @@ SRCS = $(OBJECTS:%.o = %.c)
CFLAGS += $(CCVERBOSE)
CPPFLAGS += -I../libipp-listener/common
CPPFLAGS += -I../libipp-core/common
-CPPFLAGS += -I/usr/apache/include
+CPPFLAGS += -I$(ADJUNCT_PROTO)/usr/apache/include
CPPFLAGS += -DEAPI
ZDEFS = $(ZNODEFS)
diff --git a/usr/src/lib/pysolaris/Makefile.com b/usr/src/lib/pysolaris/Makefile.com
index 28d68704f9..a6194a9a2a 100644
--- a/usr/src/lib/pysolaris/Makefile.com
+++ b/usr/src/lib/pysolaris/Makefile.com
@@ -44,6 +44,7 @@ C99LMODE= -Xc99=%all
LIBS = $(DYNLIB)
LDLIBS += -lc -lsec -lidmap -lpython2.6
CFLAGS += $(CCVERBOSE)
+CPPFLAGS += -I$(ADJUNCT_PROTO)/usr/include/python2.6
CERRWARN += -_gcc=-Wno-unused-variable
CPPFLAGS += -I$(ADJUNCT_PROTO)/usr/include/python2.6
diff --git a/usr/src/man/Makefile b/usr/src/man/Makefile
index 9c85fcf4cb..21856f3940 100644
--- a/usr/src/man/Makefile
+++ b/usr/src/man/Makefile
@@ -82,6 +82,7 @@ SUBDIRS= man1 \
man3tsol \
man3uuid \
man3volmgt \
+ man3vnd \
man3xcurses \
man3xnet \
man4 \
diff --git a/usr/src/man/Makefile.man b/usr/src/man/Makefile.man
index 1b26bf0fb8..d273e29c1b 100644
--- a/usr/src/man/Makefile.man
+++ b/usr/src/man/Makefile.man
@@ -28,7 +28,10 @@ MANCHECKS= $(MANFILES:%=%.check)
ROOTMANFILES= $(MANFILES:%=$(ROOTMAN)/man$(MANSECT)/%)
ROOTMANLINKS= $(MANLINKS:%=$(ROOTMAN)/man$(MANSECT)/%)
-$(ROOTMAN)/man$(MANSECT)/% $(ROOTHASMAN)/man$(MANSECT)/%: %
+$(ROOTMAN)/man$(MANSECT) $(ROOTHASMAN)/man$(MANSECT):
+ $(INS.dir)
+
+$(ROOTMAN)/man$(MANSECT)/% $(ROOTHASMAN)/man$(MANSECT)/%: % $(ROOTMAN)/man$(MANSECT) $(ROOTHASMAN)/man$(MANSECT)
$(INS.file)
#
@@ -45,7 +48,7 @@ $(MANCHECKS):
$(MANLINKS):
$(RM) $@; $(SYMLINK) $(LINKSRC) $@
-$(ROOTMANLINKS): $(MANLINKS)
+$(ROOTMANLINKS): $(MANLINKS) $(ROOTMAN)/man$(MANSECT)
$(RM) $@; $(CP) -RP $(@F) $(@D)
all:
diff --git a/usr/src/man/man1/Makefile b/usr/src/man/man1/Makefile
index 2291d1a9c0..35bcebb71a 100644
--- a/usr/src/man/man1/Makefile
+++ b/usr/src/man/man1/Makefile
@@ -74,6 +74,7 @@ MANFILES= acctcom.1 \
clear.1 \
cmp.1 \
col.1 \
+ column.1 \
comm.1 \
command.1 \
compress.1 \
diff --git a/usr/src/man/man1/column.1 b/usr/src/man/man1/column.1
new file mode 100644
index 0000000000..a8c23310ba
--- /dev/null
+++ b/usr/src/man/man1/column.1
@@ -0,0 +1,129 @@
+.\" Copyright (c) 1989, 1990, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)column.1 8.1 (Berkeley) 6/6/93
+.\" $FreeBSD$
+.\"
+.\" Portions Copyright (c) 2013 Joyent, Inc. All rights reserved.
+.\"
+.TH COLUMN 1 "Jan 10, 2013"
+.SH NAME
+column \- columnate lists
+.SH SYNOPSIS
+.LP
+.nf
+\fBcolumn\fR [\fB-tx\fR] [\fB-c\fR \fIcolumns\fR] [\fB-s\fR \fIsep\fR] [\fIfile\fR ... ]
+.fi
+
+.SH DESCRIPTION
+.sp
+.LP
+The \fBcolumn\fR
+utility formats its input into multiple columns.
+Rows are filled before columns.
+Input is taken from
+\fIfile\fR
+operands, or, by default, from the standard input.
+Empty lines are ignored.
+.SH OPTIONS
+.sp
+.LP
+The options are as follows:
+.sp
+.ne 2
+.na
+\fB\fB-c\fR \fIcolumns\fR\fR
+.ad
+.RS 17n
+Output is formatted for a display \fIcolumns\fR
+wide.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fB-s\fR \fIsep\fR\fR
+.ad
+.RS 17n
+Specify a set of characters to be used to delimit columns for the
+\fB-t\fR option.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fB-t\fR\fR
+.ad
+.RS 17n
+Determine the number of columns the input contains and create a table.
+Columns are delimited with whitespace, by default, or with the characters
+supplied using the \fBs\fR
+option.
+Useful for pretty-printing displays.
+.RE
+
+.sp
+.ne 2
+.na
+\fB-x\fR
+.ad
+.RS 17n
+Fill columns before filling rows.
+.RE
+
+.SH ENVIRONMENT
+The COLUMNS , LANG , LC_ALL
+and
+LC_CTYPE
+environment variables affect the execution of
+\fBcolumn\fR
+as described in
+\fBenviron\fR(5).
+
+.SH EXIT STATUS
+The \fBcolumn\fR utility exits 0 on success and >0 if an error occurs.
+
+.SH EXAMPLES
+.sp
+.in +2
+.nf
+(printf \&"PERM LINKS OWNER GROUP SIZE MONTH DAY \&"\ \&;\ \&\e
+printf \&"HH:MM/YEAR NAME\en\&"\ \&;\ \&\e
+ls -l \&| sed 1d) \&| column -t
+.fi
+.in -2
+.sp
+
+
+.SH SEE ALSO
+\fBls\fR(1), \fBpaste\fR(1), \fBsort\fR(1)
+
+.SH HISTORY
+The \fBcolumn\fR command appeared in 4.3BSD-Reno.
+
+.SH BUGS
+Input lines are limited to LINE_MAX bytes in length.
diff --git a/usr/src/man/man1/crontab.1 b/usr/src/man/man1/crontab.1
index c93255f406..1008b63a21 100644
--- a/usr/src/man/man1/crontab.1
+++ b/usr/src/man/man1/crontab.1
@@ -1,6 +1,7 @@
'\" te
.\" Copyright 1989 AT&T
.\" Copyright (c) 2009, Sun Microsystems, Inc. All Rights Reserved
+.\" Copyright (c) 2011, Joyent, Inc. All Rights Reserved
.\" Portions Copyright (c) 1992, X/Open Company Limited All Rights Reserved
.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at http://www.opengroup.org/bookstore/.
.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text
@@ -10,7 +11,7 @@
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH CRONTAB 1 "Apr 6, 2009"
+.TH CRONTAB 1 "Sep 23, 2013"
.SH NAME
crontab \- user crontab file
.SH SYNOPSIS
@@ -26,7 +27,7 @@ crontab \- user crontab file
.LP
.nf
-\fB/usr/bin/crontab\fR \fB-l\fR [\fIusername\fR]
+\fB/usr/bin/crontab\fR \fB-l\fR [\fB-g\fR] [\fIusername\fR]
.fi
.LP
@@ -46,7 +47,7 @@ crontab \- user crontab file
.LP
.nf
-\fB/usr/xpg4/bin/crontab\fR \fB-l\fR [\fIusername\fR]
+\fB/usr/xpg4/bin/crontab\fR \fB-l\fR [\fB-g\fR] [\fIusername\fR]
.fi
.LP
@@ -66,7 +67,7 @@ crontab \- user crontab file
.LP
.nf
-\fB/usr/xpg6/bin/crontab\fR \fB-l\fR [\fIusername\fR]
+\fB/usr/xpg6/bin/crontab\fR \fB-l\fR [\fB-g\fR] [\fIusername\fR]
.fi
.LP
@@ -86,6 +87,17 @@ users' crontabs.
.LP
If \fBcrontab\fR is invoked with \fIfilename\fR, this overwrites an existing
\fBcrontab\fR entry for the user that invokes it.
+.sp
+.LP
+Cron supports a merged crontab with entries coming from either the user's
+\fB/var/spool/cron/crontabs\fR file or from the user's
+\fB/etc/cron.d/crontabs\fR file. The entries in the user's
+\fB/var/spool/cron/crontabs\fR file are editable whereas those in
+\fB/etc/cron.d/crontabs\fR are system-defined entries which may not
+be customized by the user. The dual set of crontab entries is only
+of interest to system-defined users such as \fBroot\fR. Except where
+otherwise explicitly indicated, all variants of the \fBcrontab\fR command
+act only on the editable crontab files found in \fB/var/spool/cron/crontabs\fR.
.SS "\fBcrontab\fR Access Control"
.sp
.LP
@@ -343,6 +355,9 @@ file using the \fB-r\fR option.
If \fIusername\fR is specified, the specified user's \fBcrontab\fR file is
edited, rather than the current user's \fBcrontab\fR file. This can only be
done by root or by a user with the \fBsolaris.jobs.admin\fR authorization.
+.sp
+Only the entries in the user's \fB/var/spool/cron/crontabs\fR file are
+editable.
.RE
.sp
@@ -354,6 +369,22 @@ done by root or by a user with the \fBsolaris.jobs.admin\fR authorization.
Lists the \fBcrontab\fR file for the invoking user. Only root or a user with
the \fBsolaris.jobs.admin\fR authorization can specify a username following the
\fB-l\fR option to list the \fBcrontab\fR file of the specified user.
+.sp
+Entries from the user's \fB/var/spool/cron/crontabs\fR file are listed, unless
+the \fB-g\fR option is given, in which case only entries from the user's
+\fB/etc/cron.d/crontabs\fR file are listed.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fB-g\fR\fR
+.ad
+.RS 6n
+In conjunction with the \fB-l\fR option, lists the global \fBcrontab\fR file
+for the invoking or specified user (if authorized) instead of the editable
+\fBcrontab\fR file. This option is not valid unless the \fB-l\fR option is
+also given.
.RE
.sp
@@ -583,6 +614,15 @@ list of denied users
.sp
.ne 2
.na
+\fB\fB/etc/cron.d/crontabs\fR\fR
+.ad
+.RS 28n
+system spool area for \fBcrontab\fR
+.RE
+
+.sp
+.ne 2
+.na
\fB\fB/var/cron/log\fR\fR
.ad
.RS 28n
diff --git a/usr/src/man/man1/ld.1 b/usr/src/man/man1/ld.1
index fdddea7ece..4ebe033dce 100644
--- a/usr/src/man/man1/ld.1
+++ b/usr/src/man/man1/ld.1
@@ -24,7 +24,7 @@ ld \- link-editor for object files
[\fB-z\fR combreloc | nocombreloc ] [\fB-z\fR defs | nodefs]
[\fB-z\fR direct | nodirect] [\fB-z\fR endfiltee]
[\fB-z\fR fatal-warnings | nofatal-warnings ] [\fB-z\fR finiarray=\fIfunction\fR]
-[\fB-z\fR globalaudit] [\fB-z\fR groupperm | nogroupperm]
+[\fB-z\fR globalaudit] [\fB-z\fR groupperm | nogroupperm] [\fB-z\fR help ]
[\fB-z\fR guidance[=\fIid1\fR,\fIid2\fR...] [\fB-z\fR help ]
[\fB-z\fR ignore | record] [\fB-z\fR initarray=\fIfunction\fR] [\fB-z\fR initfirst]
[\fB-z\fR interpose] [\fB-z\fR lazyload | nolazyload]
diff --git a/usr/src/man/man1/ld.so.1.1 b/usr/src/man/man1/ld.so.1.1
index 4b14ca4f1a..19afbbf3d6 100644
--- a/usr/src/man/man1/ld.so.1.1
+++ b/usr/src/man/man1/ld.so.1.1
@@ -1,9 +1,10 @@
'\"
.\" Copyright (c) 2009, Sun Microsystems, Inc. All Rights Reserved
+.\" Copyright (c) 2014, Joyent, Inc. All Rights Reserved
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
.\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with
.\" the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH LD.SO.1 1 "Oct 5, 2012"
+.TH LD.SO.1 1 "May 8, 2014"
.SH NAME
ld.so.1 \- runtime linker for dynamic objects
.SH SYNOPSIS
@@ -574,6 +575,24 @@ aid debugging. See also the \fBRTLD_DI_SETSIGNAL\fR request to
.RE
.sp
+.ne 2
+.na
+.BR LD_TOXIC_PATH,
+.BR LD_TOXIC_PATH_32,
+.BR LD_TOXIC_PATH_64,
+.ad
+.sp .6
+.RS 4n
+The toxic path refers to a set of paths where by, if
+.B ld.so.1
+were to load a dependency on that path, rather than loading it, it
+should kill the process. This is useful when having built libraries that
+while matching the native architecture of the system, are not suitable
+to be used, for example, libraries that that correspond to an alternate
+release of an operating system.
+.RE
+
+.sp
.LP
Notice that environment variable names beginning with the
characters '\fBLD_\fR' are reserved for possible future enhancements to \fBld\fR(1) and
diff --git a/usr/src/man/man1/nawk.1 b/usr/src/man/man1/nawk.1
index 425db5d299..2c27e1baa0 100644
--- a/usr/src/man/man1/nawk.1
+++ b/usr/src/man/man1/nawk.1
@@ -15,6 +15,12 @@ nawk \- pattern scanning and processing language
.SH SYNOPSIS
.LP
.nf
+\fB/usr/bin/awk\fR [\fB-F\fR \fIERE\fR] [\fB-v\fR \fIassignment\fR] \fI\&'program'\fR | \fB-f\fR \fIprogfile\fR...
+ [\fIargument\fR]...
+.fi
+
+.LP
+.nf
\fB/usr/bin/nawk\fR [\fB-F\fR \fIERE\fR] [\fB-v\fR \fIassignment\fR] \fI\&'program'\fR | \fB-f\fR \fIprogfile\fR...
[\fIargument\fR]...
.fi
@@ -28,7 +34,8 @@ nawk \- pattern scanning and processing language
.SH DESCRIPTION
.sp
.LP
-The \fB/usr/bin/nawk\fR and \fB/usr/xpg4/bin/awk\fR utilities execute
+The \fB/usr/bin/awk\fR, \fB/usr/bin/nawk\fR and \fB/usr/xpg4/bin/awk\fR
+utilities execute
\fIprogram\fRs written in the \fBnawk\fR programming language, which is
specialized for textual data manipulation. A \fBnawk\fR \fIprogram\fR is a
sequence of patterns and corresponding actions. The string specifying
diff --git a/usr/src/man/man1/proc.1 b/usr/src/man/man1/proc.1
index cd20e215e3..bf0e588012 100644
--- a/usr/src/man/man1/proc.1
+++ b/usr/src/man/man1/proc.1
@@ -2,10 +2,11 @@
.\" Copyright (c) 2008, Sun Microsystems, Inc. All Rights Reserved
.\" Portions Copyright 2008 Chad Mynhier
.\" Copyright 2012 DEY Storage Systems, Inc. All rights reserved.
+.\" Copyright 2013 (c) Joyent, Inc. All rights reserved.
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH PROC 1 "Oct 23, 2012"
+.TH PROC 1 "Apr 01, 2013"
.SH NAME
proc, pflags, pcred, pldd, psig, pstack, pfiles, pwdx, pstop, prun, pwait,
ptime \- proc tools
@@ -72,7 +73,7 @@ ptime \- proc tools
.LP
.nf
-\fB/usr/bin/ptime\fR [\fB-Fm\fR] [\fB-p\fR] \fIpid\fR...
+\fB/usr/bin/ptime\fR [\fB-Fm\fR] \fB-p pidlist\fR
.fi
.LP
@@ -215,8 +216,10 @@ Time the \fIcommand\fR, like \fBtime\fR(1), but using microstate accounting for
reproducible precision. Unlike \fBtime\fR(1), children of the command are not
timed.
.sp
-If the \fB-p\fR \fIpid\fR version is used, display a snapshot of timing
-statistics for the specified \fIpid\fR.
+If the \fB-p\fR \fIpidlist\fR version is used, display a snapshot of timing
+statistics for the specified processes. The \fIpidlist\fR may either be a comma
+delineated list or a space delineated list. Space delineated lists must be
+properly quoted to assure that they are in a single argument.
.RE
.SH OPTIONS
diff --git a/usr/src/man/man1/ps.1 b/usr/src/man/man1/ps.1
index 7ff574f12b..2b0d435c44 100644
--- a/usr/src/man/man1/ps.1
+++ b/usr/src/man/man1/ps.1
@@ -38,6 +38,9 @@ displayed is controlled by the options.
Some options accept lists as arguments. Items in a list can be either separated
by commas or else enclosed in quotes and separated by commas or spaces. Values
for \fIproclist\fR and \fIgrplist\fR must be numeric.
+.sp
+.LP
+The \fBps\fR command also accepts BSD-style options. See \fBps\fR(1b).
.SH OPTIONS
.sp
.LP
@@ -1311,7 +1314,8 @@ Standard See \fBstandards\fR(5).
.sp
.LP
\fBkill\fR(1), \fBlgrpinfo\fR(1), \fBnice\fR(1), \fBpagesize\fR(1),
-\fBpmap\fR(1), \fBpriocntl\fR(1), \fBwho\fR(1), \fBgetty\fR(1M), \fBproc\fR(4),
+\fBpmap\fR(1), \fBpriocntl\fR(1), \fBps\fR(1b), \fBwho\fR(1), \fBgetty\fR(1M),
+\fBproc\fR(4),
\fBttysrch\fR(4), \fBattributes\fR(5), \fBenviron\fR(5),
\fBresource_controls\fR(5), \fBstandards\fR(5), \fBzones\fR(5)
.SH NOTES
diff --git a/usr/src/man/man1/sed.1 b/usr/src/man/man1/sed.1
index ded1c15fa3..1d54a56ade 100644
--- a/usr/src/man/man1/sed.1
+++ b/usr/src/man/man1/sed.1
@@ -1,8 +1,6 @@
.\" Copyright (c) 1992, 1993
.\" The Regents of the University of California. All rights reserved.
.\"
-.\" Copyright 2011 Nexenta Systems, Inc. All rights reserved.
-.\"
.\" This code is derived from software contributed to Berkeley by
.\" the Institute of Electrical and Electronics Engineers, Inc.
.\"
@@ -30,7 +28,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.TH SED 1 "Nov 2, 2011"
+.TH SED 1 "Jun 16, 2014"
.SH NAME
\fBsed\fP
\- stream editor
@@ -45,7 +43,8 @@
[\fB\-Ealnr\fP]
[\fB\-e\fP \fIcommand\fP]
[\fB\-f\fP \fIcommand_file\fP]
-[\fB\-I\fP[\fIextension\fP] | \fB\-i\fP[\fIextension\fP]]
+[\fB\-I\fP \fIextension\fP]
+[\fB\-i\fP \fIextension\fP]
[\fIfile ...\fP]
.SH DESCRIPTION
The
@@ -70,7 +69,7 @@ The following options are available:
Interpret regular expressions as extended (modern) regular expressions
rather than basic regular expressions (BRE's).
The
-\fBre_format\fP(7)
+\fBregex\fP(5)
manual page fully describes both formats.
.TP
\fB\-a\fP
@@ -98,11 +97,16 @@ Append the editing commands found in the file
to the list of commands.
The editing commands should each be listed on a separate line.
.TP
-\fB\-I\fP[\fIextension\fP]
-Edit files in-place, saving backups if \fIextension\fP was specified.
-It is not recommended to omit saving backups when in-place editing files,
-as you risk corruption or partial content in situations where disk
-space is exhausted, etc.
+\fB\-I\fP \fIextension\fP
+Edit files in-place, saving backups with the specified
+\fIextension\fP.
+If a zero-length
+\fIextension\fP
+is given, no backup will be saved.
+It is not recommended to give a zero-length
+\fIextension\fP
+when in-place editing files, as you risk corruption or partial content
+in situations where disk space is exhausted, etc.
Note that in-place editing with
\fB\-I\fP
@@ -120,7 +124,7 @@ where using
\fB\-i\fP
is desired.
.TP
-\fB\-i\fP[\fIextension\fP]
+\fB\-i\fP \fIextension\fP
Edit files in-place similarly to
\fB\-I\fP,
but treat each file independently from other files.
@@ -234,7 +238,7 @@ function.
The regular expressions used in
\fB,\fP
by default, are basic regular expressions (BREs, see
-\fBre_format\fP(7)
+\fBregex\fP(5)
for more information), but extended (modern) regular expressions can be used
instead if the
\fB\-E\fP
@@ -505,7 +509,7 @@ where
``#''
is a digit, is replaced by the text matched
by the corresponding backreference expression (see
-\fBre_format\fP(7)) .
+\fBregex\fP(5)) .
A line can be split by substituting a newline character into it.
To specify a newline character in the replacement string, precede it with
@@ -625,8 +629,8 @@ The \fBsed\fP utility exits 0 on success, and >0 if an error occurs.
\fBawk\fP(1),
\fBed\fP(1),
\fBgrep\fP(1),
-\fBregex\fP(3),
-\fBre_format\fP(5)
+\fBregex\fP(3C),
+\fBregex\fP(5)
.SH STANDARDS
The
\fBsed\fP
diff --git a/usr/src/man/man1/zlogin.1 b/usr/src/man/man1/zlogin.1
index 2da43d568c..129718e11e 100644
--- a/usr/src/man/man1/zlogin.1
+++ b/usr/src/man/man1/zlogin.1
@@ -13,6 +13,7 @@
.\" Portions Copyright [yyyy] [name of copyright owner]
.\" Copyright 2013 DEY Storage Systems, Inc.
.\" Copyright (c) 2014 Gary Mills
+.\" Copyright (c) 2014, Joyent, Inc. All Rights Reserved
.TH ZLOGIN 1 "Jan 22, 2014"
.SH NAME
zlogin \- enter a zone
@@ -24,7 +25,7 @@ zlogin \- enter a zone
.LP
.nf
-\fBzlogin\fR [\fB-nEQS\fR] [\fB-e\fR \fIc\fR] [\fB-l\fR \fIusername\fR] \fIzonename\fR \fIutility\fR
+\fBzlogin\fR [\fB-inEQS\fR] [\fB-e\fR \fIc\fR] [\fB-l\fR \fIusername\fR] \fIzonename\fR \fIutility\fR
[\fIargument\fR]...
.fi
@@ -43,12 +44,12 @@ utility.
\fBInteractive Mode\fR
.ad
.RS 24n
-If no utility argument is given and the stdin file descriptor for the
-\fBzlogin\fR process is a tty device, \fBzlogin\fR operates in \fBinteractive
-mode\fR. In this mode, \fBzlogin\fR creates a new pseudo terminal for use
-within the login session. Programs requiring a tty device, for example,
-\fBvi\fR(1), work properly in this mode. In this mode, \fBzlogin\fR invokes
-\fBlogin\fR(1) to provide a suitable login session.
+If no utility argument is given or if the \fB-i\fR option is specified, and the
+stdin file descriptor for the \fBzlogin\fR process is a tty device, \fBzlogin\fR
+operates in \fBinteractive mode\fR. In this mode, \fBzlogin\fR creates a new
+pseudo terminal for use within the login session. Programs requiring a tty
+device, for example, \fBvi\fR(1), work properly in this mode. In this mode,
+\fBzlogin\fR invokes \fBlogin\fR(1) to provide a suitable login session.
.RE
.sp
@@ -57,11 +58,12 @@ within the login session. Programs requiring a tty device, for example,
\fBNon-Interactive Mode\fR
.ad
.RS 24n
-If a utility is specified, \fBzlogin\fR operates in \fBnon-interactive mode\fR.
-This mode can be useful for script authors since stdin, stdout, and stderr are
-preserved and the exit status of \fIutility\fR is returned upon termination. In
-this mode, \fBzlogin\fR invokes \fBsu\fR(1M) in order to set up the user's
-environment and to provide a login environment.
+If a utility is specified and the \fB-i\fR option is not specified, \fBzlogin\fR
+operates in \fBnon-interactive mode\fR. This mode can be useful for script
+authors since stdin, stdout, and stderr are preserved and the exit status of
+\fIutility\fR is returned upon termination. In this mode, \fBzlogin\fR invokes
+\fBsu\fR(1M) in order to set up the user's environment and to provide a login
+environment.
.sp
The specified command is passed as a string and interpreted by a shell running
in the non-global zone. See \fBrsh\fR(1).
@@ -116,6 +118,16 @@ login by using the escape sequence character.
.sp
.ne 2
.na
+\fB\fB-i\fR\fR
+.ad
+.RS 15n
+Forces interactive mode when a utility argument is specified.
+.RE
+
+.sp
+.sp
+.ne 2
+.na
\fB\fB-l\fR \fIusername\fR\fR
.ad
.RS 15n
diff --git a/usr/src/man/man1m/Makefile b/usr/src/man/man1m/Makefile
index 133bd112f1..a95323906e 100644
--- a/usr/src/man/man1m/Makefile
+++ b/usr/src/man/man1m/Makefile
@@ -13,9 +13,10 @@
# Copyright 2011, Richard Lowe
# Copyright (c) 2012, Joyent, Inc. All rights reserved.
# Copyright 2013 Nexenta Systems, Inc. All rights reserved.
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
#
-include $(SRC)//Makefile.master
+include $(SRC)//Makefile.master
MANSECT= 1m
@@ -551,7 +552,10 @@ _MANFILES= 6to4relay.1m \
uucleanup.1m \
uusched.1m \
uuxqt.1m \
+ vfsstat.1m \
vmstat.1m \
+ vndadm.1m \
+ vndstat.1m \
volcopy.1m \
volcopy_ufs.1m \
vscanadm.1m \
diff --git a/usr/src/man/man1m/dladm.1m b/usr/src/man/man1m/dladm.1m
index c8b2dd8e3e..6a390899ab 100644
--- a/usr/src/man/man1m/dladm.1m
+++ b/usr/src/man/man1m/dladm.1m
@@ -16,7 +16,7 @@ dladm \- administer data links
.LP
.nf
\fBdladm show-link\fR [\fB-P\fR] [\fB-s\fR [\fB-i\fR \fIinterval\fR]] [[\fB-p\fR] \fB-o\fR \fIfield\fR[,...]] [\fIlink\fR]
-\fBdladm rename-link\fR [\fB-R\fR \fIroot-dir\fR] \fIlink\fR \fInew-link\fR
+\fBdladm rename-link\fR [\fB-R\fR \fIroot-dir\fR] [\fB-z\fR \fIzonename\fR] \fIlink\fR \fInew-link\fR
.fi
.LP
@@ -99,9 +99,11 @@ dladm \- administer data links
.LP
.nf
-\fBdladm set-linkprop\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] \fB-p\fR \fIprop\fR=\fIvalue\fR[,...] \fIlink\fR
-\fBdladm reset-linkprop\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] [\fB-p\fR \fIprop\fR[,...]] \fIlink\fR
-\fBdladm show-linkprop\fR [\fB-P\fR] [[\fB-c\fR] \fB-o\fR \fIfield\fR[,...]] [\fB-p\fR \fIprop\fR[,...]] [\fIlink\fR]
+\fBdladm set-linkprop\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] [\fB-z\fR \fIzonename\fR] \fB-p\fR \fIprop\fR=\fIvalue\fR[,...]
+ \fIlink\fR
+\fBdladm reset-linkprop\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] [\fB-z\fR \fIzonename\fR] [\fB-p\fR \fIprop\fR[,...]] \fIlink\fR
+\fBdladm show-linkprop\fR [\fB-P\fR] [\fB-z\fR \fIzonename\fR] [[\fB-c\fR] \fB-o\fR \fIfield\fR[,...]]
+ [\fB-p\fR \fIprop\fR[,...]] [\fIlink\fR]
.fi
.LP
@@ -116,9 +118,9 @@ dladm \- administer data links
\fBdladm create-vnic\fR [\fB-t\fR] \fB-l\fR \fIlink\fR [\fB-R\fR \fIroot-dir\fR] [\fB-m\fR \fIvalue\fR | auto |
{factory \fB-n\fR \fIslot-identifier\fR]} | {random [\fB-r\fR \fIprefix\fR]}]
[\fB-v\fR \fIvlan-id\fR] [\fB-p\fR \fIprop\fR=\fIvalue\fR[,...]] \fIvnic-link\fR
-\fBdladm delete-vnic\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] \fIvnic-link\fR
+\fBdladm delete-vnic\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] [\fB-z\fR \fIzonename\fR] \fIvnic-link\fR
\fBdladm show-vnic\fR [\fB-pP\fR] [\fB-s\fR [\fB-i\fR \fIinterval\fR]] [\fB-o\fR \fIfield\fR[,...]]
- [\fB-l\fR \fIlink\fR] [\fIvnic-link\fR]
+ [\fB-l\fR \fIlink\fR] [\fB-z\fR \fIzonename\fR] [\fIvnic-link\fR]
.fi
.LP
@@ -568,8 +570,7 @@ will be displayed only once.
.sp
.ne 2
.na
-\fB\fBdladm rename-link\fR [\fB-R\fR \fIroot-dir\fR] \fIlink\fR
-\fInew-link\fR\fR
+\fB\fBdladm rename-link\fR [\fB-R\fR \fIroot-dir\fR] [\fB-z\fR \fIzonename\fR] \fIlink\fR \fInew-link\fR\fR
.ad
.sp .6
.RS 4n
@@ -587,6 +588,16 @@ examples of how this subcommand is used.
See "Options," above.
.RE
+.sp
+.ne 2
+.na
+\fB\fB-z\fR \fIzonename\fR
+.ad
+.sp .6
+.RS 4n
+A link assigned to a zone can only be renamed while the zone is in the ready state.
+.RE
+
.RE
.sp
@@ -3192,8 +3203,7 @@ Extended output is displayed for \fBPTYPE\fR values of \fBcurrent\fR,
.sp
.ne 2
.na
-\fB\fBdladm set-linkprop\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] \fB-p\fR
-\fIprop\fR=\fIvalue\fR[,...] \fIlink\fR\fR
+\fB\fBdladm set-linkprop\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] [\fB-z\fR \fIzonename\fR] \fB-p\fR \fIprop\fR=\fIvalue\fR[,...] \fIlink\fR\fR
.ad
.sp .6
.RS 4n
@@ -3225,6 +3235,16 @@ See "Options," above.
.sp
.ne 2
.na
+\fB\fB-z\fR \fIzonenme\fR
+.ad
+.sp .6
+.RS 4n
+Operate on a link that has been delegated to the specified zone.
+.RE
+
+.sp
+.ne 2
+.na
\fB\fB-p\fR \fIprop\fR=\fIvalue\fR[,...], \fB--prop\fR
\fIprop\fR=\fIvalue\fR[,...]\fR
.ad
@@ -3244,8 +3264,7 @@ same value.
.sp
.ne 2
.na
-\fB\fBdladm reset-linkprop\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] [\fB-p\fR
-\fIprop\fR,...] \fIlink\fR\fR
+\fB\fBdladm reset-linkprop\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] [\fB-z\fR \fIzonename\fR] [\fB-p\fR \fIprop\fR,...] \fIlink\fR\fR
.ad
.sp .6
.RS 4n
@@ -3277,6 +3296,16 @@ See "Options," above.
.sp
.ne 2
.na
+\fB\fB-z\fR \fIzonenme\fR
+.ad
+.sp .6
+.RS 4n
+Operate on a link that has been delegated to the specified zone.
+.RE
+
+.sp
+.ne 2
+.na
\fB\fB-p\fR \fIprop, ...\fR, \fB--prop\fR=\fIprop, ...\fR\fR
.ad
.sp .6
@@ -3291,8 +3320,7 @@ the same value.
.sp
.ne 2
.na
-\fB\fBdladm show-linkprop\fR [\fB-P\fR] [[\fB-c\fR] \fB-o\fR
-\fIfield\fR[,...]][\fB-p\fR \fIprop\fR[,...]] [\fIlink\fR]\fR
+\fB\fBdladm show-linkprop\fR [\fB-P\fR] [\fB-z\fR \fIzonename\fR] [[\fB-c\fR] \fB-o\fR \fIfield\fR[,...]][\fB-p\fR \fIprop\fR[,...]] [\fIlink\fR]\fR
.ad
.sp .6
.RS 4n
@@ -3410,6 +3438,16 @@ Display persistent link property information
.sp
.ne 2
.na
+\fB\fB-z\fR \fIzonenme\fR
+.ad
+.sp .6
+.RS 4n
+Operate on a link that has been delegated to the specified zone.
+.RE
+
+.sp
+.ne 2
+.na
\fB\fB-p\fR \fIprop, ...\fR, \fB--prop\fR=\fIprop, ...\fR\fR
.ad
.sp .6
@@ -3727,8 +3765,7 @@ A comma-separated list of properties to set to the specified values.
.sp
.ne 2
.na
-\fB\fBdladm delete-vnic\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR]
-\fIvnic-link\fR\fR
+\fB\fBdladm delete-vnic\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] [\fB-z\fR \fIzonename\fR] \fIvnic-link\fR\fR
.ad
.sp .6
.RS 4n
@@ -3754,13 +3791,22 @@ next reboot.
See "Options," above.
.RE
+.sp
+.ne 2
+.na
+\fB\fB-z\fR \fIzonenme\fR
+.ad
+.sp .6
+.RS 4n
+Operate on a link that has been delegated to the specified zone.
+.RE
+
.RE
.sp
.ne 2
.na
-\fB\fBdladm show-vnic\fR [\fB-pP\fR] [\fB-s\fR [\fB-i\fR \fIinterval\fR]]
-[\fB-o\fR \fIfield\fR[,...]] [\fB-l\fR \fIlink\fR] [\fIvnic-link\fR]\fR
+\fB\fBdladm show-vnic\fR [\fB-pP\fR] [\fB-s\fR [\fB-i\fR \fIinterval\fR]] [\fB-o\fR \fIfield\fR[,...]] [\fB-l\fR \fIlink\fR] [\fB-z\fR \fIzonename\fR] [\fIvnic-link\fR]\fR
.ad
.sp .6
.RS 4n
@@ -3903,6 +3949,16 @@ will be displayed only once.
Display information for all VNICs on the named link.
.RE
+.sp
+.ne 2
+.na
+\fB\fB-z\fR \fIzonenme\fR
+.ad
+.sp .6
+.RS 4n
+Operate on a link that has been delegated to the specified zone.
+.RE
+
.RE
.sp
diff --git a/usr/src/man/man1m/flowadm.1m b/usr/src/man/man1m/flowadm.1m
index b12ce6af96..c9d5b846d5 100644
--- a/usr/src/man/man1m/flowadm.1m
+++ b/usr/src/man/man1m/flowadm.1m
@@ -1,5 +1,6 @@
'\" te
.\" Copyright (c) 2009, Sun Microsystems, Inc. All Rights Reserved
+.\" Copyright (c) 2011, Joyent, Inc. All Rights Reserved
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
@@ -11,14 +12,14 @@ services, containers, and virtual machines
.LP
.nf
\fBflowadm show-flow\fR [\fB-pP\fR] [\fB-S\fR] [\fB-s\fR [\fB-i\fR \fIinterval\fR]] [\fB-l\fR \fIlink\fR]
- [\fB-o\fR \fIfield\fR[,...]] [\fIflow\fR]
+ [\fB-o\fR \fIfield\fR[,...]] [\fB-z\fR \fIzonename\fR] [\fIflow\fR]
.fi
.LP
.nf
-\fBflowadm add-flow\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] \fB-l\fR \fIlink\fR \fB-a\fR \fIattr\fR=\fIvalue\fR[,...]
- \fB-p\fR \fIprop\fR=\fIvalue\fR[,...] \fIflow\fR
-\fBflowadm remove-flow\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] {\fB-l\fR \fIlink\fR | \fIflow\fR}
+\fBflowadm add-flow\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] [\fB-z\fR \fIzonename\fR] \fB-l\fR \fIlink\fR
+ \fB-a\fR \fIattr\fR=\fIvalue\fR[,...] \fB-p\fR \fIprop\fR=\fIvalue\fR[,...] \fIflow\fR
+\fBflowadm remove-flow\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] [\fB-z\fR \fIzonename\fR] {\fB-l\fR \fIlink\fR | \fIflow\fR}
.fi
.LP
@@ -77,8 +78,7 @@ The following subcommands are supported:
.sp
.ne 2
.na
-\fB\fBflowadm show-flow\fR [\fB-pP\fR] [\fB-s\fR [\fB-i\fR \fIinterval\fR]]
-[\fB-o\fR \fIfield\fR[,...]] [\fB-l\fR \fIlink\fR] [\fIflow\fR]\fR
+\fB\fBflowadm show-flow\fR [\fB-pP\fR] [\fB-s\fR [\fB-i\fR \fIinterval\fR]] [\fB-o\fR \fIfield\fR[,...]] [\fB-l\fR \fIlink\fR] [\fB-z\fR \fIzonename\fR] [\fIflow\fR]\fR
.ad
.sp .6
.RS 4n
@@ -223,14 +223,22 @@ Display information for all flows on the named link or information for the
named flow.
.RE
+.sp
+.ne 2
+.na
+\fB\fB-z\fR \fIzonename\fR
+.ad
+.sp .6
+.RS 4n
+Operate on a link that has been delegated to the specified zone.
+.RE
+
.RE
.sp
.ne 2
.na
-\fB\fBflowadm add-flow\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] \fB-l\fR
-\fIlink\fR \fB-a\fR \fIattr\fR=\fIvalue\fR[,...] \fB-p\fR
-\fIprop\fR=\fIvalue\fR[,...] \fIflow\fR\fR
+\fB\fBflowadm add-flow\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] [\fB-z\fR \fIzonename\fR] \fB-l\fR \fIlink\fR \fB-a\fR \fIattr\fR=\fIvalue\fR[,...] \fB-p\fR \fIprop\fR=\fIvalue\fR[,...] \fIflow\fR\fR
.ad
.sp .6
.RS 4n
@@ -268,6 +276,16 @@ persistent creation.
.sp
.ne 2
.na
+\fB\fB-z\fR \fIzonename\fR
+.ad
+.sp .6
+.RS 4n
+Operate on a link that has been delegated to the specified zone.
+.RE
+
+.sp
+.ne 2
+.na
\fB\fB-l\fR \fIlink\fR, \fB--link\fR=\fIlink\fR\fR
.ad
.sp .6
@@ -300,8 +318,7 @@ A comma-separated list of properties to be set to the specified values.
.sp
.ne 2
.na
-\fB\fBflowadm remove-flow\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] \fB-l\fR
-{\fIlink\fR | \fIflow\fR}\fR
+\fB\fBflowadm remove-flow\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] [\fB-z\fR \fIzonename\fR] \fB-l\fR {\fIlink\fR | \fIflow\fR}\fR
.ad
.sp .6
.RS 4n
@@ -331,6 +348,16 @@ persistent removal.
.sp
.ne 2
.na
+\fB\fB-z\fR \fIzonename\fR
+.ad
+.sp .6
+.RS 4n
+Operate on a link that has been delegated to the specified zone.
+.RE
+
+.sp
+.ne 2
+.na
\fB\fB-l\fR \fIlink\fR | \fIflow\fR, \fB--link\fR=\fIlink\fR | \fIflow\fR\fR
.ad
.sp .6
diff --git a/usr/src/man/man1m/ipf.1m b/usr/src/man/man1m/ipf.1m
index 69cdacf689..93267db577 100644
--- a/usr/src/man/man1m/ipf.1m
+++ b/usr/src/man/man1m/ipf.1m
@@ -2,15 +2,16 @@
.\" To view license terms, attribution, and copyright for IP Filter, the default path is /usr/lib/ipf/IPFILTER.LICENCE. If the Solaris operating environment has been installed anywhere other than the default, modify the given path to access the file at the installed
.\" location.
.\" Portions Copyright (c) 2009, Sun Microsystems Inc. All Rights Reserved.
-.TH IPF 1M "Feb 25, 2009"
+.\" Portions Copyright (c) 2013, Joyent, Inc. All Rights Reserved.
+.TH IPF 1M "Oct 30, 2013"
.SH NAME
ipf \- alter packet filtering lists for IP packet input and output
.SH SYNOPSIS
.LP
.nf
-\fBipf\fR [\fB-6AdDEInoPRrsvVyzZ\fR] [\fB-l\fR block | pass | nomatch]
+\fBipf\fR [\fB-6AdDEGInoPRrsvVyzZ\fR] [\fB-l\fR block | pass | nomatch]
[\fB-T\fR \fIoptionlist\fR] [\fB-F\fR i | o | a | s | S] \fB-f\fR \fIfilename\fR
- [\fB-f\fR \fIfilename\fR...]
+ [\fB-f\fR \fIfilename\fR...] [\fIzonename\fR]
.fi
.SH DESCRIPTION
@@ -257,6 +258,17 @@ packet filter rule lists.
.sp
.ne 2
.na
+\fB\fB-G\fR\fR
+.ad
+.sp .6
+.RS 4n
+Make changes to the Global Zone-controlled ipfilter for the zone given as an
+argument. See the \fBZONES\fR section for more information.
+.RE
+
+.sp
+.ne 2
+.na
\fB\fB-I\fR\fR
.ad
.sp .6
@@ -459,6 +471,22 @@ Zero global statistics held in the kernel for filtering only. This does not
affect fragment or state statistics.
.RE
+.SH ZONES
+.sp
+.LP
+Each non-global zone has two ipfilter instances: the in-zone ipfilter, which
+can be controlled from both the zone itself and the global zone, and the
+Global Zone-controlled (GZ-controlled) instance, which can only be controlled
+from the Global Zone. The non-global zone is not able to observe or control
+the GZ-controlled ipfilter.
+
+ipf optionally takes a zone name as an argument, which will change the
+ipfilter settings for that zone, rather than the current one. The zonename
+option is only available in the Global Zone. Using it in any other zone will
+return an error. If the \fB-G\fR option is specified with this argument, the
+Global Zone-controlled ipfilter is operated on. If \fB-G\fR is not specified,
+the in-zone ipfilter is operated on.
+
.SH FILES
.sp
.ne 2
@@ -519,7 +547,7 @@ Interface Stability Committed
.LP
\fBipfstat\fR(1M), \fBipmon\fR(1M), \fBipnat\fR(1M), \fBippool\fR(1M),
\fBsvcadm\fR(1M), \fBsvc.ipfd\fR(1M), \fBipf\fR(4), \fBipnat.conf\fR(4),
-\fBippool\fR(4), \fBattributes\fR(5), \fBipfilter\fR(5)
+\fBippool\fR(4), \fBattributes\fR(5), \fBipfilter\fR(5), \fBzones(5)\fR
.sp
.LP
\fI\fR
diff --git a/usr/src/man/man1m/ipfs.1m b/usr/src/man/man1m/ipfs.1m
index ea2f16c065..5514e96ea9 100644
--- a/usr/src/man/man1m/ipfs.1m
+++ b/usr/src/man/man1m/ipfs.1m
@@ -2,43 +2,44 @@
.\" To view license terms, attribution, and copyright for IP Filter, the default path is /usr/lib/ipf/IPFILTER.LICENCE. If the Solaris operating environment has been installed anywhere other than the default, modify the given path to access the file at the installed
.\" location.
.\" Portions Copyright (c) 2008, Sun Microsystems Inc. All Rights Reserved.
-.TH IPFS 1M "Apr 3, 2008"
+.\" Portions Copyright (c) 2013, Joyent, Inc. All Rights Reserved.
+.TH IPFS 1M "Oct 30, 2013"
.SH NAME
ipfs \- saves and restores information for NAT and state tables
.SH SYNOPSIS
.LP
.nf
-\fBipfs\fR [\fB-nv\fR] \fB-l\fR
+\fBipfs\fR [\fB-G\fR | \fB-z\fR \fIzonename\fR] [\fB-nv\fR] \fB-l\fR
.fi
.LP
.nf
-\fBipfs\fR [\fB-nv\fR] \fB-u\fR
+\fBipfs\fR [\fB-G\fR | \fB-z\fR \fIzonename\fR] [\fB-nv\fR] \fB-u\fR
.fi
.LP
.nf
-\fBipfs\fR [\fB-nv\fR] [\fB-d\fR \fIdirname\fR] \fB-R\fR
+\fBipfs\fR [\fB-G\fR | \fB-z\fR \fIzonename\fR] [\fB-nv\fR] [\fB-d\fR \fIdirname\fR] \fB-R\fR
.fi
.LP
.nf
-\fBipfs\fR [\fB-nv\fR] [\fB-d\fR \fIdirname\fR] \fB-W\fR
+\fBipfs\fR [\fB-G\fR | \fB-z\fR \fIzonename\fR] [\fB-nv\fR] [\fB-d\fR \fIdirname\fR] \fB-W\fR
.fi
.LP
.nf
-\fBipfs\fR [\fB-nNSv\fR] [\fB-f\fR \fIfilename\fR] \fB-r\fR
+\fBipfs\fR [\fB-G\fR | \fB-z\fR \fIzonename\fR] [\fB-nNSv\fR] [\fB-f\fR \fIfilename\fR] \fB-r\fR
.fi
.LP
.nf
-\fBipfs\fR [\fB-nNSv\fR] [\fB-f\fR \fIfilename\fR] \fB-w\fR
+\fBipfs\fR [\fB-G\fR | \fB-z\fR \fIzonename\fR] [\fB-nNSv\fR] [\fB-f\fR \fIfilename\fR] \fB-w\fR
.fi
.LP
.nf
-\fBipfs\fR [\fB-nNSv\fR] \fB-f\fR \fIfilename\fR \fB-i\fR \fI<if1>\fR,\fI<if2>\fR
+\fBipfs\fR [\fB-G\fR | \fB-z\fR \fIzonename\fR] [\fB-nNSv\fR] \fB-f\fR \fIfilename\fR \fB-i\fR \fI<if1>\fR,\fI<if2>\fR
.fi
.SH DESCRIPTION
@@ -164,6 +165,30 @@ can be changed with the \fB-d\fR option. The state tables are locked at the
beginning of this operation and unlocked once complete.
.RE
+.sp
+.ne 2
+.na
+\fB\fB-z\fR \fIzonename\fR\fR
+.ad
+.RS 6n
+Operate on the in-zone state information for the specified zone. If neither
+this option nor \fB-G\fR is specified, the current zone is used. This command
+is only available in the Global Zone. See \fBZONES\fR in \fBipf\fR(1m) for
+more information.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fB-G\fR \fIzonename\fR\fR
+.ad
+.RS 6n
+Operate on the global zone controlled state information for the specified
+zone. If neither this option nor \fB-z\fR is specified, the current zone is
+used. This command is only available in the Global Zone. See \fBZONES\fR in
+\fBipf\fR(1m) for more information.
+.RE
+
.SH FILES
.RS +4
.TP
@@ -214,7 +239,8 @@ Interface Stability Committed
.SH SEE ALSO
.sp
.LP
-\fBipf\fR(1M), \fBipmon\fR(1M), \fBipnat\fR(1M), \fBattributes\fR(5)
+\fBipf\fR(1M), \fBipmon\fR(1M), \fBipnat\fR(1M), \fBattributes\fR(5),
+\fBzones(5)\fR
.SH DIAGNOSTICS
.sp
.LP
diff --git a/usr/src/man/man1m/ipfstat.1m b/usr/src/man/man1m/ipfstat.1m
index 212de841a9..ba46c2c44b 100644
--- a/usr/src/man/man1m/ipfstat.1m
+++ b/usr/src/man/man1m/ipfstat.1m
@@ -2,7 +2,8 @@
.\" To view license terms, attribution, and copyright for IP Filter, the default path is /usr/lib/ipf/IPFILTER.LICENCE. If the Solaris operating environment has been installed anywhere other than the default, modify the given path to access the file at the installed
.\" location.
.\" Portions Copyright (c) 2008, Sun Microsystems Inc. All Rights Reserved.
-.TH IPFSTAT 1M "Apr 3, 2008"
+.\" Portions Copyright (c) 2013, Joyent, Inc. All Rights Reserved.
+.TH IPFSTAT 1M "Oct 30, 2013"
.SH NAME
ipfstat \- reports on packet filter statistics and filter list
.SH SYNOPSIS
@@ -14,7 +15,7 @@ ipfstat \- reports on packet filter statistics and filter list
.LP
.nf
\fBipfstat\fR [\fB-C\fR] [\fB-D\fR \fIaddrport\fR] [\fB-P\fR \fIprotocol\fR] [\fB-S\fR \fIaddrport\fR]
- [\fB-T\fR \fIrefreshtime\fR]
+ [\fB-T\fR \fIrefreshtime\fR] [\fB-G\fR | \fB-z\fR \fIzonename\fR]
.fi
.SH DESCRIPTION
@@ -351,6 +352,30 @@ shows the process table. States can be sorted in a number of different ways.
Turn verbose mode on. Displays additional debugging information.
.RE
+.sp
+.ne 2
+.na
+\fB\fB-z\fR \fIzonename\fR\fR
+.ad
+.RS 18n
+Report the in-zone statistics for the specified zone. If neither this option
+nor \fB-G\fR is specified, the current zone is used. This command is only
+available in the Global Zone. See \fBZONES\fR in \fBipf\fR(1m) for more
+information.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fB-G\fR \fIzonename\fR\fR
+.ad
+.RS 18n
+Report the global zone controlled statistics for the specified zone. If
+neither this option nor \fB-z\fR is specified, the current zone is used. This
+command is only available in the Global Zone. See \fBZONES\fR in \fBipf\fR(1m)
+for more information.
+.RE
+
.SH FILES
.RS +4
.TP
@@ -396,7 +421,7 @@ Interface Stability Committed
.sp
.LP
\fBipf\fR(1M), \fBkstat\fR(1M), \fBkstat\fR(3KSTAT), \fBattributes\fR(5),
-\fBipfilter\fR(5)
+\fBipfilter\fR(5), \fBzones(5)\fR
.sp
.LP
\fI\fR
diff --git a/usr/src/man/man1m/ipmon.1m b/usr/src/man/man1m/ipmon.1m
index 2094979915..07574790b9 100644
--- a/usr/src/man/man1m/ipmon.1m
+++ b/usr/src/man/man1m/ipmon.1m
@@ -2,14 +2,15 @@
.\" To view license terms, attribution, and copyright for IP Filter, the default path is /usr/lib/ipf/IPFILTER.LICENCE. If the Solaris operating environment has been installed anywhere other than the default, modify the given path to access the file at the installed
.\" location.
.\" Portions Copyright (c) 2008, Sun Microsystems Inc. All Rights Reserved.
-.TH IPMON 1M "Apr 3, 2008"
+.\" Portions Copyright (c) 2013, Joyent, Inc. All Rights Reserved.
+.TH IPMON 1M "Oct 30, 2013"
.SH NAME
ipmon \- monitors /dev/ipl for logged packets
.SH SYNOPSIS
.LP
.nf
\fBipmon\fR [\fB-abDFhnpstvxX\fR] [\fB-N\fR \fIdevice\fR] [ [o] [NSI]] [\fB-O\fR [NSI]]
- [\fB-P\fR \fIpidfile\fR] [\fB-S\fR \fIdevice\fR] [\fB-f\fR \fIdevice\fR] [\fIfilename\fR]
+ [\fB-P\fR \fIpidfile\fR] [\fB-S\fR \fIdevice\fR] [\fB-f\fR \fIdevice\fR] [\fB-G\fR | \fB-z\fR \fIzonename\fR] [\fIfilename\fR]
.fi
.SH DESCRIPTION
@@ -343,6 +344,32 @@ Show the packet data in hex.
Show the log header record data in hex.
.RE
+.sp
+.ne 2
+.na
+\fB\fB-z\fR \fIzonename\fR\fR
+.ad
+.sp .6
+.RS 4n
+Monitor packets the specified zone's in-zone filter. If neither this option
+nor \fB-G\fR is specified, the current zone is used. This command is only
+available in the Global Zone. See \fBZONES\fR in \fBipf\fR(1m) for more
+information.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fB-G\fR \fIzonename\fR\fR
+.ad
+.sp .6
+.RS 4n
+Monitor packets for the specified zone's global zone controlled filter. If
+neither this option nor \fB-z\fR is specified, the current zone is used. This
+command is only available in the Global Zone. See \fBZONES\fR in \fBipf\fR(1m)
+for more information.
+.RE
+
.SH FILES
.RS +4
.TP
@@ -382,7 +409,7 @@ Interface Stability Committed
.sp
.LP
\fBipf\fR(1M), \fBipfstat\fR(1M), \fBipnat\fR(1M), \fBattributes\fR(5),
-\fBipfilter\fR(5)
+\fBipfilter\fR(5), \fBzones(5)\fR
.sp
.LP
\fI\fR
diff --git a/usr/src/man/man1m/ipnat.1m b/usr/src/man/man1m/ipnat.1m
index 6e36a50e26..ed382fa7a9 100644
--- a/usr/src/man/man1m/ipnat.1m
+++ b/usr/src/man/man1m/ipnat.1m
@@ -2,13 +2,14 @@
.\" To view license terms, attribution, and copyright for IP Filter, the default path is /usr/lib/ipf/IPFILTER.LICENCE. If the Solaris operating environment has been installed anywhere other than the default, modify the given path to access the file at the installed
.\" location.
.\" Portions Copyright (c) 2008, Sun Microsystems Inc. All Rights Reserved.
-.TH IPNAT 1M "Apr 3, 2008"
+.\" Portions Copyright (c) 2013, Joyent, Inc. All Rights Reserved.
+.TH IPNAT 1M "Oct 30, 2013"
.SH NAME
ipnat \- user interface to the NAT subsystem
.SH SYNOPSIS
.LP
.nf
-\fBipnat\fR [\fB-CdFhlnRrsv\fR] \fB-f\fR \fIfilename\fR
+\fBipnat\fR [\fB-CdFhlnRrsv\fR] [\fB-G\fR | \fB-z\fR \fIzonename\fR] \fB-f\fR \fIfilename\fR
.fi
.SH DESCRIPTION
@@ -140,6 +141,30 @@ Turn verbose mode on. Displays information relating to rule processing and
active rules/table entries.
.RE
+.sp
+.ne 2
+.na
+\fB\fB-z\fR \fIzonename\fR\fR
+.ad
+.RS 15n
+Operate on the in-zone IP NAT for the specified zone. If neither this option
+nor \fB-G\fR is specified, the current zone is used. This command is only
+available in the Global Zone. See \fBZONES\fR in \fBipf\fR(1m) for more
+information.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fB-G\fR \fIzonename\fR\fR
+.ad
+.RS 15n
+Operate on the global zone controlled IP NAT for the specified zone. If
+neither this option nor \fB-z\fR is specified, the current zone is used. This
+command is only available in the Global Zone. See \fBZONES\fR in \fBipf\fR(1m)
+for more information.
+.RE
+
.SH FILES
.sp
.ne 2
@@ -200,4 +225,5 @@ Interface Stability Committed
.SH SEE ALSO
.sp
.LP
-\fBipf\fR(1M), \fBipfstat\fR(1M), \fBipnat\fR(4), \fBattributes\fR(5)
+\fBipf\fR(1M), \fBipfstat\fR(1M), \fBipnat\fR(4), \fBattributes\fR(5),
+\fBzones(5)\fR
diff --git a/usr/src/man/man1m/ippool.1m b/usr/src/man/man1m/ippool.1m
index 38e7cc19e6..c7cae2029e 100644
--- a/usr/src/man/man1m/ippool.1m
+++ b/usr/src/man/man1m/ippool.1m
@@ -2,56 +2,57 @@
.\" To view license terms, attribution, and copyright for IP Filter, the default path is /usr/lib/ipf/IPFILTER.LICENCE. If the Solaris operating environment has been installed anywhere other than the default, modify the given path to access the file at the installed
.\" location.
.\" Portions Copyright (c) 2008, Sun Microsystems Inc. All Rights Reserved.
-.TH IPPOOL 1M "Apr 3, 2008"
+.\" Portions Copyright (c) 2012, Joyent, Inc. All Rights Reserved.
+.TH IPPOOL 1M "Nov 26, 2012"
.SH NAME
ippool \- user interface to the IP Filter pools
.SH SYNOPSIS
.LP
.nf
-\fBippool\fR \fB-a\fR [\fB-dnv\fR] [\fB-m\fR \fIpoolname\fR] [\fB-o\fR \fIrole\fR] \fB-i\fR \fIipaddr\fR
+\fBippool\fR \fB-a\fR [\fB-dnv\fR] [\fB-G\fR | \fB-z\fR \fIzonename\fR] [\fB-m\fR \fIpoolname\fR] [\fB-o\fR \fIrole\fR] \fB-i\fR \fIipaddr\fR
[/\fInetmask\fR]
.fi
.LP
.nf
-\fBippool\fR \fB-A\fR [\fB-dnv\fR] [\fB-m\fR \fIpoolname\fR] [\fB-o\fR \fIrole\fR] [\fB-S\fR \fIseed\fR]
+\fBippool\fR \fB-A\fR [\fB-dnv\fR] [\fB-G\fR | \fB-z\fR \fIzonename\fR] [\fB-m\fR \fIpoolname\fR] [\fB-o\fR \fIrole\fR] [\fB-S\fR \fIseed\fR]
[\fB-t\fR \fItype\fR]
.fi
.LP
.nf
-\fBippool\fR \fB-f\fR \fIfile\fR [\fB-dnuv\fR]
+\fBippool\fR \fB-f\fR \fIfile\fR [\fB-G\fR | \fB-z\fR \fIzonename\fR] [\fB-dnuv\fR]
.fi
.LP
.nf
-\fBippool\fR \fB-F\fR [\fB-dv\fR] [\fB-o\fR \fIrole\fR] [\fB-t\fR \fItype\fR]
+\fBippool\fR \fB-F\fR [\fB-dv\fR] [\fB-G\fR | \fB-z\fR \fIzonename\fR] [\fB-o\fR \fIrole\fR] [\fB-t\fR \fItype\fR]
.fi
.LP
.nf
-\fBippool\fR \fB-h\fR [\fB-dv\fR] [\fB-m\fR \fIpoolname\fR] [\fB-t\fR \fItype\fR]
+\fBippool\fR \fB-h\fR [\fB-dv\fR] [\fB-G\fR | \fB-z\fR \fIzonename\fR] [\fB-m\fR \fIpoolname\fR] [\fB-t\fR \fItype\fR]
.fi
.LP
.nf
-\fBippool\fR \fB-l\fR [\fB-dv\fR] [\fB-m\fR \fIpoolname\fR] [\fB-t\fR \fItype\fR]
+\fBippool\fR \fB-l\fR [\fB-dv\fR] [\fB-G\fR | \fB-z\fR \fIzonename\fR] [\fB-m\fR \fIpoolname\fR] [\fB-t\fR \fItype\fR]
.fi
.LP
.nf
-\fBippool\fR \fB-r\fR [\fB-dnv\fR] [\fB-m\fR \fIpoolname\fR] [\fB-o\fR \fIrole\fR] \fB-i\fR \fIipaddr\fR
+\fBippool\fR \fB-r\fR [\fB-dnv\fR] [\fB-G\fR | \fB-z\fR \fIzonename\fR] [\fB-m\fR \fIpoolname\fR] [\fB-o\fR \fIrole\fR] \fB-i\fR \fIipaddr\fR
[/\fInetmask\fR]
.fi
.LP
.nf
-\fBippool\fR \fB-R\fR [\fB-dnv\fR] [\fB-m\fR \fIpoolname\fR] [\fB-o\fR \fIrole\fR] [\fB-t\fR \fItype\fR]
+\fBippool\fR \fB-R\fR [\fB-dnv\fR] [\fB-G\fR | \fB-z\fR \fIzonename\fR] [\fB-m\fR \fIpoolname\fR] [\fB-o\fR \fIrole\fR] [\fB-t\fR \fItype\fR]
.fi
.LP
.nf
-\fBippool\fR \fB-s\fR [\fB-dtv\fR] [\fB-M\fR \fIcore\fR] [\fB-N\fR \fInamelist\fR]
+\fBippool\fR \fB-s\fR [\fB-dtv\fR] [\fB-G\fR | \fB-z\fR \fIzonename\fR] [\fB-M\fR \fIcore\fR] [\fB-N\fR \fInamelist\fR]
.fi
.SH DESCRIPTION
@@ -111,6 +112,30 @@ would alter the currently running kernel.
Turn verbose mode on.
.RE
+.sp
+.ne 2
+.na
+\fB\fB-z\fR \fIzonename\fR\fR
+.ad
+.RS 6n
+Manage the specified zone's in-zone IP pools. If neither this option nor
+\fB-G\fR is specified, the current zone is used. This command is only
+available in the Global Zone. See \fBZONES\fR in \fBipf\fR(1m) for more
+information.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fB-G\fR \fIzonename\fR\fR
+.ad
+.RS 6n
+Manage the specified zone's global zone controlled IP pools. If neither this
+option nor \fB-z\fR is specified, the current zone is used. This command is
+only available in the Global Zone. See \fBZONES\fR in \fBipf\fR(1m) for more
+information.
+.RE
+
.SS "Instance-Specific Options"
.sp
.LP
@@ -328,4 +353,5 @@ Interface Stability Committed
.SH SEE ALSO
.sp
.LP
-\fBipf\fR(1M), \fBipfstat\fR(1M), \fBippool\fR(4), \fBattributes\fR(5)
+\fBipf\fR(1M), \fBipfstat\fR(1M), \fBippool\fR(4), \fBattributes\fR(5),
+\fBzones(5)\fR
diff --git a/usr/src/man/man1m/mount_tmpfs.1m b/usr/src/man/man1m/mount_tmpfs.1m
index a4f38d3b1f..3520a3f31b 100644
--- a/usr/src/man/man1m/mount_tmpfs.1m
+++ b/usr/src/man/man1m/mount_tmpfs.1m
@@ -1,9 +1,10 @@
'\" te
.\" Copyright (c) 2003, Sun Microsystems, Inc. All Rights Reserved
+.\" Copyright (c) 2011, Joyent, Inc. All Rights Reserved
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH MOUNT_TMPFS 1M "Nov 24, 2003"
+.TH MOUNT_TMPFS 1M "Dec 2, 2011"
.SH NAME
mount_tmpfs \- mount tmpfs file systems
.SH SYNOPSIS
@@ -45,6 +46,17 @@ available:
.sp
.ne 2
.na
+\fB\fBremount\fR\fR
+.ad
+.sp .6
+.RS 19n
+Remounts a file system with a new size. A size not explicitly
+set with \fBremount\fR reverts to no limit.
+.RE
+
+.sp
+.ne 2
+.na
\fBsize=\fIsz\fR\fR
.ad
.RS 19n
diff --git a/usr/src/man/man1m/prstat.1m b/usr/src/man/man1m/prstat.1m
index a5f02621cf..08e2ce9907 100644
--- a/usr/src/man/man1m/prstat.1m
+++ b/usr/src/man/man1m/prstat.1m
@@ -1,6 +1,7 @@
'\" te
.\" Copyright (c) 2013 Gary Mills
.\" Copyright (c) 2006, 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2013, Joyent, Inc. All Rights Reserved.
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
.\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with
.\" the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
@@ -10,10 +11,10 @@ prstat \- report active process statistics
.SH SYNOPSIS
.LP
.nf
-\fBprstat\fR [\fB-acHJLmRrtTvWZ\fR] [\fB-d\fR u | d] [\fB-C\fR \fIpsrsetlist\fR] [\fB-h\fR \fIlgrplist\fR]
+\fBprstat\fR [\fB-acHJLmRrtTvVWZ\fR] [\fB-d\fR u | d] [\fB-C\fR \fIpsrsetlist\fR] [\fB-h\fR \fIlgrplist\fR]
[\fB-j\fR \fIprojlist\fR] [\fB-k\fR \fItasklist\fR] [\fB-n\fR \fIntop\fR[,\fInbottom\fR]]
[\fB-p\fR \fIpidlist\fR] [\fB-P\fR \fIcpulist\fR] [\fB-s\fR \fIkey\fR | \fB-S\fR \fIkey\fR ]
- [\fB-u\fR \fIeuidlist\fR] [\fB-U\fR \fIuidlist\fR] [\fB-z\fR \fIzoneidlist\fR]
+ [\fB-u\fR \fIeuidlist\fR] [\fB-U\fR \fIuidlist\fR] [\fB-z\fR \fIzoneidlist\fR] [\fB-Z\fR]
[\fIinterval\fR [\fIcount\fR]]
.fi
@@ -366,6 +367,18 @@ with the \fB-\fR sign.
.sp
.ne 2
.na
+\fB\fB-V\fR\fR
+.ad
+.sp .6
+.RS 4n
+Report accurate aggregated SWAP and RSS values when used with the \fB-J\fR,
+\fB-t\fR, \fB-T\fR or \fB-Z\fR options. This uses an accurate, but more expensive,
+calculation to determine the aggregated values for the specified grouping.
+.RE
+
+.sp
+.ne 2
+.na
\fB\fB-W\fR\fR
.ad
.sp .6
@@ -447,9 +460,11 @@ devices, in kilobytes (\fBK\fR), megabytes (\fBM\fR), or gigabytes (\fBG\fR).
.RS 4n
The resident set size of the process (\fBRSS\fR), in kilobytes (\fBK\fR),
megabytes (\fBM\fR), or gigabytes (\fBG\fR). The RSS value is an estimate
-provided by \fBproc\fR(4) that might underestimate the actual resident set
-size. Users who want to get more accurate usage information for capacity
-planning should use the \fB-x\fR option to \fBpmap\fR(1) instead.
+provided by \fBproc\fR(4) that might underestimate the actual
+per-process resident set size and usually overestimates the aggregated
+resident set size. Users who want to get more accurate usage information for
+capacity planning should use either the \fB-V\fR option or, for per-process
+results, the \fB-x\fR option to \fBpmap\fR(1) instead.
.RE
.sp
diff --git a/usr/src/man/man1m/prtconf.1m b/usr/src/man/man1m/prtconf.1m
index d8ad9e72ba..ff62a01992 100644
--- a/usr/src/man/man1m/prtconf.1m
+++ b/usr/src/man/man1m/prtconf.1m
@@ -10,7 +10,7 @@ prtconf \- print system configuration
.SH SYNOPSIS
.LP
.nf
-\fB/usr/sbin/prtconf\fR [\fB-V\fR] | [\fB-F\fR] | [\fB-x\fR] | [\fB-bpv\fR] | [\fB-acdDPv\fR]
+\fB/usr/sbin/prtconf\fR [\fB-V\fR] | [\fB-F\fR] | [\fB-m\fr] | [\fB-x\fR] | [\fB-bpv\fR] | [\fB-acdDPv\fR]
[\fIdev_path\fR]
.fi
@@ -100,6 +100,16 @@ console frame buffer on a SUNW,Ultra-30 is \fBffb\fR, the command returns:
.sp
.ne 2
.na
+\fB\fB-m\fR\fR
+.ad
+.RS 6n
+Displays the amount system memory in megabytes.
+This flag must be used by itself.
+.RE
+
+.sp
+.ne 2
+.na
\fB\fB-p\fR\fR
.ad
.RS 6n
diff --git a/usr/src/man/man1m/reboot.1m b/usr/src/man/man1m/reboot.1m
index 1ff92b6f33..8eadedf18d 100644
--- a/usr/src/man/man1m/reboot.1m
+++ b/usr/src/man/man1m/reboot.1m
@@ -144,8 +144,7 @@ This option is currently available only on x86 systems. The \fB-p\fR and
.ad
.sp .6
.RS 4n
-Quick. Reboot quickly and ungracefully, without shutting down running processes
-first.
+Quick. Reboot quickly without halting running zones first.
.RE
.SH OPERANDS
diff --git a/usr/src/man/man1m/snoop.1m b/usr/src/man/man1m/snoop.1m
index ca969e22b4..4184c66e10 100644
--- a/usr/src/man/man1m/snoop.1m
+++ b/usr/src/man/man1m/snoop.1m
@@ -1,9 +1,10 @@
'\" te
.\" Copyright (C) 2009, Sun Microsystems, Inc. All Rights Reserved
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH SNOOP 1M "Feb 18, 2009"
+.TH SNOOP 1M "Feb 24, 2014"
.SH NAME
snoop \- capture and inspect network packets
.SH SYNOPSIS
@@ -12,7 +13,7 @@ snoop \- capture and inspect network packets
\fBsnoop\fR [\fB-aqrCDINPSvV\fR] [\fB-t\fR [r | a | d]] [\fB-c\fR \fImaxcount\fR]
[\fB-d\fR \fIdevice\fR] [\fB-i\fR \fIfilename\fR] [\fB-n\fR \fIfilename\fR] [\fB-o\fR \fIfilename\fR]
[\fB-p\fR \fIfirst\fR [, \fIlast\fR]] [\fB-s\fR \fIsnaplen\fR] [\fB-x\fR \fIoffset\fR [, \fIlength\fR]]
- [\fIexpression\fR]
+ [\fB-z\fR \fIzonename\fR] [\fIexpression\fR]
.fi
.SH DESCRIPTION
@@ -298,6 +299,22 @@ the whole packet, use an \fIoffset\fR of 0. If a \fIlength\fR value is not
provided, the rest of the packet is displayed.
.RE
+.sp
+.ne 2
+.na
+.BI -z zonename
+.ad
+.sp .6
+.RS 4n
+Open an earlier datalink specified via
+.B -d
+or
+.B -I
+in the specified zone \fIzonename\fR.
+This option is only meaningful in the global zone and
+allows the global zone to inspect datalinks of non-global zones.
+.RE
+
.SH OPERANDS
.sp
.ne 2
diff --git a/usr/src/man/man1m/svc.startd.1m b/usr/src/man/man1m/svc.startd.1m
index 7c80c35e23..103c6b5fec 100644
--- a/usr/src/man/man1m/svc.startd.1m
+++ b/usr/src/man/man1m/svc.startd.1m
@@ -4,7 +4,7 @@
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH SVC.STARTD 1M "Mar 18, 2011"
+.TH SVC.STARTD 1M "Aug 23, 2012"
.SH NAME
svc.startd \- Service Management Facility master restarter
.SH SYNOPSIS
@@ -372,10 +372,13 @@ properties listed below in the \fBstartd\fR property group.
.RS 4n
The \fBcritical_failure_count\fR and \fBcritical_failure_period\fR properties
together specify the maximum number of service failures allowed in a given
-time interval before \fBsvc.startd\fR transitions the service to maintenance.
+number of seconds before \fBsvc.startd\fR transitions the service to
+maintenance.
If the number of failures exceeds \fBcritical_failure_count\fR in any period of
\fBcritical_failure_period\fR seconds, \fBsvc.startd\fR will transition the
-service to maintenance.
+service to maintenance. The \fBcritical_failure_count\fR value is limited
+to the range 1-10 and defaults to 10. The \fBcritical_failure_period\fR
+defaults to 600 seconds.
.RE
.sp
diff --git a/usr/src/man/man1m/svccfg.1m b/usr/src/man/man1m/svccfg.1m
index 972967267b..c2e2cf28cd 100644
--- a/usr/src/man/man1m/svccfg.1m
+++ b/usr/src/man/man1m/svccfg.1m
@@ -1,5 +1,6 @@
'\" te
.\" Copyright (c) 2008, Sun Microsystems, Inc. All Rights Reserved
+.\" Copyright 2012, Joyent, Inc. All Rights Reserved
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
.\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
@@ -261,9 +262,11 @@ Without the \fB-a\fR option, property groups containing protected information
\fB-a\fR option is specified, all values will be archived. An error results if
there are insufficient privileges to read these values.
.sp
-Note that \fBexport\fR requires a service FMRI. If you specify an instance
-(including an abbreviation, such as \fBapache2\fR or \fBsendmail\fR, that
-specifies an instance), the command fails.
+Note that \fBexport\fR requires a service FMRI. To ease the use of arguments
+cut and pasted from other command output, if you specify a complete
+instance FMRI, the entire corresponding service including all instances
+is exported and a warning is issued. If you specify an abbreviation, such as
+\fBapache2\fR or \fBsendmail\fR, that specifies an instance, the command fails.
.RE
.sp
diff --git a/usr/src/man/man1m/tunefs.1m b/usr/src/man/man1m/tunefs.1m
index 7f522f43fa..0b849f2dd7 100644
--- a/usr/src/man/man1m/tunefs.1m
+++ b/usr/src/man/man1m/tunefs.1m
@@ -3,7 +3,7 @@
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH TUNEFS 1M "Dec 5, 2003"
+.TH TUNEFS 1M "Sep 19, 2013"
.SH NAME
tunefs \- tune an existing UFS file system
.SH SYNOPSIS
@@ -120,3 +120,9 @@ encountering files greater than or equal to 2 Gbyte ( 2^31 bytes).
.sp
.LP
\fBmkfs_ufs\fR(1M), \fBnewfs\fR(1M), \fBattributes\fR(5), \fBlargefile\fR(5)
+
+.\" Take this out and a Unix Demon will dog your steps from now until
+.\" the time_t's wrap around.
+.SH BUGS
+.sp
+You can tune a file system, but you can't tune a fish.
diff --git a/usr/src/man/man1m/vfsstat.1m b/usr/src/man/man1m/vfsstat.1m
new file mode 100644
index 0000000000..aef8431a09
--- /dev/null
+++ b/usr/src/man/man1m/vfsstat.1m
@@ -0,0 +1,213 @@
+'\" te
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\" Copyright 2014 Joyent, Inc. All rights reserved.
+.\"
+.TH "VFSSTAT" "1m" "May 1, 2014" "" ""
+.
+.SH "NAME"
+\fBvfsstat\fR \-\- Report VFS read and write activity
+.
+.SH "SYNOPSIS"
+.
+.nf
+vfsstat [\-hIMrzZ] [interval [count]]
+.
+.fi
+.
+.SH "DESCRIPTION"
+The vfsstat utility reports a summary of VFS read and write
+activity per zone\. It first prints all activity since boot, then
+reports activity over a specified interval\.
+.
+.P
+When run from a non\-global zone (NGZ), only activity from that NGZ
+can be observed\. When run from a the global zone (GZ), activity
+from the GZ and all other NGZs can be observed\.
+.
+.P
+This tool is convenient for examining I/O performance as
+experienced by a particular zone or application\. Other tools
+which examine solely disk I/O do not report reads and writes which
+may use the filesystem\'s cache\. Since all read and write system
+calls pass through the VFS layer, even those which are satisfied
+by the filesystem cache, this tool is a useful starting point when
+looking at a potential I/O performance problem\. The vfsstat
+command reports the most accurate reading of I/O performance as
+experienced by an application or zone\.
+.
+.P
+One additional feature is that ZFS zvol performance is also reported
+by this tool, even though zvol I/O does not go through the VFS
+layer\. This is done so that this single tool can be used to monitor
+I/O performance and because its not unreasonable to think of zvols
+as being included along with other ZFS filesystems\.
+.
+.P
+The calculations and output fields emulate those from iostat(1m)
+as closely as possible\. When only one zone is actively performing
+disk I/O, the results from iostat(1m) in the global zone and
+vfsstat in the local zone should be almost identical\. Note that
+many VFS read operations may be handled by the filesystem cache,
+so vfsstat and iostat(1m) will be similar only when most
+operations require a disk access\.
+.
+.P
+As with iostat(1m), a result of 100% for VFS read and write
+utilization does not mean that the VFS layer is fully saturated\.
+Instead, that measurement just shows that at least one operation
+was pending over the last interval of time examined\. Since the
+VFS layer can process more than one operation concurrently, this
+measurement will frequently be 100% but the VFS layer can still
+accept additional requests\.
+.
+.SH "OUTPUT"
+The vfsstat utility reports the following information:
+.
+.IP "" 4
+.
+.nf
+r/s
+.RS
+reads per second
+.RE
+
+w/s
+.RS
+writes per second
+.RE
+
+kr/s
+.RS
+kilobytes read per second
+.RE
+
+kw/s
+.RS
+kilobytes written per second
+.RE
+
+ractv
+.RS
+average number of read operations actively being serviced by the VFS layer
+.RE
+
+wactv
+.RS
+average number of write operations actively being serviced by the VFS layer
+.RE
+
+read_t
+.RS
+average VFS read latency, in microseconds
+.RE
+
+writ_t
+.RS
+average VFS write latency, in microseconds
+.RE
+
+%r
+.RS
+percent of time there is a VFS read operation pending
+.RE
+
+%w
+.RS
+percent of time there is a VFS write operation pending
+.RE
+
+d/s
+.RS
+VFS operations per second delayed by the ZFS I/O throttle
+.RE
+
+del_t
+.RS
+average ZFS I/O throttle delay, in microseconds
+.RE
+.
+.fi
+.
+.IP "" 0
+.
+.SH "OPTIONS"
+The following options are supported:
+.
+.P
+\-h
+.RS
+Show help message and exit
+.RE
+.
+.P
+\-I
+.RS
+Print results per interval, rather than per second (where applicable)
+.RE
+.
+.P
+\-M
+.RS
+Print results in MB/s instead of KB/s
+.RE
+.
+.P
+\-r
+.RS
+Show results in a comma\-separated format
+.RE
+.
+.P
+\-z
+.RS
+Hide zones with no VFS activity
+.RE
+.
+.P
+\-Z
+.RS
+Print results for all zones, not just the current zone
+.RE
+.
+.SH "OPERANDS"
+interval
+.
+.P
+Specifies the length in seconds to pause between each interval
+report\. If not specified, vfsstat will print a summary since boot
+and exit\.
+.
+.P
+count
+.
+.P
+Specifies the number of intervals to report\. Defaults to
+unlimited if not specified\.
+.
+.SH "SEE ALSO"
+.
+.nf
+iostat(1m), ziostat(1m), mpstat(1m)
+.
+.fi
+.
+.SH "NOTES"
+This command does not examine readdir or any other VFS operations,
+only read and write operations\.
+.
+.P
+This command does not look at network I/O, only I/O operations to
+or from a file\.
+.
+.P
+The output format from vfsstat may change over time; use the
+comma\-separated output for a stable output format\.
diff --git a/usr/src/man/man1m/vndadm.1m b/usr/src/man/man1m/vndadm.1m
new file mode 100644
index 0000000000..253518a88a
--- /dev/null
+++ b/usr/src/man/man1m/vndadm.1m
@@ -0,0 +1,651 @@
+'\" te
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\"
+.TH VNDADM 1M "Mar 06, 2014"
+.SH NAME
+vndadm \- administer vnd devices
+
+.SH SYNOPSIS
+
+.nf
+vndadm create [-z zonename] [-l datalink] device
+vndadm destroy [-z zonename] device...
+vndadm list [-p] [-d delim] [-o field,...] [-z zonename] [device]...
+vndadm get [-p] [-d delim] [-z zonename] device [prop]...
+vndadm set [-z zonename] device prop=val...
+.fi
+
+.SH DESCRIPTION
+.sp
+.LP
+The vndadm command is used to administer vnd devices. A vnd device is
+similar to an IP network interface, except that the vnd device operates
+at layer two. A vnd device is created over a data link (see dladm(1M))
+and its address is that of the underlying data link. For ethernet based
+devices, that address would be the MAC address of the data link. vnd
+devices are character devices which may be used to send and receive
+layer two packets. When reading or writing to a vnd device, the full
+frame must be present. This is useful for working with virtual machines,
+or other environments where you need to manipulate the entire layer two
+frame.
+
+.sp
+.LP
+Every command takes a device as an argument. To specify a vnd device,
+you just use the name of the device. Devices are scoped to zones. If no
+zone is specified, the current zone is assumed. A device name can be any
+series of alphanumeric ascii characters which typically match the name
+of the underlying data link. A given vnd device name must be unique in a
+given zone, but the same name can be used across zones.
+.sp
+.SH OPTIONS
+.sp
+.LP
+All vndadm subcommands have the following common option:
+.sp
+.ne 2
+.na
+-z zonename
+.ad
+.sp .6
+.RS 4n
+Operate in the context of the specified zone. When creating a vnd
+device, the named device is created in the specified zone. All other
+operations scope the device lookup to the specified zone. If the user is
+not in the global zone, the use of -z will not work.
+
+.sp
+.LP
+When -z is used and multiple devices are specified, then
+the use of -z applies to all of the devices.
+.RE
+
+.SH SUBCOMMANDS
+.sp
+.ne 2
+.na
+vndadm create [-z zonename] [-l datalink] device
+.ad
+.sp
+.RS 4n
+Creates a vnd device with the specified name device. If -l datalink is
+not specified, it is assumed that the data link and the device share the
+same name. The created device will exist for as long as the zone exists
+or until a call to vndadm destroy. vnd devices do not persist across
+system reboots. Note, if an IP interface or another libdlpi(3LIB)
+consumer is already using the data link, then vnd will fail.
+
+.sp
+The maximum length of the name of device is 31 characters. The allowed
+set of characters is alphanumberic characters, ':', \'-', and \'_'. The
+names 'zone' and 'ctl' are reserved and may not be used.
+
+.sp
+.ne 2
+.na
+-l datalink
+.ad
+.sp .6
+.RS 4n
+Specifies the name of the data link to create the device over. This
+allows the vnd device name to be different from the data link's name.
+.RE
+.sp
+.ne 2
+.na
+-z zonename
+.ad
+.sp .6
+.RS 4n
+See OPTIONS above.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.na
+vndadm destroy [-z zonename] device...
+.ad
+.sp
+.RS 4n
+Destroys the specified device. The destruction is analogous to
+unlink(2). If the device is still open and used by applications, the
+device will continue to exist, but it will no longer be accessible by
+the name device.
+.sp
+.ne 2
+.na
+-z zonename
+.ad
+.sp .6
+.RS 4n
+See OPTIONS above.
+.RE
+.RE
+
+.sp
+.ne 2
+.na
+vndadm list [-p] [-d delim] [-o field,...] [-z zonename] [device]...
+.ad
+.sp
+.RS 4n
+Lists active vnd devices. By default, vnadm list lists all devices in
+every zone that the caller is allowed to see; the current zone if in the
+non-global zone, and all zones in the global zone. If device is
+specified one or more times, then output will be limited to the
+specified devices.
+.sp
+.ne 2
+.na
+-o field[,...]
+.ad
+.sp .6
+.RS 4n
+A case-insensitive, comma-separated list of output fields. When -o is
+not used, all of the fields listed below are shown. The field name must
+be one of the following fields:
+
+.sp
+.ne 2
+.na
+NAME
+.ad
+.sp .6
+.RS 4n
+The name of the vnd device.
+.RE
+
+.sp
+.ne 2
+.na
+DATALINK
+.ad
+.sp .6
+.RS 4n
+The name of the data link the vnd device was created over.
+.RE
+
+.sp
+.ne 2
+.na
+ZONENAME
+.ad
+.sp .6
+.RS 4n
+The name of the zone that the vnd device exists in.
+.RE
+.RE
+
+.sp
+.ne 2
+.na
+-p
+.ad
+.sp .6
+.RS 4n
+Display the output in a stable machine parseable format. The -o option
+is required with the -p option. See "Parseable Output Format" below.
+.RE
+
+.sp
+.ne 2
+.na
+-d delim
+.ad
+.sp .6
+.RS 4n
+Change the delimiter used in conjunction with generating parseable
+output. This option may only be specified when -p is also specified.
+.RE
+
+.sp
+.ne 2
+.na
+-z zonename
+.ad
+.sp .6
+.RS 4n
+See OPTIONS above.
+.RE
+
+.RE
+
+
+.sp
+.ne 2
+.na
+vndadm get [-p] [-d delim] [-z zonename] device [prop]...
+.ad
+.sp
+.RS 4n
+Displays the properties for the specified device. By default, all
+properties of a given device are displayed. If prop is specified one or
+more times, then only the specified properties will be displayed for
+device. For a list of properties, see the section "Properties" below.
+The property output consists of the following four columns:
+.sp
+.ne 2
+.na
+LINK
+.ad
+.sp .6
+.RS 4n
+The name of the device
+.RE
+
+.sp
+.ne 2
+.na
+PROPERTY
+.ad
+.sp .6
+.RS 4n
+The name of the property. Note that some properties that are private to
+the implementation may be displayed. Those properties begin with a
+leading underscore.
+.RE
+
+.sp
+.ne 2
+.na
+PERM
+.ad
+.sp .6
+.RS 4n
+Describes whether the property is read-only or
+if it is read-write. This field does not
+indicate if the current user has permission, but
+lists permissions for a privileged user.
+.RE
+
+.sp
+.ne 2
+.na
+VALUE
+.ad
+.sp .6
+.RS 4n
+The value of the property.
+.RE
+
+.sp
+.ne 2
+.na
+-p
+.ad
+.sp .6
+.RS 4n
+Display the output in a stable machine parseable format. See "Parseable
+Output Format" below.
+.RE
+
+.sp
+.ne 2
+.na
+-d delim
+.ad
+.sp .6
+.RS 4n
+Change the delimiter used in conjunction with generating parseable
+output. This option may only be specified when -p is also specified.
+.RE
+
+.sp
+.ne 2
+.na
+-z zonename
+.ad
+.sp .6
+.RS 4n
+See OPTIONS above.
+.RE
+.RE
+
+.sp
+.ne 2
+.na
+vndadm set [-z zonename] device prop=val...
+.ad
+.sp
+.RS 4n
+Sets properties on the named device. Setting a property takes effect for
+all operations on the device, after the program returns. Multiple
+properties can be set at once; however, properties are applied one at a
+time to the device. Property names and values must be separated with an
+equals sign. Additional property and value pairs should be separated by
+white space. For a list of properties, see the section "Properties"
+below.
+
+.sp
+.ne 2
+.na
+-z zonename
+.ad
+.sp .6
+.RS 4n
+See OPTIONS above.
+.RE
+.RE
+
+.SS Parseable Output Format
+.sp
+.LP
+The default output for parseable data is to be separated with a single
+ascii space character. The delimiter may be changed with the -d
+option. When parseable output is requested, no numbers that represent
+sizes will be displayed in human readable form, they will be fully
+expanded. eg. the number 42K will instead be 43008.
+
+.SS Properties
+.sp
+.LP
+The following are supported and stable properties. Note that any
+properties that starts with a leading underscore are not a stable
+property and may be removed at any time.
+
+.sp
+.ne 2
+.na
+rxbuf
+.ad
+.sp .6
+.RS 4n
+A read/write property that controls the size of the receive buffer for
+the device. All received data enters the receive buffer until a consumer
+consumes it. If adding a received frame would exceed the size of the
+receive buffer, then that frame will be dropped. The maximum size of the
+buffer is limited by the 'maxsize' property. The minimum size of the
+buffer is the value of the 'maxtu' property. The property's value may be
+anything between that maximum and minimum. When setting this property,
+standard size suffixes such as 'K' and 'M' may be used.
+.RE
+
+.sp
+.ne 2
+.na
+txbuf
+.ad
+.sp .6
+.RS 4n
+A read/write property that controls the size of the transmit buffer. All
+in-flight transmitted data must be able to fit into the transmit buffer
+to account for potential flow control events. If there is not enough
+space in the transmit buffer, transmit related I/O operations will
+either block or fail based on whether the file has been put into
+non-blocking mode by setting O_NONBLOCK or O_NDELAY with fcntl(2). The
+maximum size of the buffer is limited by the 'maxsize' property. The
+minimum size of the buffer is the value of the 'maxtu' property. The
+property's value may be anything between that maximum and minimum. When
+setting this property, standard size suffixes such as 'K' and 'M' may be
+used.
+
+.RE
+
+.sp
+.ne 2
+.na
+maxsize
+.ad
+.sp .6
+.RS 4n
+A read-only property that describes the maximum size of buffers in the
+system. Properties such as rxbuf and txbuf cannot be set beyond this.
+.RE
+
+.sp
+.ne 2
+.na
+mintu
+.ad
+.sp .6
+.RS 4n
+A read-only property that describes the minimum size of a frame
+transmitted to the underlying data link. Note that the minimum listed
+here may be less than the size of a valid layer two frame and therefore
+may be dropped. A frame smaller than this value will be rejected by vnd.
+.RE
+
+.sp
+.ne 2
+.na
+maxtu
+.ad
+.sp .6
+.RS 4n
+A read-only property that describes the maximum size of a frame
+transmitted to the underlying data link. A frame larger than this value
+will be rejected by vnd.
+.RE
+
+.SH EXAMPLES
+.LP
+Example 1 Creating a vnd device
+.sp
+.LP
+To create a vnd device over the VNIC named net0, enter the following
+command:
+
+.sp
+.in +2
+.nf
+# vndadm create net0
+.fi
+.in -2
+.sp
+
+.LP
+Example 2 Creating a vnd device in another zone
+.sp
+.LP
+
+To create a vnd device over the VNIC named net1 in the zone
+1b7155a4-aef9-e7f0-d33c-9705e4b8b525, enter the following command:
+
+.sp
+.in +2
+.nf
+# vndadm create -z 1b7155a4-aef9-e7f0-d33c-9705e4b8b525 net1
+.fi
+.in -2
+.sp
+
+.LP
+Example 3 Destroying a vnd device
+.sp
+.LP
+
+To destroy the vnd device named net0, enter the following command:
+
+.sp
+.in +2
+.nf
+# vndadm destroy net0
+.fi
+.in -2
+.sp
+
+.LP
+Example 4 Destroying a vnd device in another zone
+.sp
+.LP
+
+To destroy the vnd device named net1 in the zone
+1b7155a4-aef9-e7f0-d33c-9705e4b8b525, enter the following command:
+
+.sp
+.in +2
+.nf
+# vndadm destroy -z 1b7155a4-aef9-e7f0-d33c-9705e4b8b525 net1
+.fi
+.in -2
+.sp
+
+.LP
+Example 5 List all vnd devices
+.sp
+.LP
+
+To list all devices, run the following command:
+
+.sp
+.in +2
+.nf
+# vndadm list
+NAME DATALINK ZONENAME
+net0 net0 global
+net0 net0 1b7155a4-aef9-e7f0-d33c-9705e4b8b525
+.fi
+.in -2
+.sp
+
+.LP
+Example 6 Listing devices in a specific zone
+.sp
+.LP
+
+To list devices in a specific zone, run the following command:
+
+.sp
+.in +2
+.nf
+# vndadm list -z 1b7155a4-aef9-e7f0-d33c-9705e4b8b525
+
+NAME DATALINK ZONENAME
+net0 net0 1b7155a4-aef9-e7f0-d33c-9705e4b8b525
+.fi
+.in -2
+.sp
+
+.LP
+Example 7 List all devices in a parseable format
+.sp
+.LP
+
+To list all devices in a parseable format with the delimiter of ':', run
+the following command:
+
+.sp
+.in +2
+.nf
+# vndadm list -p -d: -o name,datalink,zone
+net0:net0:global
+net0:net0:1b7155a4-aef9-e7f0-d33c-9705e4b8b525
+.fi
+.in -2
+.sp
+
+.LP
+Example 8 Retrieving all properties for a device
+.sp
+.LP
+
+To retrieve all of the properties for the vnd device foo0, run the
+following command:
+
+.sp
+.in +2
+.nf
+# vndadm get foo0
+LINK PROPERTY PERM VALUE
+foo0 rxbuf rw 65536
+foo0 txbuf rw 65536
+foo0 maxsize r- 4194304
+foo0 mintu r- 0
+foo0 maxtu r- 1518
+foo0 _nflush rw 10
+foo0 _burstsz rw 10
+.fi
+.in -2
+.sp
+
+.LP
+Example 9 Retrieving specific properties for a device
+.sp
+.LP
+
+To retrieve just the rxbuf and txbuf properties for the vnd device foo0,
+run the following command:
+
+.sp
+.in +2
+.nf
+# vndadm get foo0 rxbuf txbuf
+LINK PROPERTY PERM VALUE
+foo0 rxbuf rw 65536
+foo0 txbuf rw 65536
+.fi
+.in -2
+.sp
+
+.LP
+Example 10 Retrieving properties for a device in a parseable format
+.sp
+.LP
+
+To retrieve all properties for the vnd device foo0 in a parseable
+format, run the following command:
+
+.sp
+.in +2
+.nf
+# vndadm get -p foo0
+foo0 rxbuf rw 65536
+foo0 txbuf rw 65536
+foo0 maxsize r- 4194304
+foo0 mintu r- 0
+foo0 maxtu r- 1518
+foo0 _nflush rw 10
+foo0 _burstsz rw 10
+.fi
+.in -2
+.sp
+
+.LP
+Example 11 Setting a property on a device
+.sp
+.LP
+
+To set the receive buffer size to one megabyte on the device foo0, run
+the following command:
+
+.sp
+.in +2
+.nf
+# vndadm set foo0 rxbuf=1M
+.fi
+.in -2
+.sp
+
+.LP
+Example 12 Setting multiple properties on a device
+.sp
+.LP
+
+To set the transmit buffer to 300 Kb and the receive buffer to 1 Mb, run
+the following command:
+
+.sp
+.in +2
+.nf
+# vndadm set foo0 rxbuf=300K txbuf=1M
+.fi
+.in -2
+.sp
+
+.SH SEE ALSO
+
+dladm(1M), ipadm(1M), fcntl(2), fcntl.h(3HEAD), libvnd(3LIB),
+vndstat(1M), vnd(7D)
diff --git a/usr/src/man/man1m/vndstat.1m b/usr/src/man/man1m/vndstat.1m
new file mode 100644
index 0000000000..a7f843e228
--- /dev/null
+++ b/usr/src/man/man1m/vndstat.1m
@@ -0,0 +1,163 @@
+'\" te
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\"
+.TH VNDSTAT 1M "Mar 06, 2014"
+.SH NAME
+vndstat \- report vnd activity
+
+.SH SYNOPSIS
+
+vndstat [interval [count]]
+
+.SH DESCRIPTION
+.sp
+.LP
+The vndstat command reports a summary of per-device vnd
+activity. Once per interval it prints a table of statistics per
+device. In the global zone, vndstat reports on all devices in the
+system. From the non-global zone, it only reports on devices that are
+present in that zone. vndstat reports on all vnd devices
+that exist, including anonymous devices which are not linked into the
+file system.
+.sp
+.LP
+The vndstat command's output includes the following information:
+.sp
+.ne 2
+.na
+.B name
+.ad
+.RS 14n
+The name of the device, if bound. If a given vnd device is not
+bound into the file system, hence considered anonymous, then there will
+be no name for the device.
+.RE
+
+.sp
+.ne 2
+.na
+.B rx B/s
+.ad
+.RS 14n
+The number of bytes received by the device during interval.
+.RE
+
+.sp
+.ne 2
+.na
+.B tx B/s
+.ad
+.RS 14n
+The number of bytes transmitted by the device during interval.
+.RE
+
+.sp
+.ne 2
+.na
+.B drops
+.ad
+.RS 14n
+The number of packets and messages which have been dropped. This
+includes all drops due to insufficient buffer space, IP hooks, and
+unknown or malformed DLPI messages.
+.RE
+
+.sp
+.ne 2
+.na
+.B txfc
+.ad
+.RS 14n
+The number of flow control events that have occurred. A flow control
+event occurs when the layers below vnd request that all transmits
+be paused until a future call resumes the flow. This statistic is
+incremented when the flow is resumed. It is not incremented when it is
+first paused.
+.RE
+
+.sp
+.ne 2
+.na
+.B zone
+.ad
+.RS 14n
+The name of the zone the device is located in.
+.RE
+
+.SH OPTIONS
+
+.sp
+.ne 2
+.na
+interval
+.ad
+.RS 13n
+Report once each interval seconds. interval may not be
+fractional.
+.RE
+
+.sp
+.ne 2
+.na
+count
+.ad
+.RS 13n
+Only print count reports, then exit.
+.RE
+.sp
+.LP
+When no arguments are given to vndstat, it will always print at an
+interval of one second. Reports will continue until vndstat
+is terminated.
+
+.SH EXAMPLES
+.LP
+Example 1 Print five seconds of data
+
+.sp
+.in +2
+.nf
+example% vndstat 1 5
+ name | rx B/s | tx B/s | drops txfc | zone
+ net0 | 1.45MB/s | 14.1KB/s | 0 0 | 1b7155a4-aef9-e7f0-d33c-9705e4b8b525
+ net0 | 3.50MB/s | 19.5KB/s | 0 0 | 1b7155a4-aef9-e7f0-d33c-9705e4b8b525
+ net0 | 2.83MB/s | 30.8KB/s | 0 0 | 1b7155a4-aef9-e7f0-d33c-9705e4b8b525
+ net0 | 3.08MB/s | 30.6KB/s | 0 0 | 1b7155a4-aef9-e7f0-d33c-9705e4b8b525
+ net0 | 3.21MB/s | 30.6KB/s | 0 0 | 1b7155a4-aef9-e7f0-d33c-9705e4b8b525
+.fi
+.in -2
+.sp
+
+.SH ATTRIBUTES
+.sp
+.LP
+See attributes(5) for descriptions of the following attributes:
+.sp
+
+.sp
+.TS
+box;
+c | c
+l | l .
+ATTRIBUTE TYPE ATTRIBUTE VALUE
+_
+Interface Stability See below.
+.TE
+
+.sp
+.LP
+Invocation is evolving. Human readable output is unstable.
+.SH SEE ALSO
+
+dlstat(1M), nicstat(1M), vndadm(1M), vnd(7M)
diff --git a/usr/src/man/man1m/zfs.1m b/usr/src/man/man1m/zfs.1m
index 13b360d9ea..4e486bfb2b 100644
--- a/usr/src/man/man1m/zfs.1m
+++ b/usr/src/man/man1m/zfs.1m
@@ -112,7 +112,7 @@ zfs \- configures ZFS file systems
.LP
.nf
-\fBzfs\fR \fBget\fR [\fB-r\fR|\fB-d\fR \fIdepth\fR][\fB-Hp\fR][\fB-o\fR \fIfield\fR[,\fIfield\fR]...] [\fB-t\fR \fItype\fR[,\fItype\fR]...]
+\fBzfs\fR \fBget\fR [\fB-r\fR|\fB-d\fR \fIdepth\fR][\fB-Hpc\fR][\fB-o\fR \fIfield\fR[,\fIfield\fR]...] [\fB-t\fR \fItype\fR[,\fItype\fR]...]
[\fB-s\fR \fIsource\fR[,\fIsource\fR]...] \fBall\fR | \fIproperty\fR[,\fIproperty\fR]...
\fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR...
.fi
@@ -2380,6 +2380,16 @@ A comma-separated list of types to display, where \fItype\fR is one of
For example, specifying \fB-t snapshot\fR displays only snapshots.
.RE
+.sp
+.ne 2
+.na
+\fB\fB-p\fR\fR
+.ad
+.sp .6
+.RS 4n
+Display numbers in parseable (exact) values.
+.RE
+
.RE
.sp
@@ -2403,7 +2413,7 @@ Properties" section.
.sp
.ne 2
.na
-\fB\fBzfs get\fR [\fB-r\fR|\fB-d\fR \fIdepth\fR] [\fB-Hp\fR] [\fB-o\fR
+\fB\fBzfs get\fR [\fB-r\fR|\fB-d\fR \fIdepth\fR] [\fB-Hpc\fR] [\fB-o\fR
\fIfield\fR[,\fIfield\fR]... [\fB-t\fR \fItype\fR[,\fItype\fR]...] [\fB-s\fR \fIsource\fR[,\fIsource\fR]... \fBall\fR |
\fIproperty\fR[,\fIproperty\fR]... \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR...\fR
.ad
@@ -2498,6 +2508,19 @@ is all sources.
Display numbers in parsable (exact) values.
.RE
+.sp
+.ne 2
+.na
+\fB\fB-c\fR\fR
+.ad
+.sp .6
+.RS 4n
+Only display properties which can be retrieved without issuing any I/O requests,
+i.e. properties which are already cached. Most properties are cached except for
+create-time properties (normalization, utf8only, casesensitivity) as well as a
+volume's size and block size.
+.RE
+
.RE
.sp
@@ -2925,7 +2948,7 @@ See \fBzpool-features\fR(5) for details on ZFS feature flags and the
.sp
.ne 2
.na
-\fBzfs send\fR [\fB-DnPpRveL\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR
+\fBzfs send\fR [\fB-DnPpRrveL\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR
.ad
.sp .6
.RS 4n
@@ -3001,6 +3024,18 @@ will be much better if the filesystem uses a dedup-capable checksum (eg.
.sp
.ne 2
.na
+\fB\fB-r\fR\fR
+.ad
+.sp .6
+.RS 4n
+Recursively send all descendant snapshots. This is similar to the \fB-R\fR
+flag, but information about deleted and renamed datasets is not included, and
+property information is only included if the \fB-p\fR flag is specified.
+.RE
+
+.sp
+.ne 2
+.na
\fB\fB-L\fR\fR
.ad
.sp .6
diff --git a/usr/src/man/man1m/zoneadm.1m b/usr/src/man/man1m/zoneadm.1m
index 21a87e2924..0a60d5f345 100644
--- a/usr/src/man/man1m/zoneadm.1m
+++ b/usr/src/man/man1m/zoneadm.1m
@@ -1,6 +1,7 @@
'\" te
.\" Copyright 2014 Nexenta Systems, Inc. All rights reserved.
.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2011 Joyent, Inc. All Rights Reserved.
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
@@ -132,12 +133,14 @@ Use the following command to attach a zone:
.sp
.ne 2
.na
-\fB\fBboot\fR [\fB--\fR \fIboot_options\fR]\fR
+\fB\fBboot\fR [\fB-X\fR] [\fB--\fR \fIboot_options\fR]\fR
.ad
.sp .6
.RS 4n
Boot (or activate) the specified zones.
.sp
+The \fI-X\fR option enables debug for the zone's brand while booting.
+.sp
The following \fIboot_options\fR are supported:
.sp
.ne 2
@@ -252,12 +255,25 @@ The source zone must be halted before this subcommand can be used.
.sp
.ne 2
.na
-\fB\fBhalt\fR\fR
+\fB\fBhalt [\fB-X\fR]\fR\fR
.ad
.sp .6
.RS 4n
Halt the specified zones. \fBhalt\fR bypasses running the shutdown scripts
inside the zone. It also removes run time resources of the zone.
+.sp
+The \fI-X\fR option enables debug for the zone's brand while halting.
+.sp
+Use:
+.sp
+.in +2
+.nf
+zlogin \fIzone\fR shutdown
+.fi
+.in -2
+.sp
+
+to cleanly shutdown the zone by running the shutdown scripts.
.RE
.sp
@@ -407,24 +423,28 @@ and normal restrictions for \fIzonepath\fR apply.
.sp
.ne 2
.na
-\fB\fBready\fR\fR
+\fB\fBready [\fB-X\fR]\fR\fR
.ad
.sp .6
.RS 4n
Prepares a zone for running applications but does not start any user processes
in the zone.
+.sp
+The \fI-X\fR option enables debug for the zone's brand while readying.
.RE
.sp
.ne 2
.na
-\fB\fBreboot\fR\ [\fB--\fR \fIboot_options\fR]]\fR
+\fB\fBreboot\fR\ [\fB-X\fR] [\fB--\fR \fIboot_options\fR]]\fR
.ad
.sp .6
.RS 4n
Restart the zones. This is equivalent to a \fBhalt\fR \fBboot\fR sequence. This
subcommand fails if the specified zones are not active. See \fIboot\fR subcommand
for the boot options.
+.sp
+The \fI-X\fR option enables debug for the zone's brand while rebooting.
.RE
.sp
diff --git a/usr/src/man/man1m/zonecfg.1m b/usr/src/man/man1m/zonecfg.1m
index f7a491ceee..c7d1ace8e5 100644
--- a/usr/src/man/man1m/zonecfg.1m
+++ b/usr/src/man/man1m/zonecfg.1m
@@ -10,17 +10,17 @@ zonecfg \- set up zone configuration
.SH SYNOPSIS
.LP
.nf
-\fBzonecfg\fR \fB-z\fR \fIzonename\fR
+\fBzonecfg\fR {\fB-z\fR \fIzonename\fR | \fB-u\fR \fIuuid\fR}
.fi
.LP
.nf
-\fBzonecfg\fR \fB-z\fR \fIzonename\fR \fIsubcommand\fR
+\fBzonecfg\fR {\fB-z\fR \fIzonename\fR | \fB-u\fR \fIuuid\fR} \fIsubcommand\fR
.fi
.LP
.nf
-\fBzonecfg\fR \fB-z\fR \fIzonename\fR \fB-f\fR \fIcommand_file\fR
+\fBzonecfg\fR {\fB-z\fR \fIzonename\fR | \fB-u\fR \fIuuid\fR} \fB-f\fR \fIcommand_file\fR
.fi
.LP
@@ -43,7 +43,8 @@ The following synopsis of the \fBzonecfg\fR command is for interactive usage:
.sp
.in +2
.nf
-zonecfg \fB-z\fR \fIzonename subcommand\fR
+{\fB-z\fR \fIzonename\fR | \fB-u\fR \fIuuid\fR}
+zonecfg {\fB-z\fR \fIzonename | \fB-u\fR \fIuuid} subcommand\fR
.fi
.in -2
.sp
@@ -337,6 +338,16 @@ The following properties are supported:
.sp
.ne 2
.na
+\fB(global)\fR
+.ad
+.sp .6
+.RS 4n
+\fBzfs-io-priority\fR
+.RE
+
+.sp
+.ne 2
+.na
\fB\fBfs\fR\fR
.ad
.sp .6
@@ -351,7 +362,7 @@ The following properties are supported:
.ad
.sp .6
.RS 4n
-\fBaddress\fR, \fBphysical\fR, \fBdefrouter\fR
+\fBaddress\fR, \fBallowed-address\fR, \fBdefrouter\fR, \fBglobal-nic\fR, \fBmac-addr\fR, \fBphysical\fR, \fBproperty\fR, \fBvlan-id\fR
.RE
.sp
@@ -614,7 +625,17 @@ Values needed to determine how, where, and so forth to mount file systems. See
.sp
.ne 2
.na
-\fB\fBnet\fR: address, physical, defrouter\fR
+\fB\fBinherit-pkg-dir\fR: dir\fR
+.ad
+.sp .6
+.RS 4n
+The directory path.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBnet\fR: address, allowed-address, defrouter, global-nic, mac-addr, physical, property, vlan-id\fR
.ad
.sp .6
.RS 4n
@@ -653,6 +674,10 @@ zone. However, if the interface is not used by the global zone, it should be
configured \fBdown\fR in the global zone, and the default router for the
interface should be specified here.
.sp
+The global-nic is used for exclusive stack zones which will use a VNIC on-demand. When the zone boots, a VNIC named using the physical property will be created on the global NIC. If provided, the mac-addr and vlan-id will be set on this VNIC.
+.sp
+The \fBproperty\fR setting is a resource which can be used to set arbitrary name/value pairs on the network. These name/value pairs are made available to the zone's brand, which can use them as needed to set up the network interface.
+.sp
For an exclusive-IP zone, the physical property must be set and the address and
default router properties cannot be set.
.RE
@@ -884,6 +909,16 @@ is not supported.
.RE
.sp
+.ne 2
+.na
+\fBglobal: \fBzfs-io-priority\fR\fR
+.ad
+.sp .6
+.RS 4n
+Specifies a priority for this zone's ZFS I/O. The priority is used by the ZFS I/O scheduler as in input to determine how to schedule I/O across zones. By default all zones have a priority of 1. The value can be increased for zones whose I/O is more critical. This property is the preferred way to set the \fBzone.zfs-io-priority\fR rctl.
+.RE
+
+.sp
.LP
The following table summarizes resources, property-names, and types:
.sp
@@ -906,13 +941,22 @@ resource property-name type
(global) max-shm-ids simple
(global) max-shm-memory simple
(global) scheduling-class simple
+(global) zfs-io-priority simple
fs dir simple
special simple
raw simple
type simple
options list of simple
net address simple
+ allowed-address simple
+ defrouter simple
+ global-nic simple
+ mac-addr simple
physical simple
+ property list of complex
+ name simple
+ value simple
+ vlan-id simple
device match simple
rctl name simple
value list of complex
@@ -1125,6 +1169,16 @@ name \fBglobal\fR and all names beginning with \fBSUNW\fR are reserved and
cannot be used.
.RE
+.sp
+.ne 2
+.na
+\fB\fB-u\fR \fIuuid\fR\fR
+.ad
+.sp .6
+.RS 4n
+Specify the uuid of a zone instead of the Zone name.
+.RE
+
.SH SUBCOMMANDS
.sp
.LP
@@ -1215,8 +1269,7 @@ correct to be committed, this operation automatically does a verify.
.sp
.ne 2
.na
-\fB\fBcreate [\fR\fB-F\fR\fB] [\fR \fB-a\fR \fIpath\fR |\fB-b\fR \fB|\fR
-\fB-t\fR \fItemplate\fR\fB]\fR\fR
+\fB\fBcreate [\fR\fB-F\fR\fB] [\fR \fB-a\fR \fIpath\fR |\fB-b\fR \fB|\fR \fB-t\fR \fItemplate\fR\fB] [\fR\fB-X\fR\fB]\fR\fR
.ad
.sp .6
.RS 4n
@@ -1238,6 +1291,8 @@ configured, it should be installed using the "\fBzoneadm attach\fR" command
.sp
Use the \fB-b\fR option to create a blank configuration. Without arguments,
\fBcreate\fR applies the Sun default settings.
+.sp
+Use the \fB-X\fR option to facilitate creating a zone whose XML definition already exists on the host. The zone will be atomically added to the zone index file.
.RE
.sp
@@ -1314,18 +1369,21 @@ which is currently being added or modified.
.sp
.ne 2
.na
-\fB\fBremove\fR \fIresource-type\fR\fB{\fR\fIproperty-name\fR\fB=\fR\fIproperty
--value\fR\fB}\fR(global scope)\fR
+\fB\fBremove\fR [\fR\fB-F\fR\fB] \fIresource-type\fR\fB [\fR\fIproperty-name\fR\fB=\fR\fIproperty-value\fR\fB]* \fR(global scope)\fR
+.br
+\fB\fBremove\fR \fR\fIproperty-name\fR\fB \fR\fIproperty-value\fR\fB \fR(resource scope)\fR
.ad
.sp .6
.RS 4n
In the global scope, removes the specified resource. The \fB[]\fR syntax means
-0 or more of whatever is inside the square braces. If you want only to remove a
+0 or more property name-value pairs. If you want to only remove a
single instance of the resource, you must specify enough property name-value
pairs for the resource to be uniquely identified. If no property name-value
pairs are specified, all instances will be removed. If there is more than one
-pair is specified, a confirmation is required, unless you use the \fB-F\fR
-option.
+pair specified, a confirmation is required, unless you use the \fB-F\fR
+option. Likewise, the \fB-F\fR option can be used to remove a resource that
+does not exist (that is, no error will occur). In the resource scope, remove
+the specified name-value pair.
.RE
.sp
diff --git a/usr/src/man/man1m/zpool.1m b/usr/src/man/man1m/zpool.1m
index fbfd393579..e14a72b7e3 100644
--- a/usr/src/man/man1m/zpool.1m
+++ b/usr/src/man/man1m/zpool.1m
@@ -1,5 +1,6 @@
'\" te
.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2013, Joyent, Inc. All Rights Reserved.
.\" Copyright 2011, Nexenta Systems, Inc. All Rights Reserved.
.\" Copyright (c) 2013 by Delphix. All rights reserved.
.\" The contents of this file are subject to the terms of the Common Development
@@ -1608,7 +1609,7 @@ the pool, in addition to the pool-wide statistics.
.sp
.ne 2
.na
-\fB\fBzpool list\fR [\fB-T\fR \fBu\fR | \fBd\fR] [\fB-Hv\fR] [\fB-o\fR \fIprops\fR[,...]] [\fIpool\fR] ...
+\fB\fBzpool list\fR [\fB-T\fR \fBu\fR | \fBd\fR] [\fB-Hvp\fR] [\fB-o\fR \fIprops\fR[,...]] [\fIpool\fR] ...
[\fIinterval\fR[\fIcount\fR]]\fR
.ad
.sp .6
@@ -1670,6 +1671,15 @@ Verbose statistics. Reports usage statistics for individual \fIvdevs\fR within
the pool, in addition to the pool-wise statistics.
.RE
+.sp
+.ne 2
+.na
+\fB\fB-p\fR\fR
+.ad
+.RS 12n
+Display numbers in parseable (exact) values.
+.RE
+
.RE
.sp
diff --git a/usr/src/man/man3c/Makefile b/usr/src/man/man3c/Makefile
index 6240c88c40..bb75c1d4b6 100644
--- a/usr/src/man/man3c/Makefile
+++ b/usr/src/man/man3c/Makefile
@@ -14,6 +14,7 @@
# Copyright 2013 Nexenta Systems, Inc. All rights reserved.
# Copyright 2013, OmniTI Computer Consulting, Inc. All rights reserved.
# Copyright 2014 Garrett D'Amore <garrett@damore.org>
+# Copyright (c) 2014, Joyent, Inc. All rights reserved.
#
include $(SRC)/Makefile.master
@@ -107,6 +108,9 @@ MANFILES= __fbufsize.3c \
enable_extended_FILE_stdio.3c \
encrypt.3c \
end.3c \
+ epoll_create.3c \
+ epoll_ctl.3c \
+ epoll_wait.3c \
err.3c \
euclen.3c \
exit.3c \
@@ -194,6 +198,9 @@ MANFILES= __fbufsize.3c \
imaxdiv.3c \
index.3c \
initgroups.3c \
+ inotify_init.3c \
+ inotify_add_watch.3c \
+ inotify_rm_watch.3c \
insque.3c \
is_system_labeled.3c \
isaexec.3c \
@@ -719,6 +726,8 @@ MANLINKS= FD_CLR.3c \
endusershell.3c \
endutent.3c \
endutxent.3c \
+ epoll_create1.3c \
+ epoll_pwait.3c \
erand48.3c \
errno.3c \
errx.3c \
@@ -1541,6 +1550,9 @@ _etext.3c := LINKSRC = end.3c
edata.3c := LINKSRC = end.3c
etext.3c := LINKSRC = end.3c
+epoll_create1.3c := LINKSRC = epoll_create.3c
+epoll_pwait.3c := LINKSRC = epoll_wait.3c
+
errx.3c := LINKSRC = err.3c
verr.3c := LINKSRC = err.3c
verrx.3c := LINKSRC = err.3c
diff --git a/usr/src/man/man3c/epoll_create.3c b/usr/src/man/man3c/epoll_create.3c
new file mode 100644
index 0000000000..3dd9abf5f7
--- /dev/null
+++ b/usr/src/man/man3c/epoll_create.3c
@@ -0,0 +1,104 @@
+'\" te
+.\" Copyright (c) 2014, Joyent, Inc. All Rights Reserved.
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.TH EPOLL_CREATE 3C "Apr 17, 2014"
+.SH NAME
+epoll_create, epoll_create1 \- create an epoll instance
+.SH SYNOPSIS
+
+.LP
+.nf
+#include <sys/epoll.h>
+
+\fBint\fR \fBepoll_create\fR(\fBint\fR \fIsize\fR);
+.fi
+
+.LP
+.nf
+\fBint\fR \fBepoll_create1\fR(\fBint\fR \fIflags\fR);
+.fi
+
+.SH DESCRIPTION
+.sp
+.LP
+The \fBepoll_create()\fR and \fBepoll_create1()\fR functions both create an
+\fBepoll\fR(5) instance that can be operated upon via \fBepoll_ctl\fR(3C),
+\fBepoll_wait\fR(3C) and \fBepoll_pwait\fR(3C). \fBepoll\fR instances are
+represented as file descriptors, and should be closed via \fBclose\fR(2).
+
+The only difference between the two functions is their signature;
+\fBepoll_create()\fR takes a size argument that
+is vestigal and is only meaningful in as much as it must be greater than
+zero, while \fBepoll_create1()\fR takes a flags argument that can have
+any of the following values:
+
+.sp
+.ne 2
+.na
+\fBEPOLL_CLOEXEC\fR
+.ad
+.RS 12n
+Instance should be closed upon an
+\fBexec\fR(2); see \fBopen\fR(2)'s description of \fBO_CLOEXEC\fR.
+.RE
+
+.SH RETURN VALUES
+.sp
+.LP
+Upon succesful completion, 0 is returned. Otherwise, -1 is returned and errno
+is set to indicate the error.
+.SH ERRORS
+.sp
+.LP
+The \fBepoll_create()\fR and \fBepoll_create1()\fR functions will fail if:
+.sp
+.ne 2
+.na
+\fB\fBEINVAL\fR\fR
+.ad
+.RS 10n
+Either the \fIsize\fR is zero (\fBepoll_create()\fR) or the \fIflags\fR
+are invalid (\fBepoll_create1()\fR).
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBEMFILE\fR\fR
+.ad
+.RS 10n
+There are currently {\fBOPEN_MAX\fR} file descriptors open in the calling
+process.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBENFILE\fR\fR
+.ad
+.RS 10n
+The maximum allowable number of files is currently open in the system.
+.RE
+
+.sp
+.SH NOTES
+.sp
+.LP
+
+The \fBepoll\fR(5) facility is implemented for purposes of offering
+compatibility for Linux-borne applications; native
+applications should continue to prefer using event ports via the
+\fBport_create\fR(3C), \fBport_associate\fR(3C) and \fBport_get\fR(3C)
+interfaces. See \fBepoll\fR(5) for compatibility details and restrictions.
+
+.SH SEE ALSO
+.sp
+.LP
+\fBepoll_ctl\fR(3C), \fBepoll_wait\fR(3C), \fBepoll\fR(5)
diff --git a/usr/src/man/man3c/epoll_ctl.3c b/usr/src/man/man3c/epoll_ctl.3c
new file mode 100644
index 0000000000..ccf3139396
--- /dev/null
+++ b/usr/src/man/man3c/epoll_ctl.3c
@@ -0,0 +1,300 @@
+'\" te
+.\" Copyright (c) 2014, Joyent, Inc. All Rights Reserved.
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.TH EPOLL_CTL 3C "Apr 17, 2014"
+.SH NAME
+epoll_ctl \- control an epoll instance
+.SH SYNOPSIS
+
+.LP
+.nf
+#include <sys/epoll.h>
+
+\fBint\fR \fBepoll_ctl\fR(\fBint\fR \fIepfd\fR, \fBint\fR \fIop\fR, \fBint\fR \fIfd\fR, \fBstruct epoll_event *\fR\fIevent\fR);
+.fi
+
+.SH DESCRIPTION
+.sp
+.LP
+The \fBepoll_ctl()\fR function executes the operation specified by
+\fIop\fR (as parameterized by \fIevent\fR) on the \fIepfd\fR epoll instance.
+Valid values for \fIop\fR:
+
+.sp
+.ne 2
+.na
+\fBEPOLL_CTL_ADD\fR
+.ad
+.RS 12n
+For the \fBepoll\fR(5) instance specified by \fIepfd\fR,
+associate the file descriptor specified by \fIfd\fR with the event specified
+by \fIevent\fR.
+.RE
+
+.sp
+.ne 2
+.na
+\fBEPOLL_CTL_DEL\fR
+.ad
+.RS 12n
+For the \fBepoll\fR(5) instance specified by \fIepfd\fR,
+remove all event associations for the file descriptor specified by \fIfd\fR.
+\fIevent\fR is ignored, and may be NULL.
+.RE
+
+.sp
+.ne 2
+.na
+\fBEPOLL_CTL_MOD\fR
+.ad
+.RS 12n
+For the \fBepoll\fR(5) instance specified by \fIepfd\fR, modify the event
+association for the file descriptor specified by \fIfd\fR to be that
+specified by \fIevent\fR.
+
+.RE
+
+The \fIevent\fR parameter has the following structure:
+
+.in +4
+.nf
+typedef union epoll_data {
+ void *ptr;
+ int fd;
+ uint32_t u32;
+ uint64_t u64;
+} epoll_data_t;
+
+struct epoll_event {
+ uint32_t events;
+ epoll_data_t data;
+};
+.fi
+.in -4
+
+The \fIdata\fR field specifies the datum to
+be associated with the event and
+will be returned via \fBepoll_wait\fR(3C).
+The \fIevents\fR field denotes both the desired events (when specified via
+\fBepoll_ctl()\fR) and the events that have occurred (when returned via
+\fBepoll_wait\fR(3C)).
+In either case, the
+\fIevents\fR field is a bitmask constructed by a logical \fBOR\fR operation
+of any combination of the following event flags:
+
+.sp
+.ne 2
+.na
+\fBEPOLLIN\fR
+.RS 14n
+Data other than high priority data may be read without blocking. For streams,
+this flag is set in the returned \fIevents\fR even if the message is of
+zero length.
+.RE
+
+.sp
+.ne 2
+.na
+\fBEPOLLPRI\fR
+.RS 14n
+Normal data (priority band equals 0) may be read without blocking. For streams,
+this flag is set in the returned \fIevents\fR even if the message is of zero
+length.
+.RE
+
+.sp
+.ne 2
+.na
+\fBEPOLLOUT\fR
+.RS 14n
+Normal data (priority band equals 0) may be written without blocking.
+.RE
+
+.sp
+.ne 2
+.na
+\fBEPOLLRDNORM\fR
+.RS 14n
+Normal data (priority band equals 0) may be read without blocking. For streams,
+this flag is set in the returned \fIrevents\fR even if the message is of
+zero length.
+.RE
+
+.sp
+.ne 2
+.na
+\fBEPOLLRDBAND\fR
+.RS 14n
+Data from a non-zero priority band may be read without blocking. For streams,
+this flag is set in the returned \fIrevents\fR even if the message is of
+zero length.
+.RE
+
+.sp
+.ne 2
+.na
+\fBEPOLLWRNORM\fR
+.RS 14n
+The same as \fBEPOLLOUT\fR.
+.RE
+
+.sp
+.ne 2
+.na
+\fBEPOLLWRBAND\fR
+.RS 14n
+Priority data (priority band > 0) may be written. This event only examines
+bands that have been written to at least once.
+.RE
+
+.sp
+.ne 2
+.na
+\fBEPOLLMSG\fR
+.RS 14n
+This exists only for backwards binary and source compatibility with Linux;
+it has no meaning and is ignored.
+.RE
+
+.sp
+.ne 2
+.na
+\fBEPOLLERR\fR
+.RS 14n
+An error has occurred on the device or stream. This flag is only valid in the
+returned \fIevents\fR field.
+.RE
+
+.sp
+.ne 2
+.na
+\fBEPOLLHUP\fR
+.RS 14n
+A hangup has occurred on the stream. This event and \fBEPOLLOUT\fR are mutually
+exclusive; a stream can never be writable if a hangup has occurred. However,
+this event and \fBEPOLLIN\fR, \fBEPOLLRDNORM\fR, \fBEPOLLRDBAND\fR,
+\fBEPOLLRDHUP\fR or
+\fBEPOLLPRI\fR are not mutually exclusive. This flag is only valid in the
+the \fIevents\fR field returned from \fBepoll_wait\fR(3C); it is not used
+in the \fIevents\fR field specified via \fBepoll_ctl()\fR.
+.RE
+
+.sp
+.ne 2
+.na
+\fBEPOLLRDHUP\fR
+.RS 14n
+The stream socket peer shutdown the writing half of the connection and no
+further data will be readable via the socket. This event is not mutually
+exclusive with \fBEPOLLIN\fR.
+.RE
+
+.sp
+.ne 2
+.na
+\fBEPOLLWAKEUP\fR
+.RS 14n
+This exists only for backwards binary and source compatibility with Linux;
+it has no meaning and is ignored.
+.RE
+
+.sp
+.ne 2
+.na
+\fBEPOLLONESHOT\fR
+.RS 14n
+Sets the specified event to be in one-shot mode, whereby the event association
+with the \fBepoll\fR(5) instance specified by \fIepfd\fR is removed atomically
+as the event is returned via \fBepoll_wait\fR(3C). Use of this mode allows
+for resolution of some of the
+races inherent in multithreaded use of \fBepoll_wait\fR(3C).
+.RE
+
+.sp
+.ne 2
+.na
+\fBEPOLLET\fR
+.RS 14n
+Sets the specified event to be edge-triggered mode instead of the default
+mode of level-triggered. In this mode, events will be induced by
+transitions on an event source rather than the state of the event source.
+While perhaps superficially appealing, this mode introduces several new
+potential failure modes for user-level software and should be used
+with caution.
+.RE
+
+.SH RETURN VALUES
+.sp
+.LP
+Upon succesful completion, \fBepoll_ctl()\fR returns 0.
+If an error occurs, -1 is returned and errno is set to indicate
+the error.
+
+.SH ERRORS
+.sp
+.LP
+\fBepoll_ctl()\fR will fail if:
+.sp
+.ne 2
+.na
+\fB\fBEBADF\fR\fR
+.ad
+.RS 10n
+\fIepfd\fR is not a valid file descriptor.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBEFAULT\fR\fR
+.ad
+.RS 10n
+The memory associated with \fIevent\fR was not mapped.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBEEXIST\fR\fR
+.ad
+.RS 10n
+The operation specified was \fBEPOLL_CTL_ADD\fR and the specified file
+descriptor is already associated with an event for the specified
+\fBepoll\fR(5) instance.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBENOENT\fR\fR
+.ad
+.RS 10n
+The operation specified was \fBEPOLL_CTL_MOD\fR or \fBEPOLL_CTL_DEL\fR and
+the specified file descriptor is not associated with an event for the
+specified \fBepoll\fR(5) instance.
+.RE
+
+.sp
+.SH NOTES
+.sp
+.LP
+
+The \fBepoll\fR(5) facility is implemented for purposes of offering
+compatibility for Linux-borne applications; native
+applications should continue to prefer using event ports via the
+\fBport_create\fR(3C), \fBport_associate\fR(3C) and \fBport_get\fR(3C)
+interfaces. See \fBepoll\fR(5) for compatibility details and restrictions.
+
+.SH SEE ALSO
+.sp
+.LP
+\fBepoll_create\fR(3C), \fBepoll_wait\fR(3C),
+\fBport_create\fR(3C), \fBport_associate\fR(3C), \fBport_get\fR(3C),
+\fBepoll\fR(5)
diff --git a/usr/src/man/man3c/epoll_wait.3c b/usr/src/man/man3c/epoll_wait.3c
new file mode 100644
index 0000000000..670eebe89c
--- /dev/null
+++ b/usr/src/man/man3c/epoll_wait.3c
@@ -0,0 +1,113 @@
+'\" te
+.\" Copyright (c) 2014, Joyent, Inc. All Rights Reserved.
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.TH EPOLL_WAIT 3C "Apr 17, 2014"
+.SH NAME
+epoll_wait, epoll_pwait \- wait for epoll events
+.SH SYNOPSIS
+
+.LP
+.nf
+#include <sys/epoll.h>
+
+\fBint\fR \fBepoll_wait\fR(\fBint\fR \fIepfd\fR, \fBstruct epoll_event *\fR\fIevents\fR,
+ \fBint\fR \fImaxevents\fR, \fBint\fR \fItimeout\fR);
+.fi
+
+.LP
+.nf
+\fBint\fR \fBepoll_pwait\fR(\fBint\fR \fIepfd\fR, \fBstruct epoll_event *\fR\fIevents\fR,
+ \fBint\fR \fImaxevents\fR, \fBint\fR \fItimeout\fR,
+ \fBconst sigset_t *\fR\fIsigmask\fR);
+.fi
+
+.SH DESCRIPTION
+.sp
+.LP
+The \fBepoll_wait()\fR function waits for events on the \fBepoll\fR(5)
+instance specified by \fIepfd\fR. The \fIevents\fR parameter must point to
+an array of \fImaxevents\fR \fIepoll_event\fR structures to be
+filled in with pending events. The \fItimeout\fR argument specifies the
+number of milliseconds to wait for an event if none is pending. A
+\fItimeout\fR of -1 denotes an infinite timeout.
+
+The \fBepoll_pwait()\fR is similar to \fBepoll_wait()\fR, but takes an
+additional \fIsigmask\fR argument that specifies the desired signal mask
+when \fBepoll_pwait()\fR is blocked. It is equivalent to atomically
+setting the signal mask, calling \fBepoll_wait()\fR, and restoring the
+signal mask upon return, and is therefore similar to the relationship
+between \fBselect\fR(3C) and \fBpselect\fR(3C).
+
+.SH RETURN VALUES
+.sp
+.LP
+Upon successful completion, \fBepoll_wait()\fR and \fBepoll_pwait()\fR return
+the number of events, or 0 if none was pending and \fItimeout\fR milliseconds
+elapsed. If an error occurs, -1 is returned and errno is set to indicate
+the error.
+
+.SH ERRORS
+.sp
+.LP
+The \fBepoll_wait()\fR and \fBepoll_pwait()\fR functions will fail if:
+.sp
+.ne 2
+.na
+\fB\fBEBADF\fR\fR
+.ad
+.RS 10n
+\fIepfd\fR is not a valid file descriptor.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBEFAULT\fR\fR
+.ad
+.RS 10n
+The memory associated with \fIevents\fR was not mapped or was not writable.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBEINTR\fR\fR
+.ad
+.RS 10n
+A signal was received during the \fBepoll_wait()\fR or \fBepoll_pwait()\fR.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBEINVAL\fR\fR
+.ad
+.RS 10n
+Either \fIepfd\fR is not a valid \fBepoll\fR(5) instance or \fImaxevents\fR
+is not greater than zero.
+.RE
+
+.sp
+.SH NOTES
+.sp
+.LP
+
+The \fBepoll\fR(5) facility is implemented for purposes of offering
+compatibility for Linux-borne applications; native
+applications should continue to prefer using event ports via the
+\fBport_create\fR(3C), \fBport_associate\fR(3C) and \fBport_get\fR(3C)
+interfaces. See \fBepoll\fR(5) for compatibility details and restrictions.
+
+.SH SEE ALSO
+.sp
+.LP
+\fBepoll_create\fR(3C), \fBepoll_ctl\fR(3C),
+\fBport_create\fR(3C), \fBport_associate\fR(3C), \fBport_get\fR(3C),
+\fBpselect\fR(3C), \fBepoll\fR(5)
diff --git a/usr/src/man/man3c/inotify_add_watch.3c b/usr/src/man/man3c/inotify_add_watch.3c
new file mode 100644
index 0000000000..4f79e03c82
--- /dev/null
+++ b/usr/src/man/man3c/inotify_add_watch.3c
@@ -0,0 +1,120 @@
+'\" te
+.\" Copyright (c) 2014, Joyent, Inc. All Rights Reserved.
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.TH INOTIFY_ADD_WATCH 3C "Sep 17, 2014"
+.SH NAME
+inotify_add_watch \- add a watch to an inotify instance
+.SH SYNOPSIS
+
+.LP
+.nf
+#include <sys/inotify.h>
+
+\fBint\fR \fBinotify_add_watch\fR(\fBint\fR \fIfd\fR, \fBconst char *\fR\fIpathname\fR, \fBuint32_t\fR \fImask\fR);
+.fi
+
+.SH DESCRIPTION
+.sp
+.LP
+The \fBinotify_add_watch()\fR function adds a watch for the file or
+directory specified by \fIpathname\fR to the inotify instance
+specified by \fIfd\fR for the events specified by \fImask\fR. See
+\fBinotify\fR(5) for details on the meaning of \fImask\fR, how
+it affects the interpretation of \fIpathname\fR, and how
+events for the watched file or directory are subsequently
+retrieved via \fBread\fR(2).
+
+.SH RETURN VALUES
+.sp
+.LP
+Upon succesful completion, \fBinotify_add_watch()\fR returns the
+watch descriptor associated with the new watch.
+If an error occurs, -1 is returned and errno is set to indicate
+the error.
+
+.SH ERRORS
+.sp
+.LP
+\fBinotify_add_watch()\fR will fail if:
+.sp
+.ne 2
+.na
+\fB\fBEACCES\fR\fR
+.ad
+.RS 10n
+\fIpathname\fR could not be opened for reading.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBEBADF\fR\fR
+.ad
+.RS 10n
+The \fIfd\fR argument is not a valid open file descriptor.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBEFAULT\fR\fR
+.ad
+.RS 10n
+The memory associated with \fIpathname\fR was not mapped.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBEINVAL\fR\fR
+.ad
+.RS 10n
+The \fIfd\fR argument does not correspond to an
+\fBinotify\fR(5) instance as initialized with
+\fBinotify_init\fR(3C) or \fBinotify_init1\fR(3C).
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBENOSPC\fR\fR
+.ad
+.RS 10n
+The number of watches on the specified instance would exceed the
+maximum number of watches per \fBinotify\fR(5) instance.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBENOTDIR\fR\fR
+.ad
+.RS 10n
+\fIpathname\fR does not correspond to a directory and
+\fBIN_ONLYDIR\fR was specified in \fImask\fR.
+.RE
+
+.sp
+.SH NOTES
+.sp
+.LP
+
+While the \fBinotify\fR(5) facility is implemented for purposes of
+offering compatibility for Linux-borne applications, native
+applications may opt to use it instead of (or in addition to) the
+\fBPORT_SOURCE_FILE\fR capability of event ports. See
+\fBinotify\fR(5) for details and restrictions.
+
+.SH SEE ALSO
+.sp
+.LP
+\fBinotify_init\fR(3C), \fBinotify_init1\fR(3C),
+\fBport_create\fR(3C), \fBport_associate\fR(3C), \fBport_get\fR(3C),
+\fBinotify\fR(5)
diff --git a/usr/src/man/man3c/inotify_init.3c b/usr/src/man/man3c/inotify_init.3c
new file mode 100644
index 0000000000..a091df2c26
--- /dev/null
+++ b/usr/src/man/man3c/inotify_init.3c
@@ -0,0 +1,107 @@
+'\" te
+.\" Copyright (c) 2014, Joyent, Inc. All Rights Reserved.
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.TH INOTIFY_INIT 3C "Sep 17, 2014"
+.SH NAME
+inotify_init, inotify_init1 \- initialize an inotify instance
+.SH SYNOPSIS
+
+.LP
+.nf
+#include <sys/inotify.h>
+
+\fBint\fR \fBinotify_init\fR(\fBvoid\fR);
+.fi
+
+.LP
+.nf
+\fBint\fR \fBinotify_init1\fR(\fBint\fR \fIflags\fR);
+.fi
+
+.SH DESCRIPTION
+.sp
+.LP
+The \fBinotify_init()\fR and \fBinotify_init1()\fR functions both create an
+\fBinotify\fR(5) instance that can be operated upon via
+\fBinotify_add_watch\fR(3C), \fBinotify_rm_watch\fR(3C) and \fBread\fR(2).
+\fBinotify\fR instances are
+represented as file descriptors, and should be closed via \fBclose\fR(2).
+
+The only difference between the two functions is their signature;
+\fBinotify_init()\fR takes no arguments,
+while \fBinotify_init1()\fR takes a \fIflags\fR argument that can have
+any of the following values:
+
+.sp
+.ne 2
+.na
+\fBIN_CLOEXEC\fR
+.ad
+.RS 12n
+Instance should be closed upon an
+\fBexec\fR(2); see \fBopen\fR(2)'s description of \fBO_CLOEXEC\fR.
+.RE
+
+.sp
+.ne 2
+.na
+\fBIN_NONBLOCK\fR
+.ad
+.RS 12n
+Instance be set to be non-blocking. A \fBread\fR(2) on an
+\fBinotify\fR instance that has been initialized with
+\fBIN_NONBLOCK\fR will return \fBEAGAIN\fR if there are
+no events enqueued in lieu of blocking.
+.RE
+
+.SH RETURN VALUES
+.sp
+.LP
+Upon succesful completion, 0 is returned. Otherwise, -1 is returned and errno
+is set to indicate the error.
+.SH ERRORS
+.sp
+.LP
+The \fBinotify_init()\fR and \fBinotify_init1()\fR functions will fail if:
+.sp
+.ne 2
+.na
+\fB\fBEINVAL\fR\fR
+.ad
+.RS 10n
+The \fIflags\fR are invalid (\fBinotify_init1()\fR).
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBEMFILE\fR\fR
+.ad
+.RS 10n
+There are currently {\fBOPEN_MAX\fR} file descriptors open in the calling
+process, or the maximum number of \fBinotify\fR instances for the user
+would be exceeded.
+.RE
+
+.sp
+.SH NOTES
+.sp
+.LP
+
+While the \fBinotify\fR(5) facility is implemented for purposes of
+offering compatibility for Linux-borne applications, native
+applications may opt to use it instead of (or in addition to) the
+\fBPORT_SOURCE_FILE\fR capability of event ports. See
+\fBinotify\fR(5) for details and restrictions.
+
+.SH SEE ALSO
+.sp
+.LP
+\fBinotiy_add_watch\fR(3C), \fBinotify_rm_watch\fR(3C), \fBinotify\fR(5)
diff --git a/usr/src/man/man3c/inotify_rm_watch.3c b/usr/src/man/man3c/inotify_rm_watch.3c
new file mode 100644
index 0000000000..de568f8e24
--- /dev/null
+++ b/usr/src/man/man3c/inotify_rm_watch.3c
@@ -0,0 +1,81 @@
+'\" te
+.\" Copyright (c) 2014, Joyent, Inc. All Rights Reserved.
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.TH INOTIFY_RM_WATCH 3C "Sep 17, 2014"
+.SH NAME
+inotify_rm_watch \- remove a watch from an inotify instance
+.SH SYNOPSIS
+
+.LP
+.nf
+#include <sys/inotify.h>
+
+\fBint\fR \fBinotify_rm_watch\fR(\fBint\fR \fIfd\fR, \fBint\fR \fIwd\fR);
+.fi
+
+.SH DESCRIPTION
+.sp
+.LP
+The \fBinotify_rm_watch()\fR function removes the watch specified
+by \fIwd\fR from the inotify instance associated with \fIfd\fR.
+Removing a watch will induce an \fBIN_IGNORED\fR event; see
+\fBinotify\fR(5) for details.
+
+.SH RETURN VALUES
+.sp
+.LP
+Upon succesful completion, \fBinotify_add_watch()\fR returns the
+watch descriptor associated with the new watch.
+If an error occurs, -1 is returned and errno is set to indicate
+the error.
+
+.SH ERRORS
+.sp
+.LP
+\fBinotify_rm_watch()\fR will fail if:
+.sp
+.ne 2
+.na
+\fB\fBEBADF\fR\fR
+.ad
+.RS 10n
+The \fIfd\fR argument is not a valid open file descriptor.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBEINVAL\fR\fR
+.ad
+.RS 10n
+The \fIfd\fR argument does not correspond to an
+\fBinotify\fR(5) instance as initialized with
+\fBinotify_init\fR(3C) or \fBinotify_init1\fR(3C), or
+\fIwd\fR is not a valid watch for the specified inotify
+instance.
+.RE
+
+.sp
+.SH NOTES
+.sp
+.LP
+
+While the \fBinotify\fR(5) facility is implemented for purposes of
+offering compatibility for Linux-borne applications, native
+applications may opt to use it instead of (or in addition to) the
+\fBPORT_SOURCE_FILE\fR capability of event ports. See
+\fBinotify\fR(5) for details and restrictions.
+
+.SH SEE ALSO
+.sp
+.LP
+\fBinotify_init\fR(3C), \fBinotify_init1\fR(3C),
+\fBport_create\fR(3C), \fBport_associate\fR(3C), \fBport_get\fR(3C),
+\fBinotify\fR(5)
diff --git a/usr/src/man/man3c/psignal.3c b/usr/src/man/man3c/psignal.3c
index 213c45f07f..81252ee0dd 100644
--- a/usr/src/man/man3c/psignal.3c
+++ b/usr/src/man/man3c/psignal.3c
@@ -1,15 +1,16 @@
'\" te
.\" Copyright 1989 AT&T. Copyright (c) 2005, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH PSIGNAL 3C "Mar 31, 2005"
+.TH PSIGNAL 3C "Aug 14, 2014"
.SH NAME
psignal, psiginfo \- system signal messages
.SH SYNOPSIS
.LP
.nf
-#include <siginfo.h>
+#include <signal.h>
\fBvoid\fR \fBpsignal\fR(\fBint\fR \fIsig\fR, \fBconst char *\fR\fIs\fR);
.fi
@@ -20,7 +21,6 @@ psignal, psiginfo \- system signal messages
.fi
.SH DESCRIPTION
-.sp
.LP
The \fBpsignal()\fR and \fBpsiginfo()\fR functions produce messages on the
standard error output describing a signal. The \fIsig\fR argument is a signal
@@ -28,14 +28,14 @@ that may have been passed as the first argument to a signal handler. The
\fIpinfo\fR argument is a pointer to a \fBsiginfo\fR structure that may have
been passed as the second argument to an enhanced signal handler. See
\fBsigaction\fR(2). The argument string \fIs\fR is printed first, followed by a
-colon and a blank, followed by the message and a \fBNEWLINE\fR character.
+colon and a blank, followed by the message and a \fBNEWLINE\fR character. If
+\fBs\fR is the value \fBNULL\fR or an empty string, then nothing is printed for
+the user's string and the colon and blank are omitted.
.SH USAGE
-.sp
.LP
Messages printed from these functions are in the native language specified by
the \fBLC_MESSAGES\fR locale category. See \fBsetlocale\fR(3C).
.SH ATTRIBUTES
-.sp
.LP
See \fBattributes\fR(5) for descriptions of the following attributes:
.sp
@@ -51,7 +51,6 @@ MT-Level Safe
.TE
.SH SEE ALSO
-.sp
.LP
\fBsigaction\fR(2), \fBgettext\fR(3C), \fBperror\fR(3C), \fBsetlocale\fR(3C),
\fBsiginfo.h\fR(3HEAD), \fBsignal.h\fR(3HEAD), \fBattributes\fR(5)
diff --git a/usr/src/man/man3dlpi/Makefile b/usr/src/man/man3dlpi/Makefile
index cdd24216bd..4c5448f0be 100644
--- a/usr/src/man/man3dlpi/Makefile
+++ b/usr/src/man/man3dlpi/Makefile
@@ -41,10 +41,12 @@ MANFILES= dlpi_arptype.3dlpi \
dlpi_walk.3dlpi
MANLINKS= dlpi_disabmulti.3dlpi \
+ dlpi_open_zone.3dlpi \
dlpi_promiscoff.3dlpi
dlpi_disabmulti.3dlpi := LINKSRC = dlpi_enabmulti.3dlpi
+dlpi_open_zone.3dlpi := LINKSRC = man3dlpi/dlpi_open.3dlpi
dlpi_promiscoff.3dlpi := LINKSRC = dlpi_promiscon.3dlpi
.KEEP_STATE:
diff --git a/usr/src/man/man3dlpi/dlpi_open.3dlpi b/usr/src/man/man3dlpi/dlpi_open.3dlpi
index 8129a75404..489f66066a 100644
--- a/usr/src/man/man3dlpi/dlpi_open.3dlpi
+++ b/usr/src/man/man3dlpi/dlpi_open.3dlpi
@@ -1,9 +1,10 @@
'\" te
.\" Copyright (c) 2008, Sun Microsystems, Inc. All Rights Reserved
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH DLPI_OPEN 3DLPI "Nov 17, 2008"
+.TH DLPI_OPEN 3DLPI "Feb 24, 2014"
.SH NAME
dlpi_open \- open DLPI link
.SH SYNOPSIS
@@ -14,6 +15,9 @@ dlpi_open \- open DLPI link
\fBint\fR \fBdlpi_open\fR(\fBconst char *\fR\fIlinkname\fR, \fBdlpi_handle_t *\fR\fIdhp\fR,
\fBuint_t\fR \fIflags\fR);
+
+\fBint\fR \fBdlpi_open_zone\fR(\fBconst char *\fR\fIlinkname\fR, \fBconst char *\fR
+ \fIzonename\fR, \fBdlpi_handle_t *\fR\fIdhp\fR, \fBuint_t\fR \fIflags\fR);
.fi
.SH DESCRIPTION
@@ -114,6 +118,18 @@ value ensures that \fBDLPI_ETIMEDOUT\fR is returned from a \fBlibdlpi\fR
operation only in the event that the \fBDLPI\fR link becomes unresponsive. The
timeout value can be changed with \fBdlpi_set_timeout\fR(3DLPI), although this
should seldom be necessary.
+
+.sp
+.LP
+The \fBdlpi_open_zone()\fR function behaves as \fBdlpi_open()\fR, except that it
+looks for the link specified by \fBlinkname\fR in the specified zone
+\fBzonename\fR as opposed to the current zone. This function is only meaningful
+from the global zone. Instead of scanning \fB/dev/net\fR, \fBdlpi_open_zone()\fR
+scans \fB/dev/net/zone/<\fIzonename\fR> for the data link and
+\fB/dev/ipnet/zone/<\fIzonename\fR> when DLPI_DEVIPNET is present in
+\fBflags\fR. If a NULL or empty string is passed into \fBdlpi_open_zone()\fR, it
+will behave as though \fBdlpi_open\fR has been called.
+
.SH RETURN VALUES
.sp
.LP
@@ -124,7 +140,7 @@ section is returned.
.SH ERRORS
.sp
.LP
-The \fBdlpi_open()\fR function will fail if:
+The \fBdlpi_open()\fR and \fBdlpi_open_zone()\fR function will fail if:
.sp
.ne 2
.na
@@ -195,6 +211,17 @@ DLPI operation failed
See \fBattributes\fR(5) for description of the following attributes:
.sp
+.LP
+The \fBdlpi_open_zone()\fR function will fail if:
+.sp
+.ne 2
+.na
+\fB\fBDLPI_EZONENAMEINVAL\fR\fR
+.ad
+.RS 25n
+Invalid \fIzonename\fR argument
+.RE
+
.sp
.TS
box;
diff --git a/usr/src/man/man3lib/Makefile b/usr/src/man/man3lib/Makefile
index fe9acebc3a..e36ee916b9 100644
--- a/usr/src/man/man3lib/Makefile
+++ b/usr/src/man/man3lib/Makefile
@@ -103,6 +103,7 @@ MANFILES= libMPAPI.3lib \
libumem.3lib \
libuuid.3lib \
libvolmgt.3lib \
+ libvnd.3lib \
libw.3lib \
libxnet.3lib \
liby.3lib
diff --git a/usr/src/man/man3lib/libvnd.3lib b/usr/src/man/man3lib/libvnd.3lib
new file mode 100644
index 0000000000..ead69ff82e
--- /dev/null
+++ b/usr/src/man/man3lib/libvnd.3lib
@@ -0,0 +1,690 @@
+'\" te
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\"
+.TH LIBVND 3LIB "Mar 06, 2014"
+.SH NAME
+libvnd \- vnd library
+
+.SH SYNOPSIS
+.LP
+.nf
+cc [ flag... ] file... -lvnd [ library... ]
+#include <libvnd.h>
+.fi
+
+.SH DESCRIPTION
+.LP
+The libvnd library provides a stable and programmatic interface to
+vnd(7D) devices. vnd devices provide the means for creating a layer two
+interface over a data link, similar to the use of libdlpi(3LIB) and
+IP(7P). In dlpi parlance, a vnd device obtains data from all service
+attachment points (SAP). For ethernet devices, this means that a vnd
+device sends and receives traffic for all ethertypes. It is intended to
+be used for services such as virtual machines which emulate layer two
+devices.
+
+.LP
+Handles to vnd(7D) devices are obtained through the use of vnd_create
+and vnd_open. With a handle, I/O can be performed and properties on the
+device can be set and retrieved. I/O on devices should be performed
+through the vnd_frameio_read and vnd_frameio_write functions. A file
+descriptor suitable for use with event ports and polling may be obtained
+through vnd_pollfd. Handles are relinquished through calls to vnd_close;
+however, devices will persist until vnd_unlink has been called.
+
+.LP
+The rest of this manual documents the interfaces, properties, errors,
+and threading model for libvnd. The in-depth description of individual
+interfaces, their arguments, and examples, are in manual pages for each
+provided interface.
+
+
+.SH INTERFACES
+.sp
+.LP
+
+The shared object libvnd.so.1 provides the public interfaces defined
+below. See Intro(3) for additional information on shared object
+interfaces. Individual functions are documented in their own manual
+pages.
+
+.sp
+.TS
+l l
+l l .
+vnd_create vnd_errno
+vnd_open vnd_syserrno
+vnd_unlink vnd_strerror
+vnd_close vnd_strsyserror
+vnd_pollfd vnd_walk
+vnd_prop_get vnd_prop_set
+vnd_prop_iter vnd_prop_writeable
+vnd_frameio_read vnd_frameio_write
+.TE
+
+.SH PROPERTIES
+
+.LP
+The following table summarizes properties of a vnd device. The
+properties can be retrieved and set with the functions
+vnd_prop_get(3VND) and vnd_prop_set(3VND). Following the table, the
+structures and properties are described in greater detail.
+
+.nf
+ +-------------------+---------------------+-------+
+ | PROPERTY | STRUCTURE | PERM |
+ +-------------------+---------------------+-------+
+ | VND_PROP_RXBUF | vnd_prop_buf_t | R/W |
+ +-------------------+---------------------+-------+
+ | VND_PROP_TXBUF | vnd_prop_buf_t | R/W |
+ +-------------------+---------------------+-------+
+ | VND_PROP_MAXBUF | vnd_prop_buf_t | R/- |
+ +-------------------+---------------------+-------+
+ | VND_PROP_MINTU | vnd_prop_buf_t | R/- |
+ +-------------------+---------------------+-------+
+ | VND_PROP_MAXTU | vnd_prop_buf_t | R/- |
+ +-------------------+---------------------+-------+
+.fi
+
+.SS Structures
+
+.LP
+The vnd_prop_buf_t structure has the following members:
+
+.in +2
+.nf
+uint64_t vpb_size;
+.fi
+.in -2
+
+.LP
+The vpb_size member refers to a size in bytes. When getting a property,
+it represents the size of that property, when setting a property, it is
+the size to set the property to.
+
+
+.SS Property Descriptions
+.sp
+.ne 2
+.na
+rxbuf
+.ad
+.sp .6
+.RS 4n
+A read/write property that controls the size of the receive buffer for
+the device. All received data enters the receive buffer until a consumer
+consumes it. If adding a received frame would exceed the size of the
+receive buffer, then that frame will be dropped. The maximum size of the
+buffer is limited by the 'maxsize' property.
+.RE
+
+.sp
+.ne 2
+.na
+txbuf
+.ad
+.sp .6
+.RS 4n
+A read/write property that controls the size of the transmit buffer. All
+in-flight transmitted data must be able to fix into the transmit buffer
+to deal with potential flow control events. If there is not enough space
+in the transmit buffer, transmit related I/O operations will either
+block or fail based on whether or not O_NONBLOCK or O_NDELAY were set
+with fcntl(2).
+.RE
+
+.sp
+.ne 2
+.na
+maxsize
+.ad
+.sp .6
+.RS 4n
+A read only property that describes the maximum size of buffers in the
+system. Properties such as rxbuf and txbuf cannot be set beyond this.
+.RE
+
+.sp
+.ne 2
+.na
+mintu
+.ad
+.sp .6
+.RS 4n
+A read only property that describes the minimum size of a frame
+transmitted to the underlying data link. Note that the minimum listed
+here may be less than the size of a valid layer two frame and therefore
+may be dropped. A frame smaller than this value will be rejected by vnd.
+.RE
+
+.sp
+.ne 2
+.na
+maxtu
+.ad
+.sp .6
+.RS 4n
+A read only property that describes the maximum size of
+a frame transmitted to the underlying data link. A frame
+larger than this value will be rejected by vnd.
+.RE
+
+
+.SH ERRORS
+.sp
+.LP
+Most interfaces provided by libvnd provide a means to retrieve a
+vnd_errno_t that describes an error that has occurred. The manuals for
+individual interfaces describe whether or not this additional error
+information is available and how to retrieve it. The following is a
+complete list of the error numbers and their names as defined in
+<sys/vnd_errno.h>. Any entries not listed here are private to the
+implementation and may change at any time.
+
+.sp
+.ne 2
+.na
+0 VND_E_SUCCESS
+.ad
+.RS 23n
+no error
+.sp
+This indicates that the operation completed successfully.
+.RE
+
+.sp
+.ne 2
+.na
+1 VND_E_NOMEM
+.ad
+.RS 23n
+not enough memory available
+.sp
+Insufficient memory was available. This is the equivalent of the
+standard system errno ENOMEM.
+.RE
+
+.sp
+.ne 2
+.na
+2 VND_E_NODATALINK
+.ad
+.RS 23n
+no such datalink
+.sp
+The data link requested to be used as part of vnd_create does not exist
+in the requested zone.
+.RE
+
+.sp
+.ne 2
+.na
+3 VND_E_NOTETHER
+.ad
+.RS 23n
+datalink not of type DL_ETHER
+.sp
+The data link used as part of a call to vnd_create is not an Ethernet
+device. vnd_create only works with Ethernet devices at this time.
+.RE
+
+.sp
+.ne 2
+.na
+4 VND_E_DLPIINVAL
+.ad
+.RS 23n
+unknown dlpi failure
+.sp
+An unexpected DLPI message was received during vnd device
+initialization.
+.RE
+
+.sp
+.ne 2
+.na
+5 VND_E_ATTACHFAIL
+.ad
+.RS 23n
+DL_ATTACH_REQ failed
+.sp
+During vnd device initialization, the dlpi call to attach to the
+requested data link failed.
+.RE
+
+.sp
+.ne 2
+.na
+6 VND_E_BINDFAIL
+.ad
+.RS 23n
+DL_BIND_REQ failed
+.sp
+
+During vnd device initialization, the dlpi call to bind to a service
+attachment point on the data link failed.
+.RE
+
+.sp
+.ne 2
+.na
+7 VND_E_PROMISCFAIL
+.ad
+.RS 23n
+DL_PROMISCON_REQ failed
+.sp
+
+During vnd device initialization, the dlpi call to enable promiscuous
+mode on the underlying device failed.
+.RE
+
+.sp
+.ne 2
+.na
+8 VND_E_DIRECTFAIL
+.ad
+.RS 23n
+DLD_CAPAB_DIRECT enable failed
+.sp
+During vnd device initialization, the dlpi call to enable the DLD fast
+path failed.
+.RE
+
+.sp
+.ne 2
+.na
+9 VND_E_CAPACKINVAL
+.ad
+.RS 23n
+bad datalink capability
+.sp
+During vnd device initialization, the kernel responded with an invalid
+capability acknowledgement.
+.RE
+
+.sp
+.ne 2
+.na
+10 VND_E_SUBCAPINVAL
+.ad
+.RS 23n
+bad datalink subcapability
+.sp
+During vnd device initialization, the kernel responded with an invalid
+sub-capability.
+.RE
+
+.sp
+.ne 2
+.na
+11 VND_E_DLDBADVERS
+.ad
+.RS 23n
+bad dld version
+.sp
+The vnd(7D) module does not support the version of the dld capability
+that the kernel sent. As such, the data path could not be brought up and
+the device could not be fully initialized.
+.RE
+
+.sp
+.ne 2
+.na
+12 VND_E_KSTATCREATE
+.ad
+.RS 23n
+failed to create kstats
+.sp
+During vnd device initialization, the necessary kstats could not be
+created.
+.RE
+
+.sp
+.ne 2
+.na
+13 VND_E_NODEV
+.ad
+.RS 23n
+no such vnd link
+.sp
+During device initialization, the requested character device did not
+exist.
+.RE
+
+.sp
+.ne 2
+.na
+14 VND_E_NONETSTACK
+.ad
+.RS 23n
+netstack doesn't exist
+.sp
+During device initialization, the networking stack for the device did
+not exist.
+.RE
+
+.sp
+.ne 2
+.na
+15 VND_E_ASSOCIATED
+.ad
+.RS 23n
+device already associated
+.sp
+During vnd device initialization, the vnd STREAMS device was already
+associated with another vnd device.
+.RE
+
+.sp
+.ne 2
+.na
+16 VND_E_ATTACHED
+.ad
+.RS 23n
+device already attached
+.sp
+The given vnd device has already been created over a data link and
+cannot be created over another one.
+.RE
+
+.sp
+.ne 2
+.na
+17 VND_E_LINKED
+.ad
+.RS 23n
+device already linked
+.sp
+The given vnd device has already been given a name and bound into the
+file system name space.
+.RE
+
+.sp
+.ne 2
+.na
+18 VND_E_BADNAME
+.ad
+.RS 23n
+invalid name
+.sp
+The requested name is not a valid name. Valid names are alphanumeric
+ascii names, along with the following ascii characters: ':', '\-', and
+\'_'. Names must be less than LIBVND_NAMELEN bytes including the null
+terminator.
+.RE
+
+.sp
+.ne 2
+.na
+19 VND_E_PERM
+.ad
+.RS 23n
+permission denied
+.sp
+A request was made from a non-global zone to manipulate a vnd device
+that belongs to a different zone.
+.RE
+
+.sp
+.ne 2
+.na
+20 VND_E_NOZONE
+.ad
+.RS 23n
+no such zone
+.sp
+A request was made which targeted a zone that did not exist.
+.RE
+
+.sp
+.ne 2
+.na
+21 VND_E_STRINIT
+.ad
+.RS 23n
+failed to initialize vnd stream module
+.sp
+During vnd device initialization, the vnd STREAMS module could not be
+pushed onto the data link's stream head.
+.RE
+
+.sp
+.ne 2
+.na
+22 VND_E_NOTATTACHED
+.ad
+.RS 23n
+device not attached
+.sp
+A request was made that requires a vnd device be attached to a data
+link, such as a call to change a property. The device was not attached
+to a data link.
+.RE
+
+.sp
+.ne 2
+.na
+23 VND_E_NOTLINKED
+.ad
+.RS 23n
+device not linked
+.sp
+A request was made to a vnd device that requires the vnd device to be
+named and present in /dev. The given device was not linked into /dev at
+the time of the call.
+.RE
+
+.sp
+.ne 2
+.na
+24 VND_E_LINKEXISTS
+.ad
+.RS 23n
+another device has the same link name
+.sp
+When trying to link a given vnd device into a zones /dev name space,
+another device already exists with the same name.
+.RE
+
+.sp
+.ne 2
+.na
+25 VND_E_MINORNODE
+.ad
+.RS 23n
+failed to create minor node
+.sp
+While trying to link a vnd device into the /devices and /dev name space,
+the call to ddi_create_minor_node() failed.
+.RE
+
+.sp
+.ne 2
+.na
+26 VND_E_BUFTOOBIG
+.ad
+.RS 23n
+requested buffer size is too large
+.sp
+The requested buffer size exceeds the maximum valid value for the given
+property.
+.RE
+
+.sp
+.ne 2
+.na
+27 VND_E_BUFTOOSMALL
+.ad
+.RS 23n
+requested buffer size is too small
+.sp
+The requested buffer size is less than the minimum buffer size. This
+generally occurs when making the buffer size less than the maximum
+transmission unit.
+.RE
+
+.sp
+.ne 2
+.na
+28 VND_E_DLEXCL
+.ad
+.RS 23n
+unable to obtain exclusive access to dlpi link, link busy
+.sp
+When a vnd device is created, it expects exclusive active access to the
+device. If any other active dlpi consumers, such as IP, are already
+using the device, then the vnd device will not be created. Passive
+consumers, such as snoop, can still use a device that has been
+exclusively opened.
+.RE
+
+.sp
+.ne 2
+.na
+28 VND_E_DIRECTNOTSUP
+.ad
+.RS 23n
+DLD direct capability not supported over data link
+.sp
+The data link that the vnd device was created over does not supported
+the DLD Direct capability. As such, the data path could not be
+initialized.
+.RE
+
+.sp
+.ne 2
+.na
+30 VND_E_BADPROPSIZE
+.ad
+.RS 23n
+invalid property size
+.sp
+The size of the data passed into vnd_prop_get or vnd_prop_set is
+incorrect and does not match the expected data size.
+.RE
+
+.sp
+.ne 2
+.na
+31 VND_E_BADPROP
+.ad
+.RS 23n
+invalid property
+.sp
+An unknown property identifier was specified. For a list of valid
+properties, see the section above entitled "PROPERTIES".
+.RE
+
+.sp
+.ne 2
+.na
+32 VND_E_PROPRDONLY
+.ad
+.RS 23n
+property is read only
+.sp
+An operation tried to update the value of a read only property. For a
+list of which properties are read only and which are readable and
+writeable, see the section above entitled "PROPERTIES".
+.RE
+
+.sp
+.ne 2
+.na
+33 VND_E_SYS
+.ad
+.RS 23n
+unexpected system error
+.sp
+This indicates that there is no vnd specific error available and that
+the system errno is valid. The system errno can be obtained and printed
+through vnd_syserrno and vnd_strsyserror. The possible values and their
+meanings are documented in Intro(2).
+.RE
+
+.sp
+.ne 2
+.na
+34 VND_E_CAPABPASS
+.ad
+.RS 23n
+capabilities invalid, pass-through module detected
+.sp
+While negotiating capabilities, a pass-through module was detected and
+the capability had to be discarded. Because of this, the data path could
+not be initialized.
+.RE
+
+
+.SH THREADING
+
+.LP
+The libvnd library is not truly MT-safe. MT-safety is provided on
+the granularity of a given vnd_handle_t. Operations on a single
+vnd_handle_t are unsafe; however, operations on different handles are
+MT-safe. If a single vnd_handle_t is used by multiple threads, it
+is the caller's responsibility to provide locking to ensure that
+multiple threads aren't simultaneously calling into libvnd on a
+single handle.
+
+
+.SH FILES
+.sp
+.ne 2
+.na
+/usr/lib/libvnd.so.1
+.ad
+.RS 27n
+shared object
+.RE
+
+.sp
+.ne 2
+.na
+/usr/lib/64/libvnd.so.1
+.ad
+.RS 27n
+64-bit shared object
+.RE
+
+.SH ATTRIBUTES
+
+.sp
+.LP
+See attributes(5) for descriptions of the following attributes:
+
+.sp
+.TS
+box;
+c | c
+l | l .
+ATTRIBUTE TYPE ATTRIBUTE VALUE
+_
+Stability Committed
+_
+MT-Level See "THREADING"
+.TE
+
+.SH SEE ALSO
+
+.sp
+.LP
+attributes(5), Intro(2), fcntl(2), Intro(3), fcntl.h(3HEAD), libdlpi(3LIB), port_create(3C), vnd(7D)
+.sp
+.LP
+vnd_close(3VND), vnd_create(3VND), vnd_errno(3VND),
+vnd_frameio_read(3VND), vnd_frameio_write(3VND), vnd_open(3VND)
+vnd_pollfd(3VND), vnd_prop_get(3VND), vnd_prop_iter(3VND),
+vnd_prop_set(3VND),
+vnd_prop_writeable(3VND), vnd_walk(3VND)
diff --git a/usr/src/man/man3vnd/Makefile b/usr/src/man/man3vnd/Makefile
new file mode 100644
index 0000000000..64abf9dcd6
--- /dev/null
+++ b/usr/src/man/man3vnd/Makefile
@@ -0,0 +1,70 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet
+# at http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014, Joyent, Inc. All rights reserved.
+#
+
+include $(SRC)/Makefile.master
+
+MANSECT= 3vnd
+
+MANFILES= vnd_create.3vnd \
+ vnd_errno.3vnd \
+ vnd_frameio_read.3vnd \
+ vnd_pollfd.3vnd \
+ vnd_prop_get.3vnd \
+ vnd_prop_iter.3vnd \
+ vnd_prop_writeable.3vnd \
+ vnd_walk.3vnd
+
+MANLINKS= frameio_t.3vnd \
+ framevec_t.3vnd \
+ vnd_close.3vnd \
+ vnd_frameio_write.3vnd \
+ vnd_open.3vnd \
+ vnd_prop_set.3vnd \
+ vnd_prop_iter_f.3vnd \
+ vnd_strerror.3vnd \
+ vnd_strsyserror.3vnd \
+ vnd_syserrno.3vnd \
+ vnd_unlink.3vnd \
+ vnd_walk_cb_f.3vnd
+
+# vnd_create.3vnd
+vnd_open.3vnd := LINKSRC = vnd_create.3vnd
+vnd_unlink.3vnd := LINKSRC = vnd_create.3vnd
+vnd_close.3vnd := LINKSRC = vnd_create.3vnd
+
+# vnd_errno.3vnd
+vnd_strerror.3vnd := LINKSRC = vnd_errno.3vnd
+vnd_syserrno.3vnd := LINKSRC = vnd_errno.3vnd
+vnd_strsyserror.3vnd := LINKSRC = vnd_errno.3vnd
+
+# vnd_frameio_read.3vnd
+vnd_frameio_write.3vnd := LINKSRC = vnd_frameio_read.3vnd
+framevec_t.3vnd := LINKSRC = vnd_frameio_read.3vnd
+frameio_t.3vnd := LINKSRC = vnd_frameio_read.3vnd
+
+# vnd_prop_get.3vnd
+vnd_prop_set.3vnd := LINKSRC = vnd_prop_get.3vnd
+
+# vnd_prop_iter.3vnd
+vnd_prop_iter_f.3vnd := LINKSRC = vnd_prop_iter.3vnd
+
+# vnd_walk.3vnd
+vnd_walk_cb_f.3vnd := LINKSRC = vnd_walk.3vnd
+
+.KEEP_STATE:
+
+include $(SRC)/man/Makefile.man
+
+install: $(ROOTMANFILES) $(ROOTMANLINKS)
diff --git a/usr/src/man/man3vnd/vnd_create.3vnd b/usr/src/man/man3vnd/vnd_create.3vnd
new file mode 100644
index 0000000000..d29237a60c
--- /dev/null
+++ b/usr/src/man/man3vnd/vnd_create.3vnd
@@ -0,0 +1,280 @@
+'\" te
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\"
+.TH VND_CREATE 3VND "Feb 21, 2014"
+
+.SH NAME
+
+vnd_create, vnd_open, vnd_unlink, vnd_close \- create, open, and destroy
+vnd devices
+
+.SH SYNOPSIS
+
+.LP
+.nf
+cc [ flag... ] file... -lvnd [ library... ]
+#include <libvnd.h>
+
+vnd_handle_t *vnd_create(const char *zonename, const char *datalink,
+ const char *linkname, vnd_errno_t *vnderr, int *syserr);
+
+vnd_handle_t *vnd_open(const char *zonename, const char *linkname,
+ vnd_errno_t *vnderr, int *syserr);
+
+int vnd_unlink(vnd_handle_t *vhp);
+
+void vnd_close(vnd_handle_t *vhp);
+.fi
+
+
+.SH DESCRIPTION
+.LP
+These functions create vnd devices, obtain handles to extant vnd
+devices, and close handles to vnd devices, for use with the rest of
+libvnd(3LIB).
+
+.LP
+The vnd_create function creates a new vnd device in the zone specified
+by zonename. The zone name argument may be null, in which case the
+caller's current zone is used instead. The vnd device and data link it
+is created over must both be in the same zone. The datalink argument
+indicates the name of the DLPI data link to create the vnd device over.
+The linkname argument indicates the name of the new vnd device. The
+linkname argument must be less than VND_NAMELEN characters long,
+excluding the null terminator. It should be an alphanumeric string. The
+only non-alphanumeric characters allowed are ':', '-', and \'_'.
+Neither the datalink argument nor linkname argument may be NULL. A
+handle to the created device is returned to the caller. Once the
+vnd_create function returns, the device can be subsequently opened with
+a call to vnd_open. The named device persists until a call to vnd_unlink
+or the containing zone is halted. Creating a vnd device requires
+PRIV_SYS_NET_CONFIG as well as PRIV_RAWACCESS. The arguments vnderr and
+syserr are used to obtain errors in the cases where the call to
+vnd_create fails. Both arguments may be NULL pointers, in which case the
+more detailed error information is discarded.
+
+.LP
+The vnd_open function opens an existing vnd device and returns a
+unique handle to that device. The vnd device to open is specified by
+both zonename and linkname. The zonename argument specifies what zone
+to look for the vnd device in. The linkname specifies the name of the
+link. The zonename argument may be NULL. If it is, the current zone is
+used. Similar to vnd_create, the integer values pointed to by the
+arguments vnderr and syserr will be filled in with additional error
+information in the cases where a call to vnd_open fails. Both
+arguments may be NULL to indicate that the error information is not
+requested, though this is not recommended.
+
+.LP
+The vnd_unlink function unlinks the vnd device specified by the vnd
+handle vhp. This unlink is similar to the use of unlink in a file
+system. After a call to unlink, the vnd device will no longer be
+accessible by callers to vnd_open and the name will be available for
+use in vnd_create. However, the device will continue to exist until
+all handles to the device have been closed.
+
+.LP
+The vnd_close function relinquishes the vnd device referenced by the
+handle vhp. After a call to vnd_close, the handle is invalidated and
+must not be used by the consumer again. The act of calling vnd_close
+on a handle does not remove the device. The device is persisted as
+long as vnd_unlink has not been called on the device or the containing
+zone has not been destroyed.
+
+.SH RETURN VALUES
+
+.LP
+Upon successful completion, the functions vnd_create and vnd_open
+return a pointer to a vnd_handle_t. This handle is used for all
+subsequent library operations. If either function fails, then a NULL
+pointer is returned and more detailed error information is filled into
+the integers pointed to by vnderr and syserr. The vnderr and syserr
+correspond to the values that would normally be returned by a call to
+vnd_errno(3VND) and vnd_syserrno(3VND). For the full list of possible
+errors see libvnd(3LIB).
+
+.LP
+The vnd_unlink function returns zero on success and -1 on failure. On
+failure, the vnd and system errnos are updated and available through
+the vnd_errno(3VND) and vnd_syserrno(3VND) functions.
+
+.LP
+The vnd_close function does not return any values nor does it set
+vnderr or syserr. The handle passed to vnd_close can no longer be
+used.
+
+.SH EXAMPLES
+.LP
+Example 1 Creating a device
+.sp
+.LP
+
+The following sample C program shows how to create a vnd device over
+an existing datalink named "net0" that other applications can open
+and use as "vnd0".
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+ vnd_handle_t *vhp;
+ vnd_errno_t vnderr;
+ int syserr;
+
+ /* Errors are considered fatal */
+ vhp = vnd_create(NULL, "net0", "vnd0", &vnderr, &syserr);
+
+ if (vhp == NULL) {
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to create device: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to create device: %s",
+ vnd_strerror(vnderr));
+ return (1);
+ }
+
+ (void) printf("successfully created vnd0\n");
+ vnd_close(vhp);
+ return (0);
+}
+.fi
+.in -2
+
+.LP
+Example 2 Opening an existing device in another zone
+.sp
+.LP
+
+The following sample C program opens the device named "vnd1" in the zone
+named "turin" for further use.
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+ vnd_handle_t *vhp;
+ vnd_errno_t vnderr;
+ int syserr, ret;
+
+ vhp = vnd_open("turin", "vnd1", &vnderr, &syserr);
+ if (vhp != NULL) {
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strerror(vnderr));
+ return (1);
+ }
+
+ /*
+ * Use the device vnd1 with the handle vhp with any of
+ * the other interfaces documented in libvnd(3LIB) here.
+ *
+ * After an arbitrary amount of code, the program will
+ * set the variable ret with the exit code for the
+ * program and should execute the following code before
+ * returning.
+ */
+ vnd_close(vhp);
+ return (ret);
+}
+.fi
+.in -2
+
+
+.LP
+Example 3 Removing a device
+.sp
+.LP
+
+The following sample C program removes a vnd device named vnd0. This
+program makes it so no additional programs can access the device.
+However, if anyone is actively using it, it will still exist,
+similar to calling unlink(2).
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+ vnd_handle_t *vhp;
+ vnd_errno_t vnderr;
+ int syserr, ret;
+
+ vhp = vnd_open(NULL, "vnd0", &vnderr, &syserr);
+ if (vhp != NULL) {
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strerror(vnderr));
+ return (1);
+ }
+
+ if (vnd_unlink(vhp) != 0) {
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to unlink device: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to unlink device: %s",
+ vnd_strerror(vnderr));
+ ret = 1;
+ } else {
+ (void) printf("successfully unlinked vnd0!\n");
+ ret = 0;
+ }
+
+ vnd_close(vhp);
+ return (ret);
+}
+.fi
+.in -2
+
+.SH ATTRIBUTES
+.sp
+.LP
+See attributes(5) for descriptions of the following attributes:
+
+.sp
+.TS
+box;
+c | c
+l | l .
+ATTRIBUTE TYPE ATTRIBUTE VALUE
+_
+Stability Committed
+_
+MT-Level See "THREADING" in libvnd(3LIB)
+.TE
+
+.SH SEE ALSO
+
+libvnd(3LIB), vnd_errno(3VND), vnd_syserrno(3VND), attributes(5), privileges(5)
diff --git a/usr/src/man/man3vnd/vnd_errno.3vnd b/usr/src/man/man3vnd/vnd_errno.3vnd
new file mode 100644
index 0000000000..ddd6126dd1
--- /dev/null
+++ b/usr/src/man/man3vnd/vnd_errno.3vnd
@@ -0,0 +1,170 @@
+'\" te
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\"
+.TH VND_ERRNO 3VND "Feb 21, 2014"
+
+.SH NAME
+
+vnd_errno, vnd_syserrno, vnd_strerror, vnd_strsyserror \- obtain and
+translate vnd errors
+
+
+.SH SYNOPSIS
+
+.LP
+.nf
+cc [ flag... ] file... -lvnd [ library... ]
+#include <libvnd.h>
+
+uint32_t vnd_errno(vnd_handle_t *vhp);
+
+const char *vnd_strerror(vnd_errno_t err);
+
+int vnd_syserrno(vnd_handle_t *vhp);
+
+const char *vnd_strsyserror(int syserr);
+.fi
+
+.SH DESCRIPTION
+
+.LP
+The libvnd(3LIB) library supports a complementary array of errors that
+give more specific error information than the traditional set of
+system errors available via errno(3C). When an error occurs, consumers
+should call the vnd_errno function first and check its value. If the
+value of the vnd_errno_t is VND_E_SYS, then the system errno should be
+checked. If the vnd_errno_t is not VND_E_SYS, then the contents of the
+system errno returned from vnd_syserrno are undefined. Both the vnd
+and system errors are only valid for a given handle after a libvnd
+library function returned an error and before another libvnd library
+function is called on the same handle. The act of making an additional
+function call with the same vnd_handle_t invalidates any prior vnd or
+system error numbers. For the full list of valid vnd errors see
+libvnd(3LIB). For the full list of valid system errors, see Intro(2).
+
+.LP
+The vnd_errno and vnd_syserrno functions retrieve the most recent vnd
+and syserr error number respectively from a vnd handle vhp.
+
+.LP
+The vnd_strerror function translates a vnd_errno_t err to a
+corresponding string and returns a pointer to that constant string.
+
+.LP
+The vnd_syserrno function is analogous to the vnd_strerror function,
+except that it translates a system error back to a string.
+
+
+.SH RETURN VALUES
+
+.LP
+The vnd_errno function returns a vnd_errno_t which contains the vnd
+error information.
+
+.LP
+The vnd_syserror function returns an integer which contains the system
+error information. These values are the same as those returned by
+errno(3C).
+
+.LP
+The vnd_strerror function returns a pointer to a constant string. If
+the error passed in is unknown, the string "unknown error" is
+returned.
+
+.LP
+The vnd_strsyserror function returns a pointer to the translated
+constant string. If an unknown error number is passed, it returns the
+string "Unknown error". If an error occurs, it returns a NULL pointer.
+
+.SH EXAMPLES
+
+.LP
+Example 1 Obtaining errors from a vnd_handle_t
+
+.sp
+.LP
+The following sample C function, which can be incorporated into a larger
+program, shows how to obtain the vnd and system errors from a
+vnd_handle_t after a vnd interface on a handle failed.
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+
+static void
+print_errnos(vnd_handle_t *vhp)
+{
+ vnd_errno_t vnderr;
+ int syserr;
+
+ vnderr = vnd_errno(vhp);
+ syserr = vnd_syserrno(vhp);
+
+ (void) printf("vnd err: %d, sys err: %d\n",
+ vnderr, syserr);
+}
+.fi
+.in -2
+
+.LP
+Example 2 A perror-like function
+
+.sp
+.LP
+The following sample C function which can be incorporated into a
+larger program shows how to write a perror-like function to print
+out error messages for a vnd device.
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+
+static void
+sample_perror(const char *msg, vnd_error_t vnderr, int syserr)
+{
+ (void) fprintf(stderr, "%s: %s", msg,
+ vnderr != VND_E_SYS ? vnd_strerror(vnderr) :
+ vnd_strsyserror(syserr));
+}
+.fi
+.in -2
+
+.SH ATTRIBUTES
+.sp
+.LP
+See attributes(5) for descriptions of the following attributes:
+
+.sp
+.TS
+box;
+c | c
+l | l .
+ATTRIBUTE TYPE ATTRIBUTE VALUE
+_
+Stability Committed
+_
+MT-Level See below
+.TE
+
+.LP
+The MT-Level of the functions vnd_strerror and vnd_strsyserror is
+MT-Safe. See "THREADING" in libvnd(3LIB) for a discussion of the
+MT-Level of vnd_errno and vnd_syserrno.
+
+
+.SH SEE ALSO
+
+Intro(2), errno(3C), libvnd(3LIB), attributes(5)
diff --git a/usr/src/man/man3vnd/vnd_frameio_read.3vnd b/usr/src/man/man3vnd/vnd_frameio_read.3vnd
new file mode 100644
index 0000000000..5fc65c96a3
--- /dev/null
+++ b/usr/src/man/man3vnd/vnd_frameio_read.3vnd
@@ -0,0 +1,705 @@
+'\" te
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\"
+.TH VND_FRAMEIO_READ 3VND "Mar 06, 2014"
+
+.SH NAME
+
+vnd_frameio_read, vnd_frameio_write \- perform framed I/O to a vnd device
+
+.SH SYNOPSIS
+
+.LP
+.nf
+cc [ flag... ] file... -lvnd [ library... ]
+#include <libvnd.h>
+
+int vnd_frameio_read(vnd_handle_t *vhp, frameio_t *fiop);
+
+int vnd_frameio_write(vnd_handle_t *vhp, frameio_t *fiop);
+.fi
+
+.SH DESCRIPTION
+.LP
+Framed I/O is a general means to manipulate data that is inherently
+framed, meaning that there is a maximum frame size, but the data may
+often be less than that size. As an example, an Ethernet device's MTU
+describes the maximum frame size, but the size of an individual frame
+is often much less. You can read a single frame at a time, or you can
+read multiple frames in a single call.
+
+In addition, framed I/O allows the consumer to break individual frames
+into a series of vectors. This is analogous to the use of an iovec(9S)
+with readv(2) and writev(2).
+
+vnd_frameio_read performs a framed I/O read of the device represented by
+the handle vhp, with the framed I/O data described by fiop.
+vnd_frameio_write works in the same manner, except performing a write
+instead of a read.
+
+.LP
+The basic vector component of the frameio_t is the framevec_t. Each
+framevec_t represents a single vector entry. An array of these is
+present in the frameio_t. The framevec_t structure has the following
+members:
+
+.in +2
+.nf
+void *fv_buf /* data buffer */
+size_t fv_buflen; /* total size of buffer */
+size_t fv_actlen; /* amount of buffer consumed */
+.fi
+.in -2
+
+.LP
+The fv_buf member points to a buffer which contains the data for this
+individual vector. When reading, data is consumed from fv_buf. When
+writing, data is written into fv_buf.
+
+The fv_buflen should indicate the total amount of data that is in the
+buffer. When reading, it indicates the size of the buffer. It must be
+set prior to calling vnd_frameio_read(). When writing, it indicates the
+amount of data that is valid in the buffer.
+
+The fv_actlen is a read-only member. It is set on successful return of
+the functions vnd_frameio_read and vnd_frameio_write. When reading, it
+is updated with the amount of data that was read into fv_buf. When
+writing, it is instead updated with the amount of data from fv_buf that
+was actually consumed. Generally when writing data, a framevec_t will
+either be entirely consumed or it will not be consumed at all.
+
+
+.LP
+A series of framevec_t's is encapsulated in a frameio_t. The frameio_t
+structure has the following members:
+
+.in +2
+.nf
+uint_t fio_version; /* current version */
+uint_t fio_nvpf; /* number of vectors in one frame */
+uint_t fio_nvecs; /* The total number of vectors */
+framevec_t fio_vecs[]; /* vectors */
+.fi
+.in -2
+
+.LP
+The fio_version member represents the current version of the frameio_t.
+The fio_version should be set to the macro FRAMEIO_CURRENT_VERSION,
+which is currently 1.
+
+The members fio_nvpf and fio_nvecs describe the number of frames that
+exist. fio_nvecs describes the total number of vectors that are present
+in fio_vecs. The upper bound on this is described by FRAMEIO_NVECS_MAX
+which is currently 32. fio_nvpf describe the number of vectors that
+should be used to make up each frame. By setting fio_vecs to be an even
+multiple of fio_nvpf, multiple frames can be read or written in a single
+call.
+
+After a call to vnd_frameio_read or vnd_frameio_write fio_nvecs is
+updated with total number of vectors read or written to. This value can
+be divided by fio_nvpf to determine the total number of frames that were
+written or read.
+
+.LP
+Each frame can be broken down into a series of multiple vectors. As an
+example, someone might want to break Ethernet frames into mac headers
+and payloads. The value of fio_nvpf would be set to two, to indicate
+that a single frame consists of two different vector components. The
+member fio_nvecs describes the total number of frames. As such, the
+value of fio_vecs divided by fio_nvpf describes the total number of
+frames that can be consumed in one call. As a result of this, fio_nvpf
+must evenly divide fio_vecs. If fio_nvpf is set to two and
+fio_nvecs is set to ten, then a total of five frames can be processed
+at once, each frame being broken down into two different vector
+components.
+
+A given frame will never overflow the number of vectors described by
+fio_nvpf. Consider the case where each vector component has a buffer
+sized to 1518 bytes, fio_nvpf is set to one, and fio_nvecs is set to
+three. If a call to vnd_frameio_read is made and four 500 byte Ethernet
+frames come in, then each frame will be mapped to a single vector. The
+500 bytes will be copied into fio_nvecs[i]->fio_buf and
+fio_nvecs[i]->fio_actlen will be set to 500. To contrast this, if
+readv(2) had been called, the first three frames would all be in the
+first iov and the fourth frame's first eight bytes would be in the first
+iov and the remaining in the second.
+
+.LP
+The user must properly initialize fio_nvecs framevec_t's worth of the
+fio_vecs array. When multiple vectors comprise a frame, fv_buflen data
+is consumed before moving onto the next vector. Consider the case
+where the user wants to break a vector into three different
+components, an 18 byte vector for an Ethernet VLAN header, a 20 byte
+vector for an IPv4 header, and a third 1500 byte vector for the
+remaining payload. If a frame was received that only had 30 bytes,
+then the first 18 bytes would fill up the first vector, the remaining
+12 bytes would fill up the IPv4 header. If instead a 524 byte frame
+came in, then the first 18 bytes would be placed in the first vector,
+the next 24 bytes would be placed in the next vector, and the remaining
+500 bytes in the third.
+
+.LP
+The functions vnd_frameio_read and vnd_frameio_write operate in both
+blocking and non-blocking mode. If either O_NONBLOCK or O_NDELAY have
+been set on the file descriptor, then the I/O will behave in
+non-blocking mode. When in non-blocking mode, if no data is available
+when vnd_frameio_read is called, EAGAIN is returned. When
+vnd_frameio_write is called in non-blocking mode, if sufficient buffer
+space to hold all of the output frames is not available, then
+vnd_frameio_write will return EAGAIN. To know when the given vnd device
+has sufficient space, the device fires POLLIN/POLLRDNORM when data is
+available for read and POLLOUT/POLLRDOUT when space in the buffer has
+opened up for write. These events can be watched for through
+port_associate(3C) and similar routines with a file descriptor returned
+from vnd_polfd(3VND).
+
+.LP
+When non-blocking mode is disabled, calls to vnd_frameio_read will
+block until some amount of data is available. Calls to
+vnd_frameio_write will block until sufficient buffer space is
+available.
+
+.LP
+Similar to read(2) and write(2), vnd_frameio_read and
+vnd_frameio_write make no guarantees about the ordering of data when
+multiple threads simultaneously call the interface. While the data
+itself will be atomic, the ordering of multiple simultaneous calls is
+not defined.
+
+.SH RETURN VALUES
+
+.LP
+The vnd_frameio_read function returns zero on success. The member
+fio_nvecs of fiop is updated with the total number of vectors that had
+data read into them. Each updated framevec_t will have the buffer
+pointed to by fv_buf filled in with data, and fv_actlen will be
+updated with the amount of valid data in fv_buf.
+
+.LP
+The vnd_frameio_write function returns zero on success. The member
+fio_nvecs of fiop is updated with the total number of vectors that
+were written out to the underlying datalink. The fv_actlen of each
+vector is updated to indicate the amount of data that was written from
+that buffer.
+
+.LP
+On failure, both vnd_frameio_read and vnd_frameio_write return -1. The
+vnd and system error numbers are updated and available via
+vnd_errno(3VND) and vnd_syserrno(3VND). See ERRORS below for a list of
+errors and their meaning.
+
+
+.SH ERRORS
+.LP
+The functions vnd_frameio_read and vnd_frameio_write always set the
+vnd error to VND_E_SYS. The following system errors will be
+encountered:
+
+.sp
+.ne 2
+.na
+EAGAIN
+.ad
+.RS 10n
+Insufficient system memory was available for the operation.
+.sp
+Non-blocking mode was enabled and during the call to vnd_frameio_read,
+no data was available. Non-blocking mode was enabled and during the call
+to vnd_frameio_write, insufficient buffer space was available.
+.RE
+
+.sp
+.ne 2
+.na
+ENXIO
+.ad
+.RS 10n
+The vnd device referred to by vhp is not currently attached to an
+underlying data link and cannot send data.
+.RE
+
+.sp
+.ne 2
+.na
+EFAULT
+.ad
+.RS 10n
+The fiop argument points to an illegal address or the fv_buf members of
+the framevec_t's associated with the fiop member fio_vecs point to
+illegal addresses.
+.RE
+
+.sp
+.ne 2
+.na
+EINVAL
+.ad
+.RS 10n
+The fio_version member of fiop was unknown, the number of vectors
+specified by fio_nvecs is zero or greater than FRAMEIO_NVECS_MAX,
+fio_nvpf equals zero, fio_nvecs is not evenly divisible by fio_nvpf, or
+a buffer in fio_vecs[] has set fv_buf or fv_buflen to zero.
+.RE
+
+
+.sp
+.ne 2
+.na
+EINTR
+.ad
+.RS 10n
+A signal was caught during vnd_frameio_read or vnd_frameio_write, and no
+data was transferred.
+.RE
+
+
+.sp
+.ne 2
+.na
+EOVERFLOW
+.ad
+.RS 10n
+During vnd_frameio_read, the size of a frame specified by fiop->fio_nvpf
+and fiop->fio_vecs[].fv_buflen cannot contain a frame.
+.sp
+In a ILP32 environment, more data than UINT_MAX would be set in
+fv_actlen.
+.RE
+
+
+.sp
+.ne 2
+.na
+ERANGE
+.ad
+.RS 10n
+During vnd_frameio_write, the size of a frame is less than the device's
+minimum transmission unit or it is larger than the size of the maximum
+transmission unit.
+.RE
+
+
+.SH EXAMPLES
+
+.LP
+Example 1 Read a single frame with a single vector
+
+.sp
+.LP
+The following sample C program opens an existing vnd device named
+"vnd0" in the current zone and performs a blocking read of a single
+frame from it.
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+ vnd_handle_t *vhp;
+ vnd_errno_t vnderr;
+ int syserr, i;
+ frameio_t *fiop;
+
+ fiop = malloc(sizeof (frameio_t) + sizeof (framevec_t));
+ if (fiop == NULL) {
+ perror("malloc frameio_t");
+ return (1);
+ }
+ fiop->fio_version = FRAMEIO_CURRENT_VERSION;
+ fiop->fio_nvpf = 1;
+ fiop->fio_nvecs = 1;
+ fiop->fio_vecs[0].fv_buf = malloc(1518);
+ fiop->fio_vecs[0].fv_buflen = 1518;
+ if (fiop->fio_vecs[0].fv_buf == NULL) {
+ perror("malloc framevec_t.fv_buf");
+ free(fiop);
+ return (1);
+ }
+
+ vhp = vnd_open(NULL, "vnd1", &vnderr, &syserr);
+ if (vhp != NULL) {
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strerror(vnderr));
+ free(fiop->fio_vecs[0].fv_buf);
+ free(fiop);
+ return (1);
+ }
+
+ if (frameio_read(vhp, fiop) != 0) {
+ vnd_errno_t vnderr = vnd_errno(vhp);
+ int syserr = vnd_syserrno(vhp);
+
+ /* Most consumers should retry on EINTR */
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to read: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to read: %s",
+ vnd_strerror(vnderr));
+ vnd_close(vhp);
+ free(fiop->fio_vecs[0].fv_buf);
+ free(fiop);
+ return (1);
+ }
+
+
+ /* Consume the data however it's desired */
+ (void) printf("received %d bytes\n", fiop->fio_vecs[0].fv_actlen);
+ for (i = 0; i < fiop->fio_vecs[0].fv_actlen)
+ (void) printf("%x ", fiop->fio_vecs[0].fv_buf[i]);
+
+ vnd_close(vhp);
+ free(fiop->fio_vecs[0].fv_buf);
+ free(viop);
+ return (0);
+}
+.fi
+.in -2
+
+.LP
+Example 2 Write a single frame with a single vector
+.sp
+.LP
+The following sample C program opens an existing vnd device named
+"vnd0" in the current zone and performs a blocking write of a single
+frame to it.
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+#include <stdio.h>
+#include <string.h>
+
+int
+main(void)
+{
+ vnd_handle_t *vhp;
+ vnd_errno_t vnderr;
+ int syserr;
+ frameio_t *fiop;
+
+ fiop = malloc(sizeof (frameio_t) + sizeof (framevec_t));
+ if (fiop == NULL) {
+ perror("malloc frameio_t");
+ return (1);
+ }
+ fiop->fio_version = FRAMEIO_CURRENT_VERSION;
+ fiop->fio_nvpf = 1;
+ fiop->fio_nvecs = 1;
+ fiop->fio_vecs[0].fv_buf = malloc(1518);
+ if (fiop->fio_vecs[0].fv_buf == NULL) {
+ perror("malloc framevec_t.fv_buf");
+ free(fiop);
+ return (1);
+ }
+
+ /*
+ * Fill in your data however you desire. This is an entirely
+ * invalid frame and while the frameio write may succeed, the
+ * networking stack will almost certainly drop it.
+ */
+ (void) memset(fiop->fio_vecs[0].fv_buf, 'r', 1518);
+ fiop->fio_vecs[0].fv_buflen = 1518;
+
+ vhp = vnd_open(NULL, "vnd0", &vnderr, &syserr);
+ if (vhp != NULL) {
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strerror(vnderr));
+ free(fiop->fio_vecs[0].fv_buf);
+ free(fiop);
+ return (1);
+ }
+
+ if (frameio_write(vhp, fiop) != 0) {
+ /* Most consumers should retry on EINTR */
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to write: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to write: %s",
+ vnd_strerror(vnderr));
+ vnd_close(vhp);
+ free(fiop->fio_vecs[0].fv_buf);
+ free(fiop);
+ return (1);
+ }
+
+
+ (void) printf("wrote %d bytes\n", fiop->fio_vecs[0].fv_actlen);
+
+ vnd_close(vhp);
+ free(fiop->fio_vecs[0].fv_buf);
+ free(viop);
+ return (0);
+}
+.fi
+.in -2
+
+.LP
+Example 3 Read frames comprised of multiple vectors
+.sp
+.LP
+The following sample C program is similar to example 1, except instead
+of reading a single frame consisting of a single vector it reads
+multiple frames consisting of two vectors. The first vector has room for
+an 18 byte VLAN enabled Ethernet header and the second vector has room
+for a 1500 byte payload.
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+ vnd_handle_t *vhp;
+ vnd_errno_t vnderr;
+ int syserr, i, nframes;
+ frameio_t *fiop;
+
+ /* Allocate enough framevec_t's for 5 frames */
+ fiop = malloc(sizeof (frameio_t) + sizeof (framevec_t) * 10);
+ if (fiop == NULL) {
+ perror("malloc frameio_t");
+ return (1);
+ }
+ fiop->fio_version = FRAMEIO_CURRENT_VERSION;
+ fiop->fio_nvpf = 2;
+ fiop->fio_nvecs = 10;
+ for (i = 0; i < 10; i += 2) {
+ fiop->fio_vecs[i].fv_buf = malloc(18);
+ fiop->fio_vecs[i].fv_buflen = 18;
+ if (fiop->fio_vecs[i].fv_buf == NULL) {
+ perror("malloc framevec_t.fv_buf");
+ /* Perform appropriate memory cleanup */
+ return (1);
+ }
+ fiop->fio_vecs[i+1].fv_buf = malloc(1500);
+ fiop->fio_vecs[i+1].fv_buflen = 1500;
+ if (fiop->fio_vecs[i+1].fv_buf == NULL) {
+ perror("malloc framevec_t.fv_buf");
+ /* Perform appropriate memory cleanup */
+ return (1);
+ }
+ }
+
+ vhp = vnd_open(NULL, "vnd1", &vnderr, &syserr);
+ if (vhp != NULL) {
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strerror(vnderr));
+ /* Perform appropriate memory cleanup */
+ return (1);
+ }
+
+ if (frameio_read(vhp, fiop) != 0) {
+ /* Most consumers should retry on EINTR */
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to read: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to read: %s",
+ vnd_strerror(vnderr));
+ vnd_close(vhp);
+ /* Perform appropriate memory cleanup */
+ return (1);
+ }
+
+ /* Consume the data however it's desired */
+ nframes = fiop->fio_nvecs / fiop->fio_nvpf;
+ (void) printf("consumed %d frames!\n", nframes);
+ for (i = 0; i < nframes; i++) {
+ (void) printf("received %d bytes of Ethernet Header\n",
+ fiop->fio_vecs[i].fv_actlen);
+ (void) printf("received %d bytes of payload\n",
+ fiop->fio_vecs[i+1].fv_actlen);
+ }
+
+ vnd_close(vhp);
+ /* Do proper memory cleanup */
+ return (0);
+}
+.fi
+.in -2
+
+.LP
+Example 4 Perform non-blocking reads of multiple frames with a
+single vector
+.sp
+.LP
+In this sample C program, opens an existing vnd device named "vnd0" in
+the current zone, ensures that it is in non-blocking mode, and uses
+event ports to do device reads.
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+#include <stdio.h>
+#include <port.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/tpyes.h>
+#include <fcntl.h>
+
+int
+main(void)
+{
+ vnd_handle_t *vhp;
+ vnd_errno_t vnderr;
+ int syserr, i, nframes, port, vfd;
+ frameio_t *fiop;
+
+ port = port_create();
+ if (port < 0) {
+ perror("port_create");
+ return (1);
+ }
+ /* Allocate enough framevec_t's for 10 frames */
+ fiop = malloc(sizeof (frameio_t) + sizeof (framevec_t) * 10);
+ if (fiop == NULL) {
+ perror("malloc frameio_t");
+ (void) close(port);
+ return (1);
+ }
+ fiop->fio_version = FRAMEIO_CURRENT_VERSION;
+ fiop->fio_nvpf = 1;
+ fiop->fio_nvecs = 10;
+ for (i = 0; i < 10; i++) {
+ fiop->fio_vecs[i].fv_buf = malloc(1518);
+ fiop->fio_vecs[i].fv_buflen = 1518;
+ if (fiop->fio_vecs[i].fv_buf == NULL) {
+ perror("malloc framevec_t.fv_buf");
+ /* Perform appropriate memory cleanup */
+ (void) close(port);
+ return (1);
+ }
+ }
+
+ vhp = vnd_open(NULL, "vnd1", &vnderr, &syserr);
+ if (vhp != NULL) {
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strerror(vnderr));
+ /* Perform appropriate memory cleanup */
+ (void) close(port);
+ return (1);
+ }
+ vfd = vnd_pollfd(vhp);
+ if (fcntl(fd, F_SETFL, O_NONBLOCK) != 0) {
+ (void) fprintf(stderr, "failed to enable non-blocking mode: %s",
+ strerrror(errno));
+ }
+
+ for (;;) {
+ port_event_t pe;
+
+ if (port_associate(port, PORT_SOURCE_FD, vfd, POLLIN,
+ vhp) != 0) {
+ perror("port_associate");
+ vnd_close(vhp);
+ /* Perform appropriate memory cleanup */
+ (void) close(port);
+ return (1);
+ }
+
+ if (port_get(port, &pe, NULL) != 0) {
+ if (errno == EINTR)
+ continue;
+ perror("port_associate");
+ vnd_close(vhp);
+ /* Perform appropriate memory cleanup */
+ (void) close(port);
+ return (1);
+ }
+
+ /*
+ * Most real applications will need to compare the file
+ * descriptor and switch on it. In this case, assume
+ * that the fd in question that is readable is 'vfd'.
+ */
+ if (frameio_read(pe.portev_user, fiop) != 0) {
+ vnd_errno_t vnderr = vnd_errno(vhp);
+ int syserr = vnd_syserrno(vhp);
+
+ if (vnderr == VND_E_SYS && (syserr == EINTR ||
+ syserr == EAGAIN))
+ continue;
+ (void) fprintf(stderr, "failed to get read: %s",
+ vnd_strsyserror(vnderr));
+ vnd_close(vhp);
+ /* Perform appropriate memory cleanup */
+ (void) close(port);
+ return (1);
+ }
+
+ /* Consume the data however it's desired */
+ nframes = fiop->fio_nvecs / fiop->fio_nvpf;
+ for (i = 0; i < nframes; i++) {
+ (void) printf("frame %d is %d bytes large\n", i,
+ fiop->fio_vecs[i].fv_actlen);
+ }
+
+ }
+
+ vnd_close(vhp);
+ /* Do proper memory cleanup */
+ return (0);
+}
+.fi
+.in -2
+
+.SH ATTRIBUTES
+.LP
+See attributes(5) for descriptions of the following attributes:
+
+.sp
+.TS
+box;
+c | c
+l | l .
+ATTRIBUTE TYPE ATTRIBUTE VALUE
+_
+Stability Committed
+_
+MT-Level See "THREADING" in libvnd(3LIB)
+.TE
+
+
+.SH SEE ALSO
+
+Intro(2), getmsg(2), read(2), readv(2), write(2), writev(2),
+libvnd(3VND), vnd_errno(3VND), vnd_pollfd(3VND), vnd_syserrno(3VND),
+iovec(9S)
diff --git a/usr/src/man/man3vnd/vnd_pollfd.3vnd b/usr/src/man/man3vnd/vnd_pollfd.3vnd
new file mode 100644
index 0000000000..500d3bac99
--- /dev/null
+++ b/usr/src/man/man3vnd/vnd_pollfd.3vnd
@@ -0,0 +1,155 @@
+'\" te
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\"
+.TH VND_POLLFD 3VND "Feb 21, 2014"
+
+.SH NAME
+
+vnd_pollfd \- get file descriptor for polling
+
+.SH SYNOPSIS
+
+.LP
+.nf
+cc [ flag... ] file... -lvnd [ library... ]
+#include <libvnd.h>
+
+int vnd_pollfd(vnd_handle_t *vhp);
+.fi
+
+.SH DESCRIPTION
+.LP
+The vnd_pollfd() function returns an integer id which corresponds to
+the file descriptor that represents the underlying device that is
+associated with the vnd handle vhp. This file descriptor is suitable
+for use with port_associate(3C) and similar polling techniques such as
+poll(2). Use of the file descriptor outside of these uses may cause
+undocumented behavior from the rest of the library.
+
+.LP
+The file descriptor in question is still managed by libvnd. The caller
+must not call close(2) on it. Once vnd_close(3VND) has been called,
+any further use of the file descriptor is undefined behavior.
+
+
+.SH RETURN VALUES
+.LP
+The function returns the integer id of the file descriptor that
+corresponds to the underlying vnd device.
+
+.SH EXAMPLES
+
+.LP
+Example 1 Use event ports for vnd notifications
+.sp
+.LP
+The following sample C program shows how to use the vnd_pollfd
+function with event ports to be notified whenever there is data
+available to be read. This program assumes that a vnd device named
+"vnd0" exists in the current zone. For an example of creating the
+device, see Example 1 in vnd_create(3VND).
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+#include <port.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+int
+main(void)
+{
+ vnd_handle_t *vhp;
+ vnd_errno_t vnderr;
+ int port, syserr, vfd, ret;
+
+ port = port_create();
+ if (port < 0) {
+ perror("port_create");
+ return (1);
+ }
+
+ vhp = vnd_open(NULL, "vnd0", &vnderr, &syserr);
+ if (vhp == NULL) {
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strerror(vnderr));
+ (void) close(port);
+ return (1);
+ }
+
+ vfd = vnd_pollfd(vhp);
+ if (fcntl(vfd, F_SETFL, O_NONBLOCK) != 0) {
+ perror("fcntl");
+ vnd_close(vhp);
+ (void) close(port);
+ return (1);
+ }
+
+ if (port_associate(port, PORT_SOURCE_FD, vfd, POLLIN, NULL) != 0) {
+ perror("port_associate");
+ vnd_close(vhp);
+ (void) close(port);
+ return (1);
+ }
+
+ for (;;) {
+ port_event_t pe;
+
+
+ if (port_get(port, &pe, NULL) != 0) {
+ if (errno == EINTR)
+ continue;
+ perror("port_get");
+ vnd_close(vhp);
+ (void) close(port);
+ return (1);
+ }
+
+ /*
+ * Read the data with vnd_frameio_read(3VND) and
+ * optionally break out of the loop or continue to the
+ * next iteration and reassociate vfd with the event
+ * port.
+ */
+ }
+}
+.fi
+.in -2
+
+.SH ATTRIBUTES
+.sp
+.LP
+See attributes(5) for descriptions of the following attributes:
+
+.sp
+.TS
+box;
+c | c
+l | l .
+ATTRIBUTE TYPE ATTRIBUTE VALUE
+_
+Stability Committed
+_
+MT-Level See "THREADING" in libvnd(3LIB)
+.TE
+
+.SH SEE ALSO
+
+close(2), poll(2), port_create(3C), libvnd(3LIB), vnd_close(3VND)
diff --git a/usr/src/man/man3vnd/vnd_prop_get.3vnd b/usr/src/man/man3vnd/vnd_prop_get.3vnd
new file mode 100644
index 0000000000..e47698c85e
--- /dev/null
+++ b/usr/src/man/man3vnd/vnd_prop_get.3vnd
@@ -0,0 +1,242 @@
+'\" te
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\"
+.TH VND_PROP_GET 3VND "Feb 21, 2014"
+
+.SH NAME
+
+vnd_prop_get, vnd_prop_set \- get and set vnd properties
+
+.SH SYNOPSIS
+
+.LP
+.nf
+cc [ flag... ] file... -lvnd [ library... ]
+#include <libvnd.h>
+
+int vnd_prop_get(vnd_handle_t *vhp, vnd_prop_t prop, void *buf, size_t len);
+
+int vnd_prop_set(vnd_handle_t *vhp, vnd_prop_t prop, void *buf, size_t len);
+.fi
+
+.SH DESCRIPTION
+.LP
+The vnd_prop_get and vnd_prop_set functions are used to retrieve
+and set property values on the vnd_handle_t referred to by vhp. The
+property to get or set is specified by the argument prop. The
+argument buf and the size of buf, in len, should be a pointer to the
+appropriate structure for the property as defined in libvnd(3LIB).
+
+.LP
+All of the supported properties are listed and described in the
+libvnd(3LIB) manual page.
+
+
+.SH RETURN VALUES
+.LP
+On success, the vnd_prop_get and vnd_prop_set functions return zero.
+On failure, they return -1 and additional error information is
+available through vnd_errno(3VND) and vnd_syserrno(3VND).
+
+.LP
+When vnd_prop_get returns successfully, the contents of buf are
+filled in with the value of the corresponding property. The contents
+of buf should not change across a call to vnd_prop_set.
+
+.SH EXAMPLES
+
+.LP
+Example 1 Getting the value of the rxbuf property
+.LP
+The following sample C program retrieves the value of the
+rxbuf property and prints it to standard out.
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+ vnd_handle_t *vhp;
+ vnd_errno_t vnderr;
+ int syserr;
+ vnd_prop_buf_t vpb;
+
+ vhp = vnd_open(NULL, "vnd1", &vnderr, &syserr);
+ if (vhp != NULL) {
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strerror(vnderr));
+ return (1);
+ }
+
+ if (vnd_prop_get(vhp, VND_PROP_RXBUF, &vpn, sizeof (vpn)) != 0) {
+ vnderr = vnd_errno(vhp);
+ syserr = vnd_syserrno(vhp);
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to get VND_PROP_RXBUF: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to get VND_PROP_RXBUF: %s",
+ vnd_strerror(vnderr));
+ return (1);
+ }
+
+ (void) printf("recieve buffer size is %d bytes\n", vpb.vpb_size);
+
+ vnd_close(vnd);
+ return (0);
+}
+.fi
+.in -2
+
+.LP
+EXAMPLE 2 Setting a property
+.LP
+This sample C program sets the property VND_PROP_RXBUF to the value of
+4200 bytes.
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+ vnd_handle_t *vhp;
+ vnd_errno_t vnderr;
+ int syserr;
+ vnd_prop_buf_t vpb;
+
+ vhp = vnd_open(NULL, "vnd1", &vnderr, &syserr);
+ if (vhp != NULL) {
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strerror(vnderr));
+ return (1);
+ }
+
+ vpb.vpb_size = 4200;
+ if (vnd_prop_set(vhp, VND_PROP_RXBUF, &vpb, sizeof (vpb)) != 0) {
+ vnderr = vnd_errno(vhp);
+ syserr = vnd_syserrno(vhp);
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to set VND_PROP_RXBUF: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to set VND_PROP_RXBUF: %s",
+ vnd_strerror(vnderr));
+ return (1);
+ }
+
+ (void) printf("successfully set VND_PROP_RXBUF to 4200\n");
+
+ vnd_close(vnd);
+ return (0);
+}
+.fi
+.in -2
+
+.LP
+Example 3 Setting a property to the value of another.
+.LP
+In this sample C program, we set the VND_PROP_TXBUF to the maximum
+allowable size as determined by the read-only property VND_PROP_MAXBUF.
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+ vnd_handle_t *vhp;
+ vnd_errno_t vnderr;
+ int syserr;
+ vnd_prop_buf_t vpb;
+
+ vhp = vnd_open(NULL, "vnd1", &vnderr, &syserr);
+ if (vhp != NULL) {
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strerror(vnderr));
+ return (1);
+ }
+
+ if (vnd_prop_get(vhp, VND_PROP_MAXBUF, &vpb, sizeof (vpb)) != 0) {
+ vnderr = vnd_errno(vhp);
+ syserr = vnd_syserrno(vhp);
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to get VND_PROP_MAXBUF: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to get VND_PROP_MAXBUF: %s",
+ vnd_strerror(vnderr));
+ return (1);
+ }
+
+ if (vnd_prop_set(vhp, VND_PROP_TXBUF, &vpb, sizeof (vpb)) != 0) {
+ vnderr = vnd_errno(vhp);
+ syserr = vnd_syserrno(vhp);
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to set VND_PROP_TXBUF: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to set VND_PROP_TXBUF: %s",
+ vnd_strerror(vnderr));
+ return (1);
+ }
+
+ (void) printf("successfully set VND_PROP_TXBUF to %d\n", vpb.vpb_size);
+
+ vnd_close(vnd);
+ return (0);
+}
+.fi
+
+.SH ATTRIBUTES
+.sp
+.LP
+See attributes(5) for descriptions of the following attributes:
+
+.sp
+.TS
+box;
+c | c
+l | l .
+ATTRIBUTE TYPE ATTRIBUTE VALUE
+_
+Stability Committed
+_
+MT-Level See "THREADING" in libvnd(3LIB)
+.TE
+
+.SH SEE ALSO
+libvnd(3VND), vnd_errno(3VND, vnd_syserrno(3VND)
diff --git a/usr/src/man/man3vnd/vnd_prop_iter.3vnd b/usr/src/man/man3vnd/vnd_prop_iter.3vnd
new file mode 100644
index 0000000000..29221734c5
--- /dev/null
+++ b/usr/src/man/man3vnd/vnd_prop_iter.3vnd
@@ -0,0 +1,148 @@
+'\" te
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\"
+.TH VND_PROP_ITER 3VND "Feb 21, 2014"
+
+.SH NAME
+
+vnd_prop_iter \- iterate vnd properties
+
+.SH SYNOPSIS
+
+.LP
+.nf
+cc [ flag... ] file... -lvnd [ library... ]
+#include <libvnd.h>
+
+typedef int (vnd_prop_iter_f)(vnd_handle_t *vhp, vnd_prop_t prop,
+ void *cbarg);
+
+int vnd_prop_iter(vnd_handle_t *vhp, vnd_prop_iter_f cb,
+ void *arg);
+.fi
+
+.SH DESCRIPTION
+.LP
+The vnd_prop_iter function iterates over all the available properties
+for the vnd handle vhp and calls the user supplied callback function
+cb. The argument arg is passed directly to the callback function.
+
+.LP
+The function specified by cb receives three arguments. The first, vhp,
+is the same vnd library handle that was passed to vnd_prop_iter. During
+the callback, the consumer should not call vnd_close(3VND). Doing so
+will lead to undefined and undocumented behavior. The second argument,
+prop, is the current property. While vnd_prop_iter guarantees that all
+properties will be recieved, it does not guarantee the order of them.
+The final argument, cbarg, is the same argument that the caller passed
+in during arg.
+
+.LP
+The return value of the callback function cb indicates whether or not
+property iteration should continue. To continue iteration, the
+function cb should return zero. Otherwise, to stop property iteration
+it should return non-zero.
+
+.SH RETURN VALUES
+
+.LP
+On success, the function vnd_prop_iter returns zero. If the callback
+function returned non-zero to terminate iteration, vnd_prop_iter will
+instead return one. In the case of library failure, vnd_prop_iter will
+return -1. In such cases, the vnd and system errors will be updated
+and available via vnd_errno(3VND) and vnd_syserrno(3VND).
+
+.SH EXAMPLES
+
+.LP
+Example 1 Print writeable properties
+
+.LP
+The following sample C program walks over every vnd property and
+prints out whether the property is read-only or read-write for the
+vnd device "vnd1" in the current zone.
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+static int
+print_prop(vnd_handle_t *vhp, vnd_prop_t prop, void *unused)
+{
+ boolean_t canwrite;
+
+ if (vnd_prop_writeable(vhp, &canwrite) != 0)
+ abort();
+
+ (void) printf("prop %d is %s", prop, canwrite == B_TRUE ? "rw" : "r-");
+ return (0);
+}
+
+int
+main(void)
+{
+ vnd_handle_t *vhp;
+ vnd_errno_t vnderr;
+ int syserr;
+
+ vhp = vnd_open(NULL, "vnd1", &vnderr, &syserr);
+ if (vhp != NULL) {
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strerror(vnderr));
+ return (1);
+ }
+
+ if (vnd_prop_iter(vhp, print_prop, NULL) != 0) {
+ vnderr = vnd_errno(vhp);
+ syserr = vnd_syserrno(vhp);
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strerror(vnderr));
+ return (1);
+ }
+
+ vnd_close(vnd);
+ return (0);
+}
+.fi
+.in -2
+
+.SH ATTRIBUTES
+.sp
+.LP
+See attributes(5) for descriptions of the following attributes:
+
+.sp
+.TS
+box;
+c | c
+l | l .
+ATTRIBUTE TYPE ATTRIBUTE VALUE
+_
+Stability Committed
+_
+MT-Level See "THREADING" in libvnd(3LIB)
+.TE
+
+libvnd(3LIB), vnd_close(3VND), vnd_errno(3VND), vnd_syserrno(3VND)
diff --git a/usr/src/man/man3vnd/vnd_prop_writeable.3vnd b/usr/src/man/man3vnd/vnd_prop_writeable.3vnd
new file mode 100644
index 0000000000..c23414718b
--- /dev/null
+++ b/usr/src/man/man3vnd/vnd_prop_writeable.3vnd
@@ -0,0 +1,101 @@
+'\" te
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\"
+.TH VND_PROP_WRITEABLE 3VND "Feb 21, 2014"
+
+.SH NAME
+
+vnd_prop_writeable \- determine if a vnd property can be updated
+
+.SH SYNOPSIS
+
+.LP
+.nf
+cc [ flag... ] file... -lvnd [ library... ]
+#include <libvnd.h>
+
+int vnd_prop_writeable(vnd_prop_t prop, boolean_t *wp);
+.fi
+
+
+.SH DESCRIPTION
+.LP
+The vnd_prop_writeable function is used as a programmatic means to
+determine whether a given vnd property is writeable or not. The
+property to check is specified in prop and should be from the list
+described in libvnd(3VND). The argument wp is a pointer to a boolean_t
+which will be updated upon the successful completion of the function.
+The argument wp must be a valid pointer. If a property is writeable
+than the value pointed to by wp is set to B_TRUE. If the property is
+read-only, then the value is set to B_FALSE.
+
+
+.SH RETURN VALUES
+.LP
+On success, vnd_prop_writeable returns zero and the value pointed to
+by wp is updated with whether the property is writeable. If the
+property prop does not exist, then vnd_prop_writeable will return -1.
+
+.SH EXAMPLES
+.LP
+Example 1 Check whether the property VND_PROP_TXBUF is writable
+.LP
+The following sample C program checks whether the vnd property
+VND_PROP_TXBUF is writeable or not.
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int
+main(void)
+{
+ boolean_t canwrite;
+
+ if (vnd_prop_writeable(VND_PROP_TXBUF, &prop) != 0)
+ abort();
+
+ if (canwrite == B_TRUE)
+ (void) printf("VND_PROP_TXBUF is writeable\n");
+ else
+ (void) printf("VND_PROP_TXBUF is read only\n");
+
+ return (0);
+}
+.fi
+.in -2
+
+.SH ATTRIBUTES
+.sp
+.LP
+See attributes(5) for descriptions of the following attributes:
+
+.sp
+.TS
+box;
+c | c
+l | l .
+ATTRIBUTE TYPE ATTRIBUTE VALUE
+_
+Stability Committed
+_
+MT-Level MT-Safe
+.TE
+
+.SH SEE ALSO
+
+vndadm(1M), libvnd(3VND)
diff --git a/usr/src/man/man3vnd/vnd_walk.3vnd b/usr/src/man/man3vnd/vnd_walk.3vnd
new file mode 100644
index 0000000000..bed53130d3
--- /dev/null
+++ b/usr/src/man/man3vnd/vnd_walk.3vnd
@@ -0,0 +1,155 @@
+'\" te
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\"
+.TH VND_CREATE 3VND "Feb 21, 2014"
+
+.SH NAME
+
+vnd_walk \- walk all vnd devices
+
+
+.SH SYNOPSIS
+
+.LP
+.nf
+cc [ flag... ] file... -lvnd [ library... ]
+#include <libvnd.h>
+
+typedef int (*vnd_walk_cb_f)(vnd_info_t *viip, void *cbarg);
+
+int vnd_walk(vnd_walk_cb_t cb, void *arg, vnd_errno_t *vnderr, int *syserr);
+.fi
+
+
+.SH DESCRIPTION
+.LP
+The vnd_walk() function fires the callback function cb once for every
+vnd device that is visible in the current zone. If the caller is in
+the global zone, then all vnd devices in all zones will be walked. If
+the caller is in a non-global zone, then only the devices in that zone
+will be visible.
+
+.LP
+The function cb will be called with two arguments. The first argument,
+viip, is a pointer to a structure that contains information about the
+link. The second argument to the function cb, cbarg, is the same
+argument that is passed to the function vnd_walk as arg. To continue
+the function cb should return zero. If the function cb returns
+non-zero the walk will terminate.
+
+.LP
+As the vnd_walk function does not have a handle, errors are returned
+in vnderr and syserr. Both vnderr and syserr are allowed to be NULL
+pointers. If either one is a NULL pointer, then error information for
+that class of error will not be returned. It is not recommended that
+consumers supply NULL pointers.
+
+.LP
+The vnd_info_t structure contains the following members:
+
+.in +2
+.nf
+uint32_t vi_version
+zoneid_t vi_zone
+char vi_name[LIBVND_NAMELEN];
+char vi_datalink[LIBVND_NAMELEN];
+.fi
+.in -2
+
+.LP
+The member vi_version is guaranteed to be the first member of the
+structure. This number indicates the current revision of the structure
+and is set to the integer value 1. More properties may be added in
+future releases. Those properties will be tied to a greater version
+number so software knows whether or not it is legal to access them.
+
+.LP
+The vi_zone field indicates the zone id that the vnd device exists in.
+The vi_name field is the name of the vnd device. If the vnd_device is
+not linked, the name field is set to "<unknown>". The vi_datalink
+field is filled in with the name of the data link the vnd device is on
+top of.
+
+
+.SH RETURN VALUES
+
+.LP
+The vnd_walk function will return zero on success. If the consumer
+supplied callback function returned non-zero, then the vnd_walk
+function will return 1. If an error occurred, -1 is returned, and if
+vnderr and syserr are non-null, they are filled in with their
+respective error values. See vnd_errno(3VND) for more information on
+these errors.
+
+.SH EXAMPLES
+
+.LP
+Example 1 Walk all devices and print information about them
+
+.LP
+The following sample C program walks every vnd device and prints out
+information about them.
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+#include <stdio.h>
+
+static int
+print_entry(vnd_info_t *viip, void *unused)
+{
+ (void) printf("device %s over data link %s in zone %d\n",
+ viip->vi_name, viip->vi_datalink, viip->vi_zone);
+ return (0);
+}
+
+int
+main(void)
+{
+ vnd_errno_t vnderr;
+ int syserr;
+
+ if (vnd_walk(print_entry, NULL, &vnderr, &syserr) != 0) {
+ (void) fprintf(stderr, "failed to walk vnd devices: %s\n",
+ vnderr != VND_E_SYS ? vnd_strerror(vnderr) :
+ vnd_strsyserror(syserr));
+ return (1);
+ }
+
+ return (0);
+}
+.fi
+.in -2
+
+.SH ATTRIBUTES
+.sp
+.LP
+See attributes(5) for descriptions of the following attributes:
+
+.sp
+.TS
+box;
+c | c
+l | l .
+ATTRIBUTE TYPE ATTRIBUTE VALUE
+_
+Stability Committed
+_
+MT-Level MT-Safe
+.TE
+
+.SH SEE ALSO
+
+libvnd(3VND), vnd_errno(3VND), attributes(5), zones(5)
diff --git a/usr/src/man/man3xnet/Makefile b/usr/src/man/man3xnet/Makefile
index 81cd96cba3..cc231f60c4 100644
--- a/usr/src/man/man3xnet/Makefile
+++ b/usr/src/man/man3xnet/Makefile
@@ -62,6 +62,7 @@ MANLINKS= getaddrinfo.3xnet \
getservbyname.3xnet \
getservbyport.3xnet \
getservent.3xnet \
+ htonll.3xnet \
htons.3xnet \
if_freenameindex.3xnet \
if_indextoname.3xnet \
@@ -73,6 +74,7 @@ MANLINKS= getaddrinfo.3xnet \
inet_ntoa.3xnet \
inet_pton.3xnet \
ntohl.3xnet \
+ ntohll.3xnet \
ntohs.3xnet \
sethostent.3xnet \
setnetent.3xnet \
@@ -97,6 +99,7 @@ getservbyname.3xnet := LINKSRC = endservent.3xnet
getservbyport.3xnet := LINKSRC = endservent.3xnet
getservent.3xnet := LINKSRC = endservent.3xnet
+htonll.3xnet := LINKSRC = htonl.3xnet
htons.3xnet := LINKSRC = htonl.3xnet
if_freenameindex.3xnet := LINKSRC = if_nametoindex.3xnet
@@ -112,6 +115,7 @@ inet_ntoa.3xnet := LINKSRC = inet_addr.3xnet
inet_pton.3xnet := LINKSRC = inet_ntop.3xnet
ntohl.3xnet := LINKSRC = htonl.3xnet
+ntohll.3xnet := LINKSRC = htonl.3xnet
ntohs.3xnet := LINKSRC = htonl.3xnet
sethostent.3xnet := LINKSRC = endhostent.3xnet
diff --git a/usr/src/man/man3xnet/htonl.3xnet b/usr/src/man/man3xnet/htonl.3xnet
index aa8470a28e..dda4586eb7 100644
--- a/usr/src/man/man3xnet/htonl.3xnet
+++ b/usr/src/man/man3xnet/htonl.3xnet
@@ -7,7 +7,8 @@
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH HTONL 3XNET "Jun 10, 2002"
+.\" Portions Copyright (c) 2012 Joyent, Inc. All rights reserved.
+.TH HTONL 3XNET "Jan 03, 2013"
.SH NAME
htonl, htons, ntohl, ntohs \- convert values between host and network byte
order
@@ -22,6 +23,11 @@ order
.LP
.nf
+\fBuint64_t\fR \fBhtonll\fR(\fBuint64_t\fR \fIhostlonglong\fR);
+.fi
+
+.LP
+.nf
\fBuint16_t\fR \fBhtons\fR(\fBuint16_t\fR \fIhostshort\fR);
.fi
@@ -32,18 +38,23 @@ order
.LP
.nf
+\fBuint64_t\fR \fBntohll\fR(\fBuint64_t\fR \fInetlonglong\fR);
+.fi
+
+.LP
+.nf
\fBuint16_t\fR \fBntohs\fR(\fBuint16_t\fR \fI netshort\fR);
.fi
.SH DESCRIPTION
.sp
.LP
-These functions convert 16-bit and 32-bit quantities between network byte order
-and host byte order.
+These functions convert 16-bit, 32-bit, and 64-bit quantities between network
+byte order and host byte order.
.sp
.LP
-The \fBuint32_t\fR and \fBuint16_t\fR types are made available by inclusion
-of \fB<inttypes.h>\fR\&.
+The \fBuint32_t\fR, \fBuint16_t\fR, and \fBuint64_t\fR types are made available
+by inclusion of \fB<inttypes.h>\fR\&.
.SH USAGE
.sp
.LP
@@ -56,12 +67,12 @@ value of their argument.
.SH RETURN VALUES
.sp
.LP
-The \fBhtonl()\fR and \fBhtons()\fR functions return the argument value
-converted from host to network byte order.
+The \fBhtonl()\fR, \fBhtonll()\fR, and \fBhtons()\fR functions return the
+argument value converted from host to network byte order.
.sp
.LP
-The \fBntohl()\fR and \fBntohs()\fR functions return the argument value
-converted from network to host byte order.
+The \fBntohl()\fR, \fBntohll()\fR, and \fBntohs()\fR functions return the
+argument value converted from network to host byte order.
.SH ERRORS
.sp
.LP
diff --git a/usr/src/man/man4/proc.4 b/usr/src/man/man4/proc.4
index 20f2089f5e..63568928f4 100644
--- a/usr/src/man/man4/proc.4
+++ b/usr/src/man/man4/proc.4
@@ -1,11 +1,11 @@
'\" te
.\" Copyright 1989 AT&T
.\" Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved.
-.\" Copyright (c) 2013, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH PROC 4 "Mar 31, 2013"
+.TH PROC 4 "May 19, 2014"
.SH NAME
proc \- /proc, the process file system
.SH DESCRIPTION
@@ -930,6 +930,15 @@ migrate to checking \fBPR_ISSYS\fR in the \fBpstatus\fR structure's
0x8000). \fBpr_pctcpu\fR is the summation over all lwps in the process.
.sp
.LP
+The \fBpr_fname\fR and \fBpr_psargs\fR are writable by the owner of the
+process. To write to them, the \fBpsinfo\fR file should be open for writing
+and the desired value for the field should be written at the file offset
+that corresponds to the member of structure. No other entry may be written
+to; if a write is attempted to an offset that does not represent one of
+these two memers, or if the size of the write is not exactly the size of
+the member being written, no bytes will be written and zero will be returned.
+.sp
+.LP
\fBpr_lwp\fR contains the \fBps\fR(1) information for the representative lwp.
If the process is a \fIzombie\fR, \fBpr_nlwp\fR, \fBpr_nzomb\fR, and
\fBpr_lwp.pr_lwpid\fR are zero and the other fields of \fBpr_lwp\fR are
diff --git a/usr/src/man/man5/Makefile b/usr/src/man/man5/Makefile
index 84d3f6aab6..466d5c0f16 100644
--- a/usr/src/man/man5/Makefile
+++ b/usr/src/man/man5/Makefile
@@ -14,6 +14,7 @@
# Copyright (c) 2012 by Delphix. All rights reserved.
# Copyright 2014 Nexenta Systems, Inc.
# Copyright 2014 Garrett D'Amore <garrett@damore.org>
+# Copyright (c) 2014, Joyent, Inc. All rights reserved.
#
include $(SRC)/Makefile.master
@@ -41,6 +42,7 @@ MANFILES= Intro.5 \
device_clean.5 \
dhcp.5 \
environ.5 \
+ epoll.5 \
eqn.5 \
eqnchar.5 \
extendedFILE.5 \
@@ -55,6 +57,7 @@ MANFILES= Intro.5 \
iconv_unicode.5 \
ieee802.3.5 \
ieee802.11.5 \
+ inotify.5 \
ipfilter.5 \
isalist.5 \
kerberos.5 \
diff --git a/usr/src/man/man5/epoll.5 b/usr/src/man/man5/epoll.5
new file mode 100644
index 0000000000..94314861d9
--- /dev/null
+++ b/usr/src/man/man5/epoll.5
@@ -0,0 +1,113 @@
+'\" te
+.\" Copyright (c) 2014, Joyent, Inc. All Rights Reserved.
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.TH EPOLL 5 "Apr 17, 2014"
+.SH NAME
+epoll \- Linux-compatible I/O event notification facility
+.SH SYNOPSIS
+
+.LP
+.nf
+#include <sys/epoll.h>
+.fi
+
+.SH DESCRIPTION
+.sp
+.LP
+
+\fBepoll\fR is a facility for efficient event-oriented I/O that has a
+similar model to \fBpoll\fR(2), but does not necessitate rescanning a
+set of file descriptors to wait for an event. \fBepoll\fR is of Linux
+origins, and this facility is designed to be binary-compatible with
+the Linux facility, including the following interfaces:
+
+.RS +4
+.TP
+.ie t \(bu
+.el o
+\fBepoll_create\fR(3C) creates an \fBepoll\fR instance, returning a file
+descriptor. It contains a size arugment which is meaningful only in as
+much as it cannot be 0.
+.RE
+.RS +4
+.TP
+.ie t \(bu
+.el o
+\fBepoll_create1\fR(3C) also creates an \fBepoll\fR instance, but eliminates
+the meaningless size argument -- replacing it instead with a flags
+argument.
+.RE
+.RS +4
+.TP
+.ie t \(bu
+.el o
+\fBepoll_ctl\fR(3C) allows file descriptors to be added
+(via \fBEPOLL_CTL_ADD\fR), deleted (via \fBEPOLL_CTL_DEL\fR) or
+modified (via \fBEPOLL_CTL_MOD\fR) with respect to the \fBepoll\fR'd set
+of file descriptors.
+.RE
+.RS +4
+.TP
+.ie t \(bu
+.el o
+\fBepoll_wait\fR(3C) fetches pending events for file descriptors added
+via \fBepoll_ctl\fR(3C), blocking the caller if no such events are pending.
+.RE
+.RS +4
+.TP
+.ie t \(bu
+.el o
+\fBepoll_pwait\fR(3C) opeates in a similar manner to \fBepoll_wait\fR(3C), but
+allows the caller to specify a signal mask to be set atomically with respect
+to waiting for events.
+.RE
+
+.sp
+.SH NOTES
+.sp
+.LP
+
+The \fBepoll\fR facility is implemented
+for purposes of offering compatibility to and portability of Linux-borne
+applications; native applications should continue to prefer using event ports
+via the \fBport_create\fR(3C),
+\fBport_associate\fR(3C) and \fBport_getn\fR(3C) interfaces.
+In particular, use of \fBepoll\fR in a multithreaded environment is fraught
+with peril; even when using \fBEPOLLONESHOT\fR for one-shot events,
+there are race conditions with respect to \fBclose\fR(2) that are unresolvable.
+(For more details, see the aborted effort in Linux to resolve this via the
+proposed
+\fBEPOLL_CTL_DISABLE\fR operation.)
+The event port facility -- like the BSD kqueue facility that inspired it --
+is designed to deal with such issues via explicit event source dissociation.
+
+While a best effort has been made to mimic the Linux semantics, there
+are some semantics that are too peculiar or ill-conceived to merit
+accommodation. In particular, the Linux \fBepoll\fR facility will -- by
+design -- continue to generate events for closed file descriptors where/when
+the underlying file description remains open. For example, if one were
+to \fBfork\fR(2) and subsequently close an actively \fBepoll\fR'd file
+descriptor in the parent,
+any events generated in the child on the implicitly duplicated file descriptor
+will continue to be delivered to the parent -- despite the fact that the
+parent itself no longer has any notion of the file description!
+This \fBepoll\fR facility refuses to honor
+these semantics; closing the \fBEPOLL_CTL_ADD\fR'd file descriptor
+will always result in no further
+events being generated for that event description.
+
+.SH SEE ALSO
+.sp
+.LP
+\fBepoll_create\fR(3C), \fBepoll_create1\fR(3C), \fBepoll_ctl\fR(3C),
+\fBepoll_wait\fR(3C), \fBepoll_pwait\fR(3C),
+\fBport_create\fR(3C), \fBport_associate\fR(3C), \fBport_dissociate\fR(3C),
+\fBport_get\fR(3C),
+\fBpselect\fR(3C)
diff --git a/usr/src/man/man5/inotify.5 b/usr/src/man/man5/inotify.5
new file mode 100644
index 0000000000..810e889d74
--- /dev/null
+++ b/usr/src/man/man5/inotify.5
@@ -0,0 +1,305 @@
+'\" te
+.\" Copyright (c) 2014, Joyent, Inc. All Rights Reserved.
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.TH INOTIFY 5 "Sep 17, 2014"
+.SH NAME
+inotify \- Linux-compatible file event notification facility
+.SH SYNOPSIS
+
+.LP
+.nf
+#include <sys/inotify.h>
+.fi
+
+.SH DESCRIPTION
+.sp
+.LP
+
+\fBinotify\fR is a facility for receiving file system events on specified
+files or directories. When monitoring a directory, \fBinotify\fR can be
+used to retrieve events not only on the directory, but also on any files
+that the directory contains. \fBinotify\fR originated with Linux, and
+this facility is designed to be binary-compatible with the Linux facility,
+including the following interfaces:
+
+.RS +4
+.TP
+.ie t \(bu
+.el o
+\fBinotify_init\fR(3C) creates an \fBinotify\fR instance, returning a file
+descriptor associated with the in-kernel event queue.
+.RE
+.RS +4
+.TP
+.ie t \(bu
+.el o
+\fBinotify_init1\fR(3C) also creates an \fBinotify\fR instance, but allows
+for a flags argument that controls some attributes of the returned file
+descriptor.
+.RE
+.RS +4
+.TP
+.ie t \(bu
+.el o
+\fBinotify_add_watch\fR(3C) allows a watch of a particular file or directory
+to be added to a watch list associated with the specified \fBinotify\fR
+instance. \fBinotify_add_watch\fR(3C) returns a watch descriptor that will
+be reflected in the \fIwd\fR member of the \fIinotify_event\fR structure
+returned via a \fBread\fR(2) of the instance.
+.RE
+.RS +4
+.TP
+.ie t \(bu
+.el o
+\fBinotify_rm_watch\fR(3C) removes the watch that corresponds to the specified
+watch descriptor.
+.RE
+
+When all file descriptors referring to a particular \fBinotify\fR instance
+are closed, the instance and all watches associated with that instance are
+freed.
+
+To consume events on an \fBinotify\fR instance, an application should
+issue a \fBread\fR(2) to the instance. If no events are available
+(and the \fBinotify\fR instance has not been explicitly made non-blocking
+via \fBinotify_init1\fR(3C)) the \fBread\fR(2) will block until a
+watched event occurs. If and when events are available, \fBread\fR(2) will
+return an array of the following structures:
+
+.sp
+.in +2
+.nf
+struct inotify_event {
+ int wd; /* watch descriptor */
+ uint32_t mask; /* mask of event */
+ uint32_t cookie; /* cookie for associating renames */
+ uint32_t len; /* size of name field */
+ char name[]; /* optional name */
+};
+.fi
+.in -2
+
+\fIwd\fR contains the watch descriptor that corresponds to the event,
+as returned by \fBinotify_add_watch\fR(3C).
+
+\fImask\fR is a bitwise \fBOR\fR of event masks (see below) that
+describes the event.
+
+\fIcookie\fR is an opaque value that can be used to associate different
+events into a single logical event. In particular, it allows consumers to
+associate \fBIN_MOVED_FROM\fR events with subsequent \fBIN_MOVED_TO\fR
+events.
+
+\fIlen\fR denotes the length of the \fIname\fR field, including any padding
+required for trailing null bytes and alignment. The size of the entire
+event is therefore the size of the \fIinotify_event\fR structure plus the
+value of \fIlen\fR.
+
+\fIname\fR contains the name of the file associated with the event, if any.
+This field is only present when the watched entity is a directory and
+the event corresponds to a file that was contained by the watched directory
+(though see \fBNOTES\fR and \fBWARNINGS\fR for details and limitations).
+When present, \fIname\fR is null terminated, and may contain additional
+zero bytes
+to pad for alignment. (The length of this field -- including any bytes
+for alignment -- is denoted by the \fIlen\fR field.)
+
+.SS "Events"
+
+The events that can be generated on a watched entity are as follows:
+
+.sp
+.in +2
+.TS
+c c
+l l .
+\fIEvent\fR \fIDescription\fR
+\fBIN_ACCESS\fR File/directory was accessed
+\fBIN_ATTRIB\fR File/directory attributes were changed
+\fBIN_CLOSE_WRITE\fR File/directory opened for writing was closed
+\fBIN_CLOSE_NOWRITE\fR File/directory not opened for writing was closed
+\fBIN_CREATE\fR File/directory created in watched directory
+\fBIN_DELETE\fR File/directory deleted from watched directory
+\fBIN_DELETE_SELF\fR Watched file/directory was deleted
+\fBIN_MODIFY\fR File/directory was modified
+\fBIN_MODIFY_SELF\fR Watched file/directory was modified
+\fBIN_MOVED_FROM\fR File was renamed from entity in watched directory
+\fBIN_MOVED_TO\fR File was renamed to entity in watched directory
+\fBIN_OPEN\fR File/directory was opened
+.TE
+.in -2
+
+Of these, all events except \fBIN_MOVE_SELF\fR and \fBIN_DELETE_SELF\fR
+can refer to either the watched entity or (if the watched entity
+is a directory) a file or directory contained by the watched directory.
+(See \fBNOTES\fR and \fBWARNINGS\fR, below for details on this
+mechanism and its limitations.)
+If the event corresponds to a contained entity,
+\fIname\fR will be set to the name of the affected
+entity.
+
+In addition to speciyfing events of interest, watched events may
+be modified by potentially setting any of the following when adding a
+watch via \fBinotify_add_watch\fR(3C):
+
+.sp
+.ne 2
+.na
+\fBIN_DONT_FOLLOW\fR
+.ad
+.RS 12n
+Don't follow the specified pathname if it is a symbolic link.
+.RE
+
+.sp
+.ne 2
+.na
+\fBIN_EXCL_UNLINK\fR
+.ad
+.RS 12n
+If watching a directory and a contained entity becomes unlinked, cease
+generating events for that entity. (By default, contained entities will
+continue to generate events on their former parent directory.)
+.RE
+
+.sp
+.ne 2
+.na
+\fBIN_MASK_ADD\fR
+.ad
+.RS 12n
+If the specified pathname is already being watched, the specified events
+will be added to the watched events instead of the default behavior of
+replacing them. (If one
+may forgive the editorializing, this particular interface gewgaw
+seems entirely superfluous, and a canonical example of
+feasibility trumping wisdom.)
+.RE
+
+.sp
+.ne 2
+.na
+\fBIN_ONESHOT\fR
+.ad
+.RS 12n
+Once an event has been generated for the watched entity, remove the
+watch from the watch list as if \fBinotify_rm_watch\fR(3C) had been called
+on it (thereby inducing an \fBIN_IGNORED\fR event).
+.RE
+
+.sp
+.ne 2
+.na
+\fBIN_ONLYDIR\fR
+.ad
+.RS 12n
+Only watch the specified pathname if it is a directory.
+.RE
+
+In addition to the specified events, the following bits may be specified
+in the \fImask\fR field as returned from \fBread\fR(2):
+
+.sp
+.ne 2
+.na
+\fBIN_IGNORED\fR
+.ad
+.RS 12n
+A watch was removed explicitly (i.e, via \fBinotify_rm_watch\fR(3C)) or
+implicitly (e.g., because \fBIN_ONESHOT\fR was set or because the watched
+entity was deleted).
+.RE
+
+.sp
+.ne 2
+.na
+\fBIN_ISDIR\fR
+.ad
+.RS 12n
+The entity inducing the event is a directory.
+.RE
+
+.sp
+.ne 2
+.na
+\fBIN_Q_OVERFLOW\fR
+.ad
+.RS 12n
+The event queue exceeded the maximum event queue length per instance.
+(By default, this is 16384, but it can be tuned by setting
+\fBinotify_maxevents\fR via \fB/etc/system\fR.)
+.RE
+
+.sp
+.ne 2
+.na
+\fBIN_UNMOUNT\fR
+.ad
+.RS 12n
+The filesystem containing the watched entity was unmounted.
+.RE
+
+.sp
+.SH NOTES
+.sp
+.LP
+
+\fBinotify\fR instances can be monitored via \fBpoll\fR(2),
+\fBport_get\fR(3C), \fBepoll\fR(5), etc.
+
+The event queue associated with an \fBinotify\fR instance is serialized
+and ordered: events will be placed on the tail of the queue in the order
+that they occur.
+
+If at the time an event occurs the tail of the event queue is identical
+to the newly received event, the newly received event will be dropped,
+effectively coalescing the two events.
+
+When watching a directory and receieving events on contained elements
+(i.e., a contained file or subdirectory), note that the information
+received in the \fIname\fR field may be stale: the file may have been
+renamed between the event and its processing. If a file has been unlinked
+(and if \fBIN_EXCL_UNLINK\fR has not been set),
+the \fIname\fR will reflect the last name that resolved to the file.
+If a new file is created in the same directory with the old name, events
+on the new file and the old (unlinked) file will become undistinguishable.
+
+The number of bytes that are available to be read on an \fBinotify\fR
+instance can be determined via a \fBFIONREAD\fR \fBioctl\fR(2).
+
+.sp
+.SH WARNINGS
+.sp
+.LP
+
+While a best effort has been made to mimic the Linux semantics, there
+remains a fundamental difference with respect to hard links: on Linux,
+if a file has multiple hard links to it, a notification on watched
+directory or file will be received if and only if that event was received
+via a watched directory or file. For events that are induced by open files
+(such as \fBIN_MODIFY\fR), these semantics seem peculiar: the watched
+file is in fact changing, but because it is not changing via the watched
+path, no notification is received. By contrast, the implementation here
+will always yield an event in this case -- even if the event was induced
+by an \fBopen\fR(2) via an unwatched path. If an event occurs within a
+watched directory on a file for which there exist multiple hard links within
+the same (watched) directory, the event's \fIname\fR will correspond to one
+of the links to the file. If multiple hard links exist to the
+same file in the same watched directory and one of the links is removed,
+notifications may not necessarily continue to be received for the file,
+despite the (remaining) link in the watched directory; users of
+\fBinotify\fR should exercise extreme caution when watching directories
+that contain files with multiple hard links in the same directory.
+
+.SH SEE ALSO
+.sp
+.LP
+\fBinotify_init\fR(3C), \fBinotify_init1\fR(3C), \fBinotify_add_watch\fR(3C),
+\fBinotify_rm_watch\fR(3C), \fBport_get\fR(3C), \fBepoll\fR(5)
diff --git a/usr/src/man/man5/privileges.5 b/usr/src/man/man5/privileges.5
index 6cbf5277fb..7fc9c00f45 100644
--- a/usr/src/man/man5/privileges.5
+++ b/usr/src/man/man5/privileges.5
@@ -4,7 +4,7 @@
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
.\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with
.\" the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH PRIVILEGES 5 "May 29, 2009"
+.TH PRIVILEGES 5 "March 7, 2012"
.SH NAME
privileges \- process privilege model
.SH DESCRIPTION
@@ -284,6 +284,16 @@ Allow a process to perform privileged mappings through a graphics device.
.sp
.ne 2
.na
+\fB\fBPRIV_HYPRLOFS_CONTROL\fR\fR
+.ad
+.sp .6
+.RS 4n
+Allow a process to perform hyprlofs name space management.
+.RE
+
+.sp
+.ne 2
+.na
\fB\fBPRIV_IPC_DAC_READ\fR\fR
.ad
.sp .6
diff --git a/usr/src/man/man5/resource_controls.5 b/usr/src/man/man5/resource_controls.5
index 745d777624..35a78aae6e 100644
--- a/usr/src/man/man5/resource_controls.5
+++ b/usr/src/man/man5/resource_controls.5
@@ -1,16 +1,18 @@
'\" te
.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, Joyent, Inc. All Rights Reserved.
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
.TH RESOURCE_CONTROLS 5 "Jul 19, 2013"
.SH NAME
-resource_controls \- resource controls available through project database
+resource_controls \- resource controls available through projects and zones
.SH DESCRIPTION
.sp
.LP
-The resource controls facility is configured through the project database. See
-\fBproject\fR(4). You can set and modify resource controls through the
+For projects the resource controls facility is configured through the project
+database. See \fBproject\fR(4). For zones, resource controls are configured
+through \fBzonecfg\fR(1M). You can set and modify resource controls through the
following utilities:
.RS +4
.TP
@@ -36,6 +38,12 @@ following utilities:
.el o
\fBrctladm\fR(1M)
.RE
+.RS +4
+.TP
+.ie t \(bu
+.el o
+\fBzonecfg\fR(1M)
+.RE
.sp
.LP
In a program, you use \fBsetrctl\fR(2) to set resource control values.
@@ -283,6 +291,19 @@ Maximum allowable number of event ports, expressed as an integer.
.sp
.ne 2
.na
+\fB\fBproject.max-processes\fR\fR
+.ad
+.sp .6
+.RS 4n
+Maximum number of processes that can be active in a project. This rctl is
+similar to \fBproject.max-lwps\fR, except that zombie processes are included.
+This rctl prevents process-slot exhaustion which can occur due to an excessive
+number of zombies. Expressed as an integer.
+.RE
+
+.sp
+.ne 2
+.na
\fB\fBproject.max-sem-ids\fR\fR
.ad
.sp .6
@@ -371,6 +392,33 @@ The following zone-wide resource controls are available:
.sp
.ne 2
.na
+\fB\fBzone.cpu-baseline\fR\fR
+.ad
+.sp .6
+.RS 4n
+Sets a baseline amount of CPU time that a zone can use before it is considered
+to be bursting. The unit used is the percentage of a single CPU that is being
+used by all user threads in a zone. The value should be less than the
+\fBzone.cpu-cap\fR rctl value and is expressed as an integer.
+This resource control does not support the \fBsyslog\fR action.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBzone.cpu-burst-time\fR\fR
+.ad
+.sp .6
+.RS 4n
+Sets the number of seconds that a zone can exceed the \fBzone.cpu-baseline\fR
+rctl value before being cpu-capped down to the \fBzone.cpu-baseline\fR.
+A value of 0 means that \fBzone.cpu-baseline\fR can be exceeded indefinitely.
+This resource control does not support the \fBsyslog\fR action.
+.RE
+
+.sp
+.ne 2
+.na
\fB\fBzone.cpu-cap\fR\fR
.ad
.sp .6
@@ -389,7 +437,7 @@ not support the \fBsyslog\fR action.
.ad
.sp .6
.RS 4n
-Sets a limit on the number of fair share scheduler (FSS) CPU shares for a zone.
+Sets a value on the number of fair share scheduler (FSS) CPU shares for a zone.
CPU shares are first allocated to the zone, and then further subdivided among
projects within the zone as specified in the \fBproject.cpu-shares\fR entries.
Expressed as an integer. This resource control does not support the
@@ -409,14 +457,25 @@ Total amount of physical locked memory available to a zone.
.sp
.ne 2
.na
+\fB\fBzone.max-lofi\fR\fR
+.ad
+.sp .6
+.RS 4n
+Sets a limit on the number of \fBLOFI\fR(7D) devices that can be created in a
+zone. Expressed as an integer. This resource control does not support the
+\fBsyslog\fR action.
+.RE
+
+.sp
+.ne 2
+.na
\fB\fBzone.max-lwps\fR\fR
.ad
.sp .6
.RS 4n
-Enhances resource isolation by preventing too many LWPs in one zone from
-affecting other zones. A zone's total LWPs can be further subdivided among
-projects within the zone within the zone by using \fBproject.max-lwps\fR
-entries. Expressed as an integer.
+Sets a limit on how many LWPs can be active in a zone. A zone's total LWPs
+can be further subdivided among projects within the zone within the zone by
+using \fBproject.max-lwps\fR entries. Expressed as an integer.
.RE
.sp
@@ -433,6 +492,33 @@ integer.
.sp
.ne 2
.na
+\fB\fBzone.max-physical-memory\fR\fR
+.ad
+.sp .6
+.RS 4n
+Sets a limit on the amount of physical memory (RSS) that can be used by a zone
+before resident pages start being forcibly paged out. The unit used is bytes.
+Expressed as an integer. This resource control does not support the
+\fBsyslog\fR action.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBzone.max-processes\fR\fR
+.ad
+.sp .6
+.RS 4n
+Maximum number of processes that can be active in a zone. This rctl is
+similar to \fBzone.max-lwps\fR, except that zombie processes are included.
+This rctl prevents process-slot exhaustion which can occur due to an excessive
+number of zombies. This rctl can be further subdivided among projects within
+the zone using \fBproject.max-processes\fR. Expressed as an integer.
+.RE
+
+.sp
+.ne 2
+.na
\fB\fBzone.max-sem-ids\fR\fR
.ad
.sp .6
@@ -474,6 +560,18 @@ mappings and \fBtmpfs\fR mounts for this zone.
.RE
.sp
+.ne 2
+.na
+\fB\fBzone.zfs-io-priority\fR\fR
+.ad
+.sp .6
+.RS 4n
+Sets a value for the \fBzfs\fR(1M) I/O priority for a zone. This is used as
+one of the inputs to determine if a zone's I/O should be throttled. Expressed
+as an integer. This resource control does not support the \fBsyslog\fR action.
+.RE
+
+.sp
.LP
See \fBzones\fR(5).
.SS "Units Used in Resource Controls"
diff --git a/usr/src/man/man7d/Makefile b/usr/src/man/man7d/Makefile
index 19a3bd41c1..5140f21c65 100644
--- a/usr/src/man/man7d/Makefile
+++ b/usr/src/man/man7d/Makefile
@@ -144,6 +144,7 @@ _MANFILES= aac.7d \
virtualkm.7d \
vni.7d \
vr.7d \
+ vnd.7d \
wscons.7d \
wusb_ca.7d \
wusb_df.7d \
diff --git a/usr/src/man/man7d/poll.7d b/usr/src/man/man7d/poll.7d
index cd3db77de9..7a3292eb97 100644
--- a/usr/src/man/man7d/poll.7d
+++ b/usr/src/man/man7d/poll.7d
@@ -73,15 +73,6 @@ Pointer to \fBpollfd\fR structure.
.SH DESCRIPTION
.LP
-Note -
-.sp
-.RS 2
-The \fB/dev/poll\fR device, associated driver and corresponding manpages may be
-removed in a future Solaris release. For similar functionality in the event
-ports framework, see \fBport_create\fR(3C).
-.RE
-.sp
-.LP
The \fB/dev/poll\fR driver is a special driver that enables you to monitor
multiple sets of polled file descriptors. By using the \fB/dev/poll\fR
driver, you can efficiently poll large numbers of file descriptors. Access to
diff --git a/usr/src/man/man7d/vnd.7d b/usr/src/man/man7d/vnd.7d
new file mode 100644
index 0000000000..d311c4dc08
--- /dev/null
+++ b/usr/src/man/man7d/vnd.7d
@@ -0,0 +1,118 @@
+'\" te
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\"
+.TH VND 7D "Feb 11, 2014"
+.SH NAME
+vnd \- virtual layer two network driver
+
+.SH SYNOPSIS
+.nf
+.LP
+/dev/vnd/ctl
+.LP
+/dev/vnd/*
+.fi
+
+.SH DESCRIPTION
+.sp
+.LP
+The vnd driver provides support for a layer two datapath in an
+analogous way that IP(7P) provides a support for an IP-based layer
+three datapath. Both devices operate exclusively on datalinks. A
+datalink that has been plumbed up with IP via ifconfig(1M) or
+ipadm(1M) cannot be used with vnd or vice-versa.
+.sp
+.LP
+The vnd driver supports and takes advantage of the the following
+illumos features:
+.RS
+.sp
+.LP
+Supports dld/dls feature negotation of GLDv3 features, such
+as direct calls, flow control, checksum offloading, and more.
+.sp
+.LP
+All IP and IPv6 based traffic is sent through ipfilter(5),
+allowing packet filtering.
+.sp
+.LP
+Better control over vectored reads and writes in a frame-centric manner
+through framed I/O. See libvnd(3LIB) for more information on these
+interfaces.
+.RE
+.sp
+.LP
+The vnd driver exposes two different kinds of device nodes. The first is
+a self-cloning control node which can be used to create vnd devices on
+top of datalinks. Those devices can optionally be bound into the file
+system namespace under /dev/vnd. Control operations on the control node
+or named devices are private to the implementation. Instead,
+libvnd(3LIB) provides a stable interfaces for using, creating, and
+manipulating vnd devices.
+.sp
+.SH FILES
+.sp
+.ne 2
+.na
+/dev/vnd/ctl
+.ad
+.RS 16n
+vnd self-cloning control node
+.RE
+
+.sp
+.ne 2
+.na
+/dev/vnd/%link
+.ad
+.RS 16n
+Character device that corresponds to the vnd device of the given
+name (%link). A given device will appear for each actively linked device
+in the current zone.
+.RE
+
+.sp
+.ne 2
+.na
+/dev/vnd/zone/%zone/%link
+.ad
+.RS 16n
+These are character devices that correspond to the vnd device of
+the given name (%link). They are organized based on the zone that they
+appear in. Thus if a zone named foo has a vnd device named
+bar, then the global zone will have the file
+/dev/vnd/zone/foo/bar. Note, these only occur in the global zone.
+.RE
+
+.SH ATTRIBUTES
+.sp
+.LP
+See attributes(5) for descriptions of the following attributes:
+.sp
+
+.sp
+.TS
+box;
+c | c
+l | l .
+ATTRIBUTE TYPE ATTRIBUTE VALUE
+_
+Interface Stability Evolving
+.TE
+
+.SH SEE ALSO
+.sp
+.LP
+dladm(1M), ipflter(5), libvnd(3LIB), vndadm(1M),
+vndstat(1)
diff --git a/usr/src/man/man7fs/Makefile b/usr/src/man/man7fs/Makefile
index a288c94893..d985e95410 100644
--- a/usr/src/man/man7fs/Makefile
+++ b/usr/src/man/man7fs/Makefile
@@ -12,19 +12,23 @@
#
# Copyright 2011, Richard Lowe
# Copyright 2013 Nexenta Systems, Inc. All rights reserved.
+# Copyright 2014 Joyent, Inc. All rights reserved.
#
include $(SRC)/Makefile.master
MANSECT= 7fs
-MANFILES= ctfs.7fs \
+MANFILES= bootfs.7fs \
+ ctfs.7fs \
dcfs.7fs \
dev.7fs \
devfs.7fs \
fd.7fs \
hsfs.7fs \
+ hyprlofs.7fs \
lofs.7fs \
+ lxproc.7fs \
objfs.7fs \
pcfs.7fs \
sharefs.7fs \
diff --git a/usr/src/man/man7fs/bootfs.7fs b/usr/src/man/man7fs/bootfs.7fs
new file mode 100644
index 0000000000..130530a1f6
--- /dev/null
+++ b/usr/src/man/man7fs/bootfs.7fs
@@ -0,0 +1,90 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2014 Joyent, Inc. All rights reserved.
+.\"
+
+.TH BOOTFS 7FS "May 8, 2014"
+.SH NAME
+bootfs \- boot-time module file system
+
+.SH DESCRIPTION
+
+The
+.B bootfs
+file system is a read-only file system that provides access to any
+boot-time modules that were passed in to the system loader which were
+tagged with the type
+.IR file .
+.B bootfs
+does not display any boot-time modules that were tagged as type
+.I hash
+or type
+.IR rootfs .
+
+If modules with duplicate names and paths are specified, only the first
+such entry will be present in the file system and a counter will be
+incremented to indicate that a duplicate entry was found, but is not
+present into the file system. If a module's name only consists of
+invalid characters, such as '.', '..', or '/', then the module will not
+be present in the file system and a counter will be incremented to
+indicate that this has occurred. In both cases, diagnostic information
+is available through the kstats facility.
+
+.SH FILES
+.sp
+.ne 2
+.na
+.B /system/boot
+.ad
+.RS 8n
+The mount point for the
+.B bootfs
+file system in the global zone.
+
+.SH EXAMPLES
+
+.LP
+Example 1 Determining if collisions or invalid names are present
+.sp
+.LP
+To determine if any boot-time modules were not created due to collisions
+or invalid names, enter the following command:
+
+.sp
+.in +2
+.nf
+# kstat -m bootfs
+module: bootfs instance: 1
+name: bootfs class: fs
+ crtime 236063.651324041
+ nbytes 8749355
+ ndirs 3
+ ndiscard 0
+ ndup 0
+ nfiles 2
+ snaptime 236063.651324041
+.fi
+.in -2
+.sp
+.LP
+The field
+.B ndiscard
+lists the number of boot-time modules that were discarded due to naming conflicts. The field
+.B ndup
+lists the number of duplicate entries that were found and therefore not displayed in the file system.
+.sp
+.LP
+This information is provided for informational purposes only, it is not to be construed as a stable interface.
+
+.SH SEE ALSO
+.BR kstat (1M),
+.BR grub (5)
diff --git a/usr/src/man/man7fs/hyprlofs.7fs b/usr/src/man/man7fs/hyprlofs.7fs
new file mode 100644
index 0000000000..8655791193
--- /dev/null
+++ b/usr/src/man/man7fs/hyprlofs.7fs
@@ -0,0 +1,62 @@
+'\" te
+.\" Copyright (c) 2012, Joyent, Inc.
+.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
+.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
+.TH HYPRLOFS 7FS "March 7, 2012"
+.SH NAME
+hyprlofs \- fast name space virtual file system
+.SH SYNOPSIS
+.LP
+.nf
+#include <sys/fs/hyprlofs.h>
+
+\fB\fR\fBmount\fR (\fB\fR\fIspecial\fR, \fB\fR\fIdirectory\fR, \fB\fR\fIMS_DATA\fR, \fB\fR\fI"hyprlofs"\fR, \fB\fR\fINULL\fR, \fB\fR\fI0\fR);
+.fi
+
+.SH DESCRIPTION
+.sp
+.LP
+\fBhyprlofs\fR is a hybrid file system combining features from
+\fBtmpfs(7FS)\fR and \fBlofs(7FS)\fR.
+As with \fBlofs\fR, the \fBhyprlofs\fR file system allows new, virtual file
+systems to be created which provide access to existing files using alternate
+pathnames. In addition, the files themselves may have alternate names and
+paths within the mount.
+Unlike \fBlofs\fR, files cannot be created and backing files cannot be removed.
+The name space is completely managed through ioctls on the mount.
+Entries in the name space are not mounts and thus, they will not appear in the
+mnttab. The file system is designed to provide a very fast name space to the
+backing files. The name space can be modified very quickly through the ioctl
+interface.
+.sp
+.LP
+\fBhyprlofs\fR file systems can be mounted with the command:
+.sp
+.in +2
+.nf
+\fBmount \fR\fB-F\fR\fB hyprlofs swap \fR\fIdirectory\fR
+.fi
+.in -2
+
+.sp
+.LP
+The name space used by \fBhyprlofs\fR exists only in-memory so it will consume
+a small amount of the system's virtual memory. The files themselves are backed
+by the original file as with \fBlofs\fR.
+
+.SH SEE ALSO
+.sp
+.LP
+\fBdf\fR(1M), \fBmount\fR(1M), \fBswap\fR(1M),
+\fBmount\fR(2), \fBumount\fR(2)
+.sp
+.LP
+\fISystem Administration Guide: Basic Administration\fR
+.SH DIAGNOSTICS
+.sp
+.LP
+\fBdf\fR(1M) output is of limited accuracy since
+the space available to \fBhyprlofs\fR is dependent on the swap
+space demands of the entire system and the files in the name space are not
+included.
diff --git a/usr/src/man/man7fs/lxproc.7fs b/usr/src/man/man7fs/lxproc.7fs
new file mode 100644
index 0000000000..7ef10ce343
--- /dev/null
+++ b/usr/src/man/man7fs/lxproc.7fs
@@ -0,0 +1,115 @@
+'\" te
+.\" Copyright (c) 2012, Joyent, Inc.
+.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
+.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
+.TH LXPROC 7FS "April 25, 2012"
+.SH NAME
+lxproc \- a loosely Linux-compatible /proc
+.SH SYNOPSIS
+.LP
+.nf
+\fB\fR\fBmount\fR (\fB\fR\fI"lxproc"\fR, \fB\fR\fIdirectory\fR, \fB\fR\fIMS_DATA\fR, \fB\fR\fI"lxproc"\fR, \fB\fR\fINULL\fR, \fB\fR\fI0\fR);
+.fi
+
+.SH DESCRIPTION
+.sp
+.LP
+\fBlxproc\fR is an implementation of the \fB/proc\fR filesystem that
+loosely matches the Linux semantics of providing human-readable text files
+that correspond to elements of the system.
+As with both \fBproc\fR(4) and Linux \fB/proc\fR, \fBlxproc\fR makes available
+a directory for every process, with each directory containing a number
+of files; like Linux \fB/proc\fR but unlike \fBproc\fR(4), \fBlxproc\fR also
+makes available a number of files related to system-wide information.
+To ascertain the meaning and structure of the files provided via
+\fBlxproc\fR, users should consult the Linux documentation.
+.sp
+.LP
+The \fBlxproc\fR compatibility layer is
+provided only as a best-effort for simple Linux \fB/proc\fR readers; it
+is not intended to exactly mimic Linux semantics and nor does it attempt to
+somehow fool a consumer into believing that it is operating within a Linux
+environment. As such, \fBlxproc\fR should only be used by Linux-specific
+programs that are willing to trade precision in understanding the
+system in return for Linux compatibility. To programmatically understand
+the system precisely and in terms of its native constructs,
+one should not use \fBlxproc\fR, but rather \fBproc\fR(4) or
+\fBkstat\fR(3KSTAT).
+To understand
+a process or group of processes from either a shell script or the command line,
+one should not use \fBlxproc\fR, but rather \fBproc\fR(4)-based tools like
+\fBprstat\fR(1M),
+\fBpfiles\fR(1),
+\fBpargs\fR(1),
+\fBpmap\fR(1),
+\fBptree\fR(1),
+\fBplimit\fR(1),
+\fBpflags\fR(1),
+\fBpcred\fR(1),
+\fBpstack\fR(1),
+\fBpldd\fR(1),
+\fBpsig\fR(1),
+or
+\fBpwdx\fR(1).
+To understand system-wide constructs from either a shell script or the
+command line, one should not use \fBlxproc\fR, but rather
+\fBkstat\fR(3KSTAT)-based tools like
+\fBkstat\fR(1M),
+\fBmpstat\fR(1M),
+\fBiostat\fR(1M),
+\fBnetstat\fR(1M) or
+\fBpsrinfo\fR(1M).
+.sp
+.LP
+Like \fB/proc\fR, \fBlxproc\fR can be mounted on any mount point, but the
+preferred mount point is \fB/system/lxproc\fR; if a zone brand elects to
+mount it by default, this will (or should) generally be the mount point.
+.sp
+.LP
+\fBlxproc\fR can be mounted with the command:
+.sp
+.in +2
+.nf
+\fBmount \fR\fB-F\fR\fB lxproc lxproc \fR\fIdirectory\fR
+.fi
+.in -2
+
+.SH SEE ALSO
+.sp
+.LP
+\fBdf\fR(1M),
+\fBiostat\fR(1M),
+\fBkstat\fR(1M),
+\fBmpstat\fR(1M),
+\fBmount\fR(1M),
+\fBnetstat\fR(1M),
+\fBpargs\fR(1),
+\fBpcred\fR(1),
+\fBpfiles\fR(1),
+\fBpflags\fR(1),
+\fBpldd\fR(1),
+\fBplimit\fR(1),
+\fBpmap\fR(1),
+\fBprstat\fR(1M),
+\fBpsig\fR(1),
+\fBpsrinfo\fR(1M),
+\fBpstack\fR(1),
+\fBptree\fR(1),
+\fBpwdx\fR(1),
+\fBmount\fR(2), \fBumount\fR(2), \fBkstat\fR(3KSTAT), \fBproc\fR(4),
+\fBkstat\fR(9S)
+
+.SH NOTES
+.sp
+.LP
+When choosing between offering
+Linux compatibility and telling the truth, \fBlxproc\fR emphatically picks
+the truth. A particular glaring example of this is the Linux notion of
+"tasks" (that is, threads), which -- due to historical misadventures on
+Linux -- allocate their identifiers from the process identifier space.
+(That is, each thread has in effect a pid.) Some Linux \fB/proc\fR readers
+have come to depend on this attribute, and become confused when threads
+appear with proper identifiers, so \fBlxproc\fR simply opts for the pre-2.6
+behavior, and does not present the tasks directory at all.
+
diff --git a/usr/src/man/man9e/chpoll.9e b/usr/src/man/man9e/chpoll.9e
index a2adaf7a9c..24e3379861 100644
--- a/usr/src/man/man9e/chpoll.9e
+++ b/usr/src/man/man9e/chpoll.9e
@@ -123,6 +123,17 @@ The same as \fBPOLLOUT\fR.
Priority data (priority band > 0) may be written.
.RE
+.sp
+.ne 2
+.na
+\fB\fBPOLLET\fR\fR
+.ad
+.RS 14n
+The desired event is to be edge-triggered; calls to \fBpollwakeup\fR(9F)
+should not be suppressed, even if the event is pending at the time of
+call to the \fBchpoll()\fR function.
+.RE
+
.RE
.sp
@@ -200,7 +211,11 @@ be called with multiple events at one time. The \fBpollwakup()\fR can be called
regardless of whether or not the \fBchpoll()\fR entry is called; it should be
called every time the driver detects the pollable event. The driver must not
hold any mutex across the call to \fBpollwakeup\fR(9F) that is acquired in its
-\fBchpoll()\fR entry point, or a deadlock may result.
+\fBchpoll()\fR entry point, or a deadlock may result. Note that if
+\fBPOLLET\fR is set in the specified events, the driver must call
+\fBpollwakeup\fR(9F) on subsequent events, even if events are pending at
+the time of the call to \fBchpoll()\fR.
+
.RE
.SH RETURN VALUES
.sp
diff --git a/usr/src/man/man9f/kmem_cache_create.9f b/usr/src/man/man9f/kmem_cache_create.9f
index 95b6fd6525..3fa34c3253 100644
--- a/usr/src/man/man9f/kmem_cache_create.9f
+++ b/usr/src/man/man9f/kmem_cache_create.9f
@@ -336,7 +336,7 @@ requirement is that the client must free the object (using
this, so programming errors will lead to hard-to-find bugs.
.sp
.LP
-A driver should call \fBkmem_cache_create()\fR at the time of \fB_fini\fR(9E)
+A driver should call \fBkmem_cache_create()\fR at the time of \fB_init\fR(9E)
or \fBattach\fR(9E), and call the corresponding \fBkmem_cache_destroy()\fR at
the time of \fB_fini\fR(9E) or \fBdetach\fR(9E).
.sp
diff --git a/usr/src/pkg/manifests/SUNWcs.mf b/usr/src/pkg/manifests/SUNWcs.mf
index ae37e60cf7..ce3a1b915f 100644
--- a/usr/src/pkg/manifests/SUNWcs.mf
+++ b/usr/src/pkg/manifests/SUNWcs.mf
@@ -235,9 +235,6 @@ $(i386_ONLY)dir path=usr/sbin/$(ARCH32)
dir path=usr/sbin/$(ARCH64)
dir path=usr/share
dir path=usr/share/doc group=other
-dir path=usr/share/doc/ksh
-dir path=usr/share/doc/ksh/images
-dir path=usr/share/doc/ksh/images/callouts
dir path=usr/share/lib
dir path=usr/share/lib/mailx
dir path=usr/share/lib/pub
@@ -1419,31 +1416,6 @@ file path=usr/sbin/volcopy mode=0555
file path=usr/sbin/wall group=tty mode=2555
file path=usr/sbin/zdump mode=0555
file path=usr/sbin/zic mode=0555
-file path=usr/share/doc/ksh/COMPATIBILITY
-file path=usr/share/doc/ksh/DESIGN
-file path=usr/share/doc/ksh/OBSOLETE
-file path=usr/share/doc/ksh/README
-file path=usr/share/doc/ksh/RELEASE
-file path=usr/share/doc/ksh/TYPES
-file path=usr/share/doc/ksh/images/callouts/1.png
-file path=usr/share/doc/ksh/images/callouts/10.png
-file path=usr/share/doc/ksh/images/callouts/2.png
-file path=usr/share/doc/ksh/images/callouts/3.png
-file path=usr/share/doc/ksh/images/callouts/4.png
-file path=usr/share/doc/ksh/images/callouts/5.png
-file path=usr/share/doc/ksh/images/callouts/6.png
-file path=usr/share/doc/ksh/images/callouts/7.png
-file path=usr/share/doc/ksh/images/callouts/8.png
-file path=usr/share/doc/ksh/images/callouts/9.png
-file path=usr/share/doc/ksh/images/tag_bourne.png
-file path=usr/share/doc/ksh/images/tag_i18n.png
-file path=usr/share/doc/ksh/images/tag_ksh.png
-file path=usr/share/doc/ksh/images/tag_ksh88.png
-file path=usr/share/doc/ksh/images/tag_ksh93.png
-file path=usr/share/doc/ksh/images/tag_l10n.png
-file path=usr/share/doc/ksh/images/tag_perf.png
-file path=usr/share/doc/ksh/shell_styleguide.docbook
-file path=usr/share/doc/ksh/shell_styleguide.html
file path=usr/share/lib/mailx/mailx.help
file path=usr/share/lib/mailx/mailx.help.~
file path=usr/share/lib/tabset/3101
diff --git a/usr/src/pkg/manifests/SUNWlx.mf b/usr/src/pkg/manifests/SUNWlx.mf
index 0d5d285fe5..b7da7af793 100644
--- a/usr/src/pkg/manifests/SUNWlx.mf
+++ b/usr/src/pkg/manifests/SUNWlx.mf
@@ -20,11 +20,12 @@
#
#
-# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
#
-# Was renamed to system/zones/brand/lx, both now obsolete.
-
-set name=pkg.fmri value=pkg:/SUNWlx@0.5.11,5.11-0.143
-set name=pkg.obsolete value=true
+set name=pkg.fmri value=pkg:/SUNWlx@0.5.11,5.11-0.133
+set name=pkg.renamed value=true
set name=variant.arch value=i386
+set name=variant.opensolaris.zone value=global value=nonglobal
+depend fmri=pkg:/system/zones/brand/lx@0.5.11,5.11-0.133 type=require
diff --git a/usr/src/pkg/manifests/driver-crypto-nfp.mf b/usr/src/pkg/manifests/driver-crypto-nfp.mf
new file mode 100644
index 0000000000..fcdf538c1d
--- /dev/null
+++ b/usr/src/pkg/manifests/driver-crypto-nfp.mf
@@ -0,0 +1,33 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2014 <contributor>
+#
+
+set name=pkg.fmri value=pkg:/driver/crypto/nfp@$(PKGVERS)
+set name=pkg.description value="nfast Crypto Accelerator Driver"
+set name=pkg.summary value="nfast Crypto Accelerator"
+set name=info.classification \
+ value=org.opensolaris.category.2008:System/Hardware
+set name=variant.arch value=i386
+dir path=kernel group=sys
+dir path=kernel/drv group=sys
+dir path=kernel/drv/$(ARCH64) group=sys
+driver name=nfp \
+ alias=pci1011,1065.100.100 \
+ alias=pci8086,b555.100.100 \
+ alias=pciex1011,1065.100.100 \
+ alias=pciex8086,b555.100.100
+file path=kernel/drv/$(ARCH64)/nfp group=sys
+$(i386_ONLY)file path=kernel/drv/nfp group=sys
+license usr/src/uts/common/io/nfp/THIRDPARTYLICENSE \
+ license=usr/src/uts/common/io/nfp/THIRDPARTYLICENSE
diff --git a/usr/src/pkg/manifests/driver-network-bge.mf b/usr/src/pkg/manifests/driver-network-bge.mf
index 74b2555119..ddf657b4b4 100644
--- a/usr/src/pkg/manifests/driver-network-bge.mf
+++ b/usr/src/pkg/manifests/driver-network-bge.mf
@@ -24,6 +24,9 @@
#
#
+# Copyright 2012 Nexenta Systems, Inc. All rights reserved.
+#
+
# The default for payload-bearing actions in this package is to appear in the
# global zone only. See the include file for greater detail, as well as
# information about overriding the defaults.
@@ -57,9 +60,11 @@ $(i386_ONLY)driver name=bge clone_perms="bge 0666 root sys" \
alias=pci14e4,1649 \
alias=pci14e4,1653 \
alias=pci14e4,1654 \
+ alias=pci14e4,1657 \
alias=pci14e4,1659 \
alias=pci14e4,165d \
alias=pci14e4,165e \
+ alias=pci14e4,165f \
alias=pci14e4,1668 \
alias=pci14e4,1669 \
alias=pci14e4,166a \
@@ -79,9 +84,11 @@ $(i386_ONLY)driver name=bge clone_perms="bge 0666 root sys" \
alias=pci14e4,16c7 \
alias=pciex14e4,1655 \
alias=pciex14e4,1656 \
+ alias=pciex14e4,1657 \
alias=pciex14e4,165a \
alias=pciex14e4,165b \
alias=pciex14e4,165c \
+ alias=pciex14e4,165f \
alias=pciex14e4,1673 \
alias=pciex14e4,1674 \
alias=pciex14e4,1677 \
@@ -90,7 +97,13 @@ $(i386_ONLY)driver name=bge clone_perms="bge 0666 root sys" \
alias=pciex14e4,1680 \
alias=pciex14e4,1681 \
alias=pciex14e4,1684 \
+ alias=pciex14e4,1688 \
+ alias=pciex14e4,1689 \
+ alias=pciex14e4,1690 \
+ alias=pciex14e4,1691 \
alias=pciex14e4,1692 \
+ alias=pciex14e4,1694 \
+ alias=pciex14e4,1698 \
alias=pciex14e4,169d \
alias=pciex14e4,16fd \
alias=pciex14e4,1713
diff --git a/usr/src/pkg/manifests/service-file-system-smb.mf b/usr/src/pkg/manifests/service-file-system-smb.mf
index 2ddf448a86..cd3a4ed1a8 100644
--- a/usr/src/pkg/manifests/service-file-system-smb.mf
+++ b/usr/src/pkg/manifests/service-file-system-smb.mf
@@ -22,6 +22,7 @@
#
# Copyright 2011 Nexenta Systems, Inc. All rights reserved.
# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2011 Nexenta Systems, Inc. All rights reserved.
#
set name=pkg.fmri value=pkg:/service/file-system/smb@$(PKGVERS)
diff --git a/usr/src/pkg/manifests/system-zones-brand-joyent.mf b/usr/src/pkg/manifests/system-zones-brand-joyent.mf
new file mode 100644
index 0000000000..33452cdcd7
--- /dev/null
+++ b/usr/src/pkg/manifests/system-zones-brand-joyent.mf
@@ -0,0 +1,54 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2010 Joyent, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+set name=pkg.fmri value=pkg:/system/zones/brand/joyent@$(PKGVERS)
+set name=pkg.description value="Support for the 'joyent' Brand"
+set name=pkg.summary value="Joyent Containers: joyent brand support"
+set name=info.classification \
+ value=org.opensolaris.category.2008:System/Virtualization
+set name=variant.arch value=$(ARCH)
+dir path=etc group=sys
+dir path=etc/zones group=sys
+dir path=lib variant.opensolaris.zone=global
+dir path=lib/svc variant.opensolaris.zone=global
+dir path=lib/svc/manifest group=sys variant.opensolaris.zone=global
+dir path=lib/svc/manifest/system group=sys variant.opensolaris.zone=global
+dir path=lib/svc/method variant.opensolaris.zone=global
+dir path=usr group=sys
+dir path=usr/lib
+dir path=usr/lib/brand
+dir path=usr/lib/brand/joyent group=sys
+file path=etc/zones/Joyent.xml mode=0444
+file path=lib/svc/manifest/system/joyinit.xml group=sys mode=0444 \
+ variant.opensolaris.zone=global
+file path=lib/svc/method/svc-joyinit mode=0555 variant.opensolaris.zone=global
+file path=usr/lib/brand/joyent/config.xml mode=0444
+file path=usr/lib/brand/joyent/jinstall mode=0755
+file path=usr/lib/brand/joyent/juninstall mode=0755
+file path=usr/lib/brand/joyent/pinstall mode=0755
+file path=usr/lib/brand/joyent/platform.xml mode=0444
+file path=usr/lib/brand/joyent/prestate mode=0755
+license lic_CDDL license=lic_CDDL
diff --git a/usr/src/pkg/manifests/system-zones-brand-lx.mf b/usr/src/pkg/manifests/system-zones-brand-lx.mf
index ca3a8cc541..0d8cc82e0c 100644
--- a/usr/src/pkg/manifests/system-zones-brand-lx.mf
+++ b/usr/src/pkg/manifests/system-zones-brand-lx.mf
@@ -20,9 +20,110 @@
#
#
-# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
#
-set name=pkg.fmri value=pkg:/system/zones/brand/lx@0.5.11,5.11-0.143
-set name=pkg.obsolete value=true
+#
+# This package will install successfully into any zone, global or
+# non-global. The files, directories, links, and hardlinks, however,
+# will only be installed into the global zone.
+#
+<include global_zone_only_component>
+set name=pkg.fmri value=pkg:/system/zones/brand/lx@$(PKGVERS)
+set name=pkg.description value="Support for the 'lx' Brand"
+set name=pkg.summary value="lx Brand"
+set name=info.classification \
+ value="org.opensolaris.category.2008:Applications/System Utilities"
set name=variant.arch value=i386
+dir path=etc group=sys
+dir path=etc/zones group=sys
+dir path=usr group=sys
+dir path=usr/kernel group=sys
+dir path=usr/kernel/brand group=sys
+dir path=usr/kernel/brand/$(ARCH64) group=sys
+dir path=usr/kernel/drv group=sys
+dir path=usr/kernel/drv/$(ARCH64) group=sys
+dir path=usr/kernel/dtrace group=sys
+dir path=usr/kernel/dtrace/$(ARCH64) group=sys
+dir path=usr/kernel/fs group=sys
+dir path=usr/kernel/fs/$(ARCH64) group=sys
+dir path=usr/kernel/strmod group=sys
+dir path=usr/kernel/strmod/$(ARCH64) group=sys
+dir path=usr/lib
+dir path=usr/lib/brand
+dir path=usr/lib/brand/lx
+dir path=usr/lib/brand/lx/$(ARCH64)
+dir path=usr/lib/brand/lx/distros
+dir path=usr/lib/devfsadm group=sys
+dir path=usr/lib/devfsadm/linkmod group=sys
+driver name=lx_audio
+driver name=lx_ptm perms="lx_ptmajor 0666 root sys"
+driver name=lx_systrace perms="* 0644 root sys"
+file path=etc/zones/SUNWlx.xml mode=0444
+file path=etc/zones/SUNWlx26.xml mode=0444
+file path=usr/kernel/brand/$(ARCH64)/lx_brand group=sys mode=0755
+file path=usr/kernel/brand/lx_brand group=sys mode=0755
+file path=usr/kernel/drv/$(ARCH64)/lx_audio group=sys
+file path=usr/kernel/drv/$(ARCH64)/lx_ptm group=sys
+file path=usr/kernel/drv/$(ARCH64)/lx_systrace group=sys
+file path=usr/kernel/drv/lx_audio group=sys
+file path=usr/kernel/drv/lx_audio.conf group=sys
+file path=usr/kernel/drv/lx_ptm group=sys
+file path=usr/kernel/drv/lx_ptm.conf group=sys
+file path=usr/kernel/drv/lx_systrace group=sys
+file path=usr/kernel/drv/lx_systrace.conf group=sys
+file path=usr/kernel/fs/$(ARCH64)/lx_afs group=sys mode=0755
+file path=usr/kernel/fs/$(ARCH64)/lx_proc group=sys mode=0755
+file path=usr/kernel/fs/lx_afs group=sys mode=0755
+file path=usr/kernel/fs/lx_proc group=sys mode=0755
+file path=usr/kernel/strmod/$(ARCH64)/ldlinux group=sys mode=0755
+file path=usr/kernel/strmod/ldlinux group=sys mode=0755
+file path=usr/lib/brand/lx/$(ARCH64)/lx_librtld_db.so.1
+file path=usr/lib/brand/lx/$(ARCH64)/lx_nametoaddr.so.1
+file path=usr/lib/brand/lx/$(ARCH64)/lx_thunk.so.1
+file path=usr/lib/brand/lx/config.xml mode=0444
+file path=usr/lib/brand/lx/distros/centos35.distro mode=0444
+file path=usr/lib/brand/lx/distros/centos36.distro mode=0444
+file path=usr/lib/brand/lx/distros/centos37.distro mode=0444
+file path=usr/lib/brand/lx/distros/centos38.distro mode=0444
+file path=usr/lib/brand/lx/distros/rhel35.distro mode=0444
+file path=usr/lib/brand/lx/distros/rhel36.distro mode=0444
+file path=usr/lib/brand/lx/distros/rhel37.distro mode=0444
+file path=usr/lib/brand/lx/distros/rhel38.distro mode=0444
+file path=usr/lib/brand/lx/distros/rhel_centos_common mode=0444
+file path=usr/lib/brand/lx/etc_default_nfs group=sys mode=0444
+file path=usr/lib/brand/lx/etc_netconfig group=sys mode=0444
+file path=usr/lib/brand/lx/lx_distro_install mode=0755
+file path=usr/lib/brand/lx/lx_init_zone mode=0755
+file path=usr/lib/brand/lx/lx_init_zone_debian mode=0755
+file path=usr/lib/brand/lx/lx_init_zone_redhat mode=0755
+file path=usr/lib/brand/lx/lx_install mode=0755
+file path=usr/lib/brand/lx/lx_librtld_db.so.1
+file path=usr/lib/brand/lx/lx_lockd mode=0755
+file path=usr/lib/brand/lx/lx_nametoaddr.so.1
+file path=usr/lib/brand/lx/lx_native mode=0755
+file path=usr/lib/brand/lx/lx_statd mode=0755
+file path=usr/lib/brand/lx/lx_support mode=0755
+file path=usr/lib/brand/lx/lx_thunk mode=0755
+file path=usr/lib/brand/lx/lx_thunk.so.1
+file path=usr/lib/brand/lx/platform.xml mode=0444
+file path=usr/lib/devfsadm/linkmod/SUNW_lx_link_$(ARCH).so group=sys
+file path=usr/lib/lx_brand.so.1
+hardlink path=usr/kernel/dtrace/$(ARCH64)/lx_systrace \
+ target=../../../kernel/drv/$(ARCH64)/lx_systrace
+hardlink path=usr/kernel/dtrace/lx_systrace \
+ target=../../kernel/drv/lx_systrace
+legacy pkg=SUNWlxr arch=$(ARCH) category=system \
+ desc="Support for the 'lx' Brand" \
+ hotline="Please contact your local service provider" \
+ name="lx Brand (Root)" vendor="Sun Microsystems, Inc." \
+ version=11.11,REV=2009.11.11
+legacy pkg=SUNWlxu arch=$(ARCH) category=system \
+ desc="Support for the 'lx' Brand" \
+ hotline="Please contact your local service provider" \
+ name="lx Brand (Usr)" vendor="Sun Microsystems, Inc." \
+ version=11.11,REV=2009.11.11
+license cr_Sun license=cr_Sun
+license lic_CDDL license=lic_CDDL
+link path=usr/lib/brand/lx/64 target=$(ARCH64)
diff --git a/usr/src/psm/stand/bootblks/ufs/i386/mboot.S b/usr/src/psm/stand/bootblks/ufs/i386/mboot.S
index 48afbae207..756a643482 100644
--- a/usr/src/psm/stand/bootblks/ufs/i386/mboot.S
+++ b/usr/src/psm/stand/bootblks/ufs/i386/mboot.S
@@ -23,6 +23,7 @@
* Copyright 2012 OmniTI Computer Consulting, Inc. All rights reserved.
*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2012 OmniTI Computer Consulting, Inc. All rights reserved.
* Use is subject to license terms.
*/
diff --git a/usr/src/stand/lib/fs/hsfs/hsfsops.c b/usr/src/stand/lib/fs/hsfs/hsfsops.c
index 0119d3fe99..21f8c37081 100644
--- a/usr/src/stand/lib/fs/hsfs/hsfsops.c
+++ b/usr/src/stand/lib/fs/hsfs/hsfsops.c
@@ -1026,6 +1026,7 @@ boot_hsfs_getdents(int fd, struct dirent *dep, unsigned size)
* alignment.
*/
n = strlen(hdp->hs_ufs_dir.d_name);
+
n = roundup((sizeof (struct dirent) + ((n > SLOP) ? n : 0)),
sizeof (off_t));
diff --git a/usr/src/test/test-runner/cmd/Makefile b/usr/src/test/test-runner/cmd/Makefile
index 33e7a61275..948aea9ed8 100644
--- a/usr/src/test/test-runner/cmd/Makefile
+++ b/usr/src/test/test-runner/cmd/Makefile
@@ -34,4 +34,6 @@ $(ROOTBIN):
$(INS.dir)
$(ROOTBIN)/%: %.py
- $(INS.rename)
+ $(RM) $@
+ $(SED.py) $< > $@
+ $(CHMOD) 0555 $@
diff --git a/usr/src/test/test-runner/cmd/run.py b/usr/src/test/test-runner/cmd/run.py
index da0bca1d26..c1d73a1184 100644
--- a/usr/src/test/test-runner/cmd/run.py
+++ b/usr/src/test/test-runner/cmd/run.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python2.6
+#!ON_PYTHON_26
#
# This file and its contents are supplied under the terms of the
diff --git a/usr/src/test/util-tests/tests/Makefile b/usr/src/test/util-tests/tests/Makefile
index 4709c7adcd..151ff11b6a 100644
--- a/usr/src/test/util-tests/tests/Makefile
+++ b/usr/src/test/util-tests/tests/Makefile
@@ -14,7 +14,6 @@
# Copyright 2014 Garrett D'Amore <garrett@damore.org>
#
-SUBDIRS = dladm printf xargs
SUBDIRS = dladm libnvpair_json printf xargs
include $(SRC)/test/Makefile.com
diff --git a/usr/src/tools/README.tools b/usr/src/tools/README.tools
index 6cb51f028a..323d6c679c 100644
--- a/usr/src/tools/README.tools
+++ b/usr/src/tools/README.tools
@@ -192,6 +192,9 @@ findunref
sort > ~/unref-sparc.out
$ comm -12 ~/unref-i386.out ~/unref-sparc.out > ~/unref.out
+git-active
+ helper used by webrev to generate file lists for Git workspaces.
+
hdrchk
checks headers for compliance with OS/Net standards (form, includes,
C++ guards).
diff --git a/usr/src/tools/cscope-fast/Makefile b/usr/src/tools/cscope-fast/Makefile
index 6a2270bbed..0237a6e9e9 100644
--- a/usr/src/tools/cscope-fast/Makefile
+++ b/usr/src/tools/cscope-fast/Makefile
@@ -34,6 +34,7 @@ OBJS= main.o dir.o crossref.o scanner.o lookup.o command.o display.o \
vpaccess.o vpfopen.o vpinit.o vpopen.o vpstat.o
SRCS= $(OBJS:%.o=%.c)
CLEANFILES += $(OBJS)
+CLOBBERFILES += scanner.c
TMPDIR= /tmp
include ../Makefile.tools
diff --git a/usr/src/tools/ctf/Makefile b/usr/src/tools/ctf/Makefile
index 0746c4943f..675c16aa84 100644
--- a/usr/src/tools/ctf/Makefile
+++ b/usr/src/tools/ctf/Makefile
@@ -26,7 +26,7 @@
include ../Makefile.tools
-SUBDIRS = cvt dump stabs scripts ctfstrip
+SUBDIRS = cvt dump stabs scripts ctfstrip libctf .WAIT ctfdiff
.PARALLEL: $(SUBDIRS)
diff --git a/usr/src/tools/ctf/Makefile.ctf b/usr/src/tools/ctf/Makefile.ctf
index 7c5b041746..0cf32bf1bc 100644
--- a/usr/src/tools/ctf/Makefile.ctf
+++ b/usr/src/tools/ctf/Makefile.ctf
@@ -41,6 +41,7 @@ HDRDIRS= \
-I$(SRC) \
-I/usr/include \
-I$(SRC)/uts/common \
+ -I$(SRC)/common/ctf \
-I$(NATIVE_ADJUNCT)/include
CPPFLAGS += $(HDRDIRS)
diff --git a/usr/src/tools/ctf/common/ctf_headers.h b/usr/src/tools/ctf/common/ctf_headers.h
index b00b8fd9a6..453c6450ed 100644
--- a/usr/src/tools/ctf/common/ctf_headers.h
+++ b/usr/src/tools/ctf/common/ctf_headers.h
@@ -27,8 +27,6 @@
#ifndef _CTF_HEADERS_H
#define _CTF_HEADERS_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Because the ON tools are executed on the system where they are built,
* the tools need to include the headers installed on the build system,
@@ -67,6 +65,18 @@
#include <uts/common/sys/ctf.h>
#include <uts/common/sys/ctf_api.h>
+#include <common/ctf/ctf_impl.h>
#include <lib/libctf/common/libctf.h>
+/*
+ * XXX: This is hack to deal with GCC 4.x removing __builtin_stdarg_start
+ *
+ * We need to build on systems that don't have the fixed va_impl.h on the
+ * system, to achieve that, we stub it out here and in all similar places to
+ * give us a leg up.
+ */
+#if __GNUC__ >= 4
+#define __builtin_stdarg_start(list, name) __builtin_va_start(list, name)
+#endif
+
#endif /* _CTF_HEADERS_H */
diff --git a/usr/src/tools/ctf/common/utils.h b/usr/src/tools/ctf/common/utils.h
index 9b07361a53..4ae2dd0917 100644
--- a/usr/src/tools/ctf/common/utils.h
+++ b/usr/src/tools/ctf/common/utils.h
@@ -27,9 +27,8 @@
#ifndef _UTILS_H
#define _UTILS_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <stdarg.h>
+#include <ctf_headers.h>
#ifdef __cplusplus
extern "C" {
diff --git a/usr/src/tools/ctf/ctfdiff/Makefile b/usr/src/tools/ctf/ctfdiff/Makefile
new file mode 100644
index 0000000000..07fadc5f8f
--- /dev/null
+++ b/usr/src/tools/ctf/ctfdiff/Makefile
@@ -0,0 +1,44 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+
+include ../../Makefile.tools
+
+SUBDIRS = $(MACH)
+
+all := TARGET= all
+install := TARGET= install
+clean := TARGET= clean
+clobber := TARGET= clobber
+lint := TARGET= lint
+
+.KEEP_STATE:
+
+install all clean clobber lint: $(SUBDIRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/tools/ctf/ctfdiff/Makefile.com b/usr/src/tools/ctf/ctfdiff/Makefile.com
new file mode 100644
index 0000000000..3c5e19fb6e
--- /dev/null
+++ b/usr/src/tools/ctf/ctfdiff/Makefile.com
@@ -0,0 +1,44 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+PROG = ctfdiff
+SRCS = ctfdiff.c
+
+include ../../Makefile.ctf
+
+CFLAGS += $(CCVERBOSE)
+LDLIBS += -lctf
+
+LDFLAGS = \
+ -L$(ROOTONBLDLIBMACH) \
+ '-R$$ORIGIN/../../lib/$(MACH)' \
+
+CPPFLAGS += -include ../../common/ctf_headers.h
+
+OBJS = $(SRCS:%.c=%.o)
+
+all: $(PROG)
+
+$(PROG): $(OBJS)
+ $(LINK.c) $(OBJS) -o $@ $(LDLIBS)
+ $(POST_PROCESS)
+
+%.o: $(SRC)/cmd/ctfdiff/%.c
+ $(COMPILE.c) $<
+
+$(ROOTONBLDMACHPROG): $(PROG)
+
+install: $(ROOTONBLDMACHPROG)
+
+clean:
+ $(RM) $(OBJS) $(LINTFILES)
+
+include $(SRC)/tools/Makefile.targ
diff --git a/usr/src/tools/ctf/ctfdiff/i386/Makefile b/usr/src/tools/ctf/ctfdiff/i386/Makefile
new file mode 100644
index 0000000000..cd5462bee3
--- /dev/null
+++ b/usr/src/tools/ctf/ctfdiff/i386/Makefile
@@ -0,0 +1,27 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+
+include ../Makefile.com
diff --git a/usr/src/tools/ctf/ctfdiff/sparc/Makefile b/usr/src/tools/ctf/ctfdiff/sparc/Makefile
new file mode 100644
index 0000000000..cd5462bee3
--- /dev/null
+++ b/usr/src/tools/ctf/ctfdiff/sparc/Makefile
@@ -0,0 +1,27 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+
+include ../Makefile.com
diff --git a/usr/src/tools/ctf/cvt/ctftools.h b/usr/src/tools/ctf/cvt/ctftools.h
index a9519dc06c..fd9d454e06 100644
--- a/usr/src/tools/ctf/cvt/ctftools.h
+++ b/usr/src/tools/ctf/cvt/ctftools.h
@@ -43,6 +43,17 @@
extern "C" {
#endif
+/*
+ * XXX: This is hack to deal with GCC 4.x removing __builtin_stdarg_start
+ *
+ * We need to build on systems that don't have the fixed va_impl.h on the
+ * system, to achieve that, we stub it out here and in all similar places to
+ * give us a leg up.
+ */
+#if __GNUC__ >= 4
+#define __builtin_stdarg_start(list, name) __builtin_va_start(list, name)
+#endif
+
#include "list.h"
#include "hash.h"
diff --git a/usr/src/tools/ctf/libctf/Makefile b/usr/src/tools/ctf/libctf/Makefile
new file mode 100644
index 0000000000..5e9236c43d
--- /dev/null
+++ b/usr/src/tools/ctf/libctf/Makefile
@@ -0,0 +1,46 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+include ../../Makefile.tools
+
+HDRS = libctf.h
+HDRDIR = common
+
+SUBDIRS = $(MACH)
+
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+install := TARGET = install
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber install lint: $(SUBDIRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/tools/ctf/libctf/Makefile.com b/usr/src/tools/ctf/libctf/Makefile.com
new file mode 100644
index 0000000000..22e8e82318
--- /dev/null
+++ b/usr/src/tools/ctf/libctf/Makefile.com
@@ -0,0 +1,42 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014, Joyent, Inc. All rights reserved.
+#
+
+include $(SRC)/lib/libctf/Makefile.shared.com
+include ../../Makefile.ctf
+
+CPPFLAGS += -include ../../common/ctf_headers.h -DCTF_OLD_VERSIONS
+LDLIBS += -lc
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+install: all $(ROOTONBLDLIBMACH)/libctf.so.1 $(ROOTONBLDLIBMACH)/libctf.so
+
+$(ROOTONBLDLIBMACH)/%: %
+ $(INS.file)
+
+$(ROOTONBLDLIBMACH)/$(LIBLINKS): $(ROOTONBLDLIBMACH)/$(LIBLINKS)$(VERS)
+ $(INS.liblink)
+
+#
+# Just like with libdwarf, we can't actually add ctf to ourselves,
+# because we're part of the tools for creating CTF.
+#
+$(DYNLIB) := CTFMERGE_POST= :
+CTFCONVERT_O= :
+
+include $(SRC)/lib/Makefile.targ
+include $(SRC)/lib/libctf/Makefile.shared.targ
diff --git a/usr/src/tools/ctf/libctf/i386/Makefile b/usr/src/tools/ctf/libctf/i386/Makefile
new file mode 100644
index 0000000000..fff958ac6e
--- /dev/null
+++ b/usr/src/tools/ctf/libctf/i386/Makefile
@@ -0,0 +1,8 @@
+#
+# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+include ../Makefile.com
+
+all: $(LIBS)
diff --git a/usr/src/tools/ctf/libctf/sparc/Makefile b/usr/src/tools/ctf/libctf/sparc/Makefile
new file mode 100644
index 0000000000..fff958ac6e
--- /dev/null
+++ b/usr/src/tools/ctf/libctf/sparc/Makefile
@@ -0,0 +1,8 @@
+#
+# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+include ../Makefile.com
+
+all: $(LIBS)
diff --git a/usr/src/tools/env/illumos.sh b/usr/src/tools/env/illumos.sh
index ecfec4d795..162f1f702f 100644
--- a/usr/src/tools/env/illumos.sh
+++ b/usr/src/tools/env/illumos.sh
@@ -229,3 +229,9 @@ export SPRO_VROOT="$SPRO_ROOT"
# Uncomment this to disable support for SMB printing.
# export ENABLE_SMB_PRINTING='#'
+
+#
+# These checks ensure that if we accidentally run a program linked against the
+# proto area, that we then fail the build.
+#
+export LD_TOXIC_PATH="$ROOT/lib:$ROOT/usr/lib"
diff --git a/usr/src/tools/findunref/exception_list.git b/usr/src/tools/findunref/exception_list.git
new file mode 100644
index 0000000000..d14dbd22f0
--- /dev/null
+++ b/usr/src/tools/findunref/exception_list.git
@@ -0,0 +1,39 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Cyril Plisko. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Git-specific exception list
+#
+# See README.exception_lists for details
+#
+
+#
+# Without nested repositories, this list could be empty, because ON
+# checks for unref relative to usr, and the git files are all in the
+# root of the repository.
+#
+
+*/.git
diff --git a/usr/src/tools/findunref/exception_list.unknown b/usr/src/tools/findunref/exception_list.unknown
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/usr/src/tools/findunref/exception_list.unknown
diff --git a/usr/src/tools/onbld/Checks/Cddl.py b/usr/src/tools/onbld/Checks/Cddl.py
index 1f5f99f953..0f4d995e89 100644
--- a/usr/src/tools/onbld/Checks/Cddl.py
+++ b/usr/src/tools/onbld/Checks/Cddl.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python
+#!ON_PYTHON
CDDL = '''
CDDL HEADER START
diff --git a/usr/src/tools/onbld/Checks/CmtBlk.py b/usr/src/tools/onbld/Checks/CmtBlk.py
index 2f3d29fa79..ddf5caec48 100644
--- a/usr/src/tools/onbld/Checks/CmtBlk.py
+++ b/usr/src/tools/onbld/Checks/CmtBlk.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python
+#!ON_PYTHON
#
# CDDL HEADER START
diff --git a/usr/src/tools/onbld/Checks/Comments.py b/usr/src/tools/onbld/Checks/Comments.py
index daf6aa47d9..5d6feb265a 100644
--- a/usr/src/tools/onbld/Checks/Comments.py
+++ b/usr/src/tools/onbld/Checks/Comments.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python
+#!ON_PYTHON
#
# CDDL HEADER START
#
@@ -38,7 +38,7 @@ import re, sys
from onbld.Checks.DbLookups import BugDB
-bugre = re.compile(r'^(\d{2,7}) (.*)$')
+bugre = re.compile(r'^(\d{2,7}|[A-Z]{1,7}-\d{1,7}) (.*)$')
def isBug(comment):
diff --git a/usr/src/tools/onbld/Checks/Copyright.py b/usr/src/tools/onbld/Checks/Copyright.py
index 81a80058aa..8071b7f435 100644
--- a/usr/src/tools/onbld/Checks/Copyright.py
+++ b/usr/src/tools/onbld/Checks/Copyright.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python
+#!ON_PYTHON
#
# CDDL HEADER START
#
diff --git a/usr/src/tools/onbld/Checks/DbLookups.py b/usr/src/tools/onbld/Checks/DbLookups.py
index 11fd4185be..2843673035 100644
--- a/usr/src/tools/onbld/Checks/DbLookups.py
+++ b/usr/src/tools/onbld/Checks/DbLookups.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python
+#!ON_PYTHON
#
# CDDL HEADER START
#
@@ -37,6 +37,7 @@ import htmllib
import re
import urllib
import urllib2
+import json
try: # Python >= 2.5
from xml.etree import ElementTree
@@ -62,9 +63,9 @@ class BugDB(object):
print r["6505625"]["synopsis"]
"""
- VALID_DBS = ["illumos"]
+ VALID_DBS = ["illumos", "smartos"]
- def __init__(self, priority = ["illumos"]):
+ def __init__(self, priority = VALID_DBS):
"""Create a BugDB object.
Keyword argument:
@@ -75,6 +76,24 @@ class BugDB(object):
raise BugDBException, database
self.__priority = priority
+ def __smartosbug(self, cr):
+ url = "http://smartos.org/bugview/json/%s" % cr
+ req = urllib2.Request(url)
+
+ try:
+ data = urllib2.urlopen(req)
+ except urllib2.HTTPError, e:
+ if e.code == 404 or e.code == 403 or e.code == 400:
+ raise NonExistentBug(cr)
+ else:
+ raise
+
+ bug = json.load(data)
+
+ return {'cr_number': bug['id'],
+ 'synopsis': bug['summary']
+ }
+
def __illbug(self, cr):
url = "http://illumos.org/issues/%s.xml" % cr
@@ -117,6 +136,12 @@ class BugDB(object):
results[str(cr)] = self.__illbug(cr)
except NonExistentBug:
continue
+ elif database == "smartos":
+ for cr in crs:
+ try:
+ results[str(cr)] = self.__smartosbug(cr)
+ except NonExistentBug:
+ continue
# the CR has already been found by one bug database
# so don't bother looking it up in the others
diff --git a/usr/src/tools/onbld/Checks/HdrChk.py b/usr/src/tools/onbld/Checks/HdrChk.py
index c2697dcaf2..8f7b946d12 100644
--- a/usr/src/tools/onbld/Checks/HdrChk.py
+++ b/usr/src/tools/onbld/Checks/HdrChk.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python
+#!ON_PYTHON
#
# CDDL HEADER START
#
diff --git a/usr/src/tools/onbld/Checks/Keywords.py b/usr/src/tools/onbld/Checks/Keywords.py
index 5c374c3abb..ac3555be32 100644
--- a/usr/src/tools/onbld/Checks/Keywords.py
+++ b/usr/src/tools/onbld/Checks/Keywords.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python
+#!ON_PYTHON
#
# CDDL HEADER START
#
@@ -24,8 +24,6 @@
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
-#
#
# Mercurial (lack of) keyword checks
diff --git a/usr/src/tools/onbld/Checks/Mapfile.py b/usr/src/tools/onbld/Checks/Mapfile.py
index 2a8cb74aed..d4fa70d141 100644
--- a/usr/src/tools/onbld/Checks/Mapfile.py
+++ b/usr/src/tools/onbld/Checks/Mapfile.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python
+#!ON_PYTHON
#
# CDDL HEADER START
#
diff --git a/usr/src/tools/onbld/Checks/__init__.py b/usr/src/tools/onbld/Checks/__init__.py
index 7051b0c565..2fde833098 100644
--- a/usr/src/tools/onbld/Checks/__init__.py
+++ b/usr/src/tools/onbld/Checks/__init__.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python
+#!ON_PYTHON
#
# CDDL HEADER START
#
diff --git a/usr/src/tools/onbld/Scm/__init__.py b/usr/src/tools/onbld/Scm/__init__.py
index f45ecbc95f..8934eb3942 100644
--- a/usr/src/tools/onbld/Scm/__init__.py
+++ b/usr/src/tools/onbld/Scm/__init__.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python
+#!ON_PYTHON
#
# CDDL HEADER START
#
@@ -24,5 +24,3 @@
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
-#
diff --git a/usr/src/tools/onbld/hgext/__init__.py b/usr/src/tools/onbld/hgext/__init__.py
index f45ecbc95f..8934eb3942 100644
--- a/usr/src/tools/onbld/hgext/__init__.py
+++ b/usr/src/tools/onbld/hgext/__init__.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python
+#!ON_PYTHON
#
# CDDL HEADER START
#
@@ -24,5 +24,3 @@
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
-#
diff --git a/usr/src/tools/scripts/cddlchk.py b/usr/src/tools/scripts/cddlchk.py
index 816d2f33a7..ad6a6e6f56 100644
--- a/usr/src/tools/scripts/cddlchk.py
+++ b/usr/src/tools/scripts/cddlchk.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python2.6
+#!ON_PYTHON_26
#
# CDDL HEADER START
#
diff --git a/usr/src/tools/scripts/copyrightchk.py b/usr/src/tools/scripts/copyrightchk.py
index 210ef1b46c..f778947bfc 100644
--- a/usr/src/tools/scripts/copyrightchk.py
+++ b/usr/src/tools/scripts/copyrightchk.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python2.6
+#!ON_PYTHON_26
#
# CDDL HEADER START
#
diff --git a/usr/src/tools/scripts/git-pbchk.py b/usr/src/tools/scripts/git-pbchk.py
index fd16e109b2..123b5fa088 100644
--- a/usr/src/tools/scripts/git-pbchk.py
+++ b/usr/src/tools/scripts/git-pbchk.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python2.6
+#!ON_PYTHON_26
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2
diff --git a/usr/src/tools/scripts/hdrchk.py b/usr/src/tools/scripts/hdrchk.py
index 84acbc8616..aa62f7b090 100644
--- a/usr/src/tools/scripts/hdrchk.py
+++ b/usr/src/tools/scripts/hdrchk.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python2.6
+#!ON_PYTHON_26
#
# CDDL HEADER START
#
diff --git a/usr/src/tools/scripts/hg-active.py b/usr/src/tools/scripts/hg-active.py
index 88394e98e5..23f6c7cd98 100644
--- a/usr/src/tools/scripts/hg-active.py
+++ b/usr/src/tools/scripts/hg-active.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python2.6
+#!ON_PYTHON_26
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2
diff --git a/usr/src/tools/scripts/mapfilechk.py b/usr/src/tools/scripts/mapfilechk.py
index 9cf2000c7a..ed90d8b426 100644
--- a/usr/src/tools/scripts/mapfilechk.py
+++ b/usr/src/tools/scripts/mapfilechk.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python2.6
+#!ON_PYTHON_26
#
# CDDL HEADER START
#
diff --git a/usr/src/tools/scripts/nightly.sh b/usr/src/tools/scripts/nightly.sh
index 77d23a76aa..f5bce4fb08 100644
--- a/usr/src/tools/scripts/nightly.sh
+++ b/usr/src/tools/scripts/nightly.sh
@@ -25,6 +25,7 @@
# Copyright 2008, 2010, Richard Lowe
# Copyright 2011 Nexenta Systems, Inc. All rights reserved.
# Copyright 2012 Joshua M. Clulow <josh@sysmgr.org>
+# Copyright 2013 (c) Joyent, Inc. All rights reserved.
#
# Based on the nightly script from the integration folks,
# Mostly modified and owned by mike_s.
@@ -973,37 +974,16 @@ elif [[ ! -x "$MAKE" ]]; then
fi
# get the dmake version string alone
DMAKE_VERSION=$( $MAKE -v )
-DMAKE_VERSION=${DMAKE_VERSION#*: }
-# focus in on just the dotted version number alone
-DMAKE_MAJOR=$( echo $DMAKE_VERSION | \
- sed -e 's/.*\<\([^.]*\.[^ ]*\).*$/\1/' )
-# extract the second (or final) integer
-DMAKE_MINOR=${DMAKE_MAJOR#*.}
-DMAKE_MINOR=${DMAKE_MINOR%%.*}
-# extract the first integer
-DMAKE_MAJOR=${DMAKE_MAJOR%%.*}
-CHECK_DMAKE=${CHECK_DMAKE:-y}
-# x86 was built on the 12th, sparc on the 13th.
-if [ "$CHECK_DMAKE" = "y" -a \
- "$DMAKE_VERSION" != "Sun Distributed Make 7.3 2003/03/12" -a \
- "$DMAKE_VERSION" != "Sun Distributed Make 7.3 2003/03/13" -a \( \
- "$DMAKE_MAJOR" -lt 7 -o \
- "$DMAKE_MAJOR" -eq 7 -a "$DMAKE_MINOR" -lt 4 \) ]; then
- if [ -z "$DMAKE_VERSION" ]; then
- echo "$MAKE is missing."
- exit 1
- fi
- echo `whence $MAKE`" version is:"
- echo " ${DMAKE_VERSION}"
+# Admittedly not the best check, but better than the old one.
+if ! echo $DMAKE_VERSION | grep -q rm; then
cat <<EOF
-
-This version may not be safe for use, if you really want to use this version
-anyway add the following to your environment to disable this check:
-
- CHECK_DMAKE=n
+The version of dmake you are using ($DMAKE_VERSION) is not the current open
+source version for building illumos. Please ensure that you are building
+illumos-joyent with the latest version of dmake in illumos-extra.
EOF
exit 1
fi
+
export PATH
export MAKE
diff --git a/usr/src/tools/scripts/validate_pkg.py b/usr/src/tools/scripts/validate_pkg.py
index 6678b211e8..155df9ba25 100644
--- a/usr/src/tools/scripts/validate_pkg.py
+++ b/usr/src/tools/scripts/validate_pkg.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python2.6
+#!ON_PYTHON_26
#
# CDDL HEADER START
#
diff --git a/usr/src/tools/scripts/webrev.sh b/usr/src/tools/scripts/webrev.sh
index 23289c13fe..8a122f7358 100644
--- a/usr/src/tools/scripts/webrev.sh
+++ b/usr/src/tools/scripts/webrev.sh
@@ -2840,7 +2840,12 @@ done
#
# Output directory.
#
-WDIR=${WDIR:-$CWS/webrev}
+if [[ $SCM_MODE == "git" ]]; then
+ ws_top_dir=$(dirname $CWS)
+ WDIR=${WDIR:-$ws_top_dir/webrev}
+else
+ WDIR=${WDIR:-$CWS/webrev}
+fi
#
# Name of the webrev, derived from the workspace name or output directory;
@@ -3535,6 +3540,7 @@ do
fi
if [[ $SCM_MODE == "mercurial" ||
+ $SCM_MODE == "git" ||
$SCM_MODE == "unknown" ]]; then
# Include warnings for important file mode situations:
# 1) New executable files
diff --git a/usr/src/tools/scripts/wsdiff.py b/usr/src/tools/scripts/wsdiff.py
index 27458f43fc..4ae79e889f 100644
--- a/usr/src/tools/scripts/wsdiff.py
+++ b/usr/src/tools/scripts/wsdiff.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python2.6
+#!ON_PYTHON_26
#
# CDDL HEADER START
#
diff --git a/usr/src/ucbhead/Makefile b/usr/src/ucbhead/Makefile
new file mode 100644
index 0000000000..ef9fc7f2fe
--- /dev/null
+++ b/usr/src/ucbhead/Makefile
@@ -0,0 +1,68 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 1989-2003 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ucbhead/Makefile
+#
+# include global definitions
+include ../Makefile.master
+
+LN= ln
+
+HDRS= curses.h dbm.h setjmp.h sgtty.h signal.h stdio.h strings.h unistd.h
+
+SYSHDRS= \
+dir.h fcntl.h file.h ioctl.h \
+param.h resource.h rusage.h signal.h \
+sysmacros.h ttychars.h types.h vfs.h \
+wait.h
+
+ROOTHDRS= $(HDRS:%=$(ROOT)/usr/ucbinclude/%) \
+ $(SYSHDRS:%=$(ROOT)/usr/ucbinclude/sys/%)
+
+DIRS= sys
+ROOTDIRS= $(ROOT)/usr/ucbinclude $(DIRS:%=$(ROOT)/usr/ucbinclude/%)
+
+FCNTLH= $(ROOT)/usr/ucbinclude/fcntl.h
+SYSFCNTLH= $(ROOT)/usr/ucbinclude/sys/fcntl.h
+
+INS.FCNTLH= $(RM) $@; $(SYMLINK) sys/fcntl.h $@
+
+# install rules
+$(ROOT)/usr/ucbinclude/sys/%: sys/%
+ $(INS.file)
+
+$(ROOT)/usr/ucbinclude/%: %
+ $(INS.file)
+
+.KEEP_STATE:
+
+install_h: $(ROOTDIRS) $(ROOTHDRS) $(FCNTLH)
+
+$(FCNTLH): $(SYSFCNTLH)
+ $(INS.FCNTLH)
+
+$(ROOTDIRS):
+ $(INS.dir)
+
diff --git a/usr/src/ucbhead/sys/file.h b/usr/src/ucbhead/sys/file.h
index 10863380dd..152141fcd7 100644
--- a/usr/src/ucbhead/sys/file.h
+++ b/usr/src/ucbhead/sys/file.h
@@ -77,6 +77,10 @@ typedef struct file
#include <sys/fcntl.h>
#endif
+#if !defined(__XOPEN_OR_POSIX) || defined(__EXTENSIONS__)
+int flock(int, int);
+#endif
+
/* flags - see also fcntl.h */
#ifndef FOPEN
diff --git a/usr/src/uts/Makefile.uts b/usr/src/uts/Makefile.uts
index ae53983d07..33de8780bd 100644
--- a/usr/src/uts/Makefile.uts
+++ b/usr/src/uts/Makefile.uts
@@ -213,6 +213,7 @@ AS_CPPFLAGS = $(ALWAYS_DEFS) $(ALL_DEFS) $(CONFIG_DEFS) $(AS_DEFS) \
# Override the default, the kernel is squeaky clean
CERRWARN = -errtags=yes -errwarn=%all
+$(__GNUC4)CERRWARN += -_gcc=-Wno-address
CERRWARN += -_gcc=-Wno-missing-braces
CERRWARN += -_gcc=-Wno-sign-compare
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index 2534527201..1f3166fa85 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -24,6 +24,7 @@
# Copyright (c) 2013 by Delphix. All rights reserved.
# Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
# Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
#
#
@@ -426,6 +427,8 @@ PROFILE_OBJS += profile.o
SYSTRACE_OBJS += systrace.o
+LX_SYSTRACE_OBJS += lx_systrace.o
+
LOCKSTAT_OBJS += lockstat.o
FASTTRAP_OBJS += fasttrap.o fasttrap_isa.o
@@ -490,6 +493,12 @@ PTSL_OBJS += tty_pts.o
PTM_OBJS += ptm.o
+LX_PTM_OBJS += lx_ptm.o
+
+LX_AUDIO_OBJS += lx_audio.o
+
+LX_NETLINK_OBJS += lx_netlink.o
+
MII_OBJS += mii.o mii_cicada.o mii_natsemi.o mii_intel.o mii_qualsemi.o \
mii_marvell.o mii_realtek.o mii_other.o
@@ -673,6 +682,10 @@ NET80211_OBJS += net80211.o net80211_proto.o net80211_input.o \
VNIC_OBJS += vnic_ctl.o vnic_dev.o
+VND_OBJS += vnd.o frameio.o
+
+GSQUEUE_OBJS += gsqueue.o
+
SIMNET_OBJS += simnet.o
IB_OBJS += ibnex.o ibnex_ioctl.o ibnex_hca.o
@@ -994,6 +1007,8 @@ DEVPOOL_OBJS += devpool.o
I8042_OBJS += i8042.o
+INOTIFY_OBJS += inotify.o
+
KB8042_OBJS += \
at_keyprocess.o \
kb8042.o \
@@ -1128,8 +1143,7 @@ DEVFS_OBJS += devfs_subr.o devfs_vfsops.o devfs_vnops.o
DEV_OBJS += sdev_subr.o sdev_vfsops.o sdev_vnops.o \
sdev_ptsops.o sdev_zvolops.o sdev_comm.o \
sdev_profile.o sdev_ncache.o sdev_netops.o \
- sdev_ipnetops.o \
- sdev_vtops.o
+ sdev_ipnetops.o sdev_vtops.o sdev_plugin.o
CTFS_OBJS += ctfs_all.o ctfs_cdir.o ctfs_ctl.o ctfs_event.o \
ctfs_latest.o ctfs_root.o ctfs_sym.o ctfs_tdir.o ctfs_tmpl.o
@@ -1146,8 +1160,13 @@ PIPE_OBJS += pipe.o
HSFS_OBJS += hsfs_node.o hsfs_subr.o hsfs_vfsops.o hsfs_vnops.o \
hsfs_susp.o hsfs_rrip.o hsfs_susp_subr.o
+HYPRLOFS_OBJS += hyprlofs_dir.o hyprlofs_subr.o \
+ hyprlofs_vnops.o hyprlofs_vfsops.o
+
LOFS_OBJS += lofs_subr.o lofs_vfsops.o lofs_vnops.o
+LXPROC_OBJS += lxpr_subr.o lxpr_vfsops.o lxpr_vnops.o
+
NAMEFS_OBJS += namevfs.o namevno.o
NFS_OBJS += nfs_client.o nfs_common.o nfs_dump.o \
@@ -1301,6 +1320,7 @@ SMBFS_OBJS += smbfs_vfsops.o smbfs_vnops.o smbfs_node.o \
smbfs_rwlock.o smbfs_xattr.o \
$(SMBFS_COMMON_OBJS)
+BOOTFS_OBJS += bootfs_construct.o bootfs_vfsops.o bootfs_vnops.o
#
# LVM modules
@@ -1394,6 +1414,7 @@ ZFS_COMMON_OBJS += \
zfs_fuid.o \
zfs_sa.o \
zfs_znode.o \
+ zfs_zone.o \
zil.o \
zio.o \
zio_checksum.o \
@@ -2040,6 +2061,11 @@ MEGA_SAS_OBJS = megaraid_sas.o
MR_SAS_OBJS = ld_pd_map.o mr_sas.o mr_sas_tbolt.o mr_sas_list.o
#
+# DR_SAS module
+#
+DR_SAS_OBJS = dr_sas.o
+
+#
# CPQARY3 module
#
CPQARY3_OBJS = cpqary3.o cpqary3_noe.o cpqary3_talk2ctlr.o \
@@ -2076,3 +2102,17 @@ MYRI10GE_OBJS = myri10ge.o myri10ge_lro.o
NULLDRIVER_OBJS = nulldriver.o
TPM_OBJS = tpm.o tpm_hcall.o
+
+#
+# USB Fast ethernet drivers
+#
+USBGEM_OBJS = usbgem.o
+AXF_OBJS = axf_usbgem.o
+UDMF_OBJS = udmf_usbgem.o
+URF_OBJS = urf_usbgem.o
+UPF_OBJS = upf_usbgem.o
+
+#
+# NFP objects
+#
+NFP_OBJS = hostif.o osif.o drvlist.o i21555.o i21285.o i21555d.o
diff --git a/usr/src/uts/common/Makefile.rules b/usr/src/uts/common/Makefile.rules
index 3545ebb9a7..9b523bbec4 100644
--- a/usr/src/uts/common/Makefile.rules
+++ b/usr/src/uts/common/Makefile.rules
@@ -23,6 +23,7 @@
# Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright 2013 Garrett D'Amore <garrett@damore.org>
# Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
#
#
@@ -103,6 +104,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/sn1/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/sngl/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/solaris10/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -207,6 +212,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/fs/autofs/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/fs/bootfs/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/fs/cachefs/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -243,10 +252,18 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/fs/hsfs/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/fs/hyprlofs/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/fs/lofs/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/fs/lxproc/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/fs/mntfs/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -726,6 +743,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/drm/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/dr_sas/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/efe/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -954,6 +975,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/net80211/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/nfp/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/nge/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -1105,6 +1130,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/sdcard/targets/sdcard/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/gsqueue/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/sfe/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -1117,6 +1146,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/softmac/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/vnd/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/uath/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -1261,6 +1294,30 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/usb/hwa/hwahc/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/usbgem/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/axf/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/udf/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/udmf/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/upf/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/urf/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/uwb/uwba/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -1626,6 +1683,9 @@ $(LINTS_DIR)/%.ln: $(COMMONBASE)/ucode/%.c
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/sn1/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/sngl/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/solaris10/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
@@ -1728,6 +1788,9 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/fs/%.c
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/fs/autofs/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/fs/bootfs/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/fs/cachefs/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
@@ -1755,9 +1818,15 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/fs/fifofs/%.c
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/fs/hsfs/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/fs/hyprlofs/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/fs/lofs/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/fs/lxproc/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/fs/mntfs/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
@@ -2079,6 +2148,9 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/dmfe/%.c
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/drm/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/dr_sas/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/efe/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
@@ -2250,6 +2322,9 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/mwl/mwl_fw/%.c
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/net80211/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/nfp/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/nge/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
@@ -2487,6 +2562,21 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/uwb/uwba/%.c
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/usb/hwa/hwahc/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/usbgem/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/axf/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/udmf/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/upf/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/urf/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/vuidmice/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/common/brand/lx/autofs/lx_autofs.c b/usr/src/uts/common/brand/lx/autofs/lx_autofs.c
new file mode 100644
index 0000000000..d2bb03c118
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/autofs/lx_autofs.c
@@ -0,0 +1,1569 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <fs/fs_subr.h>
+#include <sys/atomic.h>
+#include <sys/cmn_err.h>
+#include <sys/dirent.h>
+#include <sys/fs/fifonode.h>
+#include <sys/modctl.h>
+#include <sys/mount.h>
+#include <sys/policy.h>
+#include <sys/sunddi.h>
+
+#include <sys/sysmacros.h>
+#include <sys/vfs.h>
+#include <sys/vfs_opreg.h>
+
+#include <sys/lx_autofs_impl.h>
+
+/*
+ * External functions
+ */
+extern uintptr_t space_fetch(char *key);
+extern int space_store(char *key, uintptr_t ptr);
+
+/*
+ * Globals
+ */
+static vfsops_t *lx_autofs_vfsops;
+static vnodeops_t *lx_autofs_vn_ops = NULL;
+static int lx_autofs_fstype;
+static major_t lx_autofs_major;
+static minor_t lx_autofs_minor = 0;
+
+/*
+ * Support functions
+ */
+static void
+i_strfree(char *str)
+{
+ kmem_free(str, strlen(str) + 1);
+}
+
+static char *
+i_strdup(char *str)
+{
+ int n = strlen(str);
+ char *ptr = kmem_alloc(n + 1, KM_SLEEP);
+ bcopy(str, ptr, n + 1);
+ return (ptr);
+}
+
+static int
+i_str_to_int(char *str, int *val)
+{
+ long res;
+
+ if (str == NULL)
+ return (-1);
+
+ if ((ddi_strtol(str, NULL, 10, &res) != 0) ||
+ (res < INT_MIN) || (res > INT_MAX))
+ return (-1);
+
+ *val = res;
+ return (0);
+}
+
+static void
+i_stack_init(list_t *lp)
+{
+ list_create(lp,
+ sizeof (stack_elem_t), offsetof(stack_elem_t, se_list));
+}
+
+static void
+i_stack_fini(list_t *lp)
+{
+ ASSERT(list_head(lp) == NULL);
+ list_destroy(lp);
+}
+
+static void
+i_stack_push(list_t *lp, caddr_t ptr1, caddr_t ptr2, caddr_t ptr3)
+{
+ stack_elem_t *se;
+
+ se = kmem_alloc(sizeof (*se), KM_SLEEP);
+ se->se_ptr1 = ptr1;
+ se->se_ptr2 = ptr2;
+ se->se_ptr3 = ptr3;
+ list_insert_head(lp, se);
+}
+
+static int
+i_stack_pop(list_t *lp, caddr_t *ptr1, caddr_t *ptr2, caddr_t *ptr3)
+{
+ stack_elem_t *se;
+
+ if ((se = list_head(lp)) == NULL)
+ return (-1);
+ list_remove(lp, se);
+ if (ptr1 != NULL)
+ *ptr1 = se->se_ptr1;
+ if (ptr2 != NULL)
+ *ptr2 = se->se_ptr2;
+ if (ptr3 != NULL)
+ *ptr3 = se->se_ptr3;
+ kmem_free(se, sizeof (*se));
+ return (0);
+}
+
+static vnode_t *
+fifo_peer_vp(vnode_t *vp)
+{
+ fifonode_t *fnp = VTOF(vp);
+ fifonode_t *fn_dest = fnp->fn_dest;
+ return (FTOV(fn_dest));
+}
+
+static vnode_t *
+i_vn_alloc(vfs_t *vfsp, vnode_t *uvp)
+{
+ lx_autofs_vfs_t *data = vfsp->vfs_data;
+ vnode_t *vp, *vp_old;
+
+ /* Allocate a new vnode structure in case we need it. */
+ vp = vn_alloc(KM_SLEEP);
+ vn_setops(vp, lx_autofs_vn_ops);
+ VN_SET_VFS_TYPE_DEV(vp, vfsp, uvp->v_type, uvp->v_rdev);
+ vp->v_data = uvp;
+ ASSERT(vp->v_count == 1);
+
+ /*
+ * Take a hold on the vfs structure. This is how unmount will
+ * determine if there are any active vnodes in the file system.
+ */
+ VFS_HOLD(vfsp);
+
+ /*
+ * Check if we already have a vnode allocated for this underlying
+ * vnode_t.
+ */
+ mutex_enter(&data->lav_lock);
+ if (mod_hash_find(data->lav_vn_hash,
+ (mod_hash_key_t)uvp, (mod_hash_val_t *)&vp_old) != 0) {
+
+ /*
+ * Didn't find an existing node.
+ * Add this node to the hash and return.
+ */
+ VERIFY(mod_hash_insert(data->lav_vn_hash,
+ (mod_hash_key_t)uvp,
+ (mod_hash_val_t)vp) == 0);
+ mutex_exit(&data->lav_lock);
+ return (vp);
+ }
+
+ /* Get a hold on the existing vnode and free up the one we allocated. */
+ VN_HOLD(vp_old);
+ mutex_exit(&data->lav_lock);
+
+ /* Free up the new vnode we allocated. */
+ VN_RELE(uvp);
+ VFS_RELE(vfsp);
+ vn_invalid(vp);
+ vn_free(vp);
+
+ return (vp_old);
+}
+
+static void
+i_vn_free(vnode_t *vp)
+{
+ vfs_t *vfsp = vp->v_vfsp;
+ lx_autofs_vfs_t *data = vfsp->vfs_data;
+ vnode_t *uvp = vp->v_data;
+ vnode_t *vp_tmp;
+
+ ASSERT(MUTEX_HELD((&data->lav_lock)));
+ ASSERT(MUTEX_HELD((&vp->v_lock)));
+
+ ASSERT(vp->v_count == 0);
+
+ /* We're about to free this vnode so take it out of the hash. */
+ (void) mod_hash_remove(data->lav_vn_hash,
+ (mod_hash_key_t)uvp, (mod_hash_val_t)&vp_tmp);
+
+ /*
+ * No one else can lookup this vnode any more so there's no need
+ * to hold locks.
+ */
+ mutex_exit(&data->lav_lock);
+ mutex_exit(&vp->v_lock);
+
+ /* Release the underlying vnode. */
+ VN_RELE(uvp);
+ VFS_RELE(vfsp);
+ vn_invalid(vp);
+ vn_free(vp);
+}
+
+static lx_autofs_lookup_req_t *
+i_lalr_alloc(lx_autofs_vfs_t *data, int *dup_request, char *nm)
+{
+ lx_autofs_lookup_req_t *lalr, *lalr_dup;
+
+ /* Pre-allocate a new automounter request before grabbing locks. */
+ lalr = kmem_zalloc(sizeof (*lalr), KM_SLEEP);
+ mutex_init(&lalr->lalr_lock, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&lalr->lalr_cv, NULL, CV_DEFAULT, NULL);
+ lalr->lalr_ref = 1;
+ lalr->lalr_pkt.lap_protover = LX_AUTOFS_PROTO_VERSION;
+
+ /* Assign a unique id for this request. */
+ lalr->lalr_pkt.lap_id = id_alloc(data->lav_ids);
+
+ /*
+ * The token expected by the linux automount is the name of
+ * the directory entry to look up. (And not the entire
+ * path that is being accessed.)
+ */
+ lalr->lalr_pkt.lap_name_len = strlen(nm);
+ if (lalr->lalr_pkt.lap_name_len >
+ (sizeof (lalr->lalr_pkt.lap_name) - 1)) {
+ zcmn_err(getzoneid(), CE_NOTE,
+ "invalid autofs lookup: \"%s\"", nm);
+ id_free(data->lav_ids, lalr->lalr_pkt.lap_id);
+ kmem_free(lalr, sizeof (*lalr));
+ return (NULL);
+ }
+ (void) strlcpy(lalr->lalr_pkt.lap_name, nm,
+ sizeof (lalr->lalr_pkt.lap_name));
+
+ /* Check for an outstanding request for this path. */
+ mutex_enter(&data->lav_lock);
+ if (mod_hash_find(data->lav_path_hash,
+ (mod_hash_key_t)nm, (mod_hash_val_t *)&lalr_dup) == 0) {
+ /*
+ * There's already an outstanding request for this
+ * path so we don't need a new one.
+ */
+ id_free(data->lav_ids, lalr->lalr_pkt.lap_id);
+ kmem_free(lalr, sizeof (*lalr));
+ lalr = lalr_dup;
+
+ /* Bump the ref count on the old request. */
+ atomic_add_int(&lalr->lalr_ref, 1);
+
+ *dup_request = 1;
+ } else {
+ /* Add it to the hashes. */
+ VERIFY(mod_hash_insert(data->lav_id_hash,
+ (mod_hash_key_t)(uintptr_t)lalr->lalr_pkt.lap_id,
+ (mod_hash_val_t)lalr) == 0);
+ VERIFY(mod_hash_insert(data->lav_path_hash,
+ (mod_hash_key_t)i_strdup(nm),
+ (mod_hash_val_t)lalr) == 0);
+
+ *dup_request = 0;
+ }
+ mutex_exit(&data->lav_lock);
+
+ return (lalr);
+}
+
+static lx_autofs_lookup_req_t *
+i_lalr_find(lx_autofs_vfs_t *data, int id)
+{
+ lx_autofs_lookup_req_t *lalr;
+
+ /* Check for an outstanding request for this id. */
+ mutex_enter(&data->lav_lock);
+ if (mod_hash_find(data->lav_id_hash, (mod_hash_key_t)(uintptr_t)id,
+ (mod_hash_val_t *)&lalr) != 0) {
+ mutex_exit(&data->lav_lock);
+ return (NULL);
+ }
+ atomic_add_int(&lalr->lalr_ref, 1);
+ mutex_exit(&data->lav_lock);
+ return (lalr);
+}
+
+static void
+i_lalr_complete(lx_autofs_vfs_t *data, lx_autofs_lookup_req_t *lalr)
+{
+ lx_autofs_lookup_req_t *lalr_tmp;
+
+ /* Remove this request from the hashes so no one can look it up. */
+ mutex_enter(&data->lav_lock);
+ (void) mod_hash_remove(data->lav_id_hash,
+ (mod_hash_key_t)(uintptr_t)lalr->lalr_pkt.lap_id,
+ (mod_hash_val_t)&lalr_tmp);
+ (void) mod_hash_remove(data->lav_path_hash,
+ (mod_hash_key_t)lalr->lalr_pkt.lap_name,
+ (mod_hash_val_t)&lalr_tmp);
+ mutex_exit(&data->lav_lock);
+
+ /* Mark this requst as complete and wakeup anyone waiting on it. */
+ mutex_enter(&lalr->lalr_lock);
+ lalr->lalr_complete = 1;
+ cv_broadcast(&lalr->lalr_cv);
+ mutex_exit(&lalr->lalr_lock);
+}
+
+static void
+i_lalr_release(lx_autofs_vfs_t *data, lx_autofs_lookup_req_t *lalr)
+{
+ ASSERT(!MUTEX_HELD(&lalr->lalr_lock));
+ if (atomic_add_int_nv(&lalr->lalr_ref, -1) > 0)
+ return;
+ ASSERT(lalr->lalr_ref == 0);
+ id_free(data->lav_ids, lalr->lalr_pkt.lap_id);
+ kmem_free(lalr, sizeof (*lalr));
+}
+
+static void
+i_lalr_abort(lx_autofs_vfs_t *data, lx_autofs_lookup_req_t *lalr)
+{
+ lx_autofs_lookup_req_t *lalr_tmp;
+
+ /*
+ * This is a little tricky. We're aborting the wait for this
+ * request. So if anyone else is waiting for this request we
+ * can't free it, but if no one else is waiting for the request
+ * we should free it.
+ */
+ mutex_enter(&data->lav_lock);
+ if (atomic_add_int_nv(&lalr->lalr_ref, -1) > 0) {
+ mutex_exit(&data->lav_lock);
+ return;
+ }
+ ASSERT(lalr->lalr_ref == 0);
+
+ /* Remove this request from the hashes so no one can look it up. */
+ (void) mod_hash_remove(data->lav_id_hash,
+ (mod_hash_key_t)(uintptr_t)lalr->lalr_pkt.lap_id,
+ (mod_hash_val_t)&lalr_tmp);
+ (void) mod_hash_remove(data->lav_path_hash,
+ (mod_hash_key_t)lalr->lalr_pkt.lap_name,
+ (mod_hash_val_t)&lalr_tmp);
+ mutex_exit(&data->lav_lock);
+
+ /* It's ok to free this now because the ref count was zero. */
+ id_free(data->lav_ids, lalr->lalr_pkt.lap_id);
+ kmem_free(lalr, sizeof (*lalr));
+}
+
+static int
+i_fifo_lookup(pid_t pgrp, int fd, file_t **fpp_wr, file_t **fpp_rd)
+{
+ proc_t *prp;
+ uf_info_t *fip;
+ uf_entry_t *ufp_wr, *ufp_rd = NULL;
+ file_t *fp_wr, *fp_rd = NULL;
+ vnode_t *vp_wr, *vp_rd;
+ int i;
+
+ /*
+ * sprlock() is zone aware, so assuming this mount call was
+ * initiated by a process in a zone, if it tries to specify
+ * a pgrp outside of it's zone this call will fail.
+ *
+ * Also, we want to grab hold of the main automounter process
+ * and its going to be the group leader for pgrp, so its
+ * pid will be equal to pgrp.
+ */
+ prp = sprlock(pgrp);
+ if (prp == NULL)
+ return (-1);
+ mutex_exit(&prp->p_lock);
+
+ /* Now we want to access the processes open file descriptors. */
+ fip = P_FINFO(prp);
+ mutex_enter(&fip->fi_lock);
+
+ /* Sanity check fifo write fd. */
+ if (fd >= fip->fi_nfiles) {
+ mutex_exit(&fip->fi_lock);
+ mutex_enter(&prp->p_lock);
+ sprunlock(prp);
+ return (-1);
+ }
+
+ /* Get a pointer to the write fifo. */
+ UF_ENTER(ufp_wr, fip, fd);
+ if (((fp_wr = ufp_wr->uf_file) == NULL) ||
+ ((vp_wr = fp_wr->f_vnode) == NULL) || (vp_wr->v_type != VFIFO)) {
+ /* Invalid fifo fd. */
+ UF_EXIT(ufp_wr);
+ mutex_exit(&fip->fi_lock);
+ mutex_enter(&prp->p_lock);
+ sprunlock(prp);
+ return (-1);
+ }
+
+ /*
+ * Now we need to find the read end of the fifo (for reasons
+ * explained below.) We assume that the read end of the fifo
+ * is in the same process as the write end.
+ */
+ vp_rd = fifo_peer_vp(fp_wr->f_vnode);
+ for (i = 0; i < fip->fi_nfiles; i++) {
+ UF_ENTER(ufp_rd, fip, i);
+ if (((fp_rd = ufp_rd->uf_file) != NULL) &&
+ (fp_rd->f_vnode == vp_rd))
+ break;
+ UF_EXIT(ufp_rd);
+ }
+ if (i == fip->fi_nfiles) {
+ /* Didn't find it. */
+ UF_EXIT(ufp_wr);
+ mutex_exit(&fip->fi_lock);
+ mutex_enter(&prp->p_lock);
+ sprunlock(prp);
+ return (-1);
+ }
+
+ /*
+ * We need to drop fi_lock before we can try to acquire f_tlock
+ * the good news is that the file pointers are protected because
+ * we're still holding uf_lock.
+ */
+ mutex_exit(&fip->fi_lock);
+
+ /*
+ * Here we bump the open counts on the fifos. The reason
+ * that we do this is because when we go to write to the
+ * fifo we want to ensure that they are actually open (and
+ * not in the process of being closed) without having to
+ * stop the automounter. (If the write end of the fifo
+ * were closed and we tried to write to it we would panic.
+ * If the read end of the fifo was closed and we tried to
+ * write to the other end, the process that invoked the
+ * lookup operation would get an unexpected SIGPIPE.)
+ */
+ mutex_enter(&fp_wr->f_tlock);
+ fp_wr->f_count++;
+ ASSERT(fp_wr->f_count >= 2);
+ mutex_exit(&fp_wr->f_tlock);
+
+ mutex_enter(&fp_rd->f_tlock);
+ fp_rd->f_count++;
+ ASSERT(fp_rd->f_count >= 2);
+ mutex_exit(&fp_rd->f_tlock);
+
+ /* Release all our locks. */
+ UF_EXIT(ufp_wr);
+ UF_EXIT(ufp_rd);
+ mutex_enter(&prp->p_lock);
+ sprunlock(prp);
+
+ /* Return the file pointers. */
+ *fpp_rd = fp_rd;
+ *fpp_wr = fp_wr;
+ return (0);
+}
+
+static uint_t
+/*ARGSUSED*/
+i_fifo_close_cb(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
+{
+ int *id = (int *)arg;
+ /* Return the key and terminate the walk. */
+ *id = (uintptr_t)key;
+ return (MH_WALK_TERMINATE);
+}
+
+static void
+i_fifo_close(lx_autofs_vfs_t *data)
+{
+ /*
+ * Close the fifo to prevent any future requests from
+ * getting sent to the automounter.
+ */
+ mutex_enter(&data->lav_lock);
+ if (data->lav_fifo_wr != NULL) {
+ (void) closef(data->lav_fifo_wr);
+ data->lav_fifo_wr = NULL;
+ }
+ if (data->lav_fifo_rd != NULL) {
+ (void) closef(data->lav_fifo_rd);
+ data->lav_fifo_rd = NULL;
+ }
+ mutex_exit(&data->lav_lock);
+
+ /*
+ * Wakeup any threads currently waiting for the automounter
+ * note that it's possible for multiple threads to have entered
+ * this function and to be doing the work below simultaneously.
+ */
+ for (;;) {
+ lx_autofs_lookup_req_t *lalr;
+ int id;
+
+ /* Lookup the first entry in the hash. */
+ id = -1;
+ mod_hash_walk(data->lav_id_hash,
+ i_fifo_close_cb, &id);
+ if (id == -1) {
+ /* No more id's in the hash. */
+ break;
+ }
+ if ((lalr = i_lalr_find(data, id)) == NULL) {
+ /* Someone else beat us to it. */
+ continue;
+ }
+
+ /* Mark the request as compleate and release it. */
+ i_lalr_complete(data, lalr);
+ i_lalr_release(data, lalr);
+ }
+}
+
+static int
+i_fifo_verify_rd(lx_autofs_vfs_t *data)
+{
+ proc_t *prp;
+ uf_info_t *fip;
+ uf_entry_t *ufp_rd = NULL;
+ file_t *fp_rd = NULL;
+ vnode_t *vp_rd;
+ int i;
+
+ ASSERT(MUTEX_HELD((&data->lav_lock)));
+
+ /* Check if we've already been shut down. */
+ if (data->lav_fifo_wr == NULL) {
+ ASSERT(data->lav_fifo_rd == NULL);
+ return (-1);
+ }
+ vp_rd = fifo_peer_vp(data->lav_fifo_wr->f_vnode);
+
+ /*
+ * sprlock() is zone aware, so assuming this mount call was
+ * initiated by a process in a zone, if it tries to specify
+ * a pgrp outside of it's zone this call will fail.
+ *
+ * Also, we want to grab hold of the main automounter process
+ * and its going to be the group leader for pgrp, so its
+ * pid will be equal to pgrp.
+ */
+ prp = sprlock(data->lav_pgrp);
+ if (prp == NULL)
+ return (-1);
+ mutex_exit(&prp->p_lock);
+
+ /* Now we want to access the processes open file descriptors. */
+ fip = P_FINFO(prp);
+ mutex_enter(&fip->fi_lock);
+
+ /*
+ * Now we need to find the read end of the fifo (for reasons
+ * explained below.) We assume that the read end of the fifo
+ * is in the same process as the write end.
+ */
+ for (i = 0; i < fip->fi_nfiles; i++) {
+ UF_ENTER(ufp_rd, fip, i);
+ if (((fp_rd = ufp_rd->uf_file) != NULL) &&
+ (fp_rd->f_vnode == vp_rd))
+ break;
+ UF_EXIT(ufp_rd);
+ }
+ if (i == fip->fi_nfiles) {
+ /* Didn't find it. */
+ mutex_exit(&fip->fi_lock);
+ mutex_enter(&prp->p_lock);
+ sprunlock(prp);
+ return (-1);
+ }
+
+ /*
+ * Seems the automounter still has the read end of the fifo
+ * open, we're done here. Release all our locks and exit.
+ */
+ mutex_exit(&fip->fi_lock);
+ UF_EXIT(ufp_rd);
+ mutex_enter(&prp->p_lock);
+ sprunlock(prp);
+
+ return (0);
+}
+
+static int
+i_fifo_write(lx_autofs_vfs_t *data, lx_autofs_pkt_t *lap)
+{
+ struct uio uio;
+ struct iovec iov;
+ file_t *fp_wr, *fp_rd;
+ int error;
+
+ /*
+ * The catch here is we need to make sure _we_ don't close
+ * the the fifo while writing to it. (Another thread could come
+ * along and realize the automounter process is gone and close
+ * the fifo. To do this we bump the open count before we
+ * write to the fifo.
+ */
+ mutex_enter(&data->lav_lock);
+ if (data->lav_fifo_wr == NULL) {
+ ASSERT(data->lav_fifo_rd == NULL);
+ mutex_exit(&data->lav_lock);
+ return (ENOENT);
+ }
+ fp_wr = data->lav_fifo_wr;
+ fp_rd = data->lav_fifo_rd;
+
+ /* Bump the open count on the write fifo. */
+ mutex_enter(&fp_wr->f_tlock);
+ fp_wr->f_count++;
+ mutex_exit(&fp_wr->f_tlock);
+
+ /* Bump the open count on the read fifo. */
+ mutex_enter(&fp_rd->f_tlock);
+ fp_rd->f_count++;
+ mutex_exit(&fp_rd->f_tlock);
+
+ mutex_exit(&data->lav_lock);
+
+ iov.iov_base = (caddr_t)lap;
+ iov.iov_len = sizeof (*lap);
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ uio.uio_loffset = 0;
+ uio.uio_segflg = (short)UIO_SYSSPACE;
+ uio.uio_resid = sizeof (*lap);
+ uio.uio_llimit = 0;
+ uio.uio_fmode = FWRITE | FNDELAY | FNONBLOCK;
+
+ error = VOP_WRITE(fp_wr->f_vnode, &uio, 0, kcred, NULL);
+ (void) closef(fp_wr);
+ (void) closef(fp_rd);
+
+ /*
+ * After every write we verify that the automounter still has
+ * these files open.
+ */
+ mutex_enter(&data->lav_lock);
+ if (i_fifo_verify_rd(data) != 0) {
+ /*
+ * Something happened to the automounter.
+ * Close down the communication pipe we setup.
+ */
+ mutex_exit(&data->lav_lock);
+ i_fifo_close(data);
+ if (error != 0)
+ return (error);
+ return (ENOENT);
+ }
+ mutex_exit(&data->lav_lock);
+
+ return (error);
+}
+
+static int
+i_bs_readdir(vnode_t *dvp, list_t *dir_stack, list_t *file_stack)
+{
+ struct iovec iov;
+ struct uio uio;
+ dirent64_t *dp, *dbuf;
+ vnode_t *vp;
+ size_t dlen, dbuflen;
+ int eof, error, ndirents = 64;
+ char *nm;
+
+ dlen = ndirents * (sizeof (*dbuf));
+ dbuf = kmem_alloc(dlen, KM_SLEEP);
+
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ uio.uio_segflg = UIO_SYSSPACE;
+ uio.uio_fmode = 0;
+ uio.uio_extflg = UIO_COPY_CACHED;
+ uio.uio_loffset = 0;
+ uio.uio_llimit = MAXOFFSET_T;
+
+ eof = 0;
+ error = 0;
+ while (!error && !eof) {
+ uio.uio_resid = dlen;
+ iov.iov_base = (char *)dbuf;
+ iov.iov_len = dlen;
+
+ (void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL);
+ if (VOP_READDIR(dvp, &uio, kcred, &eof, NULL, 0) != 0) {
+ VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL);
+ kmem_free(dbuf, dlen);
+ return (-1);
+ }
+ VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL);
+
+ if ((dbuflen = dlen - uio.uio_resid) == 0) {
+ /* We're done. */
+ break;
+ }
+
+ for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
+ dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
+
+ nm = dp->d_name;
+
+ if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
+ continue;
+
+ if (VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, kcred,
+ NULL, NULL, NULL) != 0) {
+ kmem_free(dbuf, dlen);
+ return (-1);
+ }
+ if (vp->v_type == VDIR) {
+ if (dir_stack != NULL) {
+ i_stack_push(dir_stack, (caddr_t)dvp,
+ (caddr_t)vp, i_strdup(nm));
+ } else {
+ VN_RELE(vp);
+ }
+ } else {
+ if (file_stack != NULL) {
+ i_stack_push(file_stack, (caddr_t)dvp,
+ (caddr_t)vp, i_strdup(nm));
+ } else {
+ VN_RELE(vp);
+ }
+ }
+ }
+ }
+ kmem_free(dbuf, dlen);
+ return (0);
+}
+
+static void
+i_bs_destroy(vnode_t *dvp, char *path)
+{
+ list_t search_stack;
+ list_t dir_stack;
+ list_t file_stack;
+ vnode_t *pdvp, *vp;
+ char *dpath, *fpath;
+ int ret;
+
+ if (VOP_LOOKUP(dvp, path, &vp, NULL, 0, NULL, kcred,
+ NULL, NULL, NULL) != 0) {
+ /* A directory entry with this name doesn't actually exist. */
+ return;
+ }
+
+ if ((vp->v_type & VDIR) == 0) {
+ /* Easy, the directory entry is a file so delete it. */
+ VN_RELE(vp);
+ (void) VOP_REMOVE(dvp, path, kcred, NULL, 0);
+ return;
+ }
+
+ /*
+ * The directory entry is a subdirectory, now we have a bit more
+ * work to do. (We'll have to recurse into the sub directory.)
+ * It would have been much easier to do this recursively but kernel
+ * stacks are notoriously small.
+ */
+ i_stack_init(&search_stack);
+ i_stack_init(&dir_stack);
+ i_stack_init(&file_stack);
+
+ /* Save our newfound subdirectory into a list. */
+ i_stack_push(&search_stack, (caddr_t)dvp, (caddr_t)vp, i_strdup(path));
+
+ /* Do a recursive depth first search into the subdirectories. */
+ while (i_stack_pop(&search_stack,
+ (caddr_t *)&pdvp, (caddr_t *)&dvp, &dpath) == 0) {
+
+ /* Get a list of the subdirectories in this directory. */
+ if (i_bs_readdir(dvp, &search_stack, NULL) != 0)
+ goto exit;
+
+ /* Save the current directory a separate stack. */
+ i_stack_push(&dir_stack, (caddr_t)pdvp, (caddr_t)dvp, dpath);
+ }
+
+ /*
+ * Now dir_stack contains a list of directories, the deepest paths
+ * are at the top of the list. So let's go through and process them.
+ */
+ while (i_stack_pop(&dir_stack,
+ (caddr_t *)&pdvp, (caddr_t *)&dvp, &dpath) == 0) {
+
+ /* Get a list of the files in this directory. */
+ if (i_bs_readdir(dvp, NULL, &file_stack) != 0) {
+ VN_RELE(dvp);
+ i_strfree(dpath);
+ goto exit;
+ }
+
+ /* Delete all the files in this directory. */
+ while (i_stack_pop(&file_stack,
+ NULL, (caddr_t *)&vp, &fpath) == 0) {
+ VN_RELE(vp)
+ ret = VOP_REMOVE(dvp, fpath, kcred, NULL, 0);
+ i_strfree(fpath);
+ if (ret != 0) {
+ i_strfree(dpath);
+ goto exit;
+ }
+ }
+
+ /* Delete this directory. */
+ VN_RELE(dvp);
+ ret = VOP_RMDIR(pdvp, dpath, pdvp, kcred, NULL, 0);
+ i_strfree(dpath);
+ if (ret != 0)
+ goto exit;
+ }
+
+exit:
+ while (
+ (i_stack_pop(&search_stack, NULL, (caddr_t *)&vp, &path) == 0) ||
+ (i_stack_pop(&dir_stack, NULL, (caddr_t *)&vp, &path) == 0) ||
+ (i_stack_pop(&file_stack, NULL, (caddr_t *)&vp, &path) == 0)) {
+ VN_RELE(vp);
+ i_strfree(path);
+ }
+ i_stack_fini(&search_stack);
+ i_stack_fini(&dir_stack);
+ i_stack_fini(&file_stack);
+}
+
+static vnode_t *
+i_bs_create(vnode_t *dvp, char *bs_name)
+{
+ vnode_t *vp;
+ vattr_t vattr;
+
+ /*
+ * After looking at the mkdir syscall path it seems we don't need
+ * to initialize all of the vattr_t structure.
+ */
+ bzero(&vattr, sizeof (vattr));
+ vattr.va_type = VDIR;
+ vattr.va_mode = 0755; /* u+rwx,og=rx */
+ vattr.va_mask = AT_TYPE|AT_MODE;
+
+ if (VOP_MKDIR(dvp, bs_name, &vattr, &vp, kcred, NULL, 0, NULL) != 0)
+ return (NULL);
+ return (vp);
+}
+
+static int
+i_automounter_call(vnode_t *dvp, char *nm)
+{
+ lx_autofs_lookup_req_t *lalr;
+ lx_autofs_vfs_t *data;
+ int error, dup_request;
+
+ /* Get a pointer to the vfs mount data. */
+ data = dvp->v_vfsp->vfs_data;
+
+ /* The automounter only support queries in the root directory. */
+ if (dvp != data->lav_root)
+ return (ENOENT);
+
+ /*
+ * Check if the current process is in the automounters process
+ * group. (If it is, the current process is either the autmounter
+ * itself or one of it's forked child processes.) If so, don't
+ * redirect this lookup back into the automounter because we'll
+ * hang.
+ */
+ mutex_enter(&pidlock);
+ if (data->lav_pgrp == curproc->p_pgrp) {
+ mutex_exit(&pidlock);
+ return (ENOENT);
+ }
+ mutex_exit(&pidlock);
+
+ /* Verify that the automount process pipe still exists. */
+ mutex_enter(&data->lav_lock);
+ if (data->lav_fifo_wr == NULL) {
+ ASSERT(data->lav_fifo_rd == NULL);
+ mutex_exit(&data->lav_lock);
+ return (ENOENT);
+ }
+ mutex_exit(&data->lav_lock);
+
+ /* Allocate an automounter request structure. */
+ if ((lalr = i_lalr_alloc(data, &dup_request, nm)) == NULL)
+ return (ENOENT);
+
+ /*
+ * If we were the first one to allocate this request then we
+ * need to send it to the automounter.
+ */
+ if ((!dup_request) &&
+ ((error = i_fifo_write(data, &lalr->lalr_pkt)) != 0)) {
+ /*
+ * Unable to send the request to the automounter.
+ * Unblock any other threads waiting on the request
+ * and release the request.
+ */
+ i_lalr_complete(data, lalr);
+ i_lalr_release(data, lalr);
+ return (error);
+ }
+
+ /* Wait for someone to signal us that this request has compleated. */
+ mutex_enter(&lalr->lalr_lock);
+ while (!lalr->lalr_complete) {
+ if (cv_wait_sig(&lalr->lalr_cv, &lalr->lalr_lock) == 0) {
+ /* We got a signal, abort this lookup. */
+ mutex_exit(&lalr->lalr_lock);
+ i_lalr_abort(data, lalr);
+ return (EINTR);
+ }
+ }
+ mutex_exit(&lalr->lalr_lock);
+ i_lalr_release(data, lalr);
+
+ return (0);
+}
+
+static int
+i_automounter_ioctl(vnode_t *vp, int cmd, intptr_t arg)
+{
+ lx_autofs_vfs_t *data = (lx_autofs_vfs_t *)vp->v_vfsp->vfs_data;
+
+ /*
+ * Be strict.
+ * We only accept ioctls from the automounter process group.
+ */
+ mutex_enter(&pidlock);
+ if (data->lav_pgrp != curproc->p_pgrp) {
+ mutex_exit(&pidlock);
+ return (ENOENT);
+ }
+ mutex_exit(&pidlock);
+
+ if ((cmd == LX_AUTOFS_IOC_READY) || (cmd == LX_AUTOFS_IOC_FAIL)) {
+ lx_autofs_lookup_req_t *lalr;
+ int id = arg;
+
+ /*
+ * We don't actually care if the request failed or succeeded.
+ * We do the same thing either way.
+ */
+ if ((lalr = i_lalr_find(data, id)) == NULL)
+ return (ENXIO);
+
+ /* Mark the request as compleate and release it. */
+ i_lalr_complete(data, lalr);
+ i_lalr_release(data, lalr);
+ return (0);
+ }
+ if (cmd == LX_AUTOFS_IOC_CATATONIC) {
+ /* The automounter is shutting down. */
+ i_fifo_close(data);
+ return (0);
+ }
+ return (ENOTSUP);
+}
+
+static int
+i_parse_mntopt(vfs_t *vfsp, lx_autofs_vfs_t *data)
+{
+ char *fd_str, *pgrp_str, *minproto_str, *maxproto_str;
+ int fd, pgrp, minproto, maxproto;
+ file_t *fp_wr, *fp_rd;
+
+ /* Require all options to be present. */
+ if ((vfs_optionisset(vfsp, LX_MNTOPT_FD, &fd_str) != 1) ||
+ (vfs_optionisset(vfsp, LX_MNTOPT_PGRP, &pgrp_str) != 1) ||
+ (vfs_optionisset(vfsp, LX_MNTOPT_MINPROTO, &minproto_str) != 1) ||
+ (vfs_optionisset(vfsp, LX_MNTOPT_MAXPROTO, &maxproto_str) != 1))
+ return (EINVAL);
+
+ /* Get the values for each parameter. */
+ if ((i_str_to_int(fd_str, &fd) != 0) ||
+ (i_str_to_int(pgrp_str, &pgrp) != 0) ||
+ (i_str_to_int(minproto_str, &minproto) != 0) ||
+ (i_str_to_int(maxproto_str, &maxproto) != 0))
+ return (EINVAL);
+
+ /*
+ * We support v2 of the linux kernel automounter protocol.
+ * Make sure the mount request we got indicates support
+ * for this version of the protocol.
+ */
+ if ((minproto > 2) || (maxproto < 2))
+ return (EINVAL);
+
+ /*
+ * Now we need to lookup the fifos we'll be using
+ * to talk to the userland automounter process.
+ */
+ if (i_fifo_lookup(pgrp, fd, &fp_wr, &fp_rd) != 0)
+ return (EINVAL);
+
+ /* Save the mount options and fifo pointers. */
+ data->lav_fd = fd;
+ data->lav_pgrp = pgrp;
+ data->lav_fifo_rd = fp_rd;
+ data->lav_fifo_wr = fp_wr;
+ return (0);
+}
+
+/*
+ * VFS entry points
+ */
+static int
+lx_autofs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
+{
+ lx_autofs_vfs_t *data;
+ dev_t dev;
+ char name[40];
+ int error;
+
+ if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
+ return (EPERM);
+
+ if (mvp->v_type != VDIR)
+ return (ENOTDIR);
+
+ if ((uap->flags & MS_OVERLAY) == 0 &&
+ (mvp->v_count > 1 || (mvp->v_flag & VROOT)))
+ return (EBUSY);
+
+ /* We don't support mountes in the global zone. */
+ if (getzoneid() == GLOBAL_ZONEID)
+ return (EPERM);
+
+ /* We don't support mounting on top of ourselves. */
+ if (vn_matchops(mvp, lx_autofs_vn_ops))
+ return (EPERM);
+
+ /* Allocate a vfs struct. */
+ data = kmem_zalloc(sizeof (lx_autofs_vfs_t), KM_SLEEP);
+
+ /* Parse mount options. */
+ if ((error = i_parse_mntopt(vfsp, data)) != 0) {
+ kmem_free(data, sizeof (lx_autofs_vfs_t));
+ return (error);
+ }
+
+ /* Initialize the backing store. */
+ i_bs_destroy(mvp, LX_AUTOFS_BS_DIR);
+ if ((data->lav_bs_vp = i_bs_create(mvp, LX_AUTOFS_BS_DIR)) == NULL) {
+ kmem_free(data, sizeof (lx_autofs_vfs_t));
+ return (EBUSY);
+ }
+ data->lav_bs_name = LX_AUTOFS_BS_DIR;
+
+ /* We have to hold the underlying vnode we're mounted on. */
+ data->lav_mvp = mvp;
+ VN_HOLD(mvp);
+
+ /* Initialize vfs fields */
+ vfsp->vfs_bsize = DEV_BSIZE;
+ vfsp->vfs_fstype = lx_autofs_fstype;
+ vfsp->vfs_data = data;
+
+ /* Invent a dev_t (sigh) */
+ do {
+ dev = makedevice(lx_autofs_major,
+ atomic_add_32_nv(&lx_autofs_minor, 1) & L_MAXMIN32);
+ } while (vfs_devismounted(dev));
+ vfsp->vfs_dev = dev;
+ vfs_make_fsid(&vfsp->vfs_fsid, dev, lx_autofs_fstype);
+
+ /* Create an id space arena for automounter requests. */
+ (void) snprintf(name, sizeof (name), "lx_autofs_id_%d",
+ getminor(vfsp->vfs_dev));
+ data->lav_ids = id_space_create(name, 1, INT_MAX);
+
+ /* Create hashes to keep track of automounter requests. */
+ mutex_init(&data->lav_lock, NULL, MUTEX_DEFAULT, NULL);
+ (void) snprintf(name, sizeof (name), "lx_autofs_path_hash_%d",
+ getminor(vfsp->vfs_dev));
+ data->lav_path_hash = mod_hash_create_strhash(name,
+ LX_AUTOFS_VFS_PATH_HASH_SIZE, mod_hash_null_valdtor);
+ (void) snprintf(name, sizeof (name), "lx_autofs_id_hash_%d",
+ getminor(vfsp->vfs_dev));
+ data->lav_id_hash = mod_hash_create_idhash(name,
+ LX_AUTOFS_VFS_ID_HASH_SIZE, mod_hash_null_valdtor);
+
+ /* Create a hash to keep track of vnodes. */
+ (void) snprintf(name, sizeof (name), "lx_autofs_vn_hash_%d",
+ getminor(vfsp->vfs_dev));
+ data->lav_vn_hash = mod_hash_create_ptrhash(name,
+ LX_AUTOFS_VFS_VN_HASH_SIZE, mod_hash_null_valdtor,
+ sizeof (vnode_t));
+
+ /* Create root vnode */
+ data->lav_root = i_vn_alloc(vfsp, data->lav_bs_vp);
+ data->lav_root->v_flag |=
+ VROOT | VNOCACHE | VNOMAP | VNOSWAP | VNOMOUNT;
+
+ return (0);
+}
+
+static int
+lx_autofs_unmount(vfs_t *vfsp, int flag, struct cred *cr)
+{
+ lx_autofs_vfs_t *data;
+
+ if (secpolicy_fs_unmount(cr, vfsp) != 0)
+ return (EPERM);
+
+ /* We do not currently support forced unmounts. */
+ if (flag & MS_FORCE)
+ return (ENOTSUP);
+
+ /*
+ * We should never have a reference count of less than 2: one for the
+ * caller, one for the root vnode.
+ */
+ ASSERT(vfsp->vfs_count >= 2);
+
+ /* If there are any outstanding vnodes, we can't unmount. */
+ if (vfsp->vfs_count > 2)
+ return (EBUSY);
+
+ /* Check for any remaining holds on the root vnode. */
+ data = vfsp->vfs_data;
+ ASSERT(data->lav_root->v_vfsp == vfsp);
+ if (data->lav_root->v_count > 1)
+ return (EBUSY);
+
+ /* Close the fifo to the automount process. */
+ if (data->lav_fifo_wr != NULL)
+ (void) closef(data->lav_fifo_wr);
+ if (data->lav_fifo_rd != NULL)
+ (void) closef(data->lav_fifo_rd);
+
+ /*
+ * We have to release our hold on our root vnode before we can
+ * delete the backing store. (Since the root vnode is linked
+ * to the backing store.)
+ */
+ VN_RELE(data->lav_root);
+
+ /* Cleanup the backing store. */
+ i_bs_destroy(data->lav_mvp, data->lav_bs_name);
+ VN_RELE(data->lav_mvp);
+
+ /* Cleanup out remaining data structures. */
+ mod_hash_destroy_strhash(data->lav_path_hash);
+ mod_hash_destroy_idhash(data->lav_id_hash);
+ mod_hash_destroy_ptrhash(data->lav_vn_hash);
+ id_space_destroy(data->lav_ids);
+ kmem_free(data, sizeof (lx_autofs_vfs_t));
+
+ return (0);
+}
+
+static int
+lx_autofs_root(vfs_t *vfsp, vnode_t **vpp)
+{
+ lx_autofs_vfs_t *data = vfsp->vfs_data;
+
+ *vpp = data->lav_root;
+ VN_HOLD(*vpp);
+
+ return (0);
+}
+
+static int
+lx_autofs_statvfs(vfs_t *vfsp, statvfs64_t *sp)
+{
+ lx_autofs_vfs_t *data = vfsp->vfs_data;
+ vnode_t *urvp = data->lav_root->v_data;
+ dev32_t d32;
+ int error;
+
+ if ((error = VFS_STATVFS(urvp->v_vfsp, sp)) != 0)
+ return (error);
+
+ /* Update some of values before returning. */
+ (void) cmpldev(&d32, vfsp->vfs_dev);
+ sp->f_fsid = d32;
+ (void) strlcpy(sp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name,
+ sizeof (sp->f_basetype));
+ sp->f_flag = vf_to_stf(vfsp->vfs_flag);
+ bzero(sp->f_fstr, sizeof (sp->f_fstr));
+ return (0);
+}
+
+static const fs_operation_def_t lx_autofs_vfstops[] = {
+ { VFSNAME_MOUNT, { .vfs_mount = lx_autofs_mount } },
+ { VFSNAME_UNMOUNT, { .vfs_unmount = lx_autofs_unmount } },
+ { VFSNAME_ROOT, { .vfs_root = lx_autofs_root } },
+ { VFSNAME_STATVFS, { .vfs_statvfs = lx_autofs_statvfs } },
+ { NULL, NULL }
+};
+
+/*
+ * VOP entry points - simple passthrough
+ *
+ * For most VOP entry points we can simply pass the request on to
+ * the underlying filesystem we're mounted on.
+ */
+static int
+lx_autofs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
+ caller_context_t *ctp)
+{
+ vnode_t *uvp = vp->v_data;
+ return (VOP_CLOSE(uvp, flag, count, offset, cr, ctp));
+}
+
+static int
+lx_autofs_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp,
+ caller_context_t *ctp, int flags)
+{
+ vnode_t *uvp = vp->v_data;
+ return (VOP_READDIR(uvp, uiop, cr, eofp, ctp, flags));
+}
+
+static int
+lx_autofs_access(vnode_t *vp, int mode, int flags, cred_t *cr,
+ caller_context_t *ctp)
+{
+ vnode_t *uvp = vp->v_data;
+ return (VOP_ACCESS(uvp, mode, flags, cr, ctp));
+}
+
+static int
+lx_autofs_rwlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
+{
+ vnode_t *uvp = vp->v_data;
+ return (VOP_RWLOCK(uvp, write_lock, ctp));
+}
+
+static void
+lx_autofs_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
+{
+ vnode_t *uvp = vp->v_data;
+ VOP_RWUNLOCK(uvp, write_lock, ctp);
+}
+
+/*ARGSUSED*/
+static int
+lx_autofs_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
+ caller_context_t *ctp, int flags)
+{
+ vnode_t *udvp = dvp->v_data;
+
+ /*
+ * cdir is the calling processes current directory.
+ * If cdir is lx_autofs vnode then get its real underlying
+ * vnode ptr. (It seems like the only thing cdir is
+ * ever used for is to make sure the user doesn't delete
+ * their current directory.)
+ */
+ if (vn_matchops(cdir, lx_autofs_vn_ops)) {
+ vnode_t *ucdir = cdir->v_data;
+ return (VOP_RMDIR(udvp, nm, ucdir, cr, ctp, flags));
+ }
+
+ return (VOP_RMDIR(udvp, nm, cdir, cr, ctp, flags));
+}
+
+/*
+ * VOP entry points - special passthrough
+ *
+ * For some VOP entry points we will first pass the request on to
+ * the underlying filesystem we're mounted on. If there's an error
+ * then we immediately return the error, but if the request succeeds
+ * we have to do some extra work before returning.
+ */
+static int
+lx_autofs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ctp)
+{
+ vnode_t *ovp = *vpp;
+ vnode_t *uvp = ovp->v_data;
+ int error;
+
+ if ((error = VOP_OPEN(&uvp, flag, cr, ctp)) != 0)
+ return (error);
+
+ /* Check for clone opens. */
+ if (uvp == ovp->v_data)
+ return (0);
+
+ /* Deal with clone opens by returning a new vnode. */
+ *vpp = i_vn_alloc(ovp->v_vfsp, uvp);
+ VN_RELE(ovp);
+ return (0);
+}
+
+static int
+lx_autofs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
+ caller_context_t *ctp)
+{
+ vnode_t *uvp = vp->v_data;
+ int error;
+
+ if ((error = VOP_GETATTR(uvp, vap, flags, cr, ctp)) != 0)
+ return (error);
+
+ /* Update the attributes with our filesystem id. */
+ vap->va_fsid = vp->v_vfsp->vfs_dev;
+ return (0);
+}
+
+static int
+lx_autofs_mkdir(vnode_t *dvp, char *nm, struct vattr *vap, vnode_t **vpp,
+ cred_t *cr, caller_context_t *ctp, int flags, vsecattr_t *vsecp)
+{
+ vnode_t *udvp = dvp->v_data;
+ vnode_t *uvp = NULL;
+ int error;
+
+ if ((error = VOP_MKDIR(udvp, nm, vap, &uvp, cr,
+ ctp, flags, vsecp)) != 0)
+ return (error);
+
+ /* Update the attributes with our filesystem id. */
+ vap->va_fsid = dvp->v_vfsp->vfs_dev;
+
+ /* Allocate a new vnode. */
+ *vpp = i_vn_alloc(dvp->v_vfsp, uvp);
+ return (0);
+}
+
+/*
+ * VOP entry points - custom
+ */
+/*ARGSUSED*/
+static void
+lx_autofs_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ctp)
+{
+ lx_autofs_vfs_t *data = vp->v_vfsp->vfs_data;
+
+ /*
+ * We need to hold the vfs lock because if we're going to free
+ * this vnode we have to prevent anyone from looking it up
+ * in the vnode hash.
+ */
+ mutex_enter(&data->lav_lock);
+ mutex_enter(&vp->v_lock);
+
+ if (vp->v_count < 1) {
+ panic("lx_autofs_inactive: bad v_count");
+ /*NOTREACHED*/
+ }
+
+ /* Drop the temporary hold by vn_rele now. */
+ if (--vp->v_count > 0) {
+ mutex_exit(&vp->v_lock);
+ mutex_exit(&data->lav_lock);
+ return;
+ }
+
+ /*
+ * No one should have been blocked on this lock because we're
+ * about to free this vnode.
+ */
+ i_vn_free(vp);
+}
+
+static int
+lx_autofs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
+ int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ctp,
+ int *direntflags, pathname_t *realpnp)
+{
+ vnode_t *udvp = dvp->v_data;
+ vnode_t *uvp = NULL;
+ int error;
+
+ /* First try to lookup if this path component already exitst. */
+ if ((error = VOP_LOOKUP(udvp, nm, &uvp, pnp, flags, rdir, cr, ctp,
+ direntflags, realpnp)) == 0) {
+ *vpp = i_vn_alloc(dvp->v_vfsp, uvp);
+ return (0);
+ }
+
+ /* Only query the automounter if the path does not exist. */
+ if (error != ENOENT)
+ return (error);
+
+ /* Refer the lookup to the automounter. */
+ if ((error = i_automounter_call(dvp, nm)) != 0)
+ return (error);
+
+ /* Retry the lookup operation. */
+ if ((error = VOP_LOOKUP(udvp, nm, &uvp, pnp, flags, rdir, cr, ctp,
+ direntflags, realpnp)) == 0) {
+ *vpp = i_vn_alloc(dvp->v_vfsp, uvp);
+ return (0);
+ }
+ return (error);
+}
+
+/*ARGSUSED*/
+static int
+lx_autofs_ioctl(vnode_t *vp, int cmd, intptr_t arg, int mode, cred_t *cr,
+ int *rvalp, caller_context_t *ctp)
+{
+ vnode_t *uvp = vp->v_data;
+
+ /* Intercept certain ioctls. */
+ switch ((uint_t)cmd) {
+ case LX_AUTOFS_IOC_READY:
+ case LX_AUTOFS_IOC_FAIL:
+ case LX_AUTOFS_IOC_CATATONIC:
+ case LX_AUTOFS_IOC_EXPIRE:
+ case LX_AUTOFS_IOC_PROTOVER:
+ case LX_AUTOFS_IOC_SETTIMEOUT:
+ return (i_automounter_ioctl(vp, cmd, arg));
+ }
+
+ /* Pass any remaining ioctl on. */
+ return (VOP_IOCTL(uvp, cmd, arg, mode, cr, rvalp, ctp));
+}
+
+/*
+ * VOP entry points definitions
+ */
+static const fs_operation_def_t lx_autofs_tops_root[] = {
+ { VOPNAME_OPEN, { .vop_open = lx_autofs_open } },
+ { VOPNAME_CLOSE, { .vop_close = lx_autofs_close } },
+ { VOPNAME_IOCTL, { .vop_ioctl = lx_autofs_ioctl } },
+ { VOPNAME_RWLOCK, { .vop_rwlock = lx_autofs_rwlock } },
+ { VOPNAME_RWUNLOCK, { .vop_rwunlock = lx_autofs_rwunlock } },
+ { VOPNAME_GETATTR, { .vop_getattr = lx_autofs_getattr } },
+ { VOPNAME_ACCESS, { .vop_access = lx_autofs_access } },
+ { VOPNAME_READDIR, { .vop_readdir = lx_autofs_readdir } },
+ { VOPNAME_LOOKUP, { .vop_lookup = lx_autofs_lookup } },
+ { VOPNAME_INACTIVE, { .vop_inactive = lx_autofs_inactive } },
+ { VOPNAME_MKDIR, { .vop_mkdir = lx_autofs_mkdir } },
+ { VOPNAME_RMDIR, { .vop_rmdir = lx_autofs_rmdir } },
+ { NULL }
+};
+
+/*
+ * lx_autofs_init() gets invoked via the mod_install() call in
+ * this modules _init() routine. Therefor, the code that cleans
+ * up the structures we allocate below is actually found in
+ * our _fini() routine.
+ */
+/* ARGSUSED */
+static int
+lx_autofs_init(int fstype, char *name)
+{
+ int error;
+
+ if ((lx_autofs_major =
+ (major_t)space_fetch(LX_AUTOFS_SPACE_KEY_UDEV)) == 0) {
+
+ if ((lx_autofs_major = getudev()) == (major_t)-1) {
+ cmn_err(CE_WARN, "lx_autofs_init: "
+ "can't get unique device number");
+ return (EAGAIN);
+ }
+
+ if (space_store(LX_AUTOFS_SPACE_KEY_UDEV,
+ (uintptr_t)lx_autofs_major) != 0) {
+ cmn_err(CE_WARN, "lx_autofs_init: "
+ "can't save unique device number");
+ return (EAGAIN);
+ }
+ }
+
+ lx_autofs_fstype = fstype;
+ if ((error = vfs_setfsops(
+ fstype, lx_autofs_vfstops, &lx_autofs_vfsops)) != 0) {
+ cmn_err(CE_WARN, "lx_autofs_init: bad vfs ops template");
+ return (error);
+ }
+
+ if ((error = vn_make_ops("lx_autofs vnode ops",
+ lx_autofs_tops_root, &lx_autofs_vn_ops)) != 0) {
+ VERIFY(vfs_freevfsops_by_type(fstype) == 0);
+ lx_autofs_vn_ops = NULL;
+ return (error);
+ }
+
+ return (0);
+}
+
+
+/*
+ * Module linkage
+ */
+static mntopt_t lx_autofs_mntopt[] = {
+ { LX_MNTOPT_FD, NULL, 0, MO_HASVALUE },
+ { LX_MNTOPT_PGRP, NULL, 0, MO_HASVALUE },
+ { LX_MNTOPT_MINPROTO, NULL, 0, MO_HASVALUE },
+ { LX_MNTOPT_MAXPROTO, NULL, 0, MO_HASVALUE }
+};
+
+static mntopts_t lx_autofs_mntopts = {
+ sizeof (lx_autofs_mntopt) / sizeof (mntopt_t),
+ lx_autofs_mntopt
+};
+
+static vfsdef_t vfw = {
+ VFSDEF_VERSION,
+ LX_AUTOFS_NAME,
+ lx_autofs_init,
+ VSW_HASPROTO | VSW_VOLATILEDEV,
+ &lx_autofs_mntopts
+};
+
+extern struct mod_ops mod_fsops;
+
+static struct modlfs modlfs = {
+ &mod_fsops, "linux autofs filesystem", &vfw
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, (void *)&modlfs, NULL
+};
+
+int
+_init(void)
+{
+ return (mod_install(&modlinkage));
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ int error;
+
+ if ((error = mod_remove(&modlinkage)) != 0)
+ return (error);
+
+ if (lx_autofs_vn_ops != NULL) {
+ vn_freevnodeops(lx_autofs_vn_ops);
+ lx_autofs_vn_ops = NULL;
+ }
+
+ /*
+ * In our init routine, if we get an error after calling
+ * vfs_setfsops() we cleanup by calling vfs_freevfsops_by_type().
+ * But we don't need to call vfs_freevfsops_by_type() here
+ * because the fs framework did this for us as part of the
+ * mod_remove() call above.
+ */
+ return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/dtrace/lx_systrace.c b/usr/src/uts/common/brand/lx/dtrace/lx_systrace.c
new file mode 100644
index 0000000000..b4e38f062a
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/dtrace/lx_systrace.c
@@ -0,0 +1,1196 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+
+#include <sys/modctl.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/stat.h>
+#include <sys/conf.h>
+#include <sys/frame.h>
+#include <sys/dtrace.h>
+#include <sys/dtrace_impl.h>
+
+#include <sys/lx_impl.h>
+
+/*
+ * We store the syscall number in the low 16 bits (which limits us to 64k
+ * syscalls). The next bit indicates entry/return probe and the next bit
+ * indicates 64bit/32bit syscall.
+ */
+#define SCALL_MASK 0xffff
+#define ENTRY_FLAG 0x10000
+#define SYSC_64_BIT 0x100000
+
+#define LX_SYSTRACE_IS64BIT(x) ((int)(x) & SYSC_64_BIT)
+#define LX_SYSTRACE_ISENTRY(x) ((int)(x) & ENTRY_FLAG)
+#define LX_SYSTRACE_SYSNUM(x) ((int)(x) & SCALL_MASK)
+
+#define LX_SYSTRACE32_ENTRY(id) (ENTRY_FLAG | (id))
+#define LX_SYSTRACE32_RETURN(id) (id)
+
+#define LX_SYSTRACE64_ENTRY(id) (SYSC_64_BIT | ENTRY_FLAG | (id))
+#define LX_SYSTRACE64_RETURN(id) (SYSC_64_BIT | id)
+
+#define LX_SYSTRACE_ENTRY_AFRAMES 2
+#define LX_SYSTRACE_RETURN_AFRAMES 4
+
+typedef struct lx_sys_names {
+ char *sy_name;
+} lx_sys_names_t;
+
+static lx_sys_names_t lx_sysnames32[] =
+{
+ {"lx_nosys"}, /* 0 */
+ {"exit"}, /* 1 */
+ {"lx_fork"},
+ {"read"},
+ {"write"},
+ {"open"},
+ {"close"},
+ {"waitpid"},
+ {"creat"},
+ {"link"},
+ {"unlink"}, /* 10 */
+ {"exec"},
+ {"chdir"},
+ {"gtime"},
+ {"mknod"},
+ {"chmod"},
+ {"lchown16"},
+ {"break"},
+ {"stat"},
+ {"lseek"},
+ {"getpid"}, /* 20 */
+ {"mount"},
+ {"umount"},
+ {"setuid16"},
+ {"getuid16"},
+ {"stime"},
+ {"ptrace"},
+ {"alarm"},
+ {"fstat"},
+ {"pause"},
+ {"utime"}, /* 30 */
+ {"stty"},
+ {"gtty"},
+ {"access"},
+ {"nice"},
+ {"ftime"},
+ {"sync"},
+ {"kill"},
+ {"rename"},
+ {"mkdir"},
+ {"rmdir"}, /* 40 */
+ {"dup"},
+ {"pipe"},
+ {"times"},
+ {"prof"},
+ {"brk"},
+ {"setgid16"},
+ {"getgid16"},
+ {"signal"},
+ {"geteuid16"},
+ {"getegid16"}, /* 50 */
+ {"sysacct"},
+ {"umount2"},
+ {"lock"},
+ {"ioctl"},
+ {"fcntl"},
+ {"mpx"},
+ {"setpgid"},
+ {"ulimit"},
+ {"olduname"},
+ {"umask"}, /* 60 */
+ {"chroot"},
+ {"ustat"},
+ {"dup2"},
+ {"getppid"},
+ {"pgrp"},
+ {"setsid"},
+ {"sigaction"},
+ {"sgetmask"},
+ {"ssetmask"},
+ {"setreuid16"}, /* 70 */
+ {"setregid16"},
+ {"sigsuspend"},
+ {"sigpending"},
+ {"sethostname"},
+ {"setrlimit"},
+ {"old_getrlimit"},
+ {"getrusage"},
+ {"gettimeofday"},
+ {"settimeofday"},
+ {"getgroups16"}, /* 80 */
+ {"setgroups16"},
+ {"old_select"},
+ {"symlink"},
+ {"oldlstat"},
+ {"readlink"},
+ {"uselib"},
+ {"swapon"},
+ {"reboot"},
+ {"old_readdir"},
+ {"old_mmap"}, /* 90 */
+ {"munmap"},
+ {"truncate"},
+ {"ftruncate"},
+ {"fchmod"},
+ {"fchown16"},
+ {"getpriority"},
+ {"setpriority"},
+ {"profil"},
+ {"statfs"},
+ {"fstatfs"}, /* 100 */
+ {"ioperm"},
+ {"socketcall"},
+ {"syslog"},
+ {"setitimer"},
+ {"getitimer"},
+ {"newstat"},
+ {"newsltat"},
+ {"newsftat"},
+ {"uname"},
+ {"oldiopl"}, /* 110 */
+ {"oldvhangup"},
+ {"idle"},
+ {"vm86old"},
+ {"wait4"},
+ {"swapoff"},
+ {"sysinfo"},
+ {"ipc"},
+ {"fsync"},
+ {"sigreturn"},
+ {"clone"}, /* 120 */
+ {"setdomainname"},
+ {"newuname"},
+ {"modify_ldt"},
+ {"adjtimex"},
+ {"mprotect"},
+ {"sigprocmask"},
+ {"create_module"},
+ {"init_module"},
+ {"delete_module"},
+ {"get_kernel_syms"}, /* 130 */
+ {"quotactl"},
+ {"getpgid"},
+ {"fchdir"},
+ {"bdflush"},
+ {"sysfs"},
+ {"personality"},
+ {"afs_syscall"},
+ {"setfsuid16"},
+ {"setfsgid16"},
+ {"llseek"}, /* 140 */
+ {"getdents"},
+ {"select"},
+ {"flock"},
+ {"msync"},
+ {"readv"},
+ {"writev"},
+ {"getsid"},
+ {"fdatasync"},
+ {"sysctl"},
+ {"mlock"}, /* 150 */
+ {"munlock"},
+ {"mlockall"},
+ {"munlockall"},
+ {"sched_setparam"},
+ {"sched_getparam"},
+ {"sched_setscheduler"},
+ {"sched_getscheduler"},
+ {"yield"},
+ {"sched_get_priority_max"},
+ {"sched_get_priority_min"}, /* 160 */
+ {"sched_rr_get_interval"},
+ {"nanosleep"},
+ {"mremap"},
+ {"setresuid16"},
+ {"getresuid16"},
+ {"vm86"},
+ {"query_module"},
+ {"poll"},
+ {"nfsserctl"},
+ {"setresgid16"}, /* 170 */
+ {"getresgid16"},
+ {"prctl"},
+ {"rt_sigreturn"},
+ {"rt_sigaction"},
+ {"rt_sigprocmask"},
+ {"rt_sigpending"},
+ {"rt_sigtimedwait"},
+ {"rt_sigqueueinfo"},
+ {"rt_sigsuspend"},
+ {"pread64"}, /* 180 */
+ {"pwrite64"},
+ {"chown16"},
+ {"getcwd"},
+ {"capget"},
+ {"capset"},
+ {"sigaltstack"},
+ {"sendfile"},
+ {"getpmsg"},
+ {"putpmsg"},
+ {"vfork"}, /* 190 */
+ {"getrlimit"},
+ {"mmap2"},
+ {"truncate64"},
+ {"ftruncate64"},
+ {"stat64"},
+ {"lstat64"},
+ {"fstat64"},
+ {"lchown"},
+ {"getuid"},
+ {"getgid"}, /* 200 */
+ {"geteuid"},
+ {"getegid"},
+ {"setreuid"},
+ {"setregid"},
+ {"getgroups"},
+ {"setgroups"},
+ {"fchown"},
+ {"setresuid"},
+ {"getresuid"},
+ {"setresgid"}, /* 210 */
+ {"getresgid"},
+ {"chown"},
+ {"setuid"},
+ {"setgid"},
+ {"setfsuid"},
+ {"setfsgid"},
+ {"pivot_root"},
+ {"mincore"},
+ {"madvise"},
+ {"getdents64"}, /* 220 */
+ {"fcntl64"},
+ {"lx_nosys"},
+ {"security"},
+ {"gettid"},
+ {"readahead"},
+ {"setxattr"},
+ {"lsetxattr"},
+ {"fsetxattr"},
+ {"getxattr"},
+ {"lgetxattr"}, /* 230 */
+ {"fgetxattr"},
+ {"listxattr"},
+ {"llistxattr"},
+ {"flistxattr"},
+ {"removexattr"},
+ {"lremovexattr"},
+ {"fremovexattr"},
+ {"tkill"},
+ {"sendfile64"},
+ {"futex"}, /* 240 */
+ {"sched_setaffinity"},
+ {"sched_getaffinity"},
+ {"set_thread_area"},
+ {"get_thread_area"},
+ {"io_setup"},
+ {"io_destroy"},
+ {"io_getevents"},
+ {"io_submit"},
+ {"io_cancel"},
+ {"fadvise64"}, /* 250 */
+ {"lx_nosys"},
+ {"exit_group"},
+ {"lookup_dcookie"},
+ {"epoll_create"},
+ {"epoll_ctl"},
+ {"epoll_wait"},
+ {"remap_file_pages"},
+ {"set_tid_address"},
+ {"timer_create"},
+ {"timer_settime"}, /* 260 */
+ {"timer_gettime"},
+ {"timer_getoverrun"},
+ {"timer_delete"},
+ {"clock_settime"},
+ {"clock_gettime"},
+ {"clock_getres"},
+ {"clock_nanosleep"},
+ {"statfs64"},
+ {"fstatfs64"},
+ {"tgkill"}, /* 270 */
+ /* The following are Linux 2.6 system calls */
+ {"utimes"},
+ {"fadvise64_64"},
+ {"vserver"},
+ {"mbind"},
+ {"get_mempolicy"},
+ {"set_mempolicy"},
+ {"mq_open"},
+ {"mq_unlink"},
+ {"mq_timedsend"},
+ {"mq_timedreceive"}, /* 280 */
+ {"mq_notify"},
+ {"mq_getsetattr"},
+ {"kexec_load"},
+ {"waitid"},
+ {"sys_setaltroot"},
+ {"add_key"},
+ {"request_key"},
+ {"keyctl"},
+ {"ioprio_set"},
+ {"ioprio_get"}, /* 290 */
+ {"inotify_init"},
+ {"inotify_add_watch"},
+ {"inotify_rm_watch"},
+ {"migrate_pages"},
+ {"openat"},
+ {"mkdirat"},
+ {"mknodat"},
+ {"fchownat"},
+ {"futimesat"},
+ {"fstatat64"}, /* 300 */
+ {"unlinkat"},
+ {"renameat"},
+ {"linkat"},
+ {"syslinkat"},
+ {"readlinkat"},
+ {"fchmodat"},
+ {"faccessat"},
+ {"pselect6"},
+ {"ppoll"},
+ {"unshare"}, /* 310 */
+ {"set_robust_list"},
+ {"get_robust_list"},
+ {"splice"},
+ {"sync_file_range"},
+ {"tee"},
+ {"vmsplice"},
+ {"move_pages"},
+ {"getcpu"},
+ {"epoll_pwait"},
+ {"utimensat"}, /* 320 */
+ {"signalfd"},
+ {"timerfd_create"},
+ {"eventfd"},
+ {"fallocate"},
+ {"timerfd_settime"},
+ {"timerfd_gettime"},
+ {"signalfd4"},
+ {"eventfd2"},
+ {"epoll_create1"},
+ {"dup3"}, /* 330 */
+ {"pipe2"},
+ {"inotify_init1"},
+ {"preadv"},
+ {"pwritev"},
+ {"rt_tgsigqueueinfo"},
+ {"perf_event_open"},
+ {"recvmmsg"},
+ {"fanotify_init"},
+ {"fanotify_mark"},
+ {"prlimit64"}, /* 340 */
+ {"name_to_handle_at"},
+ {"open_by_handle_at"},
+ {"clock_adjtime"},
+ {"syncfs"},
+ {"sendmmsg"},
+ {"setns"},
+ {"process_vm_readv"},
+ {"process_vm_writev"},
+ {"kcmp"},
+ {"finit_module"}, /* 350 */
+ {"sched_setattr"},
+ {"sched_getattr"},
+ NULL /* NULL-termination is required for lx_systrace */
+};
+
+#if defined(_LP64)
+static lx_sys_names_t lx_sysnames64[] =
+{
+ {"read"}, /* 0 */
+ {"write"},
+ {"open"},
+ {"close"},
+ {"stat"},
+ {"fstat"},
+ {"lstat"},
+ {"poll"},
+ {"lseek"},
+ {"mmap"},
+ {"mprotect"}, /* 10 */
+ {"munmap"},
+ {"brk"},
+ {"rt_sigaction"},
+ {"rt_sigprocmask"},
+ {"rt_sigreturn"},
+ {"ioctl"},
+ {"pread64"},
+ {"pwrite64"},
+ {"readv"},
+ {"writev"}, /* 20 */
+ {"access"},
+ {"pipe"},
+ {"select"},
+ {"sched_yield"},
+ {"mremap"},
+ {"msync"},
+ {"mincore"},
+ {"madvise"},
+ {"shmget"},
+ {"shmat"}, /* 30 */
+ {"shmctl"},
+ {"dup"},
+ {"dup2"},
+ {"pause"},
+ {"nanosleep"},
+ {"getitimer"},
+ {"alarm"},
+ {"setitimer"},
+ {"getpid"},
+ {"sendfile"}, /* 40 */
+ {"socket"},
+ {"connect"},
+ {"accept"},
+ {"sendto"},
+ {"recvfrom"},
+ {"sendmsg"},
+ {"recvmsg"},
+ {"shutdown"},
+ {"bind"},
+ {"listen"}, /* 50 */
+ {"getsockname"},
+ {"getpeername"},
+ {"socketpair"},
+ {"setsockopt"},
+ {"getsockopt"},
+ {"clone"},
+ {"fork"},
+ {"vfork"},
+ {"execve"},
+ {"exit"}, /* 60 */
+ {"wait4"},
+ {"kill"},
+ {"uname"},
+ {"semget"},
+ {"semop"},
+ {"semctl"},
+ {"shmdt"},
+ {"msgget"},
+ {"msgsnd"},
+ {"msgrcv"}, /* 70 */
+ {"msgctl"},
+ {"fcntl"},
+ {"flock"},
+ {"fsync"},
+ {"fdatasync"},
+ {"truncate"},
+ {"ftruncate"},
+ {"getdents"},
+ {"getcwd"},
+ {"chdir"}, /* 80 */
+ {"fchdir"},
+ {"rename"},
+ {"mkdir"},
+ {"rmdir"},
+ {"creat"},
+ {"link"},
+ {"unlink"},
+ {"symlink"},
+ {"readlink"},
+ {"chmod"}, /* 90 */
+ {"fchmod"},
+ {"chown"},
+ {"fchown"},
+ {"lchown"},
+ {"umask"},
+ {"gettimeofday"},
+ {"getrlimit"},
+ {"getrusage"},
+ {"sysinfo"},
+ {"times"}, /* 100 */
+ {"ptrace"},
+ {"getuid"},
+ {"syslog"},
+ {"getgid"},
+ {"setuid"},
+ {"setgid"},
+ {"geteuid"},
+ {"getegid"},
+ {"setpgid"},
+ {"getppid"}, /* 110 */
+ {"getpgrp"},
+ {"setsid"},
+ {"setreuid"},
+ {"setregid"},
+ {"getgroups"},
+ {"setgroups"},
+ {"setresuid"},
+ {"getresuid"},
+ {"setresgid"},
+ {"getresgid"}, /* 120 */
+ {"getpgid"},
+ {"setfsuid"},
+ {"setfsgid"},
+ {"getsid"},
+ {"capget"},
+ {"capset"},
+ {"rt_sigpending"},
+ {"rt_sigtimedwait"},
+ {"rt_sigqueueinfo"},
+ {"rt_sigsuspend"}, /* 130 */
+ {"sigaltstack"},
+ {"utime"},
+ {"mknod"},
+ {"uselib"},
+ {"personality"},
+ {"ustat"},
+ {"statfs"},
+ {"fstatfs"},
+ {"sysfs"},
+ {"getpriority"}, /* 140 */
+ {"setpriority"},
+ {"sched_setparam"},
+ {"sched_getparam"},
+ {"sched_setscheduler"},
+ {"sched_getscheduler"},
+ {"sched_get_priority_max"},
+ {"sched_get_priority_min"},
+ {"sched_rr_get_interval"},
+ {"mlock"},
+ {"munlock"}, /* 150 */
+ {"mlockall"},
+ {"munlockall"},
+ {"vhangup"},
+ {"modify_ldt"},
+ {"pivot_root"},
+ {"sysctl"},
+ {"prctl"},
+ {"arch_prctl"},
+ {"adjtimex"},
+ {"setrlimit"}, /* 150 */
+ {"chroot"},
+ {"sync"},
+ {"acct"},
+ {"settimeofday"},
+ {"mount"},
+ {"umount2"},
+ {"swapon"},
+ {"swapoff"},
+ {"reboot"},
+ {"sethostname"}, /* 170 */
+ {"setdomainname"},
+ {"iopl"},
+ {"ioperm"},
+ {"create_module"},
+ {"init_module"},
+ {"delete_module"},
+ {"get_kernel_syms"},
+ {"query_module"},
+ {"quotactl"},
+ {"nfsservctl"}, /* 180 */
+ {"getpmsg"},
+ {"putpmsg"},
+ {"afs_syscall"},
+ {"tux"},
+ {"security"},
+ {"gettid"},
+ {"readahead"},
+ {"setxattr"},
+ {"lsetxattr"},
+ {"fsetxattr"}, /* 190 */
+ {"getxattr"},
+ {"lgetxattr"},
+ {"fgetxattr"},
+ {"listxattr"},
+ {"llistxattr"},
+ {"flistxattr"},
+ {"removexattr"},
+ {"lremovexattr"},
+ {"fremovexattr"},
+ {"tkill"}, /* 200 */
+ {"time"},
+ {"futex"},
+ {"sched_setaffinity"},
+ {"sched_getaffinity"},
+ {"set_thread_area"},
+ {"io_setup"},
+ {"io_destroy"},
+ {"io_getevents"},
+ {"io_submit"},
+ {"io_cancel"}, /* 210 */
+ {"get_thread_area"},
+ {"lookup_dcookie"},
+ {"epoll_create"},
+ {"epoll_ctl_old"},
+ {"epoll_wait_old"},
+ {"remap_file_pages"},
+ {"getdents64"},
+ {"set_tid_address"},
+ {"restart_syscall"},
+ {"semtimedop"}, /* 220 */
+ {"fadvise64"},
+ {"timer_create"},
+ {"timer_settime"},
+ {"timer_gettime"},
+ {"timer_getoverrun"},
+ {"timer_delete"},
+ {"clock_settime"},
+ {"clock_gettime"},
+ {"clock_getres"},
+ {"clock_nanosleep"}, /* 230 */
+ {"exit_group"},
+ {"epoll_wait"},
+ {"epoll_ctl"},
+ {"tgkill"},
+ {"utimes"},
+ {"vserver"},
+ {"mbind"},
+ {"set_mempolicy"},
+ {"get_mempolicy"},
+ {"mq_open"}, /* 240 */
+ {"mq_unlink"},
+ {"mq_timedsend"},
+ {"mq_timedreceive"},
+ {"mq_notify"},
+ {"mq_getsetattr"},
+ {"kexec_load"},
+ {"waitid"},
+ {"add_key"},
+ {"request_key"},
+ {"keyctl"}, /* 250 */
+ {"ioprio_set"},
+ {"ioprio_get"},
+ {"inotify_init"},
+ {"inotify_add_watch"},
+ {"inotify_rm_watch"},
+ {"migrate_pages"},
+ {"openat"},
+ {"mkdirat"},
+ {"mknodat"},
+ {"fchownat"}, /* 260 */
+ {"futimesat"},
+ {"fstatat64"},
+ {"unlinkat"},
+ {"renameat"},
+ {"linkat"},
+ {"symlinkat"},
+ {"readlinkat"},
+ {"fchmodat"},
+ {"faccessat"},
+ {"pselect6"}, /* 270 */
+ {"ppoll"},
+ {"unshare"},
+ {"set_robust_list"},
+ {"get_robust_list"},
+ {"splice"},
+ {"tee"},
+ {"sync_file_range"},
+ {"vmsplice"},
+ {"move_pages"},
+ {"utimensat"}, /* 280 */
+ {"epoll_pwait"},
+ {"signalfd"},
+ {"timerfd_create"},
+ {"eventfd"},
+ {"fallocate"},
+ {"timerfd_settime"},
+ {"timerfd_gettime"},
+ {"accept4"},
+ {"signalfd4"},
+ {"eventfd2"}, /* 290 */
+ {"epoll_create1"},
+ {"dup3"},
+ {"pipe2"},
+ {"inotify_init1"},
+ {"preadv"},
+ {"pwritev"},
+ {"rt_tgsigqueueinfo"},
+ {"perf_event_open"},
+ {"recvmmsg"},
+ {"fanotify_init"}, /* 300 */
+ {"fanotify_mark"},
+ {"prlimit64"},
+ {"name_to_handle_at"},
+ {"open_by_handle_at"},
+ {"clock_adjtime"},
+ {"syncfs"},
+ {"sendmmsg"},
+ {"setns"},
+ {"getcpu"},
+ {"process_vm_readv"}, /* 310 */
+ {"process_vm_writev"},
+ {"kcmp"},
+ {"finit_module"},
+ {"sched_setattr"},
+ {"sched_getattr"},
+ {"renameat2"}, /* 316 */
+
+ /* XXX gap then x32 syscalls from 512 - 544 */
+
+ NULL /* NULL-termination is required for lx_systrace */
+};
+#endif
+
+typedef struct lx_systrace_sysent {
+ const char *lss_name;
+ dtrace_id_t lss_entry;
+ dtrace_id_t lss_return;
+} lx_systrace_sysent_t;
+
+static dev_info_t *lx_systrace_devi;
+static dtrace_provider_id_t lx_systrace_id;
+static kmutex_t lx_systrace_lock;
+static uint_t lx_systrace_nenabled;
+
+static int lx_systrace_nsysent32;
+static lx_systrace_sysent_t *lx_systrace_sysent32;
+
+#if defined(_LP64)
+static int lx_systrace_nsysent64;
+static lx_systrace_sysent_t *lx_systrace_sysent64;
+#endif
+
+/*ARGSUSED*/
+static void
+lx_systrace_entry(ulong_t sysnum, ulong_t arg0, ulong_t arg1, ulong_t arg2,
+ ulong_t arg3, ulong_t arg4, ulong_t arg5)
+{
+ dtrace_id_t id;
+
+#if defined(_LP64)
+ if ((ttoproc(curthread))->p_model == DATAMODEL_NATIVE) {
+ if (sysnum >= lx_systrace_nsysent64)
+ return;
+ id = lx_systrace_sysent64[sysnum].lss_entry;
+ } else
+#endif
+ {
+ if (sysnum >= lx_systrace_nsysent32)
+ return;
+ id = lx_systrace_sysent32[sysnum].lss_entry;
+ }
+
+ if (id == DTRACE_IDNONE)
+ return;
+ dtrace_probe(id, arg0, arg1, arg2, arg3, arg4);
+}
+
+/*ARGSUSED*/
+static void
+lx_systrace_return(ulong_t sysnum, ulong_t arg0, ulong_t arg1, ulong_t arg2,
+ ulong_t arg3, ulong_t arg4, ulong_t arg5)
+{
+ dtrace_id_t id;
+
+#if defined(_LP64)
+ if ((ttoproc(curthread))->p_model == DATAMODEL_NATIVE) {
+ if (sysnum >= lx_systrace_nsysent64)
+ return;
+ id = lx_systrace_sysent64[sysnum].lss_return;
+ } else
+#endif
+ {
+ if (sysnum >= lx_systrace_nsysent32)
+ return;
+ id = lx_systrace_sysent32[sysnum].lss_return;
+ }
+
+ if (id == DTRACE_IDNONE)
+ return;
+ dtrace_probe(id, arg0, arg1, arg2, arg3, arg4);
+}
+
+/*ARGSUSED*/
+static void
+lx_systrace_provide(void *arg, const dtrace_probedesc_t *desc)
+{
+ int i;
+
+ if (desc != NULL)
+ return;
+
+ for (i = 0; i < lx_systrace_nsysent32; i++) {
+ if (dtrace_probe_lookup(lx_systrace_id, "sys32",
+ lx_systrace_sysent32[i].lss_name, "entry") != 0)
+ continue;
+
+ (void) dtrace_probe_create(lx_systrace_id, "sys32",
+ lx_systrace_sysent32[i].lss_name, "entry",
+ LX_SYSTRACE_ENTRY_AFRAMES,
+ (void *)((uintptr_t)LX_SYSTRACE32_ENTRY(i)));
+
+ (void) dtrace_probe_create(lx_systrace_id, "sys32",
+ lx_systrace_sysent32[i].lss_name, "return",
+ LX_SYSTRACE_RETURN_AFRAMES,
+ (void *)((uintptr_t)LX_SYSTRACE32_RETURN(i)));
+
+ lx_systrace_sysent32[i].lss_entry = DTRACE_IDNONE;
+ lx_systrace_sysent32[i].lss_return = DTRACE_IDNONE;
+ }
+
+#if defined(_LP64)
+ for (i = 0; i < lx_systrace_nsysent64; i++) {
+ if (dtrace_probe_lookup(lx_systrace_id, "sys64",
+ lx_systrace_sysent64[i].lss_name, "entry") != 0)
+ continue;
+
+ (void) dtrace_probe_create(lx_systrace_id, "sys64",
+ lx_systrace_sysent64[i].lss_name, "entry",
+ LX_SYSTRACE_ENTRY_AFRAMES,
+ (void *)((uintptr_t)LX_SYSTRACE64_ENTRY(i)));
+
+ (void) dtrace_probe_create(lx_systrace_id, "sys64",
+ lx_systrace_sysent64[i].lss_name, "return",
+ LX_SYSTRACE_RETURN_AFRAMES,
+ (void *)((uintptr_t)LX_SYSTRACE64_RETURN(i)));
+
+ lx_systrace_sysent64[i].lss_entry = DTRACE_IDNONE;
+ lx_systrace_sysent64[i].lss_return = DTRACE_IDNONE;
+ }
+#endif
+}
+
+/*ARGSUSED*/
+static int
+lx_systrace_enable(void *arg, dtrace_id_t id, void *parg)
+{
+ int sysnum = LX_SYSTRACE_SYSNUM((uintptr_t)parg);
+
+ mutex_enter(&lx_systrace_lock);
+ if (lx_systrace_nenabled++ == 0)
+ lx_brand_systrace_enable();
+ mutex_exit(&lx_systrace_lock);
+
+ if (LX_SYSTRACE_IS64BIT((uintptr_t)parg)) {
+#if defined(_LP64)
+ ASSERT(sysnum < lx_systrace_nsysent64);
+
+ if (LX_SYSTRACE_ISENTRY((uintptr_t)parg)) {
+ lx_systrace_sysent64[sysnum].lss_entry = id;
+ } else {
+ lx_systrace_sysent64[sysnum].lss_return = id;
+ }
+#endif
+ } else {
+ ASSERT(sysnum < lx_systrace_nsysent32);
+
+ if (LX_SYSTRACE_ISENTRY((uintptr_t)parg)) {
+ lx_systrace_sysent32[sysnum].lss_entry = id;
+ } else {
+ lx_systrace_sysent32[sysnum].lss_return = id;
+ }
+ }
+ return (0);
+}
+
+/*ARGSUSED*/
+static void
+lx_systrace_disable(void *arg, dtrace_id_t id, void *parg)
+{
+ int sysnum = LX_SYSTRACE_SYSNUM((uintptr_t)parg);
+
+ if (LX_SYSTRACE_IS64BIT((uintptr_t)parg)) {
+#if defined(_LP64)
+ ASSERT(sysnum < lx_systrace_nsysent64);
+
+ if (LX_SYSTRACE_ISENTRY((uintptr_t)parg)) {
+ lx_systrace_sysent64[sysnum].lss_entry = DTRACE_IDNONE;
+ } else {
+ lx_systrace_sysent64[sysnum].lss_return = DTRACE_IDNONE;
+ }
+#endif
+ } else {
+ ASSERT(sysnum < lx_systrace_nsysent32);
+
+ if (LX_SYSTRACE_ISENTRY((uintptr_t)parg)) {
+ lx_systrace_sysent32[sysnum].lss_entry = DTRACE_IDNONE;
+ } else {
+ lx_systrace_sysent32[sysnum].lss_return = DTRACE_IDNONE;
+ }
+ }
+
+ mutex_enter(&lx_systrace_lock);
+ if (--lx_systrace_nenabled == 0)
+ lx_brand_systrace_disable();
+ mutex_exit(&lx_systrace_lock);
+}
+
+/*ARGSUSED*/
+static void
+lx_systrace_destroy(void *arg, dtrace_id_t id, void *parg)
+{
+}
+
+/*ARGSUSED*/
+static uint64_t
+lx_systrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
+ int aframes)
+{
+ struct frame *fp = (struct frame *)dtrace_getfp();
+ uintptr_t *stack;
+ uint64_t val = 0;
+ int i;
+
+ if (argno >= 6)
+ return (0);
+
+ /*
+ * Walk the four frames down the stack to the entry or return callback.
+ * Our callback calls dtrace_probe() which calls dtrace_dif_variable()
+ * which invokes this function to get the extended arguments. We get
+ * the frame pointer in via call to dtrace_getfp() above which makes for
+ * four frames.
+ */
+ for (i = 0; i < 4; i++) {
+ fp = (struct frame *)fp->fr_savfp;
+ }
+
+ stack = (uintptr_t *)&fp[1];
+
+ /*
+ * Skip the first argument to the callback -- the system call number.
+ */
+ argno++;
+
+#ifdef __amd64
+ /*
+ * On amd64, the first 6 arguments are passed in registers while
+ * subsequent arguments are on the stack.
+ */
+ argno -= 6;
+#endif
+
+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
+ val = stack[argno];
+ DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
+
+ return (val);
+}
+
+
+static const dtrace_pattr_t lx_systrace_attr = {
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
+};
+
+static dtrace_pops_t lx_systrace_pops = {
+ lx_systrace_provide,
+ NULL,
+ lx_systrace_enable,
+ lx_systrace_disable,
+ NULL,
+ NULL,
+ NULL,
+ lx_systrace_getarg,
+ NULL,
+ lx_systrace_destroy
+};
+
+static int
+lx_systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
+{
+ int i;
+
+ switch (cmd) {
+ case DDI_ATTACH:
+ break;
+ case DDI_RESUME:
+ return (DDI_SUCCESS);
+ default:
+ return (DDI_FAILURE);
+ }
+
+ if (ddi_create_minor_node(devi, "lx_systrace", S_IFCHR,
+ 0, DDI_PSEUDO, NULL) == DDI_FAILURE ||
+ dtrace_register("lx-syscall", &lx_systrace_attr,
+ DTRACE_PRIV_USER, 0, &lx_systrace_pops, NULL,
+ &lx_systrace_id) != 0) {
+ ddi_remove_minor_node(devi, NULL);
+ return (DDI_FAILURE);
+ }
+
+ ddi_report_dev(devi);
+ lx_systrace_devi = devi;
+
+ /*
+ * Count up the 32-bit Linux system calls.
+ */
+ for (i = 0; lx_sysnames32[i].sy_name != NULL; i++)
+ continue;
+
+ /*
+ * Initialize the 32-bit table.
+ */
+ lx_systrace_sysent32 = kmem_zalloc(i * sizeof (lx_systrace_sysent_t),
+ KM_SLEEP);
+ lx_systrace_nsysent32 = i;
+
+ for (i = 0; i < lx_systrace_nsysent32; i++) {
+ lx_systrace_sysent32[i].lss_name = lx_sysnames32[i].sy_name;
+ lx_systrace_sysent32[i].lss_entry = DTRACE_IDNONE;
+ lx_systrace_sysent32[i].lss_return = DTRACE_IDNONE;
+ }
+
+#if defined(_LP64)
+ /*
+ * Count up the 64-bit Linux system calls.
+ */
+ for (i = 0; lx_sysnames64[i].sy_name != NULL; i++)
+ continue;
+
+ /*
+ * Initialize the 64-bit table.
+ */
+ lx_systrace_sysent64 = kmem_zalloc(i * sizeof (lx_systrace_sysent_t),
+ KM_SLEEP);
+ lx_systrace_nsysent64 = i;
+
+ for (i = 0; i < lx_systrace_nsysent64; i++) {
+ lx_systrace_sysent64[i].lss_name = lx_sysnames64[i].sy_name;
+ lx_systrace_sysent64[i].lss_entry = DTRACE_IDNONE;
+ lx_systrace_sysent64[i].lss_return = DTRACE_IDNONE;
+ }
+#endif
+
+ /*
+ * Install probe triggers.
+ */
+ lx_systrace_entry_ptr = lx_systrace_entry;
+ lx_systrace_return_ptr = lx_systrace_return;
+
+ return (DDI_SUCCESS);
+}
+
+/*ARGSUSED*/
+static int
+lx_systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
+{
+ switch (cmd) {
+ case DDI_DETACH:
+ break;
+ case DDI_SUSPEND:
+ return (DDI_SUCCESS);
+ default:
+ return (DDI_FAILURE);
+ }
+
+ if (dtrace_unregister(lx_systrace_id) != 0)
+ return (DDI_FAILURE);
+
+ /*
+ * Free tables.
+ */
+ kmem_free(lx_systrace_sysent32, lx_systrace_nsysent32 *
+ sizeof (lx_systrace_sysent_t));
+ lx_systrace_sysent32 = NULL;
+ lx_systrace_nsysent32 = 0;
+
+#if defined(_LP64)
+ kmem_free(lx_systrace_sysent64, lx_systrace_nsysent64 *
+ sizeof (lx_systrace_sysent_t));
+ lx_systrace_sysent64 = NULL;
+ lx_systrace_nsysent64 = 0;
+#endif
+
+ /*
+ * Reset probe triggers.
+ */
+ lx_systrace_entry_ptr = NULL;
+ lx_systrace_return_ptr = NULL;
+
+ return (DDI_SUCCESS);
+}
+
+/*ARGSUSED*/
+static int
+lx_systrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
+{
+ return (0);
+}
+
+static struct cb_ops lx_systrace_cb_ops = {
+ lx_systrace_open, /* open */
+ nodev, /* close */
+ nulldev, /* strategy */
+ nulldev, /* print */
+ nodev, /* dump */
+ nodev, /* read */
+ nodev, /* write */
+ nodev, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ nodev, /* segmap */
+ nochpoll, /* poll */
+ ddi_prop_op, /* cb_prop_op */
+ 0, /* streamtab */
+ D_NEW | D_MP /* Driver compatibility flag */
+};
+
+static struct dev_ops lx_systrace_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* refcnt */
+ ddi_getinfo_1to1, /* get_dev_info */
+ nulldev, /* identify */
+ nulldev, /* probe */
+ lx_systrace_attach, /* attach */
+ lx_systrace_detach, /* detach */
+ nodev, /* reset */
+ &lx_systrace_cb_ops, /* driver operations */
+ NULL, /* bus operations */
+ nodev, /* dev power */
+ ddi_quiesce_not_needed, /* quiesce */
+};
+
+/*
+ * Module linkage information for the kernel.
+ */
+static struct modldrv modldrv = {
+ &mod_driverops, /* module type (this is a pseudo driver) */
+ "Linux Brand System Call Tracing", /* name of module */
+ &lx_systrace_ops /* driver ops */
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ (void *)&modldrv,
+ NULL
+};
+
+int
+_init(void)
+{
+ return (mod_install(&modlinkage));
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ return (mod_remove(&modlinkage));
+}
diff --git a/usr/src/uts/common/brand/lx/dtrace/lx_systrace.conf b/usr/src/uts/common/brand/lx/dtrace/lx_systrace.conf
new file mode 100644
index 0000000000..e4499c8a5b
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/dtrace/lx_systrace.conf
@@ -0,0 +1,27 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+
+name="lx_systrace" parent="pseudo" instance=0;
diff --git a/usr/src/uts/common/brand/lx/io/lx_audio.c b/usr/src/uts/common/brand/lx/io/lx_audio.c
new file mode 100644
index 0000000000..e8c6234d92
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/io/lx_audio.c
@@ -0,0 +1,1996 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#include <sys/audio.h>
+#include <sys/conf.h>
+#include <sys/debug.h>
+#include <sys/disp.h>
+#include <sys/ddi.h>
+#include <sys/file.h>
+#include <sys/id_space.h>
+#include <sys/kmem.h>
+#include <sys/lx_audio.h>
+#include <sys/mixer.h>
+#include <sys/modhash.h>
+#include <sys/stat.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+#include <sys/sysmacros.h>
+#include <sys/stropts.h>
+#include <sys/types.h>
+#include <sys/zone.h>
+
+/* Properties used by the lx_audio driver */
+#define LXA_PROP_INPUTDEV "inputdev"
+#define LXA_PROP_OUTPUTDEV "outputdev"
+
+/* default device paths used by this driver */
+#define LXA_DEV_DEFAULT "/dev/audio"
+#define LXA_DEV_CUSTOM_DIR "/dev/sound/"
+
+/* maximum possible number of concurrent opens of this driver */
+#define LX_AUDIO_MAX_OPENS 1024
+
+/*
+ * these are default fragment size and fragment count values.
+ * these values were chosen to make quake work well on my
+ * laptop: 2Ghz Pentium M + NVIDIA GeForce Go 6400.
+ *
+ * for reference:
+ * - 1 sec of stereo output at 44Khz is about 171 Kb of data
+ * - 1 sec of mono output at 8Khz is about 8Kb of data
+ */
+#define LXA_OSS_FRAG_SIZE (1024) /* 1/8 sec at 8Khz mono */
+#define LXA_OSS_FRAG_CNT (1024 * 2)
+
+/* maximum ammount of fragment memory we'll allow a process to mmap */
+#define LXA_OSS_FRAG_MEM (1024 * 1024 * 2) /* 2Mb */
+
+/* forward declarations */
+typedef struct lxa_state lxa_state_t;
+typedef struct lxa_zstate lxa_zstate_t;
+
+/*
+ * Structure and enum declarations
+ */
+typedef enum {
+ LXA_TYPE_INVALID = 0,
+ LXA_TYPE_AUDIO = 1, /* audio device */
+ LXA_TYPE_AUDIOCTL = 2 /* audio control/mixer device */
+} lxa_dev_type_t;
+
+struct lxa_zstate {
+ char *lxa_zs_zonename;
+
+ /*
+ * we could store the input/output audio device setting here,
+ * but instead we're keeing them as device node properties
+ * so that a user can easily see the audio configuration for
+ * a zone via prtconf.
+ */
+
+ /*
+ * OSS doesn't support multiple opens of the audio device.
+ * (multiple opens of the mixer device are supported.)
+ * so here we'll keep a pointer to any open input/output
+ * streams. (OSS does support two opens if one is for input
+ * and the other is for output.)
+ */
+ lxa_state_t *lxa_zs_istate;
+ lxa_state_t *lxa_zs_ostate;
+
+ /*
+ * we need to cache channel gain and balance. channel gain and
+ * balance map to PCM volume in OSS, which are supposedly a property
+ * of the underlying hardware. but in solaris, channels are
+ * implemented in software and only exist when an audio device
+ * is actually open. (each open returns a unique channel.) OSS
+ * apps will expect consistent PCM volume set/get operations to
+ * work even if no audio device is open. hence, if no underlying
+ * device is open we need to cache the gain and balance setting.
+ */
+ lxa_mixer_levels_t lxa_zs_pcm_levels;
+};
+
+struct lxa_state {
+ lxa_zstate_t *lxas_zs; /* zone state pointer */
+
+ dev_t lxas_dev_old; /* dev_t used to open the device */
+ dev_t lxas_dev_new; /* new dev_t assigned to an open */
+ int lxas_flags; /* original flags passed to open */
+ lxa_dev_type_t lxas_type; /* type of device that was opened */
+
+ int lxas_devs_same; /* input and output device the same? */
+
+ /* input device variables */
+ ldi_handle_t lxas_idev_lh; /* ldi handle for access */
+ int lxas_idev_flags; /* flags used for open */
+
+ /* output device variables */
+ ldi_handle_t lxas_odev_lh; /* ldi handle for access */
+ int lxas_odev_flags; /* flags used for open */
+
+ /*
+ * since we support multiplexing of devices we need to remember
+ * certain parameters about the devices
+ */
+ uint_t lxas_hw_features;
+ uint_t lxas_sw_features;
+
+ uint_t lxas_frag_size;
+ uint_t lxas_frag_cnt;
+
+ /*
+ * members needed to support mmap device access. note that to
+ * simplifly things we only support one mmap access per open.
+ */
+ ddi_umem_cookie_t lxas_umem_cookie;
+ char *lxas_umem_ptr;
+ size_t lxas_umem_len;
+ kthread_t *lxas_mmap_thread;
+ int lxas_mmap_thread_running;
+ int lxas_mmap_thread_exit;
+ int lxas_mmap_thread_frag;
+};
+
+/*
+ * Global variables
+ */
+dev_info_t *lxa_dip = NULL;
+kmutex_t lxa_lock;
+id_space_t *lxa_minor_id = NULL;
+mod_hash_t *lxa_state_hash = NULL;
+mod_hash_t *lxa_zstate_hash = NULL;
+size_t lxa_state_hash_size = 15;
+size_t lxa_zstate_hash_size = 15;
+size_t lxa_registered_zones = 0;
+
+/*
+ * function declarations
+ */
+static void lxa_mmap_output_disable(lxa_state_t *);
+
+/*
+ * functions
+ */
+static void
+lxa_state_close(lxa_state_t *lxa_state)
+{
+ lxa_zstate_t *lxa_zs = lxa_state->lxas_zs;
+ minor_t minor = getminor(lxa_state->lxas_dev_new);
+
+ /* disable any mmap output that might still be going on */
+ lxa_mmap_output_disable(lxa_state);
+
+ /*
+ * if this was the active input/output device, unlink it from
+ * the global zone state so that other opens of the audio device
+ * can now succeed.
+ */
+ mutex_enter(&lxa_lock);
+ if (lxa_zs->lxa_zs_istate == lxa_state)
+ lxa_zs->lxa_zs_istate = NULL;
+ if (lxa_zs->lxa_zs_ostate == lxa_state) {
+ lxa_zs->lxa_zs_ostate = NULL;
+ }
+ mutex_exit(&lxa_lock);
+
+ /* remove this state structure from the hash (if it's there) */
+ (void) mod_hash_remove(lxa_state_hash,
+ (mod_hash_key_t)(uintptr_t)minor, (mod_hash_val_t *)&lxa_state);
+
+ /* close any audio device that we have open */
+ if (lxa_state->lxas_idev_lh != NULL)
+ (void) ldi_close(lxa_state->lxas_idev_lh,
+ lxa_state->lxas_idev_flags, kcred);
+ if (lxa_state->lxas_odev_lh != NULL)
+ (void) ldi_close(lxa_state->lxas_odev_lh,
+ lxa_state->lxas_odev_flags, kcred);
+
+ /* free up any memory allocated by mmaps */
+ if (lxa_state->lxas_umem_cookie != NULL)
+ ddi_umem_free(lxa_state->lxas_umem_cookie);
+
+ /* release the id associated with this state structure */
+ id_free(lxa_minor_id, minor);
+
+ kmem_free(lxa_state, sizeof (*lxa_state));
+}
+
+static char *
+getzonename(void)
+{
+ return (curproc->p_zone->zone_name);
+}
+
+static char *
+lxa_devprop_name(char *zname, char *pname)
+{
+ char *zpname;
+ int n;
+
+ ASSERT((pname != NULL) && (zname != NULL));
+
+ /* prepend the zone name to the property name */
+ n = snprintf(NULL, 0, "%s_%s", zname, pname) + 1;
+ zpname = kmem_alloc(n, KM_SLEEP);
+ (void) snprintf(zpname, n, "%s_%s", zname, pname);
+
+ return (zpname);
+}
+
+static int
+lxa_devprop_verify(char *pval)
+{
+ int n;
+
+ ASSERT(pval != NULL);
+
+ if (strcmp(pval, "default") == 0)
+ return (0);
+
+ /* make sure the value is an integer */
+ for (n = 0; pval[n] != '\0'; n++) {
+ if ((pval[n] < '0') && (pval[n] > '9')) {
+ return (-1);
+ }
+ }
+
+ return (0);
+}
+
+static char *
+lxa_devprop_lookup(char *zname, char *pname, lxa_dev_type_t lxa_type)
+{
+ char *zprop_name, *pval;
+ char *dev_path;
+ int n, rv;
+
+ ASSERT((pname != NULL) && (zname != NULL));
+ ASSERT((lxa_type == LXA_TYPE_AUDIO) || (lxa_type == LXA_TYPE_AUDIOCTL));
+
+ zprop_name = lxa_devprop_name(zname, pname);
+
+ /* attempt to lookup the property */
+ rv = ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip,
+ DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, zprop_name, &pval);
+ strfree(zprop_name);
+
+ if (rv != DDI_PROP_SUCCESS)
+ return (NULL);
+
+ if (lxa_devprop_verify(pval) != 0) {
+ ddi_prop_free(pval);
+ return (NULL);
+ }
+
+ if (strcmp(pval, "none") == 0) {
+ /* there is no audio device specified */
+ return (NULL);
+ } else if (strcmp(pval, "default") == 0) {
+ /* use the default audio device on the system */
+ dev_path = strdup(LXA_DEV_DEFAULT);
+ } else {
+ /* a custom audio device was specified, generate a path */
+ n = snprintf(NULL, 0, "%s%s", LXA_DEV_CUSTOM_DIR, pval) + 1;
+ dev_path = kmem_alloc(n, KM_SLEEP);
+ (void) snprintf(dev_path, n, "%s%s", LXA_DEV_CUSTOM_DIR, pval);
+ }
+ ddi_prop_free(pval);
+
+ /*
+ * if this is an audio control device so we need to append
+ * "ctl" to the path
+ */
+ if (lxa_type == LXA_TYPE_AUDIOCTL) {
+ char *tmp;
+ n = snprintf(NULL, 0, "%s%s", dev_path, "ctl") + 1;
+ tmp = kmem_alloc(n, KM_SLEEP);
+ (void) snprintf(tmp, n, "%s%s", dev_path, "ctl");
+ strfree(dev_path);
+ dev_path = tmp;
+ }
+
+ return (dev_path);
+}
+
+static int
+lxa_dev_getfeatures(lxa_state_t *lxa_state)
+{
+ audio_info_t ai_idev, ai_odev;
+ int n, rv;
+
+ /* set a default fragment size */
+ lxa_state->lxas_frag_size = LXA_OSS_FRAG_SIZE;
+ lxa_state->lxas_frag_cnt = LXA_OSS_FRAG_CNT;
+
+ /* get info for the currently open audio devices */
+ if ((lxa_state->lxas_idev_lh != NULL) &&
+ ((rv = ldi_ioctl(lxa_state->lxas_idev_lh,
+ AUDIO_GETINFO, (intptr_t)&ai_idev, FKIOCTL, kcred, &n)) != 0))
+ return (rv);
+ if ((lxa_state->lxas_odev_lh != NULL) &&
+ ((rv = ldi_ioctl(lxa_state->lxas_odev_lh,
+ AUDIO_GETINFO, (intptr_t)&ai_odev, FKIOCTL, kcred, &n)) != 0))
+ return (rv);
+
+ /* if we're only open for reading or writing then it's easy */
+ if (lxa_state->lxas_idev_lh == NULL) {
+ lxa_state->lxas_sw_features = ai_odev.sw_features;
+ lxa_state->lxas_hw_features = ai_odev.hw_features;
+ return (0);
+ } else if (lxa_state->lxas_odev_lh == NULL) {
+ lxa_state->lxas_sw_features = ai_idev.sw_features;
+ lxa_state->lxas_hw_features = ai_idev.hw_features;
+ return (0);
+ }
+
+ /*
+ * well if we're open for reading and writing but the underlying
+ * device is the same then it's also pretty easy
+ */
+ if (lxa_state->lxas_devs_same) {
+ if ((ai_odev.sw_features != ai_idev.sw_features) ||
+ (ai_odev.hw_features != ai_idev.hw_features)) {
+ zcmn_err(getzoneid(), CE_WARN, "lx_audio error: "
+ "audio device reported inconsistent features");
+ return (EIO);
+ }
+ lxa_state->lxas_sw_features = ai_odev.sw_features;
+ lxa_state->lxas_hw_features = ai_odev.hw_features;
+ return (0);
+ }
+
+ /*
+ * figure out which software features we're going to support.
+ * we will report a feature as supported if both the input
+ * and output device support it.
+ */
+ lxa_state->lxas_sw_features = 0;
+ n = ai_idev.sw_features & ai_odev.sw_features;
+ if (n & AUDIO_SWFEATURE_MIXER)
+ lxa_state->lxas_sw_features |= AUDIO_SWFEATURE_MIXER;
+
+ /*
+ * figure out which hardware features we're going to support.
+ * for a first pass we will report a feature as supported if
+ * both the input and output device support it.
+ */
+ lxa_state->lxas_hw_features = 0;
+ n = ai_idev.hw_features & ai_odev.hw_features;
+ if (n & AUDIO_HWFEATURE_MSCODEC)
+ lxa_state->lxas_hw_features |= AUDIO_HWFEATURE_MSCODEC;
+
+ /*
+ * if we made it here then we have different audio input and output
+ * devices. this will allow us to report support for additional
+ * hardware features that may not supported by just the input or
+ * output device alone.
+ */
+
+ /* always report tha we support both playback and recording */
+ lxa_state->lxas_hw_features =
+ AUDIO_HWFEATURE_PLAY | AUDIO_HWFEATURE_RECORD;
+
+ /* always report full duplex support */
+ lxa_state->lxas_hw_features = AUDIO_HWFEATURE_DUPLEX;
+
+ /* never report that we have input to output loopback support */
+ ASSERT((lxa_state->lxas_hw_features & AUDIO_HWFEATURE_IN2OUT) == 0);
+ return (0);
+}
+
+static int
+lxa_dev_open(lxa_state_t *lxa_state)
+{
+ char *idev, *odev;
+ int flags, rv;
+ ldi_handle_t lh;
+ ldi_ident_t li = NULL;
+
+ ASSERT((lxa_state->lxas_type == LXA_TYPE_AUDIO) ||
+ (lxa_state->lxas_type == LXA_TYPE_AUDIOCTL));
+
+ /*
+ * check if we have configuration properties for this zone.
+ * if we don't then audio isn't supported in this zone.
+ */
+ idev = lxa_devprop_lookup(getzonename(), LXA_PROP_INPUTDEV,
+ lxa_state->lxas_type);
+ odev = lxa_devprop_lookup(getzonename(), LXA_PROP_OUTPUTDEV,
+ lxa_state->lxas_type);
+
+ /* make sure there is at least one device to read from or write to */
+ if ((idev == NULL) && (odev == NULL))
+ return (ENODEV);
+
+ /* see if the input and output devices are actually the same device */
+ if (((idev != NULL) && (odev != NULL)) &&
+ (strcmp(idev, odev) == 0))
+ lxa_state->lxas_devs_same = 1;
+
+ /* we don't respect FEXCL */
+ flags = lxa_state->lxas_flags & ~FEXCL;
+ if (lxa_state->lxas_type == LXA_TYPE_AUDIO) {
+ /*
+ * if we're opening audio devices then we need to muck
+ * with the FREAD/FWRITE flags.
+ *
+ * certain audio device may only support input or output
+ * (but not both.) so if we're multiplexing input/output
+ * to different devices we need to make sure we don't try
+ * and open the output device for reading and the input
+ * device for writing.
+ *
+ * if we're using the same device for input/output we still
+ * need to do this because some audio devices won't let
+ * themselves be opened multiple times for read access.
+ */
+ lxa_state->lxas_idev_flags = flags & ~FWRITE;
+ lxa_state->lxas_odev_flags = flags & ~FREAD;
+
+ /* make sure we have devices to read from and write to */
+ if (((flags & FREAD) && (idev == NULL)) ||
+ ((flags & FWRITE) && (odev == NULL))) {
+ rv = ENODEV;
+ goto out;
+ }
+ } else {
+ lxa_state->lxas_idev_flags = lxa_state->lxas_odev_flags = flags;
+ }
+
+ /* get an ident to open the devices */
+ if (ldi_ident_from_dev(lxa_state->lxas_dev_new, &li) != 0) {
+ rv = ENODEV;
+ goto out;
+ }
+
+ /* open the input device */
+ lxa_state->lxas_idev_lh = NULL;
+ if (((lxa_state->lxas_type == LXA_TYPE_AUDIOCTL) ||
+ (lxa_state->lxas_idev_flags & FREAD)) &&
+ (idev != NULL)) {
+ rv = ldi_open_by_name(idev, lxa_state->lxas_idev_flags,
+ kcred, &lh, li);
+ if (rv != 0) {
+ zcmn_err(getzoneid(), CE_WARN, "lxa_open_dev: "
+ "unable to open audio device: %s", idev);
+ zcmn_err(getzoneid(), CE_WARN, "lxa_open_dev: "
+ "possible zone audio configuration error");
+ goto out;
+ }
+ lxa_state->lxas_idev_lh = lh;
+ }
+
+ /* open the output device */
+ lxa_state->lxas_odev_lh = NULL;
+ if (((lxa_state->lxas_type == LXA_TYPE_AUDIOCTL) ||
+ (lxa_state->lxas_odev_flags & FWRITE)) &&
+ (odev != NULL)) {
+ rv = ldi_open_by_name(odev, lxa_state->lxas_odev_flags,
+ kcred, &lh, li);
+ if (rv != 0) {
+ /*
+ * If this open failed and we previously opened an
+ * input device, it is the responsibility of the
+ * caller to close that device after we return
+ * failure here.
+ */
+ zcmn_err(getzoneid(), CE_WARN, "lxa_open_dev: "
+ "unable to open audio device: %s", odev);
+ zcmn_err(getzoneid(), CE_WARN, "lxa_open_dev: "
+ "possible zone audio configuration error");
+ goto out;
+ }
+ lxa_state->lxas_odev_lh = lh;
+ }
+
+ /* free up stuff */
+out:
+ if (li != NULL)
+ ldi_ident_release(li);
+ if (idev != NULL)
+ strfree(idev);
+ if (odev != NULL)
+ strfree(odev);
+
+ return (rv);
+}
+
+void
+lxa_mmap_thread_exit(lxa_state_t *lxa_state)
+{
+ mutex_enter(&lxa_lock);
+ lxa_state->lxas_mmap_thread = NULL;
+ lxa_state->lxas_mmap_thread_frag = 0;
+ lxa_state->lxas_mmap_thread_running = 0;
+ lxa_state->lxas_mmap_thread_exit = 0;
+ mutex_exit(&lxa_lock);
+ thread_exit();
+ /*NOTREACHED*/
+}
+
+void
+lxa_mmap_thread(lxa_state_t *lxa_state)
+{
+ struct uio uio, uio_null;
+ iovec_t iovec, iovec_null;
+ uint_t bytes_per_sec, usec_per_frag, ticks_per_frag;
+ int rv, junk, eof, retry;
+ audio_info_t ai;
+
+ /* we better be setup for writing to the output device */
+ ASSERT((lxa_state->lxas_flags & FWRITE) != 0);
+ ASSERT(lxa_state->lxas_odev_lh != NULL);
+
+ /* setup a uio to output one fragment */
+ uio.uio_iov = &iovec;
+ uio.uio_iovcnt = 1;
+ uio.uio_offset = 0;
+ uio.uio_segflg = UIO_SYSSPACE;
+ uio.uio_fmode = 0;
+ uio.uio_extflg = 0;
+ uio.uio_llimit = MAXOFFSET_T;
+
+ /* setup a uio to output a eof (a fragment with a length of 0) */
+ uio_null.uio_iov = &iovec_null;
+ uio_null.uio_iov->iov_len = 0;
+ uio_null.uio_iov->iov_base = NULL;
+ uio_null.uio_iovcnt = 1;
+ uio_null.uio_offset = 0;
+ uio_null.uio_segflg = UIO_SYSSPACE;
+ uio_null.uio_fmode = 0;
+ uio_null.uio_extflg = 0;
+ uio_null.uio_llimit = MAXOFFSET_T;
+ uio_null.uio_resid = 0;
+
+lxa_mmap_thread_top:
+ ASSERT(!MUTEX_HELD(&lxa_lock));
+
+ /* first drain any pending audio output */
+ if ((rv = ldi_ioctl(lxa_state->lxas_odev_lh,
+ AUDIO_DRAIN, NULL, FKIOCTL, kcred, &junk)) != 0) {
+ cmn_err(CE_WARN, "lxa_mmap_thread: "
+ "AUDIO_DRAIN failed, aborting audio output");
+ lxa_mmap_thread_exit(lxa_state);
+ /*NOTREACHED*/
+ }
+
+ /*
+ * we depend on the ai.play.eof value to keep track of
+ * audio output progress so reset it here.
+ */
+ AUDIO_INITINFO(&ai);
+ ai.play.eof = 0;
+ if ((rv = ldi_ioctl(lxa_state->lxas_odev_lh,
+ AUDIO_SETINFO, (intptr_t)&ai, FKIOCTL, kcred, &junk)) != 0) {
+ cmn_err(CE_WARN, "lxa_mmap_thread: "
+ "AUDIO_SETINFO failed, aborting audio output");
+ lxa_mmap_thread_exit(lxa_state);
+ /*NOTREACHED*/
+ }
+
+ /*
+ * we're going to need to know the sampling rate and number
+ * of output channels to estimate how long we can sleep between
+ * requests.
+ */
+ if ((rv = ldi_ioctl(lxa_state->lxas_odev_lh, AUDIO_GETINFO,
+ (intptr_t)&ai, FKIOCTL, kcred, &junk)) != 0) {
+ cmn_err(CE_WARN, "lxa_mmap_thread: "
+ "AUDIO_GETINFO failed, aborting audio output");
+ lxa_mmap_thread_exit(lxa_state);
+ /*NOTREACHED*/
+ }
+
+ /* estimate how many ticks it takes to output a fragment of data */
+ bytes_per_sec = (ai.play.sample_rate * ai.play.channels *
+ ai.play.precision) / 8;
+ usec_per_frag = MICROSEC * lxa_state->lxas_frag_size / bytes_per_sec;
+ ticks_per_frag = drv_usectohz(usec_per_frag);
+
+ /* queue up three fragments of of data into the output stream */
+ eof = 3;
+
+ /* sanity check the eof value */
+ ASSERT(ai.play.eof == 0);
+ ai.play.eof = 0;
+
+ /* we always start audio output at fragment 0 */
+ mutex_enter(&lxa_lock);
+ lxa_state->lxas_mmap_thread_frag = 0;
+
+ /*
+ * we shouldn't have allowed the mapping if it isn't a multiple
+ * of the fragment size
+ */
+ ASSERT((lxa_state->lxas_umem_len % lxa_state->lxas_frag_size) == 0);
+
+ while (!lxa_state->lxas_mmap_thread_exit) {
+ size_t start, end;
+
+ /*
+ * calculate the start and ending offsets of the next
+ * fragment to output
+ */
+ start = lxa_state->lxas_mmap_thread_frag *
+ lxa_state->lxas_frag_size;
+ end = start + lxa_state->lxas_frag_size;
+
+ ASSERT(start < lxa_state->lxas_umem_len);
+ ASSERT(end <= lxa_state->lxas_umem_len);
+
+ /* setup the uio to output one fragment of audio */
+ uio.uio_resid = end - start;
+ uio.uio_iov->iov_len = end - start;
+ uio.uio_iov->iov_base = &lxa_state->lxas_umem_ptr[start];
+
+ /* increment the current fragment index */
+ lxa_state->lxas_mmap_thread_frag =
+ (lxa_state->lxas_mmap_thread_frag + 1) %
+ (lxa_state->lxas_umem_len / lxa_state->lxas_frag_size);
+
+ /* drop the audio lock before actually outputting data */
+ mutex_exit(&lxa_lock);
+
+ /*
+ * write the fragment of audio data to the device stream
+ * then write a eof to the stream to tell the device to
+ * increment ai.play.eof when it's done processing the
+ * fragment we just wrote
+ */
+ if ((rv = ldi_write(lxa_state->lxas_odev_lh,
+ &uio, kcred)) != 0) {
+ cmn_err(CE_WARN, "lxa_mmap_thread: "
+ "ldi_write() failed (%d), "
+ "resetting audio output", rv);
+ goto lxa_mmap_thread_top;
+ }
+ if ((rv = ldi_write(lxa_state->lxas_odev_lh,
+ &uio_null, kcred)) != 0) {
+ cmn_err(CE_WARN, "lxa_mmap_thread: "
+ "ldi_write(eof) failed (%d), "
+ "resetting audio output", rv);
+ goto lxa_mmap_thread_top;
+ }
+
+ /*
+ * we want to avoid buffer underrun so ensure that
+ * there is always at least one fragment of data in the
+ * output stream.
+ */
+ mutex_enter(&lxa_lock);
+ if (--eof > 0) {
+ continue;
+ }
+
+ /*
+ * now we wait until the audio device has finished outputting
+ * at least one fragment of data.
+ */
+ retry = 0;
+ while (!lxa_state->lxas_mmap_thread_exit && (eof == 0)) {
+ uint_t ai_eof_old = ai.play.eof;
+
+ mutex_exit(&lxa_lock);
+
+ /*
+ * delay for the number of ticks it takes
+ * to output one fragment of data
+ */
+ if (ticks_per_frag > 0)
+ delay(ticks_per_frag);
+
+ /* check if we've managed to output any fragments */
+ if ((rv = ldi_ioctl(lxa_state->lxas_odev_lh,
+ AUDIO_GETINFO, (intptr_t)&ai,
+ FKIOCTL, kcred, &junk)) != 0) {
+ cmn_err(CE_WARN, "lxa_mmap_thread: "
+ "AUDIO_GETINFO failed (%d), "
+ "resetting audio output", rv);
+ /* re-start mmap audio output */
+ goto lxa_mmap_thread_top;
+ }
+
+ if (ai_eof_old == ai.play.eof) {
+ /* institute a random retry limit */
+ if (retry++ < 100) {
+ mutex_enter(&lxa_lock);
+ continue;
+ }
+ cmn_err(CE_WARN, "lxa_mmap_thread: "
+ "output stalled, "
+ "resetting audio output");
+ /* re-start mmap audio output */
+ goto lxa_mmap_thread_top;
+ }
+
+ if (ai.play.eof > ai_eof_old) {
+ eof = ai.play.eof - ai_eof_old;
+ } else {
+ /* eof counter wrapped around */
+ ASSERT(ai_eof_old < ai.play.eof);
+ eof = ai.play.eof + (ai_eof_old - UINTMAX_MAX);
+ }
+ /* we're done with this loop so re-aquire the lock */
+ ASSERT(eof != 0);
+ mutex_enter(&lxa_lock);
+ }
+ }
+ mutex_exit(&lxa_lock);
+ lxa_mmap_thread_exit(lxa_state);
+ /*NOTREACHED*/
+}
+
+static void
+lxa_mmap_output_disable(lxa_state_t *lxa_state)
+{
+ kt_did_t tid;
+
+ mutex_enter(&lxa_lock);
+
+ /* if the output thread isn't running there's nothing to do */
+ if (lxa_state->lxas_mmap_thread_running == 0) {
+ mutex_exit(&lxa_lock);
+ return;
+ }
+
+ /* tell the pcm mmap output thread to exit */
+ lxa_state->lxas_mmap_thread_exit = 1;
+
+ /* wait for the mmap output thread to exit */
+ tid = lxa_state->lxas_mmap_thread->t_did;
+ mutex_exit(&lxa_lock);
+ thread_join(tid);
+}
+
+static void
+lxa_mmap_output_enable(lxa_state_t *lxa_state)
+{
+ mutex_enter(&lxa_lock);
+
+ /* if the output thread is already running there's nothing to do */
+ if (lxa_state->lxas_mmap_thread_running != 0) {
+ mutex_exit(&lxa_lock);
+ return;
+ }
+
+ /* setup output state */
+ lxa_state->lxas_mmap_thread_running = 1;
+ lxa_state->lxas_mmap_thread_exit = 0;
+ lxa_state->lxas_mmap_thread_frag = 0;
+
+ /* kick off a thread to do the mmap pcm output */
+ lxa_state->lxas_mmap_thread = thread_create(NULL, 0,
+ (void (*)())lxa_mmap_thread, lxa_state,
+ 0, &p0, TS_RUN, minclsyspri);
+ ASSERT(lxa_state->lxas_mmap_thread != NULL);
+
+ mutex_exit(&lxa_lock);
+}
+
+static int
+lxa_ioc_mmap_output(lxa_state_t *lxa_state, intptr_t arg, int mode)
+{
+ uint_t trigger;
+
+ /* we only support output via mmap */
+ if ((lxa_state->lxas_flags & FWRITE) == 0)
+ return (EINVAL);
+
+ /* if the user hasn't mmap the device then there's nothing to do */
+ if (lxa_state->lxas_umem_cookie == NULL)
+ return (EINVAL);
+
+ /* copy in the request */
+ if (ddi_copyin((void *)arg, &trigger, sizeof (trigger), mode) != 0)
+ return (EFAULT);
+
+ /* a zero value disables output */
+ if (trigger == 0) {
+ lxa_mmap_output_disable(lxa_state);
+ return (0);
+ }
+
+ /* a non-zero value enables output */
+ lxa_mmap_output_enable(lxa_state);
+ return (0);
+}
+
+static int
+lxa_ioc_mmap_ptr(lxa_state_t *lxa_state, intptr_t arg, int mode)
+{
+ int ptr;
+
+ /* we only support output via mmap */
+ if ((lxa_state->lxas_flags & FWRITE) == 0)
+ return (EINVAL);
+
+ /* if the user hasn't mmap the device then there's nothing to do */
+ if (lxa_state->lxas_umem_cookie == NULL)
+ return (EINVAL);
+
+ /* if the output thread isn't running then there's nothing to do */
+ if (lxa_state->lxas_mmap_thread_running == 0)
+ return (EINVAL);
+
+ mutex_enter(&lxa_lock);
+ ptr = lxa_state->lxas_mmap_thread_frag * lxa_state->lxas_frag_size;
+ mutex_exit(&lxa_lock);
+
+ if (ddi_copyout(&ptr, (void *)arg, sizeof (ptr), mode) != 0)
+ return (EFAULT);
+
+ return (0);
+}
+
+static int
+lxa_ioc_get_frag_info(lxa_state_t *lxa_state, intptr_t arg, int mode)
+{
+ lxa_frag_info_t fi;
+
+ fi.lxa_fi_size = lxa_state->lxas_frag_size;
+ fi.lxa_fi_cnt = lxa_state->lxas_frag_cnt;
+
+ if (ddi_copyout(&fi, (void *)arg, sizeof (fi), mode) != 0)
+ return (EFAULT);
+
+ return (0);
+}
+
+static int
+lxa_ioc_set_frag_info(lxa_state_t *lxa_state, intptr_t arg, int mode)
+{
+ lxa_frag_info_t fi;
+
+ /* if the device is mmaped we can't change the fragment settings */
+ if (lxa_state->lxas_umem_cookie != NULL)
+ return (EINVAL);
+
+ /* copy in the request */
+ if (ddi_copyin((void *)arg, &fi, sizeof (fi), mode) != 0)
+ return (EFAULT);
+
+ /* do basic bounds checking */
+ if ((fi.lxa_fi_cnt == 0) || (fi.lxa_fi_size < 16))
+ return (EINVAL);
+
+ /* don't accept size values less than 16 */
+
+ lxa_state->lxas_frag_size = fi.lxa_fi_size;
+ lxa_state->lxas_frag_cnt = fi.lxa_fi_cnt;
+
+ return (0);
+}
+
+static int
+lxa_audio_drain(lxa_state_t *lxa_state)
+{
+ int junk;
+
+ /* only applies to output buffers */
+ if (lxa_state->lxas_odev_lh == NULL)
+ return (EINVAL);
+
+ /* can't fail so ignore the return value */
+ (void) ldi_ioctl(lxa_state->lxas_odev_lh, AUDIO_DRAIN, NULL,
+ FKIOCTL, kcred, &junk);
+ return (0);
+}
+
+/*
+ * lxa_audio_info_merge() usage notes:
+ *
+ * - it's important to make sure NOT to get the ai_idev and ai_odev
+ * parameters mixed up when calling lxa_audio_info_merge().
+ *
+ * - it's important for the caller to make sure that AUDIO_GETINFO
+ * was called for the input device BEFORE the output device. (see
+ * the comments for merging the monitor_gain setting to see why.)
+ */
+static void
+lxa_audio_info_merge(lxa_state_t *lxa_state,
+ audio_info_t *ai_idev, audio_info_t *ai_odev, audio_info_t *ai_merged)
+{
+ /* if we're not setup for output return the intput device info */
+ if (lxa_state->lxas_odev_lh == NULL) {
+ *ai_merged = *ai_idev;
+ return;
+ }
+
+ /* if we're not setup for input return the output device info */
+ if (lxa_state->lxas_idev_lh == NULL) {
+ *ai_merged = *ai_odev;
+ return;
+ }
+
+ /* get record values from the input device */
+ ai_merged->record = ai_idev->record;
+
+ /* get play values from the output device */
+ ai_merged->play = ai_odev->play;
+
+ /* muting status only matters for the output device */
+ ai_merged->output_muted = ai_odev->output_muted;
+
+ /* we don't support device reference counts, always return 1 */
+ ai_merged->ref_cnt = 1;
+
+ /*
+ * for supported hw/sw features report the combined feature
+ * set we calcuated out earlier.
+ */
+ ai_merged->hw_features = lxa_state->lxas_hw_features;
+ ai_merged->sw_features = lxa_state->lxas_sw_features;
+
+ if (!lxa_state->lxas_devs_same) {
+ /*
+ * if the input and output devices are different
+ * physical devices then we don't support input to
+ * output loopback so we always report the input
+ * to output loopback gain to be zero.
+ */
+ ai_merged->monitor_gain = 0;
+ } else {
+ /*
+ * the intput and output devices are actually the
+ * same physical device. hence it probably supports
+ * intput to output loopback. regardless we should
+ * pass back the intput to output gain reported by
+ * the device. when we pick a value to passback we
+ * use the output device value since that was
+ * the most recently queried. (we base this
+ * decision on the assumption that io gain is
+ * actually hardware setting in the device and
+ * hence if it is changed on one open instance of
+ * the device the change will be visable to all
+ * other instances of the device.)
+ */
+ ai_merged->monitor_gain = ai_odev->monitor_gain;
+ }
+
+ /*
+ * for currently enabled software features always return the
+ * merger of the two. (of course the enabled software features
+ * for the input and output devices should alway be the same,
+ * so if it isn't complain.)
+ */
+ if (ai_idev->sw_features_enabled != ai_odev->sw_features_enabled)
+ zcmn_err(getzoneid(), CE_WARN, "lx_audio: "
+ "unexpected sofware feature state");
+ ai_merged->sw_features_enabled =
+ ai_idev->sw_features_enabled & ai_odev->sw_features_enabled;
+}
+
+static int
+lxa_audio_setinfo(lxa_state_t *lxa_state, int cmd, intptr_t arg,
+ int mode)
+{
+ audio_info_t ai, ai_null, ai_idev, ai_odev;
+ int rv, junk;
+
+ /* copy in the request */
+ if (ddi_copyin((void *)arg, &ai, sizeof (ai), mode) != 0)
+ return (EFAULT);
+
+ /*
+ * if the caller is attempting to enable a software feature that
+ * we didn't report as supported the return an error
+ */
+ if ((ai.sw_features_enabled != -1) &&
+ (ai.sw_features_enabled & ~lxa_state->lxas_sw_features))
+ return (EINVAL);
+
+ /*
+ * if a process has mmaped this device then we don't allow
+ * changes to the play.eof field (since mmap output depends
+ * on this field.
+ */
+ if ((lxa_state->lxas_umem_cookie != NULL) &&
+ (ai.play.eof != -1))
+ return (EIO);
+
+ /* initialize the new requests */
+ AUDIO_INITINFO(&ai_null);
+ ai_idev = ai_odev = ai;
+
+ /* remove audio input settings from the output device request */
+ ai_odev.record = ai_null.record;
+
+ /* remove audio output settings from the input device request */
+ ai_idev.play = ai_null.play;
+ ai_idev.output_muted = ai_null.output_muted;
+
+ /* apply settings to the intput device */
+ if ((lxa_state->lxas_idev_lh != NULL) &&
+ ((rv = ldi_ioctl(lxa_state->lxas_idev_lh, cmd,
+ (intptr_t)&ai_idev, FKIOCTL, kcred, &junk)) != 0))
+ return (rv);
+
+ /* apply settings to the output device */
+ if ((lxa_state->lxas_odev_lh != NULL) &&
+ ((rv = ldi_ioctl(lxa_state->lxas_odev_lh, cmd,
+ (intptr_t)&ai_odev, FKIOCTL, kcred, &junk)) != 0))
+ return (rv);
+
+ /*
+ * a AUDIO_SETINFO call performs an implicit AUDIO_GETINFO to
+ * return values (see the coments in audioio.h.) so we need
+ * to combine the values returned from the input and output
+ * device back into the users buffer.
+ */
+ lxa_audio_info_merge(lxa_state, &ai_idev, &ai_odev, &ai);
+
+ /* copyout the results */
+ if (ddi_copyout(&ai, (void *)arg, sizeof (ai), mode) != 0) {
+ return (EFAULT);
+ }
+
+ return (0);
+}
+
+static int
+lxa_audio_getinfo(lxa_state_t *lxa_state, intptr_t arg, int mode)
+{
+ audio_info_t ai, ai_idev, ai_odev;
+ int rv, junk;
+
+ /* get the settings from the input device */
+ if ((lxa_state->lxas_idev_lh != NULL) &&
+ ((rv = ldi_ioctl(lxa_state->lxas_idev_lh, AUDIO_GETINFO,
+ (intptr_t)&ai_idev, FKIOCTL, kcred, &junk)) != 0))
+ return (rv);
+
+ /* get the settings from the output device */
+ if ((lxa_state->lxas_odev_lh != NULL) &&
+ ((rv = ldi_ioctl(lxa_state->lxas_odev_lh, AUDIO_GETINFO,
+ (intptr_t)&ai_odev, FKIOCTL, kcred, &junk)) != 0))
+ return (rv);
+
+ /*
+ * we need to combine the values returned from the input
+ * and output device back into a single user buffer.
+ */
+ lxa_audio_info_merge(lxa_state, &ai_idev, &ai_odev, &ai);
+
+ /* copyout the results */
+ if (ddi_copyout(&ai, (void *)arg, sizeof (ai), mode) != 0)
+ return (EFAULT);
+
+ return (0);
+}
+
+static int
+lxa_mixer_ai_from_lh(ldi_handle_t lh, audio_info_t *ai)
+{
+ int rv, junk;
+
+ ASSERT((lh != NULL) && (ai != NULL));
+
+ /* get the device state and channel state */
+ rv = ldi_ioctl(lh, AUDIO_GETINFO, (intptr_t)ai, FKIOCTL, kcred, &junk);
+
+ return (rv);
+}
+
+static int
+lxa_mixer_get_ai(lxa_state_t *lxa_state, audio_info_t *ai)
+{
+ audio_info_t ai_idev, ai_odev;
+ int rv;
+
+ /* if there is no input device, query the output device */
+ if (lxa_state->lxas_idev_lh == NULL)
+ return (lxa_mixer_ai_from_lh(lxa_state->lxas_odev_lh, ai));
+
+ /* if there is no ouput device, query the intput device */
+ if (lxa_state->lxas_odev_lh == NULL)
+ return (lxa_mixer_ai_from_lh(lxa_state->lxas_idev_lh, ai));
+
+ /*
+ * now get the audio_info and channel information for the
+ * underlying output device.
+ */
+ if ((rv = lxa_mixer_ai_from_lh(lxa_state->lxas_idev_lh,
+ &ai_idev)) != 0)
+ return (rv);
+ if ((rv = lxa_mixer_ai_from_lh(lxa_state->lxas_odev_lh,
+ &ai_odev)) != 0)
+ return (rv);
+
+ /* now merge the audio_info structures */
+ lxa_audio_info_merge(lxa_state, &ai_idev, &ai_odev, ai);
+ return (0);
+}
+
+static int
+lxa_mixer_get_common(lxa_state_t *lxa_state, int cmd, intptr_t arg, int mode)
+{
+ lxa_mixer_levels_t lxa_ml;
+ audio_info_t ai;
+ int rv;
+
+ ASSERT(lxa_state->lxas_type == LXA_TYPE_AUDIOCTL);
+
+ if ((rv = lxa_mixer_get_ai(lxa_state, &ai)) != 0)
+ return (rv);
+
+ switch (cmd) {
+ case LXA_IOC_MIXER_GET_VOL:
+ lxa_ml.lxa_ml_gain = ai.play.gain;
+ lxa_ml.lxa_ml_balance = ai.play.balance;
+ break;
+ case LXA_IOC_MIXER_GET_MIC:
+ lxa_ml.lxa_ml_gain = ai.record.gain;
+ lxa_ml.lxa_ml_balance = ai.record.balance;
+ break;
+ }
+
+ if (ddi_copyout(&lxa_ml, (void *)arg, sizeof (lxa_ml), mode) != 0)
+ return (EFAULT);
+ return (0);
+}
+
+static int
+lxa_mixer_set_common(lxa_state_t *lxa_state, int cmd, intptr_t arg, int mode)
+{
+ lxa_mixer_levels_t lxa_ml;
+ audio_info_t ai;
+
+ ASSERT(lxa_state->lxas_type == LXA_TYPE_AUDIOCTL);
+
+ /* get the new mixer settings */
+ if (ddi_copyin((void *)arg, &lxa_ml, sizeof (lxa_ml), mode) != 0)
+ return (EFAULT);
+
+ /* sanity check the mixer settings */
+ if (!LXA_MIXER_LEVELS_OK(&lxa_ml))
+ return (EINVAL);
+
+ /* initialize an audio_info struct with the new settings */
+ AUDIO_INITINFO(&ai);
+ switch (cmd) {
+ case LXA_IOC_MIXER_SET_VOL:
+ ai.play.gain = lxa_ml.lxa_ml_gain;
+ ai.play.balance = lxa_ml.lxa_ml_balance;
+ break;
+ case LXA_IOC_MIXER_SET_MIC:
+ ai.record.gain = lxa_ml.lxa_ml_gain;
+ ai.record.balance = lxa_ml.lxa_ml_balance;
+ break;
+ }
+
+ return (lxa_audio_setinfo(lxa_state, AUDIO_SETINFO, (intptr_t)&ai,
+ FKIOCTL));
+}
+
+static int
+lxa_mixer_get_pcm(lxa_state_t *lxa_state, intptr_t arg, int mode)
+{
+ ASSERT(lxa_state->lxas_type == LXA_TYPE_AUDIOCTL);
+
+ /* simply return the cached pcm mixer settings */
+ mutex_enter(&lxa_lock);
+ if (ddi_copyout(&lxa_state->lxas_zs->lxa_zs_pcm_levels, (void *)arg,
+ sizeof (lxa_state->lxas_zs->lxa_zs_pcm_levels), mode) != 0) {
+ mutex_exit(&lxa_lock);
+ return (EFAULT);
+ }
+ mutex_exit(&lxa_lock);
+ return (0);
+}
+
+static int
+lxa_mixer_set_pcm(lxa_state_t *lxa_state, intptr_t arg, int mode)
+{
+ lxa_mixer_levels_t lxa_ml;
+ int rv;
+
+ ASSERT(lxa_state->lxas_type == LXA_TYPE_AUDIOCTL);
+
+ /* get the new mixer settings */
+ if (ddi_copyin((void *)arg, &lxa_ml, sizeof (lxa_ml), mode) != 0)
+ return (EFAULT);
+
+ /* sanity check the mixer settings */
+ if (!LXA_MIXER_LEVELS_OK(&lxa_ml))
+ return (EINVAL);
+
+ mutex_enter(&lxa_lock);
+
+ /* if there is an active output channel, update it */
+ if (lxa_state->lxas_zs->lxa_zs_ostate != NULL) {
+ audio_info_t ai;
+
+ /* initialize an audio_info struct with the new settings */
+ AUDIO_INITINFO(&ai);
+ ai.play.gain = lxa_ml.lxa_ml_gain;
+ ai.play.balance = lxa_ml.lxa_ml_balance;
+
+ if ((rv = lxa_audio_setinfo(lxa_state->lxas_zs->lxa_zs_ostate,
+ AUDIO_SETINFO, (intptr_t)&ai, FKIOCTL)) != 0) {
+ mutex_exit(&lxa_lock);
+ return (rv);
+ }
+ }
+
+ /* update the cached mixer settings */
+ lxa_state->lxas_zs->lxa_zs_pcm_levels = lxa_ml;
+
+ mutex_exit(&lxa_lock);
+ return (0);
+}
+
+static int
+lxa_zone_reg(intptr_t arg, int mode)
+{
+ lxa_zone_reg_t lxa_zr;
+ lxa_zstate_t *lxa_zs = NULL;
+ char *idev_name = NULL, *odev_name = NULL, *pval = NULL;
+ int i, junk;
+
+ if (ddi_copyin((void *)arg, &lxa_zr, sizeof (lxa_zr), mode) != 0)
+ return (EFAULT);
+
+ /* make sure that zone_name is a valid string */
+ for (i = 0; i < sizeof (lxa_zr.lxa_zr_zone_name); i++)
+ if (lxa_zr.lxa_zr_zone_name[i] == '\0')
+ break;
+ if (i == sizeof (lxa_zr.lxa_zr_zone_name))
+ return (EINVAL);
+
+ /* make sure that inputdev is a valid string */
+ for (i = 0; i < sizeof (lxa_zr.lxa_zr_inputdev); i++)
+ if (lxa_zr.lxa_zr_inputdev[i] == '\0')
+ break;
+ if (i == sizeof (lxa_zr.lxa_zr_inputdev))
+ return (EINVAL);
+
+ /* make sure it's a valid inputdev property value */
+ if (lxa_devprop_verify(lxa_zr.lxa_zr_inputdev) != 0)
+ return (EINVAL);
+
+ /* make sure that outputdev is a valid string */
+ for (i = 0; i < sizeof (lxa_zr.lxa_zr_outputdev); i++)
+ if (lxa_zr.lxa_zr_outputdev[i] == '\0')
+ break;
+ if (i == sizeof (lxa_zr.lxa_zr_outputdev))
+ return (EINVAL);
+
+ /* make sure it's a valid outputdev property value */
+ if (lxa_devprop_verify(lxa_zr.lxa_zr_outputdev) != 0)
+ return (EINVAL);
+
+ /* get the property names */
+ idev_name = lxa_devprop_name(lxa_zr.lxa_zr_zone_name,
+ LXA_PROP_INPUTDEV);
+ odev_name = lxa_devprop_name(lxa_zr.lxa_zr_zone_name,
+ LXA_PROP_OUTPUTDEV);
+
+ /*
+ * allocate and initialize a zone state structure
+ * since the audio device can't possibly be opened yet
+ * (since we're setting it up now and the zone isn't booted
+ * yet) assign some some resonable default pcm channel settings.
+ * also, default to one mixer channel.
+ */
+ lxa_zs = kmem_zalloc(sizeof (*lxa_zs), KM_SLEEP);
+ lxa_zs->lxa_zs_zonename = strdup(lxa_zr.lxa_zr_zone_name);
+ lxa_zs->lxa_zs_pcm_levels.lxa_ml_gain = AUDIO_MID_GAIN;
+ lxa_zs->lxa_zs_pcm_levels.lxa_ml_balance = AUDIO_MID_BALANCE;
+
+ mutex_enter(&lxa_lock);
+
+ /*
+ * make sure this zone isn't already registered
+ * a zone is registered with properties for that zone exist
+ * or there is a zone state structure for that zone
+ */
+ if (ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip,
+ DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
+ idev_name, &pval) == DDI_PROP_SUCCESS) {
+ goto err_unlock;
+ }
+ if (ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip,
+ DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
+ odev_name, &pval) == DDI_PROP_SUCCESS) {
+ goto err_unlock;
+ }
+ if (mod_hash_find(lxa_zstate_hash,
+ (mod_hash_key_t)lxa_zs->lxa_zs_zonename,
+ (mod_hash_val_t *)&junk) == 0)
+ goto err_unlock;
+
+ /*
+ * create the new properties and insert the zone state structure
+ * into the global hash
+ */
+ if (ddi_prop_update_string(DDI_DEV_T_NONE, lxa_dip,
+ idev_name, lxa_zr.lxa_zr_inputdev) != DDI_PROP_SUCCESS)
+ goto err_prop_remove;
+ if (ddi_prop_update_string(DDI_DEV_T_NONE, lxa_dip,
+ odev_name, lxa_zr.lxa_zr_outputdev) != DDI_PROP_SUCCESS)
+ goto err_prop_remove;
+ if (mod_hash_insert(lxa_zstate_hash,
+ (mod_hash_key_t)lxa_zs->lxa_zs_zonename,
+ (mod_hash_val_t)lxa_zs) != 0)
+ goto err_prop_remove;
+
+ /* success! */
+ lxa_registered_zones++;
+ mutex_exit(&lxa_lock);
+
+ /* cleanup */
+ strfree(idev_name);
+ strfree(odev_name);
+ return (0);
+
+err_prop_remove:
+ (void) ddi_prop_remove(DDI_DEV_T_NONE, lxa_dip, idev_name);
+ (void) ddi_prop_remove(DDI_DEV_T_NONE, lxa_dip, odev_name);
+
+err_unlock:
+ mutex_exit(&lxa_lock);
+
+ if (lxa_zs != NULL) {
+ strfree(lxa_zs->lxa_zs_zonename);
+ kmem_free(lxa_zs, sizeof (*lxa_zs));
+ }
+ if (pval != NULL)
+ ddi_prop_free(pval);
+ if (idev_name != NULL)
+ strfree(idev_name);
+ if (odev_name != NULL)
+ strfree(odev_name);
+ return (EIO);
+}
+
+static int
+lxa_zone_unreg(intptr_t arg, int mode)
+{
+ lxa_zone_reg_t lxa_zr;
+ lxa_zstate_t *lxa_zs = NULL;
+ char *idev_name = NULL, *odev_name = NULL, *pval = NULL;
+ int rv, i;
+
+ if (ddi_copyin((void *)arg, &lxa_zr, sizeof (lxa_zr), mode) != 0)
+ return (EFAULT);
+
+ /* make sure that zone_name is a valid string */
+ for (i = 0; i < sizeof (lxa_zr.lxa_zr_zone_name); i++)
+ if (lxa_zr.lxa_zr_zone_name[i] == '\0')
+ break;
+ if (i == sizeof (lxa_zr.lxa_zr_zone_name))
+ return (EINVAL);
+
+ /* get the property names */
+ idev_name = lxa_devprop_name(lxa_zr.lxa_zr_zone_name,
+ LXA_PROP_INPUTDEV);
+ odev_name = lxa_devprop_name(lxa_zr.lxa_zr_zone_name,
+ LXA_PROP_OUTPUTDEV);
+
+ mutex_enter(&lxa_lock);
+
+ if (lxa_registered_zones <= 0) {
+ rv = ENOENT;
+ goto err_unlock;
+ }
+
+ /* make sure this zone is actually registered */
+ if (ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip,
+ DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
+ idev_name, &pval) != DDI_PROP_SUCCESS) {
+ rv = ENOENT;
+ goto err_unlock;
+ }
+ ddi_prop_free(pval);
+ pval = NULL;
+ if (ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip,
+ DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
+ odev_name, &pval) != DDI_PROP_SUCCESS) {
+ rv = ENOENT;
+ goto err_unlock;
+ }
+ ddi_prop_free(pval);
+ pval = NULL;
+ if (mod_hash_find(lxa_zstate_hash,
+ (mod_hash_key_t)lxa_zr.lxa_zr_zone_name,
+ (mod_hash_val_t *)&lxa_zs) != 0) {
+ rv = ENOENT;
+ goto err_unlock;
+ }
+ ASSERT(strcmp(lxa_zr.lxa_zr_zone_name, lxa_zs->lxa_zs_zonename) == 0);
+
+ /*
+ * if the audio device is currently in use then refuse to
+ * unregister the zone
+ */
+ if ((lxa_zs->lxa_zs_ostate != NULL) ||
+ (lxa_zs->lxa_zs_ostate != NULL)) {
+ rv = EBUSY;
+ goto err_unlock;
+ }
+
+ /* success! cleanup zone config state */
+ (void) ddi_prop_remove(DDI_DEV_T_NONE, lxa_dip, idev_name);
+ (void) ddi_prop_remove(DDI_DEV_T_NONE, lxa_dip, odev_name);
+
+ /*
+ * note, the action of removing the zone state structure from the
+ * hash will automatically free lxa_zs->lxa_zs_zonename.
+ *
+ * the reason for this is that we used lxa_zs->lxa_zs_zonename
+ * as the hash key and by default mod_hash_create_strhash() uses
+ * mod_hash_strkey_dtor() as a the hash key destructor. (which
+ * free's the key for us.
+ */
+ (void) mod_hash_remove(lxa_zstate_hash,
+ (mod_hash_key_t)lxa_zr.lxa_zr_zone_name,
+ (mod_hash_val_t *)&lxa_zs);
+ lxa_registered_zones--;
+ mutex_exit(&lxa_lock);
+
+ /* cleanup */
+ kmem_free(lxa_zs, sizeof (*lxa_zs));
+ strfree(idev_name);
+ strfree(odev_name);
+ return (0);
+
+err_unlock:
+ mutex_exit(&lxa_lock);
+
+ if (pval != NULL)
+ ddi_prop_free(pval);
+ if (idev_name != NULL)
+ strfree(idev_name);
+ if (odev_name != NULL)
+ strfree(odev_name);
+ return (rv);
+}
+
+static int
+lxa_ioctl_devctl(int cmd, intptr_t arg, int mode)
+{
+ /* devctl ioctls are only allowed from the global zone */
+ ASSERT(getzoneid() == 0);
+ if (getzoneid() != 0)
+ return (EINVAL);
+
+ switch (cmd) {
+ case LXA_IOC_ZONE_REG:
+ return (lxa_zone_reg(arg, mode));
+ case LXA_IOC_ZONE_UNREG:
+ return (lxa_zone_unreg(arg, mode));
+ }
+
+ return (EINVAL);
+}
+
+static int
+/*ARGSUSED*/
+lxa_open(dev_t *devp, int flags, int otyp, cred_t *credp)
+{
+ lxa_dev_type_t open_type = LXA_TYPE_INVALID;
+ lxa_zstate_t *lxa_zs;
+ lxa_state_t *lxa_state;
+ minor_t minor;
+ int rv;
+
+ if (getminor(*devp) == LXA_MINORNUM_DEVCTL) {
+ /*
+ * this is a devctl node, it exists to administer this
+ * pseudo driver so it doesn't actually need access to
+ * any underlying audio devices. hence there is nothing
+ * really to do here. course, this driver should
+ * only be administered from the global zone.
+ */
+ ASSERT(getzoneid() == 0);
+ if (getzoneid() != 0)
+ return (EINVAL);
+ return (0);
+ }
+
+ /* lookup the zone state structure */
+ if (mod_hash_find(lxa_zstate_hash, (mod_hash_key_t)getzonename(),
+ (mod_hash_val_t *)&lxa_zs) != 0) {
+ return (EIO);
+ }
+
+ /* determine what type of device was opened */
+ switch (getminor(*devp)) {
+ case LXA_MINORNUM_DSP:
+ open_type = LXA_TYPE_AUDIO;
+ break;
+ case LXA_MINORNUM_MIXER:
+ open_type = LXA_TYPE_AUDIOCTL;
+ break;
+ default:
+ return (EINVAL);
+ }
+ ASSERT(open_type != LXA_TYPE_INVALID);
+
+ /* all other opens are clone opens so get a new minor node */
+ minor = id_alloc(lxa_minor_id);
+
+ /* allocate and initialize the new lxa_state structure */
+ lxa_state = kmem_zalloc(sizeof (*lxa_state), KM_SLEEP);
+ lxa_state->lxas_zs = lxa_zs;
+ lxa_state->lxas_dev_old = *devp;
+ lxa_state->lxas_dev_new = makedevice(getmajor(*devp), minor);
+ lxa_state->lxas_flags = flags;
+ lxa_state->lxas_type = open_type;
+
+ /* initialize the input and output device */
+ if (((rv = lxa_dev_open(lxa_state)) != 0) ||
+ ((rv = lxa_dev_getfeatures(lxa_state)) != 0)) {
+ lxa_state_close(lxa_state);
+ return (rv);
+ }
+
+ /*
+ * save this audio statue structure into a hash indexed
+ * by it's minor device number. (this will provide a convient
+ * way to lookup the state structure on future operations.)
+ */
+ if (mod_hash_insert(lxa_state_hash, (mod_hash_key_t)(uintptr_t)minor,
+ (mod_hash_val_t)lxa_state) != 0) {
+ lxa_state_close(lxa_state);
+ return (EIO);
+ }
+
+ mutex_enter(&lxa_lock);
+
+ /* apply the currently cached zone PCM mixer levels */
+ if ((lxa_state->lxas_type == LXA_TYPE_AUDIO) &&
+ (lxa_state->lxas_odev_lh != NULL)) {
+ audio_info_t ai;
+
+ AUDIO_INITINFO(&ai);
+ ai.play.gain = lxa_zs->lxa_zs_pcm_levels.lxa_ml_gain;
+ ai.play.balance = lxa_zs->lxa_zs_pcm_levels.lxa_ml_balance;
+
+ if ((rv = lxa_audio_setinfo(lxa_state,
+ AUDIO_SETINFO, (intptr_t)&ai, FKIOCTL)) != 0) {
+ mutex_exit(&lxa_lock);
+ lxa_state_close(lxa_state);
+ return (rv);
+ }
+ }
+
+ /*
+ * we only allow one active open of the input or output device.
+ * check here for duplicate opens
+ */
+ if (lxa_state->lxas_type == LXA_TYPE_AUDIO) {
+ if ((lxa_state->lxas_idev_lh != NULL) &&
+ (lxa_zs->lxa_zs_istate != NULL)) {
+ mutex_exit(&lxa_lock);
+ lxa_state_close(lxa_state);
+ return (EBUSY);
+ }
+ if ((lxa_state->lxas_odev_lh != NULL) &&
+ (lxa_zs->lxa_zs_ostate != NULL)) {
+ mutex_exit(&lxa_lock);
+ lxa_state_close(lxa_state);
+ return (EBUSY);
+ }
+
+ /* not a duplicate open, update the global zone state */
+ if (lxa_state->lxas_idev_lh != NULL)
+ lxa_zs->lxa_zs_istate = lxa_state;
+ if (lxa_state->lxas_odev_lh != NULL)
+ lxa_zs->lxa_zs_ostate = lxa_state;
+ }
+ mutex_exit(&lxa_lock);
+
+ /* make sure to return our newly allocated dev_t */
+ *devp = lxa_state->lxas_dev_new;
+ return (0);
+}
+
+static int
+/*ARGSUSED*/
+lxa_close(dev_t dev, int flags, int otyp, cred_t *credp)
+{
+ lxa_state_t *lxa_state;
+ minor_t minor = getminor(dev);
+
+ /* handle devctl minor nodes (these nodes don't have a handle */
+ if (getminor(dev) == LXA_MINORNUM_DEVCTL)
+ return (0);
+
+ /* get the handle for this device */
+ if (mod_hash_find(lxa_state_hash, (mod_hash_key_t)(uintptr_t)minor,
+ (mod_hash_val_t *)&lxa_state) != 0)
+ return (EINVAL);
+
+ lxa_state_close(lxa_state);
+ return (0);
+}
+
+static int
+/*ARGSUSED*/
+lxa_read(dev_t dev, struct uio *uiop, cred_t *credp)
+{
+ lxa_state_t *lxa_state;
+ minor_t minor = getminor(dev);
+ int rv;
+
+ /* get the handle for this device */
+ if (mod_hash_find(lxa_state_hash, (mod_hash_key_t)(uintptr_t)minor,
+ (mod_hash_val_t *)&lxa_state) != 0)
+ return (EINVAL);
+
+ /*
+ * if a process has mmaped this device then we don't allow
+ * any more reads or writes to the device
+ */
+ if (lxa_state->lxas_umem_cookie != NULL)
+ return (EIO);
+
+ /* we can't do a read if there is no input device */
+ if (lxa_state->lxas_idev_lh == NULL)
+ return (EBADF);
+
+ /* pass the request on */
+ while (uiop->uio_resid != 0) {
+ rv = ldi_read(lxa_state->lxas_idev_lh, uiop, kcred);
+ if ((rv != 0) || (uiop->uio_fmode & (FNONBLOCK|FNDELAY))) {
+ break;
+ }
+ }
+ return (rv);
+}
+
+static int
+/*ARGSUSED*/
+lxa_write(dev_t dev, struct uio *uiop, cred_t *credp)
+{
+ lxa_state_t *lxa_state;
+ minor_t minor = getminor(dev);
+ int rv;
+
+ /* get the handle for this device */
+ if (mod_hash_find(lxa_state_hash, (mod_hash_key_t)(uintptr_t)minor,
+ (mod_hash_val_t *)&lxa_state) != 0)
+ return (EINVAL);
+
+ /*
+ * if a process has mmaped this device then we don't allow
+ * any more reads or writes to the device
+ */
+ if (lxa_state->lxas_umem_cookie != NULL)
+ return (EIO);
+
+ /* we can't do a write if there is no output device */
+ if (lxa_state->lxas_odev_lh == NULL)
+ return (EBADF);
+
+ /* pass the request on */
+ while (uiop->uio_resid != 0) {
+ rv = ldi_write(lxa_state->lxas_odev_lh, uiop, kcred);
+ if ((rv != 0) || (uiop->uio_fmode & (FNONBLOCK|FNDELAY))) {
+ break;
+ }
+ }
+ return (rv);
+}
+
+static int
+/*ARGSUSED*/
+lxa_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
+ int *rvalp)
+{
+ lxa_state_t *lxa_state;
+ minor_t minor = getminor(dev);
+
+ /* handle devctl minor nodes (these nodes don't have a handle */
+ if (getminor(dev) == LXA_MINORNUM_DEVCTL)
+ return (lxa_ioctl_devctl(cmd, arg, mode));
+
+ /* get the handle for this device */
+ if (mod_hash_find(lxa_state_hash, (mod_hash_key_t)(uintptr_t)minor,
+ (mod_hash_val_t *)&lxa_state) != 0)
+ return (EINVAL);
+
+ ASSERT((lxa_state->lxas_type == LXA_TYPE_AUDIO) ||
+ (lxa_state->lxas_type == LXA_TYPE_AUDIOCTL));
+
+ switch (cmd) {
+ case LXA_IOC_GETMINORNUM:
+ {
+ int minornum = getminor(lxa_state->lxas_dev_old);
+ if (ddi_copyout(&minornum, (void *)arg,
+ sizeof (minornum), mode) != 0)
+ return (EFAULT);
+ }
+ return (0);
+ }
+
+ if (lxa_state->lxas_type == LXA_TYPE_AUDIO) {
+ /* deal with native ioctl */
+ switch (cmd) {
+ case LXA_IOC_MMAP_OUTPUT:
+ return (lxa_ioc_mmap_output(lxa_state, arg, mode));
+ case LXA_IOC_MMAP_PTR:
+ return (lxa_ioc_mmap_ptr(lxa_state, arg, mode));
+ case LXA_IOC_GET_FRAG_INFO:
+ return (lxa_ioc_get_frag_info(lxa_state, arg, mode));
+ case LXA_IOC_SET_FRAG_INFO:
+ return (lxa_ioc_set_frag_info(lxa_state, arg, mode));
+ }
+
+ /* deal with layered ioctls */
+ switch (cmd) {
+ case AUDIO_DRAIN:
+ return (lxa_audio_drain(lxa_state));
+ case AUDIO_SETINFO:
+ return (lxa_audio_setinfo(lxa_state,
+ AUDIO_SETINFO, arg, mode));
+ case AUDIO_GETINFO:
+ return (lxa_audio_getinfo(lxa_state, arg, mode));
+ }
+ }
+
+ if (lxa_state->lxas_type == LXA_TYPE_AUDIOCTL) {
+ /* deal with native ioctl */
+ switch (cmd) {
+ case LXA_IOC_MIXER_GET_VOL:
+ return (lxa_mixer_get_common(lxa_state,
+ cmd, arg, mode));
+ case LXA_IOC_MIXER_SET_VOL:
+ return (lxa_mixer_set_common(lxa_state,
+ cmd, arg, mode));
+ case LXA_IOC_MIXER_GET_MIC:
+ return (lxa_mixer_get_common(lxa_state,
+ cmd, arg, mode));
+ case LXA_IOC_MIXER_SET_MIC:
+ return (lxa_mixer_set_common(lxa_state,
+ cmd, arg, mode));
+ case LXA_IOC_MIXER_GET_PCM:
+ return (lxa_mixer_get_pcm(lxa_state, arg, mode));
+ case LXA_IOC_MIXER_SET_PCM:
+ return (lxa_mixer_set_pcm(lxa_state, arg, mode));
+ }
+
+ }
+
+ return (EINVAL);
+}
+
+static int
+/*ARGSUSED*/
+lxa_devmap(dev_t dev, devmap_cookie_t dhp,
+ offset_t off, size_t len, size_t *maplen, uint_t model)
+{
+ lxa_state_t *lxa_state;
+ minor_t minor = getminor(dev);
+ ddi_umem_cookie_t umem_cookie;
+ void *umem_ptr;
+ int rv;
+
+ /* get the handle for this device */
+ if (mod_hash_find(lxa_state_hash, (mod_hash_key_t)(uintptr_t)minor,
+ (mod_hash_val_t *)&lxa_state) != 0)
+ return (EINVAL);
+
+ /* we only support mmaping of audio devices */
+ if (lxa_state->lxas_type != LXA_TYPE_AUDIO)
+ return (EINVAL);
+
+ /* we only support output via mmap */
+ if ((lxa_state->lxas_flags & FWRITE) == 0)
+ return (EINVAL);
+
+ /* sanity check the amount of memory the user is allocating */
+ if ((len == 0) ||
+ (len > LXA_OSS_FRAG_MEM) ||
+ ((len % lxa_state->lxas_frag_size) != 0))
+ return (EINVAL);
+
+ /* allocate and clear memory to mmap */
+ umem_ptr = ddi_umem_alloc(len, DDI_UMEM_NOSLEEP, &umem_cookie);
+ if (umem_ptr == NULL)
+ return (ENOMEM);
+ bzero(umem_ptr, len);
+
+ /* setup the memory mappings */
+ rv = devmap_umem_setup(dhp, lxa_dip, NULL, umem_cookie, 0, len,
+ PROT_USER | PROT_READ | PROT_WRITE, 0, NULL);
+ if (rv != 0) {
+ ddi_umem_free(umem_cookie);
+ return (EIO);
+ }
+
+ mutex_enter(&lxa_lock);
+
+ /* we only support one mmap per open */
+ if (lxa_state->lxas_umem_cookie != NULL) {
+ ASSERT(lxa_state->lxas_umem_ptr != NULL);
+ mutex_exit(&lxa_lock);
+ ddi_umem_free(umem_cookie);
+ return (EBUSY);
+ }
+ ASSERT(lxa_state->lxas_umem_ptr == NULL);
+
+ *maplen = len;
+ lxa_state->lxas_umem_len = len;
+ lxa_state->lxas_umem_ptr = umem_ptr;
+ lxa_state->lxas_umem_cookie = umem_cookie;
+ mutex_exit(&lxa_lock);
+ return (0);
+}
+
+static int
+/*ARGSUSED*/
+lxa_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int instance = ddi_get_instance(dip);
+
+ if (cmd != DDI_ATTACH)
+ return (DDI_FAILURE);
+
+ ASSERT(instance == 0);
+ if (instance != 0)
+ return (DDI_FAILURE);
+
+ lxa_dip = dip;
+ mutex_init(&lxa_lock, NULL, MUTEX_DEFAULT, NULL);
+
+ /* create our minor nodes */
+ if (ddi_create_minor_node(dip, LXA_MINORNAME_DEVCTL, S_IFCHR,
+ LXA_MINORNUM_DEVCTL, DDI_PSEUDO, 0) != DDI_SUCCESS)
+ return (DDI_FAILURE);
+
+ if (ddi_create_minor_node(dip, LXA_MINORNAME_DSP, S_IFCHR,
+ LXA_MINORNUM_DSP, DDI_PSEUDO, 0) != DDI_SUCCESS)
+ return (DDI_FAILURE);
+
+ if (ddi_create_minor_node(dip, LXA_MINORNAME_MIXER, S_IFCHR,
+ LXA_MINORNUM_MIXER, DDI_PSEUDO, 0) != DDI_SUCCESS)
+ return (DDI_FAILURE);
+
+ /* allocate our data structures */
+ lxa_minor_id = id_space_create("lxa_minor_id",
+ LXA_MINORNUM_COUNT, LX_AUDIO_MAX_OPENS);
+ lxa_state_hash = mod_hash_create_idhash("lxa_state_hash",
+ lxa_state_hash_size, mod_hash_null_valdtor);
+ lxa_zstate_hash = mod_hash_create_strhash("lxa_zstate_hash",
+ lxa_zstate_hash_size, mod_hash_null_valdtor);
+
+ return (DDI_SUCCESS);
+}
+
+static int
+/*ARGSUSED*/
+lxa_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ if (cmd != DDI_DETACH)
+ return (DDI_FAILURE);
+
+ ASSERT(!MUTEX_HELD(&lxa_lock));
+ if (lxa_registered_zones > 0)
+ return (DDI_FAILURE);
+
+ mod_hash_destroy_idhash(lxa_state_hash);
+ mod_hash_destroy_idhash(lxa_zstate_hash);
+ id_space_destroy(lxa_minor_id);
+ lxa_state_hash = NULL;
+ lxa_dip = NULL;
+
+ return (DDI_SUCCESS);
+}
+
+static int
+/*ARGSUSED*/
+lxa_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **resultp)
+{
+ switch (infocmd) {
+ case DDI_INFO_DEVT2DEVINFO:
+ *resultp = lxa_dip;
+ return (DDI_SUCCESS);
+
+ case DDI_INFO_DEVT2INSTANCE:
+ *resultp = (void *)0;
+ return (DDI_SUCCESS);
+ }
+ return (DDI_FAILURE);
+}
+
+/*
+ * Driver flags
+ */
+static struct cb_ops lxa_cb_ops = {
+ lxa_open, /* open */
+ lxa_close, /* close */
+ nodev, /* strategy */
+ nodev, /* print */
+ nodev, /* dump */
+ lxa_read, /* read */
+ lxa_write, /* write */
+ lxa_ioctl, /* ioctl */
+ lxa_devmap, /* devmap */
+ nodev, /* mmap */
+ ddi_devmap_segmap, /* segmap */
+ nochpoll, /* chpoll */
+ ddi_prop_op, /* prop_op */
+ NULL, /* cb_str */
+ D_NEW | D_MP | D_DEVMAP,
+ CB_REV,
+ NULL,
+ NULL
+};
+
+static struct dev_ops lxa_ops = {
+ DEVO_REV,
+ 0,
+ lxa_getinfo,
+ nulldev,
+ nulldev,
+ lxa_attach,
+ lxa_detach,
+ nodev,
+ &lxa_cb_ops,
+ NULL,
+ NULL,
+ ddi_quiesce_not_needed, /* quiesce */
+};
+
+/*
+ * Module linkage information for the kernel.
+ */
+static struct modldrv modldrv = {
+ &mod_driverops, /* type of module */
+ "linux audio driver", /* description of module */
+ &lxa_ops /* driver ops */
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ &modldrv,
+ NULL
+};
+
+/*
+ * standard module entry points
+ */
+int
+_init(void)
+{
+ return (mod_install(&modlinkage));
+}
+
+int
+_fini(void)
+{
+ return (mod_remove(&modlinkage));
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
diff --git a/usr/src/uts/common/brand/lx/io/lx_audio.conf b/usr/src/uts/common/brand/lx/io/lx_audio.conf
new file mode 100644
index 0000000000..2eeb5eb7ee
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/io/lx_audio.conf
@@ -0,0 +1,27 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+
+name="lx_audio" parent="pseudo" instance=0;
diff --git a/usr/src/uts/common/brand/lx/io/lx_netlink.c b/usr/src/uts/common/brand/lx/io/lx_netlink.c
new file mode 100644
index 0000000000..3ff99de1b1
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/io/lx_netlink.c
@@ -0,0 +1,1170 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Compatibility for the Linux netlink(7) kernel/user transport, as well as
+ * for in-kernel netlink(7) providers like rtnetlink(7). See RFC 3549 for
+ * details of the protocol, and the Linux man pages for details of the Linux
+ * implementation that we're mimicking.
+ */
+
+#include <sys/strsubr.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+#include <sys/strsun.h>
+#include <sys/tihdr.h>
+#include <sys/sockio.h>
+#include <sys/brand.h>
+#include <inet/ip.h>
+#include <inet/ip_impl.h>
+
+/*
+ * Flags in netlink header
+ */
+#define LX_NETLINK_NLM_F_REQUEST 1
+#define LX_NETLINK_NLM_F_MULTI 2
+#define LX_NETLINK_NLM_F_ACK 4
+#define LX_NETLINK_NLM_F_ECHO 8
+#define LX_NETLINK_NLM_F_DUMP_INTR 16
+#define LX_NETLINK_NLM_F_ROOT 0x100
+#define LX_NETLINK_NLM_F_MATCH 0x200
+#define LX_NETLINK_NLM_F_ATOMIC 0x400
+
+/*
+ * Generic message type constants
+ */
+#define LX_NETLINK_NLMSG_NONE 0
+#define LX_NETLINK_NLMSG_NOOP 1
+#define LX_NETLINK_NLMSG_ERROR 2
+#define LX_NETLINK_NLMSG_DONE 3
+#define LX_NETLINK_NLMSG_OVERRUN 4
+
+/*
+ * Protocol constants.
+ */
+#define LX_NETLINK_ROUTE 0
+#define LX_NETLINK_UNUSED 1
+#define LX_NETLINK_USERSOCK 2
+#define LX_NETLINK_FIREWALL 3
+#define LX_NETLINK_SOCK_DIAG 4
+#define LX_NETLINK_NFLOG 5
+#define LX_NETLINK_XFRM 6
+#define LX_NETLINK_SELINUX 7
+#define LX_NETLINK_ISCSI 8
+#define LX_NETLINK_AUDIT 9
+#define LX_NETLINK_FIB_LOOKUP 10
+#define LX_NETLINK_CONNECTOR 11
+#define LX_NETLINK_NETFILTER 12
+#define LX_NETLINK_IP6_FW 13
+#define LX_NETLINK_DNRTMSG 14
+#define LX_NETLINK_KOBJECT_UEVENT 15
+#define LX_NETLINK_GENERIC 16
+#define LX_NETLINK_SCSITRANSPORT 18
+#define LX_NETLINK_ECRYPTFS 19
+#define LX_NETLINK_RDMA 20
+#define LX_NETLINK_CRYPTO 21
+
+/*
+ * rtnetlink(7) attribute-related constants
+ */
+#define LX_NETLINK_NLA_ALIGNTO 4
+
+#define LX_NETLINK_RTM_NEWLINK 16
+#define LX_NETLINK_RTM_DELLINK 17
+#define LX_NETLINK_RTM_GETLINK 18
+#define LX_NETLINK_RTM_SETLINK 19
+#define LX_NETLINK_RTM_NEWADDR 20
+#define LX_NETLINK_RTM_DELADDR 21
+#define LX_NETLINK_RTM_GETADDR 22
+#define LX_NETLINK_RTM_NEWROUTE 24
+#define LX_NETLINK_RTM_DELROUTE 25
+#define LX_NETLINK_RTM_GETROUTE 26
+#define LX_NETLINK_RTM_NEWNEIGH 28
+#define LX_NETLINK_RTM_DELNEIGH 29
+#define LX_NETLINK_RTM_GETNEIGH 30
+#define LX_NETLINK_RTM_NEWRULE 32
+#define LX_NETLINK_RTM_DELRULE 33
+#define LX_NETLINK_RTM_GETRULE 34
+#define LX_NETLINK_RTM_NEWQDISC 36
+#define LX_NETLINK_RTM_DELQDISC 37
+#define LX_NETLINK_RTM_GETQDISC 38
+#define LX_NETLINK_RTM_NEWTCLASS 40
+#define LX_NETLINK_RTM_DELTCLASS 41
+#define LX_NETLINK_RTM_GETTCLASS 42
+#define LX_NETLINK_RTM_NEWTFILTER 44
+#define LX_NETLINK_RTM_DELTFILTER 45
+#define LX_NETLINK_RTM_GETTFILTER 46
+#define LX_NETLINK_RTM_NEWACTION 48
+#define LX_NETLINK_RTM_DELACTION 49
+#define LX_NETLINK_RTM_GETACTION 50
+#define LX_NETLINK_RTM_NEWPREFIX 52
+#define LX_NETLINK_RTM_GETMULTICAST 58
+#define LX_NETLINK_RTM_GETANYCAST 62
+#define LX_NETLINK_RTM_NEWNEIGHTBL 64
+#define LX_NETLINK_RTM_GETNEIGHTBL 66
+#define LX_NETLINK_RTM_SETNEIGHTBL 67
+#define LX_NETLINK_RTM_NEWNDUSEROPT 68
+#define LX_NETLINK_RTM_NEWADDRLABEL 72
+#define LX_NETLINK_RTM_DELADDRLABEL 73
+#define LX_NETLINK_RTM_GETADDRLABEL 74
+#define LX_NETLINK_RTM_GETDCB 78
+#define LX_NETLINK_RTM_SETDCB 79
+#define LX_NETLINK_RTM_NEWNETCONF 80
+#define LX_NETLINK_RTM_GETNETCONF 82
+#define LX_NETLINK_RTM_NEWMDB 84
+#define LX_NETLINK_RTM_DELMDB 85
+#define LX_NETLINK_RTM_GETMDB 86
+#define LX_NETLINK_RTM_MAX 87
+
+/*
+ * rtnetlink(7) attribute constants
+ */
+#define LX_NETLINK_RTA_UNSPEC 1
+#define LX_NETLINK_RTA_DST 2
+#define LX_NETLINK_RTA_SRC 3
+#define LX_NETLINK_RTA_IIF 4
+#define LX_NETLINK_RTA_OIF 5
+#define LX_NETLINK_RTA_GATEWAY 6
+#define LX_NETLINK_RTA_PRIORITY 7
+#define LX_NETLINK_RTA_PREFSRC 8
+#define LX_NETLINK_RTA_METRICS 9
+#define LX_NETLINK_RTA_MULTIPATH 10
+#define LX_NETLINK_RTA_PROTOINFO 11
+#define LX_NETLINK_RTA_FLOW 12
+#define LX_NETLINK_RTA_CACHEINFO 13
+#define LX_NETLINK_RTA_SESSION 14
+#define LX_NETLINK_RTA_MP_ALGO 15
+#define LX_NETLINK_RTA_TABLE 16
+#define LX_NETLINK_RTA_MARK 17
+#define LX_NETLINK_RTA_MFC_STATS 18
+
+/*
+ * rtnetlink(7) NEWLINK/DELLINK/GETLINK constants
+ */
+#define LX_NETLINK_IFLA_UNSPEC 0
+#define LX_NETLINK_IFLA_ADDRESS 1
+#define LX_NETLINK_IFLA_BROADCAST 2
+#define LX_NETLINK_IFLA_IFNAME 3
+#define LX_NETLINK_IFLA_MTU 4
+#define LX_NETLINK_IFLA_LINK 5
+#define LX_NETLINK_IFLA_QDISC 6
+#define LX_NETLINK_IFLA_STATS 7
+#define LX_NETLINK_IFLA_COST 8
+#define LX_NETLINK_IFLA_PRIORITY 9
+#define LX_NETLINK_IFLA_MASTER 10
+#define LX_NETLINK_IFLA_WIRELESS 11
+#define LX_NETLINK_IFLA_PROTINFO 12
+#define LX_NETLINK_IFLA_TXQLEN 13
+#define LX_NETLINK_IFLA_MAP 14
+#define LX_NETLINK_IFLA_WEIGHT 15
+#define LX_NETLINK_IFLA_OPERSTATE 16
+#define LX_NETLINK_IFLA_LINKMODE 17
+#define LX_NETLINK_IFLA_LINKINFO 18
+#define LX_NETLINK_IFLA_NET_NS_PID 19
+#define LX_NETLINK_IFLA_IFALIAS 20
+#define LX_NETLINK_IFLA_NUM_VF 21
+#define LX_NETLINK_IFLA_VFINFO_LIST 22
+#define LX_NETLINK_IFLA_STATS64 23
+#define LX_NETLINK_IFLA_VF_PORTS 24
+#define LX_NETLINK_IFLA_PORT_SELF 25
+#define LX_NETLINK_IFLA_AF_SPEC 26
+#define LX_NETLINK_IFLA_GROUP 27
+#define LX_NETLINK_IFLA_NET_NS_FD 28
+#define LX_NETLINK_IFLA_EXT_MASK 29
+#define LX_NETLINK_IFLA_PROMISCUITY 30
+#define LX_NETLINK_IFLA_NUM_TX_QUEUES 31
+#define LX_NETLINK_IFLA_NUM_RX_QUEUES 32
+#define LX_NETLINK_IFLA_CARRIER 33
+#define LX_NETLINK_IFLA_PHYS_PORT_ID 34
+#define LX_NETLINK_IFLA_CARRIER_CHANGES 35
+#define LX_NETLINK_IFLA_MAX 36
+
+/*
+ * rtnetlink(7) NEWADDR/DELADDR/GETADDR constants
+ */
+#define LX_NETLINK_IFA_UNSPEC 0
+#define LX_NETLINK_IFA_ADDRESS 1
+#define LX_NETLINK_IFA_LOCAL 2
+#define LX_NETLINK_IFA_LABEL 3
+#define LX_NETLINK_IFA_BROADCAST 4
+#define LX_NETLINK_IFA_ANYCAST 5
+#define LX_NETLINK_IFA_CACHEINFO 6
+#define LX_NETLINK_IFA_MULTICAST 7
+#define LX_NETLINK_IFA_FLAGS 8
+#define LX_NETLINK_IFA_MAX 9
+
+#define LX_NETLINK_IFA_F_SECONDARY 0x01
+#define LX_NETLINK_IFA_F_TEMPORARY LX_NETLINK_IFA_F_SECONDARY
+#define LX_NETLINK_IFA_F_NODAD 0x02
+#define LX_NETLINK_IFA_F_OPTIMISTIC 0x04
+#define LX_NETLINK_IFA_F_DADFAILED 0x08
+#define LX_NETLINK_IFA_F_HOMEADDRESS 0x10
+#define LX_NETLINK_IFA_F_DEPRECATED 0x20
+#define LX_NETLINK_IFA_F_TENTATIVE 0x40
+#define LX_NETLINK_IFA_F_PERMANENT 0x80
+#define LX_NETLINK_IFA_F_MANAGETEMPADDR 0x100
+#define LX_NETLINK_IFA_F_NOPREFIXROUTE 0x200
+
+/*
+ * Linux address families.
+ */
+#define LX_AF_INET 2
+#define LX_AF_INET6 10
+#define LX_AF_NETLINK 16
+
+/*
+ * Linux interface flags.
+ */
+#define LX_IFF_UP (1<<0)
+#define LX_IFF_BROADCAST (1<<1)
+#define LX_IFF_DEBUG (1<<2)
+#define LX_IFF_LOOPBACK (1<<3)
+#define LX_IFF_POINTOPOINT (1<<4)
+#define LX_IFF_NOTRAILERS (1<<5)
+#define LX_IFF_RUNNING (1<<6)
+#define LX_IFF_NOARP (1<<7)
+#define LX_IFF_PROMISC (1<<8)
+#define LX_IFF_ALLMULTI (1<<9)
+#define LX_IFF_MASTER (1<<10)
+#define LX_IFF_SLAVE (1<<11)
+#define LX_IFF_MULTICAST (1<<12)
+#define LX_IFF_PORTSEL (1<<13)
+#define LX_IFF_AUTOMEDIA (1<<14)
+#define LX_IFF_DYNAMIC (1<<15)
+#define LX_IFF_LOWER_UP (1<<16)
+#define LX_IFF_DORMANT (1<<17)
+#define LX_IFF_ECHO (1<<18)
+
+typedef struct lx_netlink_hdr {
+ uint32_t lxnh_len; /* length of message */
+ uint16_t lxnh_type; /* type of message */
+ uint16_t lxnh_flags; /* flags */
+ uint32_t lxnh_seq; /* sequence number */
+ uint32_t lxnh_pid; /* sending pid */
+} lx_netlink_hdr_t;
+
+typedef struct lx_netlink_err {
+ lx_netlink_hdr_t lxne_hdr; /* header */
+ int32_t lxne_errno; /* errno */
+ lx_netlink_hdr_t lxne_failed; /* header of err */
+} lx_netlink_err_t;
+
+typedef struct lx_netlink_attr {
+ uint16_t lxna_len; /* length of attribute */
+ uint16_t lxna_type; /* type of attribute */
+} lx_netlink_attr_t;
+
+typedef struct lx_netlink_ifinfomsg {
+ uint8_t lxnl_ifi_family; /* family: AF_UNSPEC */
+ uint8_t lxnl_ifi_type; /* device type */
+ uint32_t lxnl_ifi_index; /* interface index */
+ uint32_t lxnl_ifi_flags; /* device flags */
+ uint32_t lxnl_ifi_change; /* unused; must be -1 */
+} lx_netlink_ifinfomsg_t;
+
+typedef struct lx_netlink_ifaddrmsg {
+ uint8_t lxnl_ifa_family; /* address type */
+ uint8_t lxnl_ifa_prefixlen; /* prefix length of address */
+ uint8_t lxnl_ifa_flags; /* address flags */
+ uint8_t lxnl_ifa_scope; /* address scope */
+ uint8_t lxnl_ifa_index; /* interface index */
+} lx_netlink_ifaddrmsg_t;
+
+typedef struct lx_netlink_sockaddr {
+ sa_family_t lxnl_family; /* AF_LX_NETLINK */
+ uint16_t lxnl_pad; /* padding */
+ uint32_t lxnl_port; /* port id */
+ uint32_t lxnl_groups; /* multicast groups mask */
+} lx_netlink_sockaddr_t;
+
+typedef struct lx_netlink_sock {
+ struct lx_netlink_sock *lxns_next; /* list of lx_netlink sockets */
+ sock_upcalls_t *lxns_upcalls; /* pointer to socket upcalls */
+ sock_upper_handle_t lxns_uphandle; /* socket upcall handle */
+ ldi_handle_t lxns_iphandle; /* handle to /dev/ip */
+ ldi_handle_t lxns_ip6handle; /* handle to /dev/ip6 */
+ ldi_handle_t lxns_current; /* current ip handle */
+ int lxns_proto; /* protocol */
+ uint32_t lxns_port; /* port identifier */
+ uint32_t lxns_bufsize; /* buffer size */
+} lx_netlink_sock_t;
+
+typedef struct lx_netlink_reply {
+ lx_netlink_hdr_t lxnr_hdr; /* header that we're reply to */
+ lx_netlink_sock_t *lxnr_sock; /* socket */
+ uint32_t lxnr_seq; /* sequence number */
+ uint16_t lxnr_type; /* type of reply */
+ mblk_t *lxnr_mp; /* current mblk */
+ mblk_t *lxnr_err; /* error mblk */
+ mblk_t *lxnr_mp1; /* T_UNITDATA_IND mblk */
+ int lxnr_errno; /* errno, if any */
+} lx_netlink_reply_t;
+
+static lx_netlink_sock_t *lx_netlink_head; /* head of lx_netlink sockets */
+static kmutex_t lx_netlink_lock; /* lock to protect state */
+static ldi_ident_t lx_netlink_ldi; /* LDI handle */
+static uint32_t lx_netlink_port; /* next port identifier */
+static int lx_netlink_bufsize = 4096; /* default buffer size */
+static int lx_netlink_flowctrld; /* # of times flow controlled */
+
+/*ARGSUSED*/
+static void
+lx_netlink_activate(sock_lower_handle_t handle,
+ sock_upper_handle_t sock_handle, sock_upcalls_t *sock_upcalls,
+ int flags, cred_t *cr)
+{
+ lx_netlink_sock_t *lxsock = (lx_netlink_sock_t *)handle;
+ struct sock_proto_props sopp;
+
+ sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT |
+ SOCKOPT_RCVLOWAT | SOCKOPT_MAXADDRLEN | SOCKOPT_MAXPSZ |
+ SOCKOPT_MAXBLK | SOCKOPT_MINPSZ;
+ sopp.sopp_wroff = 0;
+ sopp.sopp_rxhiwat = SOCKET_RECVHIWATER;
+ sopp.sopp_rxlowat = SOCKET_RECVLOWATER;
+ sopp.sopp_maxaddrlen = sizeof (struct sockaddr_dl);
+ sopp.sopp_maxpsz = INFPSZ;
+ sopp.sopp_maxblk = INFPSZ;
+ sopp.sopp_minpsz = 0;
+
+ lxsock->lxns_upcalls = sock_upcalls;
+ lxsock->lxns_uphandle = sock_handle;
+
+ sock_upcalls->su_set_proto_props(sock_handle, &sopp);
+}
+
+/*ARGSUSED*/
+static int
+lx_netlink_setsockopt(sock_lower_handle_t handle, int level,
+ int option_name, const void *optval, socklen_t optlen, struct cred *cr)
+{
+ if (level == SOL_SOCKET)
+ return (0);
+
+ return (EOPNOTSUPP);
+}
+
+/*ARGSUSED*/
+static int
+lx_netlink_bind(sock_lower_handle_t handle, struct sockaddr *name,
+ socklen_t namelen, struct cred *cr)
+{
+ lx_netlink_sockaddr_t *lxsa = (lx_netlink_sockaddr_t *)name;
+
+ if (namelen != sizeof (lx_netlink_sockaddr_t) ||
+ lxsa->lxnl_family != AF_LX_NETLINK) {
+ return (EINVAL);
+ }
+
+ if (lxsa->lxnl_groups != 0 || lxsa->lxnl_port != 0)
+ return (EOPNOTSUPP);
+
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+lx_netlink_getsockname(sock_lower_handle_t handle, struct sockaddr *sa,
+ socklen_t *len, struct cred *cr)
+{
+ lx_netlink_sock_t *lxsock = (lx_netlink_sock_t *)handle;
+ lx_netlink_sockaddr_t *lxsa = (lx_netlink_sockaddr_t *)sa;
+ proc_t *p = curthread->t_procp;
+
+ if (*len < sizeof (lx_netlink_sockaddr_t))
+ return (EINVAL);
+
+ /*
+ * Make sure our lies are consistent with the lies told by other liars.
+ */
+ if (p->p_brand != &native_brand && curthread != p->p_agenttp) {
+ lxsa->lxnl_family = LX_AF_NETLINK;
+ } else {
+ lxsa->lxnl_family = AF_LX_NETLINK;
+ }
+
+ lxsa->lxnl_pad = 0;
+ lxsa->lxnl_port = lxsock->lxns_port;
+ lxsa->lxnl_groups = 0;
+
+ *len = sizeof (lx_netlink_sockaddr_t);
+
+ return (0);
+}
+
+static mblk_t *
+lx_netlink_alloc_mp1()
+{
+ return (allocb(sizeof (struct T_unitdata_ind) +
+ sizeof (lx_netlink_sockaddr_t), 0));
+}
+
+static lx_netlink_reply_t *
+lx_netlink_reply(lx_netlink_sock_t *lxsock,
+ lx_netlink_hdr_t *hdr, uint16_t type)
+{
+ lx_netlink_reply_t *reply;
+ mblk_t *err, *mp1;
+
+ /*
+ * We always allocate an error block to assure that even if subsequent
+ * allocations fail, we can return an error.
+ */
+ if ((err = allocb(sizeof (lx_netlink_err_t), 0)) == NULL)
+ return (NULL);
+
+ if ((mp1 = lx_netlink_alloc_mp1()) == NULL) {
+ freeb(err);
+ return (NULL);
+ }
+
+ reply = kmem_zalloc(sizeof (lx_netlink_reply_t), KM_SLEEP);
+ reply->lxnr_err = err;
+ reply->lxnr_sock = lxsock;
+ reply->lxnr_hdr = *hdr;
+ reply->lxnr_type = type;
+ reply->lxnr_mp1 = mp1;
+
+ return (reply);
+}
+
+static void
+lx_netlink_reply_add(lx_netlink_reply_t *reply, void *payload, uint32_t size)
+{
+ lx_netlink_hdr_t *hdr;
+ lx_netlink_sock_t *lxsock = reply->lxnr_sock;
+ uint32_t align, alignto = LX_NETLINK_NLA_ALIGNTO;
+ mblk_t *mp = reply->lxnr_mp;
+
+ if (reply->lxnr_errno)
+ return;
+
+ align = (alignto - (size & (alignto - 1))) & (alignto - 1);
+
+ hdr = (lx_netlink_hdr_t *)mp->b_rptr;
+
+ if (hdr->lxnh_len + size + align > lxsock->lxns_bufsize) {
+ reply->lxnr_errno = E2BIG;
+ return;
+ }
+
+ bcopy(payload, mp->b_wptr, size);
+
+ hdr->lxnh_len += size + align;
+ mp->b_wptr += size + align;
+}
+
+static void
+lx_netlink_reply_msg(lx_netlink_reply_t *reply, void *payload, uint32_t size)
+{
+ lx_netlink_hdr_t *hdr;
+ lx_netlink_sock_t *lxsock = reply->lxnr_sock;
+ mblk_t *mp;
+
+ if (reply->lxnr_errno)
+ return;
+
+ VERIFY(reply->lxnr_mp == NULL);
+
+ if ((reply->lxnr_mp = mp = allocb(lxsock->lxns_bufsize, 0)) == NULL) {
+ reply->lxnr_errno = ENOMEM;
+ return;
+ }
+
+ bzero(mp->b_rptr, lxsock->lxns_bufsize);
+ hdr = (lx_netlink_hdr_t *)mp->b_rptr;
+ hdr->lxnh_flags = LX_NETLINK_NLM_F_MULTI;
+ hdr->lxnh_len = sizeof (lx_netlink_hdr_t);
+ hdr->lxnh_seq = reply->lxnr_hdr.lxnh_seq;
+ hdr->lxnh_pid = lxsock->lxns_port;
+
+ mp->b_wptr += sizeof (lx_netlink_hdr_t);
+
+ if (payload == NULL) {
+ /*
+ * A NULL payload denotes a "done" message.
+ */
+ hdr->lxnh_type = LX_NETLINK_NLMSG_DONE;
+ } else {
+ hdr->lxnh_type = reply->lxnr_type;
+ lx_netlink_reply_add(reply, payload, size);
+ }
+}
+
+static void
+lx_netlink_reply_attr(lx_netlink_reply_t *reply, uint16_t type,
+ void *payload, uint32_t size)
+{
+ lx_netlink_attr_t attr;
+
+ attr.lxna_len = size + sizeof (lx_netlink_attr_t);
+ attr.lxna_type = type;
+
+ lx_netlink_reply_add(reply, &attr, sizeof (attr));
+ lx_netlink_reply_add(reply, payload, size);
+}
+
+static void
+lx_netlink_reply_attr_string(lx_netlink_reply_t *reply,
+ uint16_t type, const char *str)
+{
+ lx_netlink_reply_attr(reply, type, (void *)str, strlen(str) + 1);
+}
+
+static void
+lx_netlink_reply_attr_int32(lx_netlink_reply_t *reply,
+ uint16_t type, int32_t val)
+{
+ int32_t v = val;
+
+ lx_netlink_reply_attr(reply, type, &v, sizeof (int32_t));
+}
+
+static int
+lx_netlink_reply_ioctl(lx_netlink_reply_t *reply, int cmd, void *arg)
+{
+ int rval;
+
+ if (reply->lxnr_errno != 0)
+ return (reply->lxnr_errno);
+
+ if ((rval = ldi_ioctl(reply->lxnr_sock->lxns_current,
+ cmd, (intptr_t)arg, FKIOCTL, kcred, NULL)) != 0) {
+ reply->lxnr_errno = rval;
+ }
+
+ return (rval);
+}
+
+static void
+lx_netlink_reply_sendup(lx_netlink_reply_t *reply, mblk_t *mp, mblk_t *mp1)
+{
+ lx_netlink_sock_t *lxsock = reply->lxnr_sock;
+ lx_netlink_sockaddr_t *lxsa;
+ struct T_unitdata_ind *tunit;
+ int error;
+
+ /*
+ * To prevent the stream head from coalescing messages and to indicate
+ * their origin, we send them as T_UNITDATA_IND messages, not as raw
+ * M_DATA.
+ */
+ mp1->b_cont = mp;
+
+ mp = mp1;
+ mp->b_datap->db_type = M_PROTO;
+
+ tunit = (struct T_unitdata_ind *)mp->b_rptr;
+ tunit->PRIM_type = T_UNITDATA_IND;
+ tunit->SRC_length = sizeof (lx_netlink_sockaddr_t);
+ tunit->SRC_offset = sizeof (*tunit);
+ tunit->OPT_length = 0;
+
+ lxsa = (lx_netlink_sockaddr_t *)(mp->b_rptr + sizeof (*tunit));
+ lxsa->lxnl_family = AF_LX_NETLINK;
+ lxsa->lxnl_port = 0;
+ lxsa->lxnl_groups = 0;
+ lxsa->lxnl_pad = 0;
+
+ mp->b_wptr += sizeof (*tunit) + sizeof (*lxsa);
+
+ lxsock->lxns_upcalls->su_recv(lxsock->lxns_uphandle, mp,
+ msgdsize(mp), 0, &error, NULL);
+
+ if (error != 0)
+ lx_netlink_flowctrld++;
+}
+
+static void
+lx_netlink_reply_send(lx_netlink_reply_t *reply)
+{
+ mblk_t *mp1;
+
+ if (reply->lxnr_errno)
+ return;
+
+ if ((mp1 = lx_netlink_alloc_mp1()) == NULL) {
+ reply->lxnr_errno = ENOMEM;
+ return;
+ }
+
+ lx_netlink_reply_sendup(reply, reply->lxnr_mp, mp1);
+ reply->lxnr_mp = NULL;
+}
+
+static void
+lx_netlink_reply_done(lx_netlink_reply_t *reply)
+{
+ lx_netlink_sock_t *lxsock = reply->lxnr_sock;
+ mblk_t *mp;
+
+ /*
+ * Denote that we're done via a message with a NULL payload.
+ */
+ lx_netlink_reply_msg(reply, NULL, 0);
+
+ if (reply->lxnr_errno) {
+ /*
+ * If anything failed, we'll send up an error message.
+ */
+ lx_netlink_hdr_t *hdr;
+ lx_netlink_err_t *err;
+
+ if (reply->lxnr_mp != NULL) {
+ freeb(reply->lxnr_mp);
+ reply->lxnr_mp = NULL;
+ }
+
+ mp = reply->lxnr_err;
+ err = (lx_netlink_err_t *)mp->b_rptr;
+ hdr = &err->lxne_hdr;
+
+ hdr->lxnh_type = LX_NETLINK_NLMSG_ERROR;
+ hdr->lxnh_seq = reply->lxnr_hdr.lxnh_seq;
+ hdr->lxnh_len = sizeof (lx_netlink_err_t);
+ hdr->lxnh_seq = reply->lxnr_hdr.lxnh_seq;
+ hdr->lxnh_pid = lxsock->lxns_port;
+ err->lxne_failed = reply->lxnr_hdr;
+ err->lxne_errno = reply->lxnr_errno;
+ mp->b_wptr += sizeof (lx_netlink_err_t);
+ reply->lxnr_err = NULL;
+ } else {
+ mp = reply->lxnr_mp;
+ VERIFY(mp != NULL);
+ reply->lxnr_mp = NULL;
+ }
+
+ lx_netlink_reply_sendup(reply, mp, reply->lxnr_mp1);
+
+ if (reply->lxnr_mp != NULL)
+ freeb(reply->lxnr_mp);
+
+ if (reply->lxnr_err != NULL)
+ freeb(reply->lxnr_err);
+
+ kmem_free(reply, sizeof (lx_netlink_reply_t));
+}
+
+static int
+lx_netlink_reply_error(lx_netlink_sock_t *lxsock,
+ lx_netlink_hdr_t *hdr, int errno)
+{
+ /*
+ * The type of the message doesn't matter, as we're going to explicitly
+ * set lxnr_errno and therefore send only an error message.
+ */
+ lx_netlink_reply_t *reply = lx_netlink_reply(lxsock, hdr, 0);
+
+ if (reply == NULL)
+ return (ENOMEM);
+
+ reply->lxnr_errno = errno;
+ lx_netlink_reply_done(reply);
+
+ return (0);
+}
+
+static void
+lx_netlink_getlink_lifreq(lx_netlink_reply_t *reply, struct lifreq *lifr)
+{
+ lx_netlink_ifinfomsg_t ifi;
+ int i;
+
+ struct {
+ int native;
+ int lx;
+ } flags[] = {
+ { IFF_UP, LX_IFF_UP },
+ { IFF_BROADCAST, LX_IFF_BROADCAST },
+ { IFF_DEBUG, LX_IFF_DEBUG },
+ { IFF_LOOPBACK, LX_IFF_LOOPBACK },
+ { IFF_POINTOPOINT, LX_IFF_POINTOPOINT },
+ { IFF_NOTRAILERS, LX_IFF_NOTRAILERS },
+ { IFF_RUNNING, LX_IFF_RUNNING },
+ { IFF_NOARP, LX_IFF_NOARP },
+ { IFF_PROMISC, LX_IFF_PROMISC },
+ { IFF_ALLMULTI, LX_IFF_ALLMULTI },
+ { IFF_MULTICAST, LX_IFF_MULTICAST },
+ { 0 }
+ };
+
+ bzero(&ifi, sizeof (ifi));
+ ifi.lxnl_ifi_type = AF_UNSPEC;
+ ifi.lxnl_ifi_change = (uint32_t)-1;
+
+ if (lx_netlink_reply_ioctl(reply, SIOCGLIFINDEX, lifr) != 0)
+ return;
+
+ ifi.lxnl_ifi_index = lifr->lifr_index;
+
+ if (lx_netlink_reply_ioctl(reply, SIOCGLIFFLAGS, lifr) != 0)
+ return;
+
+ for (i = 0; flags[i].native; i++) {
+ if (lifr->lifr_flags & flags[i].native)
+ ifi.lxnl_ifi_flags |= flags[i].lx;
+ }
+
+ lx_netlink_reply_msg(reply, &ifi, sizeof (lx_netlink_ifinfomsg_t));
+
+ lx_netlink_reply_attr_string(reply,
+ LX_NETLINK_IFLA_IFNAME, lifr->lifr_name);
+
+ if (lx_netlink_reply_ioctl(reply, SIOCGLIFMTU, lifr) != 0)
+ return;
+
+ lx_netlink_reply_attr_int32(reply, LX_NETLINK_IFLA_MTU, lifr->lifr_mtu);
+
+ /*
+ * We don't have a notion of TX queue length (or not an easily
+ * accessible one, anyway), so we lie. (Which is to say we lie more
+ * than we're already lying -- which is saying something.)
+ */
+ lx_netlink_reply_attr_int32(reply, LX_NETLINK_IFLA_TXQLEN, 1);
+
+ lx_netlink_reply_send(reply);
+}
+
+static void
+lx_netlink_reply_eachfamily(lx_netlink_reply_t *reply,
+ void (*func)(lx_netlink_reply_t *, struct lifreq *), boolean_t distinct)
+{
+ lx_netlink_sock_t *sock = reply->lxnr_sock;
+ int nlifr, i;
+
+ struct {
+ int family;
+ ldi_handle_t handle;
+ struct lifconf lifc;
+ struct lifnum lifn;
+ } families[] = {
+ { AF_INET, sock->lxns_iphandle },
+ { AF_INET6, sock->lxns_ip6handle },
+ { AF_UNSPEC }
+ }, *family, *check;
+
+ for (family = families; family->family != AF_UNSPEC; family++) {
+ struct lifconf *lifc = &family->lifc;
+ struct lifnum *lifn = &family->lifn;
+
+ lifn->lifn_family = family->family;
+ sock->lxns_current = family->handle;
+
+ if (lx_netlink_reply_ioctl(reply, SIOCGLIFNUM, lifn) != 0)
+ break;
+
+ lifc->lifc_family = lifn->lifn_family;
+ lifc->lifc_flags = 0;
+ lifc->lifc_len = lifn->lifn_count * sizeof (struct lifreq);
+ lifc->lifc_buf = kmem_alloc(lifc->lifc_len, KM_SLEEP);
+
+ if (lx_netlink_reply_ioctl(reply, SIOCGLIFCONF, lifc) != 0)
+ break;
+
+ nlifr = lifc->lifc_len / sizeof (lifc->lifc_req[0]);
+
+ for (i = 0; i < nlifr; i++) {
+ if (!distinct) {
+ func(reply, &lifc->lifc_req[i]);
+ continue;
+ }
+
+ /*
+ * If we have been asked to provide each interface
+ * exactly once, we need to (annoyingly) check this
+ * name against others that we've already processed for
+ * other families. Yes, this is quadratic time -- but
+ * the number of interfaces per family is expected to
+ * be very small.
+ */
+ for (check = families; check != family; check++) {
+ struct lifconf *clifc = &check->lifc;
+ int cnlifr = clifc->lifc_len /
+ sizeof (clifc->lifc_req[0]), j;
+ char *nm = lifc->lifc_req[i].lifr_name, *cnm;
+
+ for (j = 0; j < cnlifr; j++) {
+ cnm = clifc->lifc_req[j].lifr_name;
+
+ if (strcmp(nm, cnm) == 0)
+ break;
+ }
+
+ if (j != cnlifr)
+ break;
+ }
+
+ if (check != family)
+ continue;
+
+ func(reply, &lifc->lifc_req[i]);
+ }
+ }
+
+ for (family = families; family->family != AF_UNSPEC; family++) {
+ struct lifconf *lifc = &family->lifc;
+
+ if (lifc->lifc_buf != NULL)
+ kmem_free(lifc->lifc_buf, lifc->lifc_len);
+ }
+}
+
+/*ARGSUSED*/
+static int
+lx_netlink_getlink(lx_netlink_sock_t *lxsock, lx_netlink_hdr_t *hdr, mblk_t *mp)
+{
+ lx_netlink_reply_t *reply;
+
+ reply = lx_netlink_reply(lxsock, hdr, LX_NETLINK_RTM_NEWLINK);
+
+ if (reply == NULL)
+ return (ENOMEM);
+
+ lx_netlink_reply_eachfamily(reply, lx_netlink_getlink_lifreq, B_TRUE);
+ lx_netlink_reply_done(reply);
+
+ return (0);
+}
+
+static void
+lx_netlink_getaddr_lifreq(lx_netlink_reply_t *reply, struct lifreq *lifr)
+{
+ lx_netlink_ifaddrmsg_t ifa;
+
+ bzero(&ifa, sizeof (ifa));
+
+ if (lx_netlink_reply_ioctl(reply, SIOCGLIFINDEX, lifr) != 0)
+ return;
+
+ ifa.lxnl_ifa_index = lifr->lifr_index;
+
+ if (lx_netlink_reply_ioctl(reply, SIOCGLIFFLAGS, lifr) != 0)
+ return;
+
+ if (!(lifr->lifr_flags & IFF_NOLOCAL)) {
+ if (lx_netlink_reply_ioctl(reply, SIOCGLIFSUBNET, lifr) != 0)
+ return;
+
+ ifa.lxnl_ifa_prefixlen = lifr->lifr_addrlen;
+
+ if (lx_netlink_reply_ioctl(reply, SIOCGLIFADDR, lifr) != 0)
+ return;
+
+ if (lifr->lifr_addr.ss_family == AF_INET) {
+ struct sockaddr_in *sin;
+
+ ifa.lxnl_ifa_family = LX_AF_INET;
+
+ lx_netlink_reply_msg(reply, &ifa,
+ sizeof (lx_netlink_ifaddrmsg_t));
+
+ sin = (struct sockaddr_in *)&lifr->lifr_addr;
+
+ lx_netlink_reply_attr_int32(reply,
+ LX_NETLINK_IFA_ADDRESS, sin->sin_addr.s_addr);
+ } else {
+ struct sockaddr_in6 *sin;
+
+ ifa.lxnl_ifa_family = LX_AF_INET6;
+
+ lx_netlink_reply_msg(reply, &ifa,
+ sizeof (lx_netlink_ifaddrmsg_t));
+
+ sin = (struct sockaddr_in6 *)&lifr->lifr_addr;
+
+ lx_netlink_reply_attr(reply, LX_NETLINK_IFA_ADDRESS,
+ &sin->sin6_addr, sizeof (sin->sin6_addr));
+ }
+ }
+
+ lx_netlink_reply_send(reply);
+}
+
+/*ARGSUSED*/
+static int
+lx_netlink_getaddr(lx_netlink_sock_t *lxsock, lx_netlink_hdr_t *hdr, mblk_t *mp)
+{
+ lx_netlink_reply_t *reply;
+
+ reply = lx_netlink_reply(lxsock, hdr, LX_NETLINK_RTM_NEWADDR);
+
+ if (reply == NULL)
+ return (ENOMEM);
+
+ lx_netlink_reply_eachfamily(reply, lx_netlink_getaddr_lifreq, B_FALSE);
+ lx_netlink_reply_done(reply);
+
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+lx_netlink_audit(lx_netlink_sock_t *lxsock, lx_netlink_hdr_t *hdr, mblk_t *mp)
+{
+ /*
+ * For all auditing messages, we return ECONNREFUSED, which seems to
+ * keep user-level auditing happy. (Or at least, non-suicidal.)
+ */
+ return (ECONNREFUSED);
+}
+
+/*ARGSUSED*/
+static int
+lx_netlink_kobject_uevent(lx_netlink_sock_t *lxsock,
+ lx_netlink_hdr_t *hdr, mblk_t *mp)
+{
+ /*
+ * For udev, we just silently accept all writes and never actually
+ * reply with anything -- which appears to be sufficient for things
+ * to work.
+ */
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+lx_netlink_send(sock_lower_handle_t handle, mblk_t *mp,
+ struct nmsghdr *msg, cred_t *cr)
+{
+ lx_netlink_sock_t *lxsock = (lx_netlink_sock_t *)handle;
+ lx_netlink_hdr_t *hdr = (lx_netlink_hdr_t *)mp->b_rptr;
+ int i, rval;
+
+ static struct {
+ int proto;
+ uint16_t type;
+ int (*func)(lx_netlink_sock_t *, lx_netlink_hdr_t *, mblk_t *);
+ } handlers[] = {
+ { LX_NETLINK_ROUTE,
+ LX_NETLINK_RTM_GETLINK, lx_netlink_getlink },
+ { LX_NETLINK_ROUTE,
+ LX_NETLINK_RTM_GETADDR, lx_netlink_getaddr },
+ { LX_NETLINK_AUDIT,
+ LX_NETLINK_NLMSG_NONE, lx_netlink_audit },
+ { LX_NETLINK_KOBJECT_UEVENT,
+ LX_NETLINK_NLMSG_NONE, lx_netlink_kobject_uevent },
+ { LX_NETLINK_NLMSG_NOOP, LX_NETLINK_NLMSG_NONE, NULL }
+ };
+
+ if (DB_TYPE(mp) != M_DATA || MBLKL(mp) < sizeof (lx_netlink_hdr_t)) {
+ freemsg(mp);
+ return (EPROTO);
+ }
+
+ for (i = 0; handlers[i].func != NULL; i++) {
+ if (lxsock->lxns_proto != handlers[i].proto)
+ continue;
+
+ if (handlers[i].type != LX_NETLINK_NLMSG_NONE &&
+ hdr->lxnh_type != handlers[i].type)
+ continue;
+
+ rval = handlers[i].func(lxsock, hdr, mp);
+ freemsg(mp);
+
+ return (rval);
+ }
+
+ /*
+ * An unrecognized message. We will bounce up an EOPNOTSUPP reply.
+ */
+ rval = lx_netlink_reply_error(lxsock, hdr, EOPNOTSUPP);
+ freemsg(mp);
+
+ return (rval);
+}
+
+/*ARGSUSED*/
+static int
+lx_netlink_close(sock_lower_handle_t handle, int flags, cred_t *cr)
+{
+ lx_netlink_sock_t *lxsock = (lx_netlink_sock_t *)handle, *sock, **prev;
+
+ mutex_enter(&lx_netlink_lock);
+
+ prev = &lx_netlink_head;
+
+ for (sock = *prev; sock != lxsock; sock = sock->lxns_next)
+ prev = &sock->lxns_next;
+
+ *prev = sock->lxns_next;
+
+ mutex_exit(&lx_netlink_lock);
+
+ (void) ldi_close(lxsock->lxns_iphandle, FREAD, kcred);
+ (void) ldi_close(lxsock->lxns_ip6handle, FREAD, kcred);
+ kmem_free(lxsock, sizeof (lx_netlink_sock_t));
+
+ return (0);
+}
+
+static sock_downcalls_t sock_lx_netlink_downcalls = {
+ lx_netlink_activate, /* sd_activate */
+ sock_accept_notsupp, /* sd_accept */
+ lx_netlink_bind, /* sd_bind */
+ sock_listen_notsupp, /* sd_listen */
+ sock_connect_notsupp, /* sd_connect */
+ sock_getpeername_notsupp, /* sd_getpeername */
+ lx_netlink_getsockname, /* sd_getsockname */
+ sock_getsockopt_notsupp, /* sd_getsockopt */
+ lx_netlink_setsockopt, /* sd_setsockopt */
+ lx_netlink_send, /* sd_send */
+ NULL, /* sd_send_uio */
+ NULL, /* sd_recv_uio */
+ NULL, /* sd_poll */
+ sock_shutdown_notsupp, /* sd_shutdown */
+ sock_clr_flowctrl_notsupp, /* sd_setflowctrl */
+ sock_ioctl_notsupp, /* sd_ioctl */
+ lx_netlink_close /* sd_close */
+};
+
+/*ARGSUSED*/
+static sock_lower_handle_t
+lx_netlink_create(int family, int type, int proto,
+ sock_downcalls_t **sock_downcalls, uint_t *smodep, int *errorp,
+ int flags, cred_t *credp)
+{
+ lx_netlink_sock_t *lxsock;
+ ldi_handle_t handle, handle6;
+ cred_t *kcred = zone_kcred();
+ int err;
+
+ if (family != AF_LX_NETLINK ||
+ (type != SOCK_DGRAM && type != SOCK_RAW)) {
+ *errorp = EPROTONOSUPPORT;
+ return (NULL);
+ }
+
+ switch (proto) {
+ case LX_NETLINK_ROUTE:
+ case LX_NETLINK_AUDIT:
+ case LX_NETLINK_KOBJECT_UEVENT:
+ break;
+
+ default:
+ *errorp = EPROTONOSUPPORT;
+ return (NULL);
+ }
+
+ if ((err = ldi_open_by_name(DEV_IP, FREAD, kcred,
+ &handle, lx_netlink_ldi)) != 0) {
+ *errorp = err;
+ return (NULL);
+ }
+
+ if ((err = ldi_open_by_name(DEV_IP6, FREAD, kcred,
+ &handle6, lx_netlink_ldi)) != 0) {
+ (void) ldi_close(handle, FREAD, kcred);
+ *errorp = err;
+ return (NULL);
+ }
+
+ *sock_downcalls = &sock_lx_netlink_downcalls;
+ *smodep = SM_ATOMIC;
+
+ lxsock = kmem_zalloc(sizeof (lx_netlink_sock_t), KM_SLEEP);
+ lxsock->lxns_iphandle = handle;
+ lxsock->lxns_ip6handle = handle6;
+ lxsock->lxns_bufsize = lx_netlink_bufsize;
+ lxsock->lxns_proto = proto;
+
+ mutex_enter(&lx_netlink_lock);
+
+ lxsock->lxns_next = lx_netlink_head;
+ lxsock->lxns_port = ++lx_netlink_port;
+ lx_netlink_head = lxsock;
+
+ mutex_exit(&lx_netlink_lock);
+
+ return ((sock_lower_handle_t)lxsock);
+}
+
+static void
+lx_netlink_init(void)
+{
+ major_t major = mod_name_to_major("ip");
+ int err;
+
+ VERIFY(major != DDI_MAJOR_T_NONE);
+
+ err = ldi_ident_from_major(major, &lx_netlink_ldi);
+ VERIFY(err == 0);
+}
+
+static void
+lx_netlink_fini(void)
+{
+ ldi_ident_release(lx_netlink_ldi);
+}
+
+static smod_reg_t sinfo = {
+ SOCKMOD_VERSION,
+ "lx_netlink",
+ SOCK_UC_VERSION,
+ SOCK_DC_VERSION,
+ lx_netlink_create,
+ NULL
+};
+
+/* modldrv structure */
+static struct modlsockmod sockmod = {
+ &mod_sockmodops, "AF_LX_NETLINK socket module", &sinfo
+};
+
+/* modlinkage structure */
+static struct modlinkage ml = {
+ MODREV_1,
+ &sockmod,
+ NULL
+};
+
+int
+_init(void)
+{
+ int err;
+
+ lx_netlink_init();
+
+ if ((err = mod_install(&ml)) != 0)
+ lx_netlink_fini();
+
+ return (err);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&ml, modinfop));
+}
+
+int
+_fini(void)
+{
+ int err = 0;
+
+ mutex_enter(&lx_netlink_lock);
+
+ if (lx_netlink_head != NULL)
+ err = EBUSY;
+
+ mutex_exit(&lx_netlink_lock);
+
+ if (err == 0 && (err = mod_remove(&ml)) == 0)
+ lx_netlink_fini();
+
+ return (err);
+}
diff --git a/usr/src/uts/common/brand/lx/io/lx_ptm.c b/usr/src/uts/common/brand/lx/io/lx_ptm.c
new file mode 100644
index 0000000000..e9e3618a52
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/io/lx_ptm.c
@@ -0,0 +1,1157 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+
+/*
+ * This driver attempts to emulate some of the the behaviors of
+ * Linux terminal devices (/dev/ptmx and /dev/pts/[0-9][0-9]*) on Solaris
+ *
+ * It does this by layering over the /dev/ptmx device and intercepting
+ * opens to it.
+ *
+ * This driver makes the following assumptions about the way the ptm/pts
+ * drivers on Solaris work:
+ *
+ * - all opens of the /dev/ptmx device node return a unique dev_t.
+ *
+ * - the dev_t minor node value for each open ptm instance corrospondes
+ * to it's associated slave terminal device number. ie. the path to
+ * the slave terminal device associated with an open ptm instance
+ * who's dev_t minor node vaue is 5, is /dev/pts/5.
+ *
+ * - the ptm driver always allocates the lowest numbered slave terminal
+ * device possible.
+ */
+
+#include <sys/conf.h>
+#include <sys/ddi.h>
+#include <sys/devops.h>
+#include <sys/file.h>
+#include <sys/filio.h>
+#include <sys/kstr.h>
+#include <sys/lx_ptm.h>
+#include <sys/modctl.h>
+#include <sys/pathname.h>
+#include <sys/ptms.h>
+#include <sys/ptyvar.h>
+#include <sys/stat.h>
+#include <sys/stropts.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+
+#define LP_PTM_PATH "/dev/ptmx"
+#define LP_PTS_PATH "/dev/pts/"
+#define LP_PTS_DRV_NAME "pts"
+#define LP_PTS_USEC_DELAY (5 * 1000) /* 5 ms */
+#define LP_PTS_USEC_DELAY_MAX (5 * MILLISEC) /* 5 ms */
+
+/*
+ * this driver is layered on top of the ptm driver. we'd like to
+ * make this drivers minor name space a mirror of the ptm drivers
+ * namespace, but we can't actually do this. the reason is that the
+ * ptm driver is opened via the clone driver. there for no minor nodes
+ * of the ptm driver are actually accessible via the filesystem.
+ * since we're not a streams device we can't be opened by the clone
+ * driver. there for we need to have at least minor node accessible
+ * via the filesystem so that consumers can open it. we use the device
+ * node with a minor number of 0 for this purpose. what this means is
+ * that minor node 0 can't be used to map ptm minor node 0. since this
+ * minor node is now reserved we need to shift our ptm minor node
+ * mappings by one. ie. a ptm minor node with a value of 0 will
+ * corrospond to our minor node with a value of 1. these mappings are
+ * managed with the following macros.
+ */
+#define DEVT_TO_INDEX(x) LX_PTM_DEV_TO_PTS(x)
+#define INDEX_TO_MINOR(x) ((x) + 1)
+
+/*
+ * grow our layered handle array by the same size increment that the ptm
+ * driver uses to grow the pty device space - PTY_MAXDELTA
+ */
+#define LP_PTY_INC 128
+
+/*
+ * lx_ptm_ops contains state information about outstanding operations on the
+ * underlying master terminal device. Currently we only track information
+ * for read operations.
+ *
+ * Note that this data has not been rolled directly into the lx_ptm_handle
+ * structure because we can't put mutex's of condition variables into
+ * lx_ptm_handle structure. The reason is that the array of lx_ptm_handle
+ * structures linked to from the global lx_ptm state can be resized
+ * dynamically, and when it's resized, the new array is at a different
+ * memory location and the old array memory is discarded. Mutexs and cvs
+ * are accessed based off their address, so if this array was re-sized while
+ * there were outstanding operations on any mutexs or cvs in the array
+ * then the system would tip over. In the future the lx_ptm_handle structure
+ * array should probably be replaced with either an array of pointers to
+ * lx_ptm_handle structures or some other kind of data structure containing
+ * pointers to lx_ptm_handle structures. Then the lx_ptm_ops structure
+ * could be folded directly into the lx_ptm_handle structures. (This will
+ * also require the definition of a new locking mechanism to protect the
+ * contents of lx_ptm_handle structures.)
+ */
+typedef struct lx_ptm_ops {
+ int lpo_rops;
+ kcondvar_t lpo_rops_cv;
+ kmutex_t lpo_rops_lock;
+} lx_ptm_ops_t;
+
+/*
+ * Every open of the master terminal device in a zone results in a new
+ * lx_ptm_handle handle allocation. These handles are stored in an array
+ * hanging off the lx_ptm_state structure.
+ */
+typedef struct lx_ptm_handle {
+ /* Device handle to the underlying real /dev/ptmx master terminal. */
+ ldi_handle_t lph_handle;
+
+ /* Flag to indicate if TIOCPKT mode has been enabled. */
+ int lph_pktio;
+
+ /* Number of times the slave device has been opened/closed. */
+ int lph_eofed;
+
+ /* Callback handler in the ptm driver to check if slave is open. */
+ ptmptsopencb_t lph_ppocb;
+
+ /* Pointer to state for operations on underlying device. */
+ lx_ptm_ops_t *lph_lpo;
+} lx_ptm_handle_t;
+
+/*
+ * Global state for the lx_ptm driver.
+ */
+typedef struct lx_ptm_state {
+ /* lx_ptm device devinfo pointer */
+ dev_info_t *lps_dip;
+
+ /* LDI ident used to open underlying real /dev/ptmx master terminals. */
+ ldi_ident_t lps_li;
+
+ /* pts drivers major number */
+ major_t lps_pts_major;
+
+ /* rw lock used to manage access and growth of lps_lh_array */
+ krwlock_t lps_lh_rwlock;
+
+ /* number of elements in lps_lh_array */
+ uint_t lps_lh_count;
+
+ /* Array of handles to underlying real /dev/ptmx master terminals. */
+ lx_ptm_handle_t *lps_lh_array;
+} lx_ptm_state_t;
+
+/* Pointer to the lx_ptm global state structure. */
+static lx_ptm_state_t lps;
+
+/*
+ * List of modules to be autopushed onto slave terminal devices when they
+ * are opened in an lx branded zone.
+ */
+static char *lx_pts_mods[] = {
+ "ptem",
+ "ldterm",
+ "ttcompat",
+ NULL
+};
+
+static void
+lx_ptm_lh_grow(uint_t index)
+{
+ uint_t new_lh_count, old_lh_count;
+ lx_ptm_handle_t *new_lh_array, *old_lh_array;
+
+ /*
+ * allocate a new array. we drop the rw lock on the array so that
+ * readers can still access devices in case our memory allocation
+ * blocks.
+ */
+ new_lh_count = MAX(lps.lps_lh_count + LP_PTY_INC, index + 1);
+ new_lh_array =
+ kmem_zalloc(sizeof (lx_ptm_handle_t) * new_lh_count, KM_SLEEP);
+
+ /*
+ * double check that we still actually need to increase the size
+ * of the array
+ */
+ rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+ if (index < lps.lps_lh_count) {
+ /* someone beat us to it so there's nothing more to do */
+ rw_exit(&lps.lps_lh_rwlock);
+ kmem_free(new_lh_array,
+ sizeof (lx_ptm_handle_t) * new_lh_count);
+ return;
+ }
+
+ /* copy the existing data into the new array */
+ ASSERT((lps.lps_lh_count != 0) || (lps.lps_lh_array == NULL));
+ ASSERT((lps.lps_lh_count == 0) || (lps.lps_lh_array != NULL));
+ if (lps.lps_lh_count != 0) {
+ bcopy(lps.lps_lh_array, new_lh_array,
+ sizeof (lx_ptm_handle_t) * lps.lps_lh_count);
+ }
+
+ /* save info on the old array */
+ old_lh_array = lps.lps_lh_array;
+ old_lh_count = lps.lps_lh_count;
+
+ /* install the new array */
+ lps.lps_lh_array = new_lh_array;
+ lps.lps_lh_count = new_lh_count;
+
+ rw_exit(&lps.lps_lh_rwlock);
+
+ /* free the old array */
+ if (old_lh_array != NULL) {
+ kmem_free(old_lh_array,
+ sizeof (lx_ptm_handle_t) * old_lh_count);
+ }
+}
+
+static void
+lx_ptm_lh_insert(uint_t index, ldi_handle_t lh)
+{
+ lx_ptm_ops_t *lpo;
+
+ ASSERT(lh != NULL);
+
+ /* Allocate and initialize the ops structure */
+ lpo = kmem_zalloc(sizeof (lx_ptm_ops_t), KM_SLEEP);
+ mutex_init(&lpo->lpo_rops_lock, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&lpo->lpo_rops_cv, NULL, CV_DEFAULT, NULL);
+
+ rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+
+ /* check if we need to grow the size of the layered handle array */
+ if (index >= lps.lps_lh_count) {
+ rw_exit(&lps.lps_lh_rwlock);
+ lx_ptm_lh_grow(index);
+ rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+ }
+
+ ASSERT(index < lps.lps_lh_count);
+ ASSERT(lps.lps_lh_array[index].lph_handle == NULL);
+ ASSERT(lps.lps_lh_array[index].lph_pktio == 0);
+ ASSERT(lps.lps_lh_array[index].lph_eofed == 0);
+ ASSERT(lps.lps_lh_array[index].lph_lpo == NULL);
+
+ /* insert the new handle and return */
+ lps.lps_lh_array[index].lph_handle = lh;
+ lps.lps_lh_array[index].lph_pktio = 0;
+ lps.lps_lh_array[index].lph_eofed = 0;
+ lps.lps_lh_array[index].lph_lpo = lpo;
+
+ rw_exit(&lps.lps_lh_rwlock);
+}
+
+static ldi_handle_t
+lx_ptm_lh_remove(uint_t index)
+{
+ ldi_handle_t lh;
+
+ rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+
+ ASSERT(index < lps.lps_lh_count);
+ ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+ ASSERT(lps.lps_lh_array[index].lph_lpo->lpo_rops == 0);
+ ASSERT(!MUTEX_HELD(&lps.lps_lh_array[index].lph_lpo->lpo_rops_lock));
+
+ /* free the write handle */
+ kmem_free(lps.lps_lh_array[index].lph_lpo, sizeof (lx_ptm_ops_t));
+ lps.lps_lh_array[index].lph_lpo = NULL;
+
+ /* remove the handle and return it */
+ lh = lps.lps_lh_array[index].lph_handle;
+ lps.lps_lh_array[index].lph_handle = NULL;
+ lps.lps_lh_array[index].lph_pktio = 0;
+ lps.lps_lh_array[index].lph_eofed = 0;
+ rw_exit(&lps.lps_lh_rwlock);
+ return (lh);
+}
+
+static void
+lx_ptm_lh_get_ppocb(uint_t index, ptmptsopencb_t *ppocb)
+{
+ rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+
+ ASSERT(index < lps.lps_lh_count);
+ ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+
+ *ppocb = lps.lps_lh_array[index].lph_ppocb;
+ rw_exit(&lps.lps_lh_rwlock);
+}
+
+static void
+lx_ptm_lh_set_ppocb(uint_t index, ptmptsopencb_t *ppocb)
+{
+ rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+
+ ASSERT(index < lps.lps_lh_count);
+ ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+
+ lps.lps_lh_array[index].lph_ppocb = *ppocb;
+ rw_exit(&lps.lps_lh_rwlock);
+}
+
+static ldi_handle_t
+lx_ptm_lh_lookup(uint_t index)
+{
+ ldi_handle_t lh;
+
+ rw_enter(&lps.lps_lh_rwlock, RW_READER);
+
+ ASSERT(index < lps.lps_lh_count);
+ ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+
+ /* return the handle */
+ lh = lps.lps_lh_array[index].lph_handle;
+ rw_exit(&lps.lps_lh_rwlock);
+ return (lh);
+}
+
+static lx_ptm_ops_t *
+lx_ptm_lpo_lookup(uint_t index)
+{
+ lx_ptm_ops_t *lpo;
+
+ rw_enter(&lps.lps_lh_rwlock, RW_READER);
+
+ ASSERT(index < lps.lps_lh_count);
+ ASSERT(lps.lps_lh_array[index].lph_lpo != NULL);
+
+ /* return the handle */
+ lpo = lps.lps_lh_array[index].lph_lpo;
+ rw_exit(&lps.lps_lh_rwlock);
+ return (lpo);
+}
+
+static int
+lx_ptm_lh_pktio_get(uint_t index)
+{
+ int pktio;
+
+ rw_enter(&lps.lps_lh_rwlock, RW_READER);
+
+ ASSERT(index < lps.lps_lh_count);
+ ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+
+ /* return the pktio state */
+ pktio = lps.lps_lh_array[index].lph_pktio;
+ rw_exit(&lps.lps_lh_rwlock);
+ return (pktio);
+}
+
+static void
+lx_ptm_lh_pktio_set(uint_t index, int pktio)
+{
+ rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+
+ ASSERT(index < lps.lps_lh_count);
+ ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+
+ /* set the pktio state */
+ lps.lps_lh_array[index].lph_pktio = pktio;
+ rw_exit(&lps.lps_lh_rwlock);
+}
+
+static int
+lx_ptm_lh_eofed_get(uint_t index)
+{
+ int eofed;
+
+ rw_enter(&lps.lps_lh_rwlock, RW_READER);
+
+ ASSERT(index < lps.lps_lh_count);
+ ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+
+ /* return the eofed state */
+ eofed = lps.lps_lh_array[index].lph_eofed;
+ rw_exit(&lps.lps_lh_rwlock);
+ return (eofed);
+}
+
+static void
+lx_ptm_lh_eofed_set(uint_t index)
+{
+ rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+
+ ASSERT(index < lps.lps_lh_count);
+ ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+
+ /* set the eofed state */
+ lps.lps_lh_array[index].lph_eofed++;
+ rw_exit(&lps.lps_lh_rwlock);
+}
+
+static int
+lx_ptm_read_start(dev_t dev)
+{
+ lx_ptm_ops_t *lpo = lx_ptm_lpo_lookup(DEVT_TO_INDEX(dev));
+
+ mutex_enter(&lpo->lpo_rops_lock);
+ ASSERT(lpo->lpo_rops >= 0);
+
+ /* Wait for other read operations to finish */
+ while (lpo->lpo_rops != 0) {
+ if (cv_wait_sig(&lpo->lpo_rops_cv, &lpo->lpo_rops_lock) == 0) {
+ mutex_exit(&lpo->lpo_rops_lock);
+ return (-1);
+ }
+ }
+
+ /* Start a read operation */
+ VERIFY(++lpo->lpo_rops == 1);
+ mutex_exit(&lpo->lpo_rops_lock);
+ return (0);
+}
+
+static void
+lx_ptm_read_end(dev_t dev)
+{
+ lx_ptm_ops_t *lpo = lx_ptm_lpo_lookup(DEVT_TO_INDEX(dev));
+
+ mutex_enter(&lpo->lpo_rops_lock);
+ ASSERT(lpo->lpo_rops >= 0);
+
+ /* End a read operation */
+ VERIFY(--lpo->lpo_rops == 0);
+ cv_signal(&lpo->lpo_rops_cv);
+
+ mutex_exit(&lpo->lpo_rops_lock);
+}
+
+static int
+lx_ptm_pts_isopen(dev_t dev)
+{
+ ptmptsopencb_t ppocb;
+
+ lx_ptm_lh_get_ppocb(DEVT_TO_INDEX(dev), &ppocb);
+ return (ppocb.ppocb_func(ppocb.ppocb_arg));
+}
+
+static void
+lx_ptm_eof_read(ldi_handle_t lh)
+{
+ struct uio uio;
+ iovec_t iov;
+ char junk[1];
+
+ /*
+ * We can remove any EOF message from the head of the stream by
+ * doing a zero byte read from the stream.
+ */
+ iov.iov_len = 0;
+ iov.iov_base = junk;
+ uio.uio_iovcnt = 1;
+ uio.uio_iov = &iov;
+ uio.uio_resid = iov.iov_len;
+ uio.uio_offset = 0;
+ uio.uio_segflg = UIO_SYSSPACE;
+ uio.uio_fmode = 0;
+ uio.uio_extflg = 0;
+ uio.uio_llimit = MAXOFFSET_T;
+ (void) ldi_read(lh, &uio, kcred);
+}
+
+static int
+lx_ptm_eof_drop_1(dev_t dev, int *rvalp)
+{
+ ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+ int err, msg_size, msg_count;
+
+ *rvalp = 0;
+
+ /*
+ * Check if there is an EOF message (represented by a zero length
+ * data message) at the head of the stream. Note that the
+ * I_NREAD ioctl is a streams framework ioctl so it will succeed
+ * even if there have been previous write errors on this stream.
+ */
+ if ((err = ldi_ioctl(lh, I_NREAD, (intptr_t)&msg_size,
+ FKIOCTL, kcred, &msg_count)) != 0)
+ return (err);
+
+ if ((msg_count == 0) || (msg_size != 0)) {
+ /* No EOF message found */
+ return (0);
+ }
+
+ /* Record the fact that the slave device has been closed. */
+ lx_ptm_lh_eofed_set(DEVT_TO_INDEX(dev));
+
+ /* drop the EOF */
+ lx_ptm_eof_read(lh);
+ *rvalp = 1;
+ return (0);
+}
+
+static int
+lx_ptm_eof_drop(dev_t dev, int *rvalp)
+{
+ int rval, err;
+
+ if (rvalp != NULL)
+ *rvalp = 0;
+ for (;;) {
+ if ((err = lx_ptm_eof_drop_1(dev, &rval)) != 0)
+ return (err);
+ if (rval == 0)
+ return (0);
+ if (rvalp != NULL)
+ *rvalp = 1;
+ }
+}
+
+static int
+lx_ptm_data_check(dev_t dev, int ignore_eof, int *rvalp)
+{
+ ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+ int err;
+
+ *rvalp = 0;
+ if (ignore_eof) {
+ int size, rval;
+
+ if ((err = ldi_ioctl(lh, FIONREAD, (intptr_t)&size,
+ FKIOCTL, kcred, &rval)) != 0)
+ return (err);
+ if (size != 0)
+ *rvalp = 1;
+ } else {
+ int msg_size, msg_count;
+
+ if ((err = ldi_ioctl(lh, I_NREAD, (intptr_t)&msg_size,
+ FKIOCTL, kcred, &msg_count)) != 0)
+ return (err);
+ if (msg_count != 0)
+ *rvalp = 1;
+ }
+ return (0);
+}
+
+static int
+lx_ptm_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int err;
+
+ if (cmd != DDI_ATTACH)
+ return (DDI_FAILURE);
+
+ if (ddi_create_minor_node(dip, LX_PTM_MINOR_NODE, S_IFCHR,
+ ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS)
+ return (DDI_FAILURE);
+
+ err = ldi_ident_from_dip(dip, &lps.lps_li);
+ if (err != 0) {
+ ddi_remove_minor_node(dip, ddi_get_name(dip));
+ return (DDI_FAILURE);
+ }
+
+ lps.lps_dip = dip;
+ lps.lps_pts_major = ddi_name_to_major(LP_PTS_DRV_NAME);
+
+ rw_init(&lps.lps_lh_rwlock, NULL, RW_DRIVER, NULL);
+ lps.lps_lh_count = 0;
+ lps.lps_lh_array = NULL;
+
+ return (DDI_SUCCESS);
+}
+
+/*ARGSUSED*/
+static int
+lx_ptm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ if (cmd != DDI_DETACH)
+ return (DDI_FAILURE);
+
+ ldi_ident_release(lps.lps_li);
+ lps.lps_dip = NULL;
+
+ ASSERT((lps.lps_lh_count != 0) || (lps.lps_lh_array == NULL));
+ ASSERT((lps.lps_lh_count == 0) || (lps.lps_lh_array != NULL));
+ if (lps.lps_lh_array != NULL) {
+ kmem_free(lps.lps_lh_array,
+ sizeof (lx_ptm_handle_t) * lps.lps_lh_count);
+ lps.lps_lh_array = NULL;
+ lps.lps_lh_count = 0;
+ }
+
+ return (DDI_SUCCESS);
+}
+
+/*ARGSUSED*/
+static int
+lx_ptm_open(dev_t *devp, int flag, int otyp, cred_t *credp)
+{
+ struct strioctl iocb;
+ ptmptsopencb_t ppocb = { NULL, NULL };
+ ldi_handle_t lh;
+ major_t maj, our_major = getmajor(*devp);
+ minor_t min, lastmin;
+ uint_t index, anchor = 1;
+ dev_t ptm_dev;
+ int err, rval = 0;
+
+ /*
+ * Don't support the FNDELAY flag and FNONBLOCK until we either
+ * find a Linux app that opens /dev/ptmx with the O_NDELAY
+ * or O_NONBLOCK flags explicitly, or until we create test cases
+ * to determine how reads of master terminal devices opened with
+ * these flags behave in different situations on Linux. Supporting
+ * these flags will involve enhancing our read implementation
+ * and changing the way it deals with EOF notifications.
+ */
+ if (flag & (FNDELAY | FNONBLOCK))
+ return (ENOTSUP);
+
+ /*
+ * we're layered on top of the ptm driver so open that driver
+ * first. (note that we're opening /dev/ptmx in the global
+ * zone, not ourselves in the Linux zone.)
+ */
+ err = ldi_open_by_name(LP_PTM_PATH, flag, credp, &lh, lps.lps_li);
+ if (err != 0)
+ return (err);
+
+ /* get the devt returned by the ptmx open */
+ err = ldi_get_dev(lh, &ptm_dev);
+ if (err != 0) {
+ (void) ldi_close(lh, flag, credp);
+ return (err);
+ }
+
+ /*
+ * we're a cloning driver so here's well change the devt that we
+ * return. the ptmx is also a cloning driver so we'll just use
+ * it's minor number as our minor number (it already manages it's
+ * minor name space so no reason to duplicate the effort.)
+ */
+ index = getminor(ptm_dev);
+ *devp = makedevice(our_major, INDEX_TO_MINOR(index));
+
+ /* Get a callback function to query if the pts device is open. */
+ iocb.ic_cmd = PTMPTSOPENCB;
+ iocb.ic_timout = 0;
+ iocb.ic_len = sizeof (ppocb);
+ iocb.ic_dp = (char *)&ppocb;
+
+ err = ldi_ioctl(lh, I_STR, (intptr_t)&iocb, FKIOCTL, kcred, &rval);
+ if ((err != 0) || (rval != 0)) {
+ (void) ldi_close(lh, flag, credp);
+ return (EIO); /* XXX return something else here? */
+ }
+ ASSERT(ppocb.ppocb_func != NULL);
+
+ /*
+ * now setup autopush for the terminal slave device. this is
+ * necessary so that when a Linux program opens the device we
+ * can push required strmod modules onto the stream. in Solaris
+ * this is normally done by the application that actually
+ * allocates the terminal.
+ */
+ maj = lps.lps_pts_major;
+ min = index;
+ lastmin = 0;
+ err = kstr_autopush(SET_AUTOPUSH, &maj, &min, &lastmin,
+ &anchor, lx_pts_mods);
+ if (err != 0) {
+ (void) ldi_close(lh, flag, credp);
+ return (EIO); /* XXX return something else here? */
+ }
+
+ /* save off this layered handle for future accesses */
+ lx_ptm_lh_insert(index, lh);
+ lx_ptm_lh_set_ppocb(index, &ppocb);
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+lx_ptm_close(dev_t dev, int flag, int otyp, cred_t *credp)
+{
+ ldi_handle_t lh;
+ major_t maj;
+ minor_t min, lastmin;
+ uint_t index;
+ int err;
+
+ index = DEVT_TO_INDEX(dev);
+
+ /*
+ * we must cleanup all the state associated with this major/minor
+ * terminal pair before actually closing the ptm master device.
+ * this is required because once the close of the ptm device is
+ * complete major/minor terminal pair is immediatly available for
+ * re-use in any zone.
+ */
+
+ /* free up our saved reference for this layered handle */
+ lh = lx_ptm_lh_remove(index);
+
+ /* unconfigure autopush for the associated terminal slave device */
+ maj = lps.lps_pts_major;
+ min = index;
+ lastmin = 0;
+ do {
+ /*
+ * we loop here because we don't want to release this ptm
+ * node if autopush can't be disabled on the associated
+ * slave device because then bad things could happen if
+ * another brand were to get this terminal allocated
+ * to them.
+ *
+ * XXX should we ever give up?
+ */
+ err = kstr_autopush(CLR_AUTOPUSH, &maj, &min, &lastmin,
+ 0, NULL);
+ } while (err != 0);
+
+ err = ldi_close(lh, flag, credp);
+
+ /*
+ * note that we don't have to bother with changing the permissions
+ * on the associated slave device here. the reason is that no one
+ * can actually open the device untill it's associated master
+ * device is re-opened, which will result in the permissions on
+ * it being reset.
+ */
+ return (err);
+}
+
+static int
+lx_ptm_read_loop(dev_t dev, struct uio *uiop, cred_t *credp, int *loop)
+{
+ ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+ int err, rval;
+ struct uio uio = *uiop;
+
+ *loop = 0;
+
+ /*
+ * Here's another way that Linux master terminals behave differently
+ * from Solaris master terminals. If you do a read on a Linux
+ * master terminal (that was opened witout NDELAY and NONBLOCK)
+ * who's corrosponding slave terminal is currently closed and
+ * has been opened and closed at least once, Linux return -1 and
+ * set errno to EIO where as Solaris blocks.
+ */
+ if (lx_ptm_lh_eofed_get(DEVT_TO_INDEX(dev))) {
+ /* Slave has been opened and closed at least once. */
+ if (lx_ptm_pts_isopen(dev) == 0) {
+ /*
+ * Slave is closed. Make sure that data is avaliable
+ * before attempting a read.
+ */
+ if ((err = lx_ptm_data_check(dev, 0, &rval)) != 0)
+ return (err);
+
+ /* If there is no data available then return. */
+ if (rval == 0)
+ return (EIO);
+ }
+ }
+
+ /* Actually do the read operation. */
+ if ((err = ldi_read(lh, uiop, credp)) != 0)
+ return (err);
+
+ /* If read returned actual data then return. */
+ if (uio.uio_resid != uiop->uio_resid)
+ return (0);
+
+ /*
+ * This was a zero byte read (ie, an EOF). This indicates
+ * that the slave terinal device has been closed. Record
+ * the fact that the slave device has been closed and retry
+ * the read operation.
+ */
+ lx_ptm_lh_eofed_set(DEVT_TO_INDEX(dev));
+ *loop = 1;
+ return (0);
+}
+
+static int
+lx_ptm_read(dev_t dev, struct uio *uiop, cred_t *credp)
+{
+ int pktio = lx_ptm_lh_pktio_get(DEVT_TO_INDEX(dev));
+ int err, loop;
+ struct uio uio;
+ struct iovec iovp;
+
+ ASSERT(uiop->uio_iovcnt > 0);
+
+ /*
+ * If packet mode has been enabled (via TIOCPKT) we need to pad
+ * all read requests with a leading byte that indicates any
+ * relevant control status information.
+ */
+ if (pktio != 0) {
+ /*
+ * We'd like to write the control information into
+ * the current buffer but we can't yet. We don't
+ * want to modify userspace memory here only to have
+ * the read operation fail later. So instead
+ * what we'll do here is read one character from the
+ * beginning of the memory pointed to by the uio
+ * structure. This will advance the output pointer
+ * by one. Then when the read completes successfully
+ * we can update the byte that we passed over. Before
+ * we do the read make a copy of the current uiop and
+ * iovec structs so we can write to them later.
+ */
+ uio = *uiop;
+ iovp = *uiop->uio_iov;
+ uio.uio_iov = &iovp;
+
+ if (uwritec(uiop) == -1)
+ return (EFAULT);
+ }
+
+ do {
+ /*
+ * Before we actually attempt a read operation we need
+ * to make sure there's some buffer space to actually
+ * read in some data. We do this because if we're in
+ * pktio mode and the caller only requested one byte,
+ * then we've already used up that one byte and we
+ * don't want to pass this read request. Doing a 0
+ * byte read (unless there is a problem with the stream
+ * head) always returns succcess. Normally when a streams
+ * read returns 0 bytes we interpret that as an EOF on
+ * the stream (ie, the slave side has been opened and
+ * closed) and we ignore it and re-try the read operation.
+ * So if we pass on a 0 byte read here lx_ptm_read_loop()
+ * will tell us to loop around and we'll end up in an
+ * infinite loop.
+ */
+ if (uiop->uio_resid == 0)
+ break;
+
+ /*
+ * Serialize all reads. We need to do this so that we can
+ * properly emulate the behavior of master terminals on Linux.
+ * In reality this serializaion should not pose any kind of
+ * performance problem since it would be very strange to have
+ * multiple threads trying to read from the same master
+ * terminal device concurrently.
+ */
+ if (lx_ptm_read_start(dev) != 0)
+ return (EINTR);
+
+ err = lx_ptm_read_loop(dev, uiop, credp, &loop);
+ lx_ptm_read_end(dev);
+ if (err != 0)
+ return (err);
+ } while (loop != 0);
+
+ if (pktio != 0) {
+ uint8_t pktio_data = TIOCPKT_DATA;
+
+ /*
+ * Note that the control status information we
+ * pass back is faked up in the sense that we
+ * don't actually report any events, we always
+ * report a status of 0.
+ */
+ if (uiomove(&pktio_data, 1, UIO_READ, &uio) != 0)
+ return (EFAULT);
+ }
+
+ return (0);
+}
+
+static int
+lx_ptm_write(dev_t dev, struct uio *uiop, cred_t *credp)
+{
+ ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+ int err;
+
+ err = ldi_write(lh, uiop, credp);
+
+ return (err);
+}
+
+static int
+lx_ptm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
+ int *rvalp)
+{
+ ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+ int err;
+
+ /*
+ * here we need to make sure that we never allow the
+ * I_SETSIG and I_ESETSIG ioctls to pass through. we
+ * do this because we can't support them.
+ *
+ * the native Solaris ptm device supports these ioctls because
+ * they are streams framework ioctls and all streams devices
+ * support them by default. these ioctls cause the current
+ * process to be registered with a stream and receive signals
+ * when certain stream events occur.
+ *
+ * a problem arises with cleanup of these registrations
+ * for layered drivers.
+ *
+ * normally the streams framework is notified whenever a
+ * process closes any reference to a stream and it goes ahead
+ * and cleans up these registrations. but actual device drivers
+ * are not notified when a process performs a close operation
+ * unless the process is closing the last opened reference to
+ * the device on the entire system.
+ *
+ * so while we could pass these ioctls on and allow processes
+ * to register for signal delivery, we would never receive
+ * any notification when those processes exit (or close a
+ * stream) and we wouldn't be able to unregister them.
+ *
+ * luckily these operations are streams specific and Linux
+ * doesn't support streams devices. so it doesn't actually
+ * seem like we need to support these ioctls. if it turns
+ * out that we do need to support them for some reason in
+ * the future, the current driver model will have to be
+ * enhanced to better support streams device layering.
+ */
+ if ((cmd == I_SETSIG) || (cmd == I_ESETSIG))
+ return (EINVAL);
+
+ /*
+ * here we fake up support for TIOCPKT. Linux applications expect
+ * /etc/ptmx to support this ioctl, but on Solaris it doesn't.
+ * (it is supported on older bsd style ptys.) so we'll fake
+ * up support for it here.
+ *
+ * the reason that this ioctl is emulated here instead of in
+ * userland is that this ioctl affects the results returned
+ * from read() operations. if this ioctl was emulated in
+ * userland the brand library would need to intercept all
+ * read operations and check to see if pktio was enabled
+ * for the fd being read from. since this ioctl only needs
+ * to be supported on the ptmx device it makes more sense
+ * to support it here where we can easily update the results
+ * returned for read() operations performed on ourselves.
+ */
+ if (cmd == TIOCPKT) {
+ int pktio;
+
+ if (ddi_copyin((void *)arg, &pktio, sizeof (pktio),
+ mode) != DDI_SUCCESS)
+ return (EFAULT);
+
+ if (pktio == 0)
+ lx_ptm_lh_pktio_set(DEVT_TO_INDEX(dev), 0);
+ else
+ lx_ptm_lh_pktio_set(DEVT_TO_INDEX(dev), 1);
+
+ return (0);
+ }
+
+ err = ldi_ioctl(lh, cmd, arg, mode, credp, rvalp);
+
+ return (err);
+}
+
+static int
+lx_ptm_poll_loop(dev_t dev, short events, int anyyet, short *reventsp,
+ struct pollhead **phpp, int *loop)
+{
+ ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+ short reventsp2;
+ int err, rval;
+
+ *loop = 0;
+
+ /*
+ * If the slave device has been opened and closed at least
+ * once and the slave device is currently closed, then poll
+ * always needs to returns immediatly.
+ */
+ if ((lx_ptm_lh_eofed_get(DEVT_TO_INDEX(dev)) != 0) &&
+ (lx_ptm_pts_isopen(dev) == 0)) {
+ /* In this case always return POLLHUP */
+ *reventsp = POLLHUP;
+
+ /*
+ * Check if there really is data on the stream.
+ * If so set the correct return flags.
+ */
+ if ((err = lx_ptm_data_check(dev, 1, &rval)) != 0) {
+ /* Something went wrong. */
+ return (err);
+ }
+ if (rval != 0)
+ *reventsp |= (events & (POLLIN | POLLRDNORM));
+
+ /*
+ * Is the user checking for writability? Note that for ptm
+ * devices Linux seems to ignore the POLLWRBAND write flag.
+ */
+ if ((events & POLLWRNORM) == 0)
+ return (0);
+
+ /*
+ * To check if the stream is writable we have to actually
+ * call poll, but make sure to set anyyet to 1 to prevent
+ * the streams framework from setting up callbacks.
+ */
+ if ((err = ldi_poll(lh, POLLWRNORM, 1, &reventsp2, NULL)) != 0)
+ return (err);
+
+ *reventsp |= (reventsp2 & POLLWRNORM);
+ } else {
+ int lockstate;
+
+ /* The slave device is open, do the poll */
+ if ((err = ldi_poll(lh, events, anyyet, reventsp, phpp)) != 0)
+ return (err);
+
+ /*
+ * Drop any leading EOFs on the stream.
+ *
+ * Note that we have to use pollunlock() here to avoid
+ * recursive mutex enters in the poll framework. The
+ * reason is that if there is an EOF message on the stream
+ * then the act of reading from the queue to remove the
+ * message can cause the ptm drivers event service
+ * routine to be invoked, and if there is no open
+ * slave device then the ptm driver may generate
+ * error messages and put them on the stream. This
+ * in turn will generate a poll event and the poll
+ * framework will try to invoke any poll callbacks
+ * associated with the stream. In the process of
+ * doing that the poll framework will try to aquire
+ * locks that we are already holding. So we need to
+ * drop those locks here before we do our read.
+ */
+ lockstate = pollunlock();
+ err = lx_ptm_eof_drop(dev, &rval);
+ pollrelock(lockstate);
+ if (err)
+ return (err);
+
+ /* If no EOF was dropped then return */
+ if (rval == 0)
+ return (0);
+
+ /*
+ * An EOF was removed from the stream. Retry the entire
+ * poll operation from the top because polls on the ptm
+ * device should behave differently now.
+ */
+ *loop = 1;
+ }
+ return (0);
+}
+
+static int
+lx_ptm_poll(dev_t dev, short events, int anyyet, short *reventsp,
+ struct pollhead **phpp)
+{
+ int loop, err;
+
+ do {
+ /* Serialize ourself wrt read operations. */
+ if (lx_ptm_read_start(dev) != 0)
+ return (EINTR);
+
+ err = lx_ptm_poll_loop(dev,
+ events, anyyet, reventsp, phpp, &loop);
+ lx_ptm_read_end(dev);
+ if (err != 0)
+ return (err);
+ } while (loop != 0);
+ return (0);
+}
+
+static struct cb_ops lx_ptm_cb_ops = {
+ lx_ptm_open, /* open */
+ lx_ptm_close, /* close */
+ nodev, /* strategy */
+ nodev, /* print */
+ nodev, /* dump */
+ lx_ptm_read, /* read */
+ lx_ptm_write, /* write */
+ lx_ptm_ioctl, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ nodev, /* segmap */
+ lx_ptm_poll, /* chpoll */
+ ddi_prop_op, /* prop_op */
+ NULL, /* cb_str */
+ D_NEW | D_MP,
+ CB_REV,
+ NULL,
+ NULL
+};
+
+static struct dev_ops lx_ptm_ops = {
+ DEVO_REV,
+ 0,
+ ddi_getinfo_1to1,
+ nulldev,
+ nulldev,
+ lx_ptm_attach,
+ lx_ptm_detach,
+ nodev,
+ &lx_ptm_cb_ops,
+ NULL,
+ NULL,
+ ddi_quiesce_not_needed, /* quiesce */
+};
+
+static struct modldrv modldrv = {
+ &mod_driverops, /* type of module */
+ "Linux master terminal driver", /* description of module */
+ &lx_ptm_ops /* driver ops */
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ &modldrv,
+ NULL
+};
+
+int
+_init(void)
+{
+ return (mod_install(&modlinkage));
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ return (mod_remove(&modlinkage));
+}
diff --git a/usr/src/uts/common/brand/lx/io/lx_ptm.conf b/usr/src/uts/common/brand/lx/io/lx_ptm.conf
new file mode 100644
index 0000000000..481b4e3c74
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/io/lx_ptm.conf
@@ -0,0 +1,27 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+
+name="lx_ptm" parent="pseudo" instance=0;
diff --git a/usr/src/uts/common/brand/lx/os/lx_brand.c b/usr/src/uts/common/brand/lx/os/lx_brand.c
new file mode 100644
index 0000000000..378f01550b
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/os/lx_brand.c
@@ -0,0 +1,1662 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/kmem.h>
+#include <sys/errno.h>
+#include <sys/thread.h>
+#include <sys/systm.h>
+#include <sys/syscall.h>
+#include <sys/proc.h>
+#include <sys/modctl.h>
+#include <sys/cmn_err.h>
+#include <sys/model.h>
+#include <sys/exec.h>
+#include <sys/lx_impl.h>
+#include <sys/machbrand.h>
+#include <sys/lx_syscalls.h>
+#include <sys/lx_pid.h>
+#include <sys/lx_futex.h>
+#include <sys/lx_brand.h>
+#include <sys/param.h>
+#include <sys/termios.h>
+#include <sys/sunddi.h>
+#include <sys/ddi.h>
+#include <sys/vnode.h>
+#include <sys/pathname.h>
+#include <sys/auxv.h>
+#include <sys/priv.h>
+#include <sys/regset.h>
+#include <sys/privregs.h>
+#include <sys/archsystm.h>
+#include <sys/zone.h>
+#include <sys/brand.h>
+#include <sys/sdt.h>
+#include <sys/x86_archext.h>
+#include <sys/controlregs.h>
+#include <lx_signum.h>
+
+int lx_debug = 0;
+
+void lx_init_brand_data(zone_t *);
+void lx_free_brand_data(zone_t *);
+void lx_setbrand(proc_t *);
+int lx_getattr(zone_t *, int, void *, size_t *);
+int lx_setattr(zone_t *, int, void *, size_t);
+int lx_brandsys(int, int64_t *, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t, uintptr_t);
+void lx_set_kern_version(zone_t *, char *);
+void lx_copy_procdata(proc_t *, proc_t *);
+
+extern int getsetcontext(int, void *);
+
+extern void lx_setrval(klwp_t *, int, int);
+extern void lx_proc_exit(proc_t *, klwp_t *);
+extern void lx_exec();
+extern int lx_initlwp(klwp_t *);
+extern void lx_forklwp(klwp_t *, klwp_t *);
+extern void lx_exitlwp(klwp_t *);
+extern void lx_freelwp(klwp_t *);
+extern void lx_exit_with_sig(proc_t *, sigqueue_t *, void *);
+static void lx_psig_to_proc(proc_t *, kthread_t *, int);
+extern boolean_t lx_wait_filter(proc_t *, proc_t *);
+extern greg_t lx_fixsegreg(greg_t, model_t);
+extern int lx_sched_affinity(int, uintptr_t, int, uintptr_t, int64_t *);
+
+extern void lx_ioctl_init();
+extern void lx_ioctl_fini();
+
+int lx_systrace_brand_enabled;
+
+lx_systrace_f *lx_systrace_entry_ptr;
+lx_systrace_f *lx_systrace_return_ptr;
+
+static int lx_systrace_enabled;
+
+static int lx_elfexec(struct vnode *vp, struct execa *uap, struct uarg *args,
+ struct intpdata *idata, int level, long *execsz, int setid,
+ caddr_t exec_file, struct cred *cred, int brand_action);
+
+static boolean_t lx_native_exec(uint8_t, const char **);
+static void lx_ptrace_exectrap(proc_t *);
+static uint32_t lx_map32limit(proc_t *);
+
+/* lx brand */
+struct brand_ops lx_brops = {
+ lx_init_brand_data,
+ lx_free_brand_data,
+ lx_brandsys,
+ lx_setbrand,
+ lx_getattr,
+ lx_setattr,
+ lx_copy_procdata,
+ lx_proc_exit,
+ lx_exec,
+ lx_setrval,
+ lx_initlwp,
+ lx_forklwp,
+ lx_freelwp,
+ lx_exitlwp,
+ lx_elfexec,
+ NULL,
+ NULL,
+ lx_psig_to_proc,
+ NSIG,
+ lx_exit_with_sig,
+ lx_wait_filter,
+ lx_native_exec,
+ lx_ptrace_exectrap,
+ lx_map32limit
+};
+
+struct brand_mach_ops lx_mops = {
+ NULL,
+ lx_brand_int80_callback, /* 32-bit Linux entry point */
+ NULL,
+ lx_brand_syscall_callback, /* 64-bit common entry point */
+ NULL,
+ lx_fixsegreg,
+};
+
+struct brand lx_brand = {
+ BRAND_VER_1,
+ "lx",
+ &lx_brops,
+ &lx_mops,
+ sizeof (struct lx_proc_data)
+};
+
+static struct modlbrand modlbrand = {
+ &mod_brandops, "lx brand", &lx_brand
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, (void *)&modlbrand, NULL
+};
+
+void
+lx_proc_exit(proc_t *p, klwp_t *lwp)
+{
+ zone_t *z = p->p_zone;
+ int sig = ptolxproc(p)->l_signal;
+
+ ASSERT(p->p_brand != NULL);
+ ASSERT(p->p_brand_data != NULL);
+
+ /*
+ * If init is dying and we aren't explicitly shutting down the zone
+ * or the system, then Solaris is about to restart init. The Linux
+ * init is not designed to handle a restart, which it interprets as
+ * a reboot. To give it a sane environment in which to run, we
+ * reboot the zone.
+ */
+ if (p->p_pid == z->zone_proc_initpid) {
+ if (z->zone_boot_err == 0 &&
+ z->zone_restart_init &&
+ zone_status_get(z) < ZONE_IS_SHUTTING_DOWN &&
+ zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN)
+ (void) zone_kadmin(A_REBOOT, 0, NULL, CRED());
+ }
+
+ /*
+ * We might get here if fork failed (e.g. ENOMEM) so we don't always
+ * have an lwp (see brand_clearbrand).
+ */
+ if (lwp != NULL)
+ lx_exitlwp(lwp);
+
+ /*
+ * The call path here is:
+ * proc_exit -> brand_clearbrand -> b_proc_exit
+ * and then brand_clearbrand will set p_brand to be the native brand.
+ * We are done with our brand data but we don't free it here since
+ * that is done for us by proc_exit due to the fact that we have a
+ * b_exit_with_sig handler setup.
+ */
+ p->p_exit_data = sig;
+}
+
+void
+lx_setbrand(proc_t *p)
+{
+ kthread_t *t = p->p_tlist;
+ int err;
+
+ ASSERT(p->p_brand_data == NULL);
+ ASSERT(ttolxlwp(curthread) == NULL);
+
+ p->p_brand_data = kmem_zalloc(sizeof (struct lx_proc_data), KM_SLEEP);
+ ptolxproc(p)->l_signal = stol_signo[SIGCHLD];
+
+ /*
+ * This routine can only be called for single-threaded processes.
+ * Since lx_initlwp() can only fail if we run out of PIDs for
+ * multithreaded processes, we know that this can never fail.
+ */
+ err = lx_initlwp(t->t_lwp);
+ ASSERT(err == 0);
+}
+
+/* ARGSUSED */
+int
+lx_setattr(zone_t *zone, int attr, void *buf, size_t bufsize)
+{
+ boolean_t val;
+ char vers[LX_VERS_MAX];
+
+ if (attr == LX_ATTR_RESTART_INIT) {
+ if (bufsize > sizeof (boolean_t))
+ return (ERANGE);
+ if (copyin(buf, &val, sizeof (val)) != 0)
+ return (EFAULT);
+ if (val != B_TRUE && val != B_FALSE)
+ return (EINVAL);
+ zone->zone_restart_init = val;
+ return (0);
+ } else if (attr == LX_KERN_VERSION_NUM) {
+ if (bufsize > (LX_VERS_MAX - 1))
+ return (ERANGE);
+ bzero(vers, LX_VERS_MAX);
+ if (copyin(buf, &vers, bufsize) != 0)
+ return (EFAULT);
+ lx_set_kern_version(zone, vers);
+ return (0);
+ }
+ return (EINVAL);
+}
+
+/* ARGSUSED */
+int
+lx_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize)
+{
+ if (attr == LX_ATTR_RESTART_INIT) {
+ if (*bufsize < sizeof (boolean_t))
+ return (ERANGE);
+ if (copyout(&zone->zone_restart_init, buf,
+ sizeof (boolean_t)) != 0)
+ return (EFAULT);
+ *bufsize = sizeof (boolean_t);
+ return (0);
+ } else if (attr == LX_KERN_VERSION_NUM) {
+ if (*bufsize < LX_VERS_MAX)
+ return (ERANGE);
+ if (copyout(lx_get_zone_kern_version(curzone), buf,
+ LX_VERS_MAX) != 0)
+ return (EFAULT);
+ *bufsize = LX_VERS_MAX;
+ return (0);
+ }
+ return (-EINVAL);
+}
+
+/*
+ * Enable ptrace system call tracing for the given LWP. This is done by
+ * both setting the flag in that LWP's brand data (in the kernel) and setting
+ * the process-wide trace flag (in the brand library of the traced process).
+ */
+static int
+lx_ptrace_syscall_set(pid_t pid, id_t lwpid, int set)
+{
+ proc_t *p;
+ kthread_t *t;
+ klwp_t *lwp;
+ lx_proc_data_t *lpdp;
+ lx_lwp_data_t *lldp;
+ uintptr_t addr;
+ int ret, flag = 1;
+
+ if ((p = sprlock(pid)) == NULL)
+ return (ESRCH);
+
+ if (priv_proc_cred_perm(curproc->p_cred, p, NULL, VWRITE) != 0) {
+ sprunlock(p);
+ return (EPERM);
+ }
+
+ if ((t = idtot(p, lwpid)) == NULL || (lwp = ttolwp(t)) == NULL) {
+ sprunlock(p);
+ return (ESRCH);
+ }
+
+ if ((lpdp = p->p_brand_data) == NULL ||
+ (lldp = lwp->lwp_brand) == NULL) {
+ sprunlock(p);
+ return (ESRCH);
+ }
+
+ if (set) {
+ /*
+ * Enable the ptrace flag for this LWP and this process. Note
+ * that we will turn off the LWP's ptrace flag, but we don't
+ * turn off the process's ptrace flag.
+ */
+ lldp->br_ptrace = 1;
+ lpdp->l_ptrace = 1;
+
+ addr = lpdp->l_traceflag;
+
+ mutex_exit(&p->p_lock);
+
+ /*
+ * This can fail only in some rare corner cases where the
+ * process is exiting or we're completely out of memory. In
+ * these cases, it's sufficient to return an error to the ptrace
+ * consumer and leave the process-wide flag set.
+ */
+ ret = uwrite(p, &flag, sizeof (flag), addr);
+
+ mutex_enter(&p->p_lock);
+
+ /*
+ * If we couldn't set the trace flag, unset the LWP's ptrace
+ * flag as there ptrace consumer won't expect this LWP to stop.
+ */
+ if (ret != 0)
+ lldp->br_ptrace = 0;
+ } else {
+ lldp->br_ptrace = 0;
+ ret = 0;
+ }
+
+ sprunlock(p);
+
+ if (ret != 0)
+ ret = EIO;
+
+ return (ret);
+}
+
+static void
+lx_ptrace_fire(void)
+{
+ kthread_t *t = curthread;
+ klwp_t *lwp = ttolwp(t);
+ lx_lwp_data_t *lldp = lwp->lwp_brand;
+
+ /*
+ * The ptrace flag only applies until the next event is encountered
+ * for the given LWP. If it's set, turn off the flag and poke the
+ * controlling process by raising a signal.
+ */
+ if (lldp->br_ptrace) {
+ lldp->br_ptrace = 0;
+ tsignal(t, SIGTRAP);
+ }
+}
+
+/*
+ * Supports Linux PTRACE_SETOPTIONS handling which is similar to PTRACE_TRACEME
+ * but return an event in the second byte of si_status.
+ */
+static int
+lx_ptrace_ext_opts(int cmd, pid_t pid, uintptr_t val, int64_t *rval)
+{
+ proc_t *p;
+ lx_proc_data_t *lpdp;
+ uint_t ret;
+
+ if (cmd != B_PTRACE_EXT_OPTS_SET && cmd != B_PTRACE_EXT_OPTS_GET &&
+ cmd != B_PTRACE_EXT_OPTS_EVT)
+ return (set_errno(EINVAL));
+
+ if ((p = sprlock(pid)) == NULL)
+ return (ESRCH);
+
+ if (priv_proc_cred_perm(curproc->p_cred, p, NULL, VWRITE) != 0) {
+ sprunlock(p);
+ return (EPERM);
+ }
+
+ if ((lpdp = p->p_brand_data) == NULL) {
+ sprunlock(p);
+ return (ESRCH);
+ }
+
+ if (cmd == B_PTRACE_EXT_OPTS_SET) {
+ lpdp->l_ptrace_opts = (uint_t)val;
+
+ } else if (cmd == B_PTRACE_EXT_OPTS_GET) {
+ ret = lpdp->l_ptrace_opts;
+
+ } else /* B_PTRACE_EXT_OPTS_EVT */ {
+ ret = lpdp->l_ptrace_event;
+ lpdp->l_ptrace_event = 0;
+ }
+
+ sprunlock(p);
+
+ if (cmd != B_PTRACE_EXT_OPTS_SET) {
+ if (copyout(&ret, (void *)val, sizeof (uint_t)) != 0)
+ return (EFAULT);
+ }
+
+ *rval = 0;
+ return (0);
+}
+
+/*
+ * Used to support Linux PTRACE_SETOPTIONS handling and similar to
+ * PTRACE_TRACEME. We signal ourselves to stop on return from this syscall and
+ * setup the event reason so the emulation can pull this out when someone
+ * 'waits' on this process.
+ */
+static void
+lx_ptrace_stop_for_option(int option)
+{
+ proc_t *p = ttoproc(curthread);
+ sigqueue_t *sqp;
+ lx_proc_data_t *lpdp;
+
+ if ((lpdp = p->p_brand_data) == NULL) {
+ /* this should never happen but just to be safe */
+ return;
+ }
+
+ /* Track the event as the reason for stopping */
+ switch (option) {
+ case LX_PTRACE_O_TRACEFORK:
+ lpdp->l_ptrace_event = LX_PTRACE_EVENT_FORK;
+ break;
+ case LX_PTRACE_O_TRACEVFORK:
+ lpdp->l_ptrace_event = LX_PTRACE_EVENT_VFORK;
+ break;
+ case LX_PTRACE_O_TRACECLONE:
+ lpdp->l_ptrace_event = LX_PTRACE_EVENT_CLONE;
+ break;
+ case LX_PTRACE_O_TRACEEXEC:
+ lpdp->l_ptrace_event = LX_PTRACE_EVENT_EXEC;
+ break;
+ case LX_PTRACE_O_TRACEVFORKDONE:
+ lpdp->l_ptrace_event = LX_PTRACE_EVENT_VFORK_DONE;
+ break;
+ case LX_PTRACE_O_TRACEEXIT:
+ lpdp->l_ptrace_event = LX_PTRACE_EVENT_EXIT;
+ break;
+ case LX_PTRACE_O_TRACESECCOMP:
+ lpdp->l_ptrace_event = LX_PTRACE_EVENT_SECCOMP;
+ break;
+ }
+
+ /* Post the required signal to ourselves so that we stop. */
+ psignal(p, SIGTRAP);
+
+ /*
+ * Since we're stopping, we need to post the SIGCHLD to the parent.
+ * The code in sigcld expects the following two process values to be
+ * setup specifically before it can send the signal, so do that here.
+ */
+ sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
+ mutex_enter(&pidlock);
+ p->p_wdata = SIGTRAP;
+ p->p_wcode = CLD_STOPPED;
+ sigcld(p, sqp);
+ mutex_exit(&pidlock);
+}
+
+/*
+ * Brand entry to allow us to optionally generate the ptrace SIGTRAP on exec().
+ * This will only be called if ptrace is enabled -- and we only generate the
+ * SIGTRAP if LX_PTRACE_O_TRACEEXEC hasn't been set.
+ */
+void
+lx_ptrace_exectrap(proc_t *p)
+{
+ lx_proc_data_t *lpdp;
+
+ if ((lpdp = p->p_brand_data) == NULL ||
+ !(lpdp->l_ptrace_opts & LX_PTRACE_O_TRACEEXEC)) {
+ psignal(p, SIGTRAP);
+ }
+}
+
+uint32_t
+lx_map32limit(proc_t *p)
+{
+ /*
+ * To be bug-for-bug compatible with Linux, we have MAP_32BIT only
+ * allow mappings in the first 31 bits. This was a nuance in the
+ * original Linux implementation circa 2002, and applications have
+ * come to depend on its behavior.
+ *
+ * This is only relevant for 64-bit processes.
+ */
+ if (p->p_model == DATAMODEL_LP64)
+ return (1 << 31);
+
+ return ((uint32_t)USERLIMIT32);
+}
+
+void
+lx_brand_systrace_enable(void)
+{
+ extern void lx_brand_int80_enable(void);
+
+ ASSERT(!lx_systrace_enabled);
+
+#if defined(__amd64)
+ /* enable the trace points for both 32-bit and 64-bit lx calls */
+ extern void lx_brand_syscall_enable(void);
+ lx_brand_syscall_enable();
+ lx_brand_int80_enable();
+#else
+ lx_brand_int80_enable();
+#endif
+
+ lx_systrace_enabled = 1;
+}
+
+void
+lx_brand_systrace_disable(void)
+{
+ extern void lx_brand_int80_disable(void);
+
+ ASSERT(lx_systrace_enabled);
+
+#if defined(__amd64)
+ /* disable the trace points for both 32-bit and 64-bit lx calls */
+ extern void lx_brand_syscall_disable(void);
+ lx_brand_syscall_disable();
+ lx_brand_int80_disable();
+#else
+ lx_brand_int80_disable();
+#endif
+
+ lx_systrace_enabled = 0;
+}
+
+/*
+ * Posting a signal to a proc/thread, switch to native syscall mode.
+ * See the comment on lwp_segregs_save() for how we handle the user-land
+ * registers when we come into the kernel and see update_sregs() for how we
+ * restore.
+ */
+/*ARGSUSED*/
+static void
+lx_psig_to_proc(proc_t *p, kthread_t *t, int sig)
+{
+#if defined(__amd64)
+ lx_lwp_data_t *lwpd = ttolxlwp(t);
+ klwp_t *lwp = ttolwp(t);
+ pcb_t *pcb;
+ model_t datamodel;
+
+ datamodel = lwp_getdatamodel(lwp);
+ if (datamodel != DATAMODEL_NATIVE)
+ return;
+
+ pcb = &lwp->lwp_pcb;
+
+#ifdef DEBUG
+ /*
+ * Debug check to see if we have the correct fsbase.
+ *
+ * Note that it is not guaranteed that our %fsbase is loaded (i.e.
+ * rdmsr(MSR_AMD_FSBASE) won't necessarily return our expected fsbase)
+ * when this function runs. While it is usually loaded, it's possible
+ * to be in this function via the following sequence:
+ * we go off-cpu in the kernel
+ * another process runs in user-land and its fsbase gets loaded
+ * we go on-cpu to run and post a signal, but since we haven't run
+ * in user-land yet, our fsbase has not yet been loaded by
+ * update_sregs.
+ */
+ if (lwpd->br_ntv_syscall == 0 && lwpd->br_lx_fsbase != 0) {
+ /* should have Linux fsbase */
+ if (lwpd->br_lx_fsbase != pcb->pcb_fsbase) {
+ DTRACE_PROBE2(brand__lx__psig__lx__pcb,
+ uintptr_t, lwpd->br_lx_fsbase,
+ uintptr_t, pcb->pcb_fsbase);
+ }
+
+ }
+
+ if (lwpd->br_ntv_syscall == 1 && lwpd->br_ntv_fsbase != 0) {
+ /* should have Illumos fsbase */
+ if (lwpd->br_ntv_fsbase != pcb->pcb_fsbase) {
+ DTRACE_PROBE2(brand__lx__psig__ntv__pcb,
+ uintptr_t, lwpd->br_ntv_fsbase,
+ uintptr_t, pcb->pcb_fsbase);
+ }
+ }
+#endif
+
+ /* We "push" the current syscall mode flag on the "stack". */
+ ASSERT(lwpd->br_ntv_syscall == 0 || lwpd->br_ntv_syscall == 1);
+ lwpd->br_scms = (lwpd->br_scms << 1) | lwpd->br_ntv_syscall;
+
+ if (lwpd->br_ntv_syscall == 0 && lwpd->br_ntv_fsbase != 0) {
+ /*
+ * We were executing in Linux code but now that we're handling
+ * a signal we have to make sure we have the native fsbase
+ * loaded. Also update pcb so that if we service an interrupt
+ * we will restore the correct fsbase in update_sregs().
+ * Because of the amd64 guard and datamodel check, this
+ * obviously will only happen for the 64-bit user-land.
+ *
+ * There is a non-obvious side-effect here. Since the fsbase
+ * will now be the native value, when we bounce out to
+ * user-land the ucontext will capture the native value, even
+ * though we need to restore the Linux value when we return
+ * from the signal. This is handled by the B_SIGNAL_RETURN
+ * code in lx_brandsys().
+ */
+ pcb->pcb_fsbase = lwpd->br_ntv_fsbase;
+
+ /* Ensure that we go out via update_sregs */
+ pcb->pcb_rupdate = 1;
+ }
+ lwpd->br_ntv_syscall = 1;
+#endif
+}
+
+void
+lx_init_brand_data(zone_t *zone)
+{
+ lx_zone_data_t *data;
+ ASSERT(zone->zone_brand == &lx_brand);
+ ASSERT(zone->zone_brand_data == NULL);
+ data = (lx_zone_data_t *)kmem_zalloc(sizeof (lx_zone_data_t), KM_SLEEP);
+ /*
+ * Set the default lxzd_kernel_version to 2.4.
+ * This can be changed by a call to setattr() during zone boot.
+ */
+ (void) strlcpy(data->lxzd_kernel_version, "2.4.21", LX_VERS_MAX);
+ data->lxzd_max_syscall = LX_NSYSCALLS;
+ zone->zone_brand_data = data;
+}
+
+void
+lx_free_brand_data(zone_t *zone)
+{
+ kmem_free(zone->zone_brand_data, sizeof (lx_zone_data_t));
+}
+
+void
+lx_unsupported(char *dmsg)
+{
+ DTRACE_PROBE1(brand__lx__unsupported, char *, dmsg);
+}
+
+/*
+ * Get the addresses of the user-space system call handler and attach it to
+ * the proc structure. Returning 0 indicates success; the value returned
+ * by the system call is the value stored in rval. Returning a non-zero
+ * value indicates a failure; the value returned is used to set errno, -1
+ * is returned from the syscall and the contents of rval are ignored. To
+ * set errno and have the syscall return a value other than -1 we can
+ * manually set errno and rval and return 0.
+ */
+int
+lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
+ uintptr_t arg3, uintptr_t arg4, uintptr_t arg5)
+{
+ kthread_t *t = curthread;
+ proc_t *p = ttoproc(t);
+ lx_proc_data_t *pd;
+ int ike_call;
+ struct termios *termios;
+ uint_t termios_len;
+ int error;
+ lx_brand_registration_t reg;
+ lx_lwp_data_t *lwpd;
+
+ /*
+ * There is one operation that is suppored for non-branded
+ * process. B_EXEC_BRAND. This is the equilivant of an
+ * exec call, but the new process that is created will be
+ * a branded process.
+ */
+ if (cmd == B_EXEC_BRAND) {
+ ASSERT(p->p_zone != NULL);
+ ASSERT(p->p_zone->zone_brand == &lx_brand);
+ return (exec_common(
+ (char *)arg1, (const char **)arg2, (const char **)arg3,
+ EBA_BRAND));
+ }
+
+ /* For all other operations this must be a branded process. */
+ if (p->p_brand == NULL)
+ return (set_errno(ENOSYS));
+
+ ASSERT(p->p_brand == &lx_brand);
+ ASSERT(p->p_brand_data != NULL);
+
+ switch (cmd) {
+ case B_REGISTER:
+ if (p->p_model == DATAMODEL_NATIVE) {
+ if (copyin((void *)arg1, &reg, sizeof (reg)) != 0) {
+ lx_print("Failed to copyin brand registration "
+ "at 0x%p\n", (void *)arg1);
+ return (EFAULT);
+ }
+ }
+#ifdef _LP64
+ else {
+ /* 32-bit userland on 64-bit kernel */
+ lx_brand_registration32_t reg32;
+
+ if (copyin((void *)arg1, &reg32, sizeof (reg32)) != 0) {
+ lx_print("Failed to copyin brand registration "
+ "at 0x%p\n", (void *)arg1);
+ return (EFAULT);
+ }
+
+ reg.lxbr_version = (uint_t)reg32.lxbr_version;
+ reg.lxbr_handler =
+ (void *)(uintptr_t)reg32.lxbr_handler;
+ reg.lxbr_tracehandler =
+ (void *)(uintptr_t)reg32.lxbr_tracehandler;
+ reg.lxbr_traceflag =
+ (void *)(uintptr_t)reg32.lxbr_traceflag;
+ }
+#endif
+
+ if (reg.lxbr_version != LX_VERSION_1) {
+ lx_print("Invalid brand library version (%u)\n",
+ reg.lxbr_version);
+ return (EINVAL);
+ }
+
+ lx_print("Assigning brand 0x%p and handler 0x%p to proc 0x%p\n",
+ (void *)&lx_brand, (void *)reg.lxbr_handler, (void *)p);
+ pd = p->p_brand_data;
+ pd->l_handler = (uintptr_t)reg.lxbr_handler;
+ pd->l_tracehandler = (uintptr_t)reg.lxbr_tracehandler;
+ pd->l_traceflag = (uintptr_t)reg.lxbr_traceflag;
+
+#if defined(__amd64)
+ /*
+ * When we register, start with native syscalls enabled so that
+ * lx_init can finish initialization before switch to Linux
+ * syscall mode. Also initialize the syscall mode "stack" to
+ * native. We push/pop bits into this "stack" during signal
+ * handling.
+ */
+ lwpd = ttolxlwp(t);
+ lwpd->br_ntv_syscall = 1;
+ lwpd->br_scms = 1;
+#endif
+
+ *rval = 0;
+ return (0);
+ case B_TTYMODES:
+ /* This is necessary for emulating TCGETS ioctls. */
+ if (ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, ddi_root_node(),
+ DDI_PROP_NOTPROM, "ttymodes", (uchar_t **)&termios,
+ &termios_len) != DDI_SUCCESS)
+ return (EIO);
+
+ ASSERT(termios_len == sizeof (*termios));
+
+ if (copyout(&termios, (void *)arg1, sizeof (termios)) != 0) {
+ ddi_prop_free(termios);
+ return (EFAULT);
+ }
+
+ ddi_prop_free(termios);
+ *rval = 0;
+ return (0);
+
+ case B_ELFDATA:
+ pd = curproc->p_brand_data;
+ if (get_udatamodel() == DATAMODEL_NATIVE) {
+ if (copyout(&pd->l_elf_data, (void *)arg1,
+ sizeof (lx_elf_data_t)) != 0) {
+ (void) set_errno(EFAULT);
+ return (*rval = -1);
+ }
+ }
+#if defined(_LP64)
+ else {
+ /* 32-bit userland on 64-bit kernel */
+ lx_elf_data32_t led32;
+
+ led32.ed_phdr = (int)pd->l_elf_data.ed_phdr;
+ led32.ed_phent = (int)pd->l_elf_data.ed_phent;
+ led32.ed_phnum = (int)pd->l_elf_data.ed_phnum;
+ led32.ed_entry = (int)pd->l_elf_data.ed_entry;
+ led32.ed_base = (int)pd->l_elf_data.ed_base;
+ led32.ed_ldentry = (int)pd->l_elf_data.ed_ldentry;
+
+ if (copyout(&led32, (void *)arg1,
+ sizeof (led32)) != 0) {
+ (void) set_errno(EFAULT);
+ return (*rval = -1);
+ }
+ }
+#endif
+ *rval = 0;
+ return (0);
+
+ case B_EXEC_NATIVE:
+ error = exec_common(
+ (char *)arg1, (const char **)arg2, (const char **)arg3,
+ EBA_NATIVE);
+ if (error) {
+ (void) set_errno(error);
+ return (*rval = -1);
+ }
+ return (*rval = 0);
+
+ /*
+ * The B_TRUSS_POINT subcommand is used so that we can make a no-op
+ * syscall for debugging purposes (dtracing) from within the user-level
+ * emulation.
+ */
+ case B_TRUSS_POINT:
+ *rval = 0;
+ return (0);
+
+ case B_LPID_TO_SPAIR:
+ /*
+ * Given a Linux pid as arg1, return the Solaris pid in arg2 and
+ * the Solaris LWP in arg3. We also translate pid 1 (which is
+ * hardcoded in many applications) to the zone's init process.
+ */
+ {
+ pid_t s_pid;
+ id_t s_tid;
+
+ if ((pid_t)arg1 == 1) {
+ s_pid = p->p_zone->zone_proc_initpid;
+ /* handle the dead/missing init(1M) case */
+ if (s_pid == -1)
+ s_pid = 1;
+ s_tid = 1;
+ } else if (lx_lpid_to_spair((pid_t)arg1, &s_pid,
+ &s_tid) < 0)
+ return (ESRCH);
+
+ if (copyout(&s_pid, (void *)arg2,
+ sizeof (s_pid)) != 0 ||
+ copyout(&s_tid, (void *)arg3, sizeof (s_tid)) != 0)
+ return (EFAULT);
+
+ *rval = 0;
+ return (0);
+ }
+
+ case B_PTRACE_SYSCALL:
+ *rval = lx_ptrace_syscall_set((pid_t)arg1, (id_t)arg2,
+ (int)arg3);
+ return (0);
+
+ case B_SYSENTRY:
+ if (lx_systrace_enabled) {
+ ASSERT(lx_systrace_entry_ptr != NULL);
+
+ if (get_udatamodel() == DATAMODEL_NATIVE) {
+ uintptr_t a[6];
+
+ if (copyin((void *)arg2, a, sizeof (a)) != 0)
+ return (EFAULT);
+
+ (*lx_systrace_entry_ptr)(arg1, a[0], a[1],
+ a[2], a[3], a[4], a[5]);
+ }
+#if defined(_LP64)
+ else {
+ /* 32-bit userland on 64-bit kernel */
+ uint32_t a[6];
+
+ if (copyin((void *)arg2, a, sizeof (a)) != 0)
+ return (EFAULT);
+
+ (*lx_systrace_entry_ptr)(arg1, a[0], a[1],
+ a[2], a[3], a[4], a[5]);
+ }
+#endif
+ }
+
+ lx_ptrace_fire();
+
+ pd = p->p_brand_data;
+
+ /*
+ * If neither DTrace not ptrace are interested in tracing
+ * this process any more, turn off the trace flag.
+ */
+ if (!lx_systrace_enabled && !pd->l_ptrace)
+ (void) suword32((void *)pd->l_traceflag, 0);
+
+ *rval = 0;
+ return (0);
+
+ case B_SYSRETURN:
+ if (lx_systrace_enabled) {
+ ASSERT(lx_systrace_return_ptr != NULL);
+
+ (*lx_systrace_return_ptr)(arg1, arg2, arg2, 0, 0, 0, 0);
+ }
+
+ lx_ptrace_fire();
+
+ pd = p->p_brand_data;
+
+ /*
+ * If neither DTrace not ptrace are interested in tracing
+ * this process any more, turn off the trace flag.
+ */
+ if (!lx_systrace_enabled && !pd->l_ptrace)
+ (void) suword32((void *)pd->l_traceflag, 0);
+
+ *rval = 0;
+ return (0);
+
+ case B_SET_AFFINITY_MASK:
+ case B_GET_AFFINITY_MASK:
+ /*
+ * Retrieve or store the CPU affinity mask for the
+ * requested linux pid.
+ *
+ * arg1 is a linux PID (0 means curthread).
+ * arg2 is the size of the given mask.
+ * arg3 is the address of the affinity mask.
+ */
+ return (lx_sched_affinity(cmd, arg1, arg2, arg3, rval));
+
+ case B_PTRACE_EXT_OPTS:
+ /*
+ * Set or get the ptrace extended options or get the event
+ * reason for the stop.
+ */
+ return (lx_ptrace_ext_opts((int)arg1, (pid_t)arg2, arg3, rval));
+
+ case B_PTRACE_STOP_FOR_OPT:
+ lx_ptrace_stop_for_option((int)arg1);
+ return (0);
+
+ case B_UNSUPPORTED:
+ {
+ char dmsg[256];
+
+ if (copyin((void *)arg1, &dmsg, sizeof (dmsg)) != 0) {
+ lx_print("Failed to copyin unsupported msg "
+ "at 0x%p\n", (void *)arg1);
+ return (EFAULT);
+ }
+ dmsg[255] = '\0';
+ lx_unsupported(dmsg);
+ }
+
+ return (0);
+
+ case B_STORE_ARGS:
+ /*
+ * B_STORE_ARGS subcommand
+ * arg1 = address of struct to be copied in
+ * arg2 = size of the struct being copied in
+ * arg3-arg6 ignored
+ * rval = the amount of data copied.
+ */
+ {
+ int err;
+ void *buf;
+
+ lwpd = ttolxlwp(curthread);
+ /* only have upper limit because arg2 is unsigned */
+ if (arg2 > LX_BR_ARGS_SIZE_MAX) {
+ return (EINVAL);
+ }
+
+ buf = kmem_alloc(arg2, KM_SLEEP);
+ if ((err = copyin((void *)arg1, buf, arg2)) != 0) {
+ lx_print("Failed to copyin scall arg at 0x%p\n",
+ (void *) arg1);
+ kmem_free(buf, arg2);
+ /*
+ * Purposely not setting br_scall_args to NULL
+ * to preserve data for debugging.
+ */
+ return (EFAULT);
+ }
+
+ if (lwpd->br_scall_args != NULL) {
+ ASSERT(lwpd->br_args_size > 0);
+ kmem_free(lwpd->br_scall_args,
+ lwpd->br_args_size);
+ }
+
+ lwpd->br_scall_args = buf;
+ lwpd->br_args_size = arg2;
+ *rval = arg2;
+ return (0);
+ }
+
+ case B_CLR_NTV_SYSC_FLAG:
+#if defined(__amd64)
+ lwpd = ttolxlwp(curthread);
+ lwpd->br_ntv_syscall = 0;
+
+ /*
+ * If Linux fsbase has been set, restore it. The user-level
+ * code only ever calls this in the 64-bit library.
+ *
+ * Note that it is not guaranteed that our %fsbase is loaded
+ * (i.e. rdmsr(MSR_AMD_FSBASE) won't necessarily return our
+ * expected fsbase) when this block runs. While it is usually
+ * loaded, it's possible to be in this function via the
+ * following sequence:
+ * we make the brandsys syscall and go off-cpu on entering
+ * the kernel
+ * another process runs in user-land and its fsbase gets
+ * loaded
+ * we go on-cpu to finish the syscall but since we haven't
+ * run again in user-land yet, our fsbase has not yet been
+ * reloaded by update_sregs
+ */
+ if (lwpd->br_lx_fsbase != 0) {
+ klwp_t *lwp = ttolwp(t);
+ pcb_t *pcb = &lwp->lwp_pcb;
+
+ pcb->pcb_fsbase = lwpd->br_lx_fsbase;
+
+ /* Ensure that we go out via update_sregs */
+ pcb->pcb_rupdate = 1;
+ }
+#endif
+ return (0);
+
+ case B_SIGNAL_RETURN:
+#if defined(__amd64)
+ /*
+ * Set the syscall mode and do the setcontext syscall. The
+ * user-level code only ever calls this in the 64-bit library.
+ *
+ * We get the previous syscall mode off of the br_scms "stack".
+ * That is a sequence of syscall mode flag bits we've pushed
+ * into that int as we took signals.
+ * arg1 = ucontext_t pointer
+ */
+ lwpd = ttolxlwp(curthread);
+
+ lwpd->br_ntv_syscall = lwpd->br_scms & 0x1;
+ /* "pop" this value from the "stack" */
+ lwpd->br_scms >>= 1;
+
+ /*
+ * If setting the mode to lx, make sure we fix up the context
+ * so that we load the lx fsbase when we return to the Linux
+ * code. For the native case, the context already has the
+ * correct native fsbase so we don't need to do anything here.
+ * Note that setgregs updates the pcb and in update_sregs we
+ * wrmsr the correct fsbase when we return to user-level.
+ * getsetcontext -> restorecontext -> setgregs
+ */
+ if (lwpd->br_ntv_syscall == 0 && lwpd->br_lx_fsbase != 0 &&
+ arg1 != NULL) {
+ /*
+ * Linux fsbase has been initialized, restore it.
+ * We have to copyin to modify since the user-level
+ * emulation doesn't have a copy of the lx fsbase or
+ * know that we are returning to Linux code.
+ */
+ ucontext_t uc;
+ klwp_t *lwp = ttolwp(t);
+ pcb_t *pcb = &lwp->lwp_pcb;
+
+ if (copyin((void *)arg1, &uc, sizeof (ucontext_t) -
+ sizeof (uc.uc_filler) -
+ sizeof (uc.uc_mcontext.fpregs)))
+ return (set_errno(EFAULT));
+
+ uc.uc_mcontext.gregs[REG_FSBASE] = lwpd->br_lx_fsbase;
+
+ if (copyout(&uc, (void *)arg1, sizeof (ucontext_t) -
+ sizeof (uc.uc_filler) -
+ sizeof (uc.uc_mcontext.fpregs)))
+ return (set_errno(EFAULT));
+
+ /* Ensure that we go out via update_sregs */
+ pcb->pcb_rupdate = 1;
+ }
+#endif /* amd64 */
+ return (getsetcontext(SETCONTEXT, (void *)arg1));
+
+ case B_UNWIND_NTV_SYSC_FLAG:
+#if defined(__amd64)
+ /*
+ * Used when exiting to support the setcontext back to the
+ * getcontext we performed in lx_init. We need to unwin
+ * whatever signal state is in br_scms since we are exiting.
+ * This sets us up for the B_SIGNAL_RETURN from lx_setcontext.
+ */
+ lwpd = ttolxlwp(curthread);
+ lwpd->br_scms = 1;
+#endif
+ return (0);
+
+ case B_EXIT_AS_SIG:
+ exit(CLD_KILLED, (int)arg1);
+ /* NOTREACHED */
+ break;
+
+ case B_IKE_SYSCALL:
+ if (arg1 > LX_N_IKE_FUNCS)
+ return (EINVAL);
+
+ if (get_udatamodel() == DATAMODEL_NATIVE) {
+ uintptr_t a[6];
+
+ if (copyin((void *)arg2, a, sizeof (a)) != 0)
+ return (EFAULT);
+
+ *rval = lx_emulate_syscall(arg1, a[0], a[1],
+ a[2], a[3], a[4], a[5]);
+#if defined(_LP64)
+ } else {
+ /* 32-bit userland on 64-bit kernel */
+ uint32_t a[6];
+
+ if (copyin((void *)arg2, a, sizeof (a)) != 0)
+ return (EFAULT);
+
+ *rval = lx_emulate_syscall(arg1, a[0], a[1],
+ a[2], a[3], a[4], a[5]);
+#endif
+ }
+
+ return (0);
+
+ default:
+ ike_call = cmd - B_IKE_SYSCALL;
+ if (ike_call > 0 && ike_call <= LX_N_IKE_FUNCS) {
+ *rval = lx_emulate_syscall(ike_call, arg1, arg2,
+ arg3, arg4, arg5, 0xbadbeef);
+ return (0);
+ }
+ }
+
+ return (EINVAL);
+}
+
+char *
+lx_get_zone_kern_version(zone_t *zone)
+{
+ return (((lx_zone_data_t *)zone->zone_brand_data)->lxzd_kernel_version);
+}
+
+void
+lx_set_kern_version(zone_t *zone, char *vers)
+{
+ lx_zone_data_t *lxzd = (lx_zone_data_t *)zone->zone_brand_data;
+
+ (void) strlcpy(lxzd->lxzd_kernel_version, vers, LX_VERS_MAX);
+}
+
+/*
+ * Copy the per-process brand data from a parent proc to a child.
+ */
+void
+lx_copy_procdata(proc_t *child, proc_t *parent)
+{
+ lx_proc_data_t *cpd, *ppd;
+
+ ppd = parent->p_brand_data;
+
+ ASSERT(ppd != NULL);
+
+ cpd = kmem_alloc(sizeof (lx_proc_data_t), KM_SLEEP);
+ *cpd = *ppd;
+
+ child->p_brand_data = cpd;
+}
+
+#if defined(_LP64)
+static void
+Ehdr32to64(Elf32_Ehdr *src, Ehdr *dst)
+{
+ bcopy(src->e_ident, dst->e_ident, sizeof (src->e_ident));
+ dst->e_type = src->e_type;
+ dst->e_machine = src->e_machine;
+ dst->e_version = src->e_version;
+ dst->e_entry = src->e_entry;
+ dst->e_phoff = src->e_phoff;
+ dst->e_shoff = src->e_shoff;
+ dst->e_flags = src->e_flags;
+ dst->e_ehsize = src->e_ehsize;
+ dst->e_phentsize = src->e_phentsize;
+ dst->e_phnum = src->e_phnum;
+ dst->e_shentsize = src->e_shentsize;
+ dst->e_shnum = src->e_shnum;
+ dst->e_shstrndx = src->e_shstrndx;
+}
+#endif /* _LP64 */
+
+static void
+restoreexecenv(struct execenv *ep, stack_t *sp)
+{
+ klwp_t *lwp = ttolwp(curthread);
+
+ setexecenv(ep);
+ lwp->lwp_sigaltstack.ss_sp = sp->ss_sp;
+ lwp->lwp_sigaltstack.ss_size = sp->ss_size;
+ lwp->lwp_sigaltstack.ss_flags = sp->ss_flags;
+}
+
+extern int elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int,
+ long *, int, caddr_t, cred_t *, int);
+
+extern int elf32exec(struct vnode *, execa_t *, uarg_t *, intpdata_t *, int,
+ long *, int, caddr_t, cred_t *, int);
+
+/*
+ * Exec routine called by elfexec() to load either 32-bit or 64-bit Linux
+ * binaries.
+ */
+static int
+lx_elfexec(struct vnode *vp, struct execa *uap, struct uarg *args,
+ struct intpdata *idata, int level, long *execsz, int setid,
+ caddr_t exec_file, struct cred *cred, int brand_action)
+{
+ int error;
+ vnode_t *nvp;
+ Ehdr ehdr;
+ Addr uphdr_vaddr;
+ intptr_t voffset;
+ char *interp;
+ uintptr_t ldaddr = NULL;
+ int i;
+ proc_t *p = ttoproc(curthread);
+ klwp_t *lwp = ttolwp(curthread);
+ struct execenv env;
+ struct execenv origenv;
+ stack_t orig_sigaltstack;
+ struct user *up = PTOU(ttoproc(curthread));
+ lx_elf_data_t *edp =
+ &((lx_proc_data_t *)ttoproc(curthread)->p_brand_data)->l_elf_data;
+ char *lib_path = NULL;
+
+ ASSERT(ttoproc(curthread)->p_brand == &lx_brand);
+ ASSERT(ttoproc(curthread)->p_brand_data != NULL);
+
+ if (args->to_model == DATAMODEL_NATIVE) {
+ lib_path = LX_LIB_PATH;
+ }
+#if defined(_LP64)
+ else {
+ lib_path = LX_LIB_PATH32;
+ }
+#endif
+
+ /*
+ * Set the brandname and library name for the new process so that
+ * elfexec() puts them onto the stack.
+ */
+ args->brandname = LX_BRANDNAME;
+ args->emulator = lib_path;
+
+ /*
+ * We will first exec the brand library, then map in the linux
+ * executable and the linux linker.
+ */
+ if ((error = lookupname(lib_path, UIO_SYSSPACE, FOLLOW, NULLVPP,
+ &nvp))) {
+ uprintf("%s: not found.", lib_path);
+ return (error);
+ }
+
+ /*
+ * We will eventually set the p_exec member to be the vnode for the new
+ * executable when we call setexecenv(). However, if we get an error
+ * before that call we need to restore the execenv to its original
+ * values so that when we return to the caller fop_close() works
+ * properly while cleaning up from the failed exec(). Restoring the
+ * original value will also properly decrement the 2nd VN_RELE that we
+ * took on the brand library.
+ */
+ origenv.ex_bssbase = p->p_bssbase;
+ origenv.ex_brkbase = p->p_brkbase;
+ origenv.ex_brksize = p->p_brksize;
+ origenv.ex_vp = p->p_exec;
+ orig_sigaltstack.ss_sp = lwp->lwp_sigaltstack.ss_sp;
+ orig_sigaltstack.ss_size = lwp->lwp_sigaltstack.ss_size;
+ orig_sigaltstack.ss_flags = lwp->lwp_sigaltstack.ss_flags;
+
+ if (args->to_model == DATAMODEL_NATIVE) {
+ error = elfexec(nvp, uap, args, idata, level + 1, execsz,
+ setid, exec_file, cred, brand_action);
+ }
+#if defined(_LP64)
+ else {
+ error = elf32exec(nvp, uap, args, idata, level + 1, execsz,
+ setid, exec_file, cred, brand_action);
+ }
+#endif
+ VN_RELE(nvp);
+ if (error != 0) {
+ restoreexecenv(&origenv, &orig_sigaltstack);
+ return (error);
+ }
+
+ /*
+ * exec-ed in the brand library above.
+ * The u_auxv vectors are now setup by elfexec to point to the
+ * brand emulation library and its linker.
+ */
+
+ bzero(&env, sizeof (env));
+
+ /*
+ * map in the the Linux executable
+ */
+ if (args->to_model == DATAMODEL_NATIVE) {
+ error = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr,
+ &voffset, exec_file, &interp, &env.ex_bssbase,
+ &env.ex_brkbase, &env.ex_brksize, NULL, NULL);
+ }
+#if defined(_LP64)
+ else {
+ Elf32_Ehdr ehdr32;
+ Elf32_Addr uphdr_vaddr32;
+
+ error = mapexec32_brand(vp, args, &ehdr32, &uphdr_vaddr32,
+ &voffset, exec_file, &interp, &env.ex_bssbase,
+ &env.ex_brkbase, &env.ex_brksize, NULL, NULL);
+
+ Ehdr32to64(&ehdr32, &ehdr);
+
+ if (uphdr_vaddr32 == (Elf32_Addr)-1)
+ uphdr_vaddr = (Addr)-1;
+ else
+ uphdr_vaddr = uphdr_vaddr32;
+ }
+#endif
+ if (error != 0) {
+ restoreexecenv(&origenv, &orig_sigaltstack);
+
+ if (interp != NULL)
+ kmem_free(interp, MAXPATHLEN);
+
+ return (error);
+ }
+
+ /*
+ * Save off the important properties of the lx executable. The brand
+ * library will ask us for this data later, when it is ready to set
+ * things up for the lx executable.
+ */
+ edp->ed_phdr = (uphdr_vaddr == -1) ? voffset + ehdr.e_phoff :
+ voffset + uphdr_vaddr;
+ edp->ed_entry = voffset + ehdr.e_entry;
+ edp->ed_phent = ehdr.e_phentsize;
+ edp->ed_phnum = ehdr.e_phnum;
+
+ if (interp != NULL) {
+ if (ehdr.e_type == ET_DYN) {
+ /*
+ * This is a shared object executable, so we need to
+ * pick a reasonable place to put the heap. Just don't
+ * use the first page.
+ */
+ env.ex_brkbase = (caddr_t)PAGESIZE;
+ env.ex_bssbase = (caddr_t)PAGESIZE;
+ }
+
+ /*
+ * If the program needs an interpreter (most do), map it in and
+ * store relevant information about it in the aux vector, where
+ * the brand library can find it.
+ */
+ if ((error = lookupname(interp, UIO_SYSSPACE, FOLLOW,
+ NULLVPP, &nvp))) {
+ uprintf("%s: not found.", interp);
+ restoreexecenv(&origenv, &orig_sigaltstack);
+ kmem_free(interp, MAXPATHLEN);
+ return (error);
+ }
+
+ kmem_free(interp, MAXPATHLEN);
+ interp = NULL;
+
+ /*
+ * map in the Linux linker
+ */
+ if (args->to_model == DATAMODEL_NATIVE) {
+ error = mapexec_brand(nvp, args, &ehdr,
+ &uphdr_vaddr, &voffset, exec_file, NULL, NULL,
+ NULL, NULL, NULL, &ldaddr);
+ }
+#if defined(_LP64)
+ else {
+ Elf32_Ehdr ehdr32;
+ Elf32_Addr uphdr_vaddr32;
+
+ error = mapexec32_brand(nvp, args, &ehdr32,
+ &uphdr_vaddr32, &voffset, exec_file, NULL, NULL,
+ NULL, NULL, NULL, &ldaddr);
+
+ Ehdr32to64(&ehdr32, &ehdr);
+
+ if (uphdr_vaddr32 == (Elf32_Addr)-1)
+ uphdr_vaddr = (Addr)-1;
+ else
+ uphdr_vaddr = uphdr_vaddr32;
+ }
+#endif
+
+ VN_RELE(nvp);
+ if (error != 0) {
+ restoreexecenv(&origenv, &orig_sigaltstack);
+ return (error);
+ }
+
+ /*
+ * Now that we know the base address of the brand's linker,
+ * we also save this for later use by the brand library.
+ */
+ edp->ed_base = voffset;
+ edp->ed_ldentry = voffset + ehdr.e_entry;
+ } else {
+ /*
+ * This program has no interpreter. The lx brand library will
+ * jump to the address in the AT_SUN_BRAND_LDENTRY aux vector,
+ * so in this case, put the entry point of the main executable
+ * there.
+ */
+ if (ehdr.e_type == ET_EXEC) {
+ /*
+ * An executable with no interpreter, this must be a
+ * statically linked executable, which means we loaded
+ * it at the address specified in the elf header, in
+ * which case the e_entry field of the elf header is an
+ * absolute address.
+ */
+ edp->ed_ldentry = ehdr.e_entry;
+ edp->ed_entry = ehdr.e_entry;
+ } else {
+ /*
+ * A shared object with no interpreter, we use the
+ * calculated address from above.
+ */
+ edp->ed_ldentry = edp->ed_entry;
+
+ /*
+ * In all situations except an ET_DYN elf object with no
+ * interpreter, we want to leave the brk and base
+ * values set by mapexec_brand alone. Normally when
+ * running ET_DYN objects on Solaris (most likely
+ * /lib/ld.so.1) the kernel sets brk and base to 0 since
+ * it doesn't know where to put the heap, and later the
+ * linker will call brk() to initialize the heap in:
+ * usr/src/cmd/sgs/rtld/common/setup.c:setup()
+ * after it has determined where to put it. (This
+ * decision is made after the linker loads and inspects
+ * elf properties of the target executable being run.)
+ *
+ * So for ET_DYN Linux executables, we also don't know
+ * where the heap should go, so we'll set the brk and
+ * base to 0. But in this case the Solaris linker will
+ * not initialize the heap, so when the Linux linker
+ * starts running there is no heap allocated. This
+ * seems to be ok on Linux 2.4 based systems because the
+ * Linux linker/libc fall back to using mmap() to
+ * allocate memory. But on 2.6 systems, running
+ * applications by specifying them as command line
+ * arguments to the linker results in segfaults for an
+ * as yet undetermined reason (which seems to indicatej
+ * that a more permanent fix for heap initalization in
+ * these cases may be necessary).
+ */
+ if (ehdr.e_type == ET_DYN) {
+ env.ex_bssbase = (caddr_t)0;
+ env.ex_brkbase = (caddr_t)0;
+ env.ex_brksize = 0;
+ }
+ }
+
+ }
+
+ env.ex_vp = vp;
+ setexecenv(&env);
+
+ /*
+ * We try to keep /proc's view of the aux vector consistent with
+ * what's on the process stack.
+ */
+ if (args->to_model == DATAMODEL_NATIVE) {
+ auxv_t phdr_auxv[4] = {
+ { AT_SUN_BRAND_LX_PHDR, 0 },
+ { AT_SUN_BRAND_LX_INTERP, 0 },
+ { AT_SUN_BRAND_LX_SYSINFO_EHDR, 0 },
+ { AT_SUN_BRAND_AUX4, 0 }
+ };
+ phdr_auxv[0].a_un.a_val = edp->ed_phdr;
+ phdr_auxv[1].a_un.a_val = ldaddr;
+ phdr_auxv[2].a_un.a_val = 1; /* set in lx_init */
+ phdr_auxv[3].a_type = AT_CLKTCK;
+ phdr_auxv[3].a_un.a_val = hz;
+
+ if (copyout(&phdr_auxv, args->auxp_brand,
+ sizeof (phdr_auxv)) == -1)
+ return (EFAULT);
+ }
+#if defined(_LP64)
+ else {
+ auxv32_t phdr_auxv32[3] = {
+ { AT_SUN_BRAND_LX_PHDR, 0 },
+ { AT_SUN_BRAND_LX_INTERP, 0 },
+ { AT_SUN_BRAND_AUX3, 0 }
+ };
+ phdr_auxv32[0].a_un.a_val = edp->ed_phdr;
+ phdr_auxv32[1].a_un.a_val = ldaddr;
+ phdr_auxv32[2].a_type = AT_CLKTCK;
+ phdr_auxv32[2].a_un.a_val = hz;
+
+ if (copyout(&phdr_auxv32, args->auxp_brand,
+ sizeof (phdr_auxv32)) == -1)
+ return (EFAULT);
+ }
+#endif
+
+ /*
+ * /proc uses the AT_ENTRY aux vector entry to deduce
+ * the location of the executable in the address space. The user
+ * structure contains a copy of the aux vector that needs to have those
+ * entries patched with the values of the real lx executable (they
+ * currently contain the values from the lx brand library that was
+ * elfexec'd, above).
+ *
+ * For live processes, AT_BASE is used to locate the linker segment,
+ * which /proc and friends will later use to find Solaris symbols
+ * (such as rtld_db_preinit). However, for core files, /proc uses
+ * AT_ENTRY to find the right segment to label as the executable.
+ * So we set AT_ENTRY to be the entry point of the linux executable,
+ * but leave AT_BASE to be the address of the Solaris linker.
+ */
+ for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
+ switch (up->u_auxv[i].a_type) {
+ case AT_ENTRY:
+ up->u_auxv[i].a_un.a_val = edp->ed_entry;
+ break;
+
+ case AT_SUN_BRAND_LX_PHDR:
+ up->u_auxv[i].a_un.a_val = edp->ed_phdr;
+ break;
+
+ case AT_SUN_BRAND_LX_INTERP:
+ up->u_auxv[i].a_un.a_val = ldaddr;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ return (0);
+}
+
+boolean_t
+lx_native_exec(uint8_t osabi, const char **interp)
+{
+ if (osabi != ELFOSABI_SOLARIS)
+ return (B_FALSE);
+
+ *interp = "/native";
+ return (B_TRUE);
+}
+
+int
+_init(void)
+{
+ int err = 0;
+
+ /* pid/tid conversion hash tables */
+ lx_pid_init();
+
+ /* for lx_ioctl() */
+ lx_ioctl_init();
+
+ /* for lx_futex() */
+ lx_futex_init();
+
+
+ err = mod_install(&modlinkage);
+ if (err != 0) {
+ cmn_err(CE_WARN, "Couldn't install lx brand module");
+
+ /*
+ * This looks drastic, but it should never happen. These
+ * two data structures should be completely free-able until
+ * they are used by Linux processes. Since the brand
+ * wasn't loaded there should be no Linux processes, and
+ * thus no way for these data structures to be modified.
+ */
+ lx_pid_fini();
+ lx_ioctl_fini();
+ if (lx_futex_fini())
+ panic("lx brand module cannot be loaded or unloaded.");
+ }
+ return (err);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ int err;
+ int futex_done = 0;
+
+ /*
+ * If there are any zones using this brand, we can't allow it to be
+ * unloaded.
+ */
+ if (brand_zone_count(&lx_brand))
+ return (EBUSY);
+
+ lx_pid_fini();
+ lx_ioctl_fini();
+
+ if ((err = lx_futex_fini()) != 0)
+ goto done;
+ futex_done = 1;
+
+ err = mod_remove(&modlinkage);
+
+done:
+ if (err) {
+ /*
+ * If we can't unload the module, then we have to get it
+ * back into a sane state.
+ */
+ lx_pid_init();
+
+ if (futex_done)
+ lx_futex_init();
+
+ }
+
+ return (err);
+}
diff --git a/usr/src/uts/common/brand/lx/os/lx_misc.c b/usr/src/uts/common/brand/lx/os/lx_misc.c
new file mode 100644
index 0000000000..c775a97505
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/os/lx_misc.c
@@ -0,0 +1,549 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/archsystm.h>
+#include <sys/privregs.h>
+#include <sys/exec.h>
+#include <sys/lwp.h>
+#include <sys/sem.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_pid.h>
+#include <sys/lx_futex.h>
+#include <sys/cmn_err.h>
+#include <sys/siginfo.h>
+#include <sys/contract/process_impl.h>
+#include <sys/x86_archext.h>
+#include <sys/sdt.h>
+#include <lx_signum.h>
+#include <lx_syscall.h>
+#include <sys/proc.h>
+
+/* Linux specific functions and definitions */
+void lx_setrval(klwp_t *, int, int);
+void lx_exec();
+int lx_initlwp(klwp_t *);
+void lx_forklwp(klwp_t *, klwp_t *);
+void lx_exitlwp(klwp_t *);
+void lx_freelwp(klwp_t *);
+static void lx_save(klwp_t *);
+static void lx_restore(klwp_t *);
+extern void lx_ptrace_free(proc_t *);
+
+/*
+ * Set the return code for the forked child, always zero
+ */
+/*ARGSUSED*/
+void
+lx_setrval(klwp_t *lwp, int v1, int v2)
+{
+ lwptoregs(lwp)->r_r0 = 0;
+}
+
+/*
+ * Reset process state on exec(2)
+ */
+void
+lx_exec()
+{
+ klwp_t *lwp = ttolwp(curthread);
+ struct lx_lwp_data *lwpd = lwptolxlwp(lwp);
+ proc_t *p = ttoproc(curthread);
+ lx_proc_data_t *pd = p->p_brand_data;
+ int err;
+
+ /*
+ * Any l_handler handlers set as a result of B_REGISTER are now
+ * invalid; clear them.
+ */
+ pd->l_handler = NULL;
+ pd->l_tracehandler = NULL;
+
+ /*
+ * There are two mutually exclusive special cases we need to
+ * address. First, if this was a native process prior to this
+ * exec(), then this lwp won't have its brand-specific data
+ * initialized and it won't be assigned a Linux PID yet. Second,
+ * if this was a multi-threaded Linux process and this lwp wasn't
+ * the main lwp, then we need to make its Solaris and Linux PIDS
+ * match.
+ */
+ if (lwpd == NULL) {
+ err = lx_initlwp(lwp);
+ /*
+ * Only possible failure from this routine should be an
+ * inability to allocate a new PID. Since single-threaded
+ * processes don't need a new PID, we should never hit this
+ * error.
+ */
+ ASSERT(err == 0);
+ lwpd = lwptolxlwp(lwp);
+ } else if (curthread->t_tid != 1) {
+ lx_pid_reassign(curthread);
+ }
+
+ /* clear the fsbase values until the app. can reinitialize them */
+ lwpd->br_lx_fsbase = NULL;
+ lwpd->br_ntv_fsbase = NULL;
+
+ installctx(lwptot(lwp), lwp, lx_save, lx_restore, NULL, NULL, lx_save,
+ NULL);
+
+ /*
+ * clear out the tls array
+ */
+ bzero(lwpd->br_tls, sizeof (lwpd->br_tls));
+
+ /*
+ * reset the tls entries in the gdt
+ */
+ kpreempt_disable();
+ lx_restore(lwp);
+ kpreempt_enable();
+}
+
+void
+lx_exitlwp(klwp_t *lwp)
+{
+ struct lx_lwp_data *lwpd = lwptolxlwp(lwp);
+ proc_t *p;
+ kthread_t *t;
+ sigqueue_t *sqp = NULL;
+ pid_t ppid;
+ id_t ptid;
+
+ if (lwpd == NULL)
+ return; /* second time thru' */
+
+ if (lwpd->br_clear_ctidp != NULL) {
+ (void) suword32(lwpd->br_clear_ctidp, 0);
+ (void) lx_futex((uintptr_t)lwpd->br_clear_ctidp, FUTEX_WAKE, 1,
+ NULL, NULL, 0);
+ }
+
+ if (lwpd->br_signal != 0) {
+ /*
+ * The first thread in a process doesn't cause a signal to
+ * be sent when it exits. It was created by a fork(), not
+ * a clone(), so the parent should get signalled when the
+ * process exits.
+ */
+ if (lwpd->br_ptid == -1)
+ goto free;
+
+ sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
+ /*
+ * If br_ppid is 0, it means this is a CLONE_PARENT thread,
+ * so the signal goes to the parent process - not to a
+ * specific thread in this process.
+ */
+ p = lwptoproc(lwp);
+ if (lwpd->br_ppid == 0) {
+ mutex_enter(&p->p_lock);
+ ppid = p->p_ppid;
+ t = NULL;
+ } else {
+ /*
+ * If we have been reparented to init or if our
+ * parent thread is gone, then nobody gets
+ * signaled.
+ */
+ if ((lx_lwp_ppid(lwp, &ppid, &ptid) == 1) ||
+ (ptid == -1))
+ goto free;
+
+ mutex_enter(&pidlock);
+ if ((p = prfind(ppid)) == NULL || p->p_stat == SIDL) {
+ mutex_exit(&pidlock);
+ goto free;
+ }
+ mutex_enter(&p->p_lock);
+ mutex_exit(&pidlock);
+
+ if ((t = idtot(p, ptid)) == NULL) {
+ mutex_exit(&p->p_lock);
+ goto free;
+ }
+ }
+
+ sqp->sq_info.si_signo = lwpd->br_signal;
+ sqp->sq_info.si_code = lwpd->br_exitwhy;
+ sqp->sq_info.si_status = lwpd->br_exitwhat;
+ sqp->sq_info.si_pid = lwpd->br_pid;
+ sqp->sq_info.si_uid = crgetruid(CRED());
+ sigaddqa(p, t, sqp);
+ mutex_exit(&p->p_lock);
+ sqp = NULL;
+ }
+
+free:
+ if (lwpd->br_scall_args != NULL) {
+ ASSERT(lwpd->br_args_size > 0);
+ kmem_free(lwpd->br_scall_args, lwpd->br_args_size);
+ }
+ if (sqp)
+ kmem_free(sqp, sizeof (sigqueue_t));
+
+ lx_freelwp(lwp);
+}
+
+void
+lx_freelwp(klwp_t *lwp)
+{
+ struct lx_lwp_data *lwpd = lwptolxlwp(lwp);
+
+ if (lwpd != NULL) {
+ (void) removectx(lwptot(lwp), lwp, lx_save, lx_restore,
+ NULL, NULL, lx_save, NULL);
+ if (lwpd->br_pid != 0)
+ lx_pid_rele(lwptoproc(lwp)->p_pid,
+ lwptot(lwp)->t_tid);
+
+ lwp->lwp_brand = NULL;
+ kmem_free(lwpd, sizeof (struct lx_lwp_data));
+ }
+}
+
+int
+lx_initlwp(klwp_t *lwp)
+{
+ struct lx_lwp_data *lwpd;
+ struct lx_lwp_data *plwpd;
+ kthread_t *tp = lwptot(lwp);
+
+ lwpd = kmem_zalloc(sizeof (struct lx_lwp_data), KM_SLEEP);
+ lwpd->br_exitwhy = CLD_EXITED;
+ lwpd->br_lwp = lwp;
+ lwpd->br_clear_ctidp = NULL;
+ lwpd->br_set_ctidp = NULL;
+ lwpd->br_signal = 0;
+ lwpd->br_ntv_syscall = 1;
+ lwpd->br_scms = 1;
+
+ /*
+ * lwpd->br_affinitymask was zeroed by kmem_zalloc()
+ * as was lwpd->br_scall_args and lwpd->br_args_size.
+ */
+
+ /*
+ * The first thread in a process has ppid set to the parent
+ * process's pid, and ptid set to -1. Subsequent threads in the
+ * process have their ppid set to the pid of the thread that
+ * created them, and their ptid to that thread's tid.
+ */
+ if (tp->t_next == tp) {
+ lwpd->br_ppid = tp->t_procp->p_ppid;
+ lwpd->br_ptid = -1;
+ } else if (ttolxlwp(curthread) != NULL) {
+ plwpd = ttolxlwp(curthread);
+ bcopy(plwpd->br_tls, lwpd->br_tls, sizeof (lwpd->br_tls));
+ lwpd->br_ppid = plwpd->br_pid;
+ lwpd->br_ptid = curthread->t_tid;
+ /* The child inherits the 2 fsbase values from the parent */
+ lwpd->br_lx_fsbase = plwpd->br_lx_fsbase;
+ lwpd->br_ntv_fsbase = plwpd->br_ntv_fsbase;
+ } else {
+ /*
+ * Oddball case: the parent thread isn't a Linux process.
+ */
+ lwpd->br_ppid = 0;
+ lwpd->br_ptid = -1;
+ }
+ lwp->lwp_brand = lwpd;
+
+ if (lx_pid_assign(tp)) {
+ kmem_free(lwpd, sizeof (struct lx_lwp_data));
+ lwp->lwp_brand = NULL;
+ return (-1);
+ }
+ lwpd->br_tgid = lwpd->br_pid;
+
+ installctx(lwptot(lwp), lwp, lx_save, lx_restore, NULL, NULL,
+ lx_save, NULL);
+
+ return (0);
+}
+
+/*
+ * There is no need to have any locking for either the source or
+ * destination struct lx_lwp_data structs. This is always run in the
+ * thread context of the source thread, and the destination thread is
+ * always newly created and not referred to from anywhere else.
+ */
+void
+lx_forklwp(klwp_t *srclwp, klwp_t *dstlwp)
+{
+ struct lx_lwp_data *src = srclwp->lwp_brand;
+ struct lx_lwp_data *dst = dstlwp->lwp_brand;
+
+ dst->br_ppid = src->br_pid;
+ dst->br_ptid = lwptot(srclwp)->t_tid;
+ bcopy(src->br_tls, dst->br_tls, sizeof (dst->br_tls));
+
+ /*
+ * copy only these flags
+ */
+ dst->br_lwp_flags = src->br_lwp_flags & BR_CPU_BOUND;
+ dst->br_scall_args = NULL;
+}
+
+/*
+ * When switching a Linux process off the CPU, clear its GDT entries.
+ */
+/* ARGSUSED */
+static void
+lx_save(klwp_t *t)
+{
+ int i;
+
+#if defined(__amd64)
+ reset_sregs();
+#endif
+ for (i = 0; i < LX_TLSNUM; i++)
+ gdt_update_usegd(GDT_TLSMIN + i, &null_udesc);
+}
+
+/*
+ * When switching a Linux process on the CPU, set its GDT entries.
+ *
+ * For 64-bit code we don't have to worry about explicitly setting the
+ * %fsbase via wrmsr(MSR_AMD_FSBASE) here. Instead, that should happen
+ * automatically in update_sregs if we are executing in user-land. If this
+ * is the case then pcb_rupdate should be set.
+ */
+static void
+lx_restore(klwp_t *t)
+{
+ struct lx_lwp_data *lwpd = lwptolxlwp(t);
+ user_desc_t *tls;
+ int i;
+
+ ASSERT(lwpd);
+
+ tls = lwpd->br_tls;
+ for (i = 0; i < LX_TLSNUM; i++)
+ gdt_update_usegd(GDT_TLSMIN + i, &tls[i]);
+}
+
+void
+lx_set_gdt(int entry, user_desc_t *descrp)
+{
+
+ gdt_update_usegd(entry, descrp);
+}
+
+void
+lx_clear_gdt(int entry)
+{
+ gdt_update_usegd(entry, &null_udesc);
+}
+
+longlong_t
+lx_nosys()
+{
+ return (set_errno(ENOSYS));
+}
+
+longlong_t
+lx_opnotsupp()
+{
+ return (set_errno(EOPNOTSUPP));
+}
+
+/*
+ * Brand-specific routine to check if given non-Solaris standard segment
+ * register values should be modified to other values.
+ */
+/*ARGSUSED*/
+greg_t
+lx_fixsegreg(greg_t sr, model_t datamodel)
+{
+ uint16_t idx = SELTOIDX(sr);
+
+ ASSERT(sr == (sr & 0xffff));
+
+ /*
+ * If the segment selector is a valid TLS selector, just return it.
+ */
+ if (!SELISLDT(sr) && idx >= GDT_TLSMIN && idx <= GDT_TLSMAX)
+ return (sr | SEL_UPL);
+
+ /*
+ * Force the SR into the LDT in ring 3 for 32-bit processes.
+ *
+ * 64-bit processes get the null GDT selector since they are not
+ * allowed to have a private LDT.
+ */
+#if defined(__amd64)
+ return (datamodel == DATAMODEL_ILP32 ? (sr | SEL_TI_LDT | SEL_UPL) : 0);
+#elif defined(__i386)
+ datamodel = datamodel; /* datamodel currently unused for 32-bit */
+ return (sr | SEL_TI_LDT | SEL_UPL);
+#endif /* __amd64 */
+}
+
+/*
+ * These two functions simulate winfo and post_sigcld for the lx brand. The
+ * difference is delivering a designated signal as opposed to always SIGCLD.
+ */
+void
+lx_winfo(proc_t *pp, k_siginfo_t *ip, struct lx_proc_data *dat)
+{
+ ASSERT(MUTEX_HELD(&pidlock));
+ bzero(ip, sizeof (k_siginfo_t));
+ ip->si_signo = ltos_signo[dat->l_signal];
+ ip->si_code = pp->p_wcode;
+ ip->si_pid = pp->p_pid;
+ ip->si_ctid = PRCTID(pp);
+ ip->si_zoneid = pp->p_zone->zone_id;
+ ip->si_status = pp->p_wdata;
+ ip->si_stime = pp->p_stime;
+ ip->si_utime = pp->p_utime;
+}
+
+void
+lx_post_exit_sig(proc_t *cp, sigqueue_t *sqp, struct lx_proc_data *dat)
+{
+ proc_t *pp = cp->p_parent;
+
+ ASSERT(MUTEX_HELD(&pidlock));
+ mutex_enter(&pp->p_lock);
+ /*
+ * Since Linux doesn't queue SIGCHLD, or any other non RT
+ * signals, we just blindly deliver whatever signal we can.
+ */
+ ASSERT(sqp != NULL);
+ lx_winfo(cp, &sqp->sq_info, dat);
+ sigaddqa(pp, NULL, sqp);
+ sqp = NULL;
+ mutex_exit(&pp->p_lock);
+}
+
+
+/*
+ * Brand specific code for exiting and sending a signal to the parent, as
+ * opposed to sigcld().
+ */
+void
+lx_exit_with_sig(proc_t *cp, sigqueue_t *sqp, void *brand_data)
+{
+ proc_t *pp = cp->p_parent;
+ struct lx_proc_data *lx_brand_data = brand_data;
+ ASSERT(MUTEX_HELD(&pidlock));
+
+ switch (cp->p_wcode) {
+ case CLD_EXITED:
+ case CLD_DUMPED:
+ case CLD_KILLED:
+ ASSERT(cp->p_stat == SZOMB);
+ /*
+ * The broadcast on p_srwchan_cv is a kludge to
+ * wakeup a possible thread in uadmin(A_SHUTDOWN).
+ */
+ cv_broadcast(&cp->p_srwchan_cv);
+
+ /*
+ * Add to newstate list of the parent
+ */
+ add_ns(pp, cp);
+
+ cv_broadcast(&pp->p_cv);
+ if ((pp->p_flag & SNOWAIT) ||
+ PTOU(pp)->u_signal[SIGCLD - 1] == SIG_IGN) {
+ if (!(cp->p_pidflag & CLDWAITPID))
+ freeproc(cp);
+ } else if (!(cp->p_pidflag & CLDNOSIGCHLD) &&
+ lx_brand_data->l_signal != 0) {
+ lx_post_exit_sig(cp, sqp, lx_brand_data);
+ sqp = NULL;
+ }
+ break;
+
+ case CLD_STOPPED:
+ case CLD_CONTINUED:
+ case CLD_TRAPPED:
+ panic("Should not be called in this case");
+ }
+
+ if (sqp)
+ siginfofree(sqp);
+}
+
+/*
+ * Filters based on arguments that have been passed in by a separate syscall
+ * using the B_STORE_ARGS mechanism. if the __WALL flag is set, no filter is
+ * applied, otherwise we look at the difference between a clone and non-clone
+ * process.
+ * The definition of a clone process in Linux is a thread that does not deliver
+ * SIGCHLD to its parent. The option __WCLONE indicates to wait only on clone
+ * processes. Without that option, a process should only wait on normal
+ * children. The following table shows the cases.
+ *
+ * default __WCLONE
+ * no SIGCHLD - X
+ * SIGCHLD X -
+ *
+ * This is an XOR of __WCLONE being set, and SIGCHLD being the signal sent on
+ * process exit. Since (flags & __WCLONE) is not guaranteed to have the
+ * least-significant bit set when the flags is enabled, !! is used to place
+ * that bit into the least significant bit. Then, the bitwise XOR can be
+ * used, because there is no logical XOR in the C language.
+ *
+ * More information on wait in lx brands can be found at
+ * usr/src/lib/brand/lx/lx_brand/common/wait.c.
+ */
+boolean_t
+lx_wait_filter(proc_t *pp, proc_t *cp)
+{
+ int flags;
+ boolean_t ret;
+
+ if (LX_ARGS(waitid) != NULL) {
+ flags = LX_ARGS(waitid)->waitid_flags;
+ mutex_enter(&cp->p_lock);
+ if (flags & LX_WALL) {
+ ret = B_TRUE;
+ } else if (cp->p_stat == SZOMB ||
+ cp->p_brand == &native_brand) {
+ ret = (((!!(flags & LX_WCLONE)) ^
+ (stol_signo[SIGCHLD] == cp->p_exit_data))
+ ? B_TRUE : B_FALSE);
+ } else {
+ ret = (((!!(flags & LX_WCLONE)) ^
+ (stol_signo[SIGCHLD] == ptolxproc(cp)->l_signal))
+ ? B_TRUE : B_FALSE);
+ }
+ mutex_exit(&cp->p_lock);
+ return (ret);
+ } else {
+ return (B_TRUE);
+ }
+}
diff --git a/usr/src/uts/common/brand/lx/os/lx_pid.c b/usr/src/uts/common/brand/lx/os/lx_pid.c
new file mode 100644
index 0000000000..aa8c751bc2
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/os/lx_pid.c
@@ -0,0 +1,378 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/bitmap.h>
+#include <sys/var.h>
+#include <sys/thread.h>
+#include <sys/proc.h>
+#include <sys/brand.h>
+#include <sys/zone.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_pid.h>
+
+#define LINUX_PROC_FACTOR 8 /* factor down the hash table by this */
+static int hash_len = 4; /* desired average hash chain length */
+static int hash_size; /* no of buckets in the hash table */
+
+static struct lx_pid **stol_pid_hash;
+static struct lx_pid **ltos_pid_hash;
+
+#define LTOS_HASH(pid) ((pid) & (hash_size - 1))
+#define STOL_HASH(pid, tid) (((pid) + (tid)) & (hash_size - 1))
+
+static kmutex_t hash_lock;
+
+static void
+lx_pid_insert_hash(struct lx_pid *lpidp)
+{
+ int shash = STOL_HASH(lpidp->s_pid, lpidp->s_tid);
+ int lhash = LTOS_HASH(lpidp->l_pid);
+
+ ASSERT(MUTEX_HELD(&hash_lock));
+
+ lpidp->stol_next = stol_pid_hash[shash];
+ stol_pid_hash[shash] = lpidp;
+
+ lpidp->ltos_next = ltos_pid_hash[lhash];
+ ltos_pid_hash[lhash] = lpidp;
+}
+
+static struct lx_pid *
+lx_pid_remove_hash(pid_t pid, id_t tid)
+{
+ struct lx_pid **hpp;
+ struct lx_pid *lpidp = NULL;
+
+ ASSERT(MUTEX_HELD(&hash_lock));
+
+ hpp = &stol_pid_hash[STOL_HASH(pid, tid)];
+ while (*hpp) {
+ if ((*hpp)->s_pid == pid && (*hpp)->s_tid == tid) {
+ lpidp = *hpp;
+ *hpp = (*hpp)->stol_next;
+ break;
+ }
+ hpp = &(*hpp)->stol_next;
+ }
+
+ /*
+ * when called during error recovery the pid may already
+ * be released
+ */
+ if (lpidp == NULL)
+ return (NULL);
+
+ hpp = &ltos_pid_hash[LTOS_HASH(lpidp->l_pid)];
+ while (*hpp) {
+ if (*hpp == lpidp) {
+ *hpp = lpidp->ltos_next;
+ break;
+ }
+ hpp = &(*hpp)->ltos_next;
+ }
+
+ return (lpidp);
+}
+
+struct pid *pid_find(pid_t pid);
+
+/*
+ * given a solaris pid/tid pair, create a linux pid
+ */
+int
+lx_pid_assign(kthread_t *t)
+{
+ proc_t *p = ttoproc(t);
+ pid_t s_pid = p->p_pid;
+ id_t s_tid = t->t_tid;
+ struct pid *pidp;
+ struct lx_pid *lpidp;
+ lx_lwp_data_t *lwpd = ttolxlwp(t);
+ pid_t newpid;
+
+ /*
+ * When lx_initlwp is called from lx_setbrand, p_lwpcnt will already be
+ * equal to 1. Since lx_initlwp is being called against an lwp that
+ * already exists, pid_allocate is not necessary.
+ *
+ * We check for this by testing br_ppid == 0.
+ */
+ if (p->p_lwpcnt > 0 && lwpd->br_ppid != 0) {
+ /*
+ * Allocate a pid for any thread other than the first
+ */
+ if ((newpid = pid_allocate(p, 0, 0)) < 0)
+ return (-1);
+
+ pidp = pid_find(newpid);
+ } else {
+ pidp = NULL;
+ newpid = s_pid;
+ }
+
+ lpidp = kmem_alloc(sizeof (struct lx_pid), KM_SLEEP);
+ lpidp->l_pid = newpid;
+ lpidp->s_pid = s_pid;
+ lpidp->s_tid = s_tid;
+ lpidp->l_pidp = pidp;
+ lpidp->l_start = t->t_start;
+
+ /*
+ * now put the pid into the linux-solaris and solaris-linux
+ * conversion hash tables
+ */
+ mutex_enter(&hash_lock);
+ lx_pid_insert_hash(lpidp);
+ mutex_exit(&hash_lock);
+
+ lwpd->br_pid = newpid;
+
+ return (0);
+}
+
+/*
+ * If we are exec()ing the process, this thread's tid is about to be reset
+ * to 1. Make sure the Linux PID bookkeeping reflects that change.
+ */
+void
+lx_pid_reassign(kthread_t *t)
+{
+ proc_t *p = ttoproc(t);
+ struct pid *old_pidp;
+ struct lx_pid *lpidp;
+
+ ASSERT(p->p_lwpcnt == 1);
+
+ mutex_enter(&hash_lock);
+
+ /*
+ * Clean up all the traces of this thread's 'fake' Linux PID.
+ */
+ lpidp = lx_pid_remove_hash(p->p_pid, t->t_tid);
+ ASSERT(lpidp != NULL);
+ old_pidp = lpidp->l_pidp;
+ lpidp->l_pidp = NULL;
+
+ /*
+ * Now register this thread as (pid, 1).
+ */
+ lpidp->l_pid = p->p_pid;
+ lpidp->s_pid = p->p_pid;
+ lpidp->s_tid = 1;
+ lx_pid_insert_hash(lpidp);
+
+ mutex_exit(&hash_lock);
+
+ if (old_pidp)
+ (void) pid_rele(old_pidp);
+}
+
+/*
+ * release a solaris pid/tid pair
+ */
+void
+lx_pid_rele(pid_t pid, id_t tid)
+{
+ struct lx_pid *lpidp;
+
+ mutex_enter(&hash_lock);
+ lpidp = lx_pid_remove_hash(pid, tid);
+ mutex_exit(&hash_lock);
+
+ if (lpidp) {
+ if (lpidp->l_pidp)
+ (void) pid_rele(lpidp->l_pidp);
+
+ kmem_free(lpidp, sizeof (*lpidp));
+ }
+}
+
+/*
+ * given a linux pid, return the solaris pid/tid pair
+ */
+int
+lx_lpid_to_spair(pid_t l_pid, pid_t *s_pid, id_t *s_tid)
+{
+ struct lx_pid *hp;
+
+ mutex_enter(&hash_lock);
+ for (hp = ltos_pid_hash[LTOS_HASH(l_pid)]; hp; hp = hp->ltos_next) {
+ if (l_pid == hp->l_pid) {
+ if (s_pid)
+ *s_pid = hp->s_pid;
+ if (s_tid)
+ *s_tid = hp->s_tid;
+ break;
+ }
+ }
+ mutex_exit(&hash_lock);
+ if (hp != NULL)
+ return (0);
+
+ /*
+ * We didn't find this pid in our translation table.
+ * But this still could be the pid of a native process
+ * running in the current zone so check for that here.
+ *
+ * Note that prfind() only searches for processes in the current zone.
+ */
+ mutex_enter(&pidlock);
+ if (prfind(l_pid) != NULL) {
+ mutex_exit(&pidlock);
+ if (s_pid)
+ *s_pid = l_pid;
+ if (s_tid)
+ *s_tid = 0;
+ return (0);
+ }
+ mutex_exit(&pidlock);
+
+ return (-1);
+}
+
+/*
+ * Given an lwp, return the Linux pid of its parent. If the caller
+ * wants them, we return the Solaris (pid, tid) as well.
+ */
+pid_t
+lx_lwp_ppid(klwp_t *lwp, pid_t *ppidp, id_t *ptidp)
+{
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ proc_t *p = lwptoproc(lwp);
+ struct lx_pid *hp;
+ pid_t zoneinit = curproc->p_zone->zone_proc_initpid;
+ pid_t lppid, ppid;
+
+ /*
+ * Be sure not to return a parent pid that should be invisible
+ * within this zone.
+ */
+ ppid = ((p->p_flag & SZONETOP)
+ ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
+
+ /*
+ * If the parent process's pid is the zone's init process, force it
+ * to the Linux init pid value of 1.
+ */
+ if (ppid == zoneinit)
+ ppid = 1;
+
+ /*
+ * There are two cases in which the Linux definition of a 'parent'
+ * matches that of Solaris:
+ *
+ * - if our tgid is the same as our PID, then we are either the
+ * first thread in the process or a CLONE_THREAD thread.
+ *
+ * - if the brand lwp value for ppid is 0, then we are either the
+ * child of a differently-branded process or a CLONE_PARENT thread.
+ */
+ if (p->p_pid == lwpd->br_tgid || lwpd->br_ppid == 0) {
+ if (ppidp != NULL)
+ *ppidp = ppid;
+ if (ptidp != NULL)
+ *ptidp = -1;
+ return (ppid);
+ }
+
+ /*
+ * Set the default Linux parent pid to be the pid of the zone's init
+ * process; this will get converted back to the Linux default of 1
+ * later.
+ */
+ lppid = zoneinit;
+
+ /*
+ * If the process's parent isn't init, try and look up the Linux "pid"
+ * corresponding to the process's parent.
+ */
+ if (ppid != 1) {
+ /*
+ * In all other cases, we are looking for the parent of this
+ * specific thread, which in Linux refers to the thread that
+ * clone()d it. We stashed that thread's PID away when this
+ * thread was created.
+ */
+ mutex_enter(&hash_lock);
+ for (hp = ltos_pid_hash[LTOS_HASH(lwpd->br_ppid)]; hp;
+ hp = hp->ltos_next) {
+ if (lwpd->br_ppid == hp->l_pid) {
+ /*
+ * We found the PID we were looking for, but
+ * since we cached its value in this LWP's brand
+ * structure, it has exited and been reused by
+ * another process.
+ */
+ if (hp->l_start > lwptot(lwp)->t_start)
+ break;
+
+ lppid = lwpd->br_ppid;
+ if (ppidp != NULL)
+ *ppidp = hp->s_pid;
+ if (ptidp != NULL)
+ *ptidp = hp->s_tid;
+
+ break;
+ }
+ }
+ mutex_exit(&hash_lock);
+ }
+
+ if (lppid == zoneinit) {
+ lppid = 1;
+
+ if (ppidp != NULL)
+ *ppidp = lppid;
+ if (ptidp != NULL)
+ *ptidp = -1;
+ }
+
+ return (lppid);
+}
+
+void
+lx_pid_init(void)
+{
+ hash_size = 1 << highbit(v.v_proc / (hash_len * LINUX_PROC_FACTOR));
+
+ stol_pid_hash = kmem_zalloc(sizeof (struct lx_pid *) * hash_size,
+ KM_SLEEP);
+ ltos_pid_hash = kmem_zalloc(sizeof (struct lx_pid *) * hash_size,
+ KM_SLEEP);
+
+ mutex_init(&hash_lock, NULL, MUTEX_DEFAULT, NULL);
+}
+
+void
+lx_pid_fini(void)
+{
+ kmem_free(stol_pid_hash, sizeof (struct lx_pid *) * hash_size);
+ kmem_free(ltos_pid_hash, sizeof (struct lx_pid *) * hash_size);
+}
diff --git a/usr/src/uts/common/brand/lx/os/lx_syscall.c b/usr/src/uts/common/brand/lx/os/lx_syscall.c
new file mode 100644
index 0000000000..615022b576
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/os/lx_syscall.c
@@ -0,0 +1,178 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/kmem.h>
+#include <sys/errno.h>
+#include <sys/thread.h>
+#include <sys/systm.h>
+#include <sys/syscall.h>
+#include <sys/proc.h>
+#include <sys/modctl.h>
+#include <sys/cmn_err.h>
+#include <sys/model.h>
+#include <sys/brand.h>
+#include <sys/machbrand.h>
+#include <sys/lx_syscalls.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_impl.h>
+
+/*
+ * Some system calls return either a 32-bit or a 64-bit value, depending
+ * on the datamodel.
+ */
+#ifdef _LP64
+#define V_RVAL SE_64RVAL
+#else
+#define V_RVAL SE_32RVAL1
+#endif
+
+/*
+ * Define system calls that return a native 'long' quantity i.e. a 32-bit
+ * or 64-bit integer - depending on how the kernel is itself compiled
+ * e.g. read(2) returns 'ssize_t' in the kernel and in userland.
+ */
+#define LX_CL(name, call, narg) \
+ { V_RVAL, (name), (llfcn_t)(call), (narg) }
+
+/*
+ * Returns a 32 bit quantity regardless of datamodel
+ */
+#define LX_CI(name, call, narg) \
+ { SE_32RVAL1, (name), (llfcn_t)(call), (narg) }
+
+extern longlong_t lx_nosys(void);
+#define LX_NOSYS(name) \
+ {SE_64RVAL, (name), (llfcn_t)lx_nosys, 0}
+
+typedef int64_t (*llfcn_t)();
+
+/*
+ * In-Kernel Emulation table
+ * The entries in this table are NOT indexed by either of the Linux syscall
+ * numbers (32-bit or 64-bit). Instead, the entries are laid out linearly
+ * with the LX_EMUL_* defines uses to lookup the correct entry.
+ */
+typedef struct lx_ike {
+ int sy_flags;
+ char *sy_name;
+ llfcn_t sy_callc;
+ char sy_narg;
+} lx_ike_t;
+
+static lx_ike_t lx_ike_ent[] =
+{
+ LX_NOSYS("lx_nosys"), /* 0 */
+ LX_CL("getpid", lx_getpid, 0), /* 1 */
+ LX_CL("kill", lx_kill, 2),
+ LX_CL("pipe", lx_pipe, 1),
+ LX_CL("brk", lx_brk, 1),
+ LX_CL("getppid", lx_getppid, 0),
+ LX_CL("sysinfo", lx_sysinfo, 1),
+ LX_CL("clone", lx_clone, 5),
+ LX_CL("modify_ldt", lx_modify_ldt, 3),
+ LX_CL("sched_setparam", lx_sched_setparam, 2),
+ LX_CL("sched_getparam", lx_sched_getparam, 2), /* 10 */
+ LX_CL("sched_rr_get_interval", lx_sched_rr_get_interval, 2),
+ LX_CL("setresuid16", lx_setresuid16, 3),
+ LX_CL("setresgid16", lx_setresgid16, 3),
+ LX_CL("rt_sigqueueinfo", lx_rt_sigqueueinfo, 3),
+ LX_CL("setgroups", lx_setgroups, 2),
+ LX_CL("setresuid", lx_setresuid, 3),
+ LX_CL("setresgid", lx_setresgid, 3),
+ LX_CL("gettid", lx_gettid, 0),
+ LX_CL("tkill", lx_tkill, 2),
+ LX_CL("futex", lx_futex, 6), /* 20 */
+ LX_CL("set_thread_area", lx_set_thread_area, 1),
+ LX_CL("get_thread_area", lx_get_thread_area, 1),
+ LX_CL("set_tid_address", lx_set_tid_address, 1),
+ LX_CL("pipe2", lx_pipe2, 2),
+ LX_CL("rt_tgsigqueueinfo", lx_rt_tgsigqueueinfo, 4),
+ LX_CL("arch_prctl", lx_arch_prctl, 2),
+ LX_CL("tgkill", lx_tgkill, 3),
+ LX_CL("read", lx_read, 3),
+ LX_CL("ioctl", lx_ioctl, 3),
+};
+
+int64_t
+lx_emulate_syscall(int num, uintptr_t arg1, uintptr_t arg2,
+ uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6)
+{
+ lx_ike_t *jsp;
+ int64_t rval;
+
+ rval = (int64_t)0;
+
+ jsp = &(lx_ike_ent[num]);
+
+ switch (jsp->sy_narg) {
+ case 0: {
+ lx_print("--> %s()\n", jsp->sy_name);
+ rval = (int64_t)jsp->sy_callc();
+ break;
+ }
+ case 1: {
+ lx_print("--> %s(0x%lx)\n", jsp->sy_name, arg1);
+ rval = (int64_t)jsp->sy_callc(arg1);
+ break;
+ }
+ case 2: {
+ lx_print("--> %s(0x%lx, 0x%lx)\n", jsp->sy_name, arg1, arg2);
+ rval = (int64_t)jsp->sy_callc(arg1, arg2);
+ break;
+ }
+ case 3: {
+ lx_print("--> %s(0x%lx, 0x%lx, 0x%lx)\n",
+ jsp->sy_name, arg1, arg2, arg3);
+ rval = (int64_t)jsp->sy_callc(arg1, arg2, arg3);
+ break;
+ }
+ case 4: {
+ lx_print("--> %s(0x%lx, 0x%lx, 0x%lx, 0x%lx)\n",
+ jsp->sy_name, arg1, arg2, arg3, arg4);
+ rval = (int64_t)jsp->sy_callc(arg1, arg2, arg3, arg4);
+ break;
+ }
+ case 5: {
+ lx_print("--> %s(0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx)\n",
+ jsp->sy_name, arg1, arg2, arg3, arg4, arg5);
+ rval = (int64_t)jsp->sy_callc(arg1, arg2, arg3, arg4, arg5);
+ break;
+ }
+ case 6: {
+ lx_print("--> %s(0x%lx, 0x%lx, 0x%lx, 0x%lx,"
+ " 0x%lx, 0x%lx)\n",
+ jsp->sy_name, arg1, arg2, arg3, arg4, arg5, arg6);
+ rval = (int64_t)jsp->sy_callc(arg1, arg2, arg3, arg4, arg5,
+ arg6);
+ break;
+ }
+ default:
+ panic("Invalid IKE entry: #%d at 0x%p\n", num, (void *)jsp);
+ }
+ lx_print("----------> return (0x%llx)\n", (long long)rval);
+ return (rval);
+}
diff --git a/usr/src/uts/common/brand/lx/procfs/lx_proc.h b/usr/src/uts/common/brand/lx/procfs/lx_proc.h
new file mode 100644
index 0000000000..96dd0c13ba
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/procfs/lx_proc.h
@@ -0,0 +1,250 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _LXPROC_H
+#define _LXPROC_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * lxproc.h: declarations, data structures and macros for lxprocfs
+ */
+
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/policy.h>
+#include <sys/debug.h>
+#include <sys/dirent.h>
+#include <sys/errno.h>
+#include <sys/file.h>
+#include <sys/kmem.h>
+#include <sys/pathname.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/var.h>
+#include <sys/user.h>
+#include <sys/t_lock.h>
+#include <sys/sysmacros.h>
+#include <sys/cred.h>
+#include <sys/priv.h>
+#include <sys/vnode.h>
+#include <sys/vfs.h>
+#include <sys/statvfs.h>
+#include <sys/cmn_err.h>
+#include <sys/zone.h>
+#include <sys/uio.h>
+#include <sys/utsname.h>
+#include <sys/dnlc.h>
+#include <sys/atomic.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+#include <vm/as.h>
+#include <vm/anon.h>
+
+/*
+ * Convert a vnode into an lxpr_mnt_t
+ */
+#define VTOLXPM(vp) ((lxpr_mnt_t *)(vp)->v_vfsp->vfs_data)
+
+/*
+ * convert a vnode into an lxpr_node
+ */
+#define VTOLXP(vp) ((lxpr_node_t *)(vp)->v_data)
+
+/*
+ * convert a lxprnode into a vnode
+ */
+#define LXPTOV(lxpnp) ((lxpnp)->lxpr_vnode)
+
+/*
+ * convert a lxpr_node into zone for fs
+ */
+#define LXPTOZ(lxpnp) \
+ (((lxpr_mnt_t *)(lxpnp)->lxpr_vnode->v_vfsp->vfs_data)->lxprm_zone)
+
+#define LXPNSIZ 256 /* max size of lx /proc file name entries */
+
+/*
+ * Pretend that a directory entry takes 16 bytes
+ */
+#define LXPR_SDSIZE 16
+
+/*
+ * Node/file types for lx /proc files
+ * (directories and files contained therein).
+ */
+typedef enum lxpr_nodetype {
+ LXPR_PROCDIR, /* /proc */
+ LXPR_PIDDIR, /* /proc/<pid> */
+ LXPR_PID_CMDLINE, /* /proc/<pid>/cmdline */
+ LXPR_PID_CPU, /* /proc/<pid>/cpu */
+ LXPR_PID_CURDIR, /* /proc/<pid>/cwd */
+ LXPR_PID_ENV, /* /proc/<pid>/environ */
+ LXPR_PID_EXE, /* /proc/<pid>/exe */
+ LXPR_PID_LIMITS, /* /proc/<pid>/limits */
+ LXPR_PID_MAPS, /* /proc/<pid>/maps */
+ LXPR_PID_MEM, /* /proc/<pid>/mem */
+ LXPR_PID_MOUNTINFO, /* /proc/<pid>/mountinfo */
+ LXPR_PID_ROOTDIR, /* /proc/<pid>/root */
+ LXPR_PID_STAT, /* /proc/<pid>/stat */
+ LXPR_PID_STATM, /* /proc/<pid>/statm */
+ LXPR_PID_STATUS, /* /proc/<pid>/status */
+ LXPR_PID_FDDIR, /* /proc/<pid>/fd */
+ LXPR_PID_FD_FD, /* /proc/<pid>/fd/nn */
+ LXPR_CMDLINE, /* /proc/cmdline */
+ LXPR_CPUINFO, /* /proc/cpuinfo */
+ LXPR_DEVICES, /* /proc/devices */
+ LXPR_DMA, /* /proc/dma */
+ LXPR_FILESYSTEMS, /* /proc/filesystems */
+ LXPR_INTERRUPTS, /* /proc/interrupts */
+ LXPR_IOPORTS, /* /proc/ioports */
+ LXPR_KCORE, /* /proc/kcore */
+ LXPR_KMSG, /* /proc/kmsg */
+ LXPR_LOADAVG, /* /proc/loadavg */
+ LXPR_MEMINFO, /* /proc/meminfo */
+ LXPR_MODULES, /* /proc/modules */
+ LXPR_MOUNTS, /* /proc/mounts */
+ LXPR_NETDIR, /* /proc/net */
+ LXPR_NET_ARP, /* /proc/net/arp */
+ LXPR_NET_DEV, /* /proc/net/dev */
+ LXPR_NET_DEV_MCAST, /* /proc/net/dev_mcast */
+ LXPR_NET_IGMP, /* /proc/net/igmp */
+ LXPR_NET_IP_MR_CACHE, /* /proc/net/ip_mr_cache */
+ LXPR_NET_IP_MR_VIF, /* /proc/net/ip_mr_vif */
+ LXPR_NET_MCFILTER, /* /proc/net/mcfilter */
+ LXPR_NET_NETSTAT, /* /proc/net/netstat */
+ LXPR_NET_RAW, /* /proc/net/raw */
+ LXPR_NET_ROUTE, /* /proc/net/route */
+ LXPR_NET_RPC, /* /proc/net/rpc */
+ LXPR_NET_RT_CACHE, /* /proc/net/rt_cache */
+ LXPR_NET_SOCKSTAT, /* /proc/net/sockstat */
+ LXPR_NET_SNMP, /* /proc/net/snmp */
+ LXPR_NET_STAT, /* /proc/net/stat */
+ LXPR_NET_TCP, /* /proc/net/tcp */
+ LXPR_NET_UDP, /* /proc/net/udp */
+ LXPR_NET_UNIX, /* /proc/net/unix */
+ LXPR_PARTITIONS, /* /proc/partitions */
+ LXPR_SELF, /* /proc/self */
+ LXPR_STAT, /* /proc/stat */
+ LXPR_SWAPS, /* /proc/swaps */
+ LXPR_SYSDIR, /* /proc/sys/ */
+ LXPR_SYS_FSDIR, /* /proc/sys/fs/ */
+ LXPR_SYS_FS_INOTIFYDIR, /* /proc/sys/fs/inotify */
+ LXPR_SYS_FS_INOTIFY_MAX_QUEUED_EVENTS, /* inotify/max_queued_events */
+ LXPR_SYS_FS_INOTIFY_MAX_USER_INSTANCES, /* inotify/max_user_instances */
+ LXPR_SYS_FS_INOTIFY_MAX_USER_WATCHES, /* inotify/max_user_watches */
+ LXPR_SYS_KERNELDIR, /* /proc/sys/kernel/ */
+ LXPR_SYS_KERNEL_HOSTNAME, /* /proc/sys/kernel/hostname */
+ LXPR_SYS_KERNEL_MSGMNI, /* /proc/sys/kernel/msgmni */
+ LXPR_SYS_KERNEL_NGROUPS_MAX, /* /proc/sys/kernel/ngroups_max */
+ LXPR_SYS_KERNEL_PID_MAX, /* /proc/sys/kernel/pid_max */
+ LXPR_SYS_KERNEL_SHMMAX, /* /proc/sys/kernel/shmmax */
+ LXPR_SYS_KERNEL_THREADS_MAX, /* /proc/sys/kernel/threads-max */
+ LXPR_UPTIME, /* /proc/uptime */
+ LXPR_VERSION, /* /proc/version */
+ LXPR_NFILES /* number of lx /proc file types */
+} lxpr_nodetype_t;
+
+
+/*
+ * Number of fds allowed for in the inode number calculation
+ * per process (if a process has more fds then inode numbers
+ * may be duplicated)
+ */
+#define LXPR_FD_PERPROC 2000
+
+/*
+ * external dirent characteristics
+ */
+typedef struct {
+ lxpr_nodetype_t d_type;
+ char *d_name;
+} lxpr_dirent_t;
+
+/*
+ * This is the lxprocfs private data object
+ * which is attached to v_data in the vnode structure
+ */
+typedef struct lxpr_node {
+ lxpr_nodetype_t lxpr_type; /* type of this node */
+ vnode_t *lxpr_vnode; /* vnode for the node */
+ vnode_t *lxpr_parent; /* parent directory */
+ vnode_t *lxpr_realvp; /* real vnode, file in dirs */
+ timestruc_t lxpr_time; /* creation etc time for file */
+ mode_t lxpr_mode; /* file mode bits */
+ uid_t lxpr_uid; /* file owner */
+ gid_t lxpr_gid; /* file group owner */
+ pid_t lxpr_pid; /* pid of proc referred to */
+ ino_t lxpr_ino; /* node id */
+ ldi_handle_t lxpr_cons_ldih; /* ldi handle for console device */
+} lxpr_node_t;
+
+struct zone; /* forward declaration */
+
+/*
+ * This is the lxprocfs private data object
+ * which is attached to vfs_data in the vfs structure
+ */
+typedef struct lxpr_mnt {
+ lxpr_node_t *lxprm_node; /* node at root of proc mount */
+ struct zone *lxprm_zone; /* zone for this mount */
+ ldi_ident_t lxprm_li; /* ident for ldi */
+} lxpr_mnt_t;
+
+extern vnodeops_t *lxpr_vnodeops;
+extern int nproc_highbit; /* highbit(v.v_nproc) */
+
+typedef struct mounta mounta_t;
+
+extern void lxpr_initnodecache();
+extern void lxpr_fininodecache();
+extern void lxpr_initrootnode(lxpr_node_t **, vfs_t *);
+extern ino_t lxpr_inode(lxpr_nodetype_t, pid_t, int);
+extern ino_t lxpr_parentinode(lxpr_node_t *);
+extern lxpr_node_t *lxpr_getnode(vnode_t *, lxpr_nodetype_t, proc_t *, int);
+extern void lxpr_freenode(lxpr_node_t *);
+
+typedef struct lxpr_uiobuf lxpr_uiobuf_t;
+extern lxpr_uiobuf_t *lxpr_uiobuf_new(uio_t *);
+extern void lxpr_uiobuf_free(lxpr_uiobuf_t *);
+extern int lxpr_uiobuf_flush(lxpr_uiobuf_t *);
+extern void lxpr_uiobuf_seek(lxpr_uiobuf_t *, offset_t);
+extern boolean_t lxpr_uiobuf_nonblock(lxpr_uiobuf_t *);
+extern void lxpr_uiobuf_write(lxpr_uiobuf_t *, const char *, size_t);
+extern void lxpr_uiobuf_printf(lxpr_uiobuf_t *, const char *, ...);
+extern void lxpr_uiobuf_seterr(lxpr_uiobuf_t *, int);
+
+proc_t *lxpr_lock(pid_t);
+void lxpr_unlock(proc_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LXPROC_H */
diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c b/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c
new file mode 100644
index 0000000000..70d274cdb3
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c
@@ -0,0 +1,538 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * lxprsubr.c: Various functions for the /lxproc vnodeops.
+ */
+
+#include <sys/varargs.h>
+
+#include <sys/cpuvar.h>
+#include <sys/mman.h>
+#include <sys/vmsystm.h>
+#include <sys/prsystm.h>
+
+#include "lx_proc.h"
+
+#define LXPRCACHE_NAME "lxbpr_cache"
+
+static int lxpr_node_constructor(void *, void *, int);
+static void lxpr_node_destructor(void *, void *);
+
+static kmem_cache_t *lxpr_node_cache;
+
+struct lxpr_uiobuf {
+ uio_t *uiop;
+ char *buffer;
+ uint32_t buffsize;
+ char *pos;
+ size_t beg;
+ int error;
+};
+
+int lx_pr_bufsize = 4000;
+
+struct lxpr_uiobuf *
+lxpr_uiobuf_new(uio_t *uiop)
+{
+ /* Allocate memory for both lxpr_uiobuf and output buffer */
+ int bufsize = lx_pr_bufsize;
+ struct lxpr_uiobuf *uiobuf =
+ kmem_alloc(sizeof (struct lxpr_uiobuf) + bufsize, KM_SLEEP);
+
+ uiobuf->uiop = uiop;
+ uiobuf->buffer = (char *)&uiobuf[1];
+ uiobuf->buffsize = bufsize;
+ uiobuf->pos = uiobuf->buffer;
+ uiobuf->beg = 0;
+ uiobuf->error = 0;
+
+ return (uiobuf);
+}
+
+void
+lxpr_uiobuf_free(struct lxpr_uiobuf *uiobuf)
+{
+ ASSERT(uiobuf != NULL);
+ ASSERT(uiobuf->pos == uiobuf->buffer);
+
+ kmem_free(uiobuf, sizeof (struct lxpr_uiobuf) + uiobuf->buffsize);
+}
+
+void
+lxpr_uiobuf_seek(struct lxpr_uiobuf *uiobuf, offset_t offset)
+{
+ uiobuf->uiop->uio_offset = (off_t)offset;
+}
+
+boolean_t
+lxpr_uiobuf_nonblock(struct lxpr_uiobuf *uiobuf)
+{
+ if ((uiobuf->uiop->uio_fmode & FNONBLOCK) != 0)
+ return (B_TRUE);
+ return (B_FALSE);
+}
+
+void
+lxpr_uiobuf_seterr(struct lxpr_uiobuf *uiobuf, int err)
+{
+ ASSERT(uiobuf->error == 0);
+
+ uiobuf->error = err;
+}
+
+int
+lxpr_uiobuf_flush(struct lxpr_uiobuf *uiobuf)
+{
+ off_t off = uiobuf->uiop->uio_offset;
+ caddr_t uaddr = uiobuf->buffer;
+ size_t beg = uiobuf->beg;
+ size_t size = (uintptr_t)uiobuf->pos - (uintptr_t)uaddr;
+
+ if (uiobuf->error == 0 && uiobuf->uiop->uio_resid != 0) {
+ ASSERT(off >= beg);
+
+ if (beg + size > off && off >= 0)
+ uiobuf->error =
+ uiomove(uaddr + (off - beg), size - (off - beg),
+ UIO_READ, uiobuf->uiop);
+
+ uiobuf->beg += size;
+ }
+
+ uiobuf->pos = uaddr;
+
+ return (uiobuf->error);
+}
+
+void
+lxpr_uiobuf_write(struct lxpr_uiobuf *uiobuf, const char *buf, size_t size)
+{
+ /* While we can still carry on */
+ while (uiobuf->error == 0 && uiobuf->uiop->uio_resid != 0) {
+ uintptr_t remain = (uintptr_t)uiobuf->buffsize -
+ ((uintptr_t)uiobuf->pos - (uintptr_t)uiobuf->buffer);
+
+ /* Enough space in buffer? */
+ if (remain >= size) {
+ bcopy(buf, uiobuf->pos, size);
+ uiobuf->pos += size;
+ return;
+ }
+
+ /* Not enough space, so copy all we can and try again */
+ bcopy(buf, uiobuf->pos, remain);
+ uiobuf->pos += remain;
+ (void) lxpr_uiobuf_flush(uiobuf);
+ buf += remain;
+ size -= remain;
+ }
+}
+
+#define TYPBUFFSIZE 256
+
+void
+lxpr_uiobuf_printf(struct lxpr_uiobuf *uiobuf, const char *fmt, ...)
+{
+ va_list args;
+ char buff[TYPBUFFSIZE];
+ int len;
+ char *buffer;
+
+ /* Can we still do any output */
+ if (uiobuf->error != 0 || uiobuf->uiop->uio_resid == 0)
+ return;
+
+ va_start(args, fmt);
+
+ /* Try using stack allocated buffer */
+ len = vsnprintf(buff, TYPBUFFSIZE, fmt, args);
+ if (len < TYPBUFFSIZE) {
+ va_end(args);
+ lxpr_uiobuf_write(uiobuf, buff, len);
+ return;
+ }
+
+ /* Not enough space in pre-allocated buffer */
+ buffer = kmem_alloc(len + 1, KM_SLEEP);
+
+ /*
+ * We know we allocated the correct amount of space
+ * so no check on the return value
+ */
+ (void) vsnprintf(buffer, len+1, fmt, args);
+ lxpr_uiobuf_write(uiobuf, buffer, len);
+ va_end(args);
+ kmem_free(buffer, len+1);
+}
+
+/*
+ * lxpr_lock():
+ *
+ * Lookup process from pid and return with p_plock and P_PR_LOCK held.
+ */
+proc_t *
+lxpr_lock(pid_t pid)
+{
+ proc_t *p;
+ kmutex_t *mp;
+
+ ASSERT(!MUTEX_HELD(&pidlock));
+
+ for (;;) {
+ mutex_enter(&pidlock);
+
+ /*
+ * If the pid is 1, we really want the zone's init process
+ */
+ p = prfind((pid == 1) ?
+ curproc->p_zone->zone_proc_initpid : pid);
+
+ if (p == NULL || p->p_stat == SIDL) {
+ mutex_exit(&pidlock);
+ return (NULL);
+ }
+
+ /*
+ * p_lock is persistent, but p itself is not -- it could
+ * vanish during cv_wait(). Load p->p_lock now so we can
+ * drop it after cv_wait() without referencing p.
+ */
+ mp = &p->p_lock;
+ mutex_enter(mp);
+
+ mutex_exit(&pidlock);
+
+ if (p->p_flag & SEXITING) {
+ /*
+ * This process is exiting -- let it go.
+ */
+ mutex_exit(mp);
+ return (NULL);
+ }
+
+ if (!(p->p_proc_flag & P_PR_LOCK))
+ break;
+
+ cv_wait(&pr_pid_cv[p->p_slot], mp);
+ mutex_exit(mp);
+ }
+
+ p->p_proc_flag |= P_PR_LOCK;
+ THREAD_KPRI_REQUEST();
+ return (p);
+}
+
+/*
+ * lxpr_unlock()
+ *
+ * Unlock locked process
+ */
+void
+lxpr_unlock(proc_t *p)
+{
+ ASSERT(p->p_proc_flag & P_PR_LOCK);
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ ASSERT(!MUTEX_HELD(&pidlock));
+
+ cv_signal(&pr_pid_cv[p->p_slot]);
+ p->p_proc_flag &= ~P_PR_LOCK;
+ mutex_exit(&p->p_lock);
+ THREAD_KPRI_RELEASE();
+}
+
+void
+lxpr_initnodecache()
+{
+ lxpr_node_cache = kmem_cache_create(LXPRCACHE_NAME,
+ sizeof (lxpr_node_t), 0,
+ lxpr_node_constructor, lxpr_node_destructor, NULL, NULL, NULL, 0);
+}
+
+void
+lxpr_fininodecache()
+{
+ kmem_cache_destroy(lxpr_node_cache);
+}
+
+/* ARGSUSED */
+static int
+lxpr_node_constructor(void *buf, void *un, int kmflags)
+{
+ lxpr_node_t *lxpnp = buf;
+ vnode_t *vp;
+
+ vp = lxpnp->lxpr_vnode = vn_alloc(kmflags);
+ if (vp == NULL)
+ return (-1);
+
+ (void) vn_setops(vp, lxpr_vnodeops);
+ vp->v_data = lxpnp;
+
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+lxpr_node_destructor(void *buf, void *un)
+{
+ lxpr_node_t *lxpnp = buf;
+
+ vn_free(LXPTOV(lxpnp));
+}
+
+/*
+ * Calculate an inode number
+ *
+ * This takes various bits of info and munges them
+ * to give the inode number for an lxproc node
+ */
+ino_t
+lxpr_inode(lxpr_nodetype_t type, pid_t pid, int fd)
+{
+ if (pid == 1)
+ pid = curproc->p_zone->zone_proc_initpid;
+
+ switch (type) {
+ case LXPR_PIDDIR:
+ return (pid + 1);
+ case LXPR_PROCDIR:
+ return (maxpid + 2);
+ case LXPR_PID_FD_FD:
+ return (maxpid + 2 +
+ (pid * (LXPR_FD_PERPROC + LXPR_NFILES)) +
+ LXPR_NFILES + fd);
+ default:
+ return (maxpid + 2 +
+ (pid * (LXPR_FD_PERPROC + LXPR_NFILES)) +
+ type);
+ }
+}
+
+/*
+ * Return inode number of parent (directory)
+ */
+ino_t
+lxpr_parentinode(lxpr_node_t *lxpnp)
+{
+ /*
+ * If the input node is the root then the parent inode
+ * is the mounted on inode so just return our inode number
+ */
+ if (lxpnp->lxpr_type != LXPR_PROCDIR)
+ return (VTOLXP(lxpnp->lxpr_parent)->lxpr_ino);
+ else
+ return (lxpnp->lxpr_ino);
+}
+
+/*
+ * Allocate a new lxproc node
+ *
+ * This also allocates the vnode associated with it
+ */
+lxpr_node_t *
+lxpr_getnode(vnode_t *dp, lxpr_nodetype_t type, proc_t *p, int fd)
+{
+ lxpr_node_t *lxpnp;
+ vnode_t *vp;
+ user_t *up;
+ timestruc_t now;
+
+ /*
+ * Allocate a new node. It is deallocated in vop_innactive
+ */
+ lxpnp = kmem_cache_alloc(lxpr_node_cache, KM_SLEEP);
+
+ /*
+ * Set defaults (may be overridden below)
+ */
+ gethrestime(&now);
+ lxpnp->lxpr_type = type;
+ lxpnp->lxpr_realvp = NULL;
+ lxpnp->lxpr_parent = dp;
+ VN_HOLD(dp);
+ if (p != NULL) {
+ lxpnp->lxpr_pid = ((p->p_pid ==
+ curproc->p_zone->zone_proc_initpid) ? 1 : p->p_pid);
+
+ lxpnp->lxpr_time = PTOU(p)->u_start;
+ lxpnp->lxpr_uid = crgetruid(p->p_cred);
+ lxpnp->lxpr_gid = crgetrgid(p->p_cred);
+ lxpnp->lxpr_ino = lxpr_inode(type, p->p_pid, fd);
+ } else {
+ /* Pretend files without a proc belong to sched */
+ lxpnp->lxpr_pid = 0;
+ lxpnp->lxpr_time = now;
+ lxpnp->lxpr_uid = lxpnp->lxpr_gid = 0;
+ lxpnp->lxpr_ino = lxpr_inode(type, 0, 0);
+ }
+
+ /* initialize the vnode data */
+ vp = lxpnp->lxpr_vnode;
+ vn_reinit(vp);
+ vp->v_flag = VNOCACHE|VNOMAP|VNOSWAP|VNOMOUNT;
+ vp->v_vfsp = dp->v_vfsp;
+
+ /*
+ * Do node specific stuff
+ */
+ switch (type) {
+ case LXPR_PROCDIR:
+ vp->v_flag |= VROOT;
+ vp->v_type = VDIR;
+ lxpnp->lxpr_mode = 0555; /* read-search by everyone */
+ break;
+
+ case LXPR_PID_CURDIR:
+ ASSERT(p != NULL);
+
+ /*
+ * Zombie check. p_stat is officially protected by pidlock,
+ * but we can't grab pidlock here because we already hold
+ * p_lock. Luckily if we look at the process exit code
+ * we see that p_stat only transisions from SRUN to SZOMB
+ * while p_lock is held. Aside from this, the only other
+ * p_stat transition that we need to be aware about is
+ * SIDL to SRUN, but that's not a problem since lxpr_lock()
+ * ignores nodes in the SIDL state so we'll never get a node
+ * that isn't already in the SRUN state.
+ */
+ if (p->p_stat == SZOMB) {
+ lxpnp->lxpr_realvp = NULL;
+ } else {
+ up = PTOU(p);
+ lxpnp->lxpr_realvp = up->u_cdir;
+ ASSERT(lxpnp->lxpr_realvp != NULL);
+ VN_HOLD(lxpnp->lxpr_realvp);
+ }
+ vp->v_type = VLNK;
+ lxpnp->lxpr_mode = 0777; /* anyone does anything ! */
+ break;
+
+ case LXPR_PID_ROOTDIR:
+ ASSERT(p != NULL);
+ /* Zombie check. see locking comment above */
+ if (p->p_stat == SZOMB) {
+ lxpnp->lxpr_realvp = NULL;
+ } else {
+ up = PTOU(p);
+ lxpnp->lxpr_realvp =
+ up->u_rdir != NULL ? up->u_rdir : rootdir;
+ ASSERT(lxpnp->lxpr_realvp != NULL);
+ VN_HOLD(lxpnp->lxpr_realvp);
+ }
+ vp->v_type = VLNK;
+ lxpnp->lxpr_mode = 0777; /* anyone does anything ! */
+ break;
+
+ case LXPR_PID_EXE:
+ ASSERT(p != NULL);
+ lxpnp->lxpr_realvp = p->p_exec;
+ if (lxpnp->lxpr_realvp != NULL) {
+ VN_HOLD(lxpnp->lxpr_realvp);
+ }
+ vp->v_type = VLNK;
+ lxpnp->lxpr_mode = 0777;
+ break;
+
+ case LXPR_SELF:
+ vp->v_type = VLNK;
+ lxpnp->lxpr_mode = 0777; /* anyone does anything ! */
+ break;
+
+ case LXPR_PID_FD_FD:
+ ASSERT(p != NULL);
+ /* lxpr_realvp is set after we return */
+ vp->v_type = VLNK;
+ lxpnp->lxpr_mode = 0700; /* read-write-exe owner only */
+ break;
+
+ case LXPR_PID_FDDIR:
+ ASSERT(p != NULL);
+ vp->v_type = VDIR;
+ lxpnp->lxpr_mode = 0500; /* read-search by owner only */
+ break;
+
+ case LXPR_PIDDIR:
+ ASSERT(p != NULL);
+ vp->v_type = VDIR;
+ lxpnp->lxpr_mode = 0511;
+ break;
+
+ case LXPR_SYSDIR:
+ case LXPR_SYS_FSDIR:
+ case LXPR_SYS_FS_INOTIFYDIR:
+ case LXPR_SYS_KERNELDIR:
+ case LXPR_NETDIR:
+ vp->v_type = VDIR;
+ lxpnp->lxpr_mode = 0555; /* read-search by all */
+ break;
+
+ case LXPR_PID_ENV:
+ case LXPR_PID_MEM:
+ ASSERT(p != NULL);
+ /*FALLTHRU*/
+ case LXPR_KCORE:
+ vp->v_type = VREG;
+ lxpnp->lxpr_mode = 0400; /* read-only by owner only */
+ break;
+
+ default:
+ vp->v_type = VREG;
+ lxpnp->lxpr_mode = 0444; /* read-only by all */
+ break;
+ }
+
+ return (lxpnp);
+}
+
+
+/*
+ * Free the storage obtained from lxpr_getnode().
+ */
+void
+lxpr_freenode(lxpr_node_t *lxpnp)
+{
+ ASSERT(lxpnp != NULL);
+ ASSERT(LXPTOV(lxpnp) != NULL);
+
+ /*
+ * delete any association with realvp
+ */
+ if (lxpnp->lxpr_realvp != NULL)
+ VN_RELE(lxpnp->lxpr_realvp);
+
+ /*
+ * delete any association with parent vp
+ */
+ if (lxpnp->lxpr_parent != NULL)
+ VN_RELE(lxpnp->lxpr_parent);
+
+ /*
+ * Release the lxprnode.
+ */
+ kmem_cache_free(lxpr_node_cache, lxpnp);
+}
diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prvfsops.c b/usr/src/uts/common/brand/lx/procfs/lx_prvfsops.c
new file mode 100644
index 0000000000..e9dda7864a
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/procfs/lx_prvfsops.c
@@ -0,0 +1,374 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * lxprvfsops.c: vfs operations for /lxprocfs.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/cmn_err.h>
+#include <sys/cred.h>
+#include <sys/debug.h>
+#include <sys/errno.h>
+#include <sys/proc.h>
+#include <sys/stat.h>
+#include <sys/statvfs.h>
+#include <sys/sysmacros.h>
+#include <sys/systm.h>
+#include <sys/var.h>
+#include <sys/vfs.h>
+#include <sys/vfs_opreg.h>
+#include <sys/vnode.h>
+#include <sys/mode.h>
+#include <sys/signal.h>
+#include <sys/user.h>
+#include <sys/mount.h>
+#include <sys/bitmap.h>
+#include <sys/kmem.h>
+#include <sys/policy.h>
+#include <sys/modctl.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+#include <sys/lx_impl.h>
+
+#include "lx_proc.h"
+
+/* Module level parameters */
+static int lxprocfstype;
+static dev_t lxprocdev;
+static kmutex_t lxpr_mount_lock;
+
+int nproc_highbit; /* highbit(v.v_nproc) */
+
+static int lxpr_mount(vfs_t *, vnode_t *, mounta_t *, cred_t *);
+static int lxpr_unmount(vfs_t *, int, cred_t *);
+static int lxpr_root(vfs_t *, vnode_t **);
+static int lxpr_statvfs(vfs_t *, statvfs64_t *);
+static int lxpr_init(int, char *);
+
+static vfsdef_t vfw = {
+ VFSDEF_VERSION,
+ "lx_proc",
+ lxpr_init,
+ VSW_ZMOUNT,
+ NULL
+};
+
+/*
+ * Module linkage information for the kernel.
+ */
+extern struct mod_ops mod_fsops;
+
+static struct modlfs modlfs = {
+ &mod_fsops, "lx brand procfs", &vfw
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, (void *)&modlfs, NULL
+};
+
+int
+_init(void)
+{
+ return (mod_install(&modlinkage));
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ int retval;
+
+ /*
+ * attempt to unload the module
+ */
+ if ((retval = mod_remove(&modlinkage)) != 0)
+ goto done;
+
+ /*
+ * destroy lxpr_node cache
+ */
+ lxpr_fininodecache();
+
+ /*
+ * clean out the vfsops and vnodeops
+ */
+ (void) vfs_freevfsops_by_type(lxprocfstype);
+ vn_freevnodeops(lxpr_vnodeops);
+
+ mutex_destroy(&lxpr_mount_lock);
+done:
+ return (retval);
+}
+
+static int
+lxpr_init(int fstype, char *name)
+{
+ static const fs_operation_def_t lxpr_vfsops_template[] = {
+ VFSNAME_MOUNT, { .vfs_mount = lxpr_mount },
+ VFSNAME_UNMOUNT, { .vfs_unmount = lxpr_unmount },
+ VFSNAME_ROOT, { .vfs_root = lxpr_root },
+ VFSNAME_STATVFS, { .vfs_statvfs = lxpr_statvfs },
+ NULL, NULL
+ };
+ extern const fs_operation_def_t lxpr_vnodeops_template[];
+ int error;
+ major_t dev;
+
+ nproc_highbit = highbit(v.v_proc);
+ lxprocfstype = fstype;
+ ASSERT(lxprocfstype != 0);
+
+ mutex_init(&lxpr_mount_lock, NULL, MUTEX_DEFAULT, NULL);
+
+ /*
+ * Associate VFS ops vector with this fstype.
+ */
+ error = vfs_setfsops(fstype, lxpr_vfsops_template, NULL);
+ if (error != 0) {
+ cmn_err(CE_WARN, "lxpr_init: bad vfs ops template");
+ return (error);
+ }
+
+ /*
+ * Set up vnode ops vector too.
+ */
+ error = vn_make_ops(name, lxpr_vnodeops_template, &lxpr_vnodeops);
+ if (error != 0) {
+ (void) vfs_freevfsops_by_type(fstype);
+ cmn_err(CE_WARN, "lxpr_init: bad vnode ops template");
+ return (error);
+ }
+
+ /*
+ * Assign a unique "device" number (used by stat(2)).
+ */
+ if ((dev = getudev()) == (major_t)-1) {
+ cmn_err(CE_WARN, "lxpr_init: can't get unique device number");
+ dev = 0;
+ }
+
+ /*
+ * Make the pseudo device
+ */
+ lxprocdev = makedevice(dev, 0);
+
+ /*
+ * Initialise cache for lxpr_nodes
+ */
+ lxpr_initnodecache();
+
+ return (0);
+}
+
+static int
+lxpr_mount(vfs_t *vfsp, vnode_t *mvp, mounta_t *uap, cred_t *cr)
+{
+ lxpr_mnt_t *lxpr_mnt;
+ zone_t *zone = curproc->p_zone;
+ ldi_ident_t li;
+ int err;
+
+ /*
+ * must be root to mount
+ */
+ if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
+ return (EPERM);
+
+ /*
+ * mount point must be a directory
+ */
+ if (mvp->v_type != VDIR)
+ return (ENOTDIR);
+
+ if (zone == global_zone) {
+ zone_t *mntzone;
+
+ mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
+ zone_rele(mntzone);
+ if (zone != mntzone)
+ return (EBUSY);
+ }
+
+ /*
+ * Having the resource be anything but "lxproc" doesn't make sense
+ */
+ vfs_setresource(vfsp, "lxproc", 0);
+
+ lxpr_mnt = kmem_alloc(sizeof (*lxpr_mnt), KM_SLEEP);
+
+ if ((err = ldi_ident_from_mod(&modlinkage, &li)) != 0) {
+ kmem_free(lxpr_mnt, sizeof (*lxpr_mnt));
+ return (err);
+ }
+
+ lxpr_mnt->lxprm_li = li;
+
+ mutex_enter(&lxpr_mount_lock);
+
+ /*
+ * Ensure we don't allow overlaying mounts
+ */
+ mutex_enter(&mvp->v_lock);
+ if ((uap->flags & MS_OVERLAY) == 0 &&
+ (mvp->v_count > 1 || (mvp->v_flag & VROOT))) {
+ mutex_exit(&mvp->v_lock);
+ mutex_exit(&lxpr_mount_lock);
+ kmem_free(lxpr_mnt, sizeof ((*lxpr_mnt)));
+ return (EBUSY);
+ }
+ mutex_exit(&mvp->v_lock);
+
+ /*
+ * allocate the first vnode
+ */
+ zone_hold(lxpr_mnt->lxprm_zone = zone);
+
+ /* Arbitrarily set the parent vnode to the mounted over directory */
+ lxpr_mnt->lxprm_node = lxpr_getnode(mvp, LXPR_PROCDIR, NULL, 0);
+
+ /* Correctly set the fs for the root node */
+ lxpr_mnt->lxprm_node->lxpr_vnode->v_vfsp = vfsp;
+
+ vfs_make_fsid(&vfsp->vfs_fsid, lxprocdev, lxprocfstype);
+ vfsp->vfs_bsize = DEV_BSIZE;
+ vfsp->vfs_fstype = lxprocfstype;
+ vfsp->vfs_data = (caddr_t)lxpr_mnt;
+ vfsp->vfs_dev = lxprocdev;
+
+ mutex_exit(&lxpr_mount_lock);
+
+ return (0);
+}
+
+static int
+lxpr_unmount(vfs_t *vfsp, int flag, cred_t *cr)
+{
+ lxpr_mnt_t *lxpr_mnt = (lxpr_mnt_t *)vfsp->vfs_data;
+ vnode_t *vp;
+ int count;
+
+ ASSERT(lxpr_mnt != NULL);
+ vp = LXPTOV(lxpr_mnt->lxprm_node);
+
+ mutex_enter(&lxpr_mount_lock);
+
+ /*
+ * must be root to unmount
+ */
+ if (secpolicy_fs_unmount(cr, vfsp) != 0) {
+ mutex_exit(&lxpr_mount_lock);
+ return (EPERM);
+ }
+
+ /*
+ * forced unmount is not supported by this file system
+ */
+ if (flag & MS_FORCE) {
+ mutex_exit(&lxpr_mount_lock);
+ return (ENOTSUP);
+ }
+
+ /*
+ * Ensure that no vnodes are in use on this mount point.
+ */
+ mutex_enter(&vp->v_lock);
+ count = vp->v_count;
+ mutex_exit(&vp->v_lock);
+ if (count > 1) {
+ mutex_exit(&lxpr_mount_lock);
+ return (EBUSY);
+ }
+
+
+ /*
+ * purge the dnlc cache for vnode entries
+ * associated with this file system
+ */
+ count = dnlc_purge_vfsp(vfsp, 0);
+
+ /*
+ * free up the lxprnode
+ */
+ lxpr_freenode(lxpr_mnt->lxprm_node);
+ zone_rele(lxpr_mnt->lxprm_zone);
+ kmem_free(lxpr_mnt, sizeof (*lxpr_mnt));
+
+ mutex_exit(&lxpr_mount_lock);
+
+ return (0);
+}
+
+static int
+lxpr_root(vfs_t *vfsp, vnode_t **vpp)
+{
+ lxpr_node_t *lxpnp = ((lxpr_mnt_t *)vfsp->vfs_data)->lxprm_node;
+ vnode_t *vp = LXPTOV(lxpnp);
+
+ VN_HOLD(vp);
+ *vpp = vp;
+ return (0);
+}
+
+static int
+lxpr_statvfs(vfs_t *vfsp, statvfs64_t *sp)
+{
+ int n;
+ dev32_t d32;
+ extern uint_t nproc;
+
+ n = v.v_proc - nproc;
+
+ bzero((caddr_t)sp, sizeof (*sp));
+ sp->f_bsize = DEV_BSIZE;
+ sp->f_frsize = DEV_BSIZE;
+ sp->f_blocks = (fsblkcnt64_t)0;
+ sp->f_bfree = (fsblkcnt64_t)0;
+ sp->f_bavail = (fsblkcnt64_t)0;
+ sp->f_files = (fsfilcnt64_t)v.v_proc + 2;
+ sp->f_ffree = (fsfilcnt64_t)n;
+ sp->f_favail = (fsfilcnt64_t)n;
+ (void) cmpldev(&d32, vfsp->vfs_dev);
+ sp->f_fsid = d32;
+ /* It is guaranteed that vsw_name will fit in f_basetype */
+ (void) strcpy(sp->f_basetype, vfssw[lxprocfstype].vsw_name);
+ sp->f_flag = vf_to_stf(vfsp->vfs_flag);
+ sp->f_namemax = 64; /* quite arbitrary */
+ bzero(sp->f_fstr, sizeof (sp->f_fstr));
+
+ /* We know f_fstr is 32 chars */
+ (void) strcpy(sp->f_fstr, "/proc");
+ (void) strcpy(&sp->f_fstr[6], "/proc");
+
+ return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c
new file mode 100644
index 0000000000..7a319a6427
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c
@@ -0,0 +1,3646 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * lxproc -- a loosely Linux-compatible /proc
+ *
+ * The aspiration here is to provide something that sufficiently approximates
+ * the Linux /proc implementation for purposes of offering some compatibility
+ * for simple Linux /proc readers (e.g., ps/top/htop). However, it is not
+ * intended to exactly mimic Linux semantics; when choosing between offering
+ * compatibility and telling the truth, we emphatically pick the truth. A
+ * particular glaring example of this is the Linux notion of "tasks" (that is,
+ * threads), which -- due to historical misadventures on Linux -- allocate their
+ * identifiers from the process identifier space. (That is, each thread has in
+ * effect a pid.) Some Linux /proc readers have come to depend on this
+ * attribute, and become confused when threads appear with proper identifiers,
+ * so we simply opt for the pre-2.6 behavior, and do not present the tasks
+ * directory at all. Similarly, when choosing between offering compatibility
+ * and remaining consistent with our broader security model, we (obviously)
+ * choose security over compatibility. In short, this is meant to be a best
+ * effort -- no more.
+ */
+
+#include <sys/cpupart.h>
+#include <sys/cpuvar.h>
+#include <sys/session.h>
+#include <sys/vmparam.h>
+#include <sys/mman.h>
+#include <vm/rm.h>
+#include <vm/seg_vn.h>
+#include <sys/sdt.h>
+#include <lx_signum.h>
+#include <sys/strlog.h>
+#include <sys/stropts.h>
+#include <sys/cmn_err.h>
+#include <sys/lx_brand.h>
+#include <sys/x86_archext.h>
+#include <sys/archsystm.h>
+#include <sys/fp.h>
+#include <sys/pool_pset.h>
+#include <sys/pset.h>
+#include <sys/zone.h>
+#include <sys/pghw.h>
+#include <sys/vfs_opreg.h>
+#include <sys/param.h>
+#include <sys/utsname.h>
+#include <sys/rctl.h>
+
+/* Dependent on procfs */
+extern kthread_t *prchoose(proc_t *);
+
+#include "lx_proc.h"
+
+extern pgcnt_t swapfs_minfree;
+extern time_t boot_time;
+
+/*
+ * Pointer to the vnode ops vector for this fs.
+ * This is instantiated in lxprinit() in lxpr_vfsops.c
+ */
+vnodeops_t *lxpr_vnodeops;
+
+static int lxpr_open(vnode_t **, int, cred_t *, caller_context_t *);
+static int lxpr_close(vnode_t *, int, int, offset_t, cred_t *,
+ caller_context_t *);
+static int lxpr_read(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
+static int lxpr_getattr(vnode_t *, vattr_t *, int, cred_t *,
+ caller_context_t *);
+static int lxpr_access(vnode_t *, int, int, cred_t *, caller_context_t *);
+static int lxpr_lookup(vnode_t *, char *, vnode_t **,
+ pathname_t *, int, vnode_t *, cred_t *, caller_context_t *, int *,
+ pathname_t *);
+static int lxpr_readdir(vnode_t *, uio_t *, cred_t *, int *,
+ caller_context_t *, int);
+static int lxpr_readlink(vnode_t *, uio_t *, cred_t *, caller_context_t *);
+static int lxpr_cmp(vnode_t *, vnode_t *, caller_context_t *);
+static int lxpr_realvp(vnode_t *, vnode_t **, caller_context_t *);
+static int lxpr_sync(void);
+static void lxpr_inactive(vnode_t *, cred_t *, caller_context_t *);
+
+static vnode_t *lxpr_lookup_procdir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_piddir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_not_a_dir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_fddir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_netdir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_sysdir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_sys_fsdir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_sys_fs_inotifydir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_sys_kerneldir(vnode_t *, char *);
+
+static int lxpr_readdir_procdir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_piddir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_not_a_dir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_fddir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_netdir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_sysdir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_sys_fsdir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_sys_fs_inotifydir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_sys_kerneldir(lxpr_node_t *, uio_t *, int *);
+
+static void lxpr_read_invalid(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_empty(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_cpuinfo(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_isdir(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_fd(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_kmsg(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_loadavg(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_meminfo(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_mounts(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_partitions(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_stat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_swaps(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_uptime(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_version(lxpr_node_t *, lxpr_uiobuf_t *);
+
+static void lxpr_read_pid_cmdline(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_limits(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_maps(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_mountinfo(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_stat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_statm(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_status(lxpr_node_t *, lxpr_uiobuf_t *);
+
+static void lxpr_read_net_arp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_dev(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_dev_mcast(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_igmp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_ip_mr_cache(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_ip_mr_vif(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_mcfilter(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_netstat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_raw(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_route(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_rpc(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_rt_cache(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_sockstat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_snmp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_stat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_tcp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_udp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_unix(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_fs_inotify_max_queued_events(lxpr_node_t *,
+ lxpr_uiobuf_t *);
+static void lxpr_read_sys_fs_inotify_max_user_instances(lxpr_node_t *,
+ lxpr_uiobuf_t *);
+static void lxpr_read_sys_fs_inotify_max_user_watches(lxpr_node_t *,
+ lxpr_uiobuf_t *);
+static void lxpr_read_sys_kernel_hostname(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_kernel_msgmni(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_kernel_ngroups_max(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_kernel_pid_max(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_kernel_shmmax(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_kernel_threads_max(lxpr_node_t *, lxpr_uiobuf_t *);
+
+/*
+ * Simple conversion
+ */
+#define btok(x) ((x) >> 10) /* bytes to kbytes */
+#define ptok(x) ((x) << (PAGESHIFT - 10)) /* pages to kbytes */
+
+extern rctl_hndl_t rc_zone_msgmni;
+extern rctl_hndl_t rc_zone_shmmax;
+#define FOURGB 4294967295
+
+/*
+ * The lx /proc vnode operations vector
+ */
+const fs_operation_def_t lxpr_vnodeops_template[] = {
+ VOPNAME_OPEN, { .vop_open = lxpr_open },
+ VOPNAME_CLOSE, { .vop_close = lxpr_close },
+ VOPNAME_READ, { .vop_read = lxpr_read },
+ VOPNAME_GETATTR, { .vop_getattr = lxpr_getattr },
+ VOPNAME_ACCESS, { .vop_access = lxpr_access },
+ VOPNAME_LOOKUP, { .vop_lookup = lxpr_lookup },
+ VOPNAME_READDIR, { .vop_readdir = lxpr_readdir },
+ VOPNAME_READLINK, { .vop_readlink = lxpr_readlink },
+ VOPNAME_FSYNC, { .error = lxpr_sync },
+ VOPNAME_SEEK, { .error = lxpr_sync },
+ VOPNAME_INACTIVE, { .vop_inactive = lxpr_inactive },
+ VOPNAME_CMP, { .vop_cmp = lxpr_cmp },
+ VOPNAME_REALVP, { .vop_realvp = lxpr_realvp },
+ NULL, NULL
+};
+
+
+/*
+ * file contents of an lx /proc directory.
+ */
+static lxpr_dirent_t lx_procdir[] = {
+ { LXPR_CMDLINE, "cmdline" },
+ { LXPR_CPUINFO, "cpuinfo" },
+ { LXPR_DEVICES, "devices" },
+ { LXPR_DMA, "dma" },
+ { LXPR_FILESYSTEMS, "filesystems" },
+ { LXPR_INTERRUPTS, "interrupts" },
+ { LXPR_IOPORTS, "ioports" },
+ { LXPR_KCORE, "kcore" },
+ { LXPR_KMSG, "kmsg" },
+ { LXPR_LOADAVG, "loadavg" },
+ { LXPR_MEMINFO, "meminfo" },
+ { LXPR_MODULES, "modules" },
+ { LXPR_MOUNTS, "mounts" },
+ { LXPR_NETDIR, "net" },
+ { LXPR_PARTITIONS, "partitions" },
+ { LXPR_SELF, "self" },
+ { LXPR_STAT, "stat" },
+ { LXPR_SWAPS, "swaps" },
+ { LXPR_SYSDIR, "sys" },
+ { LXPR_UPTIME, "uptime" },
+ { LXPR_VERSION, "version" }
+};
+
+#define PROCDIRFILES (sizeof (lx_procdir) / sizeof (lx_procdir[0]))
+
+/*
+ * Contents of an lx /proc/<pid> directory.
+ */
+static lxpr_dirent_t piddir[] = {
+ { LXPR_PID_CMDLINE, "cmdline" },
+ { LXPR_PID_CPU, "cpu" },
+ { LXPR_PID_CURDIR, "cwd" },
+ { LXPR_PID_ENV, "environ" },
+ { LXPR_PID_EXE, "exe" },
+ { LXPR_PID_LIMITS, "limits" },
+ { LXPR_PID_MAPS, "maps" },
+ { LXPR_PID_MEM, "mem" },
+ { LXPR_PID_MOUNTINFO, "mountinfo" },
+ { LXPR_PID_ROOTDIR, "root" },
+ { LXPR_PID_STAT, "stat" },
+ { LXPR_PID_STATM, "statm" },
+ { LXPR_PID_STATUS, "status" },
+ { LXPR_PID_FDDIR, "fd" }
+};
+
+#define PIDDIRFILES (sizeof (piddir) / sizeof (piddir[0]))
+
+#define LX_RLIM_INFINITY 0xFFFFFFFFFFFFFFFF
+
+#define RCTL_INFINITE(x) \
+ ((x->rcv_flagaction & RCTL_LOCAL_MAXIMAL) && \
+ (x->rcv_flagaction & RCTL_GLOBAL_INFINITE))
+
+typedef struct lxpr_rlimtab {
+ char *rlim_name; /* limit name */
+ char *rlim_unit; /* limit unit */
+ char *rlim_rctl; /* rctl source */
+} lxpr_rlimtab_t;
+
+static lxpr_rlimtab_t lxpr_rlimtab[] = {
+ { "Max cpu time", "seconds", "process.max-cpu-time" },
+ { "Max file size", "bytes", "process.max-file-size" },
+ { "Max data size", "bytes", "process.max-data-size" },
+ { "Max stack size", "bytes", "process.max-stack-size" },
+ { "Max core file size", "bytes", "process.max-core-size" },
+ { "Max resident set", "bytes", "zone.max-physical-memory" },
+ { "Max processes", "processes", "zone.max-lwps" },
+ { "Max open files", "files", "process.max-file-descriptor" },
+ { "Max locked memory", "bytes", "zone.max-locked-memory" },
+ { "Max address space", "bytes", "process.max-address-space" },
+ { "Max file locks", "locks", NULL },
+ { "Max pending signals", "signals",
+ "process.max-sigqueue-size" },
+ { "Max msgqueue size", "bytes", "process.max-msg-messages" },
+ { NULL, NULL, NULL }
+};
+
+
+/*
+ * contents of lx /proc/net directory
+ */
+static lxpr_dirent_t netdir[] = {
+ { LXPR_NET_ARP, "arp" },
+ { LXPR_NET_DEV, "dev" },
+ { LXPR_NET_DEV_MCAST, "dev_mcast" },
+ { LXPR_NET_IGMP, "igmp" },
+ { LXPR_NET_IP_MR_CACHE, "ip_mr_cache" },
+ { LXPR_NET_IP_MR_VIF, "ip_mr_vif" },
+ { LXPR_NET_MCFILTER, "mcfilter" },
+ { LXPR_NET_NETSTAT, "netstat" },
+ { LXPR_NET_RAW, "raw" },
+ { LXPR_NET_ROUTE, "route" },
+ { LXPR_NET_RPC, "rpc" },
+ { LXPR_NET_RT_CACHE, "rt_cache" },
+ { LXPR_NET_SOCKSTAT, "sockstat" },
+ { LXPR_NET_SNMP, "snmp" },
+ { LXPR_NET_STAT, "stat" },
+ { LXPR_NET_TCP, "tcp" },
+ { LXPR_NET_UDP, "udp" },
+ { LXPR_NET_UNIX, "unix" }
+};
+
+#define NETDIRFILES (sizeof (netdir) / sizeof (netdir[0]))
+
+/*
+ * contents of /proc/sys directory
+ */
+static lxpr_dirent_t sysdir[] = {
+ { LXPR_SYS_FSDIR, "fs" },
+ { LXPR_SYS_KERNELDIR, "kernel" },
+};
+
+#define SYSDIRFILES (sizeof (sysdir) / sizeof (sysdir[0]))
+
+/*
+ * contents of /proc/sys/fs directory
+ */
+static lxpr_dirent_t sys_fsdir[] = {
+ { LXPR_SYS_FS_INOTIFYDIR, "inotify" },
+};
+
+#define SYS_FSDIRFILES (sizeof (sys_fsdir) / sizeof (sys_fsdir[0]))
+
+/*
+ * contents of /proc/sys/fs/inotify directory
+ */
+static lxpr_dirent_t sys_fs_inotifydir[] = {
+ { LXPR_SYS_FS_INOTIFY_MAX_QUEUED_EVENTS, "max_queued_events" },
+ { LXPR_SYS_FS_INOTIFY_MAX_USER_INSTANCES, "max_user_instances" },
+ { LXPR_SYS_FS_INOTIFY_MAX_USER_WATCHES, "max_user_watches" },
+};
+
+#define SYS_FS_INOTIFYDIRFILES \
+ (sizeof (sys_fs_inotifydir) / sizeof (sys_fs_inotifydir[0]))
+
+/*
+ * contents of /proc/sys/kernel directory
+ */
+static lxpr_dirent_t sys_kerneldir[] = {
+ { LXPR_SYS_KERNEL_HOSTNAME, "hostname" },
+ { LXPR_SYS_KERNEL_MSGMNI, "msgmni" },
+ { LXPR_SYS_KERNEL_NGROUPS_MAX, "ngroups_max" },
+ { LXPR_SYS_KERNEL_PID_MAX, "pid_max" },
+ { LXPR_SYS_KERNEL_SHMMAX, "shmmax" },
+ { LXPR_SYS_KERNEL_THREADS_MAX, "threads-max" },
+};
+
+#define SYS_KERNELDIRFILES (sizeof (sys_kerneldir) / sizeof (sys_kerneldir[0]))
+
+/*
+ * lxpr_open(): Vnode operation for VOP_OPEN()
+ */
+static int
+lxpr_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
+{
+ vnode_t *vp = *vpp;
+ lxpr_node_t *lxpnp = VTOLXP(vp);
+ lxpr_nodetype_t type = lxpnp->lxpr_type;
+ vnode_t *rvp;
+ int error = 0;
+
+ /*
+ * We only allow reading in this file systrem
+ */
+ if (flag & FWRITE)
+ return (EROFS);
+
+ /*
+ * If we are opening an underlying file only allow regular files,
+ * fifos or sockets; reject the open for anything else.
+ * Just do it if we are opening the current or root directory.
+ */
+ if (lxpnp->lxpr_realvp != NULL) {
+ rvp = lxpnp->lxpr_realvp;
+
+ if (type == LXPR_PID_FD_FD && rvp->v_type != VREG &&
+ rvp->v_type != VFIFO && rvp->v_type != VSOCK) {
+ error = EACCES;
+ } else {
+ if (type == LXPR_PID_FD_FD && rvp->v_type == VFIFO) {
+ /*
+ * This flag lets the fifo open know that
+ * we're using proc/fd to open a fd which we
+ * already have open. Otherwise, the fifo might
+ * reject an open if the other end has closed.
+ */
+ flag |= FKLYR;
+ }
+ /*
+ * Need to hold rvp since VOP_OPEN() may release it.
+ */
+ VN_HOLD(rvp);
+ error = VOP_OPEN(&rvp, flag, cr, ct);
+ if (error) {
+ VN_RELE(rvp);
+ } else {
+ *vpp = rvp;
+ VN_RELE(vp);
+ }
+ }
+ }
+
+ return (error);
+}
+
+
+/*
+ * lxpr_close(): Vnode operation for VOP_CLOSE()
+ */
+/* ARGSUSED */
+static int
+lxpr_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
+ caller_context_t *ct)
+{
+ lxpr_node_t *lxpr = VTOLXP(vp);
+ lxpr_nodetype_t type = lxpr->lxpr_type;
+
+ /*
+ * we should never get here because the close is done on the realvp
+ * for these nodes
+ */
+ ASSERT(type != LXPR_PID_FD_FD &&
+ type != LXPR_PID_CURDIR &&
+ type != LXPR_PID_ROOTDIR &&
+ type != LXPR_PID_EXE);
+
+ return (0);
+}
+
+static void (*lxpr_read_function[LXPR_NFILES])() = {
+ lxpr_read_isdir, /* /proc */
+ lxpr_read_isdir, /* /proc/<pid> */
+ lxpr_read_pid_cmdline, /* /proc/<pid>/cmdline */
+ lxpr_read_empty, /* /proc/<pid>/cpu */
+ lxpr_read_invalid, /* /proc/<pid>/cwd */
+ lxpr_read_empty, /* /proc/<pid>/environ */
+ lxpr_read_invalid, /* /proc/<pid>/exe */
+ lxpr_read_pid_limits, /* /proc/<pid>/limits */
+ lxpr_read_pid_maps, /* /proc/<pid>/maps */
+ lxpr_read_empty, /* /proc/<pid>/mem */
+ lxpr_read_pid_mountinfo, /* /proc/<pid>/mountinfo */
+ lxpr_read_invalid, /* /proc/<pid>/root */
+ lxpr_read_pid_stat, /* /proc/<pid>/stat */
+ lxpr_read_pid_statm, /* /proc/<pid>/statm */
+ lxpr_read_pid_status, /* /proc/<pid>/status */
+ lxpr_read_isdir, /* /proc/<pid>/fd */
+ lxpr_read_fd, /* /proc/<pid>/fd/nn */
+ lxpr_read_empty, /* /proc/cmdline */
+ lxpr_read_cpuinfo, /* /proc/cpuinfo */
+ lxpr_read_empty, /* /proc/devices */
+ lxpr_read_empty, /* /proc/dma */
+ lxpr_read_empty, /* /proc/filesystems */
+ lxpr_read_empty, /* /proc/interrupts */
+ lxpr_read_empty, /* /proc/ioports */
+ lxpr_read_empty, /* /proc/kcore */
+ lxpr_read_kmsg, /* /proc/kmsg */
+ lxpr_read_loadavg, /* /proc/loadavg */
+ lxpr_read_meminfo, /* /proc/meminfo */
+ lxpr_read_empty, /* /proc/modules */
+ lxpr_read_mounts, /* /proc/mounts */
+ lxpr_read_isdir, /* /proc/net */
+ lxpr_read_net_arp, /* /proc/net/arp */
+ lxpr_read_net_dev, /* /proc/net/dev */
+ lxpr_read_net_dev_mcast, /* /proc/net/dev_mcast */
+ lxpr_read_net_igmp, /* /proc/net/igmp */
+ lxpr_read_net_ip_mr_cache, /* /proc/net/ip_mr_cache */
+ lxpr_read_net_ip_mr_vif, /* /proc/net/ip_mr_vif */
+ lxpr_read_net_mcfilter, /* /proc/net/mcfilter */
+ lxpr_read_net_netstat, /* /proc/net/netstat */
+ lxpr_read_net_raw, /* /proc/net/raw */
+ lxpr_read_net_route, /* /proc/net/route */
+ lxpr_read_net_rpc, /* /proc/net/rpc */
+ lxpr_read_net_rt_cache, /* /proc/net/rt_cache */
+ lxpr_read_net_sockstat, /* /proc/net/sockstat */
+ lxpr_read_net_snmp, /* /proc/net/snmp */
+ lxpr_read_net_stat, /* /proc/net/stat */
+ lxpr_read_net_tcp, /* /proc/net/tcp */
+ lxpr_read_net_udp, /* /proc/net/udp */
+ lxpr_read_net_unix, /* /proc/net/unix */
+ lxpr_read_partitions, /* /proc/partitions */
+ lxpr_read_invalid, /* /proc/self */
+ lxpr_read_stat, /* /proc/stat */
+ lxpr_read_swaps, /* /proc/swaps */
+ lxpr_read_invalid, /* /proc/sys */
+ lxpr_read_invalid, /* /proc/sys/fs */
+ lxpr_read_invalid, /* /proc/sys/fs/inotify */
+ lxpr_read_sys_fs_inotify_max_queued_events, /* max_queued_events */
+ lxpr_read_sys_fs_inotify_max_user_instances, /* max_user_instances */
+ lxpr_read_sys_fs_inotify_max_user_watches, /* max_user_watches */
+ lxpr_read_invalid, /* /proc/sys/kernel */
+ lxpr_read_sys_kernel_hostname, /* /proc/sys/kernel/hostname */
+ lxpr_read_sys_kernel_msgmni, /* /proc/sys/kernel/msgmni */
+ lxpr_read_sys_kernel_ngroups_max, /* /proc/sys/kernel/ngroups_max */
+ lxpr_read_sys_kernel_pid_max, /* /proc/sys/kernel/pid_max */
+ lxpr_read_sys_kernel_shmmax, /* /proc/sys/kernel/shmmax */
+ lxpr_read_sys_kernel_threads_max, /* /proc/sys/kernel/threads-max */
+ lxpr_read_uptime, /* /proc/uptime */
+ lxpr_read_version, /* /proc/version */
+};
+
+/*
+ * Array of lookup functions, indexed by lx /proc file type.
+ */
+static vnode_t *(*lxpr_lookup_function[LXPR_NFILES])() = {
+ lxpr_lookup_procdir, /* /proc */
+ lxpr_lookup_piddir, /* /proc/<pid> */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/cmdline */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/cpu */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/cwd */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/environ */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/exe */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/limits */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/maps */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/mem */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/mountinfo */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/root */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/stat */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/statm */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/status */
+ lxpr_lookup_fddir, /* /proc/<pid>/fd */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/fd/nn */
+ lxpr_lookup_not_a_dir, /* /proc/cmdline */
+ lxpr_lookup_not_a_dir, /* /proc/cpuinfo */
+ lxpr_lookup_not_a_dir, /* /proc/devices */
+ lxpr_lookup_not_a_dir, /* /proc/dma */
+ lxpr_lookup_not_a_dir, /* /proc/filesystems */
+ lxpr_lookup_not_a_dir, /* /proc/interrupts */
+ lxpr_lookup_not_a_dir, /* /proc/ioports */
+ lxpr_lookup_not_a_dir, /* /proc/kcore */
+ lxpr_lookup_not_a_dir, /* /proc/kmsg */
+ lxpr_lookup_not_a_dir, /* /proc/loadavg */
+ lxpr_lookup_not_a_dir, /* /proc/meminfo */
+ lxpr_lookup_not_a_dir, /* /proc/modules */
+ lxpr_lookup_not_a_dir, /* /proc/mounts */
+ lxpr_lookup_netdir, /* /proc/net */
+ lxpr_lookup_not_a_dir, /* /proc/net/arp */
+ lxpr_lookup_not_a_dir, /* /proc/net/dev */
+ lxpr_lookup_not_a_dir, /* /proc/net/dev_mcast */
+ lxpr_lookup_not_a_dir, /* /proc/net/igmp */
+ lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_cache */
+ lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_vif */
+ lxpr_lookup_not_a_dir, /* /proc/net/mcfilter */
+ lxpr_lookup_not_a_dir, /* /proc/net/netstat */
+ lxpr_lookup_not_a_dir, /* /proc/net/raw */
+ lxpr_lookup_not_a_dir, /* /proc/net/route */
+ lxpr_lookup_not_a_dir, /* /proc/net/rpc */
+ lxpr_lookup_not_a_dir, /* /proc/net/rt_cache */
+ lxpr_lookup_not_a_dir, /* /proc/net/sockstat */
+ lxpr_lookup_not_a_dir, /* /proc/net/snmp */
+ lxpr_lookup_not_a_dir, /* /proc/net/stat */
+ lxpr_lookup_not_a_dir, /* /proc/net/tcp */
+ lxpr_lookup_not_a_dir, /* /proc/net/udp */
+ lxpr_lookup_not_a_dir, /* /proc/net/unix */
+ lxpr_lookup_not_a_dir, /* /proc/partitions */
+ lxpr_lookup_not_a_dir, /* /proc/self */
+ lxpr_lookup_not_a_dir, /* /proc/stat */
+ lxpr_lookup_not_a_dir, /* /proc/swaps */
+ lxpr_lookup_sysdir, /* /proc/sys */
+ lxpr_lookup_sys_fsdir, /* /proc/sys/fs */
+ lxpr_lookup_sys_fs_inotifydir, /* /proc/sys/fs/inotify */
+ lxpr_lookup_not_a_dir, /* .../inotify/max_queued_events */
+ lxpr_lookup_not_a_dir, /* .../inotify/max_user_instances */
+ lxpr_lookup_not_a_dir, /* .../inotify/max_user_watches */
+ lxpr_lookup_sys_kerneldir, /* /proc/sys/kernel */
+ lxpr_lookup_not_a_dir, /* /proc/sys/kernel/hostname */
+ lxpr_lookup_not_a_dir, /* /proc/sys/kernel/msgmni */
+ lxpr_lookup_not_a_dir, /* /proc/sys/kernel/ngroups_max */
+ lxpr_lookup_not_a_dir, /* /proc/sys/kernel/pid_max */
+ lxpr_lookup_not_a_dir, /* /proc/sys/kernel/shmmax */
+ lxpr_lookup_not_a_dir, /* /proc/sys/kernel/threads-max */
+ lxpr_lookup_not_a_dir, /* /proc/uptime */
+ lxpr_lookup_not_a_dir, /* /proc/version */
+};
+
+/*
+ * Array of readdir functions, indexed by /proc file type.
+ */
+static int (*lxpr_readdir_function[LXPR_NFILES])() = {
+ lxpr_readdir_procdir, /* /proc */
+ lxpr_readdir_piddir, /* /proc/<pid> */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/cmdline */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/cpu */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/cwd */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/environ */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/exe */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/limits */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/maps */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/mem */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/mountinfo */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/root */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/stat */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/statm */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/status */
+ lxpr_readdir_fddir, /* /proc/<pid>/fd */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/fd/nn */
+ lxpr_readdir_not_a_dir, /* /proc/cmdline */
+ lxpr_readdir_not_a_dir, /* /proc/cpuinfo */
+ lxpr_readdir_not_a_dir, /* /proc/devices */
+ lxpr_readdir_not_a_dir, /* /proc/dma */
+ lxpr_readdir_not_a_dir, /* /proc/filesystems */
+ lxpr_readdir_not_a_dir, /* /proc/interrupts */
+ lxpr_readdir_not_a_dir, /* /proc/ioports */
+ lxpr_readdir_not_a_dir, /* /proc/kcore */
+ lxpr_readdir_not_a_dir, /* /proc/kmsg */
+ lxpr_readdir_not_a_dir, /* /proc/loadavg */
+ lxpr_readdir_not_a_dir, /* /proc/meminfo */
+ lxpr_readdir_not_a_dir, /* /proc/modules */
+ lxpr_readdir_not_a_dir, /* /proc/mounts */
+ lxpr_readdir_netdir, /* /proc/net */
+ lxpr_readdir_not_a_dir, /* /proc/net/arp */
+ lxpr_readdir_not_a_dir, /* /proc/net/dev */
+ lxpr_readdir_not_a_dir, /* /proc/net/dev_mcast */
+ lxpr_readdir_not_a_dir, /* /proc/net/igmp */
+ lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_cache */
+ lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_vif */
+ lxpr_readdir_not_a_dir, /* /proc/net/mcfilter */
+ lxpr_readdir_not_a_dir, /* /proc/net/netstat */
+ lxpr_readdir_not_a_dir, /* /proc/net/raw */
+ lxpr_readdir_not_a_dir, /* /proc/net/route */
+ lxpr_readdir_not_a_dir, /* /proc/net/rpc */
+ lxpr_readdir_not_a_dir, /* /proc/net/rt_cache */
+ lxpr_readdir_not_a_dir, /* /proc/net/sockstat */
+ lxpr_readdir_not_a_dir, /* /proc/net/snmp */
+ lxpr_readdir_not_a_dir, /* /proc/net/stat */
+ lxpr_readdir_not_a_dir, /* /proc/net/tcp */
+ lxpr_readdir_not_a_dir, /* /proc/net/udp */
+ lxpr_readdir_not_a_dir, /* /proc/net/unix */
+ lxpr_readdir_not_a_dir, /* /proc/partitions */
+ lxpr_readdir_not_a_dir, /* /proc/self */
+ lxpr_readdir_not_a_dir, /* /proc/stat */
+ lxpr_readdir_not_a_dir, /* /proc/swaps */
+ lxpr_readdir_sysdir, /* /proc/sys */
+ lxpr_readdir_sys_fsdir, /* /proc/sys/fs */
+ lxpr_readdir_sys_fs_inotifydir, /* /proc/sys/fs/inotify */
+ lxpr_readdir_not_a_dir, /* .../inotify/max_queued_events */
+ lxpr_readdir_not_a_dir, /* .../inotify/max_user_instances */
+ lxpr_readdir_not_a_dir, /* .../inotify/max_user_watches */
+ lxpr_readdir_sys_kerneldir, /* /proc/sys/kernel */
+ lxpr_readdir_not_a_dir, /* /proc/sys/kernel/hostname */
+ lxpr_readdir_not_a_dir, /* /proc/sys/kernel/msgmni */
+ lxpr_readdir_not_a_dir, /* /proc/sys/kernel/ngroups_max */
+ lxpr_readdir_not_a_dir, /* /proc/sys/kernel/pid_max */
+ lxpr_readdir_not_a_dir, /* /proc/sys/kernel/shmmax */
+ lxpr_readdir_not_a_dir, /* /proc/sys/kernel/threads-max */
+ lxpr_readdir_not_a_dir, /* /proc/uptime */
+ lxpr_readdir_not_a_dir, /* /proc/version */
+};
+
+
+/*
+ * lxpr_read(): Vnode operation for VOP_READ()
+ *
+ * As the format of all the files that can be read in the lx procfs is human
+ * readable and not binary structures there do not have to be different
+ * read variants depending on whether the reading process model is 32 or 64 bits
+ * (at least in general, and certainly the difference is unlikely to be enough
+ * to justify have different routines for 32 and 64 bit reads
+ */
+/* ARGSUSED */
+static int
+lxpr_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
+ caller_context_t *ct)
+{
+ lxpr_node_t *lxpnp = VTOLXP(vp);
+ lxpr_nodetype_t type = lxpnp->lxpr_type;
+ lxpr_uiobuf_t *uiobuf = lxpr_uiobuf_new(uiop);
+ int error;
+
+ ASSERT(type < LXPR_NFILES);
+
+ if (type == LXPR_KMSG) {
+ ldi_ident_t li = VTOLXPM(vp)->lxprm_li;
+ struct strioctl str;
+ int rv;
+
+ /*
+ * Open the zone's console device using the layered driver
+ * interface.
+ */
+ if ((error = ldi_open_by_name("/dev/log", FREAD, cr,
+ &lxpnp->lxpr_cons_ldih, li)) != 0)
+ return (error);
+
+ /*
+ * Send an ioctl to the underlying console device, letting it
+ * know we're interested in getting console messages.
+ */
+ str.ic_cmd = I_CONSLOG;
+ str.ic_timout = 0;
+ str.ic_len = 0;
+ str.ic_dp = NULL;
+ if ((error = ldi_ioctl(lxpnp->lxpr_cons_ldih, I_STR,
+ (intptr_t)&str, FKIOCTL, cr, &rv)) != 0)
+ return (error);
+ }
+
+ lxpr_read_function[type](lxpnp, uiobuf);
+
+ if (type == LXPR_KMSG) {
+ if ((error = ldi_close(lxpnp->lxpr_cons_ldih, FREAD, cr)) != 0)
+ return (error);
+ }
+
+ error = lxpr_uiobuf_flush(uiobuf);
+ lxpr_uiobuf_free(uiobuf);
+
+ return (error);
+}
+
+/*
+ * lxpr_read_invalid(), lxpr_read_isdir(), lxpr_read_empty()
+ *
+ * Various special case reads:
+ * - trying to read a directory
+ * - invalid file (used to mean a file that should be implemented,
+ * but isn't yet)
+ * - empty file
+ * - wait to be able to read a file that will never have anything to read
+ */
+/* ARGSUSED */
+static void
+lxpr_read_isdir(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_uiobuf_seterr(uiobuf, EISDIR);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_invalid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_empty(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_pid_cmdline():
+ *
+ * This is not precisely compatible with Linux: the Linux cmdline returns argv
+ * with the correct separation using \0 between the arguments, but we cannot do
+ * that without copying the real argv from the correct process context. This
+ * is too difficult to attempt so we pretend that the entire cmdline is just
+ * argv[0]. This is good enough for ps and htop to display correctly, but might
+ * cause some other things not to work correctly.
+ */
+static void
+lxpr_read_pid_cmdline(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ char *buf;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_CMDLINE);
+
+ p = lxpr_lock(lxpnp->lxpr_pid);
+ if (p == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ return;
+ }
+
+ buf = PTOU(p)->u_argv != 0 ? PTOU(p)->u_psargs : PTOU(p)->u_comm;
+
+ lxpr_uiobuf_write(uiobuf, buf, strlen(buf) + 1);
+ lxpr_unlock(p);
+}
+
+/*
+ * lxpr_read_pid_limits(): ulimit file
+ */
+static void
+lxpr_read_pid_limits(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ rctl_qty_t cur, max;
+ rctl_val_t *oval, *nval;
+ rctl_hndl_t hndl;
+ char *kname;
+ int i;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_LIMITS);
+
+ nval = kmem_alloc(sizeof (rctl_val_t), KM_SLEEP);
+
+ p = lxpr_lock(lxpnp->lxpr_pid);
+ if (p == NULL) {
+ kmem_free(nval, sizeof (rctl_val_t));
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ return;
+ }
+
+ lxpr_uiobuf_printf(uiobuf, "%-25s %-20s %-20s %-10s\n",
+ "Limit", "Soft Limit", "Hard Limit", "Units");
+ for (i = 0; lxpr_rlimtab[i].rlim_name != NULL; i++) {
+ kname = lxpr_rlimtab[i].rlim_rctl;
+ /* default to unlimited for resources without an analog */
+ cur = RLIM_INFINITY;
+ max = RLIM_INFINITY;
+ if (kname != NULL) {
+ hndl = rctl_hndl_lookup(kname);
+ oval = NULL;
+ while ((hndl != -1) &&
+ rctl_local_get(hndl, oval, nval, p) == 0) {
+ oval = nval;
+ switch (nval->rcv_privilege) {
+ case RCPRIV_BASIC:
+ if (!RCTL_INFINITE(nval))
+ cur = nval->rcv_value;
+ break;
+ case RCPRIV_PRIVILEGED:
+ if (!RCTL_INFINITE(nval))
+ max = nval->rcv_value;
+ break;
+ }
+ }
+ }
+
+ lxpr_uiobuf_printf(uiobuf, "%-25s", lxpr_rlimtab[i].rlim_name);
+ if (cur == RLIM_INFINITY || cur == LX_RLIM_INFINITY) {
+ lxpr_uiobuf_printf(uiobuf, " %-20s", "unlimited");
+ } else {
+ lxpr_uiobuf_printf(uiobuf, " %-20lu", cur);
+ }
+ if (max == RLIM_INFINITY || max == LX_RLIM_INFINITY) {
+ lxpr_uiobuf_printf(uiobuf, " %-20s", "unlimited");
+ } else {
+ lxpr_uiobuf_printf(uiobuf, " %-20lu", max);
+ }
+ lxpr_uiobuf_printf(uiobuf, " %-10s\n",
+ lxpr_rlimtab[i].rlim_unit);
+ }
+
+ lxpr_unlock(p);
+ kmem_free(nval, sizeof (rctl_val_t));
+}
+
+/*
+ * lxpr_read_pid_maps(): memory map file
+ */
+static void
+lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ struct as *as;
+ struct seg *seg;
+ char *buf;
+ int buflen = MAXPATHLEN;
+ struct print_data {
+ uintptr_t saddr;
+ uintptr_t eaddr;
+ int type;
+ char prot[5];
+ uintptr_t offset;
+ vnode_t *vp;
+ struct print_data *next;
+ } *print_head = NULL;
+ struct print_data **print_tail = &print_head;
+ struct print_data *pbuf;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_MAPS);
+
+ p = lxpr_lock(lxpnp->lxpr_pid);
+ if (p == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ return;
+ }
+
+ as = p->p_as;
+
+ if (as == &kas) {
+ lxpr_unlock(p);
+ return;
+ }
+
+ mutex_exit(&p->p_lock);
+
+ /* Iterate over all segments in the address space */
+ AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
+ for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
+ vnode_t *vp;
+ uint_t protbits;
+
+ pbuf = kmem_alloc(sizeof (*pbuf), KM_SLEEP);
+
+ pbuf->saddr = (uintptr_t)seg->s_base;
+ pbuf->eaddr = pbuf->saddr + seg->s_size;
+ pbuf->type = SEGOP_GETTYPE(seg, seg->s_base);
+
+ /*
+ * Cheat and only use the protection bits of the first page
+ * in the segment
+ */
+ (void) strncpy(pbuf->prot, "----", sizeof (pbuf->prot));
+ (void) SEGOP_GETPROT(seg, seg->s_base, 0, &protbits);
+
+ if (protbits & PROT_READ) pbuf->prot[0] = 'r';
+ if (protbits & PROT_WRITE) pbuf->prot[1] = 'w';
+ if (protbits & PROT_EXEC) pbuf->prot[2] = 'x';
+ if (pbuf->type & MAP_SHARED) pbuf->prot[3] = 's';
+ else if (pbuf->type & MAP_PRIVATE) pbuf->prot[3] = 'p';
+
+ if (seg->s_ops == &segvn_ops &&
+ SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
+ vp != NULL && vp->v_type == VREG) {
+ VN_HOLD(vp);
+ pbuf->vp = vp;
+ } else {
+ pbuf->vp = NULL;
+ }
+
+ pbuf->offset = SEGOP_GETOFFSET(seg, (caddr_t)pbuf->saddr);
+
+ pbuf->next = NULL;
+ *print_tail = pbuf;
+ print_tail = &pbuf->next;
+ }
+ AS_LOCK_EXIT(as, &as->a_lock);
+ mutex_enter(&p->p_lock);
+ lxpr_unlock(p);
+
+ buf = kmem_alloc(buflen, KM_SLEEP);
+
+ /* print the data we've extracted */
+ pbuf = print_head;
+ while (pbuf != NULL) {
+ struct print_data *pbuf_next;
+ vattr_t vattr;
+
+ int maj = 0;
+ int min = 0;
+ u_longlong_t inode = 0;
+
+ *buf = '\0';
+ if (pbuf->vp != NULL) {
+ vattr.va_mask = AT_FSID | AT_NODEID;
+ if (VOP_GETATTR(pbuf->vp, &vattr, 0, CRED(),
+ NULL) == 0) {
+ maj = getmajor(vattr.va_fsid);
+ min = getminor(vattr.va_fsid);
+ inode = vattr.va_nodeid;
+ }
+ (void) vnodetopath(NULL, pbuf->vp, buf, buflen, CRED());
+ VN_RELE(pbuf->vp);
+ }
+
+ if (p->p_model == DATAMODEL_LP64) {
+ lxpr_uiobuf_printf(uiobuf,
+ "%016llx-%16llx %s %016llx %02d:%03d %lld%s%s\n",
+ pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
+ maj, min, inode, *buf != '\0' ? " " : "", buf);
+ } else {
+ lxpr_uiobuf_printf(uiobuf,
+ "%08x-%08x %s %08x %02d:%03d %lld%s%s\n",
+ (uint32_t)pbuf->saddr, (uint32_t)pbuf->eaddr,
+ pbuf->prot, (uint32_t)pbuf->offset, maj, min,
+ inode, *buf != '\0' ? " " : "", buf);
+ }
+
+ pbuf_next = pbuf->next;
+ kmem_free(pbuf, sizeof (*pbuf));
+ pbuf = pbuf_next;
+ }
+
+ kmem_free(buf, buflen);
+}
+
+/*
+ * lxpr_read_pid_mountinfo(): information about process mount points. e.g.:
+ * 14 19 0:13 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw
+ * mntid parid devnums root mntpnt mntopts - fstype mntsrc superopts
+ *
+ * We have to make up several of these fields.
+ */
+static void
+lxpr_read_pid_mountinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ struct vfs *vfsp;
+ struct vfs *vfslist;
+ zone_t *zone = LXPTOZ(lxpnp);
+ struct print_data {
+ refstr_t *vfs_mntpt;
+ refstr_t *vfs_resource;
+ uint_t vfs_flag;
+ int vfs_fstype;
+ dev_t vfs_dev;
+ struct print_data *next;
+ } *print_head = NULL;
+ struct print_data **print_tail = &print_head;
+ struct print_data *printp;
+ int root_id = 15; /* use a made-up value */
+ int mnt_id;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_MOUNTINFO);
+
+ vfs_list_read_lock();
+
+ /* root is the top-level, it does not appear in this output */
+ if (zone == global_zone) {
+ vfsp = vfslist = rootvfs;
+ } else {
+ vfsp = vfslist = zone->zone_vfslist;
+ /*
+ * If the zone has a root entry, it will be the first in
+ * the list. If it doesn't, we conjure one up.
+ */
+ if (vfslist == NULL || strcmp(refstr_value(vfsp->vfs_mntpt),
+ zone->zone_rootpath) != 0) {
+ struct vfs *tvfsp;
+ /*
+ * The root of the zone is not a mount point. The vfs
+ * we want to report is that of the zone's root vnode.
+ */
+ tvfsp = zone->zone_rootvp->v_vfsp;
+
+ lxpr_uiobuf_printf(uiobuf,
+ "%d 1 %d:%d / / %s - %s / %s\n",
+ root_id,
+ major(tvfsp->vfs_dev), minor(vfsp->vfs_dev),
+ tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw",
+ vfssw[tvfsp->vfs_fstype].vsw_name,
+ tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
+
+ }
+ if (vfslist == NULL) {
+ vfs_list_unlock();
+ return;
+ }
+ }
+
+ /*
+ * Later on we have to do a lookupname, which can end up causing
+ * another vfs_list_read_lock() to be called. Which can lead to a
+ * deadlock. To avoid this, we extract the data we need into a local
+ * list, then we can run this list without holding vfs_list_read_lock()
+ * We keep the list in the same order as the vfs_list
+ */
+ do {
+ /* Skip mounts we shouldn't show */
+ if (vfsp->vfs_flag & VFS_NOMNTTAB) {
+ goto nextfs;
+ }
+
+ printp = kmem_alloc(sizeof (*printp), KM_SLEEP);
+ refstr_hold(vfsp->vfs_mntpt);
+ printp->vfs_mntpt = vfsp->vfs_mntpt;
+ refstr_hold(vfsp->vfs_resource);
+ printp->vfs_resource = vfsp->vfs_resource;
+ printp->vfs_flag = vfsp->vfs_flag;
+ printp->vfs_fstype = vfsp->vfs_fstype;
+ printp->vfs_dev = vfsp->vfs_dev;
+ printp->next = NULL;
+
+ *print_tail = printp;
+ print_tail = &printp->next;
+
+nextfs:
+ vfsp = (zone == global_zone) ?
+ vfsp->vfs_next : vfsp->vfs_zone_next;
+
+ } while (vfsp != vfslist);
+
+ vfs_list_unlock();
+
+ mnt_id = root_id + 1;
+
+ /*
+ * now we can run through what we've extracted without holding
+ * vfs_list_read_lock()
+ */
+ printp = print_head;
+ while (printp != NULL) {
+ struct print_data *printp_next;
+ const char *resource;
+ char *mntpt;
+ struct vnode *vp;
+ int error;
+
+ mntpt = (char *)refstr_value(printp->vfs_mntpt);
+ resource = refstr_value(printp->vfs_resource);
+
+ if (mntpt != NULL && mntpt[0] != '\0')
+ mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
+ else
+ mntpt = "-";
+
+ error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
+
+ if (error != 0)
+ goto nextp;
+
+ if (!(vp->v_flag & VROOT)) {
+ VN_RELE(vp);
+ goto nextp;
+ }
+ VN_RELE(vp);
+
+ if (resource != NULL && resource[0] != '\0') {
+ if (resource[0] == '/') {
+ resource = ZONE_PATH_VISIBLE(resource, zone) ?
+ ZONE_PATH_TRANSLATE(resource, zone) : mntpt;
+ }
+ } else {
+ resource = "none";
+ }
+
+ /*
+ * XXX parent ID is not tracked correctly here. Currently we
+ * always assume the parent ID is the root ID.
+ */
+ lxpr_uiobuf_printf(uiobuf,
+ "%d %d %d:%d / %s %s - %s %s %s\n",
+ mnt_id, root_id,
+ major(printp->vfs_dev), minor(printp->vfs_dev),
+ mntpt,
+ printp->vfs_flag & VFS_RDONLY ? "ro" : "rw",
+ vfssw[printp->vfs_fstype].vsw_name,
+ resource,
+ printp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
+
+nextp:
+ printp_next = printp->next;
+ refstr_rele(printp->vfs_mntpt);
+ refstr_rele(printp->vfs_resource);
+ kmem_free(printp, sizeof (*printp));
+ printp = printp_next;
+
+ mnt_id++;
+ }
+}
+
+/*
+ * lxpr_read_pid_statm(): memory status file
+ */
+static void
+lxpr_read_pid_statm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ struct as *as;
+ size_t vsize;
+ size_t rss;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_STATM);
+
+ p = lxpr_lock(lxpnp->lxpr_pid);
+ if (p == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ return;
+ }
+
+ as = p->p_as;
+
+ mutex_exit(&p->p_lock);
+
+ AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
+ vsize = btopr(as->a_resvsize);
+ rss = rm_asrss(as);
+ AS_LOCK_EXIT(as, &as->a_lock);
+
+ mutex_enter(&p->p_lock);
+ lxpr_unlock(p);
+
+ lxpr_uiobuf_printf(uiobuf,
+ "%lu %lu %lu %lu %lu %lu %lu\n",
+ vsize, rss, 0l, rss, 0l, 0l, 0l);
+}
+
+/*
+ * Derived from procfs prgetxmap32
+ */
+static size_t
+get_locked(proc_t *p)
+{
+ struct as *as = p->p_as;
+ struct seg *seg;
+ uint_t nlocked = 0;
+
+ ASSERT(as != &kas && AS_READ_HELD(as, &as->a_lock));
+
+ if ((seg = AS_SEGFIRST(as)) == NULL)
+ return (0);
+
+ do {
+ char *parr;
+ uint64_t npages;
+ uint64_t pagenum;
+
+ npages = ((uintptr_t)seg->s_size) >> PAGESHIFT;
+ parr = kmem_zalloc(npages, KM_SLEEP);
+
+ SEGOP_INCORE(seg, seg->s_base, seg->s_size, parr);
+
+ for (pagenum = 0; pagenum < npages; pagenum++) {
+ if (parr[pagenum] & SEG_PAGE_LOCKED)
+ nlocked++;
+ }
+ kmem_free(parr, npages);
+ } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
+
+ return (nlocked);
+}
+
+/*
+ * lxpr_read_pid_status(): status file
+ */
+static void
+lxpr_read_pid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ kthread_t *t;
+ user_t *up;
+ cred_t *cr;
+ const gid_t *groups;
+ int ngroups;
+ struct as *as;
+ char *status;
+ pid_t pid, ppid;
+ size_t vsize;
+ size_t nlocked;
+ size_t rss;
+ k_sigset_t current, ignore, handle;
+ int i, lx_sig;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_STATUS);
+
+ p = lxpr_lock(lxpnp->lxpr_pid);
+ if (p == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ return;
+ }
+
+ pid = p->p_pid;
+
+ /*
+ * Convert pid to the Linux default of 1 if we're the zone's init
+ * process
+ */
+ if (pid == curproc->p_zone->zone_proc_initpid) {
+ pid = 1;
+ ppid = 0; /* parent pid for init is 0 */
+ } else {
+ /*
+ * Make sure not to reference parent PIDs that reside outside
+ * the zone
+ */
+ ppid = ((p->p_flag & SZONETOP)
+ ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
+
+ /*
+ * Convert ppid to the Linux default of 1 if our parent is the
+ * zone's init process
+ */
+ if (ppid == curproc->p_zone->zone_proc_initpid)
+ ppid = 1;
+ }
+
+ t = prchoose(p);
+ if (t != NULL) {
+ switch (t->t_state) {
+ case TS_SLEEP:
+ status = "S (sleeping)";
+ break;
+ case TS_RUN:
+ case TS_ONPROC:
+ status = "R (running)";
+ break;
+ case TS_ZOMB:
+ status = "Z (zombie)";
+ break;
+ case TS_STOPPED:
+ status = "T (stopped)";
+ break;
+ default:
+ status = "! (unknown)";
+ break;
+ }
+ thread_unlock(t);
+ } else {
+ /*
+ * there is a hole in the exit code, where a proc can have
+ * no threads but it is yet to be flagged SZOMB. We will
+ * assume we are about to become a zombie
+ */
+ status = "Z (zombie)";
+ }
+
+ up = PTOU(p);
+ mutex_enter(&p->p_crlock);
+ crhold(cr = p->p_cred);
+ mutex_exit(&p->p_crlock);
+
+ lxpr_uiobuf_printf(uiobuf,
+ "Name:\t%s\n"
+ "State:\t%s\n"
+ "Tgid:\t%d\n"
+ "Pid:\t%d\n"
+ "PPid:\t%d\n"
+ "TracerPid:\t%d\n"
+ "Uid:\t%u\t%u\t%u\t%u\n"
+ "Gid:\t%u\t%u\t%u\t%u\n"
+ "FDSize:\t%d\n"
+ "Groups:\t",
+ up->u_comm,
+ status,
+ pid, /* thread group id - same as pid */
+ pid,
+ ppid,
+ 0,
+ crgetruid(cr), crgetuid(cr), crgetsuid(cr), crgetuid(cr),
+ crgetrgid(cr), crgetgid(cr), crgetsgid(cr), crgetgid(cr),
+ p->p_fno_ctl);
+
+ ngroups = crgetngroups(cr);
+ groups = crgetgroups(cr);
+ for (i = 0; i < ngroups; i++) {
+ lxpr_uiobuf_printf(uiobuf,
+ "%u ",
+ groups[i]);
+ }
+ crfree(cr);
+
+ as = p->p_as;
+ if ((p->p_stat != SZOMB) && !(p->p_flag & SSYS) && (as != &kas)) {
+ mutex_exit(&p->p_lock);
+ AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
+ vsize = as->a_resvsize;
+ rss = rm_asrss(as);
+ nlocked = get_locked(p);
+ AS_LOCK_EXIT(as, &as->a_lock);
+ mutex_enter(&p->p_lock);
+
+ lxpr_uiobuf_printf(uiobuf,
+ "\n"
+ "VmSize:\t%8lu kB\n"
+ "VmLck:\t%8lu kB\n"
+ "VmRSS:\t%8lu kB\n"
+ "VmData:\t%8lu kB\n"
+ "VmStk:\t%8lu kB\n"
+ "VmExe:\t%8lu kB\n"
+ "VmLib:\t%8lu kB",
+ btok(vsize),
+ ptok(nlocked),
+ ptok(rss),
+ 0l,
+ btok(p->p_stksize),
+ ptok(rss),
+ 0l);
+ }
+
+ sigemptyset(&current);
+ sigemptyset(&ignore);
+ sigemptyset(&handle);
+
+ for (i = 1; i < NSIG; i++) {
+ lx_sig = stol_signo[i];
+
+ if ((lx_sig > 0) && (lx_sig <= LX_NSIG)) {
+ if (sigismember(&p->p_sig, i))
+ sigaddset(&current, lx_sig);
+
+ if (up->u_signal[i - 1] == SIG_IGN)
+ sigaddset(&ignore, lx_sig);
+ else if (up->u_signal[i - 1] != SIG_DFL)
+ sigaddset(&handle, lx_sig);
+ }
+ }
+
+ lxpr_uiobuf_printf(uiobuf,
+ "\n"
+ "SigPnd:\t%08x%08x\n"
+ "SigBlk:\t%08x%08x\n"
+ "SigIgn:\t%08x%08x\n"
+ "SigCgt:\t%08x%08x\n"
+ "CapInh:\t%016x\n"
+ "CapPrm:\t%016x\n"
+ "CapEff:\t%016x\n",
+ current.__sigbits[1], current.__sigbits[0],
+ 0, 0, /* signals blocked on per thread basis */
+ ignore.__sigbits[1], ignore.__sigbits[0],
+ handle.__sigbits[1], handle.__sigbits[0],
+ /* Can't do anything with linux capabilities */
+ 0,
+ 0,
+ 0);
+
+ lxpr_unlock(p);
+}
+
+
+/*
+ * lxpr_read_pid_stat(): pid stat file
+ */
+static void
+lxpr_read_pid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ kthread_t *t;
+ struct as *as;
+ char stat;
+ pid_t pid, ppid, pgpid, spid;
+ gid_t psgid;
+ dev_t psdev;
+ size_t rss, vsize;
+ int nice, pri;
+ caddr_t wchan;
+ processorid_t cpu;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_STAT);
+
+ p = lxpr_lock(lxpnp->lxpr_pid);
+ if (p == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ return;
+ }
+
+ pid = p->p_pid;
+
+ /*
+ * Set Linux defaults if we're the zone's init process
+ */
+ if (pid == curproc->p_zone->zone_proc_initpid) {
+ pid = 1; /* PID for init */
+ ppid = 0; /* parent PID for init is 0 */
+ pgpid = 0; /* process group for init is 0 */
+ psgid = (gid_t)-1; /* credential GID for init is -1 */
+ spid = 0; /* session id for init is 0 */
+ psdev = 0; /* session device for init is 0 */
+ } else {
+ /*
+ * Make sure not to reference parent PIDs that reside outside
+ * the zone
+ */
+ ppid = ((p->p_flag & SZONETOP) ?
+ curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
+
+ /*
+ * Convert ppid to the Linux default of 1 if our parent is the
+ * zone's init process
+ */
+ if (ppid == curproc->p_zone->zone_proc_initpid)
+ ppid = 1;
+
+ pgpid = p->p_pgrp;
+
+ mutex_enter(&p->p_splock);
+ mutex_enter(&p->p_sessp->s_lock);
+ spid = p->p_sessp->s_sid;
+ psdev = p->p_sessp->s_dev;
+ if (p->p_sessp->s_cred)
+ psgid = crgetgid(p->p_sessp->s_cred);
+ else
+ psgid = crgetgid(p->p_cred);
+
+ mutex_exit(&p->p_sessp->s_lock);
+ mutex_exit(&p->p_splock);
+ }
+
+ t = prchoose(p);
+ if (t != NULL) {
+ switch (t->t_state) {
+ case TS_SLEEP:
+ stat = 'S'; break;
+ case TS_RUN:
+ case TS_ONPROC:
+ stat = 'R'; break;
+ case TS_ZOMB:
+ stat = 'Z'; break;
+ case TS_STOPPED:
+ stat = 'T'; break;
+ default:
+ stat = '!'; break;
+ }
+
+ if (CL_DONICE(t, NULL, 0, &nice) != 0)
+ nice = 0;
+
+ pri = t->t_pri;
+ wchan = t->t_wchan;
+ cpu = t->t_cpu->cpu_id;
+ thread_unlock(t);
+ } else {
+ /* Only zombies have no threads */
+ stat = 'Z';
+ nice = 0;
+ pri = 0;
+ wchan = 0;
+ cpu = 0;
+ }
+ as = p->p_as;
+ mutex_exit(&p->p_lock);
+ AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
+ vsize = as->a_resvsize;
+ rss = rm_asrss(as);
+ AS_LOCK_EXIT(as, &as->a_lock);
+ mutex_enter(&p->p_lock);
+
+ lxpr_uiobuf_printf(uiobuf,
+ "%d (%s) %c %d %d %d %d %d "
+ "%lu %lu %lu %lu %lu "
+ "%lu %lu %ld %ld "
+ "%d %d %d "
+ "%lu "
+ "%lu "
+ "%lu %ld %llu "
+ "%lu %lu %u "
+ "%lu %lu "
+ "%lu %lu %lu %lu "
+ "%lu "
+ "%lu %lu "
+ "%d "
+ "%d"
+ "\n",
+ pid, PTOU(p)->u_comm, stat, ppid, pgpid, spid, psdev, psgid,
+ 0l, 0l, 0l, 0l, 0l, /* flags, minflt, cminflt, majflt, cmajflt */
+ p->p_utime, p->p_stime, p->p_cutime, p->p_cstime,
+ pri, nice, p->p_lwpcnt,
+ 0l, /* itrealvalue (time before next SIGALRM) */
+ PTOU(p)->u_ticks,
+ vsize, rss, p->p_vmem_ctl,
+ 0l, 0l, USRSTACK, /* startcode, endcode, startstack */
+ 0l, 0l, /* kstkesp, kstkeip */
+ 0l, 0l, 0l, 0l, /* signal, blocked, sigignore, sigcatch */
+ wchan,
+ 0l, 0l, /* nswap, cnswap */
+ 0, /* exit_signal */
+ cpu);
+
+ lxpr_unlock(p);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_arp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_dev(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_uiobuf_printf(uiobuf, "Inter-| Receive "
+ " | Transmit\n");
+ lxpr_uiobuf_printf(uiobuf, " face |bytes packets errs drop fifo"
+ " frame compressed multicast|bytes packets errs drop fifo"
+ " colls carrier compressed\n");
+
+ /*
+ * Data about each interface should go here, but that shouldn't be added
+ * unless there is an lxproc reader that actually makes use of it (and
+ * doesn't need anything else that we refuse to provide)...
+ */
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_dev_mcast(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_igmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_ip_mr_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_ip_mr_vif(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_mcfilter(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_netstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_raw(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_rpc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_rt_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_sockstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_snmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_tcp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_udp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_unix(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_kmsg(): read the contents of the kernel message queue. We
+ * translate this into the reception of console messages for this zone; each
+ * read copies out a single zone console message, or blocks until the next one
+ * is produced, unless we're open non-blocking, in which case we return after
+ * 1ms.
+ */
+
+#define LX_KMSG_PRI "<0>"
+
+static void
+lxpr_read_kmsg(lxpr_node_t *lxpnp, struct lxpr_uiobuf *uiobuf)
+{
+ ldi_handle_t lh = lxpnp->lxpr_cons_ldih;
+ mblk_t *mp;
+ timestruc_t to;
+ timestruc_t *tp = NULL;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_KMSG);
+
+ if (lxpr_uiobuf_nonblock(uiobuf)) {
+ to.tv_sec = 0;
+ to.tv_nsec = 1000000; /* 1msec */
+ tp = &to;
+ }
+
+ if (ldi_getmsg(lh, &mp, tp) == 0) {
+ /*
+ * lx procfs doesn't like successive reads to the same file
+ * descriptor unless we do an explicit rewind each time.
+ */
+ lxpr_uiobuf_seek(uiobuf, 0);
+
+ lxpr_uiobuf_printf(uiobuf, "%s%s", LX_KMSG_PRI,
+ mp->b_cont->b_rptr);
+
+ freemsg(mp);
+ }
+}
+
+/*
+ * lxpr_read_loadavg(): read the contents of the "loadavg" file. We do just
+ * enough for uptime and other simple lxproc readers to work
+ */
+extern int nthread;
+
+static void
+lxpr_read_loadavg(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ ulong_t avenrun1;
+ ulong_t avenrun5;
+ ulong_t avenrun15;
+ ulong_t avenrun1_cs;
+ ulong_t avenrun5_cs;
+ ulong_t avenrun15_cs;
+ int loadavg[3];
+ int *loadbuf;
+ cpupart_t *cp;
+ zone_t *zone = LXPTOZ(lxpnp);
+
+ uint_t nrunnable = 0;
+ rctl_qty_t nlwps;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_LOADAVG);
+
+ mutex_enter(&cpu_lock);
+
+ /*
+ * Need to add up values over all CPU partitions. If pools are active,
+ * only report the values of the zone's partition, which by definition
+ * includes the current CPU.
+ */
+ if (pool_pset_enabled()) {
+ psetid_t psetid = zone_pset_get(curproc->p_zone);
+
+ ASSERT(curproc->p_zone != &zone0);
+ cp = CPU->cpu_part;
+
+ nrunnable = cp->cp_nrunning + cp->cp_nrunnable;
+ (void) cpupart_get_loadavg(psetid, &loadavg[0], 3);
+ loadbuf = &loadavg[0];
+ } else {
+ cp = cp_list_head;
+ do {
+ nrunnable += cp->cp_nrunning + cp->cp_nrunnable;
+ } while ((cp = cp->cp_next) != cp_list_head);
+
+ loadbuf = zone == global_zone ?
+ &avenrun[0] : zone->zone_avenrun;
+ }
+
+ /*
+ * If we're in the non-global zone, we'll report the total number of
+ * LWPs in the zone for the "nproc" parameter of /proc/loadavg,
+ * otherwise will just use nthread (which will include kernel threads,
+ * but should be good enough for lxproc).
+ */
+ nlwps = zone == global_zone ? nthread : zone->zone_nlwps;
+
+ mutex_exit(&cpu_lock);
+
+ avenrun1 = loadbuf[0] >> FSHIFT;
+ avenrun1_cs = ((loadbuf[0] & (FSCALE-1)) * 100) >> FSHIFT;
+ avenrun5 = loadbuf[1] >> FSHIFT;
+ avenrun5_cs = ((loadbuf[1] & (FSCALE-1)) * 100) >> FSHIFT;
+ avenrun15 = loadbuf[2] >> FSHIFT;
+ avenrun15_cs = ((loadbuf[2] & (FSCALE-1)) * 100) >> FSHIFT;
+
+ lxpr_uiobuf_printf(uiobuf,
+ "%ld.%02d %ld.%02d %ld.%02d %d/%d %d\n",
+ avenrun1, avenrun1_cs,
+ avenrun5, avenrun5_cs,
+ avenrun15, avenrun15_cs,
+ nrunnable, nlwps, 0);
+}
+
+/*
+ * lxpr_read_meminfo(): read the contents of the "meminfo" file.
+ */
+static void
+lxpr_read_meminfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ zone_t *zone = LXPTOZ(lxpnp);
+ int global = zone == global_zone;
+ long total_mem, free_mem, total_swap, used_swap;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_MEMINFO);
+
+ if (global || zone->zone_phys_mem_ctl == UINT64_MAX) {
+ total_mem = physmem * PAGESIZE;
+ free_mem = freemem * PAGESIZE;
+ } else {
+ total_mem = zone->zone_phys_mem_ctl;
+ free_mem = zone->zone_phys_mem_ctl - zone->zone_phys_mem;
+ }
+
+ if (global || zone->zone_max_swap_ctl == UINT64_MAX) {
+ total_swap = k_anoninfo.ani_max * PAGESIZE;
+ used_swap = k_anoninfo.ani_phys_resv * PAGESIZE;
+ } else {
+ mutex_enter(&zone->zone_mem_lock);
+ total_swap = zone->zone_max_swap_ctl;
+ used_swap = zone->zone_max_swap;
+ mutex_exit(&zone->zone_mem_lock);
+ }
+
+ lxpr_uiobuf_printf(uiobuf,
+ "MemTotal: %8lu kB\n"
+ "MemFree: %8lu kB\n"
+ "MemShared: %8u kB\n"
+ "Buffers: %8u kB\n"
+ "Cached: %8u kB\n"
+ "SwapCached:%8u kB\n"
+ "Active: %8u kB\n"
+ "Inactive: %8u kB\n"
+ "HighTotal: %8u kB\n"
+ "HighFree: %8u kB\n"
+ "LowTotal: %8u kB\n"
+ "LowFree: %8u kB\n"
+ "SwapTotal: %8lu kB\n"
+ "SwapFree: %8lu kB\n",
+ btok(total_mem), /* MemTotal */
+ btok(free_mem), /* MemFree */
+ 0, /* MemShared */
+ 0, /* Buffers */
+ 0, /* Cached */
+ 0, /* SwapCached */
+ 0, /* Active */
+ 0, /* Inactive */
+ 0, /* HighTotal */
+ 0, /* HighFree */
+ btok(total_mem), /* LowTotal */
+ btok(free_mem), /* LowFree */
+ btok(total_swap), /* SwapTotal */
+ btok(total_swap - used_swap)); /* SwapFree */
+}
+
+/*
+ * lxpr_read_mounts():
+ */
+/* ARGSUSED */
+static void
+lxpr_read_mounts(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ struct vfs *vfsp;
+ struct vfs *vfslist;
+ zone_t *zone = LXPTOZ(lxpnp);
+ struct print_data {
+ refstr_t *vfs_mntpt;
+ refstr_t *vfs_resource;
+ uint_t vfs_flag;
+ int vfs_fstype;
+ struct print_data *next;
+ } *print_head = NULL;
+ struct print_data **print_tail = &print_head;
+ struct print_data *printp;
+
+ vfs_list_read_lock();
+
+ if (zone == global_zone) {
+ vfsp = vfslist = rootvfs;
+ } else {
+ vfsp = vfslist = zone->zone_vfslist;
+ /*
+ * If the zone has a root entry, it will be the first in
+ * the list. If it doesn't, we conjure one up.
+ */
+ if (vfslist == NULL || strcmp(refstr_value(vfsp->vfs_mntpt),
+ zone->zone_rootpath) != 0) {
+ struct vfs *tvfsp;
+ /*
+ * The root of the zone is not a mount point. The vfs
+ * we want to report is that of the zone's root vnode.
+ */
+ tvfsp = zone->zone_rootvp->v_vfsp;
+
+ lxpr_uiobuf_printf(uiobuf,
+ "/ / %s %s 0 0\n",
+ vfssw[tvfsp->vfs_fstype].vsw_name,
+ tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
+
+ }
+ if (vfslist == NULL) {
+ vfs_list_unlock();
+ return;
+ }
+ }
+
+ /*
+ * Later on we have to do a lookupname, which can end up causing
+ * another vfs_list_read_lock() to be called. Which can lead to a
+ * deadlock. To avoid this, we extract the data we need into a local
+ * list, then we can run this list without holding vfs_list_read_lock()
+ * We keep the list in the same order as the vfs_list
+ */
+ do {
+ /* Skip mounts we shouldn't show */
+ if (vfsp->vfs_flag & VFS_NOMNTTAB) {
+ goto nextfs;
+ }
+
+ printp = kmem_alloc(sizeof (*printp), KM_SLEEP);
+ refstr_hold(vfsp->vfs_mntpt);
+ printp->vfs_mntpt = vfsp->vfs_mntpt;
+ refstr_hold(vfsp->vfs_resource);
+ printp->vfs_resource = vfsp->vfs_resource;
+ printp->vfs_flag = vfsp->vfs_flag;
+ printp->vfs_fstype = vfsp->vfs_fstype;
+ printp->next = NULL;
+
+ *print_tail = printp;
+ print_tail = &printp->next;
+
+nextfs:
+ vfsp = (zone == global_zone) ?
+ vfsp->vfs_next : vfsp->vfs_zone_next;
+
+ } while (vfsp != vfslist);
+
+ vfs_list_unlock();
+
+ /*
+ * now we can run through what we've extracted without holding
+ * vfs_list_read_lock()
+ */
+ printp = print_head;
+ while (printp != NULL) {
+ struct print_data *printp_next;
+ const char *resource;
+ char *mntpt;
+ struct vnode *vp;
+ int error;
+
+ mntpt = (char *)refstr_value(printp->vfs_mntpt);
+ resource = refstr_value(printp->vfs_resource);
+
+ if (mntpt != NULL && mntpt[0] != '\0')
+ mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
+ else
+ mntpt = "-";
+
+ error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
+
+ if (error != 0)
+ goto nextp;
+
+ if (!(vp->v_flag & VROOT)) {
+ VN_RELE(vp);
+ goto nextp;
+ }
+ VN_RELE(vp);
+
+ if (resource != NULL && resource[0] != '\0') {
+ if (resource[0] == '/') {
+ resource = ZONE_PATH_VISIBLE(resource, zone) ?
+ ZONE_PATH_TRANSLATE(resource, zone) :
+ mntpt;
+ }
+ } else {
+ resource = "-";
+ }
+
+ lxpr_uiobuf_printf(uiobuf,
+ "%s %s %s %s 0 0\n",
+ resource, mntpt, vfssw[printp->vfs_fstype].vsw_name,
+ printp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
+
+nextp:
+ printp_next = printp->next;
+ refstr_rele(printp->vfs_mntpt);
+ refstr_rele(printp->vfs_resource);
+ kmem_free(printp, sizeof (*printp));
+ printp = printp_next;
+
+ }
+}
+
+/*
+ * lxpr_read_partitions():
+ *
+ * We don't support partitions in a local zone because it requires access to
+ * physical devices. But we need to fake up enough of the file to show that we
+ * have no partitions.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_partitions(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_uiobuf_printf(uiobuf,
+ "major minor #blocks name rio rmerge rsect ruse "
+ "wio wmerge wsect wuse running use aveq\n\n");
+}
+
+/*
+ * lxpr_read_version(): read the contents of the "version" file.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_version(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ char *vers;
+
+ vers = lx_get_zone_kern_version(LXPTOZ(lxpnp));
+
+ lxpr_uiobuf_printf(uiobuf,
+ "%s version %s (%s version %d.%d.%d) "
+ "#%s SMP %s\n",
+ LX_UNAME_SYSNAME, vers,
+#if defined(__GNUC__)
+ "gcc",
+ __GNUC__,
+ __GNUC_MINOR__,
+ __GNUC_PATCHLEVEL__,
+#else
+ "Sun C",
+ __SUNPRO_C / 0x100,
+ (__SUNPRO_C & 0xff) / 0x10,
+ __SUNPRO_C & 0xf,
+#endif
+ LX_UNAME_VERSION,
+ "00:00:00 00/00/00");
+}
+
+/*
+ * lxpr_read_stat(): read the contents of the "stat" file.
+ *
+ */
+/* ARGSUSED */
+static void
+lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ cpu_t *cp, *cpstart;
+ int pools_enabled;
+ ulong_t idle_cum = 0;
+ ulong_t sys_cum = 0;
+ ulong_t user_cum = 0;
+ ulong_t irq_cum = 0;
+ ulong_t cpu_nrunnable_cum = 0;
+ ulong_t w_io_cum = 0;
+
+ ulong_t pgpgin_cum = 0;
+ ulong_t pgpgout_cum = 0;
+ ulong_t pgswapout_cum = 0;
+ ulong_t pgswapin_cum = 0;
+ ulong_t intr_cum = 0;
+ ulong_t pswitch_cum = 0;
+ ulong_t forks_cum = 0;
+ hrtime_t msnsecs[NCMSTATES];
+ char *lx_kern_version = lx_get_zone_kern_version(LXPTOZ(lxpnp));
+ /* temporary variable since scalehrtime modifies data in place */
+ hrtime_t tmptime;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_STAT);
+
+ mutex_enter(&cpu_lock);
+ pools_enabled = pool_pset_enabled();
+
+ /* Calculate cumulative stats */
+ cp = cpstart = CPU->cpu_part->cp_cpulist;
+ do {
+ int i;
+
+ /*
+ * Don't count CPUs that aren't even in the system
+ * or aren't up yet.
+ */
+ if ((cp->cpu_flags & CPU_EXISTS) == 0) {
+ continue;
+ }
+
+ get_cpu_mstate(cp, msnsecs);
+
+ idle_cum += NSEC_TO_TICK(msnsecs[CMS_IDLE]);
+ sys_cum += NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
+ user_cum += NSEC_TO_TICK(msnsecs[CMS_USER]);
+
+ pgpgin_cum += CPU_STATS(cp, vm.pgpgin);
+ pgpgout_cum += CPU_STATS(cp, vm.pgpgout);
+ pgswapin_cum += CPU_STATS(cp, vm.pgswapin);
+ pgswapout_cum += CPU_STATS(cp, vm.pgswapout);
+
+ if (strncmp(lx_kern_version, "2.4", 3) != 0) {
+ cpu_nrunnable_cum += cp->cpu_disp->disp_nrunnable;
+ w_io_cum += CPU_STATS(cp, sys.iowait);
+ for (i = 0; i < NCMSTATES; i++) {
+ tmptime = cp->cpu_intracct[i];
+ scalehrtime(&tmptime);
+ irq_cum += NSEC_TO_TICK(tmptime);
+ }
+ }
+
+ for (i = 0; i < PIL_MAX; i++)
+ intr_cum += CPU_STATS(cp, sys.intr[i]);
+
+ pswitch_cum += CPU_STATS(cp, sys.pswitch);
+ forks_cum += CPU_STATS(cp, sys.sysfork);
+ forks_cum += CPU_STATS(cp, sys.sysvfork);
+
+ if (pools_enabled)
+ cp = cp->cpu_next_part;
+ else
+ cp = cp->cpu_next;
+ } while (cp != cpstart);
+
+ if (strncmp(lx_kern_version, "2.4", 3) != 0) {
+ lxpr_uiobuf_printf(uiobuf,
+ "cpu %lu %lu %lu %lu %lu %lu %lu\n",
+ user_cum, 0L, sys_cum, idle_cum, 0L, irq_cum, 0L);
+ } else {
+ lxpr_uiobuf_printf(uiobuf,
+ "cpu %lu %lu %lu %lu\n",
+ user_cum, 0L, sys_cum, idle_cum);
+ }
+
+ /* Do per processor stats */
+ do {
+ int i;
+
+ ulong_t idle_ticks;
+ ulong_t sys_ticks;
+ ulong_t user_ticks;
+ ulong_t irq_ticks = 0;
+
+ /*
+ * Don't count CPUs that aren't even in the system
+ * or aren't up yet.
+ */
+ if ((cp->cpu_flags & CPU_EXISTS) == 0) {
+ continue;
+ }
+
+ get_cpu_mstate(cp, msnsecs);
+
+ idle_ticks = NSEC_TO_TICK(msnsecs[CMS_IDLE]);
+ sys_ticks = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
+ user_ticks = NSEC_TO_TICK(msnsecs[CMS_USER]);
+
+ for (i = 0; i < NCMSTATES; i++) {
+ tmptime = cp->cpu_intracct[i];
+ scalehrtime(&tmptime);
+ irq_ticks += NSEC_TO_TICK(tmptime);
+ }
+
+ if (strncmp(lx_kern_version, "2.4", 3) != 0) {
+ lxpr_uiobuf_printf(uiobuf,
+ "cpu%d %lu %lu %lu %lu %lu %lu %lu\n",
+ cp->cpu_id, user_ticks, 0L, sys_ticks, idle_ticks,
+ 0L, irq_ticks, 0L);
+ } else {
+ lxpr_uiobuf_printf(uiobuf,
+ "cpu%d %lu %lu %lu %lu\n",
+ cp->cpu_id,
+ user_ticks, 0L, sys_ticks, idle_ticks);
+ }
+
+ if (pools_enabled)
+ cp = cp->cpu_next_part;
+ else
+ cp = cp->cpu_next;
+ } while (cp != cpstart);
+
+ mutex_exit(&cpu_lock);
+
+ if (strncmp(lx_kern_version, "2.4", 3) != 0) {
+ lxpr_uiobuf_printf(uiobuf,
+ "page %lu %lu\n"
+ "swap %lu %lu\n"
+ "intr %lu\n"
+ "ctxt %lu\n"
+ "btime %lu\n"
+ "processes %lu\n"
+ "procs_running %lu\n"
+ "procs_blocked %lu\n",
+ pgpgin_cum, pgpgout_cum,
+ pgswapin_cum, pgswapout_cum,
+ intr_cum,
+ pswitch_cum,
+ boot_time,
+ forks_cum,
+ cpu_nrunnable_cum,
+ w_io_cum);
+ } else {
+ lxpr_uiobuf_printf(uiobuf,
+ "page %lu %lu\n"
+ "swap %lu %lu\n"
+ "intr %lu\n"
+ "ctxt %lu\n"
+ "btime %lu\n"
+ "processes %lu\n",
+ pgpgin_cum, pgpgout_cum,
+ pgswapin_cum, pgswapout_cum,
+ intr_cum,
+ pswitch_cum,
+ boot_time,
+ forks_cum);
+ }
+}
+
+/*
+ * lxpr_read_swaps():
+ *
+ * We don't support swap files or partitions, so just provide a dummy file with
+ * the necessary header.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_swaps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_uiobuf_printf(uiobuf,
+ "Filename "
+ "Type Size Used Priority\n");
+}
+
+/*
+ * inotify tunables exported via /proc.
+ */
+extern int inotify_maxevents;
+extern int inotify_maxinstances;
+extern int inotify_maxwatches;
+
+static void
+lxpr_read_sys_fs_inotify_max_queued_events(lxpr_node_t *lxpnp,
+ lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_QUEUED_EVENTS);
+ lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxevents);
+}
+
+static void
+lxpr_read_sys_fs_inotify_max_user_instances(lxpr_node_t *lxpnp,
+ lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_USER_INSTANCES);
+ lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxinstances);
+}
+
+static void
+lxpr_read_sys_fs_inotify_max_user_watches(lxpr_node_t *lxpnp,
+ lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_USER_WATCHES);
+ lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxwatches);
+}
+
+static void
+lxpr_read_sys_kernel_hostname(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_HOSTNAME);
+ lxpr_uiobuf_printf(uiobuf, "%s\n", uts_nodename());
+}
+
+static void
+lxpr_read_sys_kernel_msgmni(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ rctl_qty_t val;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_MSGMNI);
+
+ mutex_enter(&curproc->p_lock);
+ val = rctl_enforced_value(rc_zone_msgmni,
+ curproc->p_zone->zone_rctls, curproc);
+ mutex_exit(&curproc->p_lock);
+
+ lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val);
+}
+
+static void
+lxpr_read_sys_kernel_ngroups_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_NGROUPS_MAX);
+ lxpr_uiobuf_printf(uiobuf, "%d\n", ngroups_max);
+}
+
+static void
+lxpr_read_sys_kernel_pid_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_PID_MAX);
+ lxpr_uiobuf_printf(uiobuf, "%d\n", maxpid);
+}
+
+static void
+lxpr_read_sys_kernel_shmmax(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ rctl_qty_t val;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_SHMMAX);
+
+ mutex_enter(&curproc->p_lock);
+ val = rctl_enforced_value(rc_zone_shmmax,
+ curproc->p_zone->zone_rctls, curproc);
+ mutex_exit(&curproc->p_lock);
+
+ if (val > FOURGB)
+ val = FOURGB;
+
+ lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val);
+}
+
+static void
+lxpr_read_sys_kernel_threads_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_THREADS_MAX);
+ lxpr_uiobuf_printf(uiobuf, "%d\n", curproc->p_zone->zone_nlwps_ctl);
+}
+
+/*
+ * lxpr_read_uptime(): read the contents of the "uptime" file.
+ *
+ * format is: "%.2lf, %.2lf",uptime_secs, idle_secs
+ * Use fixed point arithmetic to get 2 decimal places
+ */
+/* ARGSUSED */
+static void
+lxpr_read_uptime(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ cpu_t *cp, *cpstart;
+ int pools_enabled;
+ ulong_t idle_cum = 0;
+ ulong_t cpu_count = 0;
+ ulong_t idle_s;
+ ulong_t idle_cs;
+ ulong_t up_s;
+ ulong_t up_cs;
+ hrtime_t birthtime;
+ hrtime_t centi_sec = 10000000; /* 10^7 */
+
+ ASSERT(lxpnp->lxpr_type == LXPR_UPTIME);
+
+ /* Calculate cumulative stats */
+ mutex_enter(&cpu_lock);
+ pools_enabled = pool_pset_enabled();
+
+ cp = cpstart = CPU;
+ do {
+ /*
+ * Don't count CPUs that aren't even in the system
+ * or aren't up yet.
+ */
+ if ((cp->cpu_flags & CPU_EXISTS) == 0) {
+ continue;
+ }
+
+ idle_cum += CPU_STATS(cp, sys.cpu_ticks_idle);
+ idle_cum += CPU_STATS(cp, sys.cpu_ticks_wait);
+ cpu_count += 1;
+
+ if (pools_enabled)
+ cp = cp->cpu_next_part;
+ else
+ cp = cp->cpu_next;
+ } while (cp != cpstart);
+ mutex_exit(&cpu_lock);
+
+ /* Getting the Zone zsched process startup time */
+ birthtime = LXPTOZ(lxpnp)->zone_zsched->p_mstart;
+ up_cs = (gethrtime() - birthtime) / centi_sec;
+ up_s = up_cs / 100;
+ up_cs %= 100;
+
+ ASSERT(cpu_count > 0);
+ idle_cum /= cpu_count;
+ idle_s = idle_cum / hz;
+ idle_cs = idle_cum % hz;
+ idle_cs *= 100;
+ idle_cs /= hz;
+
+ lxpr_uiobuf_printf(uiobuf,
+ "%ld.%02d %ld.%02d\n", up_s, up_cs, idle_s, idle_cs);
+}
+
+static const char *amd_x_edx[] = {
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, "syscall",
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, "mp",
+ "nx", NULL, "mmxext", NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, "lm", "3dnowext", "3dnow"
+};
+
+static const char *amd_x_ecx[] = {
+ "lahf_lm", NULL, "svm", NULL,
+ "altmovcr8"
+};
+
+static const char *tm_x_edx[] = {
+ "recovery", "longrun", NULL, "lrti"
+};
+
+/*
+ * Intel calls no-execute "xd" in its docs, but Linux still reports it as "nx."
+ */
+static const char *intc_x_edx[] = {
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, "syscall",
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ "nx", NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, "lm", NULL, NULL
+};
+
+static const char *intc_edx[] = {
+ "fpu", "vme", "de", "pse",
+ "tsc", "msr", "pae", "mce",
+ "cx8", "apic", NULL, "sep",
+ "mtrr", "pge", "mca", "cmov",
+ "pat", "pse36", "pn", "clflush",
+ NULL, "dts", "acpi", "mmx",
+ "fxsr", "sse", "sse2", "ss",
+ "ht", "tm", "ia64", "pbe"
+};
+
+/*
+ * "sse3" on linux is called "pni" (Prescott New Instructions).
+ */
+static const char *intc_ecx[] = {
+ "pni", NULL, NULL, "monitor",
+ "ds_cpl", NULL, NULL, "est",
+ "tm2", NULL, "cid", NULL,
+ NULL, "cx16", "xtpr"
+};
+
+static void
+lxpr_read_cpuinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ int i;
+ uint32_t bits;
+ cpu_t *cp, *cpstart;
+ int pools_enabled;
+ const char **fp;
+ char brandstr[CPU_IDSTRLEN];
+ struct cpuid_regs cpr;
+ int maxeax;
+ int std_ecx, std_edx, ext_ecx, ext_edx;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_CPUINFO);
+
+ mutex_enter(&cpu_lock);
+ pools_enabled = pool_pset_enabled();
+
+ cp = cpstart = CPU;
+ do {
+ /*
+ * This returns the maximum eax value for standard cpuid
+ * functions in eax.
+ */
+ cpr.cp_eax = 0;
+ (void) cpuid_insn(cp, &cpr);
+ maxeax = cpr.cp_eax;
+
+ /*
+ * Get standard x86 feature flags.
+ */
+ cpr.cp_eax = 1;
+ (void) cpuid_insn(cp, &cpr);
+ std_ecx = cpr.cp_ecx;
+ std_edx = cpr.cp_edx;
+
+ /*
+ * Now get extended feature flags.
+ */
+ cpr.cp_eax = 0x80000001;
+ (void) cpuid_insn(cp, &cpr);
+ ext_ecx = cpr.cp_ecx;
+ ext_edx = cpr.cp_edx;
+
+ (void) cpuid_getbrandstr(cp, brandstr, CPU_IDSTRLEN);
+
+ lxpr_uiobuf_printf(uiobuf,
+ "processor\t: %d\n"
+ "vendor_id\t: %s\n"
+ "cpu family\t: %d\n"
+ "model\t\t: %d\n"
+ "model name\t: %s\n"
+ "stepping\t: %d\n"
+ "cpu MHz\t\t: %u.%03u\n",
+ cp->cpu_id, cpuid_getvendorstr(cp), cpuid_getfamily(cp),
+ cpuid_getmodel(cp), brandstr, cpuid_getstep(cp),
+ (uint32_t)(cpu_freq_hz / 1000000),
+ ((uint32_t)(cpu_freq_hz / 1000)) % 1000);
+
+ lxpr_uiobuf_printf(uiobuf, "cache size\t: %u KB\n",
+ getl2cacheinfo(cp, NULL, NULL, NULL) / 1024);
+
+ if (is_x86_feature(x86_featureset, X86FSET_HTT)) {
+ /*
+ * 'siblings' is used for HT-style threads
+ */
+ lxpr_uiobuf_printf(uiobuf,
+ "physical id\t: %lu\n"
+ "siblings\t: %u\n",
+ pg_plat_hw_instance_id(cp, PGHW_CHIP),
+ cpuid_get_ncpu_per_chip(cp));
+ }
+
+ /*
+ * Since we're relatively picky about running on older hardware,
+ * we can be somewhat cavalier about the answers to these ones.
+ *
+ * In fact, given the hardware we support, we just say:
+ *
+ * fdiv_bug : no (if we're on a 64-bit kernel)
+ * hlt_bug : no
+ * f00f_bug : no
+ * coma_bug : no
+ * wp : yes (write protect in supervsr mode)
+ */
+ lxpr_uiobuf_printf(uiobuf,
+ "fdiv_bug\t: %s\n"
+ "hlt_bug \t: no\n"
+ "f00f_bug\t: no\n"
+ "coma_bug\t: no\n"
+ "fpu\t\t: %s\n"
+ "fpu_exception\t: %s\n"
+ "cpuid level\t: %d\n"
+ "flags\t\t:",
+#if defined(__i386)
+ fpu_pentium_fdivbug ? "yes" : "no",
+#else
+ "no",
+#endif /* __i386 */
+ fpu_exists ? "yes" : "no", fpu_exists ? "yes" : "no",
+ maxeax);
+
+ for (bits = std_edx, fp = intc_edx, i = 0;
+ i < sizeof (intc_edx) / sizeof (intc_edx[0]); fp++, i++)
+ if ((bits & (1 << i)) != 0 && *fp)
+ lxpr_uiobuf_printf(uiobuf, " %s", *fp);
+
+ /*
+ * name additional features where appropriate
+ */
+ switch (x86_vendor) {
+ case X86_VENDOR_Intel:
+ for (bits = ext_edx, fp = intc_x_edx, i = 0;
+ i < sizeof (intc_x_edx) / sizeof (intc_x_edx[0]);
+ fp++, i++)
+ if ((bits & (1 << i)) != 0 && *fp)
+ lxpr_uiobuf_printf(uiobuf, " %s", *fp);
+ break;
+
+ case X86_VENDOR_AMD:
+ for (bits = ext_edx, fp = amd_x_edx, i = 0;
+ i < sizeof (amd_x_edx) / sizeof (amd_x_edx[0]);
+ fp++, i++)
+ if ((bits & (1 << i)) != 0 && *fp)
+ lxpr_uiobuf_printf(uiobuf, " %s", *fp);
+
+ for (bits = ext_ecx, fp = amd_x_ecx, i = 0;
+ i < sizeof (amd_x_ecx) / sizeof (amd_x_ecx[0]);
+ fp++, i++)
+ if ((bits & (1 << i)) != 0 && *fp)
+ lxpr_uiobuf_printf(uiobuf, " %s", *fp);
+ break;
+
+ case X86_VENDOR_TM:
+ for (bits = ext_edx, fp = tm_x_edx, i = 0;
+ i < sizeof (tm_x_edx) / sizeof (tm_x_edx[0]);
+ fp++, i++)
+ if ((bits & (1 << i)) != 0 && *fp)
+ lxpr_uiobuf_printf(uiobuf, " %s", *fp);
+ break;
+ default:
+ break;
+ }
+
+ for (bits = std_ecx, fp = intc_ecx, i = 0;
+ i < sizeof (intc_ecx) / sizeof (intc_ecx[0]); fp++, i++)
+ if ((bits & (1 << i)) != 0 && *fp)
+ lxpr_uiobuf_printf(uiobuf, " %s", *fp);
+
+ lxpr_uiobuf_printf(uiobuf, "\n\n");
+
+ if (pools_enabled)
+ cp = cp->cpu_next_part;
+ else
+ cp = cp->cpu_next;
+ } while (cp != cpstart);
+
+ mutex_exit(&cpu_lock);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_fd(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_FD_FD);
+ lxpr_uiobuf_seterr(uiobuf, EFAULT);
+}
+
+/*
+ * lxpr_getattr(): Vnode operation for VOP_GETATTR()
+ */
+static int
+lxpr_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
+ caller_context_t *ct)
+{
+ register lxpr_node_t *lxpnp = VTOLXP(vp);
+ lxpr_nodetype_t type = lxpnp->lxpr_type;
+ extern uint_t nproc;
+ int error;
+
+ /*
+ * Return attributes of underlying vnode if ATTR_REAL
+ *
+ * but keep fd files with the symlink permissions
+ */
+ if (lxpnp->lxpr_realvp != NULL && (flags & ATTR_REAL)) {
+ vnode_t *rvp = lxpnp->lxpr_realvp;
+
+ /*
+ * withold attribute information to owner or root
+ */
+ if ((error = VOP_ACCESS(rvp, 0, 0, cr, ct)) != 0) {
+ return (error);
+ }
+
+ /*
+ * now its attributes
+ */
+ if ((error = VOP_GETATTR(rvp, vap, flags, cr, ct)) != 0) {
+ return (error);
+ }
+
+ /*
+ * if it's a file in lx /proc/pid/fd/xx then set its
+ * mode and keep it looking like a symlink, fifo or socket
+ */
+ if (type == LXPR_PID_FD_FD) {
+ vap->va_mode = lxpnp->lxpr_mode;
+ vap->va_type = lxpnp->lxpr_realvp->v_type;
+ vap->va_size = 0;
+ vap->va_nlink = 1;
+ }
+ return (0);
+ }
+
+ /* Default attributes, that may be overridden below */
+ bzero(vap, sizeof (*vap));
+ vap->va_atime = vap->va_mtime = vap->va_ctime = lxpnp->lxpr_time;
+ vap->va_nlink = 1;
+ vap->va_type = vp->v_type;
+ vap->va_mode = lxpnp->lxpr_mode;
+ vap->va_fsid = vp->v_vfsp->vfs_dev;
+ vap->va_blksize = DEV_BSIZE;
+ vap->va_uid = lxpnp->lxpr_uid;
+ vap->va_gid = lxpnp->lxpr_gid;
+ vap->va_nodeid = lxpnp->lxpr_ino;
+
+ switch (type) {
+ case LXPR_PROCDIR:
+ vap->va_nlink = nproc + 2 + PROCDIRFILES;
+ vap->va_size = (nproc + 2 + PROCDIRFILES) * LXPR_SDSIZE;
+ break;
+ case LXPR_PIDDIR:
+ vap->va_nlink = PIDDIRFILES;
+ vap->va_size = PIDDIRFILES * LXPR_SDSIZE;
+ break;
+ case LXPR_SELF:
+ vap->va_uid = crgetruid(curproc->p_cred);
+ vap->va_gid = crgetrgid(curproc->p_cred);
+ break;
+ default:
+ break;
+ }
+
+ vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size);
+ return (0);
+}
+
+/*
+ * lxpr_access(): Vnode operation for VOP_ACCESS()
+ */
+static int
+lxpr_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
+{
+ lxpr_node_t *lxpnp = VTOLXP(vp);
+ int shift = 0;
+ proc_t *tp;
+
+ /* lx /proc is a read only file system */
+ if (mode & VWRITE)
+ return (EROFS);
+
+ /*
+ * If this is a restricted file, check access permissions.
+ */
+ switch (lxpnp->lxpr_type) {
+ case LXPR_PIDDIR:
+ return (0);
+ case LXPR_PID_CURDIR:
+ case LXPR_PID_ENV:
+ case LXPR_PID_EXE:
+ case LXPR_PID_LIMITS:
+ case LXPR_PID_MAPS:
+ case LXPR_PID_MEM:
+ case LXPR_PID_ROOTDIR:
+ case LXPR_PID_FDDIR:
+ case LXPR_PID_FD_FD:
+ if ((tp = lxpr_lock(lxpnp->lxpr_pid)) == NULL)
+ return (ENOENT);
+ if (tp != curproc && secpolicy_proc_access(cr) != 0 &&
+ priv_proc_cred_perm(cr, tp, NULL, mode) != 0) {
+ lxpr_unlock(tp);
+ return (EACCES);
+ }
+ lxpr_unlock(tp);
+ default:
+ break;
+ }
+
+ if (lxpnp->lxpr_realvp != NULL) {
+ /*
+ * For these we use the underlying vnode's accessibility.
+ */
+ return (VOP_ACCESS(lxpnp->lxpr_realvp, mode, flags, cr, ct));
+ }
+
+ /* If user is root allow access regardless of permission bits */
+ if (secpolicy_proc_access(cr) == 0)
+ return (0);
+
+ /*
+ * Access check is based on only one of owner, group, public. If not
+ * owner, then check group. If not a member of the group, then check
+ * public access.
+ */
+ if (crgetuid(cr) != lxpnp->lxpr_uid) {
+ shift += 3;
+ if (!groupmember((uid_t)lxpnp->lxpr_gid, cr))
+ shift += 3;
+ }
+
+ mode &= ~(lxpnp->lxpr_mode << shift);
+
+ if (mode == 0)
+ return (0);
+
+ return (EACCES);
+}
+
+/* ARGSUSED */
+static vnode_t *
+lxpr_lookup_not_a_dir(vnode_t *dp, char *comp)
+{
+ return (NULL);
+}
+
+/*
+ * lxpr_lookup(): Vnode operation for VOP_LOOKUP()
+ */
+/* ARGSUSED */
+static int
+lxpr_lookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pathp,
+ int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
+ int *direntflags, pathname_t *realpnp)
+{
+ lxpr_node_t *lxpnp = VTOLXP(dp);
+ lxpr_nodetype_t type = lxpnp->lxpr_type;
+ int error;
+
+ ASSERT(dp->v_type == VDIR);
+ ASSERT(type < LXPR_NFILES);
+
+ /*
+ * we should never get here because the lookup
+ * is done on the realvp for these nodes
+ */
+ ASSERT(type != LXPR_PID_FD_FD &&
+ type != LXPR_PID_CURDIR &&
+ type != LXPR_PID_ROOTDIR);
+
+ /*
+ * restrict lookup permission to owner or root
+ */
+ if ((error = lxpr_access(dp, VEXEC, 0, cr, ct)) != 0) {
+ return (error);
+ }
+
+ /*
+ * Just return the parent vnode if that's where we are trying to go.
+ */
+ if (strcmp(comp, "..") == 0) {
+ VN_HOLD(lxpnp->lxpr_parent);
+ *vpp = lxpnp->lxpr_parent;
+ return (0);
+ }
+
+ /*
+ * Special handling for directory searches. Note: null component name
+ * denotes that the current directory is being searched.
+ */
+ if ((dp->v_type == VDIR) && (*comp == '\0' || strcmp(comp, ".") == 0)) {
+ VN_HOLD(dp);
+ *vpp = dp;
+ return (0);
+ }
+
+ *vpp = (lxpr_lookup_function[type](dp, comp));
+ return ((*vpp == NULL) ? ENOENT : 0);
+}
+
+/*
+ * Do a sequential search on the given directory table
+ */
+static vnode_t *
+lxpr_lookup_common(vnode_t *dp, char *comp, proc_t *p,
+ lxpr_dirent_t *dirtab, int dirtablen)
+{
+ lxpr_node_t *lxpnp;
+ int count;
+
+ for (count = 0; count < dirtablen; count++) {
+ if (strcmp(dirtab[count].d_name, comp) == 0) {
+ lxpnp = lxpr_getnode(dp, dirtab[count].d_type, p, 0);
+ dp = LXPTOV(lxpnp);
+ ASSERT(dp != NULL);
+ return (dp);
+ }
+ }
+ return (NULL);
+}
+
+static vnode_t *
+lxpr_lookup_piddir(vnode_t *dp, char *comp)
+{
+ proc_t *p;
+
+ ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PIDDIR);
+
+ p = lxpr_lock(VTOLXP(dp)->lxpr_pid);
+ if (p == NULL)
+ return (NULL);
+
+ dp = lxpr_lookup_common(dp, comp, p, piddir, PIDDIRFILES);
+
+ lxpr_unlock(p);
+
+ return (dp);
+}
+
+/*
+ * Lookup one of the process's open files.
+ */
+static vnode_t *
+lxpr_lookup_fddir(vnode_t *dp, char *comp)
+{
+ lxpr_node_t *dlxpnp = VTOLXP(dp);
+ lxpr_node_t *lxpnp;
+ vnode_t *vp = NULL;
+ proc_t *p;
+ file_t *fp;
+ uint_t fd;
+ int c;
+ uf_entry_t *ufp;
+ uf_info_t *fip;
+
+ ASSERT(dlxpnp->lxpr_type == LXPR_PID_FDDIR);
+
+ /*
+ * convert the string rendition of the filename
+ * to a file descriptor
+ */
+ fd = 0;
+ while ((c = *comp++) != '\0') {
+ int ofd;
+ if (c < '0' || c > '9')
+ return (NULL);
+
+ ofd = fd;
+ fd = 10*fd + c - '0';
+ /* integer overflow */
+ if (fd / 10 != ofd)
+ return (NULL);
+ }
+
+ /*
+ * get the proc to work with and lock it
+ */
+ p = lxpr_lock(dlxpnp->lxpr_pid);
+ if ((p == NULL))
+ return (NULL);
+
+ /*
+ * If the process is a zombie or system process
+ * it can't have any open files.
+ */
+ if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) {
+ lxpr_unlock(p);
+ return (NULL);
+ }
+
+ /*
+ * get us a fresh node/vnode
+ */
+ lxpnp = lxpr_getnode(dp, LXPR_PID_FD_FD, p, fd);
+
+ /*
+ * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from
+ * going away while we dereference into fi_list.
+ */
+ mutex_exit(&p->p_lock);
+
+ /*
+ * get open file info
+ */
+ fip = (&(p)->p_user.u_finfo);
+ mutex_enter(&fip->fi_lock);
+
+ if (fd < fip->fi_nfiles) {
+ UF_ENTER(ufp, fip, fd);
+ /*
+ * ensure the fd is still kosher.
+ * it may have gone between the readdir and
+ * the lookup
+ */
+ if (fip->fi_list[fd].uf_file == NULL) {
+ mutex_exit(&fip->fi_lock);
+ UF_EXIT(ufp);
+ mutex_enter(&p->p_lock);
+ lxpr_unlock(p);
+ lxpr_freenode(lxpnp);
+ return (NULL);
+ }
+
+ if ((fp = ufp->uf_file) != NULL)
+ vp = fp->f_vnode;
+ UF_EXIT(ufp);
+ }
+ mutex_exit(&fip->fi_lock);
+
+ if (vp == NULL) {
+ mutex_enter(&p->p_lock);
+ lxpr_unlock(p);
+ lxpr_freenode(lxpnp);
+ return (NULL);
+ } else {
+ /*
+ * Fill in the lxpr_node so future references will be able to
+ * find the underlying vnode. The vnode is held on the realvp.
+ */
+ lxpnp->lxpr_realvp = vp;
+ VN_HOLD(lxpnp->lxpr_realvp);
+ if (lxpnp->lxpr_realvp->v_type == VFIFO) {
+ /*
+ * lxpr_getnode initially sets the type to be VLNK for
+ * the LXPR_PID_FD_FD option, but that breaks fifo
+ * file descriptors (which are unlinked named pipes).
+ * We set this as a regular file so that open.2 comes
+ * into lxpr_open so we can do more work.
+ */
+ dp = LXPTOV(lxpnp);
+ dp->v_type = VREG;
+ }
+ }
+
+ mutex_enter(&p->p_lock);
+ lxpr_unlock(p);
+ dp = LXPTOV(lxpnp);
+ ASSERT(dp != NULL);
+
+ return (dp);
+}
+
+static vnode_t *
+lxpr_lookup_netdir(vnode_t *dp, char *comp)
+{
+ ASSERT(VTOLXP(dp)->lxpr_type == LXPR_NETDIR);
+
+ dp = lxpr_lookup_common(dp, comp, NULL, netdir, NETDIRFILES);
+
+ return (dp);
+}
+
+static vnode_t *
+lxpr_lookup_procdir(vnode_t *dp, char *comp)
+{
+ ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PROCDIR);
+
+ /*
+ * We know all the names of files & dirs in our file system structure
+ * except those that are pid names. These change as pids are created/
+ * deleted etc., so we just look for a number as the first char to see
+ * if we are we doing pid lookups.
+ *
+ * Don't need to check for "self" as it is implemented as a symlink
+ */
+ if (*comp >= '0' && *comp <= '9') {
+ pid_t pid = 0;
+ lxpr_node_t *lxpnp = NULL;
+ proc_t *p;
+ int c;
+
+ while ((c = *comp++) != '\0')
+ pid = 10 * pid + c - '0';
+
+ /*
+ * Can't continue if the process is still loading or it doesn't
+ * really exist yet (or maybe it just died!)
+ */
+ p = lxpr_lock(pid);
+ if (p == NULL)
+ return (NULL);
+
+ if (secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
+ lxpr_unlock(p);
+ return (NULL);
+ }
+
+ /*
+ * allocate and fill in a new lx /proc node
+ */
+ lxpnp = lxpr_getnode(dp, LXPR_PIDDIR, p, 0);
+
+ lxpr_unlock(p);
+
+ dp = LXPTOV(lxpnp);
+ ASSERT(dp != NULL);
+
+ return (dp);
+ }
+
+ /* Lookup fixed names */
+ return (lxpr_lookup_common(dp, comp, NULL, lx_procdir, PROCDIRFILES));
+}
+
+static vnode_t *
+lxpr_lookup_sysdir(vnode_t *dp, char *comp)
+{
+ ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYSDIR);
+ return (lxpr_lookup_common(dp, comp, NULL, sysdir, SYSDIRFILES));
+}
+
+static vnode_t *
+lxpr_lookup_sys_kerneldir(vnode_t *dp, char *comp)
+{
+ ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_KERNELDIR);
+ return (lxpr_lookup_common(dp, comp, NULL, sys_kerneldir,
+ SYS_KERNELDIRFILES));
+}
+
+static vnode_t *
+lxpr_lookup_sys_fsdir(vnode_t *dp, char *comp)
+{
+ ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_FSDIR);
+ return (lxpr_lookup_common(dp, comp, NULL, sys_fsdir,
+ SYS_FSDIRFILES));
+}
+
+static vnode_t *
+lxpr_lookup_sys_fs_inotifydir(vnode_t *dp, char *comp)
+{
+ ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_FS_INOTIFYDIR);
+ return (lxpr_lookup_common(dp, comp, NULL, sys_fs_inotifydir,
+ SYS_FS_INOTIFYDIRFILES));
+}
+
+/*
+ * lxpr_readdir(): Vnode operation for VOP_READDIR()
+ */
+/* ARGSUSED */
+static int
+lxpr_readdir(vnode_t *dp, uio_t *uiop, cred_t *cr, int *eofp,
+ caller_context_t *ct, int flags)
+{
+ lxpr_node_t *lxpnp = VTOLXP(dp);
+ lxpr_nodetype_t type = lxpnp->lxpr_type;
+ ssize_t uresid;
+ off_t uoffset;
+ int error;
+
+ ASSERT(dp->v_type == VDIR);
+ ASSERT(type < LXPR_NFILES);
+
+ /*
+ * we should never get here because the readdir
+ * is done on the realvp for these nodes
+ */
+ ASSERT(type != LXPR_PID_FD_FD &&
+ type != LXPR_PID_CURDIR &&
+ type != LXPR_PID_ROOTDIR);
+
+ /*
+ * restrict readdir permission to owner or root
+ */
+ if ((error = lxpr_access(dp, VREAD, 0, cr, ct)) != 0)
+ return (error);
+
+ uoffset = uiop->uio_offset;
+ uresid = uiop->uio_resid;
+
+ /* can't do negative reads */
+ if (uoffset < 0 || uresid <= 0)
+ return (EINVAL);
+
+ /* can't read directory entries that don't exist! */
+ if (uoffset % LXPR_SDSIZE)
+ return (ENOENT);
+
+ return (lxpr_readdir_function[lxpnp->lxpr_type](lxpnp, uiop, eofp));
+}
+
+/* ARGSUSED */
+static int
+lxpr_readdir_not_a_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ return (ENOTDIR);
+}
+
+/*
+ * This has the common logic for returning directory entries
+ */
+static int
+lxpr_readdir_common(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp,
+ lxpr_dirent_t *dirtab, int dirtablen)
+{
+ /* bp holds one dirent64 structure */
+ longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
+ dirent64_t *dirent = (dirent64_t *)bp;
+ ssize_t oresid; /* save a copy for testing later */
+ ssize_t uresid;
+
+ oresid = uiop->uio_resid;
+
+ /* clear out the dirent buffer */
+ bzero(bp, sizeof (bp));
+
+ /*
+ * Satisfy user request
+ */
+ while ((uresid = uiop->uio_resid) > 0) {
+ int dirindex;
+ off_t uoffset;
+ int reclen;
+ int error;
+
+ uoffset = uiop->uio_offset;
+ dirindex = (uoffset / LXPR_SDSIZE) - 2;
+
+ if (uoffset == 0) {
+
+ dirent->d_ino = lxpnp->lxpr_ino;
+ dirent->d_name[0] = '.';
+ dirent->d_name[1] = '\0';
+ reclen = DIRENT64_RECLEN(1);
+
+ } else if (uoffset == LXPR_SDSIZE) {
+
+ dirent->d_ino = lxpr_parentinode(lxpnp);
+ dirent->d_name[0] = '.';
+ dirent->d_name[1] = '.';
+ dirent->d_name[2] = '\0';
+ reclen = DIRENT64_RECLEN(2);
+
+ } else if (dirindex < dirtablen) {
+ int slen = strlen(dirtab[dirindex].d_name);
+
+ dirent->d_ino = lxpr_inode(dirtab[dirindex].d_type,
+ lxpnp->lxpr_pid, 0);
+
+ ASSERT(slen < LXPNSIZ);
+ (void) strcpy(dirent->d_name, dirtab[dirindex].d_name);
+ reclen = DIRENT64_RECLEN(slen);
+
+ } else {
+ /* Run out of table entries */
+ if (eofp) {
+ *eofp = 1;
+ }
+ return (0);
+ }
+
+ dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
+ dirent->d_reclen = (ushort_t)reclen;
+
+ /*
+ * if the size of the data to transfer is greater
+ * that that requested then we can't do it this transfer.
+ */
+ if (reclen > uresid) {
+ /*
+ * Error if no entries have been returned yet.
+ */
+ if (uresid == oresid) {
+ return (EINVAL);
+ }
+ break;
+ }
+
+ /*
+ * uiomove() updates both uiop->uio_resid and uiop->uio_offset
+ * by the same amount. But we want uiop->uio_offset to change
+ * in increments of LXPR_SDSIZE, which is different from the
+ * number of bytes being returned to the user. So we set
+ * uiop->uio_offset separately, ignoring what uiomove() does.
+ */
+ if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
+ uiop)) != 0)
+ return (error);
+
+ uiop->uio_offset = uoffset + LXPR_SDSIZE;
+ }
+
+ /* Have run out of space, but could have just done last table entry */
+ if (eofp) {
+ *eofp =
+ (uiop->uio_offset >= ((dirtablen+2) * LXPR_SDSIZE)) ? 1 : 0;
+ }
+ return (0);
+}
+
+
+static int
+lxpr_readdir_procdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ /* bp holds one dirent64 structure */
+ longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
+ dirent64_t *dirent = (dirent64_t *)bp;
+ ssize_t oresid; /* save a copy for testing later */
+ ssize_t uresid;
+ off_t uoffset;
+ zoneid_t zoneid;
+ pid_t pid;
+ int error;
+ int ceof;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PROCDIR);
+
+ oresid = uiop->uio_resid;
+ zoneid = LXPTOZ(lxpnp)->zone_id;
+
+ /*
+ * We return directory entries in the order: "." and ".." then the
+ * unique lxproc files, then the directories corresponding to the
+ * running processes. We have defined this as the ordering because
+ * it allows us to more easily keep track of where we are betwen calls
+ * to getdents(). If the number of processes changes between calls
+ * then we can't lose track of where we are in the lxproc files.
+ */
+
+ /* Do the fixed entries */
+ error = lxpr_readdir_common(lxpnp, uiop, &ceof, lx_procdir,
+ PROCDIRFILES);
+
+ /* Finished if we got an error or if we couldn't do all the table */
+ if (error != 0 || ceof == 0)
+ return (error);
+
+ /* clear out the dirent buffer */
+ bzero(bp, sizeof (bp));
+
+ /* Do the process entries */
+ while ((uresid = uiop->uio_resid) > 0) {
+ proc_t *p;
+ int len;
+ int reclen;
+ int i;
+
+ uoffset = uiop->uio_offset;
+
+ /*
+ * Stop when entire proc table has been examined.
+ */
+ i = (uoffset / LXPR_SDSIZE) - 2 - PROCDIRFILES;
+ if (i >= v.v_proc) {
+ /* Run out of table entries */
+ if (eofp) {
+ *eofp = 1;
+ }
+ return (0);
+ }
+ mutex_enter(&pidlock);
+
+ /*
+ * Skip indices for which there is no pid_entry, PIDs for
+ * which there is no corresponding process, a PID of 0,
+ * and anything the security policy doesn't allow
+ * us to look at.
+ */
+ if ((p = pid_entry(i)) == NULL || p->p_stat == SIDL ||
+ p->p_pid == 0 ||
+ secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
+ mutex_exit(&pidlock);
+ goto next;
+ }
+ mutex_exit(&pidlock);
+
+ /*
+ * Convert pid to the Linux default of 1 if we're the zone's
+ * init process, otherwise use the value from the proc
+ * structure
+ */
+ pid = ((p->p_pid != curproc->p_zone->zone_proc_initpid) ?
+ p->p_pid : 1);
+
+ /*
+ * If this /proc was mounted in the global zone, view
+ * all procs; otherwise, only view zone member procs.
+ */
+ if (zoneid != GLOBAL_ZONEID && p->p_zone->zone_id != zoneid) {
+ goto next;
+ }
+
+ ASSERT(p->p_stat != 0);
+
+ dirent->d_ino = lxpr_inode(LXPR_PIDDIR, pid, 0);
+ len = snprintf(dirent->d_name, LXPNSIZ, "%d", pid);
+ ASSERT(len < LXPNSIZ);
+ reclen = DIRENT64_RECLEN(len);
+
+ dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
+ dirent->d_reclen = (ushort_t)reclen;
+
+ /*
+ * if the size of the data to transfer is greater
+ * that that requested then we can't do it this transfer.
+ */
+ if (reclen > uresid) {
+ /*
+ * Error if no entries have been returned yet.
+ */
+ if (uresid == oresid)
+ return (EINVAL);
+ break;
+ }
+
+ /*
+ * uiomove() updates both uiop->uio_resid and uiop->uio_offset
+ * by the same amount. But we want uiop->uio_offset to change
+ * in increments of LXPR_SDSIZE, which is different from the
+ * number of bytes being returned to the user. So we set
+ * uiop->uio_offset separately, in the increment of this for
+ * the loop, ignoring what uiomove() does.
+ */
+ if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
+ uiop)) != 0)
+ return (error);
+next:
+ uiop->uio_offset = uoffset + LXPR_SDSIZE;
+ }
+
+ if (eofp != NULL) {
+ *eofp = (uiop->uio_offset >=
+ ((v.v_proc + PROCDIRFILES + 2) * LXPR_SDSIZE)) ? 1 : 0;
+ }
+
+ return (0);
+}
+
+static int
+lxpr_readdir_piddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ proc_t *p;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PIDDIR);
+
+ /* can't read its contents if it died */
+ mutex_enter(&pidlock);
+
+ p = prfind((lxpnp->lxpr_pid == 1) ?
+ curproc->p_zone->zone_proc_initpid : lxpnp->lxpr_pid);
+
+ if (p == NULL || p->p_stat == SIDL) {
+ mutex_exit(&pidlock);
+ return (ENOENT);
+ }
+ mutex_exit(&pidlock);
+
+ return (lxpr_readdir_common(lxpnp, uiop, eofp, piddir, PIDDIRFILES));
+}
+
+static int
+lxpr_readdir_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_NETDIR);
+ return (lxpr_readdir_common(lxpnp, uiop, eofp, netdir, NETDIRFILES));
+}
+
+static int
+lxpr_readdir_fddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ /* bp holds one dirent64 structure */
+ longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
+ dirent64_t *dirent = (dirent64_t *)bp;
+ ssize_t oresid; /* save a copy for testing later */
+ ssize_t uresid;
+ off_t uoffset;
+ int error;
+ int ceof;
+ proc_t *p;
+ int fddirsize = -1;
+ uf_info_t *fip;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_FDDIR);
+
+ oresid = uiop->uio_resid;
+
+ /* can't read its contents if it died */
+ p = lxpr_lock(lxpnp->lxpr_pid);
+ if (p == NULL)
+ return (ENOENT);
+
+ if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas))
+ fddirsize = 0;
+
+ /*
+ * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from
+ * going away while we iterate over its fi_list.
+ */
+ mutex_exit(&p->p_lock);
+
+ /* Get open file info */
+ fip = (&(p)->p_user.u_finfo);
+ mutex_enter(&fip->fi_lock);
+
+ if (fddirsize == -1)
+ fddirsize = fip->fi_nfiles;
+
+ /* Do the fixed entries (in this case just "." & "..") */
+ error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0);
+
+ /* Finished if we got an error or if we couldn't do all the table */
+ if (error != 0 || ceof == 0)
+ goto out;
+
+ /* clear out the dirent buffer */
+ bzero(bp, sizeof (bp));
+
+ /*
+ * Loop until user's request is satisfied or until
+ * all file descriptors have been examined.
+ */
+ for (; (uresid = uiop->uio_resid) > 0;
+ uiop->uio_offset = uoffset + LXPR_SDSIZE) {
+ int reclen;
+ int fd;
+ int len;
+
+ uoffset = uiop->uio_offset;
+
+ /*
+ * Stop at the end of the fd list
+ */
+ fd = (uoffset / LXPR_SDSIZE) - 2;
+ if (fd >= fddirsize) {
+ if (eofp) {
+ *eofp = 1;
+ }
+ goto out;
+ }
+
+ if (fip->fi_list[fd].uf_file == NULL)
+ continue;
+
+ dirent->d_ino = lxpr_inode(LXPR_PID_FD_FD, lxpnp->lxpr_pid, fd);
+ len = snprintf(dirent->d_name, LXPNSIZ, "%d", fd);
+ ASSERT(len < LXPNSIZ);
+ reclen = DIRENT64_RECLEN(len);
+
+ dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
+ dirent->d_reclen = (ushort_t)reclen;
+
+ if (reclen > uresid) {
+ /*
+ * Error if no entries have been returned yet.
+ */
+ if (uresid == oresid)
+ error = EINVAL;
+ goto out;
+ }
+
+ if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
+ uiop)) != 0)
+ goto out;
+ }
+
+ if (eofp != NULL) {
+ *eofp =
+ (uiop->uio_offset >= ((fddirsize+2) * LXPR_SDSIZE)) ? 1 : 0;
+ }
+
+out:
+ mutex_exit(&fip->fi_lock);
+ mutex_enter(&p->p_lock);
+ lxpr_unlock(p);
+ return (error);
+}
+
+static int
+lxpr_readdir_sysdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYSDIR);
+ return (lxpr_readdir_common(lxpnp, uiop, eofp, sysdir, SYSDIRFILES));
+}
+
+static int
+lxpr_readdir_sys_fsdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_FSDIR);
+ return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_fsdir,
+ SYS_FSDIRFILES));
+}
+
+static int
+lxpr_readdir_sys_fs_inotifydir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFYDIR);
+ return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_fs_inotifydir,
+ SYS_FS_INOTIFYDIRFILES));
+}
+
+static int
+lxpr_readdir_sys_kerneldir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNELDIR);
+ return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_kerneldir,
+ SYS_KERNELDIRFILES));
+}
+
+/*
+ * lxpr_readlink(): Vnode operation for VOP_READLINK()
+ */
+/* ARGSUSED */
+static int
+lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
+{
+ char bp[MAXPATHLEN + 1];
+ size_t buflen = sizeof (bp);
+ lxpr_node_t *lxpnp = VTOLXP(vp);
+ vnode_t *rvp = lxpnp->lxpr_realvp;
+ pid_t pid;
+ int error = 0;
+
+ /* must be a symbolic link file */
+ if (vp->v_type != VLNK)
+ return (EINVAL);
+
+ /* Try to produce a symlink name for anything that has a realvp */
+ if (rvp != NULL) {
+ if ((error = lxpr_access(vp, VREAD, 0, CRED(), ct)) != 0)
+ return (error);
+ if ((error = vnodetopath(NULL, rvp, bp, buflen, CRED())) != 0)
+ return (error);
+ } else {
+ switch (lxpnp->lxpr_type) {
+ case LXPR_SELF:
+ /*
+ * Convert pid to the Linux default of 1 if we're the
+ * zone's init process
+ */
+ pid = ((curproc->p_pid !=
+ curproc->p_zone->zone_proc_initpid)
+ ? curproc->p_pid : 1);
+
+ /*
+ * Don't need to check result as every possible int
+ * will fit within MAXPATHLEN bytes.
+ */
+ (void) snprintf(bp, buflen, "%d", pid);
+ break;
+ case LXPR_PID_CURDIR:
+ case LXPR_PID_ROOTDIR:
+ case LXPR_PID_EXE:
+ return (EACCES);
+ default:
+ /*
+ * Need to return error so that nothing thinks
+ * that the symlink is empty and hence "."
+ */
+ return (EINVAL);
+ }
+ }
+
+ /* copy the link data to user space */
+ return (uiomove(bp, strlen(bp), UIO_READ, uiop));
+}
+
+/*
+ * lxpr_inactive(): Vnode operation for VOP_INACTIVE()
+ * Vnode is no longer referenced, deallocate the file
+ * and all its resources.
+ */
+/* ARGSUSED */
+static void
+lxpr_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
+{
+ lxpr_freenode(VTOLXP(vp));
+}
+
+/*
+ * lxpr_sync(): Vnode operation for VOP_SYNC()
+ */
+static int
+lxpr_sync()
+{
+ /*
+ * Nothing to sync but this function must never fail
+ */
+ return (0);
+}
+
+/*
+ * lxpr_cmp(): Vnode operation for VOP_CMP()
+ */
+static int
+lxpr_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
+{
+ vnode_t *rvp;
+
+ while (vn_matchops(vp1, lxpr_vnodeops) &&
+ (rvp = VTOLXP(vp1)->lxpr_realvp) != NULL) {
+ vp1 = rvp;
+ }
+
+ while (vn_matchops(vp2, lxpr_vnodeops) &&
+ (rvp = VTOLXP(vp2)->lxpr_realvp) != NULL) {
+ vp2 = rvp;
+ }
+
+ if (vn_matchops(vp1, lxpr_vnodeops) || vn_matchops(vp2, lxpr_vnodeops))
+ return (vp1 == vp2);
+ return (VOP_CMP(vp1, vp2, ct));
+}
+
+/*
+ * lxpr_realvp(): Vnode operation for VOP_REALVP()
+ */
+static int
+lxpr_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
+{
+ vnode_t *rvp;
+
+ if ((rvp = VTOLXP(vp)->lxpr_realvp) != NULL) {
+ vp = rvp;
+ if (VOP_REALVP(vp, &rvp, ct) == 0)
+ vp = rvp;
+ }
+
+ *vpp = vp;
+ return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/sys/lx_audio.h b/usr/src/uts/common/brand/lx/sys/lx_audio.h
new file mode 100644
index 0000000000..cbb3431c4b
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_audio.h
@@ -0,0 +1,130 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LX_AUDIO_H
+#define _LX_AUDIO_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/zone.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * name for this driver
+ */
+#define LX_AUDIO_DRV "lx_audio"
+
+/*
+ * names for the minor nodes this driver exports
+ */
+#define LXA_MINORNAME_DEVCTL "lx_devctl"
+#define LXA_MINORNAME_DSP "lx_dsp"
+#define LXA_MINORNAME_MIXER "lx_mixer"
+
+/*
+ * minor numbers for the minor nodes this driver exporrts
+ */
+#define LXA_MINORNUM_DEVCTL 0
+#define LXA_MINORNUM_DSP 1
+#define LXA_MINORNUM_MIXER 2
+#define LXA_MINORNUM_COUNT 3
+
+/*
+ * driver ioctls
+ *
+ * note that we're layering on top of solaris audio devices so we want
+ * to make sure that our ioctls namespace doesn't conflict with theirs.
+ * looking in sys/audioio.h and sys/mixer.h we see that they seem to
+ * use an _IO key of 'A' and 'M', so we'll choose an _IO key of 'a.'
+ */
+
+/*
+ * administrative ioctls.
+ * these ioctls are only supported on the DEVCTL minor node
+ */
+#define LXA_IOC_ZONE_REG (_IOR('a', 0, lxa_zone_reg_t))
+#define LXA_IOC_ZONE_UNREG (_IOR('a', 1, lxa_zone_reg_t))
+
+
+/*
+ * audio and mixer device ioctls
+ * these ioctls are supported on DSP and MIXER minor nodes.
+ */
+#define LXA_IOC_GETMINORNUM (_IOR('a', 20, int))
+
+/*
+ * audio device ioctls.
+ * these ioctls are supports on DSP minor nodes.
+ */
+#define LXA_IOC_MMAP_OUTPUT (_IOR('a', 41, int))
+#define LXA_IOC_MMAP_PTR (_IOR('a', 42, int))
+#define LXA_IOC_GET_FRAG_INFO (_IOR('a', 43, lxa_frag_info_t))
+#define LXA_IOC_SET_FRAG_INFO (_IOR('a', 44, lxa_frag_info_t))
+
+/*
+ * mixer device ioctls.
+ * these ioctls are supports on MIXER minor nodes.
+ */
+#define LXA_IOC_MIXER_GET_VOL (_IOR('a', 60, lxa_mixer_levels_t))
+#define LXA_IOC_MIXER_SET_VOL (_IOR('a', 61, lxa_mixer_levels_t))
+#define LXA_IOC_MIXER_GET_MIC (_IOR('a', 62, lxa_mixer_levels_t))
+#define LXA_IOC_MIXER_SET_MIC (_IOR('a', 63, lxa_mixer_levels_t))
+#define LXA_IOC_MIXER_GET_PCM (_IOR('a', 64, lxa_mixer_levels_t))
+#define LXA_IOC_MIXER_SET_PCM (_IOR('a', 65, lxa_mixer_levels_t))
+
+/* command structure for LXA_IOC_ZONE_REG */
+#define LXA_INTSTRLEN 32
+typedef struct lxa_zone_reg {
+ char lxa_zr_zone_name[ZONENAME_MAX];
+ char lxa_zr_inputdev[LXA_INTSTRLEN];
+ char lxa_zr_outputdev[LXA_INTSTRLEN];
+} lxa_zone_reg_t;
+
+/* command structure for LXA_IOC_GET_FRAG_INFO and LXA_IOC_SET_FRAG_INFO */
+typedef struct lxa_frag_info {
+ int lxa_fi_size;
+ int lxa_fi_cnt;
+} lxa_frag_info_t;
+
+/* command structure for LXA_IOC_MIXER_GET_* and LXA_IOC_MIXER_SET_* */
+typedef struct lxa_mixer_levels {
+ int lxa_ml_gain;
+ int lxa_ml_balance;
+} lxa_mixer_levels_t;
+
+/* verify that a solaris mixer level structure has valid values */
+#define LXA_MIXER_LEVELS_OK(x) (((x)->lxa_ml_gain >= AUDIO_MIN_GAIN) && \
+ ((x)->lxa_ml_gain <= AUDIO_MAX_GAIN) && \
+ ((x)->lxa_ml_balance >= AUDIO_LEFT_BALANCE) && \
+ ((x)->lxa_ml_balance <= AUDIO_RIGHT_BALANCE))
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_AUDIO_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_autofs.h b/usr/src/uts/common/brand/lx/sys/lx_autofs.h
new file mode 100644
index 0000000000..4436226deb
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_autofs.h
@@ -0,0 +1,334 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LX_AUTOFS_H
+#define _LX_AUTOFS_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * The lx_autofs filesystem exists to emulate the Linux autofs filesystem
+ * and provide support for the Linux "automount" automounter.
+ *
+ *
+ *
+ * +++ Linux automounter background.
+ *
+ * Linux has two automounters: "amd" and "automount"
+ *
+ * 1) "amd" is a userland NFS server. It basically mounts an NFS filesystem
+ * at an automount point, and it acts as the NFS server for the mount. When
+ * an access is done to that NFS filesystem, the access is redirected by the
+ * kernel to the "amd" process via rpc. "amd" then looks up any information
+ * required to resolve the requests, mounts real NFS filesystems if
+ * necessary, and returns. "amd" has it's own strange configuration
+ * mechanism that doesn't seem to be very compatabile with Solaris's network
+ * based automounter map support.
+ *
+ * 2) "automount" is the other Linux automounter. It utilizes a kernel
+ * filesystem (autofs) to provide it's functionality. Basically, it mounts
+ * the autofs filesystem at any automounter controlled mount point. This
+ * filesystem then intercepts and redirects lookup operations (and only
+ * lookup ops) to the userland automounter process via a pipe. (The
+ * pipe to the automounter is establised via mount options when the autofs
+ * filesystem is mounted.) When the automounter recieves a request via this
+ * pipe, it does lookups to whatever backing store it's configured to use,
+ * does mkdir operations on the autofs filesystem, mounts remote NFS
+ * filesystems on any leaf directories it just created, and signals the
+ * autofs filesystem via an ioctl to let it know that the lookup can
+ * continue.
+ *
+ *
+ *
+ * +++ Linux autofs (and automount daemon) notes
+ *
+ * Since we're mimicking the behavior of the Linux autofs filesystem it's
+ * important to document some of it's observed behavior here since there's
+ * no doubt that in the future this behavior will change. These comments
+ * apply to the behavior of the automounter as observed on a system
+ * running Linux v2.4.21 (autofs is bundled with the Linux kernel).
+ *
+ * A) Autofs allows root owned, non-automounter processes to create
+ * directories in the autofs filesystem. The autofs filesystem treats the
+ * automounter's process group as special, but it doesn't prevent root
+ * processes outside of the automounter's process group from creating new
+ * directories in the autofs filesystem.
+ *
+ * B) Autofs doesn't allow creation of any non-directory entries in the
+ * autofs filesystem. No entity can create files (e.g. /bin/touch or
+ * VOP_CREATE/VOP_SYMLINK/etc.) The only entries that can exist within
+ * the autofs filesystem are directories.
+ *
+ * C) Autofs only intercepts vop lookup operations. Notably, it does _not_
+ * intercept and re-direct vop readdir operations. This means that the
+ * observed behavior of the Linux automounter can be considerably different
+ * from that of the Solaris automounter. Specifically, on Solaris if autofs
+ * mount point is mounted _without_ the -nobrowse option then if a user does
+ * an ls operation (which translates into a vop readdir operation) then the
+ * automounter will intercept that operation and list all the possible
+ * directories and mount points without actually mounting any filesystems.
+ * Essentially, all automounter managed mount points on Linux will behave
+ * like "-nobrowse" mount points on Solaris. Here's an example to
+ * illustrate this. If /ws was mounted on Solaris without the -nobrowse
+ * option and an auto_ws yp map was setup as the backing store for this
+ * mount point, then an "ls /ws" would list all the keys in the map as
+ * valid directories, but an "ls /ws" on Linux would list an emptry
+ * directory.
+ *
+ * D) NFS mounts are performed by the automount process. When the automount
+ * process gets a redirected lookup request, it determines _all_ the
+ * possible remote mount points for that request, creates directory paths
+ * via mkdir, and mounts the remote filesystems on the newly created paths.
+ * So for example, if a machine called mcescher exported /var/crash and
+ * /var/core, an "ls /net/mcescher" would result in the following actions
+ * being done by the automounter:
+ * mkdir /net/mcescher
+ * mkdir /net/mcescher/var
+ * mkdir /net/mcescher/var/crash
+ * mkdir /net/mcescher/var/core
+ * mount mcescher:/var/crash /var/crash
+ * mount mcescher:/var/crash /var/core
+ * once the automounter compleated the work above it would signal the autofs
+ * filesystem (via an ioctl) that the lookup could continue.
+ *
+ * E.1) Autofs only redirects vop lookup operations for path entries that
+ * don't already exist in the autofs filesystem. So for the example above,
+ * an initial (after the start of the automounter) "ls /net/mcescher" would
+ * result in a request to the automounter. A subsequest "ls /net/mcescher"
+ * would not result in a request to the automounter. Even if
+ * /net/mcescher/var/crash and /net/mcescher/var/core were manually unmounted
+ * after the initial "ls /net/mcescher", a subsequest "ls /net/mcescher"
+ * would not result in a new request to the automounter.
+ *
+ * E.2) Autofs lookup requests that are sent to the automounter only include
+ * the root directory path component. So for example, after starting up
+ * the automounter if a user were to do a "ls /net/mcescher/var/crash", the
+ * lookup request actually sent to the automounter would just be for
+ * "mcescher". (The same request as if the user had done "ls /net/mcescher".)
+ *
+ * E.3) The two statements above aren't entirely entirely true. The Linux
+ * autofs filesystem will also redirect lookup operations for leaf
+ * directories that don't have a filesystem mounted on them. Using the
+ * example above, if a user did a "ls /net/mcescher", then manually
+ * unmounted /net/mcescher/var/crash, and then did an "ls
+ * /net/mcescher/var/crash", this would result in a request for
+ * "mcescher/var/crash" being sent to the automounter. The strange thing
+ * (a Linux bug perhaps) is that the automounter won't do anything with this
+ * request and the lookup will fail.
+ *
+ * F) The autofs filesystem communication protocol (what ioctls it supports
+ * and what data it passes to the automount process) are versioned. The
+ * source for the userland automount daemon (i looked at version v3.1.7)
+ * seemed to support two versions of the Linux kernel autofs implementation.
+ * Both versions supported communiciation with a pipe and the format of the
+ * structure passed via this pipe was the same. The difference between the
+ * two versions was in the functionality supported. (The v3 version has
+ * additional ioctls to support automount timeouts.)
+ *
+ *
+ *
+ * +++ lx_autofs notes
+ *
+ * 1) In general, the lx_autofs filesystem tries to mimic the behavior of the
+ * Linux autofs filesystem with the following exceptions:
+ *
+ * 1.1) We don't bother to implement the E.3 functionality listed above
+ * since it doesn't appear to be of any use.
+ *
+ * 1.2) We only implement v2 of the automounter protocol since
+ * implementing v3 would take a _lot_ more work. If this proves to be a
+ * problem we can re-visit this decision later. (More details about v3
+ * support are included in comments below.)
+ *
+ * 2) In general, the approach taken for lx_autofs is to keep it as simple
+ * as possible and to minimize it's memory usage. To do this all information
+ * about the contents of the lx_autofs filesystem are mirrored in the
+ * underlying filesystem that lx_autofs is mounted on and most vop operations
+ * are simply passed onto this underlying filesystem. This means we don't
+ * have to implement most the complex operations that a full filesystem
+ * normally has to implement. It also means that most of our filesystem state
+ * (wrt the contents of the filesystem) doesn't actually have to be stored
+ * in memory, we can simply go to the underlying filesystem to get it when
+ * it's requested. For the purposes of discussion, we'll call the underlying
+ * filesystem the "backing store."
+ *
+ * The backing store is actually directory called ".lx_afs" which is created in
+ * the directory where the lx_autofs filesystem is mounted. When the lx_autofs
+ * filesystem is unmounted this backing store directory is deleted. If this
+ * directory exists at mount time (perhaps the system crashed while a previous
+ * lx_autofs instance was mounted at the same location) it will be deleted.
+ * There are a few implications of using a backing store worth mentioning.
+ *
+ * 2.1) lx_autofs can't be mounted on a read only filesystem. If this
+ * proves to be a problem we can probably move the location of the
+ * backing store.
+ *
+ * 2.2) If the backing store filesystem runs out of space then the
+ * automounter process won't be able to create more directories and mount
+ * new filesystems. Of course, strange failures usually happen when
+ * filesystems run out of space.
+ *
+ * 3) Why aren't we using gfs? gfs has two different usage models.
+ *
+ * 3.1) I'm my own filesystem but i'm using gfs to help with managing
+ * readdir operations.
+ *
+ * 3.2) I'm a gfs filesystem and gfs is managing all my vnodes
+ *
+ * We're not using the 3.1 interfaces because we don't implement readdir
+ * ourselves. We pass all readdir operations onto the backing store
+ * filesystem and utilize its readdir implementation.
+ *
+ * We're not using the 3.2 interfaces because they are really designed for
+ * in memory filesystems where all of the filesystem state is stored in
+ * memory. They don't lend themselves to filesystems where part of the
+ * state is in memory and part of the state is on disk.
+ *
+ * For more information on gfs take a look at the block comments in the
+ * top of gfs.c
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Note that the name of the actual Solaris filesystem is lx_afs and not
+ * lx_autofs. This is becase filesystem names are stupidly limited to 8
+ * characters.
+ */
+#define LX_AUTOFS_NAME "lx_afs"
+
+/*
+ * Mount options supported.
+ */
+#define LX_MNTOPT_FD "fd"
+#define LX_MNTOPT_PGRP "pgrp"
+#define LX_MNTOPT_MINPROTO "minproto"
+#define LX_MNTOPT_MAXPROTO "maxproto"
+
+/* Version of the Linux kernel automount protocol we support. */
+#define LX_AUTOFS_PROTO_VERSION 2
+
+/*
+ * Command structure sent to automount process from lx_autofs via a pipe.
+ * This structure is the same for v2 and v3 of the automount protocol
+ * (the communication pipe is established at mount time).
+ */
+typedef struct lx_autofs_pkt {
+ int lap_protover; /* protocol version number */
+ int lap_constant; /* always set to 0 */
+ int lap_id; /* every pkt must have a unique id */
+ int lap_name_len; /* don't include newline or NULL */
+ char lap_name[256]; /* path component to lookup */
+} lx_autofs_pkt_t;
+
+/*
+ * Ioctls supprted (v2 protocol).
+ */
+#define LX_AUTOFS_IOC_READY 0x00009360 /* arg: int */
+#define LX_AUTOFS_IOC_FAIL 0x00009361 /* arg: int */
+#define LX_AUTOFS_IOC_CATATONIC 0x00009362 /* arg: <none> */
+
+/*
+ * Ioctls not supported (v3 protocol).
+ *
+ * Initially we're only going to support v2 of the Linux kernel automount
+ * protocol. This means that we don't support the following ioctls.
+ *
+ * 1) The protocol version ioctl (by not supporting it the automounter
+ * will assume version 2).
+ *
+ * 2) Automounter timeout ioctls. For v3 and later the automounter can
+ * be started with a timeout option. It will notify the filesystem of
+ * this timeout and, if any automounter filesystem root directory entry
+ * is not in use, it will notify the automounter via the LX_AUTOFS_IOC_EXPIRE
+ * ioctl. For example, if the timeout is 60 seconds, the Linux
+ * automounter will use the LX_AUTOFS_IOC_EXPIRE ioctl to query for
+ * timeouts more often than that. (v3.1.7 of the automount daemon would
+ * perform this ioctl every <timeout>/4 seconds.) Then, if the autofs
+ * filesystem will
+ * report top level directories that aren't in use to the automounter
+ * via this ioctl. If /net was managed by the automounter and
+ * there were the following mount points:
+ * /net/jurassic/var/crash
+ * /net/mcescher/var/crash
+ * and no one was looking at any crash dumps on mcescher but someone
+ * was analyzing a crash dump on jurassic, then after <timeout> seconds
+ * had passed the autofs filesystem would let the automounter know that
+ * "mcescher" could be unmounted. (Note the granularity of notification
+ * is directories in the root of the autofs filesystem.) Here's two
+ * ideas for how this functionality could be implemented on Solaris:
+ *
+ * 2.1) The easy incomplete way. Don't do any in-use detection. Simply
+ * tell the automounter it can try to unmount the filesystem every time
+ * the specified timeout passes. If the filesystem is in use then the
+ * unmount will fail. This would break down for remote hosts with multiple
+ * mounts. For example, if the automounter had mounted the following
+ * filesystems:
+ * /net/jurassic/var/crash
+ * /net/jurassic/var/core
+ * and the user was looking at a core file, and the timeout expired, the
+ * automounter would recieve notification to unmount "jurassic". Then
+ * it would unmount crash (which would succeed) and then to try unmount
+ * core (which would fail). After that (since the automounter only
+ * performs mounts for failed lookups in the root autofs directory)
+ * future access to /net/jurassic/var/crash would result to access
+ * to an empty autofs directory. We might be able to work around
+ * this by caching which root autofs directories we've timed out,
+ * then any access to paths that contain those directories could be
+ * stalled and we could resend another request to the automounter.
+ * This could work if the automounter ignores mount failures.
+ *
+ * 2.2) The hard correct way. The real difficulty here is detecting
+ * files in use on other filesystems (say NFS) that have been mounted
+ * on top of autofs. (Detecting in use autofs vnodes should be easy.)
+ * to do this we would probably have to create a new brand op to intercept
+ * mount/umount filesystem operations. Then using this entry point we
+ * could detect mounts of other filesystems on top of lx_autofs. When
+ * a successful mount finishes we would use the FEM (file event
+ * monitoring) framework to push a module onto that filesystem and
+ * intercept VOP operations that allocate/free vnodes in that filesystem.
+ * (We would also then have to track mount operations on top of that
+ * filesystem, etc.) this would allow us to properly detect any
+ * usage of subdirectories of an autofs directory.
+ */
+#define LX_AUTOFS_IOC_PROTOVER 0x80049363 /* arg: int */
+#define LX_AUTOFS_IOC_EXPIRE 0x81109365 /* arg: lx_autofs_expire * */
+#define LX_AUTOFS_IOC_SETTIMEOUT 0xc0049364 /* arg: ulong_t */
+
+typedef struct lx_autofs_expire {
+ int lap_protover; /* protol version number */
+ int lap_constant; /* always set to 1 */
+ int lap_name_len; /* don't include newline or NULL */
+ char lap_name[256]; /* path component that has timed out */
+} lx_autofs_expire_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_AUTOFS_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_autofs_impl.h b/usr/src/uts/common/brand/lx/sys/lx_autofs_impl.h
new file mode 100644
index 0000000000..9c5517b8d5
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_autofs_impl.h
@@ -0,0 +1,121 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LX_AUTOFS_IMPL_H
+#define _LX_AUTOFS_IMPL_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/file.h>
+#include <sys/id_space.h>
+#include <sys/modhash.h>
+#include <sys/vnode.h>
+
+#include <sys/lx_autofs.h>
+
+/*
+ * Space key.
+ * Used to persist data across lx_autofs filesystem module unloads.
+ */
+#define LX_AUTOFS_SPACE_KEY_UDEV LX_AUTOFS_NAME "_udev"
+
+/*
+ * Name of the backing store directory.
+ */
+#define LX_AUTOFS_BS_DIR "." LX_AUTOFS_NAME
+
+#define LX_AUTOFS_VFS_ID_HASH_SIZE 15
+#define LX_AUTOFS_VFS_PATH_HASH_SIZE 15
+#define LX_AUTOFS_VFS_VN_HASH_SIZE 15
+
+/*
+ * VFS data object.
+ */
+typedef struct lx_autofs_vfs {
+ /* Info about the underlying filesystem and backing store. */
+ vnode_t *lav_mvp;
+ char *lav_bs_name;
+ vnode_t *lav_bs_vp;
+
+ /* Info about the automounter process managing this filesystem. */
+ int lav_fd;
+ pid_t lav_pgrp;
+ file_t *lav_fifo_wr;
+ file_t *lav_fifo_rd;
+
+ /* Each automount requests needs a unique id. */
+ id_space_t *lav_ids;
+
+ /* All remaining structure members are protected by lav_lock. */
+ kmutex_t lav_lock;
+
+ /* Hashes to keep track of outstanding automounter requests. */
+ mod_hash_t *lav_path_hash;
+ mod_hash_t *lav_id_hash;
+
+ /* We need to keep track of all our vnodes. */
+ vnode_t *lav_root;
+ mod_hash_t *lav_vn_hash;
+} lx_autofs_vfs_t;
+
+/*
+ * Structure to keep track of requests sent to the automounter.
+ */
+typedef struct lx_autofs_lookup_req {
+ /* Packet that gets sent to the automounter. */
+ lx_autofs_pkt_t lalr_pkt;
+
+ /* Reference count. Always updated atomically. */
+ uint_t lalr_ref;
+
+ /*
+ * Fields to keep track and sync threads waiting on a lookup.
+ * Fields are protected by lalr_lock.
+ */
+ kmutex_t lalr_lock;
+ kcondvar_t lalr_cv;
+ int lalr_complete;
+} lx_autofs_lookup_req_t;
+
+/*
+ * Generic stack structure.
+ */
+typedef struct stack_elem {
+ list_node_t se_list;
+ caddr_t se_ptr1;
+ caddr_t se_ptr2;
+ caddr_t se_ptr3;
+} stack_elem_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_AUTOFS_IMPL_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_brand.h b/usr/src/uts/common/brand/lx/sys/lx_brand.h
new file mode 100644
index 0000000000..3cf0dd00a0
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_brand.h
@@ -0,0 +1,349 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _LX_BRAND_H
+#define _LX_BRAND_H
+
+#ifndef _ASM
+#include <sys/types.h>
+#include <sys/cpuvar.h>
+#include <sys/zone.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LX_BRANDNAME "lx"
+
+/*
+ * Brand uname info
+ */
+#define LX_UNAME_SYSNAME "Linux"
+#define LX_UNAME_RELEASE_2_6 "2.6.18"
+#define LX_UNAME_RELEASE_2_4 "2.4.21"
+#define LX_UNAME_VERSION "BrandZ virtual linux"
+#define LX_UNAME_MACHINE32 "i686"
+#define LX_UNAME_MACHINE64 "x86_64"
+
+#define LX_LIB_PATH32 "/native/usr/lib/lx_brand.so.1"
+#define LX_LIB_PATH64 "/native/usr/lib/amd64/lx_brand.so.1"
+
+#if defined(_LP64)
+#define LX_LIB_PATH LX_LIB_PATH64
+#define LX_UNAME_MACHINE LX_UNAME_MACHINE64
+#else
+#define LX_LIB_PATH LX_LIB_PATH32
+#define LX_UNAME_MACHINE LX_UNAME_MACHINE32
+#endif
+
+/*
+ * This must be large enough for both the 32-bit table and 64-bit table.
+ */
+#define LX_NSYSCALLS 352
+
+/* The number of In-Kernel Emulation functions */
+#define LX_N_IKE_FUNCS 29
+
+/*
+ * brand(2) subcommands
+ *
+ * Everything >= 128 is a brand-specific subcommand.
+ * > 192 is reserved for in-kernel emulated system calls.
+ */
+#define B_LPID_TO_SPAIR 128
+#define B_SYSENTRY 129
+#define B_SYSRETURN 130
+#define B_PTRACE_SYSCALL 131
+#define B_SET_AFFINITY_MASK 132
+#define B_GET_AFFINITY_MASK 133
+#define B_PTRACE_EXT_OPTS 134
+#define B_PTRACE_STOP_FOR_OPT 135
+#define B_UNSUPPORTED 136
+#define B_STORE_ARGS 137
+#define B_CLR_NTV_SYSC_FLAG 138
+#define B_SIGNAL_RETURN 139
+#define B_UNWIND_NTV_SYSC_FLAG 140
+#define B_EXIT_AS_SIG 141
+
+#define B_IKE_SYSCALL 192
+
+/* B_PTRACE_EXT_OPTS subcommands */
+#define B_PTRACE_EXT_OPTS_SET 1
+#define B_PTRACE_EXT_OPTS_GET 2
+#define B_PTRACE_EXT_OPTS_EVT 3
+
+/*
+ * Support for Linux PTRACE_SETOPTIONS handling.
+ */
+#define LX_PTRACE_O_TRACESYSGOOD 0x0001
+#define LX_PTRACE_O_TRACEFORK 0x0002
+#define LX_PTRACE_O_TRACEVFORK 0x0004
+#define LX_PTRACE_O_TRACECLONE 0x0008
+#define LX_PTRACE_O_TRACEEXEC 0x0010
+#define LX_PTRACE_O_TRACEVFORKDONE 0x0020
+#define LX_PTRACE_O_TRACEEXIT 0x0040
+#define LX_PTRACE_O_TRACESECCOMP 0x0080
+
+/* siginfo si_status for traced events */
+#define LX_PTRACE_EVENT_FORK 0x100
+#define LX_PTRACE_EVENT_VFORK 0x200
+#define LX_PTRACE_EVENT_CLONE 0x300
+#define LX_PTRACE_EVENT_EXEC 0x400
+#define LX_PTRACE_EVENT_VFORK_DONE 0x500
+#define LX_PTRACE_EVENT_EXIT 0x600
+#define LX_PTRACE_EVENT_SECCOMP 0x700
+
+#define LX_VERSION_1 1
+#define LX_VERSION LX_VERSION_1
+
+#define LX_ATTR_RESTART_INIT ZONE_ATTR_BRAND_ATTRS
+#define LX_KERN_VERSION_NUM (ZONE_ATTR_BRAND_ATTRS + 1)
+
+/*
+ * Aux vector containing phdr of Linux executable and ehdr of interpreter
+ * (if any), both of which are used by lx_librtld_db to ascertain r_debug.
+ * We repurpose the 3rd brand-specific aux vector slot for the Linux
+ * AT_SYSINFO_EHDR entry (we modify the a_type in the brand library).
+ */
+#define AT_SUN_BRAND_LX_PHDR AT_SUN_BRAND_AUX1
+#define AT_SUN_BRAND_LX_INTERP AT_SUN_BRAND_AUX2
+#define AT_SUN_BRAND_LX_SYSINFO_EHDR AT_SUN_BRAND_AUX3
+
+/* Aux vector containing hz value */
+#define AT_CLKTCK 17
+/* Aux vector containing vDSO addr */
+#define AT_SYSINFO_EHDR 33
+
+#ifndef _ASM
+
+typedef struct lx_brand_registration {
+ uint_t lxbr_version; /* version number */
+ void *lxbr_handler; /* base address of handler */
+ void *lxbr_tracehandler; /* base address of trace handler */
+ void *lxbr_traceflag; /* address of trace flag */
+} lx_brand_registration_t;
+
+typedef struct lx_brand_registration32 {
+ uint_t lxbr_version; /* version number */
+ uint32_t lxbr_handler; /* base address of handler */
+ uint32_t lxbr_tracehandler; /* base address of trace handler */
+ uint32_t lxbr_traceflag; /* address of trace flag */
+} lx_brand_registration32_t;
+
+#ifdef __amd64
+typedef struct lx_regs {
+ long lxr_fs;
+ long lxr_rdi;
+ long lxr_rsi;
+ long lxr_rbp;
+ long lxr_rsp;
+ long lxr_rbx;
+ long lxr_rdx;
+ long lxr_rcx;
+ long lxr_rax;
+ long lxr_r8;
+ long lxr_r9;
+ long lxr_r10;
+ long lxr_r11;
+ long lxr_r12;
+ long lxr_r13;
+ long lxr_r14;
+ long lxr_r15;
+ long lxr_rip;
+
+ long lxr_orig_rax;
+} lx_regs_t;
+#else /* ! __amd64 */
+typedef struct lx_regs {
+ long lxr_gs;
+ long lxr_edi;
+ long lxr_esi;
+ long lxr_ebp;
+ long lxr_esp;
+ long lxr_ebx;
+ long lxr_edx;
+ long lxr_ecx;
+ long lxr_eax;
+ long lxr_eip;
+
+ long lxr_orig_eax;
+} lx_regs_t;
+#endif /* __amd64 */
+
+#endif /* _ASM */
+
+/*
+ * GDT usage
+ */
+#define GDT_TLSMIN (GDT_BRANDMIN)
+#define GDT_TLSMAX (GDT_TLSMIN + 2)
+#define LX_TLSNUM (GDT_TLSMAX - GDT_TLSMIN)
+
+#ifndef _ASM
+
+/*
+ * Stores information needed by the lx linker to launch the main
+ * lx executable.
+ */
+typedef struct lx_elf_data64 {
+ uintptr_t ed_phdr;
+ uintptr_t ed_phent;
+ uintptr_t ed_phnum;
+ uintptr_t ed_entry;
+ uintptr_t ed_base;
+ uintptr_t ed_ldentry;
+} lx_elf_data64_t;
+
+typedef struct lx_elf_data32 {
+ int ed_phdr;
+ int ed_phent;
+ int ed_phnum;
+ int ed_entry;
+ int ed_base;
+ int ed_ldentry;
+} lx_elf_data32_t;
+
+#if defined(_LP64)
+typedef lx_elf_data64_t lx_elf_data_t;
+#else
+typedef lx_elf_data32_t lx_elf_data_t;
+#endif
+
+#ifdef _KERNEL
+
+typedef struct lx_proc_data {
+ uintptr_t l_handler; /* address of user-space handler */
+ uintptr_t l_tracehandler; /* address of user-space traced handler */
+ uintptr_t l_traceflag; /* address of 32-bit tracing flag */
+ pid_t l_ppid; /* pid of originating parent proc */
+ uint64_t l_ptrace; /* process being observed with ptrace */
+ uint_t l_ptrace_opts; /* process's extended ptrace options */
+ uint_t l_ptrace_event; /* extended ptrace option trap event */
+ lx_elf_data_t l_elf_data; /* ELF data for linux executable */
+ int l_signal; /* signal to deliver to parent when this */
+ /* thread group dies */
+} lx_proc_data_t;
+
+#endif /* _KERNEL */
+
+/*
+ * A data type big enough to bitmap all Linux possible cpus.
+ * The bitmap size is defined as 1024 cpus in the Linux 2.4 and 2.6 man pages
+ * for sched_getaffinity() and sched_getaffinity().
+ */
+#define LX_NCPU (1024)
+#define LX_AFF_ULONGS (LX_NCPU / (8 * sizeof (ulong_t)))
+typedef ulong_t lx_affmask_t[LX_AFF_ULONGS];
+
+/* Max. length of kernel version string */
+#define LX_VERS_MAX 16
+
+#ifdef _KERNEL
+
+/*
+ * lx-specific data in the klwp_t
+ */
+typedef struct lx_lwp_data {
+ uint_t br_ntv_syscall; /* 1 = syscall from native libc */
+ uint_t br_lwp_flags; /* misc. flags */
+ klwp_t *br_lwp; /* back pointer to container lwp */
+ int br_signal; /* signal to send to parent when */
+ /* clone()'ed child terminates */
+ int br_exitwhy; /* reason for thread (process) exit */
+ int br_exitwhat; /* exit code / killing signal */
+ lx_affmask_t br_affinitymask; /* bitmask of CPU sched affinities */
+ struct user_desc br_tls[LX_TLSNUM];
+ /* descriptors used by libc for TLS */
+ ulong_t br_lx_fsbase; /* lx fsbase for 64-bit thread ptr */
+ ulong_t br_ntv_fsbase; /* native fsbase 64-bit thread ptr */
+ /*
+ * 64-bit thread-specific syscall mode state "stack". Bits tracking the
+ * syscall mode are shifted on/off this int like a stack as we take
+ * signals and return.
+ */
+ uint_t br_scms;
+ pid_t br_pid; /* converted pid for this thread */
+ pid_t br_tgid; /* thread group ID for this thread */
+ pid_t br_ppid; /* parent pid for this thread */
+ id_t br_ptid; /* parent tid for this thread */
+ void *br_clear_ctidp; /* clone thread id ptr */
+ void *br_set_ctidp; /* clone thread id ptr */
+
+ /*
+ * The following struct is used by some system calls to pass extra
+ * flags into the kernel without impinging on the namespace for
+ * illumos.
+ */
+ void *br_scall_args;
+ int br_args_size; /* size in bytes of br_scall_args */
+
+ uint_t br_ptrace; /* ptrace is active for this LWP */
+} lx_lwp_data_t;
+
+/*
+ * Upper limit on br_args_size, low because this value can persist until
+ * overridden with another value, and the size is given from userland.
+ */
+#define LX_BR_ARGS_SIZE_MAX (1024)
+
+/* brand specific data */
+typedef struct lx_zone_data {
+ char lxzd_kernel_version[LX_VERS_MAX];
+ int lxzd_max_syscall;
+} lx_zone_data_t;
+
+#define BR_CPU_BOUND 0x0001
+
+#define ttolxlwp(t) ((struct lx_lwp_data *)ttolwpbrand(t))
+#define lwptolxlwp(l) ((struct lx_lwp_data *)lwptolwpbrand(l))
+#define ttolxproc(t) ((struct lx_proc_data *)(t)->t_procp->p_brand_data)
+#define ptolxproc(p) ((struct lx_proc_data *)(p)->p_brand_data)
+/* Macro for converting to system call arguments. */
+#define LX_ARGS(scall) ((struct lx_##scall##_args *)\
+ (ttolxlwp(curthread)->br_scall_args))
+
+void lx_brand_int80_callback(void);
+void lx_brand_syscall_callback(void);
+int64_t lx_emulate_syscall(int, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t, uintptr_t);
+
+extern char *lx_get_zone_kern_version(zone_t *);
+
+extern int lx_debug;
+#define lx_print if (lx_debug) printf
+
+#endif /* _KERNEL */
+#endif /* _ASM */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_BRAND_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_futex.h b/usr/src/uts/common/brand/lx/sys/lx_futex.h
new file mode 100644
index 0000000000..4d81386057
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_futex.h
@@ -0,0 +1,108 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_LX_FUTEX_H
+#define _SYS_LX_FUTEX_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define FUTEX_WAIT 0
+#define FUTEX_WAKE 1
+#define FUTEX_FD 2
+#define FUTEX_REQUEUE 3
+#define FUTEX_CMP_REQUEUE 4
+#define FUTEX_WAKE_OP 5
+#define FUTEX_LOCK_PI 6
+#define FUTEX_UNLOCK_PI 7
+#define FUTEX_TRYLOCK_PI 8
+#define FUTEX_WAIT_BITSET 9
+#define FUTEX_WAKE_BITSET 10
+#define FUTEX_WAIT_REQUEUE_PI 11
+#define FUTEX_CMP_REQUEUE_PI 12
+#define FUTEX_MAX_CMD FUTEX_CMP_REQUEUE_PI
+
+/*
+ * Flags that can be OR'd into a futex operation.
+ */
+#define FUTEX_CMD_MASK 0x007f
+#define FUTEX_PRIVATE_FLAG 0x0080
+#define FUTEX_CLOCK_REALTIME 0x0100
+
+/*
+ * FUTEX_WAKE_OP operations
+ */
+#define FUTEX_OP_SET 0 /* *(int *)UADDR2 = OPARG; */
+#define FUTEX_OP_ADD 1 /* *(int *)UADDR2 += OPARG; */
+#define FUTEX_OP_OR 2 /* *(int *)UADDR2 |= OPARG; */
+#define FUTEX_OP_ANDN 3 /* *(int *)UADDR2 &= ~OPARG; */
+#define FUTEX_OP_XOR 4 /* *(int *)UADDR2 ^= OPARG; */
+
+/*
+ * FUTEX_WAKE_OP comparison operations
+ */
+#define FUTEX_OP_CMP_EQ 0 /* if (oldval == CMPARG) wake */
+#define FUTEX_OP_CMP_NE 1 /* if (oldval != CMPARG) wake */
+#define FUTEX_OP_CMP_LT 2 /* if (oldval < CMPARG) wake */
+#define FUTEX_OP_CMP_LE 3 /* if (oldval <= CMPARG) wake */
+#define FUTEX_OP_CMP_GT 4 /* if (oldval > CMPARG) wake */
+#define FUTEX_OP_CMP_GE 5 /* if (oldval >= CMPARG) wake */
+
+/*
+ * The encoding of the FUTEX_WAKE_OP operation in 32 bits:
+ *
+ * +--+-- - --+-- - --+-- - --+-- - --+
+ * |S |OP |CMP |OPARG |CMPARG |
+ * +--+-- - --+-- - --+-- - --+-- - --+
+ * |31|30 - 28|27 - 24|23 - 12|11 - 0|
+ *
+ * The S bit denotes that the OPARG should be (1 << OPARG) instead of OPARG.
+ * (Yes, this whole thing is entirely absurd -- see the block comment in
+ * lx_futex.c for an explanation of this nonsense.) Macros to extract the
+ * various components from the operation, given the above encoding:
+ */
+#define FUTEX_OP_OP(x) (((x) >> 28) & 7)
+#define FUTEX_OP_CMP(x) (((x) >> 24) & 15)
+#define FUTEX_OP_OPARG(x) (((x) >> 31) ? (1 << (((x) << 8) >> 20)) : \
+ ((((x) << 8) >> 20)))
+#define FUTEX_OP_CMPARG(x) (((x) << 20) >> 20)
+
+#ifdef _KERNEL
+extern long lx_futex(uintptr_t addr, int cmd, int val, uintptr_t lx_timeout,
+ uintptr_t addr2, int val2);
+extern void lx_futex_init(void);
+extern int lx_futex_fini(void);
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_FUTEX_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_impl.h b/usr/src/uts/common/brand/lx/sys/lx_impl.h
new file mode 100644
index 0000000000..03b9d43038
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_impl.h
@@ -0,0 +1,52 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _LX_IMPL_H
+#define _LX_IMPL_H
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void (lx_systrace_f)(ulong_t, ulong_t, ulong_t, ulong_t, ulong_t,
+ ulong_t, ulong_t);
+
+
+extern lx_systrace_f *lx_systrace_entry_ptr;
+extern lx_systrace_f *lx_systrace_return_ptr;
+
+extern void lx_brand_systrace_enable(void);
+extern void lx_brand_systrace_disable(void);
+
+extern void lx_unsupported(char *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_IMPL_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_ldt.h b/usr/src/uts/common/brand/lx/sys/lx_ldt.h
new file mode 100644
index 0000000000..5080c3adae
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_ldt.h
@@ -0,0 +1,93 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LINUX_LDT_H
+#define _SYS_LINUX_LDT_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/segments.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct ldt_info {
+ uint_t entry_number;
+ uint_t base_addr;
+ uint_t limit;
+ uint_t seg_32bit:1,
+ contents:2,
+ read_exec_only:1,
+ limit_in_pages:1,
+ seg_not_present:1,
+ useable:1;
+};
+
+#define LDT_INFO_EMPTY(info) \
+ ((info)->base_addr == 0 && (info)->limit == 0 && \
+ (info)->contents == 0 && (info)->read_exec_only == 1 && \
+ (info)->seg_32bit == 0 && (info)->limit_in_pages == 0 && \
+ (info)->seg_not_present == 1 && (info)->useable == 0)
+
+#if defined(__amd64)
+#define SETMODE(desc) (desc)->usd_long = SDP_SHORT;
+#else
+#define SETMODE(desc)
+#endif
+
+#define LDT_INFO_TO_DESC(info, desc) { \
+ USEGD_SETBASE(desc, (info)->base_addr); \
+ USEGD_SETLIMIT(desc, (info)->limit); \
+ (desc)->usd_type = ((info)->contents << 2) | \
+ ((info)->read_exec_only ^ 1) << 1 | 0x10; \
+ (desc)->usd_dpl = SEL_UPL; \
+ (desc)->usd_p = (info)->seg_not_present ^ 1; \
+ (desc)->usd_def32 = (info)->seg_32bit; \
+ (desc)->usd_gran = (info)->limit_in_pages; \
+ (desc)->usd_avl = (info)->useable; \
+ SETMODE(desc); \
+}
+
+#define DESC_TO_LDT_INFO(desc, info) { \
+ bzero((info), sizeof (*(info))); \
+ (info)->base_addr = USEGD_GETBASE(desc); \
+ (info)->limit = USEGD_GETLIMIT(desc); \
+ (info)->seg_not_present = (desc)->usd_p ^ 1; \
+ (info)->contents = ((desc)->usd_type >> 2) & 3; \
+ (info)->read_exec_only = (((desc)->usd_type >> 1) & 1) ^ 1; \
+ (info)->seg_32bit = (desc)->usd_def32; \
+ (info)->limit_in_pages = (desc)->usd_gran; \
+ (info)->useable = (desc)->usd_avl; \
+}
+
+extern void lx_set_gdt(int, user_desc_t *);
+extern void lx_clear_gdt(int);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LINUX_LDT_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_pid.h b/usr/src/uts/common/brand/lx/sys/lx_pid.h
new file mode 100644
index 0000000000..80c8079f0b
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_pid.h
@@ -0,0 +1,61 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LX_PID_H
+#define _SYS_LX_PID_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/note.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+struct lx_pid {
+ pid_t s_pid; /* the solaris pid and ... */
+ id_t s_tid; /* ... tid pair */
+ pid_t l_pid; /* the corresponding linux pid */
+ time_t l_start; /* birthday of this pid */
+ struct pid *l_pidp;
+ struct lx_pid *stol_next; /* link in stol hash table */
+ struct lx_pid *ltos_next; /* link in ltos hash table */
+};
+
+extern int lx_pid_assign(kthread_t *);
+extern void lx_pid_reassign(kthread_t *);
+extern void lx_pid_rele(pid_t, id_t);
+extern pid_t lx_lpid_to_spair(pid_t, pid_t *, id_t *);
+extern pid_t lx_lwp_ppid(klwp_t *, pid_t *, id_t *);
+extern void lx_pid_init(void);
+extern void lx_pid_fini(void);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_PID_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_ptm.h b/usr/src/uts/common/brand/lx/sys/lx_ptm.h
new file mode 100644
index 0000000000..74bbc939a3
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_ptm.h
@@ -0,0 +1,44 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_PTM_LINUX_H
+#define _SYS_PTM_LINUX_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LX_PTM_DRV "lx_ptm"
+#define LX_PTM_MINOR_NODE "lx_ptmajor"
+
+#define LX_PTM_DEV_TO_PTS(dev) (getminor(dev) - 1)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_PTM_LINUX_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_sched.h b/usr/src/uts/common/brand/lx/sys/lx_sched.h
new file mode 100644
index 0000000000..b0ae748f3c
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_sched.h
@@ -0,0 +1,60 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LINUX_SCHED_H
+#define _SYS_LINUX_SCHED_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/procset.h>
+#include <sys/priocntl.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Linux scheduler policies.
+ */
+#define LX_SCHED_OTHER 0
+#define LX_SCHED_FIFO 1
+#define LX_SCHED_RR 2
+
+#define LX_PRI_MAX 99
+
+typedef int l_pid_t;
+
+struct lx_sched_param {
+ int lx_sched_prio;
+};
+
+extern int sched_setprocset(procset_t *, l_pid_t);
+extern long do_priocntlsys(int, procset_t *, void *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LINUX_SCHED_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_syscalls.h b/usr/src/uts/common/brand/lx/sys/lx_syscalls.h
new file mode 100644
index 0000000000..766aa91ef5
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_syscalls.h
@@ -0,0 +1,75 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_LINUX_SYSCALLS_H
+#define _SYS_LINUX_SYSCALLS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+extern long lx_arch_prctl();
+extern long lx_brk();
+extern long lx_getpid();
+extern long lx_getppid();
+extern long lx_clone();
+extern long lx_kill();
+extern long lx_tkill();
+extern long lx_tgkill();
+extern long lx_modify_ldt();
+extern long lx_pipe();
+extern long lx_pipe2();
+extern long lx_read();
+extern long lx_ioctl();
+extern long lx_gettid();
+extern long lx_futex();
+extern long lx_get_thread_area();
+extern long lx_sched_getparam();
+extern long lx_sched_getscheduler();
+extern long lx_sched_rr_get_interval();
+extern long lx_sched_setparam();
+extern long lx_sched_setscheduler();
+extern long lx_set_thread_area();
+extern long lx_set_tid_address();
+extern long lx_setresgid();
+extern long lx_setresgid16();
+extern long lx_setresuid();
+extern long lx_setresuid16();
+extern long lx_sysinfo();
+extern long lx_setgroups();
+extern long lx_rt_sigqueueinfo();
+extern long lx_rt_tgsigqueueinfo();
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LINUX_SYSCALLS_H */
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_brk.c b/usr/src/uts/common/brand/lx/syscall/lx_brk.c
new file mode 100644
index 0000000000..19a7577ac0
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_brk.c
@@ -0,0 +1,57 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+
+/*
+ * The brk() system call needs to be in-kernel because Linux expects a call to
+ * brk(0) to return the current breakpoint. In Solaris, the process breakpoint
+ * is setup and managed by libc. Due to the way we link our libraries and the
+ * need for Linux to manage its own breakpoint, this has to remain in the
+ * kernel.
+ */
+extern int brk(caddr_t);
+
+long
+lx_brk(caddr_t nva)
+{
+ proc_t *p = curproc;
+ klwp_t *lwp = ttolwp(curthread);
+
+ if (nva != 0) {
+ (void) brk(nva);
+
+ /*
+ * Despite claims to the contrary in the manpage, when Linux
+ * brk() fails, errno is left unchanged.
+ */
+ lwp->lwp_errno = 0;
+ }
+ return ((long)(p->p_brkbase + p->p_brksize));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_clone.c b/usr/src/uts/common/brand/lx/syscall/lx_clone.c
new file mode 100644
index 0000000000..3cb6a37ad1
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_clone.c
@@ -0,0 +1,164 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_ldt.h>
+#include <lx_signum.h>
+#include <sys/x86_archext.h>
+#include <sys/controlregs.h>
+
+#define LX_CSIGNAL 0x000000ff
+#define LX_CLONE_VM 0x00000100
+#define LX_CLONE_FS 0x00000200
+#define LX_CLONE_FILES 0x00000400
+#define LX_CLONE_SIGHAND 0x00000800
+#define LX_CLONE_PID 0x00001000
+#define LX_CLONE_PTRACE 0x00002000
+#define LX_CLONE_PARENT 0x00008000
+#define LX_CLONE_THREAD 0x00010000
+#define LX_CLONE_SYSVSEM 0x00040000
+#define LX_CLONE_SETTLS 0x00080000
+#define LX_CLONE_PARENT_SETTID 0x00100000
+#define LX_CLONE_CHILD_CLEARTID 0x00200000
+#define LX_CLONE_DETACH 0x00400000
+#define LX_CLONE_CHILD_SETTID 0x01000000
+
+/*
+ * Our lwp has already been created at this point, so this routine is
+ * responsible for setting up all the state needed to track this as a
+ * linux cloned thread.
+ */
+/* ARGSUSED */
+long
+lx_clone(int flags, void *stkp, void *ptidp, void *tls, void *ctidp)
+{
+ struct lx_lwp_data *lwpd = ttolxlwp(curthread);
+ struct lx_proc_data *lproc = ttolxproc(curthread);
+ struct ldt_info info;
+ struct user_desc descr;
+ int tls_index;
+ int entry = -1;
+ int signo;
+
+ signo = flags & LX_CSIGNAL;
+ if (signo < 0 || signo > LX_NSIG)
+ return (set_errno(EINVAL));
+
+ if (!(flags & LX_CLONE_THREAD)) {
+ lproc->l_signal = signo;
+ } else {
+ if (flags & LX_CLONE_SETTLS) {
+ if (get_udatamodel() == DATAMODEL_ILP32) {
+ if (copyin((caddr_t)tls, &info, sizeof (info)))
+ return (set_errno(EFAULT));
+
+ if (LDT_INFO_EMPTY(&info))
+ return (set_errno(EINVAL));
+
+ entry = info.entry_number;
+ if (entry < GDT_TLSMIN || entry > GDT_TLSMAX)
+ return (set_errno(EINVAL));
+
+ tls_index = entry - GDT_TLSMIN;
+
+ /*
+ * Convert the user-space structure into a real
+ * x86 descriptor and copy it into this LWP's
+ * TLS array. We also load it into the GDT.
+ */
+ LDT_INFO_TO_DESC(&info, &descr);
+ bcopy(&descr, &lwpd->br_tls[tls_index],
+ sizeof (descr));
+ lx_set_gdt(entry, &lwpd->br_tls[tls_index]);
+ } else {
+ /*
+ * For 64-bit, we need to set %fsbase -- which
+ * requires us to save the native %fsbase and
+ * set our LX %fsbase. Don't use rdmsr since
+ * the value might get changed before we get to
+ * this code. We use the value from the pcb
+ * which the native libc should have already
+ * setup via syslwp_private.
+ */
+#if defined(__amd64)
+ pcb_t *pcb;
+ pcb = (pcb_t *)&curthread->t_lwp->lwp_pcb;
+ lwpd->br_ntv_fsbase = pcb->pcb_fsbase;
+#endif
+ lwpd->br_lx_fsbase = (uintptr_t)tls;
+ }
+ }
+
+ lwpd->br_clear_ctidp =
+ (flags & LX_CLONE_CHILD_CLEARTID) ? ctidp : NULL;
+
+ if (signo && ! (flags & LX_CLONE_DETACH))
+ lwpd->br_signal = signo;
+ else
+ lwpd->br_signal = 0;
+
+ if (flags & LX_CLONE_THREAD)
+ lwpd->br_tgid = curthread->t_procp->p_pid;
+
+ if (flags & LX_CLONE_PARENT)
+ lwpd->br_ppid = 0;
+
+ if ((flags & LX_CLONE_CHILD_SETTID) && (ctidp != NULL) &&
+ (suword32(ctidp, lwpd->br_pid) != 0)) {
+ if (entry >= 0)
+ lx_clear_gdt(entry);
+ return (set_errno(EFAULT));
+ }
+ if ((flags & LX_CLONE_PARENT_SETTID) && (ptidp != NULL) &&
+ (suword32(ptidp, lwpd->br_pid) != 0)) {
+ if (entry >= 0)
+ lx_clear_gdt(entry);
+ return (set_errno(EFAULT));
+ }
+ }
+ return (lwpd->br_pid);
+}
+
+long
+lx_set_tid_address(int *tidp)
+{
+ struct lx_lwp_data *lwpd = ttolxlwp(curthread);
+ long rv;
+
+ lwpd->br_clear_ctidp = tidp;
+
+ if (curproc->p_pid == curproc->p_zone->zone_proc_initpid) {
+ rv = 1;
+ } else {
+ rv = lwpd->br_pid;
+ }
+
+ return (rv);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_futex.c b/usr/src/uts/common/brand/lx/syscall/lx_futex.c
new file mode 100644
index 0000000000..7f8abcd8d9
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_futex.c
@@ -0,0 +1,739 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/debug.h>
+#include <vm/as.h>
+#include <vm/seg.h>
+#include <vm/seg_vn.h>
+#include <vm/page.h>
+#include <sys/mman.h>
+#include <sys/timer.h>
+#include <sys/condvar.h>
+#include <sys/inttypes.h>
+#include <sys/cmn_err.h>
+#include <sys/lx_futex.h>
+#include <sys/lx_impl.h>
+
+/*
+ * Futexes are a Linux-specific implementation of inter-process mutexes.
+ * They are designed to use shared memory for simple, uncontested
+ * operations, and rely on the kernel to resolve any contention issues.
+ *
+ * Most of the information in this section comes from the paper "Futexes
+ * Are Tricky", by Ulrich Drepper. This paper is currently available at:
+ * http://people.redhat.com/~drepper/futex.pdf.
+ *
+ * A futex itself a 4-byte integer, which must be 4-byte aligned. The
+ * value of this integer is expected to be modified using user-level atomic
+ * operations. The futex(4) design itself does not impose any semantic
+ * constraints on the value stored in the futex; it is up to the
+ * application to define its own protocol.
+ *
+ * When the application decides that kernel intervention is required, it
+ * will use the futex(2) system call. There are 5 different operations
+ * that can be performed on a futex, using this system call. Since this
+ * interface has evolved over time, there are several different prototypes
+ * available to the user. Fortunately, there is only a single kernel-level
+ * interface:
+ *
+ * long sys_futex(void *futex1, int cmd, int val1,
+ * struct timespec *timeout, void *futex2, int val2)
+ *
+ * The kernel-level operations that may be performed on a futex are:
+ *
+ * FUTEX_WAIT
+ *
+ * Atomically verify that futex1 contains the value val1. If it
+ * doesn't, return EWOULDBLOCK. If it does contain the expected
+ * value, the thread will sleep until somebody performs a FUTEX_WAKE
+ * on the futex. The caller may also specify a timeout, indicating
+ * the maximum time the thread should sleep. If the timer expires,
+ * the call returns ETIMEDOUT. If the thread is awoken with a signal,
+ * the call returns EINTR. Otherwise, the call returns 0.
+ *
+ * FUTEX_WAKE
+ *
+ * Wake up val1 processes that are waiting on futex1. The call
+ * returns the number of blocked threads that were woken up.
+ *
+ * FUTEX_WAKE_OP
+ *
+ * The implementation of a conditional variable in terms of futexes
+ * actually uses two futexes: one to assure sequential access and one to
+ * represent the condition variable. This implementation gives rise to a
+ * particular performance problem whereby a thread is awoken on the futex
+ * that represents the condition variable only to have to (potentially)
+ * immediately wait on the futex that protects the condition variable.
+ * (Do not confuse the futex that serves to protect the condition variable
+ * with the pthread_mutex_t associated with pthread_cond_t -- which
+ * represents a third futex.) To (over)solve this problem, FUTEX_WAKE_OP
+ * was invented, which performs an atomic compare-and-exchange on a
+ * second address in a specified fashion (that is, with a specified
+ * operation). Here are the possible operations (OPARG is defined
+ * to be 12 bit value embedded in the operation):
+ *
+ * - FUTEX_OP_SET: Sets the value at the second address to OPARG
+ * - FUTEX_OP_ADD: Adds the value to OPARG
+ * - FUTEX_OP_OR: OR's the value with OPARG
+ * - FUTEX_OP_ANDN: Performs a negated AND of the value with OPARG
+ * - FUTEX_OP_XOR: XOR's the value with OPARG
+ *
+ * After this compare-and-exchange on the second address, a FUTEX_WAKE is
+ * performed on the first address and -- if the compare-and-exchange
+ * matches a specified result based on a specified comparison operation --
+ * a FUTEX_WAKE is performed on the second address. Here are the possible
+ * comparison operations:
+ *
+ * - FUTEX_OP_CMP_EQ: If old value is CMPARG, wake
+ * - FUTEX_OP_CMP_NE: If old value is not equal to CMPARG, wake
+ * - FUTEX_OP_CMP_LT: If old value is less than CMPARG, wake
+ * - FUTEX_OP_CMP_LE: If old value is less than or equal to CMPARG, wake
+ * - FUTEX_OP_CMP_GT: If old value is greater than CMPARG, wake
+ * - FUTEX_OP_CMP_GE: If old value is greater than or equal to CMPARG, wake
+ *
+ * As a practical matter, the only way that this is used (or, some might
+ * argue, is usable) is by the implementation of pthread_cond_signal(),
+ * which uses FUTEX_WAKE_OP to -- in a single system call -- unlock the
+ * futex that protects the condition variable and wake the futex that
+ * represents the condition variable. The second wake-up is conditional
+ * because the futex that protects the condition variable (rather than the
+ * one that represents it) may or may not have waiters. Given that this
+ * is the use case, FUTEX_WAKE_OP is falsely generic: despite allowing for
+ * five different kinds of operations and six different kinds of
+ * comparision operations, in practice only one is used. (Namely, setting
+ * to 0 and waking if the old value is greater than 1 -- which denotes
+ * that waiters are present and the wakeup should be performed.) Moreover,
+ * because FUTEX_WAKE_OP does not (and cannot) optimize anything in the
+ * case that the pthread_mutex_t associated with the pthread_cond_t is
+ * held at the time of a pthread_cond_signal(), this entire mechanism is
+ * essentially for naught in this case. As one can imagine (and can
+ * verify on just about any source base that uses pthread_cond_signal()),
+ * it is overwhelmingly the common case that the lock associated with the
+ * pthread_cond_t is held at the time of pthread_cond_signal(), assuring
+ * that the problem that all of this complexity was designed to solve
+ * isn't, in fact, solved because the signalled thread simply wakes up
+ * only to block again on the held mutex. Cue a slow clap!
+ *
+ * FUTEX_CMP_REQUEUE
+ *
+ * If the value stored in futex1 matches that passed in in val2, wake
+ * up val1 processes that are waiting on futex1. Otherwise, return
+ * EAGAIN.
+ *
+ * If there are more than val1 threads waiting on the futex, remove
+ * the remaining threads from this futex, and requeue them on futex2.
+ * The caller can limit the number of threads being requeued by
+ * encoding an integral numerical value in the position usually used
+ * for the timeout pointer.
+ *
+ * The call returns the number of blocked threads that were woken up
+ * or requeued.
+ *
+ * FUTEX_REQUEUE
+ *
+ * Identical to FUTEX_CMP_REQUEUE except that it does not use val2.
+ * This command has been declared broken and obsolete, but we still
+ * need to support it.
+ *
+ * FUTEX_FD
+ *
+ * Return a file descriptor, which can be used to refer to the futex.
+ * This operation was broken by design, and was blessedly removed in
+ * Linux 2.6.26 ("because it was inherently racy"); it should go without
+ * saying that we don't support this operation.
+ */
+
+/*
+ * This structure is used to track all the threads currently waiting on a
+ * futex. There is one fwaiter_t for each blocked thread. We store all
+ * fwaiter_t's in a hash structure, indexed by the memid_t of the integer
+ * containing the futex's value.
+ *
+ * At the moment, all fwaiter_t's for a single futex are simply dumped into
+ * the hash bucket. If futex contention ever becomes a hot path, we can
+ * chain a single futex's waiters together.
+ */
+typedef struct fwaiter {
+ memid_t fw_memid; /* memid of the user-space futex */
+ kcondvar_t fw_cv; /* cond var */
+ struct fwaiter *fw_next; /* hash queue */
+ struct fwaiter *fw_prev; /* hash queue */
+ volatile int fw_woken;
+} fwaiter_t;
+
+#define MEMID_COPY(s, d) \
+ { (d)->val[0] = (s)->val[0]; (d)->val[1] = (s)->val[1]; }
+#define MEMID_EQUAL(s, d) \
+ ((d)->val[0] == (s)->val[0] && (d)->val[1] == (s)->val[1])
+
+/* Borrowed from the page freelist hash code. */
+#define HASH_SHIFT_SZ 7
+#define HASH_SIZE (1 << HASH_SHIFT_SZ)
+#define HASH_FUNC(id) \
+ ((((uintptr_t)((id)->val[1]) >> PAGESHIFT) + \
+ ((uintptr_t)((id)->val[1]) >> (PAGESHIFT + HASH_SHIFT_SZ)) + \
+ ((uintptr_t)((id)->val[0]) >> 3) + \
+ ((uintptr_t)((id)->val[0]) >> (3 + HASH_SHIFT_SZ)) + \
+ ((uintptr_t)((id)->val[0]) >> (3 + 2 * HASH_SHIFT_SZ))) & \
+ (HASH_SIZE - 1))
+
+static fwaiter_t *futex_hash[HASH_SIZE];
+static kmutex_t futex_hash_lock[HASH_SIZE];
+
+static void
+futex_hashin(fwaiter_t *fwp)
+{
+ int index;
+
+ index = HASH_FUNC(&fwp->fw_memid);
+ ASSERT(MUTEX_HELD(&futex_hash_lock[index]));
+
+ fwp->fw_prev = NULL;
+ fwp->fw_next = futex_hash[index];
+ if (fwp->fw_next)
+ fwp->fw_next->fw_prev = fwp;
+ futex_hash[index] = fwp;
+}
+
+static void
+futex_hashout(fwaiter_t *fwp)
+{
+ int index;
+
+ index = HASH_FUNC(&fwp->fw_memid);
+ ASSERT(MUTEX_HELD(&futex_hash_lock[index]));
+
+ if (fwp->fw_prev)
+ fwp->fw_prev->fw_next = fwp->fw_next;
+ if (fwp->fw_next)
+ fwp->fw_next->fw_prev = fwp->fw_prev;
+ if (futex_hash[index] == fwp)
+ futex_hash[index] = fwp->fw_next;
+
+ fwp->fw_prev = NULL;
+ fwp->fw_next = NULL;
+}
+
+/*
+ * Go to sleep until somebody does a WAKE operation on this futex, we get a
+ * signal, or the timeout expires.
+ */
+static int
+futex_wait(memid_t *memid, caddr_t addr, int val, timespec_t *timeout)
+{
+ int err, ret;
+ int32_t curval;
+ fwaiter_t fw;
+ int index;
+
+ fw.fw_woken = 0;
+ MEMID_COPY(memid, &fw.fw_memid);
+ cv_init(&fw.fw_cv, NULL, CV_DEFAULT, NULL);
+
+ index = HASH_FUNC(&fw.fw_memid);
+ mutex_enter(&futex_hash_lock[index]);
+
+ if (fuword32(addr, (uint32_t *)&curval)) {
+ err = set_errno(EFAULT);
+ goto out;
+ }
+ if (curval != val) {
+ err = set_errno(EWOULDBLOCK);
+ goto out;
+ }
+
+ futex_hashin(&fw);
+
+ err = 0;
+ while ((fw.fw_woken == 0) && (err == 0)) {
+ ret = cv_waituntil_sig(&fw.fw_cv, &futex_hash_lock[index],
+ timeout, timechanged);
+ if (ret < 0)
+ err = set_errno(ETIMEDOUT);
+ else if (ret == 0)
+ err = set_errno(EINTR);
+ }
+
+ /*
+ * The futex is normally hashed out in wakeup. If we timed out or
+ * got a signal, we need to hash it out here instead.
+ */
+ if (fw.fw_woken == 0)
+ futex_hashout(&fw);
+
+out:
+ mutex_exit(&futex_hash_lock[index]);
+
+ return (err);
+}
+
+/*
+ * Wake up to wake_threads threads that are blocked on the futex at memid.
+ */
+static int
+futex_wake(memid_t *memid, int wake_threads)
+{
+ fwaiter_t *fwp, *next;
+ int index;
+ int ret = 0;
+
+ index = HASH_FUNC(memid);
+
+ mutex_enter(&futex_hash_lock[index]);
+
+ for (fwp = futex_hash[index]; fwp && ret < wake_threads; fwp = next) {
+ next = fwp->fw_next;
+ if (MEMID_EQUAL(&fwp->fw_memid, memid)) {
+ futex_hashout(fwp);
+ fwp->fw_woken = 1;
+ cv_signal(&fwp->fw_cv);
+ ret++;
+ }
+ }
+
+ mutex_exit(&futex_hash_lock[index]);
+
+ return (ret);
+}
+
+static int
+futex_wake_op_execute(int32_t *addr, int32_t val3)
+{
+ int32_t op = FUTEX_OP_OP(val3);
+ int32_t cmp = FUTEX_OP_CMP(val3);
+ int32_t cmparg = FUTEX_OP_CMPARG(val3);
+ int32_t oparg, oldval, newval;
+ label_t ljb;
+ int rval;
+
+ if ((uintptr_t)addr >= KERNELBASE)
+ return (set_errno(EFAULT));
+
+ if (on_fault(&ljb))
+ return (set_errno(EFAULT));
+
+ oparg = FUTEX_OP_OPARG(val3);
+
+ do {
+ oldval = *addr;
+ newval = oparg;
+
+ switch (op) {
+ case FUTEX_OP_SET:
+ break;
+
+ case FUTEX_OP_ADD:
+ newval += oparg;
+ break;
+
+ case FUTEX_OP_OR:
+ newval |= oparg;
+ break;
+
+ case FUTEX_OP_ANDN:
+ newval &= ~oparg;
+ break;
+
+ case FUTEX_OP_XOR:
+ newval ^= oparg;
+ break;
+
+ default:
+ no_fault();
+ return (set_errno(EINVAL));
+ }
+ } while (atomic_cas_32((uint32_t *)addr, oldval, newval) != oldval);
+
+ no_fault();
+
+ switch (cmp) {
+ case FUTEX_OP_CMP_EQ:
+ rval = (oldval == cmparg);
+ break;
+
+ case FUTEX_OP_CMP_NE:
+ rval = (oldval != cmparg);
+ break;
+
+ case FUTEX_OP_CMP_LT:
+ rval = (oldval < cmparg);
+ break;
+
+ case FUTEX_OP_CMP_LE:
+ rval = (oldval <= cmparg);
+ break;
+
+ case FUTEX_OP_CMP_GT:
+ rval = (oldval > cmparg);
+ break;
+
+ case FUTEX_OP_CMP_GE:
+ rval = (oldval >= cmparg);
+ break;
+
+ default:
+ return (set_errno(EINVAL));
+ }
+
+ return (rval);
+}
+
+static int
+futex_wake_op(memid_t *memid, caddr_t addr2, memid_t *memid2,
+ int wake_threads, int wake_threads2, int val3)
+{
+ kmutex_t *l1, *l2;
+ int ret = 0, ret2 = 0, wake;
+ fwaiter_t *fwp, *next;
+ int index1, index2;
+
+ index1 = HASH_FUNC(memid);
+ index2 = HASH_FUNC(memid2);
+
+ if (index1 == index2) {
+ l1 = &futex_hash_lock[index1];
+ l2 = NULL;
+ } else if (index1 < index2) {
+ l1 = &futex_hash_lock[index1];
+ l2 = &futex_hash_lock[index2];
+ } else {
+ l1 = &futex_hash_lock[index2];
+ l2 = &futex_hash_lock[index1];
+ }
+
+ mutex_enter(l1);
+ if (l2 != NULL)
+ mutex_enter(l2);
+
+ /* LINTED: alignment */
+ if ((wake = futex_wake_op_execute((int32_t *)addr2, val3)) < 0)
+ goto out;
+
+ for (fwp = futex_hash[index1]; fwp; fwp = next) {
+ next = fwp->fw_next;
+ if (!MEMID_EQUAL(&fwp->fw_memid, memid))
+ continue;
+
+ futex_hashout(fwp);
+ fwp->fw_woken = 1;
+ cv_signal(&fwp->fw_cv);
+ if (++ret >= wake_threads) {
+ break;
+ }
+ }
+
+ if (!wake)
+ goto out;
+
+ for (fwp = futex_hash[index2]; fwp; fwp = next) {
+ next = fwp->fw_next;
+ if (!MEMID_EQUAL(&fwp->fw_memid, memid2))
+ continue;
+
+ futex_hashout(fwp);
+ fwp->fw_woken = 1;
+ cv_signal(&fwp->fw_cv);
+ if (++ret2 >= wake_threads2) {
+ break;
+ }
+ }
+
+ ret += ret2;
+out:
+ if (l2 != NULL)
+ mutex_exit(l2);
+ mutex_exit(l1);
+
+ return (ret);
+}
+
+/*
+ * Wake up to wake_threads waiting on the futex at memid. If there are
+ * more than that many threads waiting, requeue the remaining threads on
+ * the futex at requeue_memid.
+ */
+static int
+futex_requeue(memid_t *memid, memid_t *requeue_memid, int wake_threads,
+ ulong_t requeue_threads, caddr_t addr, int *cmpval)
+{
+ fwaiter_t *fwp, *next;
+ int index1, index2;
+ int ret = 0;
+ int32_t curval;
+ kmutex_t *l1, *l2;
+
+ /*
+ * To ensure that we don't miss a wakeup if the value of cmpval
+ * changes, we need to grab locks on both the original and new hash
+ * buckets. To avoid deadlock, we always grab the lower-indexed
+ * lock first.
+ */
+ index1 = HASH_FUNC(memid);
+ index2 = HASH_FUNC(requeue_memid);
+
+ if (index1 == index2) {
+ l1 = &futex_hash_lock[index1];
+ l2 = NULL;
+ } else if (index1 < index2) {
+ l1 = &futex_hash_lock[index1];
+ l2 = &futex_hash_lock[index2];
+ } else {
+ l1 = &futex_hash_lock[index2];
+ l2 = &futex_hash_lock[index1];
+ }
+
+ mutex_enter(l1);
+ if (l2 != NULL)
+ mutex_enter(l2);
+
+ if (cmpval != NULL) {
+ if (fuword32(addr, (uint32_t *)&curval)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ if (curval != *cmpval) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ }
+
+ for (fwp = futex_hash[index1]; fwp; fwp = next) {
+ next = fwp->fw_next;
+ if (!MEMID_EQUAL(&fwp->fw_memid, memid))
+ continue;
+
+ futex_hashout(fwp);
+ if (ret++ < wake_threads) {
+ fwp->fw_woken = 1;
+ cv_signal(&fwp->fw_cv);
+ } else {
+ MEMID_COPY(requeue_memid, &fwp->fw_memid);
+ futex_hashin(fwp);
+
+ if ((ret - wake_threads) >= requeue_threads)
+ break;
+ }
+ }
+
+out:
+ if (l2 != NULL)
+ mutex_exit(l2);
+ mutex_exit(l1);
+
+ if (ret < 0)
+ return (set_errno(-ret));
+ return (ret);
+}
+
+/*
+ * Copy in the relative timeout provided by the application and convert it
+ * to an absolute timeout.
+ */
+static int
+get_timeout(void *lx_timeout, timestruc_t *timeout)
+{
+ timestruc_t now;
+
+ if (get_udatamodel() == DATAMODEL_NATIVE) {
+ if (copyin(lx_timeout, timeout, sizeof (timestruc_t)))
+ return (EFAULT);
+ }
+#ifdef _SYSCALL32_IMPL
+ else {
+ timestruc32_t timeout32;
+ if (copyin(lx_timeout, &timeout32, sizeof (timestruc32_t)))
+ return (EFAULT);
+ timeout->tv_sec = (time_t)timeout32.tv_sec;
+ timeout->tv_nsec = timeout32.tv_nsec;
+ }
+#endif
+ gethrestime(&now);
+
+ if (itimerspecfix(timeout))
+ return (EINVAL);
+
+ timespecadd(timeout, &now);
+ return (0);
+}
+
+long
+lx_futex(uintptr_t addr, int op, int val, uintptr_t lx_timeout,
+ uintptr_t addr2, int val3)
+{
+ struct as *as = curproc->p_as;
+ memid_t memid, memid2;
+ timestruc_t timeout;
+ timestruc_t *tptr = NULL;
+ int val2 = NULL;
+ int rval = 0;
+ int cmd = op & FUTEX_CMD_MASK;
+ int private = op & FUTEX_PRIVATE_FLAG;
+ char dmsg[32];
+
+ /* must be aligned on int boundary */
+ if (addr & 0x3)
+ return (set_errno(EINVAL));
+
+ /* Sanity check the futex command */
+ if (cmd < 0 || cmd > FUTEX_MAX_CMD)
+ return (set_errno(EINVAL));
+
+ if (cmd == FUTEX_FD) {
+ /*
+ * FUTEX_FD was sentenced to death for grievous crimes of
+ * semantics against humanity; it has been ripped out of Linux
+ * and will never be supported by us.
+ */
+ (void) snprintf(dmsg, sizeof (dmsg), "futex 0x%x", cmd);
+ lx_unsupported(dmsg);
+ return (set_errno(ENOSYS));
+ }
+
+ switch (cmd) {
+ case FUTEX_LOCK_PI:
+ case FUTEX_UNLOCK_PI:
+ case FUTEX_TRYLOCK_PI:
+ case FUTEX_WAIT_BITSET:
+ case FUTEX_WAKE_BITSET:
+ case FUTEX_WAIT_REQUEUE_PI:
+ case FUTEX_CMP_REQUEUE_PI:
+ /*
+ * These are operations that we don't currently support, but
+ * may well need to in the future. For now, callers need to
+ * deal with these being missing -- but if and as that changes,
+ * they may well need to be implemented.
+ */
+ (void) snprintf(dmsg, sizeof (dmsg), "futex 0x%x", cmd);
+ lx_unsupported(dmsg);
+ return (set_errno(ENOSYS));
+ }
+
+ /* Copy in the timeout structure from userspace. */
+ if (cmd == FUTEX_WAIT && lx_timeout != NULL) {
+ rval = get_timeout((timespec_t *)lx_timeout, &timeout);
+ if (rval != 0)
+ return (set_errno(rval));
+ tptr = &timeout;
+ }
+
+ switch (cmd) {
+ case FUTEX_REQUEUE:
+ case FUTEX_CMP_REQUEUE:
+ case FUTEX_WAKE_OP:
+ /*
+ * lx_timeout is nominally a pointer to a userspace address.
+ * For several commands, however, it actually contains
+ * an additional interage parameter. This is horrible, and
+ * the people who did this to us should be sorry.
+ */
+ val2 = (int)lx_timeout;
+ }
+
+ /*
+ * Translate the process-specific, user-space futex virtual
+ * address(es) to a universal memid. If the private bit is set, we
+ * can just use our as plus the virtual address, saving quite a bit
+ * of effort.
+ */
+ if (private) {
+ memid.val[0] = (uintptr_t)as;
+ memid.val[1] = (uintptr_t)addr;
+ } else {
+ rval = as_getmemid(as, (void *)addr, &memid);
+ if (rval != 0)
+ return (set_errno(rval));
+ }
+
+ if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
+ cmd == FUTEX_WAKE_OP) {
+ if (addr2 & 0x3)
+ return (set_errno(EINVAL));
+
+ if (private) {
+ memid2.val[0] = (uintptr_t)as;
+ memid2.val[1] = (uintptr_t)addr2;
+ } else {
+ rval = as_getmemid(as, (void *)addr2, &memid2);
+ if (rval)
+ return (set_errno(rval));
+ }
+ }
+
+ switch (cmd) {
+ case FUTEX_WAIT:
+ rval = futex_wait(&memid, (void *)addr, val, tptr);
+ break;
+
+ case FUTEX_WAKE:
+ rval = futex_wake(&memid, val);
+ break;
+
+ case FUTEX_WAKE_OP:
+ rval = futex_wake_op(&memid, (void *)addr2, &memid2,
+ val, val2, val3);
+ break;
+
+ case FUTEX_CMP_REQUEUE:
+ case FUTEX_REQUEUE:
+ rval = futex_requeue(&memid, &memid2, val,
+ val2, (void *)addr2, &val3);
+
+ break;
+ }
+
+ return (rval);
+}
+
+void
+lx_futex_init(void)
+{
+ int i;
+
+ for (i = 0; i < HASH_SIZE; i++)
+ mutex_init(&futex_hash_lock[i], NULL, MUTEX_DEFAULT, NULL);
+ bzero(futex_hash, sizeof (futex_hash));
+}
+
+int
+lx_futex_fini(void)
+{
+ int i, err;
+
+ err = 0;
+ for (i = 0; (err == 0) && (i < HASH_SIZE); i++) {
+ mutex_enter(&futex_hash_lock[i]);
+ if (futex_hash[i] != NULL)
+ err = EBUSY;
+ mutex_exit(&futex_hash_lock[i]);
+ }
+ return (err);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_getpid.c b/usr/src/uts/common/brand/lx/syscall/lx_getpid.c
new file mode 100644
index 0000000000..aa8b2b40e1
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_getpid.c
@@ -0,0 +1,74 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/zone.h>
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/thread.h>
+#include <sys/cpuvar.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_pid.h>
+
+/*
+ * return the pid
+ */
+long
+lx_getpid()
+{
+ lx_lwp_data_t *lwpd = ttolxlwp(curthread);
+ long rv;
+
+ if (curproc->p_pid == curproc->p_zone->zone_proc_initpid) {
+ rv = 1;
+ } else {
+ ASSERT(lwpd != NULL);
+ rv = lwpd->br_tgid;
+ }
+
+ return (rv);
+}
+
+/*
+ * return the parent pid
+ */
+long
+lx_getppid(void)
+{
+ return (lx_lwp_ppid(ttolwp(curthread), NULL, NULL));
+}
+
+/*
+ * return the thread id
+ */
+long
+lx_gettid(void)
+{
+ lx_lwp_data_t *lwpd = ttolxlwp(curthread);
+
+ return (lwpd->br_pid);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_id.c b/usr/src/uts/common/brand/lx/syscall/lx_id.c
new file mode 100644
index 0000000000..5ca18b7556
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_id.c
@@ -0,0 +1,297 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/zone.h>
+#include <sys/cred_impl.h>
+#include <sys/policy.h>
+
+typedef ushort_t l_uid16_t;
+typedef ushort_t l_gid16_t;
+typedef uint_t l_uid_t;
+typedef uint_t l_gid_t;
+
+#define LINUX_UID16_TO_UID32(uid16) \
+ (((uid16) == (l_uid16_t)-1) ? ((l_uid_t)-1) : (l_uid_t)(uid16))
+
+#define LINUX_GID16_TO_GID32(gid16) \
+ (((gid16) == (l_gid16_t)-1) ? ((l_gid_t)-1) : (l_gid_t)(gid16))
+
+#define LX_NGROUPS_MAX 32
+extern int setgroups(int, gid_t *);
+
+/*
+ * This function is based on setreuid in common/syscall/uid.c and exists
+ * because Solaris does not have a way to explicitly set the saved uid (suid)
+ * from any other system call.
+ */
+long
+lx_setresuid(l_uid_t ruid, l_uid_t euid, l_uid_t suid)
+{
+ proc_t *p;
+ int error = 0;
+ int do_nocd = 0;
+ int uidchge = 0;
+ uid_t oldruid = ruid;
+ cred_t *cr, *newcr;
+ zoneid_t zoneid = getzoneid();
+
+ if ((ruid != -1 && (ruid > MAXUID)) ||
+ (euid != -1 && (euid > MAXUID)) ||
+ (suid != -1 && (suid > MAXUID))) {
+ error = EINVAL;
+ goto done;
+ }
+
+ /*
+ * Need to pre-allocate the new cred structure before grabbing
+ * the p_crlock mutex.
+ */
+ newcr = cralloc();
+
+ p = ttoproc(curthread);
+
+retry:
+ mutex_enter(&p->p_crlock);
+ cr = p->p_cred;
+
+ if (ruid != -1 &&
+ ruid != cr->cr_ruid && ruid != cr->cr_uid &&
+ ruid != cr->cr_suid && secpolicy_allow_setid(cr, ruid, B_FALSE)) {
+ error = EPERM;
+ } else if (euid != -1 &&
+ euid != cr->cr_ruid && euid != cr->cr_uid &&
+ euid != cr->cr_suid && secpolicy_allow_setid(cr, euid, B_FALSE)) {
+ error = EPERM;
+ } else if (suid != -1 &&
+ suid != cr->cr_ruid && suid != cr->cr_uid &&
+ suid != cr->cr_suid && secpolicy_allow_setid(cr, suid, B_FALSE)) {
+ error = EPERM;
+ } else {
+ if (!uidchge && ruid != -1 && cr->cr_ruid != ruid) {
+ /*
+ * The ruid of the process is going to change. In order
+ * to avoid a race condition involving the
+ * process count associated with the newly given ruid,
+ * we increment the count before assigning the
+ * credential to the process.
+ * To do that, we'll have to take pidlock, so we first
+ * release p_crlock.
+ */
+ mutex_exit(&p->p_crlock);
+ uidchge = 1;
+ mutex_enter(&pidlock);
+ upcount_inc(ruid, zoneid);
+ mutex_exit(&pidlock);
+ /*
+ * As we released p_crlock we can't rely on the cr
+ * we read. So retry the whole thing.
+ */
+ goto retry;
+ }
+ crhold(cr);
+ crcopy_to(cr, newcr);
+ p->p_cred = newcr;
+
+ if (euid != -1)
+ newcr->cr_uid = euid;
+ if (suid != -1)
+ newcr->cr_suid = suid;
+ if (ruid != -1) {
+ oldruid = newcr->cr_ruid;
+ newcr->cr_ruid = ruid;
+ ASSERT(ruid != oldruid ? uidchge : 1);
+ }
+
+ /*
+ * A process that gives up its privilege
+ * must be marked to produce no core dump.
+ */
+ if ((cr->cr_uid != newcr->cr_uid ||
+ cr->cr_ruid != newcr->cr_ruid ||
+ cr->cr_suid != newcr->cr_suid))
+ do_nocd = 1;
+
+ crfree(cr);
+ }
+ mutex_exit(&p->p_crlock);
+
+ /*
+ * We decrement the number of processes associated with the oldruid
+ * to match the increment above, even if the ruid of the process
+ * did not change or an error occurred (oldruid == uid).
+ */
+ if (uidchge) {
+ ASSERT(oldruid != -1 && ruid != -1);
+ mutex_enter(&pidlock);
+ upcount_dec(oldruid, zoneid);
+ mutex_exit(&pidlock);
+ }
+
+ if (error == 0) {
+ if (do_nocd) {
+ mutex_enter(&p->p_lock);
+ p->p_flag |= SNOCD;
+ mutex_exit(&p->p_lock);
+ }
+ crset(p, newcr); /* broadcast to process threads */
+ goto done;
+ }
+ crfree(newcr);
+done:
+ if (error)
+ return (set_errno(error));
+ else
+ return (0);
+}
+
+long
+lx_setresuid16(l_uid16_t ruid16, l_uid16_t euid16, l_uid16_t suid16)
+{
+ long rval;
+
+ rval = lx_setresuid(
+ LINUX_UID16_TO_UID32(ruid16),
+ LINUX_UID16_TO_UID32(euid16),
+ LINUX_UID16_TO_UID32(suid16));
+
+ return (rval);
+}
+
+/*
+ * This function is based on setregid in common/syscall/gid.c
+ */
+long
+lx_setresgid(l_gid_t rgid, l_gid_t egid, l_gid_t sgid)
+{
+ proc_t *p;
+ int error = 0;
+ int do_nocd = 0;
+ cred_t *cr, *newcr;
+
+ if ((rgid != -1 && (rgid > MAXUID)) ||
+ (egid != -1 && (egid > MAXUID)) ||
+ (sgid != -1 && (sgid > MAXUID))) {
+ error = EINVAL;
+ goto done;
+ }
+
+ /*
+ * Need to pre-allocate the new cred structure before grabbing
+ * the p_crlock mutex.
+ */
+ newcr = cralloc();
+
+ p = ttoproc(curthread);
+ mutex_enter(&p->p_crlock);
+ cr = p->p_cred;
+
+ if (rgid != -1 &&
+ rgid != cr->cr_rgid && rgid != cr->cr_gid &&
+ rgid != cr->cr_sgid && secpolicy_allow_setid(cr, -1, B_FALSE)) {
+ error = EPERM;
+ } else if (egid != -1 &&
+ egid != cr->cr_rgid && egid != cr->cr_gid &&
+ egid != cr->cr_sgid && secpolicy_allow_setid(cr, -1, B_FALSE)) {
+ error = EPERM;
+ } else if (sgid != -1 &&
+ sgid != cr->cr_rgid && sgid != cr->cr_gid &&
+ sgid != cr->cr_sgid && secpolicy_allow_setid(cr, -1, B_FALSE)) {
+ error = EPERM;
+ } else {
+ crhold(cr);
+ crcopy_to(cr, newcr);
+ p->p_cred = newcr;
+
+ if (egid != -1)
+ newcr->cr_gid = egid;
+ if (sgid != -1)
+ newcr->cr_sgid = sgid;
+ if (rgid != -1)
+ newcr->cr_rgid = rgid;
+
+ /*
+ * A process that gives up its privilege
+ * must be marked to produce no core dump.
+ */
+ if ((cr->cr_gid != newcr->cr_gid ||
+ cr->cr_rgid != newcr->cr_rgid ||
+ cr->cr_sgid != newcr->cr_sgid))
+ do_nocd = 1;
+
+ crfree(cr);
+ }
+ mutex_exit(&p->p_crlock);
+
+ if (error == 0) {
+ if (do_nocd) {
+ mutex_enter(&p->p_lock);
+ p->p_flag |= SNOCD;
+ mutex_exit(&p->p_lock);
+ }
+ crset(p, newcr); /* broadcast to process threads */
+ goto done;
+ }
+ crfree(newcr);
+done:
+ if (error)
+ return (set_errno(error));
+ else
+ return (0);
+}
+
+long
+lx_setresgid16(l_gid16_t rgid16, l_gid16_t egid16, l_gid16_t sgid16)
+{
+ long rval;
+
+ rval = lx_setresgid(
+ LINUX_GID16_TO_GID32(rgid16),
+ LINUX_GID16_TO_GID32(egid16),
+ LINUX_GID16_TO_GID32(sgid16));
+
+ return (rval);
+}
+
+/*
+ * Linux defines NGROUPS_MAX to be 32, but on Solaris it is only 16. We employ
+ * the terrible hack below so that tests may proceed, if only on DEBUG kernels.
+ */
+long
+lx_setgroups(int ngroups, gid_t *grouplist)
+{
+#ifdef DEBUG
+ if (ngroups > ngroups_max && ngroups <= LX_NGROUPS_MAX)
+ ngroups = ngroups_max;
+#endif /* DEBUG */
+
+ return (setgroups(ngroups, grouplist));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_ioctl.c b/usr/src/uts/common/brand/lx/syscall/lx_ioctl.c
new file mode 100644
index 0000000000..bf4689e578
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_ioctl.c
@@ -0,0 +1,1158 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/file.h>
+#include <sys/filio.h>
+#include <sys/vnode.h>
+#include <sys/fcntl.h>
+#include <sys/termio.h>
+#include <sys/termios.h>
+#include <sys/ptyvar.h>
+#include <net/if.h>
+#include <sys/sockio.h>
+#include <sys/stropts.h>
+#include <sys/ptms.h>
+#include <sys/cred.h>
+#include <sys/cred_impl.h>
+#include <sys/sysmacros.h>
+#include <sys/lx_ptm.h>
+#include <sys/sunddi.h>
+#include <sys/thread.h>
+#include <sys/proc.h>
+#include <sys/session.h>
+#include <sys/kmem.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <net/if_arp.h>
+
+/*
+ * Supported ioctls
+ */
+#define LX_TCGETS 0x5401
+#define LX_TCSETS 0x5402
+#define LX_TCSETSW 0x5403
+#define LX_TCSETSF 0x5404
+#define LX_TCGETA 0x5405
+#define LX_TCSETA 0x5406
+#define LX_TCSETAW 0x5407
+#define LX_TCSETAF 0x5408
+#define LX_TCSBRK 0x5409
+#define LX_TCXONC 0x540a
+#define LX_TCFLSH 0x540b
+#define LX_TIOCEXCL 0x540c
+#define LX_TIOCNXCL 0x540d
+#define LX_TIOCSCTTY 0x540e
+#define LX_TIOCGPGRP 0x540f
+#define LX_TIOCSPGRP 0x5410
+#define LX_TIOCOUTQ 0x5411
+#define LX_TIOCSTI 0x5412
+#define LX_TIOCGWINSZ 0x5413
+#define LX_TIOCSWINSZ 0x5414
+#define LX_TIOCMGET 0x5415
+#define LX_TIOCMBIS 0x5416
+#define LX_TIOCMBIC 0x5417
+#define LX_TIOCMSET 0x5418
+#define LX_TIOCGSOFTCAR 0x5419
+#define LX_TIOCSSOFTCAR 0x541a
+#define LX_FIONREAD 0x541b
+#define LX_TIOCPKT 0x5420
+#define LX_FIONBIO 0x5421
+#define LX_TIOCNOTTY 0x5422
+#define LX_TIOCSETD 0x5423
+#define LX_TIOCGETD 0x5424
+#define LX_TCSBRKP 0x5425
+#define LX_TIOCGSID 0x5429
+#define LX_TIOCGPTN 0x80045430
+#define LX_TIOCSPTLCK 0x40045431
+#define LX_FIONCLEX 0x5450
+#define LX_FIOCLEX 0x5451
+#define LX_FIOASYNC 0x5452
+#define LX_FIOSETOWN 0x8901
+#define LX_SIOCSPGRP 0x8902
+#define LX_FIOGETOWN 0x8903
+#define LX_SIOCGPGRP 0x8904
+#define LX_SIOCATMARK 0x8905
+#define LX_SIOCGIFCONF 0x8912
+#define LX_SIOCGIFFLAGS 0x8913
+#define LX_SIOCSIFFLAGS 0x8914
+#define LX_SIOCGIFADDR 0x8915
+#define LX_SIOCSIFADDR 0x8916
+#define LX_SIOCGIFDSTADDR 0x8917
+#define LX_SIOCSIFDSTADDR 0x8918
+#define LX_SIOCGIFBRDADDR 0x8919
+#define LX_SIOCSIFBRDADDR 0x891a
+#define LX_SIOCGIFNETMASK 0x891b
+#define LX_SIOCSIFNETMASK 0x891c
+#define LX_SIOCGIFMETRIC 0x891d
+#define LX_SIOCSIFMETRIC 0x891e
+#define LX_SIOCGIFMEM 0x891f
+#define LX_SIOCSIFMEM 0x8920
+#define LX_SIOCGIFMTU 0x8921
+#define LX_SIOCSIFMTU 0x8922
+#define LX_SIOCSIFHWADDR 0x8924
+#define LX_SIOCGIFHWADDR 0x8927
+
+#define FLUSER(fp) fp->f_flag | get_udatamodel()
+#define FLFAKE(fp) fp->f_flag | FKIOCTL
+
+/*
+ * LX_NCC must be different from LX_NCCS since while the termio and termios
+ * structures may look similar they are fundamentally different sizes and
+ * have different members.
+ */
+#define LX_NCC 8
+#define LX_NCCS 19
+
+struct lx_termio {
+ unsigned short c_iflag; /* input mode flags */
+ unsigned short c_oflag; /* output mode flags */
+ unsigned short c_cflag; /* control mode flags */
+ unsigned short c_lflag; /* local mode flags */
+ unsigned char c_line; /* line discipline */
+ unsigned char c_cc[LX_NCC]; /* control characters */
+};
+
+struct lx_termios {
+ uint32_t c_iflag; /* input mode flags */
+ uint32_t c_oflag; /* output mode flags */
+ uint32_t c_cflag; /* control mode flags */
+ uint32_t c_lflag; /* local mode flags */
+ unsigned char c_line; /* line discipline */
+ unsigned char c_cc[LX_NCCS]; /* control characters */
+};
+
+/*
+ * c_cc characters which are valid for lx_termio and lx_termios
+ */
+#define LX_VINTR 0
+#define LX_VQUIT 1
+#define LX_VERASE 2
+#define LX_VKILL 3
+#define LX_VEOF 4
+#define LX_VTIME 5
+#define LX_VMIN 6
+#define LX_VSWTC 7
+
+/*
+ * c_cc characters which are valid for lx_termios
+ */
+#define LX_VSTART 8
+#define LX_VSTOP 9
+#define LX_VSUSP 10
+#define LX_VEOL 11
+#define LX_VREPRINT 12
+#define LX_VDISCARD 13
+#define LX_VWERASE 14
+#define LX_VLNEXT 15
+#define LX_VEOL2 16
+
+/* VSD key for lx_cc information */
+static uint_t lx_ioctl_vsd = 0;
+
+extern int lx_lpid_to_spair(pid_t l_pid, pid_t *s_pid, id_t *s_tid);
+
+
+/* Terminal helpers */
+
+static void
+l2s_termios(struct lx_termios *l_tios, struct termios *s_tios)
+{
+ ASSERT((l_tios != NULL) && (s_tios != NULL));
+
+ bzero(s_tios, sizeof (*s_tios));
+
+ s_tios->c_iflag = l_tios->c_iflag;
+ s_tios->c_oflag = l_tios->c_oflag;
+ s_tios->c_cflag = l_tios->c_cflag;
+ s_tios->c_lflag = l_tios->c_lflag;
+
+ if (s_tios->c_lflag & ICANON) {
+ s_tios->c_cc[VEOF] = l_tios->c_cc[LX_VEOF];
+ s_tios->c_cc[VEOL] = l_tios->c_cc[LX_VEOL];
+ } else {
+ s_tios->c_cc[VMIN] = l_tios->c_cc[LX_VMIN];
+ s_tios->c_cc[VTIME] = l_tios->c_cc[LX_VTIME];
+ }
+
+ s_tios->c_cc[VEOL2] = l_tios->c_cc[LX_VEOL2];
+ s_tios->c_cc[VERASE] = l_tios->c_cc[LX_VERASE];
+ s_tios->c_cc[VKILL] = l_tios->c_cc[LX_VKILL];
+ s_tios->c_cc[VREPRINT] = l_tios->c_cc[LX_VREPRINT];
+ s_tios->c_cc[VLNEXT] = l_tios->c_cc[LX_VLNEXT];
+ s_tios->c_cc[VWERASE] = l_tios->c_cc[LX_VWERASE];
+ s_tios->c_cc[VINTR] = l_tios->c_cc[LX_VINTR];
+ s_tios->c_cc[VQUIT] = l_tios->c_cc[LX_VQUIT];
+ s_tios->c_cc[VSWTCH] = l_tios->c_cc[LX_VSWTC];
+ s_tios->c_cc[VSTART] = l_tios->c_cc[LX_VSTART];
+ s_tios->c_cc[VSTOP] = l_tios->c_cc[LX_VSTOP];
+ s_tios->c_cc[VSUSP] = l_tios->c_cc[LX_VSUSP];
+ s_tios->c_cc[VDISCARD] = l_tios->c_cc[LX_VDISCARD];
+}
+
+static void
+l2s_termio(struct lx_termio *l_tio, struct termio *s_tio)
+{
+ ASSERT((l_tio != NULL) && (s_tio != NULL));
+
+ bzero(s_tio, sizeof (*s_tio));
+
+ s_tio->c_iflag = l_tio->c_iflag;
+ s_tio->c_oflag = l_tio->c_oflag;
+ s_tio->c_cflag = l_tio->c_cflag;
+ s_tio->c_lflag = l_tio->c_lflag;
+
+ if (s_tio->c_lflag & ICANON) {
+ s_tio->c_cc[VEOF] = l_tio->c_cc[LX_VEOF];
+ } else {
+ s_tio->c_cc[VMIN] = l_tio->c_cc[LX_VMIN];
+ s_tio->c_cc[VTIME] = l_tio->c_cc[LX_VTIME];
+ }
+
+ s_tio->c_cc[VINTR] = l_tio->c_cc[LX_VINTR];
+ s_tio->c_cc[VQUIT] = l_tio->c_cc[LX_VQUIT];
+ s_tio->c_cc[VERASE] = l_tio->c_cc[LX_VERASE];
+ s_tio->c_cc[VKILL] = l_tio->c_cc[LX_VKILL];
+ s_tio->c_cc[VSWTCH] = l_tio->c_cc[LX_VSWTC];
+}
+
+static void
+termios2lx_cc(struct lx_termios *l_tios, struct lx_cc *lio)
+{
+ ASSERT((l_tios != NULL) && (lio != NULL));
+
+ bzero(lio, sizeof (*lio));
+
+ lio->veof = l_tios->c_cc[LX_VEOF];
+ lio->veol = l_tios->c_cc[LX_VEOL];
+ lio->vmin = l_tios->c_cc[LX_VMIN];
+ lio->vtime = l_tios->c_cc[LX_VTIME];
+}
+
+static void
+termio2lx_cc(struct lx_termio *l_tio, struct lx_cc *lio)
+{
+ ASSERT((l_tio != NULL) && (lio != NULL));
+
+ bzero(lio, sizeof (*lio));
+
+ lio->veof = l_tio->c_cc[LX_VEOF];
+ lio->veol = 0;
+ lio->vmin = l_tio->c_cc[LX_VMIN];
+ lio->vtime = l_tio->c_cc[LX_VTIME];
+}
+
+static void
+s2l_termios(struct termios *s_tios, struct lx_termios *l_tios)
+{
+ ASSERT((s_tios != NULL) && (l_tios != NULL));
+
+ bzero(l_tios, sizeof (*l_tios));
+
+ l_tios->c_iflag = s_tios->c_iflag;
+ l_tios->c_oflag = s_tios->c_oflag;
+ l_tios->c_cflag = s_tios->c_cflag;
+ l_tios->c_lflag = s_tios->c_lflag;
+
+ if (s_tios->c_lflag & ICANON) {
+ l_tios->c_cc[LX_VEOF] = s_tios->c_cc[VEOF];
+ l_tios->c_cc[LX_VEOL] = s_tios->c_cc[VEOL];
+ } else {
+ l_tios->c_cc[LX_VMIN] = s_tios->c_cc[VMIN];
+ l_tios->c_cc[LX_VTIME] = s_tios->c_cc[VTIME];
+ }
+
+ l_tios->c_cc[LX_VEOL2] = s_tios->c_cc[VEOL2];
+ l_tios->c_cc[LX_VERASE] = s_tios->c_cc[VERASE];
+ l_tios->c_cc[LX_VKILL] = s_tios->c_cc[VKILL];
+ l_tios->c_cc[LX_VREPRINT] = s_tios->c_cc[VREPRINT];
+ l_tios->c_cc[LX_VLNEXT] = s_tios->c_cc[VLNEXT];
+ l_tios->c_cc[LX_VWERASE] = s_tios->c_cc[VWERASE];
+ l_tios->c_cc[LX_VINTR] = s_tios->c_cc[VINTR];
+ l_tios->c_cc[LX_VQUIT] = s_tios->c_cc[VQUIT];
+ l_tios->c_cc[LX_VSWTC] = s_tios->c_cc[VSWTCH];
+ l_tios->c_cc[LX_VSTART] = s_tios->c_cc[VSTART];
+ l_tios->c_cc[LX_VSTOP] = s_tios->c_cc[VSTOP];
+ l_tios->c_cc[LX_VSUSP] = s_tios->c_cc[VSUSP];
+ l_tios->c_cc[LX_VDISCARD] = s_tios->c_cc[VDISCARD];
+}
+
+static void
+s2l_termio(struct termio *s_tio, struct lx_termio *l_tio)
+{
+ ASSERT((s_tio != NULL) && (l_tio != NULL));
+
+ bzero(l_tio, sizeof (*l_tio));
+
+ l_tio->c_iflag = s_tio->c_iflag;
+ l_tio->c_oflag = s_tio->c_oflag;
+ l_tio->c_cflag = s_tio->c_cflag;
+ l_tio->c_lflag = s_tio->c_lflag;
+
+ if (s_tio->c_lflag & ICANON) {
+ l_tio->c_cc[LX_VEOF] = s_tio->c_cc[VEOF];
+ } else {
+ l_tio->c_cc[LX_VMIN] = s_tio->c_cc[VMIN];
+ l_tio->c_cc[LX_VTIME] = s_tio->c_cc[VTIME];
+ }
+
+ l_tio->c_cc[LX_VINTR] = s_tio->c_cc[VINTR];
+ l_tio->c_cc[LX_VQUIT] = s_tio->c_cc[VQUIT];
+ l_tio->c_cc[LX_VERASE] = s_tio->c_cc[VERASE];
+ l_tio->c_cc[LX_VKILL] = s_tio->c_cc[VKILL];
+ l_tio->c_cc[LX_VSWTC] = s_tio->c_cc[VSWTCH];
+}
+
+static void
+set_lx_cc(vnode_t *vp, struct lx_cc *lio)
+{
+ struct lx_cc *cur;
+ /*
+ * Linux expects that the termio/termios control characters are
+ * preserved more strictly than illumos supports. In order to preserve
+ * the illusion that the characters are maintained, they are stored as
+ * vnode-specific data.
+ */
+ mutex_enter(&vp->v_vsd_lock);
+ cur = (struct lx_cc *)vsd_get(vp, lx_ioctl_vsd);
+ if (cur == NULL) {
+ cur = kmem_alloc(sizeof (struct lx_cc), KM_SLEEP);
+ bcopy(lio, cur, sizeof (struct lx_cc));
+ (void) vsd_set(vp, lx_ioctl_vsd, cur);
+ } else {
+ bcopy(lio, cur, sizeof (struct lx_cc));
+ }
+ mutex_exit(&vp->v_vsd_lock);
+}
+
+static int
+get_lx_cc(vnode_t *vp, struct lx_cc *lio)
+{
+ struct lx_cc *cur;
+ int rv = 1;
+ mutex_enter(&vp->v_vsd_lock);
+ cur = (struct lx_cc *)vsd_get(vp, lx_ioctl_vsd);
+ if (cur != NULL) {
+ bcopy(cur, lio, sizeof (*lio));
+ rv = 0;
+ }
+ mutex_exit(&vp->v_vsd_lock);
+ return (rv);
+}
+
+/* Socket helpers */
+
+static void
+ifname_l2s(char *ifname)
+{
+ if (strncmp(ifname, "lo", IFNAMSIZ) == 0)
+ (void) strlcpy(ifname, "lo0", IFNAMSIZ);
+}
+
+static void
+ifname_s2l(char *ifname)
+{
+ if (strncmp(ifname, "lo0", IFNAMSIZ) == 0)
+ (void) strlcpy(ifname, "lo", IFNAMSIZ);
+}
+
+typedef struct lx_ifreq32 {
+ char ifr_name[IFNAMSIZ];
+ union {
+ struct sockaddr ifru_addr;
+ };
+} lx_ifreq32_t;
+
+typedef struct lx_ifreq64 {
+ char ifr_name[IFNAMSIZ];
+ union {
+ struct sockaddr ifru_addr;
+ /* pad this out to the Linux size */
+ uint64_t ifmap[3];
+ };
+} lx_ifreq64_t;
+
+typedef struct lx_ifconf32 {
+ int32_t if_len;
+ caddr32_t if_buf;
+} lx_ifconf32_t;
+
+typedef struct lx_ifconf64 {
+ int32_t if_len;
+ caddr_t if_buf;
+} lx_ifconf64_t;
+
+/* Linux ARP protocol hardware identifiers */
+#define LX_ARPHRD_ETHER 1 /* Ethernet */
+#define LX_ARPHRD_LOOPBACK 772 /* Loopback */
+#define LX_ARPHRD_VOID 0xffff /* Unknown */
+
+
+/* Generic translators */
+
+static int
+ict_pass(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ int error = 0;
+ int rv;
+
+ error = VOP_IOCTL(fp->f_vnode, cmd, arg, FLUSER(fp), fp->f_cred, &rv,
+ NULL);
+ return ((error != 0) ? set_errno(error) : 0);
+}
+
+static int
+ict_fionbio(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ vnode_t *vp;
+ int32_t iflag, flags;
+ int error;
+
+ if (copyin((caddr_t)arg, &iflag, sizeof (iflag)))
+ return (set_errno(EFAULT));
+
+ mutex_enter(&fp->f_tlock);
+ vp = fp->f_vnode;
+ flags = fp->f_flag;
+ /* Linux sets NONBLOCK instead of FIONBIO */
+ if (iflag)
+ flags |= FNONBLOCK;
+ else
+ flags &= ~FNONBLOCK;
+ /* push the flag down */
+ error = VOP_SETFL(vp, fp->f_flag, flags, fp->f_cred, NULL);
+ fp->f_flag = flags;
+ mutex_exit(&fp->f_tlock);
+ return ((error != 0) ? set_errno(error) : 0);
+}
+
+static int
+ict_fionread(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ vnode_t *vp;
+ struct vattr vattr;
+ int error = 0;
+ int rv;
+ /*
+ * offset is int32_t because that is what FIONREAD is defined in terms
+ * of. We cap at INT_MAX as in other cases for this ioctl.
+ */
+ int32_t offset;
+
+ vp = fp->f_vnode;
+
+ if (vp->v_type == VREG || vp->v_type == VDIR) {
+ vattr.va_mask = AT_SIZE;
+ error = VOP_GETATTR(vp, &vattr, 0, fp->f_cred, NULL);
+ if (error != 0)
+ return (set_errno(error));
+ offset = MIN(vattr.va_size - fp->f_offset, INT_MAX);
+ if (copyout(&offset, (caddr_t)arg, sizeof (offset)))
+ return (set_errno(EFAULT));
+ } else {
+ error = VOP_IOCTL(vp, FIONREAD, arg, FLUSER(fp), fp->f_cred,
+ &rv, NULL);
+ if (error)
+ return (set_errno(error));
+ }
+ return (0);
+}
+
+/* Terminal-related translators */
+
+static int
+ict_tcsets(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ struct lx_termios l_tios;
+ struct termios s_tios;
+ struct lx_cc lio;
+ int error, rv;
+
+ ASSERT(cmd == TCSETS || cmd == TCSETSW || cmd == TCSETSF);
+
+ if (copyin((struct lx_termios *)arg, &l_tios, sizeof (l_tios)) != 0)
+ return (set_errno(EFAULT));
+ termios2lx_cc(&l_tios, &lio);
+ l2s_termios(&l_tios, &s_tios);
+
+ error = VOP_IOCTL(fp->f_vnode, cmd, (intptr_t)&s_tios,
+ FLFAKE(fp), fp->f_cred, &rv, NULL);
+ if (error)
+ return (set_errno(error));
+ /* preserve lx_cc */
+ set_lx_cc(fp->f_vnode, &lio);
+
+ return (0);
+}
+
+static int
+ict_tcseta(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ struct lx_termio l_tio;
+ struct termio s_tio;
+ struct lx_cc lio;
+ int error, rv;
+
+ ASSERT(cmd == TCSETA || cmd == TCSETAW || cmd == TCSETAF);
+
+ if (copyin((struct lx_termio *)arg, &l_tio, sizeof (l_tio)) != 0)
+ return (set_errno(EFAULT));
+ l2s_termio(&l_tio, &s_tio);
+ termio2lx_cc(&l_tio, &lio);
+
+ error = VOP_IOCTL(fp->f_vnode, cmd, (intptr_t)&s_tio,
+ FLFAKE(fp), fp->f_cred, &rv, NULL);
+ if (error)
+ return (set_errno(error));
+ /* preserve lx_cc */
+ set_lx_cc(fp->f_vnode, &lio);
+
+ return (0);
+}
+
+static int
+ict_tcgets_ptm(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ struct lx_termios l_tios;
+ struct termios s_tios, *s_tiosd;
+ uint_t s_tiosl;
+
+ /* get termios defaults */
+ if (ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, ddi_root_node(),
+ DDI_PROP_NOTPROM, "ttymodes", (uchar_t **)&s_tiosd,
+ &s_tiosl) != DDI_SUCCESS)
+ return (EIO);
+ ASSERT(s_tiosl == sizeof (*s_tiosd));
+ bcopy(s_tiosd, &s_tios, sizeof (s_tios));
+ ddi_prop_free(s_tiosd);
+
+ /* Now munge the data to how Linux wants it. */
+ s2l_termios(&s_tios, &l_tios);
+ if (copyout(&l_tios, (struct lx_termios *)arg, sizeof (l_tios)) != 0)
+ return (set_errno(EFAULT));
+
+ return (0);
+}
+
+static int
+ict_tcgets_native(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ struct lx_termios l_tios;
+ struct termios s_tios;
+ struct lx_cc lio;
+ int error, rv;
+
+ error = VOP_IOCTL(fp->f_vnode, cmd, (intptr_t)&s_tios,
+ FLFAKE(fp), fp->f_cred, &rv, NULL);
+ if (error)
+ return (set_errno(error));
+
+ /* Now munge the data to how Linux wants it. */
+ s2l_termios(&s_tios, &l_tios);
+
+ /* return preserved lx_cc */
+ if (get_lx_cc(fp->f_vnode, &lio) == 0) {
+ l_tios.c_cc[LX_VEOF] = lio.veof;
+ l_tios.c_cc[LX_VEOL] = lio.veol;
+ l_tios.c_cc[LX_VMIN] = lio.vmin;
+ l_tios.c_cc[LX_VTIME] = lio.vtime;
+ }
+
+ if (copyout(&l_tios, (struct lx_termios *)arg, sizeof (l_tios)) != 0)
+ return (set_errno(EFAULT));
+
+ return (0);
+}
+
+static int
+ict_tcgets(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ if (getmajor(fp->f_vnode->v_rdev) == ddi_name_to_major(LX_PTM_DRV))
+ return (ict_tcgets_ptm(fp, cmd, arg, lxcmd));
+ else
+ return (ict_tcgets_native(fp, cmd, arg, lxcmd));
+}
+
+static int
+ict_tcgeta(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ struct lx_termio l_tio;
+ struct termio s_tio;
+ struct lx_cc lio;
+ int error, rv;
+
+ error = VOP_IOCTL(fp->f_vnode, cmd, (intptr_t)&s_tio,
+ FLFAKE(fp), fp->f_cred, &rv, NULL);
+ if (error)
+ return (set_errno(error));
+
+ s2l_termio(&s_tio, &l_tio);
+ /* return preserved lx_cc */
+ if (get_lx_cc(fp->f_vnode, &lio) == 0) {
+ l_tio.c_cc[LX_VEOF] = lio.veof;
+ l_tio.c_cc[LX_VMIN] = lio.vmin;
+ l_tio.c_cc[LX_VTIME] = lio.vtime;
+ }
+
+ if (copyout(&l_tio, (struct lx_termios *)arg, sizeof (l_tio)) != 0)
+ return (set_errno(EFAULT));
+
+ return (0);
+}
+
+static int
+ict_tiocspgrp(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ pid_t lpid, spid, tid;
+ int error, rv;
+
+ /* Converting to the illumos pid is necessary */
+ if (copyin((pid_t *)arg, &lpid, sizeof (lpid)) < 0)
+ return (set_errno(EFAULT));
+ if (lx_lpid_to_spair(lpid, &spid, &tid) < 0)
+ return (set_errno(EPERM));
+
+ error = VOP_IOCTL(fp->f_vnode, cmd, (intptr_t)&spid,
+ fp->f_flag |FKIOCTL, fp->f_cred, &rv, NULL);
+ return ((error != 0) ? set_errno(error) : 0);
+}
+
+static int
+ict_tcsbrkp(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ int rv, error;
+ /* use null duration to emulate TCSBRKP */
+ int dur = 0;
+ error = VOP_IOCTL(fp->f_vnode, TCSBRK, (intptr_t)&dur,
+ FLFAKE(fp), fp->f_cred, &rv, NULL);
+ return ((error != 0) ? set_errno(error) : 0);
+}
+
+static int
+ict_tiocgpgrp(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ pid_t ttysid, mysid;
+ int error, rv;
+
+ error = VOP_IOCTL(fp->f_vnode, TIOCGSID, (intptr_t)&ttysid,
+ FLFAKE(fp), fp->f_cred, &rv, NULL);
+ if (error != 0)
+ return (set_errno(error));
+
+ mutex_enter(&curproc->p_splock);
+ if (ttysid != curproc->p_sessp->s_sid) {
+ mutex_exit(&curproc->p_splock);
+ return (set_errno(ENOTTY));
+ }
+ mutex_exit(&curproc->p_splock);
+
+ error = VOP_IOCTL(fp->f_vnode, cmd, arg, FLUSER(fp),
+ fp->f_cred, &rv, NULL);
+ return ((error != 0) ? set_errno(error) : 0);
+}
+
+static int
+ict_sptlock(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ struct strioctl istr;
+ int error, rv;
+
+ istr.ic_cmd = UNLKPT;
+ istr.ic_len = 0;
+ istr.ic_timout = 0;
+ istr.ic_dp = NULL;
+ error = VOP_IOCTL(fp->f_vnode, I_STR, (intptr_t)&istr,
+ fp->f_flag |FKIOCTL, fp->f_cred, &rv, NULL);
+ /*
+ * The success/fail return values are different between Linux
+ * and illumos. Linux expects 0 or -1. Illumos can return
+ * positive number on success.
+ */
+ return ((error != 0) ? set_errno(error) : 0);
+}
+
+static int
+ict_gptn(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ struct strioctl istr;
+ cred_t *cr;
+ pt_own_t pto;
+ int error, rv;
+ int ptyno;
+
+ /* This operation is only valid for the lx_ptm device. */
+ if (getmajor(fp->f_vnode->v_rdev) != ddi_name_to_major(LX_PTM_DRV))
+ return (set_errno(ENOTTY));
+
+ cr = CRED();
+ pto.pto_ruid = cr->cr_uid;
+ pto.pto_rgid = cr->cr_gid;
+
+ istr.ic_cmd = OWNERPT;
+ istr.ic_len = sizeof (pto);
+ istr.ic_timout = 0;
+ istr.ic_dp = (char *)&pto;
+ error = VOP_IOCTL(fp->f_vnode, I_STR, (intptr_t)&istr,
+ FLFAKE(fp), fp->f_cred, &rv, NULL);
+
+ if (error)
+ return (set_errno((error == ENOTTY) ? error: EACCES));
+
+ ptyno = getminor(fp->f_vnode->v_rdev) - 1;
+ if (copyout(&ptyno, (caddr_t)arg, sizeof (ptyno)))
+ return (set_errno(EFAULT));
+
+ return (0);
+}
+
+static int
+ict_tiocgwinsz(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ int error, rv;
+ struct winsize winsize;
+
+ error = VOP_IOCTL(fp->f_vnode, cmd, arg, FLUSER(fp), fp->f_cred, &rv,
+ NULL);
+
+ if (error == EINVAL) {
+ bzero(&winsize, sizeof (winsize));
+ if (copyout(&winsize, (caddr_t)arg, sizeof (winsize)))
+ return (set_errno(EFAULT));
+ } else if (error != 0) {
+ return (set_errno(error));
+ }
+
+ return (0);
+}
+
+static int
+ict_tiocsctty(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ pid_t ttysid, mysid;
+ int error, rv;
+ proc_t *p = curproc;
+
+ /* getsid */
+ mutex_enter(&p->p_splock);
+ mysid = p->p_sessp->s_sid;
+ mutex_exit(&p->p_splock);
+
+ error = VOP_IOCTL(fp->f_vnode, TIOCGSID, (intptr_t)&ttysid,
+ FLFAKE(fp), fp->f_cred, &rv, NULL);
+ if (error != 0)
+ return (set_errno(error));
+ else if (ttysid == mysid)
+ return (0);
+
+ /*
+ * Need to make sure we're a session leader, otherwise the
+ * TIOCSCTTY ioctl will fail.
+ */
+ mutex_enter(&pidlock);
+ if (p->p_sessp->s_sidp != p->p_pidp && !pgmembers(p->p_pid)) {
+ mutex_exit(&pidlock);
+ sess_create();
+ } else {
+ mutex_exit(&pidlock);
+ }
+
+ error = VOP_IOCTL(fp->f_vnode, cmd, 0, FLUSER(fp),
+ fp->f_cred, &rv, NULL);
+ return ((error != 0) ? set_errno(error) : 0);
+}
+
+/* Socket-related translators */
+
+static int
+ict_siocatmark(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ vnode_t *vp = fp->f_vnode;
+ int error, rv;
+ /*
+ * Linux expects a SIOCATMARK of a UDP socket to return ENOTTY, while
+ * Illumos allows it. Linux prior to 2.6.39 returned EINVAL for this.
+ */
+ if (vp->v_type != VSOCK || VTOSO(vp)->so_type != SOCK_STREAM)
+ return (set_errno(ENOTTY));
+
+ error = VOP_IOCTL(fp->f_vnode, cmd, arg, FLUSER(fp), fp->f_cred, &rv,
+ NULL);
+ if (error)
+ return (set_errno(error));
+
+ return (0);
+}
+
+static int
+ict_siocifhwaddr(file_t *fp, int cmd, struct ifreq *req)
+{
+ struct arpreq arpreq;
+ int error, rv;
+
+ /*
+ * We're not going to support SIOCSIFHWADDR, but we need to be able to
+ * check the result of the copyin first to see if the command should
+ * have returned EFAULT.
+ */
+ if (cmd == LX_SIOCSIFHWADDR)
+ return (set_errno(EINVAL));
+
+ if (strcmp(req->ifr_name, "lo0") == 0) {
+ /* Abuse ifr_addr for linux ifr_hwaddr */
+ bzero(&req->ifr_addr, sizeof (struct sockaddr));
+ req->ifr_addr.sa_family = LX_ARPHRD_LOOPBACK;
+ return (0);
+ }
+
+ error = VOP_IOCTL(fp->f_vnode, SIOCGIFADDR, (intptr_t)req,
+ FLFAKE(fp), fp->f_cred, &rv, NULL);
+ /* set_errno will be performed by ict_sioifreq */
+ if (error != 0)
+ return (error);
+
+ bzero(&arpreq, sizeof (arpreq));
+ bcopy(&req->ifr_addr, &arpreq.arp_pa, sizeof (struct sockaddr));
+
+ error = VOP_IOCTL(fp->f_vnode, SIOCGARP, (intptr_t)&arpreq,
+ FLFAKE(fp), fp->f_cred, &rv, NULL);
+ if (error != 0)
+ return (error);
+
+ /* Abuse ifr_addr for linux ifr_hwaddr */
+ bcopy(&arpreq.arp_ha, &req->ifr_addr, sizeof (struct sockaddr));
+ /* consider all non-loopback as ethernet for now */
+ req->ifr_addr.sa_family = LX_ARPHRD_ETHER;
+
+ return (0);
+}
+
+static int
+ict_sioifreq(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ struct ifreq req;
+ int error, rv, len;
+
+ /* The Linux ifreq is smaller than illumos */
+ if (curproc->p_model == DATAMODEL_LP64)
+ len = sizeof (lx_ifreq64_t);
+ else
+ len = sizeof (lx_ifreq32_t);
+ bzero(&req, sizeof (req));
+
+ if (copyin((struct ifreq *)arg, &req, len) != 0)
+ return (set_errno(EFAULT));
+ ifname_l2s(req.ifr_name);
+
+ switch (cmd) {
+ case SIOCGIFFLAGS:
+ case SIOCSIFFLAGS:
+ case SIOCGIFADDR:
+ case SIOCSIFADDR:
+ case SIOCGIFDSTADDR:
+ case SIOCSIFDSTADDR:
+ case SIOCGIFBRDADDR:
+ case SIOCSIFBRDADDR:
+ case SIOCGIFNETMASK:
+ case SIOCSIFNETMASK:
+ case SIOCGIFMETRIC:
+ case SIOCSIFMETRIC:
+ case SIOCGIFMTU:
+ case SIOCSIFMTU:
+ error = VOP_IOCTL(fp->f_vnode, cmd, (intptr_t)&req,
+ FLFAKE(fp), fp->f_cred, &rv, NULL);
+ break;
+ case LX_SIOCGIFHWADDR:
+ case LX_SIOCSIFHWADDR:
+ error = ict_siocifhwaddr(fp, cmd, &req);
+ break;
+ default:
+ error = EINVAL;
+ }
+
+ if (error != 0)
+ return (set_errno(error));
+
+ ifname_s2l(req.ifr_name);
+ if (copyout(&req, (struct ifreq *)arg, len) != 0)
+ return (set_errno(EFAULT));
+
+ return (0);
+}
+
+static int
+ict_siocgifconf32(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ lx_ifconf32_t conf;
+ lx_ifreq32_t *oreq;
+ struct ifconf sconf;
+ int ifcount, error, rv, i, buf_len;
+
+ if (copyin((lx_ifconf32_t *)arg, &conf, sizeof (conf)) != 0)
+ return (set_errno(EFAULT));
+
+ /* They want to know how many interfaces there are. */
+ if (conf.if_len <= 0 || conf.if_buf == NULL) {
+ error = VOP_IOCTL(fp->f_vnode, SIOCGIFNUM, (intptr_t)&ifcount,
+ FLFAKE(fp), fp->f_cred, &rv, NULL);
+ if (error != 0)
+ return (set_errno(error));
+
+ conf.if_len = ifcount * sizeof (lx_ifreq32_t);
+
+ if (copyout(&conf, (lx_ifconf32_t *)arg, sizeof (conf)) != 0)
+ return (set_errno(EFAULT));
+ return (0);
+ } else {
+ ifcount = conf.if_len / sizeof (lx_ifreq32_t);
+ }
+
+ /* Get interface configuration list. */
+ sconf.ifc_len = ifcount * sizeof (struct ifreq);
+ sconf.ifc_req = (struct ifreq *)kmem_alloc(sconf.ifc_len, KM_SLEEP);
+
+ error = VOP_IOCTL(fp->f_vnode, cmd, (intptr_t)&sconf,
+ FLFAKE(fp), fp->f_cred, &rv, NULL);
+ if (error != 0) {
+ kmem_free(sconf.ifc_req, ifcount * sizeof (struct ifreq));
+ return (set_errno(error));
+ }
+
+ /* Convert data to Linux format & rename interfaces */
+ buf_len = ifcount * sizeof (lx_ifreq32_t);
+ oreq = (lx_ifreq32_t *)kmem_alloc(buf_len, KM_SLEEP);
+ for (i = 0; i < sconf.ifc_len / sizeof (struct ifreq); i++) {
+ bcopy(&sconf.ifc_req[i], oreq + i, sizeof (lx_ifreq32_t));
+ ifname_s2l(oreq[i].ifr_name);
+ }
+ conf.if_len = i * sizeof (*oreq);
+ kmem_free(sconf.ifc_req, ifcount * sizeof (struct ifreq));
+
+ rv = 0;
+ if (copyout(oreq, (caddr_t)(uintptr_t)conf.if_buf, conf.if_len) != 0 ||
+ copyout(&conf, (lx_ifconf32_t *)arg, sizeof (conf)) != 0)
+ rv = set_errno(EFAULT);
+
+ kmem_free(oreq, buf_len);
+ return (rv);
+}
+
+static int
+ict_siocgifconf64(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ lx_ifconf64_t conf;
+ lx_ifreq64_t *oreq;
+ struct ifconf sconf;
+ int ifcount, error, rv, i, buf_len;
+
+ if (copyin((lx_ifconf64_t *)arg, &conf, sizeof (conf)) != 0)
+ return (set_errno(EFAULT));
+
+ /* They want to know how many interfaces there are. */
+ if (conf.if_len <= 0 || conf.if_buf == NULL) {
+ error = VOP_IOCTL(fp->f_vnode, SIOCGIFNUM, (intptr_t)&ifcount,
+ FLFAKE(fp), fp->f_cred, &rv, NULL);
+ if (error != 0)
+ return (set_errno(error));
+
+ conf.if_len = ifcount * sizeof (lx_ifreq64_t);
+
+ if (copyout(&conf, (lx_ifconf64_t *)arg, sizeof (conf)) != 0)
+ return (set_errno(EFAULT));
+ return (0);
+ } else {
+ ifcount = conf.if_len / sizeof (lx_ifreq64_t);
+ }
+
+ /* Get interface configuration list. */
+ sconf.ifc_len = ifcount * sizeof (struct ifreq);
+ sconf.ifc_req = (struct ifreq *)kmem_alloc(sconf.ifc_len, KM_SLEEP);
+
+ error = VOP_IOCTL(fp->f_vnode, cmd, (intptr_t)&sconf,
+ FLFAKE(fp), fp->f_cred, &rv, NULL);
+ if (error != 0) {
+ kmem_free(sconf.ifc_req, ifcount * sizeof (struct ifreq));
+ return (set_errno(error));
+ }
+
+ /* Convert data to Linux format & rename interfaces */
+ buf_len = ifcount * sizeof (lx_ifreq64_t);
+ oreq = (lx_ifreq64_t *)kmem_alloc(buf_len, KM_SLEEP);
+ for (i = 0; i < sconf.ifc_len / sizeof (struct ifreq); i++) {
+ bcopy(&sconf.ifc_req[i], oreq + i, sizeof (lx_ifreq64_t));
+ ifname_s2l(oreq[i].ifr_name);
+ }
+ conf.if_len = i * sizeof (*oreq);
+ kmem_free(sconf.ifc_req, ifcount * sizeof (struct ifreq));
+
+ rv = 0;
+ if (copyout(oreq, (caddr_t)(uintptr_t)conf.if_buf, conf.if_len) != 0 ||
+ copyout(&conf, (lx_ifconf64_t *)arg, sizeof (conf)) != 0)
+ rv = set_errno(EFAULT);
+
+ kmem_free(oreq, buf_len);
+ return (rv);
+}
+
+static int
+ict_siocgifconf(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ if (curproc->p_model == DATAMODEL_LP64)
+ return (ict_siocgifconf64(fp, cmd, arg, lxcmd));
+ else
+ return (ict_siocgifconf32(fp, cmd, arg, lxcmd));
+}
+
+/* Structure used to define an ioctl translator. */
+typedef struct ioc_cmd_translator {
+ int ict_lxcmd;
+ int ict_cmd;
+ int (*ict_func)(file_t *fp, int cmd, intptr_t arg, int lxcmd);
+} ioc_cmd_translator_t;
+
+#define IOC_CMD_TRANSLATOR_PASS(ioc_cmd_sym) \
+ { (int)LX_##ioc_cmd_sym, (int)ioc_cmd_sym, ict_pass },
+
+#define IOC_CMD_TRANSLATOR_FILTER(ioc_cmd_sym, ioct_handler) \
+ { (int)LX_##ioc_cmd_sym, (int)ioc_cmd_sym, ioct_handler },
+
+#define IOC_CMD_TRANSLATOR_CUSTOM(ioc_cmd_sym, ioct_handler) \
+ { (int)ioc_cmd_sym, (int)ioc_cmd_sym, ioct_handler },
+
+#define IOC_CMD_TRANSLATOR_END \
+ {0, 0, NULL}
+
+static ioc_cmd_translator_t ioc_translators[] = {
+ IOC_CMD_TRANSLATOR_FILTER(FIONBIO, ict_fionbio)
+ IOC_CMD_TRANSLATOR_FILTER(FIONREAD, ict_fionread)
+ IOC_CMD_TRANSLATOR_PASS(FIOGETOWN)
+ IOC_CMD_TRANSLATOR_PASS(FIOASYNC)
+
+ /* streams related */
+ IOC_CMD_TRANSLATOR_PASS(TCXONC)
+ IOC_CMD_TRANSLATOR_PASS(TCFLSH)
+ IOC_CMD_TRANSLATOR_PASS(TIOCEXCL)
+ IOC_CMD_TRANSLATOR_PASS(TIOCNXCL)
+ IOC_CMD_TRANSLATOR_PASS(TIOCSTI)
+ IOC_CMD_TRANSLATOR_PASS(TIOCSWINSZ)
+ IOC_CMD_TRANSLATOR_PASS(TIOCMBIS)
+ IOC_CMD_TRANSLATOR_PASS(TIOCMBIC)
+ IOC_CMD_TRANSLATOR_PASS(TIOCMSET)
+ IOC_CMD_TRANSLATOR_PASS(TIOCSETD)
+ IOC_CMD_TRANSLATOR_PASS(TCSBRK)
+
+ /* terminal related */
+ IOC_CMD_TRANSLATOR_PASS(TIOCGETD)
+ IOC_CMD_TRANSLATOR_PASS(TIOCGSID)
+ IOC_CMD_TRANSLATOR_PASS(TIOCNOTTY)
+ IOC_CMD_TRANSLATOR_PASS(TIOCPKT)
+
+ IOC_CMD_TRANSLATOR_FILTER(TCSETS, ict_tcsets)
+ IOC_CMD_TRANSLATOR_FILTER(TCSETSW, ict_tcsets)
+ IOC_CMD_TRANSLATOR_FILTER(TCSETSF, ict_tcsets)
+ IOC_CMD_TRANSLATOR_FILTER(TCSETA, ict_tcseta)
+ IOC_CMD_TRANSLATOR_FILTER(TCSETAW, ict_tcseta)
+ IOC_CMD_TRANSLATOR_FILTER(TCSETAF, ict_tcseta)
+ IOC_CMD_TRANSLATOR_FILTER(TCGETS, ict_tcgets)
+ IOC_CMD_TRANSLATOR_FILTER(TCGETA, ict_tcgeta)
+ IOC_CMD_TRANSLATOR_FILTER(TIOCGWINSZ, ict_tiocgwinsz)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_TCSBRKP, ict_tcsbrkp)
+ IOC_CMD_TRANSLATOR_FILTER(TIOCSPGRP, ict_tiocspgrp)
+ IOC_CMD_TRANSLATOR_FILTER(TIOCGPGRP, ict_tiocgpgrp)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCSPTLCK, ict_sptlock)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCGPTN, ict_gptn)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCSCTTY, ict_tiocsctty)
+
+ /* socket related */
+ IOC_CMD_TRANSLATOR_PASS(SIOCSPGRP)
+ IOC_CMD_TRANSLATOR_PASS(SIOCGPGRP)
+ IOC_CMD_TRANSLATOR_FILTER(SIOCATMARK, ict_siocatmark)
+ IOC_CMD_TRANSLATOR_FILTER(SIOCGIFFLAGS, ict_sioifreq)
+ IOC_CMD_TRANSLATOR_FILTER(SIOCSIFFLAGS, ict_sioifreq)
+ IOC_CMD_TRANSLATOR_FILTER(SIOCGIFADDR, ict_sioifreq)
+ IOC_CMD_TRANSLATOR_FILTER(SIOCSIFADDR, ict_sioifreq)
+ IOC_CMD_TRANSLATOR_FILTER(SIOCGIFDSTADDR, ict_sioifreq)
+ IOC_CMD_TRANSLATOR_FILTER(SIOCSIFDSTADDR, ict_sioifreq)
+ IOC_CMD_TRANSLATOR_FILTER(SIOCGIFBRDADDR, ict_sioifreq)
+ IOC_CMD_TRANSLATOR_FILTER(SIOCSIFBRDADDR, ict_sioifreq)
+ IOC_CMD_TRANSLATOR_FILTER(SIOCGIFNETMASK, ict_sioifreq)
+ IOC_CMD_TRANSLATOR_FILTER(SIOCSIFNETMASK, ict_sioifreq)
+ IOC_CMD_TRANSLATOR_FILTER(SIOCGIFMETRIC, ict_sioifreq)
+ IOC_CMD_TRANSLATOR_FILTER(SIOCSIFMETRIC, ict_sioifreq)
+ IOC_CMD_TRANSLATOR_FILTER(SIOCGIFMTU, ict_sioifreq)
+ IOC_CMD_TRANSLATOR_FILTER(SIOCSIFMTU, ict_sioifreq)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_SIOCGIFHWADDR, ict_sioifreq)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_SIOCSIFHWADDR, ict_sioifreq)
+ IOC_CMD_TRANSLATOR_FILTER(SIOCGIFCONF, ict_siocgifconf)
+
+ IOC_CMD_TRANSLATOR_END
+};
+
+static void
+lx_ioctl_vsd_free(void *data)
+{
+ kmem_free(data, sizeof (struct lx_cc));
+}
+
+void
+lx_ioctl_init()
+{
+ vsd_create(&lx_ioctl_vsd, lx_ioctl_vsd_free);
+}
+
+void
+lx_ioctl_fini()
+{
+ vsd_destroy(&lx_ioctl_vsd);
+}
+
+int
+lx_ioctl(int fdes, int cmd, intptr_t arg)
+{
+ file_t *fp;
+ int res = 0;
+ ioc_cmd_translator_t *ict;
+
+ if (cmd == LX_FIOCLEX || cmd == LX_FIONCLEX) {
+ res = f_setfd_error(fdes, (cmd == LX_FIOCLEX) ? FD_CLOEXEC : 0);
+ return ((res != 0) ? set_errno(res) : 0);
+ }
+
+ if ((fp = getf(fdes)) == NULL)
+ return (set_errno(EBADF));
+
+ /*
+ * Today, none of the ioctls supported by the emulation possess
+ * overlapping cmd values. Because of that, no type interrogation of
+ * the fd is done before executing specific ioctl emulation. It's
+ * assumed that the vnode-specific logic called by the emulation
+ * function will reject ioctl commands not supported by the fd.
+ */
+ ict = ioc_translators;
+ while (ict->ict_func != NULL) {
+ if (ict->ict_lxcmd == cmd)
+ break;
+ ict++;
+ }
+
+ if (ict->ict_func != NULL) {
+ res = ict->ict_func(fp, ict->ict_cmd, arg, ict->ict_lxcmd);
+ } else {
+ res = set_errno(EINVAL);
+ }
+
+ releasef(fdes);
+ return (res);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_kill.c b/usr/src/uts/common/brand/lx/syscall/lx_kill.c
new file mode 100644
index 0000000000..e20e906d33
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_kill.c
@@ -0,0 +1,399 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+
+
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/proc.h>
+#include <sys/zone.h>
+#include <sys/thread.h>
+#include <sys/signal.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_pid.h>
+#include <lx_signum.h>
+#include <sys/contract/process_impl.h>
+
+extern int kill(pid_t, int);
+
+/*
+ * Check if it is legal to send this signal to the init process. Linux
+ * kill(2) semantics dictate that no _unhandled_ signal may be sent to pid
+ * 1.
+ */
+static int
+init_sig_check(int sig, pid_t pid)
+{
+ proc_t *p;
+ int rv = 0;
+
+ mutex_enter(&pidlock);
+
+ if (((p = prfind(pid)) == NULL) || (p->p_stat == SIDL))
+ rv = ESRCH;
+ else if (sig && (sigismember(&cantmask, sig) ||
+ (PTOU(p)->u_signal[sig-1] == SIG_DFL) ||
+ (PTOU(p)->u_signal[sig-1] == SIG_IGN)))
+ rv = EPERM;
+
+ mutex_exit(&pidlock);
+
+ return (rv);
+}
+
+static long
+lx_thrkill(pid_t tgid, pid_t pid, int lx_sig, boolean_t tgkill)
+{
+ kthread_t *t;
+ proc_t *pp;
+ pid_t initpid;
+ sigqueue_t *sqp;
+ struct lx_lwp_data *br = ttolxlwp(curthread);
+ int tid = 1; /* default tid */
+ int sig, rv;
+
+ /*
+ * Unlike kill(2), Linux tkill(2) doesn't allow signals to
+ * be sent to process IDs <= 0 as it doesn't overlay any special
+ * semantics on the pid.
+ */
+ if ((pid <= 0) || ((lx_sig < 0) || (lx_sig > LX_NSIG)) ||
+ ((sig = ltos_signo[lx_sig]) < 0))
+ return (set_errno(EINVAL));
+
+ /*
+ * If the Linux pid is 1, translate the pid to the actual init
+ * pid for the zone. Note that Linux dictates that no unhandled
+ * signals may be sent to init, so check for that, too.
+ *
+ * Otherwise, extract the tid and real pid from the Linux pid.
+ */
+ initpid = curproc->p_zone->zone_proc_initpid;
+ if (pid == 1)
+ pid = initpid;
+ if ((pid == initpid) && ((rv = init_sig_check(sig, pid)) != 0))
+ return (set_errno(rv));
+ else if (lx_lpid_to_spair(pid, &pid, &tid) < 0)
+ return (set_errno(ESRCH));
+
+ if (tgkill && tgid != pid)
+ return (set_errno(ESRCH));
+
+ sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
+
+ /*
+ * Find the process for the passed pid...
+ */
+ mutex_enter(&pidlock);
+ if (((pp = prfind(pid)) == NULL) || (pp->p_stat == SIDL)) {
+ mutex_exit(&pidlock);
+ rv = set_errno(ESRCH);
+ goto free_and_exit;
+ }
+ mutex_enter(&pp->p_lock);
+ mutex_exit(&pidlock);
+
+ /*
+ * Deny permission to send the signal if either of the following
+ * is true:
+ *
+ * + The signal is SIGCONT and the target pid is not in the same
+ * session as the sender
+ *
+ * + prochasprocperm() shows the user lacks sufficient permission
+ * to send the signal to the target pid
+ */
+ if (((sig == SIGCONT) && (pp->p_sessp != curproc->p_sessp)) ||
+ (!prochasprocperm(pp, curproc, CRED()))) {
+ mutex_exit(&pp->p_lock);
+ rv = set_errno(EPERM);
+ goto free_and_exit;
+ }
+
+ /* check for the tid */
+ if ((t = idtot(pp, tid)) == NULL) {
+ mutex_exit(&pp->p_lock);
+ rv = set_errno(ESRCH);
+ goto free_and_exit;
+ }
+
+ /* a signal of 0 means just check for the existence of the thread */
+ if (lx_sig == 0) {
+ mutex_exit(&pp->p_lock);
+ rv = 0;
+ goto free_and_exit;
+ }
+
+ sqp->sq_info.si_signo = sig;
+ sqp->sq_info.si_code = SI_LWP;
+ sqp->sq_info.si_pid = br->br_pid;
+ sqp->sq_info.si_zoneid = getzoneid();
+ sqp->sq_info.si_uid = crgetruid(CRED());
+ sigaddqa(pp, t, sqp);
+
+ mutex_exit(&pp->p_lock);
+
+ return (0);
+
+free_and_exit:
+ kmem_free(sqp, sizeof (sigqueue_t));
+ return (rv);
+}
+
+long
+lx_tgkill(pid_t tgid, pid_t pid, int lx_sig)
+{
+ return (lx_thrkill(tgid, pid, lx_sig, B_TRUE));
+}
+
+long
+lx_tkill(pid_t pid, int lx_sig)
+{
+ return (lx_thrkill(0, pid, lx_sig, B_FALSE));
+}
+
+long
+lx_kill(pid_t lx_pid, int lx_sig)
+{
+ pid_t s_pid, initpid;
+ sigsend_t v;
+ zone_t *zone = curproc->p_zone;
+ struct proc *p;
+ int err, sig, nfound;
+
+ if ((lx_sig < 0) || (lx_sig > LX_NSIG) ||
+ ((sig = ltos_signo[lx_sig]) < 0))
+ return (set_errno(EINVAL));
+
+ /*
+ * Since some linux apps rely on init(1M) having PID 1, we
+ * transparently translate 1 to the real init(1M)'s pid. We then
+ * check to be sure that it is legal for this process to send this
+ * signal to init(1M).
+ */
+ initpid = zone->zone_proc_initpid;
+ if (lx_pid == 1 || lx_pid == -1) {
+ s_pid = initpid;
+ } else if (lx_pid == 0) {
+ s_pid = 0;
+ } else if (lx_pid > 0) {
+ if (lx_lpid_to_spair(lx_pid, &s_pid, NULL) != 0) {
+ /*
+ * If we didn't find this pid that means it doesn't
+ * exist in this zone.
+ */
+ return (set_errno(ESRCH));
+ }
+ } else {
+ ASSERT(lx_pid < 0);
+ if (lx_lpid_to_spair(-lx_pid, &s_pid, NULL) != 0) {
+ /*
+ * If we didn't find this pid it means that the
+ * process group leader doesn't exist in this zone.
+ * In this case assuming that the Linux pid is
+ * the same as the Solaris pid will get us the
+ * correct behavior.
+ */
+ s_pid = -lx_pid;
+ }
+ }
+
+ if ((s_pid == initpid) && ((err = init_sig_check(sig, s_pid)) != 0))
+ return (set_errno(err));
+
+ /*
+ * For individual processes, kill() semantics are the same between
+ * Solaris and Linux.
+ */
+ if (lx_pid >= 0)
+ return (kill(s_pid, sig));
+
+ /*
+ * In Solaris, sending a signal to -pid means "send a signal to
+ * everyone in process group pid." In Linux it means "send a
+ * signal to everyone in the group other than init." Sending a
+ * signal to -1 means "send a signal to every process except init
+ * and myself."
+ */
+
+ bzero(&v, sizeof (v));
+ v.sig = sig;
+ v.checkperm = 1;
+ v.sicode = SI_USER;
+ err = 0;
+
+ mutex_enter(&pidlock);
+
+ p = (lx_pid == -1) ? practive : pgfind(s_pid);
+ nfound = 0;
+ while (err == 0 && p != NULL) {
+ if ((p->p_zone == zone) && (p->p_stat != SIDL) &&
+ (p->p_pid != initpid) && (lx_pid < -1 || p != curproc)) {
+ nfound++;
+ err = sigsendproc(p, &v);
+ }
+
+ p = (lx_pid == -1) ? p->p_next : p->p_pglink;
+ }
+ mutex_exit(&pidlock);
+ if (nfound == 0)
+ err = ESRCH;
+ else if (err == 0 && v.perm == 0)
+ err = EPERM;
+ return (err ? set_errno(err) : 0);
+}
+
+/*
+ * This handles the unusual case where the user sends a non-queueable signal
+ * through rt_sigqueueinfo. Signals sent with codes that indicate they are
+ * queuable are sent through the sigqueue syscall via the user level function
+ * lx_rt_sigqueueinfo().
+ */
+long
+lx_rt_sigqueueinfo(pid_t tgid, int sig, siginfo_t *uinfo)
+{
+ proc_t *target_proc;
+ pid_t s_pid;
+ zone_t *zone = curproc->p_zone;
+ sigsend_t send;
+ int err;
+ siginfo_t kinfo;
+
+ if (copyin(uinfo, &kinfo, sizeof (siginfo_t)) != 0)
+ return (set_errno(EFAULT));
+ /* Unlike in lx_kill, this process id must be exact, no negatives. */
+ if (tgid == 0)
+ return (set_errno(ESRCH));
+ if (tgid < 0)
+ return (set_errno(EINVAL));
+ /*
+ * Translate init directly, otherwise use the convenient utility
+ * function to translate. Since we're sending to the whole group, we
+ * only need the solaris pid, and not the lwp id.
+ */
+ if (tgid == 1) {
+ s_pid = zone->zone_proc_initpid;
+ } else {
+ if (lx_lpid_to_spair(tgid, &s_pid, NULL) != 0) {
+ /*
+ * If we didn't find this pid that means it doesn't
+ * exist in this zone.
+ */
+ return (set_errno(ESRCH));
+ }
+ }
+ /*
+ * We shouldn't have queuable signals here, those are sent elsewhere by
+ * the useland handler for this emulated call.
+ */
+ if (!SI_CANQUEUE(kinfo.si_code)) {
+ return (set_errno(EINVAL));
+ }
+ /* Since our signal shouldn't queue, we just call sigsendproc(). */
+ bzero(&send, sizeof (send));
+ send.sig = sig;
+ send.checkperm = 1;
+ send.sicode = kinfo.si_code;
+ send.value = kinfo.si_value;
+
+ mutex_enter(&pidlock);
+ target_proc = prfind(s_pid);
+ err = 0;
+ if (target_proc != NULL) {
+ err = sigsendproc(target_proc, &send);
+ if (err == 0 && send.perm == 0)
+ err = EPERM;
+ } else {
+ err = ESRCH;
+ }
+ mutex_exit(&pidlock);
+
+ return (err ? set_errno(err) : 0);
+}
+
+/*
+ * Unlike the above function, this handles all system calls to rt_tgsigqueue
+ * regardless of si_code.
+ */
+long
+lx_rt_tgsigqueueinfo(pid_t tgid, pid_t tid, int sig, siginfo_t *uinfo)
+{
+ id_t s_tid;
+ pid_t s_pid;
+ proc_t *target_proc;
+ sigqueue_t *sqp;
+ kthread_t *t;
+ siginfo_t kinfo;
+
+ if (copyin(uinfo, &kinfo, sizeof (siginfo_t)) != 0)
+ return (set_errno(EFAULT));
+ if (lx_lpid_to_spair(tid, &s_pid, &s_tid) != 0)
+ return (set_errno(ESRCH));
+ /*
+ * For group leaders, solaris pid == linux pid, so the solaris leader
+ * pid should be the same as the tgid.
+ */
+ ASSERT(s_pid == tgid);
+
+ mutex_enter(&pidlock);
+ target_proc = prfind(s_pid);
+ if (target_proc != NULL)
+ mutex_enter(&target_proc->p_lock);
+ mutex_exit(&pidlock);
+
+ if (target_proc == NULL) {
+ return (set_errno(ESRCH));
+ }
+ if (sig < 0 || sig >= NSIG)
+ return (set_errno(EINVAL));
+
+ /*
+ * Some code adapted from lwp_kill, duplicated here because we do some
+ * customization to the sq_info field of sqp.
+ */
+ if ((t = idtot(target_proc, s_tid)) == NULL) {
+ mutex_exit(&target_proc->p_lock);
+ return (set_errno(ESRCH));
+ }
+ /* Just checking for existence of the process, not sending a signal. */
+ if (sig == 0) {
+ mutex_exit(&target_proc->p_lock);
+ return (0);
+ }
+ sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
+ sqp->sq_info.si_signo = sig;
+ sqp->sq_info.si_code = kinfo.si_code;
+ sqp->sq_info.si_pid = target_proc->p_pid;
+ sqp->sq_info.si_ctid = PRCTID(target_proc);
+ sqp->sq_info.si_zoneid = getzoneid();
+ sqp->sq_info.si_uid = crgetruid(CRED());
+ sigaddqa(target_proc, t, sqp);
+ mutex_exit(&target_proc->p_lock);
+ return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_modify_ldt.c b/usr/src/uts/common/brand/lx/syscall/lx_modify_ldt.c
new file mode 100644
index 0000000000..aa6e12a7d8
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_modify_ldt.c
@@ -0,0 +1,121 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/segments.h>
+#include <sys/archsystm.h>
+#include <sys/proc.h>
+#include <sys/sysi86.h>
+#include <sys/cmn_err.h>
+#include <sys/lx_ldt.h>
+
+/*
+ * Read the ldt_info structure in from the Linux app, convert it to an ssd
+ * structure, and then call setdscr() to do all the heavy lifting.
+ */
+static int
+write_ldt(void *data, ulong_t count)
+{
+ user_desc_t usd;
+ struct ssd ssd;
+ struct ldt_info ldt_inf;
+ proc_t *pp = curthread->t_procp;
+ int err;
+
+ if (count != sizeof (ldt_inf))
+ return (set_errno(EINVAL));
+
+ if (copyin(data, &ldt_inf, sizeof (ldt_inf)))
+ return (set_errno(EFAULT));
+
+ if (ldt_inf.entry_number >= MAXNLDT)
+ return (set_errno(EINVAL));
+
+ LDT_INFO_TO_DESC(&ldt_inf, &usd);
+ usd_to_ssd(&usd, &ssd, SEL_LDT(ldt_inf.entry_number));
+
+ /*
+ * Get everyone into a safe state before changing the LDT.
+ */
+ if (!holdlwps(SHOLDFORK1))
+ return (set_errno(EINTR));
+
+ err = setdscr(&ssd);
+
+ /*
+ * Release the hounds!
+ */
+ mutex_enter(&pp->p_lock);
+ continuelwps(pp);
+ mutex_exit(&pp->p_lock);
+
+ return (err ? set_errno(err) : 0);
+}
+
+static int
+read_ldt(void *uptr, ulong_t count)
+{
+ proc_t *pp = curproc;
+ int bytes;
+
+ if (pp->p_ldt == NULL)
+ return (0);
+
+ bytes = (pp->p_ldtlimit + 1) * sizeof (user_desc_t);
+ if (bytes > count)
+ bytes = count;
+
+ if (copyout(pp->p_ldt, uptr, bytes))
+ return (set_errno(EFAULT));
+
+ return (bytes);
+}
+
+long
+lx_modify_ldt(int op, void *data, ulong_t count)
+{
+ int rval;
+
+ switch (op) {
+ case 0:
+ rval = read_ldt(data, count);
+ break;
+
+ case 1:
+ rval = write_ldt(data, count);
+ break;
+
+ default:
+ rval = set_errno(ENOSYS);
+ break;
+ }
+
+ return (rval);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_pipe.c b/usr/src/uts/common/brand/lx/syscall/lx_pipe.c
new file mode 100644
index 0000000000..cef549141e
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_pipe.c
@@ -0,0 +1,180 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T. All Rights Reserved.
+ *
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2013 OmniTI Computer Consulting, Inc. All rights reserved.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/zone.h>
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/thread.h>
+#include <sys/cpuvar.h>
+#include <sys/cred.h>
+#include <sys/user.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/stream.h>
+#include <sys/strsubr.h>
+#include <sys/errno.h>
+#include <sys/debug.h>
+#include <sys/fs/fifonode.h>
+#include <sys/fcntl.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+
+/*
+ * Based on native pipe(2) system call, except that the pipe is half-duplex.
+ */
+static int
+lx_hd_pipe(intptr_t arg, int flags)
+{
+ vnode_t *vp1, *vp2;
+ struct file *fp1, *fp2;
+ int error = 0;
+ int flag1, flag2, iflags;
+ int fd1, fd2;
+
+ /*
+ * Validate allowed flags.
+ */
+ if ((flags & ~(FCLOEXEC|FNONBLOCK)) != 0) {
+ return (set_errno(EINVAL));
+ }
+ /*
+ * Allocate and initialize two vnodes.
+ */
+ makepipe(&vp1, &vp2);
+
+ /*
+ * Allocate and initialize two file table entries and two
+ * file pointers. The first file pointer is open for read and the
+ * second is open for write.
+ */
+ if ((error = falloc(vp1, FREAD, &fp1, &fd1)) != 0) {
+ VN_RELE(vp1);
+ VN_RELE(vp2);
+ return (set_errno(error));
+ }
+
+ if ((error = falloc(vp2, FWRITE, &fp2, &fd2)) != 0)
+ goto out2;
+
+ /*
+ * Create two stream heads and attach to each vnode.
+ */
+ if ((error = fifo_stropen(&vp1, FREAD, fp1->f_cred, 0, 0)) != 0)
+ goto out;
+
+ if ((error = fifo_stropen(&vp2, FWRITE, fp2->f_cred, 0, 0)) != 0) {
+ (void) VOP_CLOSE(vp1, FREAD, 1, (offset_t)0,
+ fp1->f_cred, NULL);
+ goto out;
+ }
+
+ strmate(vp1, vp2);
+
+ VTOF(vp1)->fn_ino = VTOF(vp2)->fn_ino = fifogetid();
+
+ /*
+ * Set the O_NONBLOCK flag if requested.
+ */
+ if (flags & FNONBLOCK) {
+ flag1 = fp1->f_flag;
+ flag2 = fp2->f_flag;
+ iflags = flags & FNONBLOCK;
+
+ if ((error = VOP_SETFL(vp1, flag1, iflags, fp1->f_cred,
+ NULL)) != 0) {
+ goto out_vop_close;
+ }
+ fp1->f_flag |= iflags;
+
+ if ((error = VOP_SETFL(vp2, flag2, iflags, fp2->f_cred,
+ NULL)) != 0) {
+ goto out_vop_close;
+ }
+ fp2->f_flag |= iflags;
+ }
+
+ /*
+ * Return the file descriptors to the user. They now
+ * point to two different vnodes which have different
+ * stream heads.
+ */
+ if (copyout(&fd1, &((int *)arg)[0], sizeof (int)) ||
+ copyout(&fd2, &((int *)arg)[1], sizeof (int))) {
+ error = EFAULT;
+ goto out_vop_close;
+ }
+
+ /*
+ * Now fill in the entries that falloc reserved
+ */
+ mutex_exit(&fp1->f_tlock);
+ mutex_exit(&fp2->f_tlock);
+ setf(fd1, fp1);
+ setf(fd2, fp2);
+
+ /*
+ * Optionally set the FCLOEXEC flag
+ */
+ if ((flags & FCLOEXEC) != 0) {
+ f_setfd(fd1, FD_CLOEXEC);
+ f_setfd(fd2, FD_CLOEXEC);
+ }
+
+ return (0);
+out_vop_close:
+ (void) VOP_CLOSE(vp1, FREAD, 1, (offset_t)0, fp1->f_cred, NULL);
+ (void) VOP_CLOSE(vp2, FWRITE, 1, (offset_t)0, fp2->f_cred, NULL);
+out:
+ setf(fd2, NULL);
+ unfalloc(fp2);
+out2:
+ setf(fd1, NULL);
+ unfalloc(fp1);
+ VN_RELE(vp1);
+ VN_RELE(vp2);
+ return (set_errno(error));
+}
+
+/*
+ * pipe(2) system call.
+ */
+long
+lx_pipe(intptr_t arg)
+{
+ return (lx_hd_pipe(arg, 0));
+}
+
+/*
+ * pipe2(2) system call.
+ */
+long
+lx_pipe2(intptr_t arg, int flags)
+{
+ return (lx_hd_pipe(arg, flags));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_rw.c b/usr/src/uts/common/brand/lx/syscall/lx_rw.c
new file mode 100644
index 0000000000..57cc3e54d0
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_rw.c
@@ -0,0 +1,39 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+
+/* uts/common/syscall/rw.c */
+extern ssize_t read(int fdes, void *cbuf, size_t count);
+
+ssize_t
+lx_read(int fd, void *buf, size_t nbyte)
+{
+ file_t *fp;
+ vtype_t t;
+
+ if ((fp = getf(fd)) == NULL)
+ return (set_errno(EBADF));
+ t = fp->f_vnode->v_type;
+ releasef(fd);
+
+ if (t == VDIR)
+ return (set_errno(EISDIR));
+
+ return (read(fd, buf, nbyte));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_sched.c b/usr/src/uts/common/brand/lx/syscall/lx_sched.c
new file mode 100644
index 0000000000..bb91a752d2
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_sched.c
@@ -0,0 +1,513 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/proc.h>
+#include <sys/cpu.h>
+#include <sys/rtpriocntl.h>
+#include <sys/tspriocntl.h>
+#include <sys/processor.h>
+#include <sys/brand.h>
+#include <sys/lx_pid.h>
+#include <sys/lx_sched.h>
+#include <sys/lx_brand.h>
+
+extern long priocntl_common(int, procset_t *, int, caddr_t, caddr_t, uio_seg_t);
+
+int
+lx_sched_affinity(int cmd, uintptr_t pid, int len, uintptr_t maskp,
+ int64_t *rval)
+{
+ pid_t s_pid;
+ id_t s_tid;
+ kthread_t *t = curthread;
+ lx_lwp_data_t *lx_lwp;
+
+ if (cmd != B_GET_AFFINITY_MASK && cmd != B_SET_AFFINITY_MASK)
+ return (set_errno(EINVAL));
+
+ /*
+ * The caller wants to know how large the mask should be.
+ */
+ if (cmd == B_GET_AFFINITY_MASK && len == 0) {
+ *rval = sizeof (lx_affmask_t);
+ return (0);
+ }
+
+ /*
+ * Otherwise, ensure they have a large enough mask.
+ */
+ if (cmd == B_GET_AFFINITY_MASK && len < sizeof (lx_affmask_t)) {
+ *rval = -1;
+ return (set_errno(EINVAL));
+ }
+
+ if (pid == 0) {
+ s_pid = curproc->p_pid;
+ s_tid = curthread->t_tid;
+ } else if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) == -1) {
+ return (set_errno(ESRCH));
+ }
+
+ /*
+ * For now, we only support manipulating threads in the
+ * same process.
+ */
+ if (curproc->p_pid != s_pid)
+ return (set_errno(EPERM));
+
+ /*
+ * We must hold the process lock so that the thread list
+ * doesn't change while we're looking at it. We'll hold
+ * the lock until we no longer reference the
+ * corresponding lwp.
+ */
+
+ mutex_enter(&curproc->p_lock);
+
+ do {
+ if (t->t_tid == s_tid)
+ break;
+ t = t->t_forw;
+ } while (t != curthread);
+
+ /*
+ * If the given PID is in the current thread's process,
+ * then we _must_ find it in the process's thread list.
+ */
+ ASSERT(t->t_tid == s_tid);
+
+ lx_lwp = t->t_lwp->lwp_brand;
+
+ if (cmd == B_SET_AFFINITY_MASK) {
+ if (copyin_nowatch((void *)maskp, &lx_lwp->br_affinitymask,
+ sizeof (lx_affmask_t)) != 0) {
+ mutex_exit(&curproc->p_lock);
+ return (set_errno(EFAULT));
+ }
+
+ *rval = 0;
+ } else {
+ if (copyout_nowatch(&lx_lwp->br_affinitymask, (void *)maskp,
+ sizeof (lx_affmask_t)) != 0) {
+ mutex_exit(&curproc->p_lock);
+ return (set_errno(EFAULT));
+ }
+
+ *rval = sizeof (lx_affmask_t);
+ }
+
+ mutex_exit(&curproc->p_lock);
+ return (0);
+}
+
+long
+lx_sched_setscheduler(l_pid_t pid, int policy, struct lx_sched_param *param)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ procset_t procset;
+ procset_t procset_cid;
+ pcparms_t pcparm;
+ pcinfo_t pcinfo;
+ struct lx_sched_param sched_param;
+ tsparms_t *tsp;
+ int prio, maxupri;
+ int rv;
+
+ if (pid < 0)
+ return (set_errno(ESRCH));
+
+ if ((rv = sched_setprocset(&procset, pid)))
+ return (rv);
+
+ if (copyin(param, &sched_param, sizeof (sched_param)))
+ return (set_errno(EFAULT));
+
+ prio = sched_param.lx_sched_prio;
+
+ if (policy < 0) {
+ /*
+ * get the class id
+ */
+ pcparm.pc_cid = PC_CLNULL;
+ (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ /*
+ * get the current policy
+ */
+ bzero(&pcinfo, sizeof (pcinfo));
+ pcinfo.pc_cid = pcparm.pc_cid;
+ (void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ if (strcmp(pcinfo.pc_clname, "TS") == 0)
+ policy = LX_SCHED_OTHER;
+ else if (strcmp(pcinfo.pc_clname, "RT") == 0)
+ policy = ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs ==
+ RT_TQINF ? LX_SCHED_FIFO : LX_SCHED_RR;
+ else
+ return (set_errno(EINVAL));
+ }
+
+ bzero(&pcinfo, sizeof (pcinfo));
+ bzero(&pcparm, sizeof (pcparm));
+ setprocset(&procset_cid, POP_AND, P_PID, 0, P_ALL, 0);
+ switch (policy) {
+ case LX_SCHED_FIFO:
+ case LX_SCHED_RR:
+ (void) strcpy(pcinfo.pc_clname, "RT");
+ (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ if (prio < 0 ||
+ prio > ((rtinfo_t *)pcinfo.pc_clinfo)->rt_maxpri)
+ return (set_errno(EINVAL));
+ pcparm.pc_cid = pcinfo.pc_cid;
+ ((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio;
+ ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs =
+ policy == LX_SCHED_RR ? RT_TQDEF : RT_TQINF;
+ break;
+
+ case LX_SCHED_OTHER:
+ (void) strcpy(pcinfo.pc_clname, "TS");
+ (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ maxupri = ((tsinfo_t *)pcinfo.pc_clinfo)->ts_maxupri;
+ if (prio > maxupri || prio < -maxupri)
+ return (set_errno(EINVAL));
+
+ pcparm.pc_cid = pcinfo.pc_cid;
+ tsp = (tsparms_t *)pcparm.pc_clparms;
+ tsp->ts_upri = prio;
+ tsp->ts_uprilim = TS_NOCHANGE;
+ break;
+
+ default:
+ return (set_errno(EINVAL));
+ }
+
+ /*
+ * finally set scheduling policy and parameters
+ */
+ (void) do_priocntlsys(PC_SETPARMS, &procset, &pcparm);
+
+ return (0);
+}
+
+long
+lx_sched_getscheduler(l_pid_t pid)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ procset_t procset;
+ pcparms_t pcparm;
+ pcinfo_t pcinfo;
+ int policy;
+ int rv;
+
+ if (pid < 0)
+ return (set_errno(ESRCH));
+
+ if ((rv = sched_setprocset(&procset, pid)))
+ return (rv);
+
+ /*
+ * get the class id
+ */
+ pcparm.pc_cid = PC_CLNULL;
+ (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ /*
+ * get the class info and identify the equivalent linux policy
+ */
+ bzero(&pcinfo, sizeof (pcinfo));
+ pcinfo.pc_cid = pcparm.pc_cid;
+ (void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ if (strcmp(pcinfo.pc_clname, "TS") == 0)
+ policy = LX_SCHED_OTHER;
+ else if (strcmp(pcinfo.pc_clname, "RT") == 0)
+ policy = ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs ==
+ RT_TQINF ? LX_SCHED_FIFO : LX_SCHED_RR;
+ else
+ policy = set_errno(EINVAL);
+
+ return (policy);
+}
+
+long
+lx_sched_setparam(l_pid_t pid, struct lx_sched_param *param)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ procset_t procset;
+ procset_t procset_cid;
+ pcparms_t pcparm;
+ pcinfo_t pcinfo;
+ struct lx_sched_param sched_param;
+ tsparms_t *tsp;
+ int policy;
+ int prio, maxupri;
+ int rv;
+
+ if (pid < 0)
+ return (set_errno(ESRCH));
+
+ if ((rv = sched_setprocset(&procset, pid)))
+ return (rv);
+
+ if (copyin(param, &sched_param, sizeof (sched_param)))
+ return (set_errno(EFAULT));
+
+ prio = sched_param.lx_sched_prio;
+
+ /*
+ * get the class id
+ */
+ pcparm.pc_cid = PC_CLNULL;
+ (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ /*
+ * get the current policy
+ */
+ bzero(&pcinfo, sizeof (pcinfo));
+ pcinfo.pc_cid = pcparm.pc_cid;
+ (void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ if (strcmp(pcinfo.pc_clname, "TS") == 0)
+ policy = LX_SCHED_OTHER;
+ else if (strcmp(pcinfo.pc_clname, "RT") == 0)
+ policy = ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs ==
+ RT_TQINF ? LX_SCHED_FIFO : LX_SCHED_RR;
+ else
+ return (set_errno(EINVAL));
+
+ bzero(&pcinfo, sizeof (pcinfo));
+ bzero(&pcparm, sizeof (pcparm));
+ setprocset(&procset_cid, POP_AND, P_PID, 0, P_ALL, 0);
+ switch (policy) {
+ case LX_SCHED_FIFO:
+ case LX_SCHED_RR:
+ (void) strcpy(pcinfo.pc_clname, "RT");
+ (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ if (prio < 0 ||
+ prio > ((rtinfo_t *)pcinfo.pc_clinfo)->rt_maxpri)
+ return (set_errno(EINVAL));
+ pcparm.pc_cid = pcinfo.pc_cid;
+ ((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio;
+ ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs =
+ policy == LX_SCHED_RR ? RT_TQDEF : RT_TQINF;
+ break;
+
+ case LX_SCHED_OTHER:
+ (void) strcpy(pcinfo.pc_clname, "TS");
+ (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ maxupri = ((tsinfo_t *)pcinfo.pc_clinfo)->ts_maxupri;
+ if (prio > maxupri || prio < -maxupri)
+ return (set_errno(EINVAL));
+
+ pcparm.pc_cid = pcinfo.pc_cid;
+ tsp = (tsparms_t *)pcparm.pc_clparms;
+ tsp->ts_upri = prio;
+ tsp->ts_uprilim = TS_NOCHANGE;
+ break;
+
+ default:
+ return (set_errno(EINVAL));
+ }
+
+ /*
+ * finally set scheduling policy and parameters
+ */
+ (void) do_priocntlsys(PC_SETPARMS, &procset, &pcparm);
+
+ return (0);
+}
+
+long
+lx_sched_getparam(l_pid_t pid, struct lx_sched_param *param)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ struct lx_sched_param local_param;
+ procset_t procset;
+ pcparms_t pcparm;
+ pcinfo_t pcinfo;
+ tsinfo_t *tsi;
+ int prio, scale;
+ int rv;
+
+ if (pid < 0)
+ return (set_errno(ESRCH));
+
+ if ((rv = sched_setprocset(&procset, pid)))
+ return (rv);
+
+ /*
+ * get the class id
+ */
+ pcparm.pc_cid = PC_CLNULL;
+ (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ /*
+ * get the class info and identify the equivalent linux policy
+ */
+ bzero(&pcinfo, sizeof (pcinfo));
+ pcinfo.pc_cid = pcparm.pc_cid;
+ (void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ bzero(&local_param, sizeof (local_param));
+ if (strcmp(pcinfo.pc_clname, "TS") == 0) {
+ /*
+ * I don't know if we need to do this, coz it can't be
+ * changed from zero anyway.....
+ */
+ tsi = (tsinfo_t *)pcinfo.pc_clinfo;
+ prio = ((tsparms_t *)pcparm.pc_clparms)->ts_upri;
+ scale = tsi->ts_maxupri;
+ if (scale == 0)
+ local_param.lx_sched_prio = 0;
+ else
+ local_param.lx_sched_prio = -(prio * 20) / scale;
+ } else if (strcmp(pcinfo.pc_clname, "RT") == 0)
+ local_param.lx_sched_prio =
+ ((rtparms_t *)pcparm.pc_clparms)->rt_pri;
+ else
+ rv = set_errno(EINVAL);
+
+ if (rv == 0)
+ if (copyout(&local_param, param, sizeof (local_param)))
+ return (set_errno(EFAULT));
+
+ return (rv);
+}
+
+long
+lx_sched_rr_get_interval(l_pid_t pid, struct timespec *ival)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ struct timespec interval;
+ procset_t procset;
+ pcparms_t pcparm;
+ pcinfo_t pcinfo;
+ int rv;
+
+ if (pid < 0)
+ return (set_errno(ESRCH));
+
+ if ((rv = sched_setprocset(&procset, pid)))
+ return (rv);
+
+ /*
+ * get the class id
+ */
+ pcparm.pc_cid = PC_CLNULL;
+ (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ /*
+ * get the class info and identify the equivalent linux policy
+ */
+ setprocset(&procset, POP_AND, P_PID, 0, P_ALL, 0);
+ bzero(&pcinfo, sizeof (pcinfo));
+ (void) strcpy(pcinfo.pc_clname, "RT");
+ (void) do_priocntlsys(PC_GETCID, &procset, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ if (pcparm.pc_cid == pcinfo.pc_cid &&
+ ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs != RT_TQINF) {
+ interval.tv_sec = ((rtparms_t *)pcparm.pc_clparms)->rt_tqsecs;
+ interval.tv_nsec = ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs;
+
+ if (copyout(&interval, ival, sizeof (interval)))
+ return (set_errno(EFAULT));
+
+ return (0);
+ }
+
+ return (set_errno(EINVAL));
+}
+
+int
+sched_setprocset(procset_t *procset, l_pid_t pid)
+{
+ id_t lid, rid;
+ idtype_t lidtype, ridtype;
+
+ /*
+ * define the target lwp
+ */
+ if (pid == 0) {
+ ridtype = P_ALL;
+ lidtype = P_PID;
+ rid = 0;
+ lid = P_MYID;
+ } else {
+ if (lx_lpid_to_spair(pid, &pid, &lid) < 0)
+ return (set_errno(ESRCH));
+ if (pid != curproc->p_pid)
+ return (set_errno(ESRCH));
+ rid = 0;
+ ridtype = P_ALL;
+ lidtype = P_LWPID;
+ }
+ setprocset(procset, POP_AND, lidtype, lid, ridtype, rid);
+
+ return (0);
+}
+
+long
+do_priocntlsys(int cmd, procset_t *procset, void *arg)
+{
+ return (priocntl_common(PC_VERSION, procset, cmd, (caddr_t)arg, 0,
+ UIO_SYSSPACE));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_sysinfo.c b/usr/src/uts/common/brand/lx/syscall/lx_sysinfo.c
new file mode 100644
index 0000000000..6151656cf0
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_sysinfo.c
@@ -0,0 +1,140 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <vm/anon.h>
+#include <sys/systm.h>
+#include <sys/sysmacros.h>
+#include <sys/zone.h>
+#include <sys/time.h>
+
+struct lx_sysinfo {
+ int64_t si_uptime; /* Seconds since boot */
+ uint64_t si_loads[3]; /* 1, 5, and 15 minute avg runq length */
+ uint64_t si_totalram; /* Total memory size */
+ uint64_t si_freeram; /* Available memory */
+ uint64_t si_sharedram; /* Shared memory */
+ uint64_t si_bufferram; /* Buffer memory */
+ uint64_t si_totalswap; /* Total swap space */
+ uint64_t si_freeswap; /* Avail swap space */
+ uint16_t si_procs; /* Process count */
+ uint16_t si_pad; /* Padding */
+ uint64_t si_totalhigh; /* High memory size */
+ uint64_t si_freehigh; /* Avail high memory */
+ uint32_t si_mem_unit; /* Unit size of memory fields */
+};
+
+extern pgcnt_t swapfs_minfree;
+
+long
+lx_sysinfo(struct lx_sysinfo *sip)
+{
+ struct lx_sysinfo si;
+ zone_t *zone = curthread->t_procp->p_zone;
+ uint64_t zphysmem, zfreemem, ztotswap, zfreeswap;
+
+ si.si_uptime = gethrestime_sec() - zone->zone_boot_time;
+
+ si.si_loads[0] = zone->zone_hp_avenrun[0];
+ si.si_loads[1] = zone->zone_hp_avenrun[1];
+ si.si_loads[2] = zone->zone_hp_avenrun[2];
+
+ /*
+ * In linux each thread looks like a process, so we conflate the
+ * two in this stat as well.
+ */
+ si.si_procs = (int32_t)zone->zone_nlwps;
+
+ /*
+ * If memory or swap limits are set on the zone, use those, otherwise
+ * use the system values. physmem and freemem are in pages, but the
+ * zone values are in bytes. Likewise, ani_max and ani_free are in
+ * pages.
+ */
+ if (zone->zone_phys_mem_ctl == UINT64_MAX) {
+ zphysmem = physmem;
+ zfreemem = freemem;
+ } else {
+ zphysmem = btop(zone->zone_phys_mem_ctl);
+ zfreemem = btop(zone->zone_phys_mem_ctl - zone->zone_phys_mem);
+ }
+
+ if (zone->zone_max_swap_ctl == UINT64_MAX) {
+ ztotswap = k_anoninfo.ani_max;
+ zfreeswap = k_anoninfo.ani_free;
+ } else {
+ /*
+ * See the comment in swapctl for a description of how free is
+ * calculated within a zone.
+ */
+ rctl_qty_t used;
+ spgcnt_t avail;
+ uint64_t max;
+
+ avail = MAX((spgcnt_t)(availrmem - swapfs_minfree), 0);
+ max = k_anoninfo.ani_max + k_anoninfo.ani_mem_resv + avail;
+
+ mutex_enter(&zone->zone_mem_lock);
+ ztotswap = btop(zone->zone_max_swap_ctl);
+ used = btop(zone->zone_max_swap);
+ mutex_exit(&zone->zone_mem_lock);
+
+ zfreeswap = MIN(ztotswap, max) - used;
+ }
+
+ /*
+ * If the maximum memory stat is less than 1^20 pages (i.e. 4GB),
+ * then we report the result in bytes. Otherwise we use pages.
+ * Once we start supporting >1TB systems/zones, we'll need a third
+ * option.
+ */
+ if (MAX(zphysmem, ztotswap) < 1024 * 1024) {
+ si.si_totalram = ptob(zphysmem);
+ si.si_freeram = ptob(zfreemem);
+ si.si_totalswap = ptob(ztotswap);
+ si.si_freeswap = ptob(zfreeswap);
+ si.si_mem_unit = 1;
+ } else {
+ si.si_totalram = zphysmem;
+ si.si_freeram = zfreemem;
+ si.si_totalswap = ztotswap;
+ si.si_freeswap = zfreeswap;
+ si.si_mem_unit = PAGESIZE;
+ }
+ si.si_bufferram = 0;
+ si.si_sharedram = 0;
+
+ /*
+ * These two stats refer to high physical memory. If an
+ * application running in a Linux zone cares about this, then
+ * either it or we are broken.
+ */
+ si.si_totalhigh = 0;
+ si.si_freehigh = 0;
+
+ if (copyout(&si, sip, sizeof (si)) != 0)
+ return (set_errno(EFAULT));
+ return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_thread_area.c b/usr/src/uts/common/brand/lx/syscall/lx_thread_area.c
new file mode 100644
index 0000000000..3d72c0baa2
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_thread_area.c
@@ -0,0 +1,173 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/cpuvar.h>
+#include <sys/archsystm.h>
+#include <sys/proc.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_ldt.h>
+#include <sys/x86_archext.h>
+#include <sys/controlregs.h>
+
+/* For arch_prctl(2) */
+#define LX_ARCH_SET_GS 0x1001
+#define LX_ARCH_SET_FS 0x1002
+#define LX_ARCH_GET_FS 0x1003
+#define LX_ARCH_GET_GS 0x1004
+
+long
+lx_arch_prctl(int code, ulong_t addr)
+{
+#if defined(__amd64)
+ struct lx_lwp_data *llwp = ttolxlwp(curthread);
+ pcb_t *pcb;
+
+
+ /* We currently only support [g|s]et_fs */
+ switch (code) {
+ case LX_ARCH_GET_FS:
+ if (copyout(&llwp->br_lx_fsbase, (void *)addr,
+ sizeof (llwp->br_lx_fsbase)))
+ return (set_errno(EFAULT));
+ break;
+ case LX_ARCH_SET_FS:
+ llwp->br_lx_fsbase = addr;
+ /*
+ * Save current native libc fsbase. Don't use rdmsr since the
+ * value might get changed before we get to this code. We
+ * use the value from the pcb which the native libc should
+ * have already setup via syslwp_private.
+ */
+ pcb = (pcb_t *)&curthread->t_lwp->lwp_pcb;
+ llwp->br_ntv_fsbase = pcb->pcb_fsbase;
+ break;
+ default:
+ return (set_errno(EINVAL));
+ }
+#endif
+
+ return (0);
+}
+
+long
+lx_get_thread_area(struct ldt_info *inf)
+{
+ struct lx_lwp_data *jlwp = ttolxlwp(curthread);
+ struct ldt_info ldt_inf;
+ user_desc_t *dscrp;
+ int entry;
+
+ if (fuword32(&inf->entry_number, (uint32_t *)&entry))
+ return (set_errno(EFAULT));
+
+ if (entry < GDT_TLSMIN || entry > GDT_TLSMAX)
+ return (set_errno(EINVAL));
+
+ dscrp = jlwp->br_tls + entry - GDT_TLSMIN;
+
+ /*
+ * convert the solaris ldt to the linux format expected by the
+ * caller
+ */
+ DESC_TO_LDT_INFO(dscrp, &ldt_inf);
+ ldt_inf.entry_number = entry;
+
+ if (copyout(&ldt_inf, inf, sizeof (struct ldt_info)))
+ return (set_errno(EFAULT));
+
+ return (0);
+}
+
+long
+lx_set_thread_area(struct ldt_info *inf)
+{
+ struct lx_lwp_data *jlwp = ttolxlwp(curthread);
+ struct ldt_info ldt_inf;
+ user_desc_t *dscrp;
+ int entry;
+ int i;
+
+ /* Check that casts for accessing the words in user_desc are valid */
+ ASSERT(sizeof (user_desc_t) == 8);
+
+ if (copyin(inf, &ldt_inf, sizeof (ldt_inf)))
+ return (set_errno(EFAULT));
+
+ entry = ldt_inf.entry_number;
+ if (entry == -1) {
+ /*
+ * Find an empty entry in the tls for this thread.
+ * The casts assume each user_desc_t entry is 8 bytes.
+ */
+ for (i = 0, dscrp = jlwp->br_tls; i < LX_TLSNUM; i++, dscrp++) {
+ if (((uint_t *)dscrp)[0] == 0 &&
+ ((uint_t *)dscrp)[1] == 0)
+ break;
+ }
+
+ if (i < LX_TLSNUM) {
+ /*
+ * found one
+ */
+ entry = i + GDT_TLSMIN;
+ if (suword32(&inf->entry_number, entry))
+ return (set_errno(EFAULT));
+ } else {
+ return (set_errno(ESRCH));
+ }
+ }
+
+ if (entry < GDT_TLSMIN || entry > GDT_TLSMAX)
+ return (set_errno(EINVAL));
+
+ /*
+ * convert the linux ldt info to standard intel descriptor
+ */
+ dscrp = jlwp->br_tls + entry - GDT_TLSMIN;
+
+ if (LDT_INFO_EMPTY(&ldt_inf)) {
+ ((uint_t *)dscrp)[0] = 0;
+ ((uint_t *)dscrp)[1] = 0;
+ } else {
+ LDT_INFO_TO_DESC(&ldt_inf, dscrp);
+ }
+
+ /*
+ * update the gdt with the new descriptor
+ */
+ kpreempt_disable();
+
+ for (i = 0, dscrp = jlwp->br_tls; i < LX_TLSNUM; i++, dscrp++)
+ lx_set_gdt(GDT_TLSMIN + i, dscrp);
+
+ kpreempt_enable();
+
+ return (0);
+}
diff --git a/usr/src/uts/common/brand/sn1/sn1_brand.c b/usr/src/uts/common/brand/sn1/sn1_brand.c
index d61928d578..317a9da895 100644
--- a/usr/src/uts/common/brand/sn1/sn1_brand.c
+++ b/usr/src/uts/common/brand/sn1/sn1_brand.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
*/
#include <sys/errno.h>
@@ -48,7 +49,7 @@ void sn1_setbrand(proc_t *);
int sn1_getattr(zone_t *, int, void *, size_t *);
int sn1_setattr(zone_t *, int, void *, size_t);
int sn1_brandsys(int, int64_t *, uintptr_t, uintptr_t, uintptr_t,
- uintptr_t, uintptr_t, uintptr_t);
+ uintptr_t, uintptr_t);
void sn1_copy_procdata(proc_t *, proc_t *);
void sn1_proc_exit(struct proc *, klwp_t *);
void sn1_exec();
@@ -78,7 +79,12 @@ struct brand_ops sn1_brops = {
sn1_elfexec,
NULL,
NULL,
+ NULL,
NSIG,
+ NULL,
+ NULL,
+ NULL,
+ NULL
};
#ifdef sparc
@@ -94,9 +100,11 @@ struct brand_mach_ops sn1_mops = {
struct brand_mach_ops sn1_mops = {
sn1_brand_sysenter_callback,
+ NULL,
sn1_brand_int91_callback,
sn1_brand_syscall_callback,
- sn1_brand_syscall32_callback
+ sn1_brand_syscall32_callback,
+ NULL
};
#else /* ! __amd64 */
@@ -104,7 +112,9 @@ struct brand_mach_ops sn1_mops = {
struct brand_mach_ops sn1_mops = {
sn1_brand_sysenter_callback,
NULL,
+ NULL,
sn1_brand_syscall_callback,
+ NULL,
NULL
};
#endif /* __amd64 */
@@ -115,7 +125,8 @@ struct brand sn1_brand = {
BRAND_VER_1,
"sn1",
&sn1_brops,
- &sn1_mops
+ &sn1_mops,
+ sizeof (brand_proc_data_t),
};
static struct modlbrand modlbrand = {
@@ -151,7 +162,7 @@ sn1_setattr(zone_t *zone, int attr, void *buf, size_t bufsize)
/*ARGSUSED*/
int
sn1_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
- uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6)
+ uintptr_t arg3, uintptr_t arg4, uintptr_t arg5)
{
int res;
@@ -225,7 +236,7 @@ sn1_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
{
return (brand_solaris_elfexec(vp, uap, args, idatap, level, execsz,
setid, exec_file, cred, brand_action, &sn1_brand, SN1_BRANDNAME,
- SN1_LIB, SN1_LIB32, SN1_LINKER, SN1_LINKER32));
+ SN1_LIB, SN1_LIB32));
}
int
diff --git a/usr/src/uts/common/brand/sn1/sn1_brand.h b/usr/src/uts/common/brand/sn1/sn1_brand.h
index b487745e21..fef9dc128b 100644
--- a/usr/src/uts/common/brand/sn1/sn1_brand.h
+++ b/usr/src/uts/common/brand/sn1/sn1_brand.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#ifndef _SN1_BRAND_H
@@ -37,20 +38,14 @@ extern "C" {
#define SN1_VERSION SN1_VERSION_1
#define SN1_LIB_NAME "sn1_brand.so.1"
-#define SN1_LINKER_NAME "ld.so.1"
#define SN1_LIB32 BRAND_NATIVE_DIR "usr/lib/" SN1_LIB_NAME
-#define SN1_LINKER32 "/lib/" SN1_LINKER_NAME
-
#define SN1_LIB64 BRAND_NATIVE_DIR "usr/lib/64/" SN1_LIB_NAME
-#define SN1_LINKER64 "/lib/64/" SN1_LINKER_NAME
#if defined(_LP64)
#define SN1_LIB SN1_LIB64
-#define SN1_LINKER SN1_LINKER64
#else /* !_LP64 */
#define SN1_LIB SN1_LIB32
-#define SN1_LINKER SN1_LINKER32
#endif /* !_LP64 */
#if defined(_KERNEL)
diff --git a/usr/src/uts/common/brand/sngl/sngl_brand.c b/usr/src/uts/common/brand/sngl/sngl_brand.c
new file mode 100644
index 0000000000..043b1f4cc5
--- /dev/null
+++ b/usr/src/uts/common/brand/sngl/sngl_brand.c
@@ -0,0 +1,257 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2014, Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/errno.h>
+#include <sys/exec.h>
+#include <sys/file.h>
+#include <sys/kmem.h>
+#include <sys/modctl.h>
+#include <sys/model.h>
+#include <sys/proc.h>
+#include <sys/syscall.h>
+#include <sys/systm.h>
+#include <sys/thread.h>
+#include <sys/cmn_err.h>
+#include <sys/archsystm.h>
+#include <sys/pathname.h>
+#include <sys/sunddi.h>
+
+#include <sys/machbrand.h>
+#include <sys/brand.h>
+#include "sngl_brand.h"
+
+char *sngl_emulation_table = NULL;
+
+void sngl_init_brand_data(zone_t *);
+void sngl_free_brand_data(zone_t *);
+void sngl_setbrand(proc_t *);
+int sngl_getattr(zone_t *, int, void *, size_t *);
+int sngl_setattr(zone_t *, int, void *, size_t);
+int sngl_brandsys(int, int64_t *, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t, uintptr_t);
+void sngl_copy_procdata(proc_t *, proc_t *);
+void sngl_proc_exit(struct proc *, klwp_t *);
+void sngl_exec();
+int sngl_initlwp(klwp_t *);
+void sngl_forklwp(klwp_t *, klwp_t *);
+void sngl_freelwp(klwp_t *);
+void sngl_lwpexit(klwp_t *);
+int sngl_elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int,
+ long *, int, caddr_t, cred_t *, int);
+
+/* SNGL brand */
+struct brand_ops sngl_brops = {
+ sngl_init_brand_data,
+ sngl_free_brand_data,
+ sngl_brandsys,
+ sngl_setbrand,
+ sngl_getattr,
+ sngl_setattr,
+ sngl_copy_procdata,
+ sngl_proc_exit,
+ sngl_exec,
+ lwp_setrval,
+ sngl_initlwp,
+ sngl_forklwp,
+ sngl_freelwp,
+ sngl_lwpexit,
+ sngl_elfexec,
+ NULL,
+ NULL,
+ NULL,
+ NSIG,
+ NULL,
+ NULL,
+ NULL,
+ NULL
+};
+
+#ifdef __amd64
+
+struct brand_mach_ops sngl_mops = {
+ sngl_brand_sysenter_callback,
+ sngl_brand_int91_callback,
+ sngl_brand_syscall_callback,
+ sngl_brand_syscall32_callback
+};
+
+#else /* ! __amd64 */
+
+struct brand_mach_ops sngl_mops = {
+ sngl_brand_sysenter_callback,
+ NULL,
+ sngl_brand_syscall_callback,
+ NULL
+};
+#endif /* __amd64 */
+
+struct brand sngl_brand = {
+ BRAND_VER_1,
+ "sngl",
+ &sngl_brops,
+ &sngl_mops,
+ sizeof (brand_proc_data_t),
+};
+
+static struct modlbrand modlbrand = {
+ &mod_brandops, /* type of module */
+ "SNGL Brand", /* description of module */
+ &sngl_brand /* driver ops */
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, (void *)&modlbrand, NULL
+};
+
+void
+sngl_setbrand(proc_t *p)
+{
+ brand_solaris_setbrand(p, &sngl_brand);
+}
+
+/*ARGSUSED*/
+int
+sngl_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize)
+{
+ return (EINVAL);
+}
+
+/*ARGSUSED*/
+int
+sngl_setattr(zone_t *zone, int attr, void *buf, size_t bufsize)
+{
+ return (EINVAL);
+}
+
+/*ARGSUSED*/
+int
+sngl_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
+ uintptr_t arg3, uintptr_t arg4, uintptr_t arg5)
+{
+ int res;
+
+ *rval = 0;
+ res = brand_solaris_cmd(cmd, arg1, arg2, arg3, &sngl_brand,
+ SNGL_VERSION);
+ if (res >= 0)
+ return (res);
+
+ return (EINVAL);
+}
+
+void
+sngl_copy_procdata(proc_t *child, proc_t *parent)
+{
+ brand_solaris_copy_procdata(child, parent, &sngl_brand);
+}
+
+void
+sngl_proc_exit(struct proc *p, klwp_t *l)
+{
+ brand_solaris_proc_exit(p, l, &sngl_brand);
+}
+
+void
+sngl_exec()
+{
+ brand_solaris_exec(&sngl_brand);
+}
+
+int
+sngl_initlwp(klwp_t *l)
+{
+ return (brand_solaris_initlwp(l, &sngl_brand));
+}
+
+void
+sngl_forklwp(klwp_t *p, klwp_t *c)
+{
+ brand_solaris_forklwp(p, c, &sngl_brand);
+}
+
+void
+sngl_freelwp(klwp_t *l)
+{
+ brand_solaris_freelwp(l, &sngl_brand);
+}
+
+void
+sngl_lwpexit(klwp_t *l)
+{
+ brand_solaris_lwpexit(l, &sngl_brand);
+}
+
+void
+sngl_free_brand_data(zone_t *zone)
+{
+}
+
+void
+sngl_init_brand_data(zone_t *zone)
+{
+}
+
+int
+sngl_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
+ int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred,
+ int brand_action)
+{
+ return (brand_solaris_elfexec(vp, uap, args, idatap, level, execsz,
+ setid, exec_file, cred, brand_action, &sngl_brand, SNGL_BRANDNAME,
+ SNGL_LIB, SNGL_LIB32));
+}
+
+int
+_init(void)
+{
+ int err;
+
+ /*
+ * Set up the table to interpose on open.
+ */
+ sngl_emulation_table = kmem_zalloc(NSYSCALL, KM_SLEEP);
+ sngl_emulation_table[SYS_open] = 1; /* 5 */
+
+ err = mod_install(&modlinkage);
+ if (err) {
+ cmn_err(CE_WARN, "Couldn't install brand module");
+ kmem_free(sngl_emulation_table, NSYSCALL);
+ }
+
+ return (err);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ return (brand_solaris_fini(&sngl_emulation_table, &modlinkage,
+ &sngl_brand));
+}
diff --git a/usr/src/uts/common/brand/sngl/sngl_brand.h b/usr/src/uts/common/brand/sngl/sngl_brand.h
new file mode 100644
index 0000000000..7c6cbd5412
--- /dev/null
+++ b/usr/src/uts/common/brand/sngl/sngl_brand.h
@@ -0,0 +1,65 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2014, Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SNGL_BRAND_H
+#define _SNGL_BRAND_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/brand.h>
+
+#define SNGL_BRANDNAME "sngl"
+
+#define SNGL_VERSION 1
+
+#define SNGL_LIB_NAME "sngl_brand.so.1"
+#define SNGL_LIB32 "/system/usr/lib/" SNGL_LIB_NAME
+#define SNGL_LIB64 "/system/usr/lib/64/" SNGL_LIB_NAME
+
+#if defined(_LP64)
+#define SNGL_LIB SNGL_LIB64
+#else /* !_LP64 */
+#define SNGL_LIB SNGL_LIB32
+#endif /* !_LP64 */
+
+#if defined(_KERNEL)
+
+void sngl_brand_syscall_callback(void);
+void sngl_brand_sysenter_callback(void);
+
+#if defined(__amd64)
+void sngl_brand_syscall32_callback(void);
+void sngl_brand_int91_callback(void);
+#endif /* __amd64 */
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SNGL_BRAND_H */
diff --git a/usr/src/uts/common/brand/solaris10/s10_brand.c b/usr/src/uts/common/brand/solaris10/s10_brand.c
index f24b864eef..448deffdd4 100644
--- a/usr/src/uts/common/brand/solaris10/s10_brand.c
+++ b/usr/src/uts/common/brand/solaris10/s10_brand.c
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#include <sys/errno.h>
@@ -51,7 +52,7 @@ void s10_setbrand(proc_t *);
int s10_getattr(zone_t *, int, void *, size_t *);
int s10_setattr(zone_t *, int, void *, size_t);
int s10_brandsys(int, int64_t *, uintptr_t, uintptr_t, uintptr_t,
- uintptr_t, uintptr_t, uintptr_t);
+ uintptr_t, uintptr_t);
void s10_copy_procdata(proc_t *, proc_t *);
void s10_proc_exit(struct proc *, klwp_t *);
void s10_exec();
@@ -83,7 +84,12 @@ struct brand_ops s10_brops = {
s10_elfexec,
s10_sigset_native_to_s10,
s10_sigset_s10_to_native,
+ NULL,
S10_NSIG,
+ NULL,
+ NULL,
+ NULL,
+ NULL
};
#ifdef sparc
@@ -99,9 +105,11 @@ struct brand_mach_ops s10_mops = {
struct brand_mach_ops s10_mops = {
s10_brand_sysenter_callback,
+ NULL,
s10_brand_int91_callback,
s10_brand_syscall_callback,
- s10_brand_syscall32_callback
+ s10_brand_syscall32_callback,
+ NULL
};
#else /* ! __amd64 */
@@ -109,7 +117,9 @@ struct brand_mach_ops s10_mops = {
struct brand_mach_ops s10_mops = {
s10_brand_sysenter_callback,
NULL,
+ NULL,
s10_brand_syscall_callback,
+ NULL,
NULL
};
#endif /* __amd64 */
@@ -120,7 +130,8 @@ struct brand s10_brand = {
BRAND_VER_1,
"solaris10",
&s10_brops,
- &s10_mops
+ &s10_mops,
+ sizeof (brand_proc_data_t),
};
static struct modlbrand modlbrand = {
@@ -252,7 +263,7 @@ s10_native(void *cmd, void *args)
/*ARGSUSED*/
int
s10_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
- uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6)
+ uintptr_t arg3, uintptr_t arg4, uintptr_t arg5)
{
proc_t *p = curproc;
int res;
@@ -394,7 +405,7 @@ s10_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
{
return (brand_solaris_elfexec(vp, uap, args, idatap, level, execsz,
setid, exec_file, cred, brand_action, &s10_brand, S10_BRANDNAME,
- S10_LIB, S10_LIB32, S10_LINKER, S10_LINKER32));
+ S10_LIB, S10_LIB32));
}
void
diff --git a/usr/src/uts/common/brand/solaris10/s10_brand.h b/usr/src/uts/common/brand/solaris10/s10_brand.h
index 11f9853f48..ffef485e12 100644
--- a/usr/src/uts/common/brand/solaris10/s10_brand.h
+++ b/usr/src/uts/common/brand/solaris10/s10_brand.h
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#ifndef _S10_BRAND_H
@@ -42,17 +43,12 @@ extern "C" {
#define S10_LINKER_NAME "ld.so.1"
#define S10_LIB32 BRAND_NATIVE_DIR "usr/lib/" S10_LIB_NAME
-#define S10_LINKER32 "/lib/" S10_LINKER_NAME
-
#define S10_LIB64 BRAND_NATIVE_DIR "usr/lib/64/" S10_LIB_NAME
-#define S10_LINKER64 "/lib/64/" S10_LINKER_NAME
#if defined(_LP64)
#define S10_LIB S10_LIB64
-#define S10_LINKER S10_LINKER64
#else /* !_LP64 */
#define S10_LIB S10_LIB32
-#define S10_LINKER S10_LINKER32
#endif /* !_LP64 */
/*
diff --git a/usr/src/uts/common/conf/param.c b/usr/src/uts/common/conf/param.c
index 6b3ba51f31..a71be771fd 100644
--- a/usr/src/uts/common/conf/param.c
+++ b/usr/src/uts/common/conf/param.c
@@ -22,6 +22,7 @@
/*
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1983, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright 2012 Milan Jurik. All rights reserved.
*/
@@ -559,8 +560,8 @@ char *isa_list = architecture;
static pgcnt_t original_physmem = 0;
#define MIN_DEFAULT_MAXUSERS 8u
-#define MAX_DEFAULT_MAXUSERS 2048u
-#define MAX_MAXUSERS 4096u
+#define MAX_DEFAULT_MAXUSERS 10000u
+#define MAX_MAXUSERS 20000u
void
param_preset(void)
@@ -572,7 +573,7 @@ void
param_calc(int platform_max_nprocs)
{
/*
- * Default to about one "user" per megabyte, taking into
+ * Default to about one "user" per 8MB, taking into
* account both physical and virtual constraints.
* Note: 2^20 is a meg; shifting right by (20 - PAGESHIFT)
* converts pages to megs without integer overflow.
@@ -586,8 +587,9 @@ param_calc(int platform_max_nprocs)
if (maxusers == 0) {
pgcnt_t physmegs = physmem >> (20 - PAGESHIFT);
pgcnt_t virtmegs = vmem_size(heap_arena, VMEM_FREE) >> 20;
- maxusers = MIN(MAX(MIN(physmegs, virtmegs),
- MIN_DEFAULT_MAXUSERS), MAX_DEFAULT_MAXUSERS);
+ maxusers = MIN(physmegs, virtmegs) >> 3; /* divide by 8 */
+ maxusers = MAX(maxusers, MIN_DEFAULT_MAXUSERS);
+ maxusers = MIN(maxusers, MAX_DEFAULT_MAXUSERS);
}
if (maxusers > MAX_MAXUSERS) {
maxusers = MAX_MAXUSERS;
diff --git a/usr/src/uts/common/crypto/api/kcf_random.c b/usr/src/uts/common/crypto/api/kcf_random.c
index fc77b8d78b..7766d8ba7a 100644
--- a/usr/src/uts/common/crypto/api/kcf_random.c
+++ b/usr/src/uts/common/crypto/api/kcf_random.c
@@ -69,6 +69,7 @@
#include <sys/cpuvar.h>
#include <sys/taskq.h>
#include <rng/fips_random.h>
+#include <sys/strlog.h>
#define RNDPOOLSIZE 1024 /* Pool size in bytes */
#define MINEXTRACTBYTES 20
@@ -932,7 +933,8 @@ rnd_handler(void *arg)
int len = 0;
if (!rng_prov_found && rng_ok_to_log) {
- cmn_err(CE_WARN, "No randomness provider enabled for "
+ (void) strlog(0, 0, 0, SL_NOTE,
+ "No randomness provider enabled for "
"/dev/random. Use cryptoadm(1M) to enable a provider.");
rng_ok_to_log = B_FALSE;
}
diff --git a/usr/src/uts/common/crypto/core/kcf_sched.c b/usr/src/uts/common/crypto/core/kcf_sched.c
index f461fe048c..8b2760b237 100644
--- a/usr/src/uts/common/crypto/core/kcf_sched.c
+++ b/usr/src/uts/common/crypto/core/kcf_sched.c
@@ -1027,9 +1027,9 @@ kcfpool_svc(void *arg)
case 0:
case -1:
/*
- * Woke up with no work to do. Check
- * if this thread should exit. We keep
- * at least kcf_minthreads.
+ * Woke up with no work to do. Check if we
+ * should lwp_exit() (which won't return). We
+ * keep at least kcf_minthreads.
*/
if (kcfpool->kp_threads > kcf_minthreads) {
KCF_ATOMIC_DECR(kcfpool->kp_threads);
diff --git a/usr/src/uts/common/disp/cmt.c b/usr/src/uts/common/disp/cmt.c
index 1c5e1f79a9..3ecbf39393 100644
--- a/usr/src/uts/common/disp/cmt.c
+++ b/usr/src/uts/common/disp/cmt.c
@@ -201,13 +201,15 @@ pg_cmt_cpu_startup(cpu_t *cp)
/*
* Return non-zero if thread can migrate between "from" and "to"
- * without a performance penalty
+ * without a performance penalty. This is true only if we share a core on
+ * virtually any CPU; sharing the last-level cache is insufficient to make
+ * migration possible without penalty.
*/
int
pg_cmt_can_migrate(cpu_t *from, cpu_t *to)
{
- if (from->cpu_physid->cpu_cacheid ==
- to->cpu_physid->cpu_cacheid)
+ if (from->cpu_physid->cpu_coreid ==
+ to->cpu_physid->cpu_coreid)
return (1);
return (0);
}
diff --git a/usr/src/uts/common/disp/cpucaps.c b/usr/src/uts/common/disp/cpucaps.c
index 46f53faab6..2a4365ff73 100644
--- a/usr/src/uts/common/disp/cpucaps.c
+++ b/usr/src/uts/common/disp/cpucaps.c
@@ -22,6 +22,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2013 Joyent, Inc. All rights reserved.
*/
#include <sys/disp.h>
@@ -74,6 +75,32 @@
* Putting threads on wait queues in random places while running in the
* kernel might lead to all kinds of locking problems.
*
+ * Bursting
+ * ========
+ *
+ * CPU bursting occurs when the CPU usage is over the baseline but under the
+ * cap. The baseline CPU (zone.cpu-baseline) is set in a multi-tenant
+ * environment so that we know how much CPU is allocated for a tenant under
+ * normal utilization. We can then track how much time a zone is spending
+ * over the "normal" CPU utilization expected for that zone using the
+ * "above_base_sec" kstat. This kstat is cumulative.
+ *
+ * If the zone has a burst limit (zone.cpu-burst-time) then the zone can
+ * burst for that period of time (in seconds) before the effective cap is
+ * lowered to the baseline. Once the effective cap is lowered, the zone
+ * will run at the baseline for the burst limit before the effective cap is
+ * raised again to the full value. This will allow the zone to burst again.
+ * We can watch this behavior using the kstats. The "effective" kstat shows
+ * which cap is being used, the baseline value or the burst value. The
+ * "burst_limit_sec" shows the value of the zone.cpu-burst-time rctl and the
+ * "bursting_sec" kstat shows how many seconds the zone has currently been
+ * bursting. When the CPU load is continuously greater than the baseline,
+ * bursting_sec will increase, up to the burst_limit_sec value, then the
+ * effective kstat will drop to the baseline and the bursting_sec value will
+ * decrease until it hits 0, at which time the effective kstat will return to
+ * the full burst value and the bursting_sec value will begin to increase
+ * again.
+ *
* Accounting
* ==========
*
@@ -203,18 +230,28 @@ static void caps_update();
*/
struct cap_kstat {
kstat_named_t cap_value;
+ kstat_named_t cap_baseline;
+ kstat_named_t cap_effective;
+ kstat_named_t cap_burst_limit;
+ kstat_named_t cap_bursting;
kstat_named_t cap_usage;
kstat_named_t cap_nwait;
kstat_named_t cap_below;
kstat_named_t cap_above;
+ kstat_named_t cap_above_base;
kstat_named_t cap_maxusage;
kstat_named_t cap_zonename;
} cap_kstat = {
{ "value", KSTAT_DATA_UINT64 },
+ { "baseline", KSTAT_DATA_UINT64 },
+ { "effective", KSTAT_DATA_UINT64 },
+ { "burst_limit_sec", KSTAT_DATA_UINT64 },
+ { "bursting_sec", KSTAT_DATA_UINT64 },
{ "usage", KSTAT_DATA_UINT64 },
{ "nwait", KSTAT_DATA_UINT64 },
{ "below_sec", KSTAT_DATA_UINT64 },
{ "above_sec", KSTAT_DATA_UINT64 },
+ { "above_base_sec", KSTAT_DATA_UINT64 },
{ "maxusage", KSTAT_DATA_UINT64 },
{ "zonename", KSTAT_DATA_STRING },
};
@@ -311,7 +348,7 @@ cap_enable(list_t *l, cpucap_t *cap, hrtime_t value)
cap->cap_below = cap->cap_above = 0;
cap->cap_maxusage = 0;
cap->cap_usage = 0;
- cap->cap_value = value;
+ cap->cap_value = cap->cap_chk_value = value;
waitq_unblock(&cap->cap_waitq);
if (CPUCAPS_OFF()) {
cpucaps_enabled = B_TRUE;
@@ -340,19 +377,21 @@ cap_disable(list_t *l, cpucap_t *cap)
ASSERT(CAP_ENABLED(cap));
waitq_block(&cap->cap_waitq);
+
+ /* do this first to avoid race with cap_kstat_update */
+ if (cap->cap_kstat != NULL) {
+ kstat_delete(cap->cap_kstat);
+ cap->cap_kstat = NULL;
+ }
+
list_remove(l, cap);
if (list_is_empty(&capped_projects) && list_is_empty(&capped_zones)) {
cpucaps_enabled = B_FALSE;
cpucaps_clock_callout = NULL;
}
- cap->cap_value = 0;
+ cap->cap_value = cap->cap_chk_value = 0;
cap->cap_project = NULL;
cap->cap_zone = NULL;
- if (cap->cap_kstat != NULL) {
- kstat_delete(cap->cap_kstat);
- cap->cap_kstat = NULL;
- }
-
}
/*
@@ -487,6 +526,8 @@ cap_walk(list_t *l, void (*cb)(cpucap_t *, int64_t))
* The waitq_isempty check is performed without the waitq lock. If a new thread
* is placed on the waitq right after the check, it will be picked up during the
* next invocation of cap_poke_waitq().
+ *
+ * Called once per tick for zones.
*/
/* ARGSUSED */
static void
@@ -494,15 +535,92 @@ cap_poke_waitq(cpucap_t *cap, int64_t gen)
{
ASSERT(MUTEX_HELD(&caps_lock));
- if (cap->cap_usage >= cap->cap_value) {
+ if (cap->cap_base != 0) {
+ /*
+ * Because of the way usage is calculated and decayed, its
+ * possible for the zone to be slightly over its cap, but we
+ * don't want to count that after we have reduced the effective
+ * cap to the baseline. That way the zone will be able to
+ * burst again after the burst_limit has expired.
+ */
+ if (cap->cap_usage > cap->cap_base &&
+ cap->cap_chk_value == cap->cap_value) {
+ cap->cap_above_base++;
+
+ /*
+ * If bursting is limited and we've been bursting
+ * longer than we're supposed to, then set the
+ * effective cap to the baseline.
+ */
+ if (cap->cap_burst_limit != 0) {
+ cap->cap_bursting++;
+ if (cap->cap_bursting >= cap->cap_burst_limit)
+ cap->cap_chk_value = cap->cap_base;
+ }
+ } else if (cap->cap_bursting > 0) {
+ /*
+ * We're not bursting now, but we were, decay the
+ * bursting timer.
+ */
+ cap->cap_bursting--;
+ /*
+ * Reset the effective cap once we decay to 0 so we
+ * can burst again.
+ */
+ if (cap->cap_bursting == 0 &&
+ cap->cap_chk_value != cap->cap_value)
+ cap->cap_chk_value = cap->cap_value;
+ }
+ }
+
+ if (cap->cap_usage >= cap->cap_chk_value) {
cap->cap_above++;
} else {
waitq_t *wq = &cap->cap_waitq;
cap->cap_below++;
- if (!waitq_isempty(wq))
- waitq_runone(wq);
+ if (!waitq_isempty(wq)) {
+ int i, ndequeue, p;
+
+ /*
+ * Since this function is only called once per tick,
+ * we can hit a situation where we have artificially
+ * limited the project/zone below its cap. This would
+ * happen if we have multiple threads queued up but
+ * only dequeued one thread/tick. To avoid this we
+ * dequeue multiple threads, calculated based on the
+ * usage percentage of the cap. It is possible that we
+ * could dequeue too many threads and some of them
+ * might be put back on the wait queue quickly, but
+ * since we know that threads are on the wait queue
+ * because we're capping, we know that there is unused
+ * CPU cycles anyway, so this extra work would not
+ * hurt. Also, the ndequeue number is only an upper
+ * bound and we might dequeue less, depending on how
+ * many threads are actually in the wait queue. The
+ * ndequeue values are empirically derived and could be
+ * adjusted or calculated in another way if necessary.
+ */
+ p = (int)((100 * cap->cap_usage) / cap->cap_chk_value);
+ if (p >= 98)
+ ndequeue = 10;
+ else if (p >= 95)
+ ndequeue = 20;
+ else if (p >= 90)
+ ndequeue = 40;
+ else if (p >= 85)
+ ndequeue = 80;
+ else
+ ndequeue = 160;
+
+ for (i = 0; i < ndequeue; i++) {
+ waitq_runone(wq);
+ if (waitq_isempty(wq))
+ break;
+ }
+ DTRACE_PROBE2(cpucaps__pokeq, int, p, int, i);
+ }
}
}
@@ -629,14 +747,14 @@ cap_project_zone_modify_walker(kproject_t *kpj, void *arg)
* Remove all projects in this zone without caps
* from the capped_projects list.
*/
- if (project_cap->cap_value == MAX_USAGE) {
+ if (project_cap->cap_chk_value == MAX_USAGE) {
cap_project_disable(kpj);
}
} else if (CAP_DISABLED(project_cap)) {
/*
* Add the project to capped_projects list.
*/
- ASSERT(project_cap->cap_value == 0);
+ ASSERT(project_cap->cap_chk_value == 0);
cap_project_enable(kpj, MAX_USAGE);
}
mutex_exit(&caps_lock);
@@ -746,7 +864,7 @@ cpucaps_zone_set(zone_t *zone, rctl_qty_t cap_val)
/*
* No state transitions, just change the value
*/
- cap->cap_value = value;
+ cap->cap_value = cap->cap_chk_value = value;
}
ASSERT(MUTEX_HELD(&caps_lock));
@@ -757,6 +875,108 @@ cpucaps_zone_set(zone_t *zone, rctl_qty_t cap_val)
}
/*
+ * Set zone's base cpu value to base_val
+ */
+int
+cpucaps_zone_set_base(zone_t *zone, rctl_qty_t base_val)
+{
+ cpucap_t *cap = NULL;
+ hrtime_t value;
+
+ ASSERT(base_val <= MAXCAP);
+ if (base_val > MAXCAP)
+ base_val = MAXCAP;
+
+ if (CPUCAPS_OFF() || !ZONE_IS_CAPPED(zone))
+ return (0);
+
+ if (zone->zone_cpucap == NULL)
+ cap = cap_alloc();
+
+ mutex_enter(&caps_lock);
+
+ if (cpucaps_busy) {
+ mutex_exit(&caps_lock);
+ return (EBUSY);
+ }
+
+ /*
+ * Double-check whether zone->zone_cpucap is NULL, now with caps_lock
+ * held. If it is still NULL, assign a newly allocated cpucap to it.
+ */
+ if (zone->zone_cpucap == NULL) {
+ zone->zone_cpucap = cap;
+ } else if (cap != NULL) {
+ cap_free(cap);
+ }
+
+ cap = zone->zone_cpucap;
+
+ value = base_val * cap_tick_cost;
+ if (value < 0 || value > cap->cap_value)
+ value = 0;
+
+ cap->cap_base = value;
+
+ mutex_exit(&caps_lock);
+
+ return (0);
+}
+
+/*
+ * Set zone's maximum burst time in seconds. A burst time of 0 means that
+ * the zone can run over its baseline indefinitely.
+ */
+int
+cpucaps_zone_set_burst_time(zone_t *zone, rctl_qty_t base_val)
+{
+ cpucap_t *cap = NULL;
+ hrtime_t value;
+
+ ASSERT(base_val <= INT_MAX);
+ /* Treat the default as 0 - no limit */
+ if (base_val == INT_MAX)
+ base_val = 0;
+ if (base_val > INT_MAX)
+ base_val = INT_MAX;
+
+ if (CPUCAPS_OFF() || !ZONE_IS_CAPPED(zone))
+ return (0);
+
+ if (zone->zone_cpucap == NULL)
+ cap = cap_alloc();
+
+ mutex_enter(&caps_lock);
+
+ if (cpucaps_busy) {
+ mutex_exit(&caps_lock);
+ return (EBUSY);
+ }
+
+ /*
+ * Double-check whether zone->zone_cpucap is NULL, now with caps_lock
+ * held. If it is still NULL, assign a newly allocated cpucap to it.
+ */
+ if (zone->zone_cpucap == NULL) {
+ zone->zone_cpucap = cap;
+ } else if (cap != NULL) {
+ cap_free(cap);
+ }
+
+ cap = zone->zone_cpucap;
+
+ value = SEC_TO_TICK(base_val);
+ if (value < 0)
+ value = 0;
+
+ cap->cap_burst_limit = value;
+
+ mutex_exit(&caps_lock);
+
+ return (0);
+}
+
+/*
* The project is going away so disable its cap.
*/
void
@@ -902,7 +1122,7 @@ cpucaps_project_set(kproject_t *kpj, rctl_qty_t cap_val)
if (CAP_DISABLED(cap))
cap_project_enable(kpj, value);
else
- cap->cap_value = value;
+ cap->cap_value = cap->cap_chk_value = value;
} else if (CAP_ENABLED(cap)) {
/*
* User requested to drop a cap on the project. If it is part of
@@ -910,7 +1130,7 @@ cpucaps_project_set(kproject_t *kpj, rctl_qty_t cap_val)
* otherwise disable the cap.
*/
if (ZONE_IS_CAPPED(kpj->kpj_zone)) {
- cap->cap_value = MAX_USAGE;
+ cap->cap_value = cap->cap_chk_value = MAX_USAGE;
} else {
cap_project_disable(kpj);
}
@@ -948,6 +1168,26 @@ cpucaps_zone_get(zone_t *zone)
}
/*
+ * Get current zone baseline.
+ */
+rctl_qty_t
+cpucaps_zone_get_base(zone_t *zone)
+{
+ return (zone->zone_cpucap != NULL ?
+ (rctl_qty_t)(zone->zone_cpucap->cap_base / cap_tick_cost) : 0);
+}
+
+/*
+ * Get current zone maximum burst time.
+ */
+rctl_qty_t
+cpucaps_zone_get_burst_time(zone_t *zone)
+{
+ return (zone->zone_cpucap != NULL ?
+ (rctl_qty_t)(TICK_TO_SEC(zone->zone_cpucap->cap_burst_limit)) : 0);
+}
+
+/*
* Charge project of thread t the time thread t spent on CPU since previously
* adjusted.
*
@@ -1045,7 +1285,7 @@ cpucaps_charge(kthread_id_t t, caps_sc_t *csc, cpucaps_charge_t charge_type)
project_cap = kpj->kpj_cpucap;
- if (project_cap->cap_usage >= project_cap->cap_value) {
+ if (project_cap->cap_usage >= project_cap->cap_chk_value) {
t->t_schedflag |= TS_PROJWAITQ;
rc = B_TRUE;
} else if (t->t_schedflag & TS_PROJWAITQ) {
@@ -1059,7 +1299,7 @@ cpucaps_charge(kthread_id_t t, caps_sc_t *csc, cpucaps_charge_t charge_type)
} else {
cpucap_t *zone_cap = zone->zone_cpucap;
- if (zone_cap->cap_usage >= zone_cap->cap_value) {
+ if (zone_cap->cap_usage >= zone_cap->cap_chk_value) {
t->t_schedflag |= TS_ZONEWAITQ;
rc = B_TRUE;
} else if (t->t_schedflag & TS_ZONEWAITQ) {
@@ -1119,6 +1359,7 @@ cpucaps_enforce(kthread_t *t)
/*
* Convert internal cap statistics into values exported by cap kstat.
+ * Note that the kstat is held throughout this function but caps_lock is not.
*/
static int
cap_kstat_update(kstat_t *ksp, int rw)
@@ -1133,6 +1374,12 @@ cap_kstat_update(kstat_t *ksp, int rw)
capsp->cap_value.value.ui64 =
ROUND_SCALE(cap->cap_value, cap_tick_cost);
+ capsp->cap_baseline.value.ui64 =
+ ROUND_SCALE(cap->cap_base, cap_tick_cost);
+ capsp->cap_effective.value.ui64 =
+ ROUND_SCALE(cap->cap_chk_value, cap_tick_cost);
+ capsp->cap_burst_limit.value.ui64 =
+ ROUND_SCALE(cap->cap_burst_limit, tick_sec);
capsp->cap_usage.value.ui64 =
ROUND_SCALE(cap->cap_usage, cap_tick_cost);
capsp->cap_maxusage.value.ui64 =
@@ -1140,6 +1387,10 @@ cap_kstat_update(kstat_t *ksp, int rw)
capsp->cap_nwait.value.ui64 = cap->cap_waitq.wq_count;
capsp->cap_below.value.ui64 = ROUND_SCALE(cap->cap_below, tick_sec);
capsp->cap_above.value.ui64 = ROUND_SCALE(cap->cap_above, tick_sec);
+ capsp->cap_above_base.value.ui64 =
+ ROUND_SCALE(cap->cap_above_base, tick_sec);
+ capsp->cap_bursting.value.ui64 =
+ ROUND_SCALE(cap->cap_bursting, tick_sec);
kstat_named_setstr(&capsp->cap_zonename, zonename);
return (0);
diff --git a/usr/src/uts/common/disp/disp.c b/usr/src/uts/common/disp/disp.c
index 0c2c0b4993..5f9c2c68a2 100644
--- a/usr/src/uts/common/disp/disp.c
+++ b/usr/src/uts/common/disp/disp.c
@@ -23,6 +23,10 @@
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ */
+
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
@@ -105,7 +109,7 @@ static void cpu_resched(cpu_t *cp, pri_t tpri);
/*
* If this is set, only interrupt threads will cause kernel preemptions.
* This is done by changing the value of kpreemptpri. kpreemptpri
- * will either be the max sysclass pri + 1 or the min interrupt pri.
+ * will either be the max sysclass pri or the min interrupt pri.
*/
int only_intr_kpreempt;
@@ -252,7 +256,23 @@ dispinit(void)
maxglobpri = cl_maxglobpri;
}
}
- kpreemptpri = (pri_t)v.v_maxsyspri + 1;
+
+ /*
+ * Historically, kpreemptpri was set to v_maxsyspri + 1 -- which is
+ * to say, maxclsyspri + 1. However, over time, the system has used
+ * more and more asynchronous kernel threads, with an increasing number
+ * of these doing work on direct behalf of higher-level software (e.g.,
+ * network processing). This has led to potential priority inversions:
+ * threads doing low-priority lengthy kernel work can effectively
+ * delay kernel-level processing of higher-priority data. To minimize
+ * such inversions, we set kpreemptpri to be v_maxsyspri; anything in
+ * the kernel that runs at maxclsyspri will therefore induce kernel
+ * preemption, and this priority should be used if/when an asynchronous
+ * thread (or, as is often the case, task queue) is performing a task
+ * on behalf of higher-level software (or any task that is otherwise
+ * latency-sensitve).
+ */
+ kpreemptpri = (pri_t)v.v_maxsyspri;
if (kpqpri == KPQPRI)
kpqpri = kpreemptpri;
@@ -2258,7 +2278,7 @@ disp_getbest(disp_t *dp)
* placed earlier.
*/
if (tcp == NULL ||
- pri >= minclsyspri ||
+ (pri >= minclsyspri && tp->t_procp == &p0) ||
tp->t_cpu != tcp)
break;
diff --git a/usr/src/uts/common/disp/thread.c b/usr/src/uts/common/disp/thread.c
index 8998b6ea51..331ca417e2 100644
--- a/usr/src/uts/common/disp/thread.c
+++ b/usr/src/uts/common/disp/thread.c
@@ -1050,6 +1050,8 @@ installctx(
ctx->free_op = free;
ctx->arg = arg;
ctx->next = t->t_ctx;
+ ctx->save_ts = 0;
+ ctx->restore_ts = 0;
t->t_ctx = ctx;
}
@@ -1124,9 +1126,12 @@ savectx(kthread_t *t)
struct ctxop *ctx;
ASSERT(t == curthread);
- for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next)
- if (ctx->save_op != NULL)
+ for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next) {
+ if (ctx->save_op != NULL) {
+ ctx->save_ts = gethrtime_unscaled();
(ctx->save_op)(ctx->arg);
+ }
+ }
}
void
@@ -1135,9 +1140,12 @@ restorectx(kthread_t *t)
struct ctxop *ctx;
ASSERT(t == curthread);
- for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next)
- if (ctx->restore_op != NULL)
+ for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next) {
+ if (ctx->restore_op != NULL) {
+ ctx->restore_ts = gethrtime_unscaled();
(ctx->restore_op)(ctx->arg);
+ }
+ }
}
void
diff --git a/usr/src/uts/common/dtrace/dtrace.c b/usr/src/uts/common/dtrace/dtrace.c
index 0d2d6f524c..f02cf48a65 100644
--- a/usr/src/uts/common/dtrace/dtrace.c
+++ b/usr/src/uts/common/dtrace/dtrace.c
@@ -845,7 +845,7 @@ static int
dtrace_vcanload(void *src, dtrace_diftype_t *type, dtrace_mstate_t *mstate,
dtrace_vstate_t *vstate)
{
- size_t sz;
+ size_t sz, strsize;
ASSERT(type->dtdt_flags & DIF_TF_BYREF);
/*
@@ -855,11 +855,24 @@ dtrace_vcanload(void *src, dtrace_diftype_t *type, dtrace_mstate_t *mstate,
if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
return (1);
- if (type->dtdt_kind == DIF_TYPE_STRING)
- sz = dtrace_strlen(src,
- vstate->dtvs_state->dts_options[DTRACEOPT_STRSIZE]) + 1;
- else
+ if (type->dtdt_kind == DIF_TYPE_STRING) {
+ dtrace_state_t *state = vstate->dtvs_state;
+
+ if (state != NULL) {
+ strsize = state->dts_options[DTRACEOPT_STRSIZE];
+ } else {
+ /*
+ * In helper context, we have a NULL state; fall back
+ * to using the system-wide default for the string size
+ * in this case.
+ */
+ strsize = dtrace_strsize_default;
+ }
+
+ sz = dtrace_strlen(src, strsize) + 1;
+ } else {
sz = type->dtdt_size;
+ }
return (dtrace_canload((uintptr_t)src, sz, mstate, vstate));
}
@@ -7506,7 +7519,7 @@ dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp)
priv = DTRACE_PRIV_ALL;
} else {
*uidp = crgetuid(cr);
- *zoneidp = crgetzoneid(cr);
+ *zoneidp = crgetzonedid(cr);
priv = 0;
if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE))
@@ -8002,7 +8015,7 @@ dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv,
provider->dtpv_priv.dtpp_flags = priv;
if (cr != NULL) {
provider->dtpv_priv.dtpp_uid = crgetuid(cr);
- provider->dtpv_priv.dtpp_zoneid = crgetzoneid(cr);
+ provider->dtpv_priv.dtpp_zoneid = crgetzonedid(cr);
}
provider->dtpv_pops = *pops;
@@ -8613,6 +8626,7 @@ dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab)
uint32_t priv;
uid_t uid;
zoneid_t zoneid;
+ dtrace_state_t *state = enab->dten_vstate->dtvs_state;
ASSERT(MUTEX_HELD(&dtrace_lock));
dtrace_ecb_create_cache = NULL;
@@ -8627,8 +8641,22 @@ dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab)
}
dtrace_probekey(desc, &pkey);
- dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred,
- &priv, &uid, &zoneid);
+ dtrace_cred2priv(state->dts_cred.dcr_cred, &priv, &uid, &zoneid);
+
+ if ((priv & DTRACE_PRIV_ZONEOWNER) &&
+ state->dts_options[DTRACEOPT_ZONE] != DTRACEOPT_UNSET) {
+ /*
+ * If we have the privilege of instrumenting all zones but we
+ * have been told to instrument but one, we will spoof this up
+ * depriving ourselves of DTRACE_PRIV_ZONEOWNER for purposes
+ * of dtrace_match(). (Note that DTRACEOPT_ZONE is not for
+ * security but rather for performance: it allows the global
+ * zone to instrument USDT probes in a local zone without
+ * requiring all zones to be instrumented.)
+ */
+ priv &= ~DTRACE_PRIV_ZONEOWNER;
+ zoneid = state->dts_options[DTRACEOPT_ZONE];
+ }
return (dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable,
enab));
diff --git a/usr/src/uts/common/dtrace/sdt_subr.c b/usr/src/uts/common/dtrace/sdt_subr.c
index 157acc25fc..3d350ff278 100644
--- a/usr/src/uts/common/dtrace/sdt_subr.c
+++ b/usr/src/uts/common/dtrace/sdt_subr.c
@@ -97,6 +97,10 @@ static dtrace_pattr_t iscsi_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
};
+/*
+ * When adding a new provider you must add it before sdt as sdt is a catch all
+ * for remaining probes.
+ */
sdt_provider_t sdt_providers[] = {
{ "vtrace", "__vtrace_", &vtrace_attr },
{ "sysinfo", "__cpu_sysinfo_", &info_attr, DTRACE_PRIV_USER },
@@ -117,6 +121,7 @@ sdt_provider_t sdt_providers[] = {
{ "fc", "__fc_", &fc_attr },
{ "srp", "__srp_", &fc_attr },
{ "sysevent", "__sysevent_", &stab_attr },
+ { "vnd", "__vnd_", &stab_attr },
{ "sdt", NULL, &sdt_attr },
{ NULL }
};
@@ -1151,6 +1156,34 @@ sdt_argdesc_t sdt_args[] = {
{ "fc", "abts-receive", 2, 2, "fct_i_remote_port_t *",
"fc_port_info_t *" },
+ { "vnd", "flow-blocked", 0, 0, "vnd_str_t *", "ifinfo_t *" },
+ { "vnd", "flow-blocked", 1, 1, "uint64_t", "uint64_t" },
+ { "vnd", "flow-blocked", 2, 2, "uintptr_t", "uintptr_t" },
+ { "vnd", "flow-resumed", 0, 0, "vnd_str_t *", "ifinfo_t *" },
+ { "vnd", "flow-resumed", 1, 1, "uint64_t", "uint64_t" },
+ { "vnd", "flow-resumed", 2, 2, "uintptr_t", "uintptr_t" },
+ { "vnd", "drop-in", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "vnd", "drop-in", 1, 1, "vnd_str_t *", "ifinfo_t *" },
+ { "vnd", "drop-in", 2, 2, "mblk_t *", "etherinfo_t *" },
+ { "vnd", "drop-in", 3, 3, "const char *", "const char *" },
+ { "vnd", "drop-out", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "vnd", "drop-out", 1, 1, "vnd_str_t *", "ifinfo_t *" },
+ { "vnd", "drop-out", 2, 2, "mblk_t *", "etherinfo_t *" },
+ { "vnd", "drop-out", 3, 3, "const char *", "const char *" },
+ { "vnd", "drop-ctl", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "vnd", "drop-ctl", 1, 1, "vnd_str_t *", "ifinfo_t *" },
+ { "vnd", "drop-ctl", 2, 2, "mblk_t *", "etherinfo_t *" },
+ { "vnd", "drop-ctl", 3, 3, "const char *", "const char *" },
+ { "vnd", "send", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "vnd", "send", 1, 1, "void *", "csinfo_t *" },
+ { "vnd", "send", 2, 2, "void *", "ipinfo_t *" },
+ { "vnd", "send", 3, 3, "vnd_str_t *", "ifinfo_t *" },
+ { "vnd", "send", 4, 4, "mblk_t *", "etherinfo_t *" },
+ { "vnd", "recv", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "vnd", "recv", 1, 1, "void *", "csinfo_t *" },
+ { "vnd", "recv", 2, 2, "void *", "ipinfo_t *" },
+ { "vnd", "recv", 3, 3, "vnd_str_t *", "ifinfo_t *" },
+ { "vnd", "recv", 4, 4, "mblk_t *", "etherinfo_t *" },
{ NULL }
};
diff --git a/usr/src/uts/common/exec/elf/elf.c b/usr/src/uts/common/exec/elf/elf.c
index 7d89db1575..1f16b377b1 100644
--- a/usr/src/uts/common/exec/elf/elf.c
+++ b/usr/src/uts/common/exec/elf/elf.c
@@ -26,7 +26,7 @@
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -167,8 +167,8 @@ dtrace_safe_phdr(Phdr *phdrp, struct uarg *args, uintptr_t base)
*/
int
mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
- intptr_t *voffset, caddr_t exec_file, int *interp, caddr_t *bssbase,
- caddr_t *brkbase, size_t *brksize, uintptr_t *lddatap)
+ intptr_t *voffset, caddr_t exec_file, char **interpp, caddr_t *bssbase,
+ caddr_t *brkbase, size_t *brksize, uintptr_t *lddatap, uintptr_t *minaddrp)
{
size_t len;
struct vattr vat;
@@ -180,6 +180,7 @@ mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
Phdr *junk = NULL;
Phdr *dynphdr = NULL;
Phdr *dtrphdr = NULL;
+ char *interp = NULL;
uintptr_t lddata;
long execsz;
intptr_t minaddr;
@@ -187,6 +188,9 @@ mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
if (lddatap != NULL)
*lddatap = NULL;
+ if (minaddrp != NULL)
+ *minaddrp = NULL;
+
if (error = execpermissions(vp, &vat, args)) {
uprintf("%s: Cannot execute %s\n", exec_file, args->pathname);
return (error);
@@ -216,19 +220,54 @@ mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
return (error);
}
+ if (minaddrp != NULL)
+ *minaddrp = minaddr;
+
/*
- * Inform our caller if the executable needs an interpreter.
+ * If the executable requires an interpreter, determine its name.
*/
- *interp = (dynphdr == NULL) ? 0 : 1;
+ if (dynphdr != NULL) {
+ ssize_t resid;
+
+ if (dynphdr->p_filesz > MAXPATHLEN || dynphdr->p_filesz == 0) {
+ uprintf("%s: Invalid interpreter\n", exec_file);
+ kmem_free(phdrbase, phdrsize);
+ return (ENOEXEC);
+ }
+
+ interp = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+ if ((error = vn_rdwr(UIO_READ, vp, interp, dynphdr->p_filesz,
+ (offset_t)dynphdr->p_offset, UIO_SYSSPACE, 0,
+ (rlim64_t)0, CRED(), &resid)) != 0 || resid != 0 ||
+ interp[dynphdr->p_filesz - 1] != '\0') {
+ uprintf("%s: Cannot obtain interpreter pathname\n",
+ exec_file);
+ kmem_free(interp, MAXPATHLEN);
+ kmem_free(phdrbase, phdrsize);
+ return (error != 0 ? error : ENOEXEC);
+ }
+ }
/*
* If this is a statically linked executable, voffset should indicate
* the address of the executable itself (it normally holds the address
* of the interpreter).
*/
- if (ehdr->e_type == ET_EXEC && *interp == 0)
+ if (ehdr->e_type == ET_EXEC && interp == NULL)
*voffset = minaddr;
+ /*
+ * If the caller has asked for the interpreter name, return it (it's
+ * up to the caller to free it); if the caller hasn't asked for it,
+ * free it ourselves.
+ */
+ if (interpp != NULL) {
+ *interpp = interp;
+ } else if (interp != NULL) {
+ kmem_free(interp, MAXPATHLEN);
+ }
+
if (uphdr != NULL) {
*uphdr_vaddr = uphdr->p_vaddr;
} else {
@@ -249,7 +288,7 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
caddr_t bssbase = 0;
caddr_t brkbase = 0;
size_t brksize = 0;
- ssize_t dlnsize;
+ ssize_t dlnsize, nsize = 0;
aux_entry_t *aux;
int error;
ssize_t resid;
@@ -339,14 +378,40 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
#endif /* _LP64 */
/*
- * We delay invoking the brand callback until we've figured out
- * what kind of elf binary we're trying to run, 32-bit or 64-bit.
- * We do this because now the brand library can just check
- * args->to_model to see if the target is 32-bit or 64-bit without
- * having do duplicate all the code above.
+ * We delay invoking the brand callback until we've figured out what
+ * kind of elf binary we're trying to run, 32-bit or 64-bit. We do this
+ * because now the brand library can just check args->to_model to see if
+ * the target is 32-bit or 64-bit without having do duplicate all the
+ * code above.
+ *
+ * We also give the brand a chance to indicate that based on the ELF
+ * OSABI of the target binary it should become unbranded and optionally
+ * indicate that it should be treated as existing in a specific prefix.
+ *
+ * Note that if a brand opts to go down this route it does not actually
+ * end up being debranded. In other words, future programs that exec
+ * will still be considered for branding unless this escape hatch is
+ * used. Consider the case of lx brand for example. If a user runs
+ * /native/usr/sbin/dtrace -c /bin/ls, the isaexec and normal executable
+ * of DTrace that's in /native will take this escape hatch and be run
+ * and interpreted using the normal system call table; however, the
+ * execution of a non-illumos binary in the form of /bin/ls will still
+ * be branded and be subject to all of the normal actions of the brand.
*/
if ((level < 2) &&
(brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
+ if (BROP(p)->b_native_exec(ehdrp->e_ident[EI_OSABI],
+ &args->brand_nroot) == B_TRUE) {
+ ASSERT(ehdrp->e_ident[EI_OSABI]);
+ brand_action = EBA_NATIVE;
+ /* Add one for the trailing '/' in the path */
+ if (args->brand_nroot != NULL)
+ nsize = strlen(args->brand_nroot) + 1;
+ }
+ }
+
+ if ((level < 2) &&
+ (brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
error = BROP(p)->b_elfexec(vp, uap, args,
idatap, level + 1, execsz, setid, exec_file, cred,
brand_action);
@@ -417,6 +482,7 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
* AT_BASE
* AT_FLAGS
* AT_PAGESZ
+ * AT_RANDOM
* AT_SUN_LDSECURE
* AT_SUN_HWCAP
* AT_SUN_HWCAP2
@@ -424,7 +490,7 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
* AT_SUN_EXECNAME
* AT_NULL
*
- * total == 9
+ * total == 10
*/
if (hasdy && hasu) {
/*
@@ -439,7 +505,7 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
*
* total = 5
*/
- args->auxsize = (9 + 5) * sizeof (aux_entry_t);
+ args->auxsize = (10 + 5) * sizeof (aux_entry_t);
} else if (hasdy) {
/*
* Has PT_INTERP but no PT_PHDR
@@ -449,9 +515,9 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
*
* total = 2
*/
- args->auxsize = (9 + 2) * sizeof (aux_entry_t);
+ args->auxsize = (10 + 2) * sizeof (aux_entry_t);
} else {
- args->auxsize = 9 * sizeof (aux_entry_t);
+ args->auxsize = 10 * sizeof (aux_entry_t);
}
} else {
args->auxsize = 0;
@@ -464,13 +530,21 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
if (args->emulator != NULL)
args->auxsize += sizeof (aux_entry_t);
+ /*
+ * If this is a native binary that's been given a modified interpreter
+ * root, inform it that the native system exists at that root.
+ */
+ if (args->brand_nroot != NULL) {
+ args->auxsize += sizeof (aux_entry_t);
+ }
+
if ((brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
branded = 1;
/*
- * We will be adding 4 entries to the aux vectors. One for
- * the the brandname and 3 for the brand specific aux vectors.
+ * We will be adding 5 entries to the aux vectors. One for
+ * the the brandname and 4 for the brand specific aux vectors.
*/
- args->auxsize += 4 * sizeof (aux_entry_t);
+ args->auxsize += 5 * sizeof (aux_entry_t);
}
/* Hardware/Software capabilities */
@@ -541,17 +615,22 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
char *p;
struct vnode *nvp;
- dlnsize = dyphdr->p_filesz;
+ dlnsize = dyphdr->p_filesz + nsize;
if (dlnsize > MAXPATHLEN || dlnsize <= 0)
goto bad;
+ if (nsize != 0) {
+ bcopy(args->brand_nroot, dlnp, nsize - 1);
+ dlnp[nsize - 1] = '/';
+ }
+
/*
* Read in "interpreter" pathname.
*/
- if ((error = vn_rdwr(UIO_READ, vp, dlnp, dyphdr->p_filesz,
- (offset_t)dyphdr->p_offset, UIO_SYSSPACE, 0, (rlim64_t)0,
- CRED(), &resid)) != 0) {
+ if ((error = vn_rdwr(UIO_READ, vp, dlnp + nsize,
+ dyphdr->p_filesz, (offset_t)dyphdr->p_offset, UIO_SYSSPACE,
+ 0, (rlim64_t)0, CRED(), &resid)) != 0) {
uprintf("%s: Cannot obtain interpreter pathname\n",
exec_file);
goto bad;
@@ -717,7 +796,8 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
if (hasauxv) {
int auxf = AF_SUN_HWCAPVERIFY;
/*
- * Note: AT_SUN_PLATFORM was filled in via exec_args()
+ * Note: AT_SUN_PLATFORM and AT_RANDOM were filled in via
+ * exec_args()
*/
ADDAUX(aux, AT_BASE, voffset)
ADDAUX(aux, AT_FLAGS, at_flags)
@@ -787,6 +867,7 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
ADDAUX(aux, AT_SUN_BRAND_AUX1, 0)
ADDAUX(aux, AT_SUN_BRAND_AUX2, 0)
ADDAUX(aux, AT_SUN_BRAND_AUX3, 0)
+ ADDAUX(aux, AT_SUN_BRAND_AUX4, 0)
}
ADDAUX(aux, AT_NULL, 0)
@@ -1205,6 +1286,7 @@ mapelfexec(
} else {
*voffset = 0;
}
+
phdr = (Phdr *)phdrbase;
for (i = nphdrs; i > 0; i--) {
switch (phdr->p_type) {
diff --git a/usr/src/uts/common/fs/bootfs/bootfs_construct.c b/usr/src/uts/common/fs/bootfs/bootfs_construct.c
new file mode 100644
index 0000000000..87895d21bb
--- /dev/null
+++ b/usr/src/uts/common/fs/bootfs/bootfs_construct.c
@@ -0,0 +1,367 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * This file takes care of reading the boot time modules and constructing them
+ * into the appropriate series of vnodes.
+ */
+
+#include <sys/conf.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/vfs.h>
+#include <sys/sysmacros.h>
+#include <sys/stat.h>
+
+#include <sys/fs/bootfs_impl.h>
+
+kmem_cache_t *bootfs_node_cache;
+
+static const vattr_t bootfs_vattr_dir = {
+ AT_ALL, /* va_mask */
+ VDIR, /* va_type */
+ S_IFDIR | 0555, /* va_mode */
+ 0, /* va_uid */
+ 0, /* va_gid */
+ 0, /* va_fsid */
+ 0, /* va_nodeid */
+ 1, /* va_nlink */
+ 0, /* va_size */
+ 0, /* va_atime */
+ 0, /* va_mtime */
+ 0, /* va_ctime */
+ 0, /* va_rdev */
+ 0, /* va_blksize */
+ 0, /* va_nblocks */
+ 0 /* va_seq */
+};
+
+static const vattr_t bootfs_vattr_reg = {
+ AT_ALL, /* va_mask */
+ VREG, /* va_type */
+ S_IFREG | 0555, /* va_mode */
+ 0, /* va_uid */
+ 0, /* va_gid */
+ 0, /* va_fsid */
+ 0, /* va_nodeid */
+ 2, /* va_nlink */
+ 2, /* va_size */
+ 0, /* va_atime */
+ 0, /* va_mtime */
+ 0, /* va_ctime */
+ 0, /* va_rdev */
+ 0, /* va_blksize */
+ 1, /* va_nblocks */
+ 0 /* va_seq */
+};
+
+/*ARGSUSED*/
+int
+bootfs_node_constructor(void *buf, void *arg, int kmflags)
+{
+ bootfs_node_t *bnp = buf;
+
+ bnp->bvn_vnp = vn_alloc(kmflags);
+ if (bnp->bvn_vnp == NULL)
+ return (-1);
+
+ return (0);
+}
+
+/*ARGSUSED*/
+void
+bootfs_node_destructor(void *buf, void *arg)
+{
+ bootfs_node_t *bnp = buf;
+
+ vn_free(bnp->bvn_vnp);
+}
+
+static int
+bootfs_comparator(const void *a, const void *b)
+{
+ const bootfs_node_t *lfs, *rfs;
+ int ret;
+
+ lfs = a;
+ rfs = b;
+
+ ret = strcmp(lfs->bvn_name, rfs->bvn_name);
+ if (ret > 0)
+ ret = 1;
+ if (ret < 0)
+ ret = -1;
+ return (ret);
+}
+
+static void
+bootfs_node_init(bootfs_t *bfs, bootfs_node_t *bnp, const struct vattr *vap,
+ const char *name, size_t namelen)
+{
+ timestruc_t now;
+
+ vn_reinit(bnp->bvn_vnp);
+
+ bnp->bvn_vnp->v_flag |= VNOSWAP;
+ bnp->bvn_vnp->v_type = vap->va_type;
+ bnp->bvn_vnp->v_vfsp = bfs->bfs_vfsp;
+ bnp->bvn_vnp->v_rdev = 0;
+ bnp->bvn_vnp->v_data = (caddr_t)bnp;
+ vn_setops(bnp->bvn_vnp, bootfs_vnodeops);
+
+ bnp->bvn_name = kmem_alloc(namelen + 1, KM_SLEEP);
+ bcopy(name, bnp->bvn_name, namelen);
+ bnp->bvn_name[namelen] = '\0';
+ if (vap->va_type == VDIR) {
+ avl_create(&bnp->bvn_dir, bootfs_comparator,
+ sizeof (bootfs_node_t),
+ offsetof(bootfs_node_t, bvn_link));
+ }
+ bzero(&bnp->bvn_link, sizeof (avl_node_t));
+ bcopy(vap, &bnp->bvn_attr, sizeof (vattr_t));
+
+ gethrestime(&now);
+ bnp->bvn_attr.va_atime = now;
+ bnp->bvn_attr.va_ctime = now;
+ bnp->bvn_attr.va_mtime = now;
+ bnp->bvn_attr.va_fsid = makedevice(bootfs_major, bfs->bfs_minor);
+ bnp->bvn_attr.va_nodeid = bfs->bfs_ninode;
+ bnp->bvn_attr.va_blksize = PAGESIZE;
+ bfs->bfs_ninode++;
+ list_insert_tail(&bfs->bfs_nodes, bnp);
+}
+
+static void
+bootfs_mkroot(bootfs_t *bfs)
+{
+ bootfs_node_t *bnp;
+
+ bnp = kmem_cache_alloc(bootfs_node_cache, KM_SLEEP);
+ bootfs_node_init(bfs, bnp, &bootfs_vattr_dir, "/", 1);
+ bnp->bvn_vnp->v_flag |= VROOT;
+ bnp->bvn_parent = bnp;
+ bfs->bfs_rootvn = bnp;
+ bfs->bfs_stat.bfss_ndirs.value.ui32++;
+ vn_exists(bnp->bvn_vnp);
+}
+
+static int
+bootfs_mknode(bootfs_t *bfs, bootfs_node_t *parent, bootfs_node_t **outp,
+ const char *name, size_t namelen, const vattr_t *vap, uintptr_t addr,
+ uint64_t size)
+{
+ bootfs_node_t *bnp;
+ bootfs_node_t sn;
+ avl_index_t where;
+ char *buf;
+
+ ASSERT(parent->bvn_attr.va_type == VDIR);
+ buf = kmem_alloc(namelen + 1, KM_SLEEP);
+ bcopy(name, buf, namelen);
+ buf[namelen] = '\0';
+ sn.bvn_name = buf;
+ if ((bnp = avl_find(&parent->bvn_dir, &sn, &where)) != NULL) {
+ kmem_free(buf, namelen + 1);
+ /* Directories can collide, files cannot */
+ if (vap->va_type == VDIR) {
+ *outp = bnp;
+ return (0);
+ }
+ return (EEXIST);
+ }
+ kmem_free(buf, namelen + 1);
+
+ bnp = kmem_cache_alloc(bootfs_node_cache, KM_SLEEP);
+ bootfs_node_init(bfs, bnp, vap, name, namelen);
+ bnp->bvn_parent = parent;
+ avl_add(&parent->bvn_dir, bnp);
+ *outp = bnp;
+
+ if (vap->va_type == VDIR) {
+ parent->bvn_attr.va_size++;
+ parent->bvn_attr.va_nlink++;
+ bfs->bfs_stat.bfss_ndirs.value.ui32++;
+ } else {
+ bnp->bvn_addr = addr;
+ bnp->bvn_size = size;
+ bfs->bfs_stat.bfss_nfiles.value.ui32++;
+ bfs->bfs_stat.bfss_nbytes.value.ui64 += size;
+ bnp->bvn_attr.va_nblocks = P2ROUNDUP(size, 512) >> 9;
+ }
+
+ vn_exists(bnp->bvn_vnp);
+
+ return (0);
+}
+
+/*
+ * Given the address, size, and path a boot-time module would like, go through
+ * and create all of the directory entries that are required and then the file
+ * itself. If someone has passed in a module that has the same name as another
+ * one, we honor the first one.
+ */
+static int
+bootfs_construct_entry(bootfs_t *bfs, uintptr_t addr, uint64_t size,
+ const char *mname)
+{
+ char *sp;
+ size_t nlen;
+ int ret;
+ bootfs_node_t *nbnp;
+
+ const char *p = mname;
+ bootfs_node_t *bnp = bfs->bfs_rootvn;
+
+ if (*p == '\0')
+ return (EINVAL);
+
+ for (;;) {
+ /* First eliminate all leading / characters. */
+ while (*p == '/')
+ p++;
+
+ /* A name with all slashes or ending in a / */
+ if (*p == '\0')
+ return (EINVAL);
+
+ sp = strchr(p, '/');
+ if (sp == NULL)
+ break;
+ nlen = (ptrdiff_t)sp - (ptrdiff_t)p;
+ if (strncmp(p, ".", nlen) == 0) {
+ p = sp + 1;
+ continue;
+ }
+
+ if (strncmp(p, "..", nlen) == 0) {
+ bnp = bnp->bvn_parent;
+ p = sp + 1;
+ continue;
+ }
+
+ VERIFY(bootfs_mknode(bfs, bnp, &nbnp, p, nlen,
+ &bootfs_vattr_dir, addr, size) == 0);
+ p = sp + 1;
+ bnp = nbnp;
+ }
+
+ nlen = strlen(p);
+ ret = bootfs_mknode(bfs, bnp, &nbnp, p, nlen, &bootfs_vattr_reg,
+ addr, size);
+ if (ret != 0)
+ return (ret);
+
+ return (0);
+}
+
+/*
+ * We're going to go through every boot time module and construct the
+ * appropriate vnodes for them now. Because there are very few of these that
+ * exist, generally on the order of a handful, we're going to create them all
+ * when the file system is initialized and then tear them all down when the
+ * module gets unloaded.
+ *
+ * The information about the modules is contained in properties on the root of
+ * the devinfo tree. Specifically there are three properties per module:
+ *
+ * - module-size-%d int64_t size, in bytes, of the boot time module.
+ * - module-addr-%d The address of the boot time module
+ * - module-name-%d The string name of the boot time module
+ *
+ * Note that the module-size and module-addr fields are always 64-bit values
+ * regardless of being on a 32-bit or 64-bit kernel. module-name is a string
+ * property.
+ *
+ * There is no property that indicates the total number of such modules. Modules
+ * start at 0 and work their way up incrementally. The first time we can't find
+ * a module or a property, then we stop.
+ */
+void
+bootfs_construct(bootfs_t *bfs)
+{
+ uint_t id = 0, ndata;
+ char paddr[64], psize[64], pname[64], *mname;
+ dev_info_t *root;
+ uchar_t *datap;
+ uint64_t size = 0, addr = 0;
+ int ret;
+
+ bootfs_mkroot(bfs);
+ root = ddi_root_node();
+
+ for (;;) {
+ if (id == UINT32_MAX)
+ break;
+
+ if (snprintf(paddr, sizeof (paddr), "module-addr-%d", id) >
+ sizeof (paddr))
+ break;
+
+ if (snprintf(psize, sizeof (paddr), "module-size-%d", id) >
+ sizeof (paddr))
+ break;
+
+ if (snprintf(pname, sizeof (paddr), "module-name-%d", id) >
+ sizeof (paddr))
+ break;
+
+ if (ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, root,
+ DDI_PROP_DONTPASS, paddr, &datap, &ndata) !=
+ DDI_PROP_SUCCESS)
+ break;
+
+ if (ndata == 8)
+ bcopy(datap, &addr, sizeof (uint64_t));
+ ddi_prop_free(datap);
+ if (ndata != 8)
+ break;
+
+ if (ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, root,
+ DDI_PROP_DONTPASS, psize, &datap, &ndata) !=
+ DDI_PROP_SUCCESS)
+ break;
+ if (ndata == 8)
+ bcopy(datap, &size, sizeof (uint64_t));
+ ddi_prop_free(datap);
+ if (ndata != 8)
+ break;
+
+ if (ddi_prop_lookup_string(DDI_DEV_T_ANY, root,
+ DDI_PROP_DONTPASS, pname, &mname) != DDI_PROP_SUCCESS)
+ break;
+
+ ret = bootfs_construct_entry(bfs, addr, size, mname);
+ if (ret == EINVAL)
+ bfs->bfs_stat.bfss_ndiscards.value.ui32++;
+ if (ret == EEXIST)
+ bfs->bfs_stat.bfss_ndups.value.ui32++;
+ ddi_prop_free(mname);
+
+ id++;
+ }
+}
+
+void
+bootfs_destruct(bootfs_t *bfs)
+{
+ bootfs_node_t *bnp;
+
+ while ((bnp = list_remove_head(&bfs->bfs_nodes)) != NULL) {
+ ASSERT(bnp->bvn_vnp->v_count == 1);
+ VN_RELE(bnp->bvn_vnp);
+ kmem_free(bnp->bvn_name, strlen(bnp->bvn_name) + 1);
+ kmem_cache_free(bootfs_node_cache, bnp);
+ }
+}
diff --git a/usr/src/uts/common/fs/bootfs/bootfs_vfsops.c b/usr/src/uts/common/fs/bootfs/bootfs_vfsops.c
new file mode 100644
index 0000000000..b87e4b738e
--- /dev/null
+++ b/usr/src/uts/common/fs/bootfs/bootfs_vfsops.c
@@ -0,0 +1,320 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/errno.h>
+#include <sys/modctl.h>
+#include <sys/types.h>
+#include <sys/mkdev.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/vfs.h>
+#include <sys/vfs_opreg.h>
+#include <sys/systm.h>
+#include <sys/id_space.h>
+#include <sys/cmn_err.h>
+#include <sys/ksynch.h>
+#include <sys/policy.h>
+#include <sys/mount.h>
+#include <sys/sysmacros.h>
+
+#include <sys/fs/bootfs_impl.h>
+
+/*
+ * While booting, additional types of modules and files can be passed in to the
+ * loader. These include the familiar boot archive, as well as, a module hash
+ * and additional modules that are interpreted as files. As part of the handoff
+ * in early boot, information about these modules are saved as properties on the
+ * root of the devinfo tree, similar to other boot-time properties.
+ *
+ * This file system provides a read-only view of those additional files. Due to
+ * its limited scope, it has a slightly simpler construction than several other
+ * file systems. When mounted, it looks for the corresponding properties and
+ * creates bootfs_node_t's and vnodes for all of the corresponding files and
+ * directories that exist along the way. At this time, there are currently a
+ * rather small number of files passed in this way.
+ *
+ * This does lead to one behavior that folks used to other file systems might
+ * find peculiar. Because we are not always actively creating and destroying the
+ * required vnodes on demand, the count on the root vnode will not be going up
+ * accordingly with the existence of other vnodes. This means that a bootfs file
+ * system that is not in use will have all of its vnodes exist with a v_count of
+ * one.
+ */
+
+major_t bootfs_major;
+static int bootfs_fstype;
+static id_space_t *bootfs_idspace;
+static uint64_t bootfs_nactive;
+static kmutex_t bootfs_lock;
+
+static const char *bootfs_name = "bootfs";
+
+static int
+bootfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
+{
+ int ret;
+ bootfs_t *bfs;
+ struct pathname dpn;
+ dev_t fsdev;
+
+ if ((ret = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
+ return (ret);
+
+ if (mvp->v_type != VDIR)
+ return (ENOTDIR);
+
+ if (uap->flags & MS_REMOUNT)
+ return (EBUSY);
+
+ mutex_enter(&mvp->v_lock);
+ if ((uap->flags & MS_OVERLAY) == 0 &&
+ (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
+ mutex_exit(&mvp->v_lock);
+ return (EBUSY);
+ }
+ mutex_exit(&mvp->v_lock);
+
+ /*
+ * We indicate that the backing store is bootfs. We don't want to use
+ * swap, because folks might think that this is putting all the data
+ * into memory ala tmpfs. Rather these modules are always in memory and
+ * there's nothing to be done about that.
+ */
+ vfs_setresource(vfsp, bootfs_name, 0);
+ bfs = kmem_zalloc(sizeof (bootfs_t), KM_NOSLEEP | KM_NORMALPRI);
+ if (bfs == NULL)
+ return (ENOMEM);
+
+ ret = pn_get(uap->dir,
+ (uap->flags & MS_SYSSPACE) ? UIO_SYSSPACE : UIO_USERSPACE, &dpn);
+ if (ret != 0) {
+ kmem_free(bfs, sizeof (bfs));
+ return (ret);
+ }
+
+ bfs->bfs_minor = id_alloc(bootfs_idspace);
+ bfs->bfs_kstat = kstat_create_zone("bootfs", bfs->bfs_minor, "bootfs",
+ "fs", KSTAT_TYPE_NAMED,
+ sizeof (bootfs_stat_t) / sizeof (kstat_named_t),
+ KSTAT_FLAG_VIRTUAL, GLOBAL_ZONEID);
+ if (bfs->bfs_kstat == NULL) {
+ id_free(bootfs_idspace, bfs->bfs_minor);
+ pn_free(&dpn);
+ kmem_free(bfs, sizeof (bfs));
+ return (ENOMEM);
+ }
+ bfs->bfs_kstat->ks_data = &bfs->bfs_stat;
+
+ fsdev = makedevice(bootfs_major, bfs->bfs_minor);
+ bfs->bfs_vfsp = vfsp;
+
+ vfsp->vfs_data = (caddr_t)bfs;
+ vfsp->vfs_fstype = bootfs_fstype;
+ vfsp->vfs_dev = fsdev;
+ vfsp->vfs_bsize = PAGESIZE;
+ vfsp->vfs_flag |= VFS_RDONLY | VFS_NOSETUID | VFS_NOTRUNC |
+ VFS_UNLINKABLE;
+ vfs_make_fsid(&vfsp->vfs_fsid, fsdev, bootfs_fstype);
+ bfs->bfs_mntpath = kmem_alloc(dpn.pn_pathlen + 1, KM_SLEEP);
+ bcopy(dpn.pn_path, bfs->bfs_mntpath, dpn.pn_pathlen);
+ bfs->bfs_mntpath[dpn.pn_pathlen] = '\0';
+ pn_free(&dpn);
+ list_create(&bfs->bfs_nodes, sizeof (bootfs_node_t),
+ offsetof(bootfs_node_t, bvn_alink));
+
+ kstat_named_init(&bfs->bfs_stat.bfss_nfiles, "nfiles",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&bfs->bfs_stat.bfss_ndirs, "ndirs",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&bfs->bfs_stat.bfss_nbytes, "nbytes",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&bfs->bfs_stat.bfss_ndups, "ndup",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&bfs->bfs_stat.bfss_ndiscards, "ndiscard",
+ KSTAT_DATA_UINT32);
+
+ bootfs_construct(bfs);
+
+ kstat_install(bfs->bfs_kstat);
+
+ return (0);
+}
+
+static int
+bootfs_unmount(vfs_t *vfsp, int flag, cred_t *cr)
+{
+ int ret;
+ bootfs_t *bfs = vfsp->vfs_data;
+ bootfs_node_t *bnp;
+
+ if ((ret = secpolicy_fs_unmount(cr, vfsp)) != 0)
+ return (ret);
+
+ if (flag & MS_FORCE)
+ return (ENOTSUP);
+
+ for (bnp = list_head(&bfs->bfs_nodes); bnp != NULL;
+ bnp = list_next(&bfs->bfs_nodes, bnp)) {
+ mutex_enter(&bnp->bvn_vnp->v_lock);
+ if (bnp->bvn_vnp->v_count > 1) {
+ mutex_exit(&bnp->bvn_vnp->v_lock);
+ return (EBUSY);
+ }
+ mutex_exit(&bnp->bvn_vnp->v_lock);
+ }
+
+ kstat_delete(bfs->bfs_kstat);
+ bootfs_destruct(bfs);
+ list_destroy(&bfs->bfs_nodes);
+ kmem_free(bfs->bfs_mntpath, strlen(bfs->bfs_mntpath) + 1);
+ id_free(bootfs_idspace, bfs->bfs_minor);
+ kmem_free(bfs, sizeof (bootfs_t));
+ return (0);
+}
+
+static int
+bootfs_root(vfs_t *vfsp, vnode_t **vpp)
+{
+ bootfs_t *bfs;
+
+ bfs = (bootfs_t *)vfsp->vfs_data;
+ *vpp = bfs->bfs_rootvn->bvn_vnp;
+ VN_HOLD(*vpp)
+
+ return (0);
+}
+
+static int
+bootfs_statvfs(vfs_t *vfsp, struct statvfs64 *sbp)
+{
+ const bootfs_t *bfs = (bootfs_t *)vfsp;
+ dev32_t d32;
+
+ sbp->f_bsize = PAGESIZE;
+ sbp->f_frsize = PAGESIZE;
+
+ sbp->f_blocks = bfs->bfs_stat.bfss_nbytes.value.ui64 >> PAGESHIFT;
+ sbp->f_bfree = 0;
+ sbp->f_bavail = 0;
+
+ sbp->f_files = bfs->bfs_stat.bfss_nfiles.value.ui32 +
+ bfs->bfs_stat.bfss_ndirs.value.ui32;
+ sbp->f_ffree = 0;
+ sbp->f_favail = 0;
+
+ (void) cmpldev(&d32, vfsp->vfs_dev);
+ sbp->f_fsid = d32;
+ (void) strlcpy(sbp->f_basetype, bootfs_name, FSTYPSZ);
+ bzero(sbp->f_fstr, sizeof (sbp->f_fstr));
+
+ return (0);
+}
+
+static const fs_operation_def_t bootfs_vfsops_tmpl[] = {
+ VFSNAME_MOUNT, { .vfs_mount = bootfs_mount },
+ VFSNAME_UNMOUNT, { .vfs_unmount = bootfs_unmount },
+ VFSNAME_ROOT, { .vfs_root = bootfs_root },
+ VFSNAME_STATVFS, { .vfs_statvfs = bootfs_statvfs },
+};
+
+static int
+bootfs_init(int fstype, char *name)
+{
+ int ret;
+
+ bootfs_fstype = fstype;
+ ASSERT(bootfs_fstype != 0);
+
+ ret = vfs_setfsops(fstype, bootfs_vfsops_tmpl, NULL);
+ if (ret != 0)
+ return (ret);
+
+ ret = vn_make_ops(name, bootfs_vnodeops_template, &bootfs_vnodeops);
+ if (ret != 0) {
+ (void) vfs_freevfsops_by_type(bootfs_fstype);
+ return (ret);
+ }
+
+ bootfs_major = getudev();
+ if (bootfs_major == (major_t)-1) {
+ cmn_err(CE_WARN, "bootfs_init: Can't get unique device number");
+ bootfs_major = 0;
+ }
+
+ bootfs_nactive = 0;
+ return (0);
+}
+
+static mntopts_t bootfs_mntopts = {
+ 0, NULL
+};
+
+static vfsdef_t bootfs_vfsdef = {
+ VFSDEF_VERSION,
+ "bootfs",
+ bootfs_init,
+ VSW_HASPROTO|VSW_STATS,
+ &bootfs_mntopts
+};
+
+static struct modlfs bootfs_modlfs = {
+ &mod_fsops, "boot-time modules file system", &bootfs_vfsdef
+};
+
+static struct modlinkage bootfs_modlinkage = {
+ MODREV_1, &bootfs_modlfs, NULL
+};
+
+int
+_init(void)
+{
+ bootfs_node_cache = kmem_cache_create("bootfs_node_cache",
+ sizeof (bootfs_node_t), 0, bootfs_node_constructor,
+ bootfs_node_destructor, NULL, NULL, NULL, 0);
+ bootfs_idspace = id_space_create("bootfs_minors", 1, INT32_MAX);
+ mutex_init(&bootfs_lock, NULL, MUTEX_DEFAULT, NULL);
+
+ return (mod_install(&bootfs_modlinkage));
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&bootfs_modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ int err;
+
+ mutex_enter(&bootfs_lock);
+ if (bootfs_nactive > 0) {
+ mutex_exit(&bootfs_lock);
+ return (EBUSY);
+ }
+ mutex_exit(&bootfs_lock);
+
+ err = mod_remove(&bootfs_modlinkage);
+ if (err != 0)
+ return (err);
+
+ (void) vfs_freevfsops_by_type(bootfs_fstype);
+ vn_freevnodeops(bootfs_vnodeops);
+ id_space_destroy(bootfs_idspace);
+ mutex_destroy(&bootfs_lock);
+ kmem_cache_destroy(bootfs_node_cache);
+ return (err);
+}
diff --git a/usr/src/uts/common/fs/bootfs/bootfs_vnops.c b/usr/src/uts/common/fs/bootfs/bootfs_vnops.c
new file mode 100644
index 0000000000..f63d0a4f24
--- /dev/null
+++ b/usr/src/uts/common/fs/bootfs/bootfs_vnops.c
@@ -0,0 +1,544 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * bootfs vnode operations
+ */
+
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/sunddi.h>
+#include <sys/errno.h>
+#include <sys/vfs_opreg.h>
+#include <sys/vnode.h>
+#include <sys/mman.h>
+#include <fs/fs_subr.h>
+#include <sys/policy.h>
+#include <sys/sysmacros.h>
+#include <sys/dirent.h>
+#include <sys/uio.h>
+#include <vm/pvn.h>
+#include <vm/hat.h>
+#include <vm/seg_map.h>
+#include <vm/seg_vn.h>
+#include <sys/vmsystm.h>
+
+#include <sys/fs/bootfs_impl.h>
+
+struct vnodeops *bootfs_vnodeops;
+
+/*ARGSUSED*/
+static int
+bootfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
+{
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+bootfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
+ caller_context_t *ct)
+{
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+bootfs_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
+ caller_context_t *ct)
+{
+ int err;
+ ssize_t sres = uiop->uio_resid;
+ bootfs_node_t *bnp = vp->v_data;
+
+ if (vp->v_type == VDIR)
+ return (EISDIR);
+
+ if (vp->v_type != VREG)
+ return (EINVAL);
+
+ if (uiop->uio_loffset < 0)
+ return (EINVAL);
+
+ if (uiop->uio_loffset >= bnp->bvn_size)
+ return (0);
+
+ err = 0;
+ while (uiop->uio_resid != 0) {
+ caddr_t base;
+ long offset, frem;
+ ulong_t poff, segoff;
+ size_t bytes;
+ int relerr;
+
+ offset = uiop->uio_loffset;
+ poff = offset & PAGEOFFSET;
+ bytes = MIN(PAGESIZE - poff, uiop->uio_resid);
+
+ frem = bnp->bvn_size - offset;
+ if (frem <= 0) {
+ err = 0;
+ break;
+ }
+
+ /* Don't read past EOF */
+ bytes = MIN(bytes, frem);
+
+ /*
+ * Segmaps are likely larger than our page size, so make sure we
+ * have the proper offfset into the resulting segmap data.
+ */
+ segoff = (offset & PAGEMASK) & MAXBOFFSET;
+
+ base = segmap_getmapflt(segkmap, vp, offset & MAXBMASK, bytes,
+ 1, S_READ);
+
+ err = uiomove(base + segoff + poff, bytes, UIO_READ, uiop);
+ relerr = segmap_release(segkmap, base, 0);
+
+ if (err == 0)
+ err = relerr;
+
+ if (err != 0)
+ break;
+ }
+
+ /* Even if we had an error in a partial read, return success */
+ if (uiop->uio_resid > sres)
+ err = 0;
+
+ gethrestime(&bnp->bvn_attr.va_atime);
+
+ return (err);
+}
+
+/*ARGSUSED*/
+static int
+bootfs_ioctl(vnode_t *vp, int cmd, intptr_t data, int flag,
+ cred_t *cr, int *rvalp, caller_context_t *ct)
+{
+ return (ENOTTY);
+}
+
+/*ARGSUSED*/
+static int
+bootfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
+ caller_context_t *ct)
+{
+ uint32_t mask;
+ bootfs_node_t *bpn = (bootfs_node_t *)vp->v_data;
+
+ mask = vap->va_mask;
+ bcopy(&bpn->bvn_attr, vap, sizeof (vattr_t));
+ vap->va_mask = mask;
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+bootfs_access(vnode_t *vp, int mode, int flags, cred_t *cr,
+ caller_context_t *ct)
+{
+ int shift = 0;
+ bootfs_node_t *bpn = (bootfs_node_t *)vp->v_data;
+
+ if (crgetuid(cr) != bpn->bvn_attr.va_uid) {
+ shift += 3;
+ if (groupmember(bpn->bvn_attr.va_gid, cr) == 0)
+ shift += 3;
+ }
+
+ return (secpolicy_vnode_access2(cr, vp, bpn->bvn_attr.va_uid,
+ bpn->bvn_attr.va_mode << shift, mode));
+}
+
+/*ARGSUSED*/
+static int
+bootfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
+ int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
+ int *direntflags, pathname_t *realpnp)
+{
+ avl_index_t where;
+ bootfs_node_t sn, *bnp;
+ bootfs_node_t *bpp = (bootfs_node_t *)dvp->v_data;
+
+ if (flags & LOOKUP_XATTR)
+ return (EINVAL);
+
+ if (bpp->bvn_attr.va_type != VDIR)
+ return (ENOTDIR);
+
+ if (*nm == '\0' || strcmp(nm, ".") == 0) {
+ VN_HOLD(dvp);
+ *vpp = dvp;
+ return (0);
+ }
+
+ if (strcmp(nm, "..") == 0) {
+ VN_HOLD(bpp->bvn_parent->bvn_vnp);
+ *vpp = bpp->bvn_parent->bvn_vnp;
+ return (0);
+ }
+
+ sn.bvn_name = nm;
+ bnp = avl_find(&bpp->bvn_dir, &sn, &where);
+ if (bnp == NULL)
+ return (ENOENT);
+
+ VN_HOLD(bnp->bvn_vnp);
+ *vpp = bnp->bvn_vnp;
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+bootfs_readdir(vnode_t *vp, struct uio *uiop, cred_t *cr, int *eofp,
+ caller_context_t *ct, int flags)
+{
+ bootfs_node_t *bnp = (bootfs_node_t *)vp->v_data;
+ dirent64_t *dp;
+ void *buf;
+ ulong_t bsize, brem;
+ offset_t coff, roff;
+ int dlen, ret;
+ bootfs_node_t *dnp;
+ boolean_t first = B_TRUE;
+
+ if (uiop->uio_loffset >= MAXOFF_T) {
+ if (eofp != NULL)
+ *eofp = 1;
+ return (0);
+ }
+
+ if (uiop->uio_iovcnt != 1)
+ return (EINVAL);
+
+ if (!(uiop->uio_iov->iov_len > 0))
+ return (EINVAL);
+
+ if (vp->v_type != VDIR)
+ return (ENOTDIR);
+
+ roff = uiop->uio_loffset;
+ coff = 0;
+ brem = bsize = uiop->uio_iov->iov_len;
+ buf = kmem_alloc(bsize, KM_SLEEP);
+ dp = buf;
+
+ /*
+ * Recall that offsets here are done based on the name of the dirent
+ * excluding the null terminator. Therefore `.` is always at 0, `..` is
+ * always at 1, and then the first real dirent is at 3. This offset is
+ * what's actually stored when we update the offset in the structure.
+ */
+ if (roff == 0) {
+ dlen = DIRENT64_RECLEN(1);
+ if (first == B_TRUE) {
+ if (dlen > brem) {
+ kmem_free(buf, bsize);
+ return (EINVAL);
+ }
+ first = B_FALSE;
+ }
+ dp->d_ino = (ino64_t)bnp->bvn_attr.va_nodeid;
+ dp->d_off = 0;
+ dp->d_reclen = (ushort_t)dlen;
+ (void) strncpy(dp->d_name, ".", DIRENT64_NAMELEN(dlen));
+ dp = (struct dirent64 *)((uintptr_t)dp + dp->d_reclen);
+ brem -= dlen;
+ }
+
+ if (roff <= 1) {
+ dlen = DIRENT64_RECLEN(2);
+ if (first == B_TRUE) {
+ if (dlen > brem) {
+ kmem_free(buf, bsize);
+ return (EINVAL);
+ }
+ first = B_FALSE;
+ }
+ dp->d_ino = (ino64_t)bnp->bvn_parent->bvn_attr.va_nodeid;
+ dp->d_off = 1;
+ dp->d_reclen = (ushort_t)dlen;
+ (void) strncpy(dp->d_name, "..", DIRENT64_NAMELEN(dlen));
+ dp = (struct dirent64 *)((uintptr_t)dp + dp->d_reclen);
+ brem -= dlen;
+ }
+
+ coff = 3;
+ for (dnp = avl_first(&bnp->bvn_dir); dnp != NULL;
+ dnp = AVL_NEXT(&bnp->bvn_dir, dnp)) {
+ size_t nlen = strlen(dnp->bvn_name);
+
+ if (roff > coff) {
+ coff += nlen;
+ continue;
+ }
+
+ dlen = DIRENT64_RECLEN(nlen);
+ if (dlen > brem) {
+ if (first == B_TRUE) {
+ kmem_free(buf, bsize);
+ return (EINVAL);
+ }
+ break;
+ }
+ first = B_FALSE;
+
+ dp->d_ino = (ino64_t)dnp->bvn_attr.va_nodeid;
+ dp->d_off = coff;
+ dp->d_reclen = (ushort_t)dlen;
+ (void) strncpy(dp->d_name, dnp->bvn_name,
+ DIRENT64_NAMELEN(dlen));
+ dp = (struct dirent64 *)((uintptr_t)dp + dp->d_reclen);
+ brem -= dlen;
+ coff += nlen;
+ }
+
+ ret = uiomove(buf, (bsize - brem), UIO_READ, uiop);
+
+ if (ret == 0) {
+ if (dnp == NULL) {
+ coff++;
+ if (eofp != NULL)
+ *eofp = 1;
+ } else if (eofp != NULL) {
+ *eofp = 0;
+ }
+ uiop->uio_loffset = coff;
+ }
+ gethrestime(&bnp->bvn_attr.va_atime);
+ kmem_free(buf, bsize);
+ return (ret);
+}
+
+/*ARGSUSED*/
+static void
+bootfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
+{
+}
+
+/*ARGSUSED*/
+static int
+bootfs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ct)
+{
+ if (write_lock != 0)
+ return (EINVAL);
+ return (0);
+}
+
+/*ARGSUSED*/
+static void
+bootfs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ct)
+{
+}
+
+/*ARGSUSED*/
+static int
+bootfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp,
+ caller_context_t *ct)
+{
+ bootfs_node_t *bnp = (bootfs_node_t *)vp->v_data;
+ if (vp->v_type == VDIR)
+ return (0);
+ return ((*noffp < 0 || *noffp > bnp->bvn_size ? EINVAL : 0));
+}
+
+/*
+ * We need to fill in a single page of a vnode's memory based on the actual data
+ * from the kernel. We'll use this node's sliding window into physical memory
+ * and update one page at a time.
+ */
+/*ARGSUSED*/
+static int
+bootfs_getapage(vnode_t *vp, u_offset_t off, size_t len, uint_t *protp,
+ page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw,
+ cred_t *cr)
+{
+ bootfs_node_t *bnp = vp->v_data;
+ page_t *pp, *fpp;
+ pfn_t pfn;
+
+ for (;;) {
+ /* Easy case where the page exists */
+ pp = page_lookup(vp, off, rw == S_CREATE ? SE_EXCL : SE_SHARED);
+ if (pp != NULL) {
+ if (pl != NULL) {
+ pl[0] = pp;
+ pl[1] = NULL;
+ } else {
+ page_unlock(pp);
+ }
+ return (0);
+ }
+
+ pp = page_create_va(vp, off, PAGESIZE, PG_EXCL | PG_WAIT, seg,
+ addr);
+
+ /*
+ * If we didn't get the page, that means someone else beat us to
+ * creating this so we need to try again.
+ */
+ if (pp != NULL)
+ break;
+ }
+
+ pfn = btop((bnp->bvn_addr + off) & PAGEMASK);
+ fpp = page_numtopp_nolock(pfn);
+
+ if (ppcopy(fpp, pp) == 0) {
+ pvn_read_done(pp, B_ERROR);
+ return (EIO);
+ }
+
+ if (pl != NULL) {
+ pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
+ } else {
+ pvn_io_done(pp);
+ }
+
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+bootfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
+ page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw,
+ cred_t *cr, caller_context_t *ct)
+{
+ int err;
+ bootfs_node_t *bnp = vp->v_data;
+
+ if (off + len > bnp->bvn_size + PAGEOFFSET)
+ return (EFAULT);
+
+ if (len <= PAGESIZE)
+ err = bootfs_getapage(vp, (u_offset_t)off, len, protp, pl,
+ plsz, seg, addr, rw, cr);
+ else
+ err = pvn_getpages(bootfs_getapage, vp, (u_offset_t)off, len,
+ protp, pl, plsz, seg, addr, rw, cr);
+
+ return (err);
+}
+
+/*ARGSUSED*/
+static int
+bootfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp,
+ size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
+ caller_context_t *ct)
+{
+ int ret;
+ segvn_crargs_t vn_a;
+
+#ifdef _ILP32
+ if (len > MAXOFF_T)
+ return (ENOMEM);
+#endif
+
+ if (vp->v_flag & VNOMAP)
+ return (ENOSYS);
+
+ if (off < 0 || off > MAXOFFSET_T - off)
+ return (ENXIO);
+
+ if (vp->v_type != VREG)
+ return (ENODEV);
+
+ if (prot & PROT_WRITE)
+ return (ENOTSUP);
+
+ as_rangelock(as);
+ ret = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
+ if (ret != 0) {
+ as_rangeunlock(as);
+ return (ret);
+ }
+
+ vn_a.vp = vp;
+ vn_a.offset = (u_offset_t)off;
+ vn_a.type = flags & MAP_TYPE;
+ vn_a.prot = prot;
+ vn_a.maxprot = maxprot;
+ vn_a.cred = cr;
+ vn_a.amp = NULL;
+ vn_a.flags = flags & ~MAP_TYPE;
+ vn_a.szc = 0;
+ vn_a.lgrp_mem_policy_flags = 0;
+
+ ret = as_map(as, *addrp, len, segvn_create, &vn_a);
+
+ as_rangeunlock(as);
+ return (ret);
+
+}
+
+/*ARGSUSED*/
+static int
+bootfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
+ size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
+ caller_context_t *ct)
+{
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+bootfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
+ size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr,
+ caller_context_t *ct)
+{
+ return (0);
+}
+
+static int
+bootfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
+ caller_context_t *ct)
+{
+ int ret;
+
+ switch (cmd) {
+ case _PC_TIMESTAMP_RESOLUTION:
+ *valp = 1L;
+ ret = 0;
+ break;
+ default:
+ ret = fs_pathconf(vp, cmd, valp, cr, ct);
+ }
+
+ return (ret);
+}
+
+const fs_operation_def_t bootfs_vnodeops_template[] = {
+ VOPNAME_OPEN, { .vop_open = bootfs_open },
+ VOPNAME_CLOSE, { .vop_close = bootfs_close },
+ VOPNAME_READ, { .vop_read = bootfs_read },
+ VOPNAME_IOCTL, { .vop_ioctl = bootfs_ioctl },
+ VOPNAME_GETATTR, { .vop_getattr = bootfs_getattr },
+ VOPNAME_ACCESS, { .vop_access = bootfs_access },
+ VOPNAME_LOOKUP, { .vop_lookup = bootfs_lookup },
+ VOPNAME_READDIR, { .vop_readdir = bootfs_readdir },
+ VOPNAME_INACTIVE, { .vop_inactive = bootfs_inactive },
+ VOPNAME_RWLOCK, { .vop_rwlock = bootfs_rwlock },
+ VOPNAME_RWUNLOCK, { .vop_rwunlock = bootfs_rwunlock },
+ VOPNAME_SEEK, { .vop_seek = bootfs_seek },
+ VOPNAME_GETPAGE, { .vop_getpage = bootfs_getpage },
+ VOPNAME_MAP, { .vop_map = bootfs_map },
+ VOPNAME_ADDMAP, { .vop_addmap = bootfs_addmap },
+ VOPNAME_DELMAP, { .vop_delmap = bootfs_delmap },
+ VOPNAME_PATHCONF, { .vop_pathconf = bootfs_pathconf },
+ VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_nosupport },
+ NULL, NULL
+};
diff --git a/usr/src/uts/common/fs/dev/sdev_netops.c b/usr/src/uts/common/fs/dev/sdev_netops.c
index 4eaf38f484..3f637f4cf5 100644
--- a/usr/src/uts/common/fs/dev/sdev_netops.c
+++ b/usr/src/uts/common/fs/dev/sdev_netops.c
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
/*
@@ -41,8 +42,102 @@
#include <sys/zone.h>
#include <sys/dls.h>
+static const char *devnet_zpath = "/dev/net/zone/";
struct vnodeops *devnet_vnodeops;
+static zoneid_t
+devnet_nodetozone(sdev_node_t *dv)
+{
+ char *zname = NULL, *dup;
+ zone_t *zone;
+ int duplen;
+ zoneid_t zid;
+
+ /*
+ * If in a non-global zone, always return it's zid no matter what the
+ * node is.
+ */
+ zid = getzoneid();
+ if (zid != GLOBAL_ZONEID)
+ return (zid);
+
+ /*
+ * If it doesn't have /dev/net/zone/ then it can't be a specific zone
+ * we're targetting.
+ */
+ if (strncmp(devnet_zpath, dv->sdev_path, strlen(devnet_zpath)) != 0)
+ return (GLOBAL_ZONEID);
+
+ if (dv->sdev_vnode->v_type == VDIR) {
+ zone = zone_find_by_name(dv->sdev_name);
+ } else {
+ /* Non directories have the form /dev/net/zone/%z/%s */
+ dup = strdup(dv->sdev_path);
+ duplen = strlen(dup);
+ zname = strrchr(dup, '/');
+ *zname = '\0';
+ zname--;
+ zname = strrchr(dup, '/');
+ zname++;
+ zone = zone_find_by_name(zname);
+ kmem_free(dup, duplen + 1);
+ }
+ if (zone == NULL)
+ return (GLOBAL_ZONEID);
+ zid = zone->zone_id;
+ zone_rele(zone);
+ return (zid);
+}
+
+static int
+devnet_mkdir(struct sdev_node *ddv, char *name)
+{
+ sdev_node_t *dv;
+ struct vattr va;
+ int ret;
+
+ ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
+ dv = sdev_cache_lookup(ddv, name);
+ if (dv != NULL) {
+ SDEV_SIMPLE_RELE(dv);
+ return (EEXIST);
+ }
+
+ va = *sdev_getdefault_attr(VDIR);
+ gethrestime(&va.va_atime);
+ va.va_mtime = va.va_atime;
+ va.va_ctime = va.va_atime;
+
+ ret = sdev_mknode(ddv, name, &dv, &va, NULL, NULL, kcred, SDEV_READY);
+ if (ret != 0)
+ return (ret);
+ SDEV_SIMPLE_RELE(dv);
+ return (0);
+}
+
+/*
+ * We basically need to walk down the directory path to determine what we should
+ * do. At the top level of /dev/net, only the directory /dev/net/zone is valid,
+ * and it is always valid. Following on that, /dev/net/zone/%zonename is valid
+ * if and only if we can look up that zone name. If it's not, or it's some other
+ * name, then it's SDEV_VTOR_INVALID.
+ */
+static int
+devnet_dirvalidate(struct sdev_node *dv)
+{
+ zone_t *zonep;
+ char *path = "/dev/net/zone";
+
+ if (strcmp(path, dv->sdev_path) == 0)
+ return (SDEV_VTOR_VALID);
+
+ zonep = zone_find_by_name(dv->sdev_name);
+ if (zonep == NULL)
+ return (SDEV_VTOR_INVALID);
+ zone_rele(zonep);
+ return (SDEV_VTOR_VALID);
+}
+
/*
* Check if a net sdev_node is still valid - i.e. it represents a current
* network link.
@@ -60,11 +155,20 @@ devnet_validate(struct sdev_node *dv)
ASSERT(dv->sdev_state == SDEV_READY);
- if (dls_mgmt_get_linkid(dv->sdev_name, &linkid) != 0)
+ if (dv->sdev_vnode->v_type == VDIR)
+ return (devnet_dirvalidate(dv));
+
+ if (strncmp(devnet_zpath, dv->sdev_path, strlen(devnet_zpath)) == 0) {
+ ASSERT(SDEV_IS_GLOBAL(dv));
+ zoneid = devnet_nodetozone(dv);
+ } else {
+ zoneid = getzoneid();
+ }
+
+ if (dls_mgmt_get_linkid_in_zone(dv->sdev_name, &linkid, zoneid) != 0)
return (SDEV_VTOR_INVALID);
- if (SDEV_IS_GLOBAL(dv))
+ if (zoneid == GLOBAL_ZONEID)
return (SDEV_VTOR_VALID);
- zoneid = getzoneid();
return (zone_check_datalink(&zoneid, linkid) == 0 ?
SDEV_VTOR_VALID : SDEV_VTOR_INVALID);
}
@@ -74,13 +178,14 @@ devnet_validate(struct sdev_node *dv)
* a net entry when the node is not found in the cache.
*/
static int
-devnet_create_rvp(const char *nm, struct vattr *vap, dls_dl_handle_t *ddhp)
+devnet_create_rvp(const char *nm, struct vattr *vap, dls_dl_handle_t *ddhp,
+ zoneid_t zid)
{
timestruc_t now;
dev_t dev;
int error;
- if ((error = dls_devnet_open(nm, ddhp, &dev)) != 0) {
+ if ((error = dls_devnet_open_in_zone(nm, ddhp, &dev, zid)) != 0) {
sdcmn_err12(("devnet_create_rvp: not a valid vanity name "
"network node: %s\n", nm));
return (error);
@@ -116,6 +221,7 @@ devnet_lookup(struct vnode *dvp, char *nm, struct vnode **vpp,
struct sdev_node *ddv = VTOSDEV(dvp);
struct sdev_node *dv = NULL;
dls_dl_handle_t ddh = NULL;
+ zone_t *zone;
struct vattr vattr;
int nmlen;
int error = ENOENT;
@@ -123,6 +229,9 @@ devnet_lookup(struct vnode *dvp, char *nm, struct vnode **vpp,
if (SDEVTOV(ddv)->v_type != VDIR)
return (ENOTDIR);
+ if (!SDEV_IS_GLOBAL(ddv) && crgetzoneid(cred) == GLOBAL_ZONEID)
+ return (EPERM);
+
/*
* Empty name or ., return node itself.
*/
@@ -145,6 +254,12 @@ devnet_lookup(struct vnode *dvp, char *nm, struct vnode **vpp,
rw_enter(&ddv->sdev_contents, RW_WRITER);
/*
+ * ZOMBIED parent does not allow new node creation, bail out early.
+ */
+ if (ddv->sdev_state == SDEV_ZOMBIE)
+ goto failed;
+
+ /*
* directory cache lookup:
*/
if ((dv = sdev_cache_lookup(ddv, nm)) != NULL) {
@@ -153,13 +268,42 @@ devnet_lookup(struct vnode *dvp, char *nm, struct vnode **vpp,
goto found;
}
+ if (SDEV_IS_GLOBAL(ddv)) {
+ /*
+ * Check for /dev/net/zone
+ */
+ if (strcmp("zone", nm) == 0 && strcmp("/dev/net",
+ ddv->sdev_path) == 0) {
+ (void) devnet_mkdir(ddv, nm);
+ dv = sdev_cache_lookup(ddv, nm);
+ ASSERT(dv != NULL);
+ goto found;
+ }
+
+ /*
+ * Check for /dev/net/zone/%z. We can't use devnet_zpath due to
+ * its trailing slash.
+ */
+ if (strcmp("/dev/net/zone", ddv->sdev_path) == 0) {
+ zone = zone_find_by_name(nm);
+ if (zone == NULL)
+ goto failed;
+ (void) devnet_mkdir(ddv, nm);
+ zone_rele(zone);
+ dv = sdev_cache_lookup(ddv, nm);
+ ASSERT(dv != NULL);
+ goto found;
+ }
+ } else if (strcmp("/dev/net", ddv->sdev_path) != 0) {
+ goto failed;
+ }
+
/*
- * ZOMBIED parent does not allow new node creation, bail out early.
+ * We didn't find what we were looking for. What that is depends a lot
+ * on what directory we're in.
*/
- if (ddv->sdev_state == SDEV_ZOMBIE)
- goto failed;
- error = devnet_create_rvp(nm, &vattr, &ddh);
+ error = devnet_create_rvp(nm, &vattr, &ddh, devnet_nodetozone(ddv));
if (error != 0)
goto failed;
@@ -219,7 +363,7 @@ devnet_filldir_datalink(datalink_id_t linkid, void *arg)
if ((dv = sdev_cache_lookup(ddv, (char *)link)) != NULL)
goto found;
- if (devnet_create_rvp(link, &vattr, &ddh) != 0)
+ if (devnet_create_rvp(link, &vattr, &ddh, devnet_nodetozone(arg)) != 0)
return (0);
ASSERT(ddh != NULL);
@@ -244,16 +388,77 @@ found:
return (0);
}
+/*
+ * Fill in all the entries for the current zone.
+ */
static void
-devnet_filldir(struct sdev_node *ddv)
+devnet_fillzone(struct sdev_node *ddv, zoneid_t zid)
{
- sdev_node_t *dv, *next;
datalink_id_t linkid;
+ ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
+ if (zid == GLOBAL_ZONEID) {
+ ASSERT(SDEV_IS_GLOBAL(ddv));
+ linkid = DATALINK_INVALID_LINKID;
+ do {
+ linkid = dls_mgmt_get_next(linkid, DATALINK_CLASS_ALL,
+ DATALINK_ANY_MEDIATYPE, DLMGMT_ACTIVE);
+ if (linkid != DATALINK_INVALID_LINKID)
+ (void) devnet_filldir_datalink(linkid, ddv);
+ } while (linkid != DATALINK_INVALID_LINKID);
+ } else {
+ (void) zone_datalink_walk(zid, devnet_filldir_datalink, ddv);
+ }
+}
+
+/*
+ * Callback for zone_walk when filling up /dev/net/zone/...
+ */
+static int
+devnet_fillzdir_cb(zone_t *zonep, void *arg)
+{
+ sdev_node_t *ddv = arg;
+
+ ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
+ (void) devnet_mkdir(ddv, zonep->zone_name);
+ return (0);
+}
+
+/*
+ * Fill in a directory that isn't the top level /dev/net.
+ */
+static void
+devnet_fillzdir(struct sdev_node *ddv)
+{
+ zone_t *zonep;
+ char *path = "/dev/net/zone";
+
+ if (strcmp(path, ddv->sdev_path) == 0) {
+ (void) zone_walk(devnet_fillzdir_cb, ddv);
+ return;
+ }
+
+ zonep = zone_find_by_name(ddv->sdev_name);
+ if (zonep == NULL)
+ return;
+ devnet_fillzone(ddv, zonep->zone_id);
+ zone_rele(zonep);
+}
+
+static void
+devnet_filldir(struct sdev_node *ddv)
+{
+ int ret;
+ sdev_node_t *dv, *next;
+
ASSERT(RW_READ_HELD(&ddv->sdev_contents));
if (rw_tryupgrade(&ddv->sdev_contents) == NULL) {
rw_exit(&ddv->sdev_contents);
rw_enter(&ddv->sdev_contents, RW_WRITER);
+ if (ddv->sdev_state == SDEV_ZOMBIE) {
+ rw_exit(&ddv->sdev_contents);
+ return;
+ }
}
for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = next) {
@@ -276,31 +481,36 @@ devnet_filldir(struct sdev_node *ddv)
if (SDEVTOV(dv)->v_count > 0)
continue;
+
SDEV_HOLD(dv);
+
+ /*
+ * Clean out everything underneath before we remove ourselves.
+ */
+ ret = sdev_cleandir(dv, NULL, 0);
+ ASSERT(ret == 0);
/* remove the cache node */
(void) sdev_cache_update(ddv, &dv, dv->sdev_name,
SDEV_CACHE_DELETE);
SDEV_RELE(dv);
}
+ if (strcmp(ddv->sdev_path, "/dev/net") != 0) {
+ devnet_fillzdir(ddv);
+ goto done;
+ }
+
if (((ddv->sdev_flags & SDEV_BUILD) == 0) && !dls_devnet_rebuild())
goto done;
if (SDEV_IS_GLOBAL(ddv)) {
- linkid = DATALINK_INVALID_LINKID;
- do {
- linkid = dls_mgmt_get_next(linkid, DATALINK_CLASS_ALL,
- DATALINK_ANY_MEDIATYPE, DLMGMT_ACTIVE);
- if (linkid != DATALINK_INVALID_LINKID)
- (void) devnet_filldir_datalink(linkid, ddv);
- } while (linkid != DATALINK_INVALID_LINKID);
+ devnet_fillzone(ddv, GLOBAL_ZONEID);
+ (void) devnet_mkdir(ddv, "zone");
} else {
- (void) zone_datalink_walk(getzoneid(),
- devnet_filldir_datalink, ddv);
+ devnet_fillzone(ddv, getzoneid());
}
ddv->sdev_flags &= ~SDEV_BUILD;
-
done:
rw_downgrade(&ddv->sdev_contents);
}
@@ -319,6 +529,9 @@ devnet_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred,
ASSERT(sdvp);
+ if (crgetzoneid(cred) == GLOBAL_ZONEID && !SDEV_IS_GLOBAL(sdvp))
+ return (EPERM);
+
if (uiop->uio_offset == 0)
devnet_filldir(sdvp);
diff --git a/usr/src/uts/common/fs/dev/sdev_plugin.c b/usr/src/uts/common/fs/dev/sdev_plugin.c
new file mode 100644
index 0000000000..885191175f
--- /dev/null
+++ b/usr/src/uts/common/fs/dev/sdev_plugin.c
@@ -0,0 +1,913 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Dynamic directory plugin interface for sdev.
+ *
+ * The sdev plugin interfaces provides a means for a dynamic directory based on
+ * in-kernel state to be simply created. Traditionally, dynamic directories were
+ * built into sdev itself. While these legacy plugins are useful, it makes more
+ * sense for these pieces of functionality to live with the individual drivers.
+ *
+ * The plugin interface requires folks to implement three interfaces and
+ * provides a series of callbacks that can be made in the context of those
+ * interfaces to interrogate the sdev_node_t without having to leak
+ * implementation details of the sdev_node_t. These interfaces are:
+ *
+ * o spo_validate
+ *
+ * Given a particular node, answer the question as to whether or not this
+ * entry is still valid. Here, plugins should use the name and the dev_t
+ * associated with the node to verify that it matches something that still
+ * exists.
+ *
+ * o spo_filldir
+ *
+ * Fill all the entries inside of a directory. Note that some of these entries
+ * may already exist.
+ *
+ * o spo_inactive
+ *
+ * The given node is no longer being used. This allows the consumer to
+ * potentially tear down anything that was being held open related to this.
+ * Note that this only fires when the given sdev_node_t becomes a zombie.
+ *
+ * During these callbacks a consumer is not allowed to register or unregister a
+ * plugin, especially their own. They may call the sdev_ctx style functions. All
+ * callbacks fire in a context where blocking is allowed (eg. the spl is below
+ * LOCK_LEVEL).
+ *
+ * When a plugin is added, we create its directory in the global zone. By doing
+ * that, we ensure that something isn't already there and that nothing else can
+ * come along and try and create something without our knowledge. We only have
+ * to create it in the GZ and not for all other instances of sdev because an
+ * instance of sdev that isn't at /dev does not have dynamic directories, and
+ * second, any instance of sdev present in a non-global zone cannot create
+ * anything, therefore we know that by it not being in the global zone's
+ * instance of sdev that we're good to go.
+ *
+ * Lock Ordering
+ * -------------
+ *
+ * The global sdev_plugin_lock must be held before any of the individual
+ * sdev_plugin_t`sp_lock. Further, once any plugin related lock has been held,
+ * it is not legal to take any holds on any sdev_node_t or to grab the
+ * sdev_node_t`contents_lock in any way.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/fs/sdev_impl.h>
+#include <sys/fs/sdev_plugin.h>
+#include <fs/fs_subr.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/ksynch.h>
+#include <sys/sysmacros.h>
+#include <sys/list.h>
+#include <sys/ctype.h>
+
+kmutex_t sdev_plugin_lock;
+list_t sdev_plugin_list;
+kmem_cache_t *sdev_plugin_cache;
+struct vnodeops *sdev_plugin_vnops;
+
+#define SDEV_PLUGIN_NAMELEN 64
+
+typedef struct sdev_plugin {
+ list_node_t sp_link;
+ char sp_name[SDEV_PLUGIN_NAMELEN]; /* E */
+ int sp_nflags; /* E */
+ struct vnodeops *sp_vnops; /* E */
+ sdev_plugin_ops_t *sp_pops; /* E */
+ boolean_t sp_islegacy; /* E */
+ int (*sp_lvtor)(sdev_node_t *); /* E */
+ kmutex_t sp_lock; /* Protects everything below */
+ kcondvar_t sp_nodecv;
+ size_t sp_nnodes;
+} sdev_plugin_t;
+
+/* ARGSUSED */
+static int
+sdev_plugin_cache_constructor(void *buf, void *arg, int tags)
+{
+ sdev_plugin_t *spp = buf;
+ mutex_init(&spp->sp_lock, NULL, MUTEX_DRIVER, 0);
+ cv_init(&spp->sp_nodecv, NULL, CV_DRIVER, NULL);
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+sdev_plugin_cache_destructor(void *buf, void *arg)
+{
+ sdev_plugin_t *spp = buf;
+ cv_destroy(&spp->sp_nodecv);
+ mutex_destroy(&spp->sp_lock);
+}
+
+enum vtype
+sdev_ctx_vtype(sdev_ctx_t ctx)
+{
+ sdev_node_t *sdp = (sdev_node_t *)ctx;
+
+ ASSERT(RW_LOCK_HELD(&sdp->sdev_contents));
+ return (sdp->sdev_vnode->v_type);
+}
+
+const char *
+sdev_ctx_path(sdev_ctx_t ctx)
+{
+ sdev_node_t *sdp = (sdev_node_t *)ctx;
+
+ ASSERT(RW_LOCK_HELD(&sdp->sdev_contents));
+ return (sdp->sdev_path);
+}
+
+const char *
+sdev_ctx_name(sdev_ctx_t ctx)
+{
+ sdev_node_t *sdp = (sdev_node_t *)ctx;
+
+ ASSERT(RW_LOCK_HELD(&sdp->sdev_contents));
+ return (sdp->sdev_name);
+}
+
+/*
+ * Currently we only support psasing through a single flag -- SDEV_IS_GLOBAL.
+ */
+sdev_ctx_flags_t
+sdev_ctx_flags(sdev_ctx_t ctx)
+{
+ sdev_node_t *sdp = (sdev_node_t *)ctx;
+
+ ASSERT(RW_LOCK_HELD(&sdp->sdev_contents));
+ return (sdp->sdev_flags & SDEV_GLOBAL);
+}
+
+/*
+ * Return some amount of private data specific to the vtype. In the case of a
+ * character or block device this is the device number.
+ */
+const void *
+sdev_ctx_vtype_data(sdev_ctx_t ctx)
+{
+ sdev_node_t *sdp = (sdev_node_t *)ctx;
+ void *ret;
+
+ ASSERT(RW_LOCK_HELD(&sdp->sdev_contents));
+ switch (sdp->sdev_vnode->v_type) {
+ case VCHR:
+ case VBLK:
+ ret = (void *)(uintptr_t)(sdp->sdev_vnode->v_rdev);
+ break;
+ default:
+ ret = NULL;
+ break;
+ }
+
+ return (ret);
+}
+
+/*
+ * Use the same rules as zones for a name. isalphanum + '-', '_', and '.'.
+ */
+static int
+sdev_plugin_name_isvalid(const char *c, int buflen)
+{
+ int i;
+
+ for (i = 0; i < buflen; i++, c++) {
+ if (*c == '\0')
+ return (1);
+
+ if (!isalnum(*c) && *c != '-' && *c != '_' && *c != '.')
+ return (0);
+ }
+ /* Never found a null terminator */
+ return (0);
+}
+
+static int
+sdev_plugin_mknode(sdev_plugin_t *spp, sdev_node_t *sdvp, char *name,
+ vattr_t *vap)
+{
+ int ret;
+ sdev_node_t *svp;
+
+ ASSERT(RW_WRITE_HELD(&sdvp->sdev_contents));
+ ASSERT(spp != NULL);
+ svp = sdev_cache_lookup(sdvp, name);
+ if (svp != NULL) {
+ SDEV_SIMPLE_RELE(svp);
+ return (EEXIST);
+ }
+
+ ret = sdev_mknode(sdvp, name, &svp, vap, NULL, NULL, kcred,
+ SDEV_READY);
+ if (ret != 0)
+ return (ret);
+ SDEV_SIMPLE_RELE(svp);
+
+ return (0);
+}
+
+/*
+ * Plugin node creation callbacks
+ */
+int
+sdev_plugin_mkdir(sdev_ctx_t ctx, char *name)
+{
+ sdev_node_t *sdvp;
+ timestruc_t now;
+ struct vattr vap;
+
+ if (sdev_plugin_name_isvalid(name, SDEV_PLUGIN_NAMELEN) == 0)
+ return (EINVAL);
+
+ sdvp = (sdev_node_t *)ctx;
+ ASSERT(sdvp->sdev_private != NULL);
+ ASSERT(RW_WRITE_HELD(&sdvp->sdev_contents));
+
+ vap = *sdev_getdefault_attr(VDIR);
+ gethrestime(&now);
+ vap.va_atime = now;
+ vap.va_mtime = now;
+ vap.va_ctime = now;
+
+ return (sdev_plugin_mknode(sdvp->sdev_private, sdvp, name, &vap));
+}
+
+int
+sdev_plugin_mknod(sdev_ctx_t ctx, char *name, mode_t mode, dev_t dev)
+{
+ sdev_node_t *sdvp;
+ timestruc_t now;
+ struct vattr vap;
+
+ if (sdev_plugin_name_isvalid(name, SDEV_PLUGIN_NAMELEN) == 0)
+ return (EINVAL);
+
+ sdvp = (sdev_node_t *)ctx;
+ ASSERT(RW_WRITE_HELD(&sdvp->sdev_contents));
+ if (mode != S_IFCHR && mode != S_IFBLK)
+ return (EINVAL);
+
+ ASSERT(sdvp->sdev_private != NULL);
+
+ vap = *sdev_getdefault_attr(mode == S_IFCHR ? VCHR : VBLK);
+ gethrestime(&now);
+ vap.va_atime = now;
+ vap.va_mtime = now;
+ vap.va_ctime = now;
+ vap.va_rdev = dev;
+ vap.va_mode = mode | 0666;
+
+ /* Despite the similar name, this is in fact a different function */
+ return (sdev_plugin_mknode(sdvp->sdev_private, sdvp, name, &vap));
+
+}
+
+static int
+sdev_plugin_validate(sdev_node_t *sdp)
+{
+ int ret;
+ sdev_plugin_t *spp;
+
+ ASSERT(sdp->sdev_private != NULL);
+ spp = sdp->sdev_private;
+ ASSERT(spp->sp_islegacy == B_FALSE);
+ ASSERT(spp->sp_pops != NULL);
+ rw_enter(&sdp->sdev_contents, RW_READER);
+ ret = spp->sp_pops->spo_validate((uintptr_t)sdp);
+ rw_exit(&sdp->sdev_contents);
+ return (ret);
+}
+
+static void
+sdev_plugin_validate_dir(sdev_node_t *sdvp)
+{
+ int ret;
+ sdev_node_t *svp, *next;
+
+ ASSERT(RW_WRITE_HELD(&sdvp->sdev_contents));
+
+ for (svp = SDEV_FIRST_ENTRY(sdvp); svp != NULL; svp = next) {
+
+ next = SDEV_NEXT_ENTRY(sdvp, svp);
+ ASSERT(svp->sdev_state != SDEV_ZOMBIE);
+ /* skip nodes that aren't ready */
+ if (svp->sdev_state == SDEV_INIT)
+ continue;
+
+ switch (sdev_plugin_validate(svp)) {
+ case SDEV_VTOR_VALID:
+ case SDEV_VTOR_SKIP:
+ continue;
+ case SDEV_VTOR_INVALID:
+ case SDEV_VTOR_STALE:
+ break;
+ }
+
+ SDEV_HOLD(svp);
+
+ /*
+ * Clean out everything underneath this node before we
+ * remove it.
+ */
+ if (svp->sdev_vnode->v_type == VDIR) {
+ ret = sdev_cleandir(svp, NULL, 0);
+ ASSERT(ret == 0);
+ }
+ /* remove the cache node */
+ (void) sdev_cache_update(sdvp, &svp, svp->sdev_name,
+ SDEV_CACHE_DELETE);
+ SDEV_RELE(svp);
+ }
+}
+
+/* ARGSUSED */
+static int
+sdev_plugin_vop_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred,
+ int *eofp, caller_context_t *ct_unused, int flags_unused)
+{
+ int ret;
+ sdev_node_t *sdvp = VTOSDEV(dvp);
+ sdev_plugin_t *spp;
+
+ ASSERT(RW_READ_HELD(&sdvp->sdev_contents));
+
+ /* Sanity check we're not a zombie before we do anyting else */
+ if (sdvp->sdev_state == SDEV_ZOMBIE)
+ return (ENOENT);
+
+ spp = sdvp->sdev_private;
+ ASSERT(spp != NULL);
+ ASSERT(spp->sp_islegacy == B_FALSE);
+ ASSERT(spp->sp_pops != NULL);
+
+ if (crgetzoneid(cred) == GLOBAL_ZONEID && !SDEV_IS_GLOBAL(sdvp))
+ return (EPERM);
+
+ if (uiop->uio_offset == 0) {
+ /*
+ * We upgrade to a write lock and grab the plugin's lock along
+ * the way. We're almost certainly going to get creation
+ * callbacks, so this is the only safe way to go.
+ */
+ if (rw_tryupgrade(&sdvp->sdev_contents) == 0) {
+ rw_exit(&sdvp->sdev_contents);
+ rw_enter(&sdvp->sdev_contents, RW_WRITER);
+ if (sdvp->sdev_state == SDEV_ZOMBIE) {
+ rw_downgrade(&sdvp->sdev_contents);
+ return (ENOENT);
+ }
+ }
+
+ sdev_plugin_validate_dir(sdvp);
+ ret = spp->sp_pops->spo_filldir((uintptr_t)sdvp);
+ rw_downgrade(&sdvp->sdev_contents);
+ if (ret != 0)
+ return (ret);
+ }
+
+ return (devname_readdir_func(dvp, uiop, cred, eofp, 0));
+}
+
+/*
+ * If we don't have a callback function that returns a failure, then sdev will
+ * try to create a node for us which violates all of our basic assertions. To
+ * work around that we create our own callback for devname_lookup_func which
+ * always returns ENOENT as at this point either it was created with the filldir
+ * callback or it was not.
+ */
+/*ARGSUSED*/
+static int
+sdev_plugin_vop_lookup_cb(sdev_node_t *ddv, char *nm, void **arg, cred_t *cred,
+ void *unused, char *unused2)
+{
+ return (ENOENT);
+}
+
+/* ARGSUSED */
+static int
+sdev_plugin_vop_lookup(struct vnode *dvp, char *nm, struct vnode **vpp,
+ struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred,
+ caller_context_t *ct, int *direntflags, pathname_t *realpnp)
+{
+ int ret;
+ sdev_node_t *sdvp;
+ sdev_plugin_t *spp;
+
+ /* execute access is required to search the directory */
+ if ((ret = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0)
+ return (ret);
+
+ sdvp = VTOSDEV(dvp);
+ spp = sdvp->sdev_private;
+ ASSERT(spp != NULL);
+ ASSERT(spp->sp_islegacy == B_FALSE);
+ ASSERT(spp->sp_pops != NULL);
+
+ if (crgetzoneid(cred) == GLOBAL_ZONEID && !SDEV_IS_GLOBAL(sdvp))
+ return (EPERM);
+
+ /*
+ * Go straight for the write lock.
+ */
+ rw_enter(&sdvp->sdev_contents, RW_WRITER);
+ if (sdvp->sdev_state == SDEV_ZOMBIE) {
+ rw_exit(&sdvp->sdev_contents);
+ return (ENOENT);
+ }
+ sdev_plugin_validate_dir(sdvp);
+ ret = spp->sp_pops->spo_filldir((uintptr_t)sdvp);
+ rw_exit(&sdvp->sdev_contents);
+ if (ret != 0)
+ return (ret);
+
+ return (devname_lookup_func(sdvp, nm, vpp, cred,
+ sdev_plugin_vop_lookup_cb, SDEV_VATTR));
+}
+
+/*
+ * sdev is not a good citizen. We get inactive callbacks whenever a vnode goes
+ * to zero, but isn't necessairily a zombie yet. As such, to make things easier
+ * for users, we only fire the inactive callback when the node becomes a zombie
+ * and thus will be torn down here.
+ */
+static void
+sdev_plugin_vop_inactive_cb(struct vnode *dvp)
+{
+ sdev_node_t *sdp = VTOSDEV(dvp);
+ sdev_plugin_t *spp = sdp->sdev_private;
+
+ rw_enter(&sdp->sdev_contents, RW_READER);
+ if (sdp->sdev_state != SDEV_ZOMBIE) {
+ rw_exit(&sdp->sdev_contents);
+ return;
+ }
+ spp->sp_pops->spo_inactive((uintptr_t)sdp);
+ mutex_enter(&spp->sp_lock);
+ VERIFY(spp->sp_nnodes > 0);
+ spp->sp_nnodes--;
+ cv_signal(&spp->sp_nodecv);
+ mutex_exit(&spp->sp_lock);
+ rw_exit(&sdp->sdev_contents);
+}
+
+/*ARGSUSED*/
+static void
+sdev_plugin_vop_inactive(struct vnode *dvp, struct cred *cred,
+ caller_context_t *ct)
+{
+ sdev_node_t *sdp = VTOSDEV(dvp);
+ sdev_plugin_t *spp = sdp->sdev_private;
+ ASSERT(sdp->sdev_private != NULL);
+ ASSERT(spp->sp_islegacy == B_FALSE);
+ devname_inactive_func(dvp, cred, sdev_plugin_vop_inactive_cb);
+}
+
+const fs_operation_def_t sdev_plugin_vnodeops_tbl[] = {
+ VOPNAME_READDIR, { .vop_readdir = sdev_plugin_vop_readdir },
+ VOPNAME_LOOKUP, { .vop_lookup = sdev_plugin_vop_lookup },
+ VOPNAME_INACTIVE, { .vop_inactive = sdev_plugin_vop_inactive },
+ VOPNAME_CREATE, { .error = fs_nosys },
+ VOPNAME_REMOVE, { .error = fs_nosys },
+ VOPNAME_MKDIR, { .error = fs_nosys },
+ VOPNAME_RMDIR, { .error = fs_nosys },
+ VOPNAME_SYMLINK, { .error = fs_nosys },
+ VOPNAME_SETSECATTR, { .error = fs_nosys },
+ NULL, NULL
+};
+
+/*
+ * construct a new template with overrides from vtab
+ */
+static fs_operation_def_t *
+sdev_merge_vtab(const fs_operation_def_t tab[])
+{
+ fs_operation_def_t *new;
+ const fs_operation_def_t *tab_entry;
+
+ /* make a copy of standard vnode ops table */
+ new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP);
+ bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size);
+
+ /* replace the overrides from tab */
+ for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) {
+ fs_operation_def_t *std_entry = new;
+ while (std_entry->name) {
+ if (strcmp(tab_entry->name, std_entry->name) == 0) {
+ std_entry->func = tab_entry->func;
+ break;
+ }
+ std_entry++;
+ }
+ }
+
+ return (new);
+}
+
+/* free memory allocated by sdev_merge_vtab */
+static void
+sdev_free_vtab(fs_operation_def_t *new)
+{
+ kmem_free(new, sdev_vnodeops_tbl_size);
+}
+
+/*
+ * Register a new plugin.
+ */
+sdev_plugin_hdl_t
+sdev_plugin_register(const char *name, sdev_plugin_ops_t *ops, int *errp)
+{
+ int ret, err;
+ sdev_plugin_t *spp, *iter;
+ vnode_t *vp, *nvp;
+ sdev_node_t *sdp, *slp;
+ timestruc_t now;
+ struct vattr vap;
+
+ /*
+ * Some consumers don't care about why they failed. To keep the code
+ * simple, we'll just pretend they gave us something.
+ */
+ if (errp == NULL)
+ errp = &err;
+
+ if (sdev_plugin_name_isvalid(name, SDEV_PLUGIN_NAMELEN) == 0) {
+ *errp = EINVAL;
+ return (NULL);
+ }
+
+ if (ops->spo_version != 1) {
+ *errp = EINVAL;
+ return (NULL);
+ }
+
+ if (ops->spo_validate == NULL || ops->spo_filldir == NULL ||
+ ops->spo_inactive == NULL) {
+ *errp = EINVAL;
+ return (NULL);
+ }
+
+ if ((ops->spo_flags & ~SDEV_PLUGIN_FLAGS_MASK) != 0) {
+ *errp = EINVAL;
+ return (NULL);
+ }
+
+ spp = kmem_cache_alloc(sdev_plugin_cache, KM_SLEEP);
+ (void) strlcpy(spp->sp_name, name, SDEV_PLUGIN_NAMELEN);
+
+ spp->sp_pops = ops;
+ spp->sp_nflags = SDEV_DYNAMIC | SDEV_VTOR;
+ if (ops->spo_flags & SDEV_PLUGIN_NO_NCACHE)
+ spp->sp_nflags |= SDEV_NO_NCACHE;
+ if (ops->spo_flags & SDEV_PLUGIN_SUBDIR)
+ spp->sp_nflags |= SDEV_SUBDIR;
+ spp->sp_vnops = sdev_plugin_vnops;
+ spp->sp_islegacy = B_FALSE;
+ spp->sp_lvtor = NULL;
+ spp->sp_nnodes = 0;
+
+ /*
+ * Make sure it's unique, nothing exists with this name already, and add
+ * it to the list. We also need to go through and grab the sdev
+ * root node as we cannot grab any sdev node locks once we've grabbed
+ * the sdev_plugin_lock. We effectively assert that if a directory is
+ * not present in the GZ's /dev, then it doesn't exist in any of the
+ * local zones.
+ */
+ ret = vn_openat("/dev", UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir, -1);
+ if (ret != 0) {
+ *errp = ret;
+ kmem_cache_free(sdev_plugin_cache, spp);
+ return (NULL);
+ }
+ /* Make sure we have the real vnode */
+ if (VOP_REALVP(vp, &nvp, NULL) == 0) {
+ VN_HOLD(nvp);
+ VN_RELE(vp);
+ vp = nvp;
+ nvp = NULL;
+ }
+ VERIFY(vp->v_op == sdev_vnodeops);
+ sdp = VTOSDEV(vp);
+ rw_enter(&sdp->sdev_contents, RW_WRITER);
+ slp = sdev_cache_lookup(sdp, spp->sp_name);
+ if (slp != NULL) {
+ SDEV_RELE(slp);
+ rw_exit(&sdp->sdev_contents);
+ VN_RELE(vp);
+ *errp = EEXIST;
+ kmem_cache_free(sdev_plugin_cache, spp);
+ return (NULL);
+ }
+
+ mutex_enter(&sdev_plugin_lock);
+ for (iter = list_head(&sdev_plugin_list); iter != NULL;
+ iter = list_next(&sdev_plugin_list, iter)) {
+ if (strcmp(spp->sp_name, iter->sp_name) == 0) {
+ mutex_exit(&sdev_plugin_lock);
+ rw_exit(&sdp->sdev_contents);
+ VN_RELE(vp);
+ *errp = EEXIST;
+ kmem_cache_free(sdev_plugin_cache, spp);
+ return (NULL);
+ }
+ }
+
+ list_insert_tail(&sdev_plugin_list, spp);
+ mutex_exit(&sdev_plugin_lock);
+
+ /*
+ * Now go ahead and create the top level directory for the global zone.
+ */
+ vap = *sdev_getdefault_attr(VDIR);
+ gethrestime(&now);
+ vap.va_atime = now;
+ vap.va_mtime = now;
+ vap.va_ctime = now;
+
+ (void) sdev_plugin_mknode(spp, sdp, spp->sp_name, &vap);
+
+ rw_exit(&sdp->sdev_contents);
+ VN_RELE(vp);
+
+ return ((sdev_plugin_hdl_t)spp);
+}
+
+static void
+sdev_plugin_unregister_cb(sdev_node_t *rdp, void *arg)
+{
+ sdev_plugin_t *spp = arg;
+ sdev_node_t *sdp;
+
+ rw_enter(&rdp->sdev_contents, RW_WRITER);
+ sdp = sdev_cache_lookup(rdp, spp->sp_name);
+ /* If it doesn't exist, we're done here */
+ if (sdp == NULL) {
+ rw_exit(&rdp->sdev_contents);
+ return;
+ }
+
+ /*
+ * We first delete the directory before recursively marking everything
+ * else stale. This ordering should ensure that we don't accidentally
+ * miss anything.
+ */
+ sdev_cache_update(rdp, &sdp, spp->sp_name, SDEV_CACHE_DELETE);
+ sdev_stale(sdp);
+ SDEV_RELE(sdp);
+ rw_exit(&rdp->sdev_contents);
+}
+
+/*
+ * Remove a plugin. This will block until everything has become a zombie, thus
+ * guaranteeing the caller that nothing will call into them again once this call
+ * returns. While the call is ongoing, it could be called into. Note that while
+ * this is ongoing, it will block other mounts.
+ */
+int
+sdev_plugin_unregister(sdev_plugin_hdl_t hdl)
+{
+ sdev_plugin_t *spp = (sdev_plugin_t *)hdl;
+ if (spp->sp_islegacy)
+ return (EINVAL);
+
+ mutex_enter(&sdev_plugin_lock);
+ list_remove(&sdev_plugin_list, spp);
+ mutex_exit(&sdev_plugin_lock);
+
+ sdev_mnt_walk(sdev_plugin_unregister_cb, spp);
+ mutex_enter(&spp->sp_lock);
+ while (spp->sp_nnodes > 0)
+ cv_wait(&spp->sp_nodecv, &spp->sp_lock);
+ mutex_exit(&spp->sp_lock);
+ kmem_cache_free(sdev_plugin_cache, spp);
+ return (0);
+}
+
+/*
+ * Register an old sdev style plugin to deal with what used to be in the vtab.
+ */
+static int
+sdev_plugin_register_legacy(struct sdev_vop_table *vtp)
+{
+ sdev_plugin_t *spp;
+
+ spp = kmem_cache_alloc(sdev_plugin_cache, KM_SLEEP);
+ (void) strlcpy(spp->sp_name, vtp->vt_name, SDEV_PLUGIN_NAMELEN);
+ spp->sp_islegacy = B_TRUE;
+ spp->sp_pops = NULL;
+ spp->sp_nflags = vtp->vt_flags;
+ spp->sp_lvtor = vtp->vt_vtor;
+ spp->sp_nnodes = 0;
+
+ if (vtp->vt_service != NULL) {
+ fs_operation_def_t *templ;
+ templ = sdev_merge_vtab(vtp->vt_service);
+ if (vn_make_ops(vtp->vt_name,
+ (const fs_operation_def_t *)templ,
+ &spp->sp_vnops) != 0) {
+ cmn_err(CE_WARN, "%s: malformed vnode ops\n",
+ vtp->vt_name);
+ sdev_free_vtab(templ);
+ kmem_cache_free(sdev_plugin_cache, spp);
+ return (1);
+ }
+
+ if (vtp->vt_global_vops) {
+ *(vtp->vt_global_vops) = spp->sp_vnops;
+ }
+
+ sdev_free_vtab(templ);
+ } else {
+ spp->sp_vnops = sdev_vnodeops;
+ }
+
+ /*
+ * No need to check for EEXIST here. These are loaded as a part of the
+ * sdev's initialization function. Further, we don't have to create them
+ * as that's taken care of in sdev's mount for the GZ.
+ */
+ mutex_enter(&sdev_plugin_lock);
+ list_insert_tail(&sdev_plugin_list, spp);
+ mutex_exit(&sdev_plugin_lock);
+
+ return (0);
+}
+
+/*
+ * We need to match off of the sdev_path, not the sdev_name. We are only allowed
+ * to exist directly under /dev.
+ */
+static sdev_plugin_t *
+sdev_match(sdev_node_t *dv)
+{
+ int vlen;
+ const char *path;
+ sdev_plugin_t *spp;
+
+ if (strlen(dv->sdev_path) <= 5)
+ return (NULL);
+
+ if (strncmp(dv->sdev_path, "/dev/", 5) != 0)
+ return (NULL);
+ path = dv->sdev_path + 5;
+
+ mutex_enter(&sdev_plugin_lock);
+
+ for (spp = list_head(&sdev_plugin_list); spp != NULL;
+ spp = list_next(&sdev_plugin_list, spp)) {
+ if (strcmp(spp->sp_name, path) == 0) {
+ mutex_exit(&sdev_plugin_lock);
+ return (spp);
+ }
+
+ if (spp->sp_nflags & SDEV_SUBDIR) {
+ vlen = strlen(spp->sp_name);
+ if ((strncmp(spp->sp_name, path,
+ vlen - 1) == 0) && path[vlen] == '/') {
+ mutex_exit(&sdev_plugin_lock);
+ return (spp);
+ }
+
+ }
+ }
+
+ mutex_exit(&sdev_plugin_lock);
+ return (NULL);
+}
+
+void
+sdev_set_no_negcache(sdev_node_t *dv)
+{
+ char *path;
+ sdev_plugin_t *spp;
+
+ ASSERT(dv->sdev_path);
+ path = dv->sdev_path + strlen("/dev/");
+
+ mutex_enter(&sdev_plugin_lock);
+ for (spp = list_head(&sdev_plugin_list); spp != NULL;
+ spp = list_next(&sdev_plugin_list, spp)) {
+ if (strcmp(spp->sp_name, path) == 0) {
+ if (spp->sp_nflags & SDEV_NO_NCACHE)
+ dv->sdev_flags |= SDEV_NO_NCACHE;
+ break;
+ }
+ }
+ mutex_exit(&sdev_plugin_lock);
+}
+
+struct vnodeops *
+sdev_get_vop(sdev_node_t *dv)
+{
+ char *path;
+ sdev_plugin_t *spp;
+
+ path = dv->sdev_path;
+ ASSERT(path);
+
+ /* gets the relative path to /dev/ */
+ path += 5;
+
+ if ((spp = sdev_match(dv)) != NULL) {
+ dv->sdev_flags |= spp->sp_nflags;
+ if (SDEV_IS_PERSIST(dv->sdev_dotdot) &&
+ (SDEV_IS_PERSIST(dv) || !SDEV_IS_DYNAMIC(dv)))
+ dv->sdev_flags |= SDEV_PERSIST;
+ return (spp->sp_vnops);
+ }
+
+ /* child inherits the persistence of the parent */
+ if (SDEV_IS_PERSIST(dv->sdev_dotdot))
+ dv->sdev_flags |= SDEV_PERSIST;
+ return (sdev_vnodeops);
+}
+
+void *
+sdev_get_vtor(sdev_node_t *dv)
+{
+ sdev_plugin_t *spp;
+
+ if (dv->sdev_private == NULL) {
+ spp = sdev_match(dv);
+ if (spp == NULL)
+ return (NULL);
+ } else {
+ spp = dv->sdev_private;
+ }
+
+ if (spp->sp_islegacy)
+ return ((void *)spp->sp_lvtor);
+ else
+ return ((void *)sdev_plugin_validate);
+}
+
+void
+sdev_plugin_nodeready(sdev_node_t *sdp)
+{
+ sdev_plugin_t *spp;
+
+ ASSERT(RW_WRITE_HELD(&sdp->sdev_contents));
+ ASSERT(sdp->sdev_private == NULL);
+
+ spp = sdev_match(sdp);
+ if (spp == NULL)
+ return;
+ if (spp->sp_islegacy)
+ return;
+ sdp->sdev_private = spp;
+ mutex_enter(&spp->sp_lock);
+ spp->sp_nnodes++;
+ mutex_exit(&spp->sp_lock);
+}
+
+int
+sdev_plugin_init(void)
+{
+ sdev_vop_table_t *vtp;
+ fs_operation_def_t *templ;
+
+ sdev_plugin_cache = kmem_cache_create("sdev_plugin",
+ sizeof (sdev_plugin_t), 0, sdev_plugin_cache_constructor,
+ sdev_plugin_cache_destructor, NULL, NULL, NULL, 0);
+ if (sdev_plugin_cache == NULL)
+ return (1);
+ mutex_init(&sdev_plugin_lock, NULL, MUTEX_DRIVER, NULL);
+ list_create(&sdev_plugin_list, sizeof (sdev_plugin_t),
+ offsetof(sdev_plugin_t, sp_link));
+
+ /*
+ * Register all of the legacy vnops
+ */
+ for (vtp = &vtab[0]; vtp->vt_name != NULL; vtp++)
+ if (sdev_plugin_register_legacy(vtp) != 0)
+ return (1);
+
+ templ = sdev_merge_vtab(sdev_plugin_vnodeops_tbl);
+ if (vn_make_ops("sdev_plugin",
+ (const fs_operation_def_t *)templ,
+ &sdev_plugin_vnops) != 0) {
+ sdev_free_vtab(templ);
+ return (1);
+ }
+
+ sdev_free_vtab(templ);
+ return (0);
+}
diff --git a/usr/src/uts/common/fs/dev/sdev_subr.c b/usr/src/uts/common/fs/dev/sdev_subr.c
index b4b27e6285..16439a66a6 100644
--- a/usr/src/uts/common/fs/dev/sdev_subr.c
+++ b/usr/src/uts/common/fs/dev/sdev_subr.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
/*
@@ -150,12 +150,6 @@ vattr_t sdev_vattr_chr = {
kmem_cache_t *sdev_node_cache; /* sdev_node cache */
int devtype; /* fstype */
-/* static */
-static struct vnodeops *sdev_get_vop(struct sdev_node *);
-static void sdev_set_no_negcache(struct sdev_node *);
-static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []);
-static void sdev_free_vtab(fs_operation_def_t *);
-
static void
sdev_prof_free(struct sdev_node *dv)
{
@@ -318,6 +312,7 @@ sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
(void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm);
/* overwritten for VLNK nodes */
dv->sdev_symlink = NULL;
+ list_link_init(&dv->sdev_plist);
vp = SDEVTOV(dv);
vn_reinit(vp);
@@ -406,6 +401,7 @@ sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
} else {
dv->sdev_nlink = 1;
}
+ sdev_plugin_nodeready(dv);
if (!(SDEV_IS_GLOBAL(dv))) {
dv->sdev_origin = (struct sdev_node *)args;
@@ -502,37 +498,22 @@ sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp,
return (dv);
}
-/* directory dependent vop table */
-struct sdev_vop_table {
- char *vt_name; /* subdirectory name */
- const fs_operation_def_t *vt_service; /* vnodeops table */
- struct vnodeops *vt_vops; /* constructed vop */
- struct vnodeops **vt_global_vops; /* global container for vop */
- int (*vt_vtor)(struct sdev_node *); /* validate sdev_node */
- int vt_flags;
-};
-
-/*
- * A nice improvement would be to provide a plug-in mechanism
- * for this table instead of a const table.
- */
-static struct sdev_vop_table vtab[] =
-{
- { "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate,
+struct sdev_vop_table vtab[] = {
+ { "pts", devpts_vnodeops_tbl, &devpts_vnodeops, devpts_validate,
SDEV_DYNAMIC | SDEV_VTOR },
- { "vt", devvt_vnodeops_tbl, NULL, &devvt_vnodeops, devvt_validate,
+ { "vt", devvt_vnodeops_tbl, &devvt_vnodeops, devvt_validate,
SDEV_DYNAMIC | SDEV_VTOR },
- { "zvol", devzvol_vnodeops_tbl, NULL, &devzvol_vnodeops,
+ { "zvol", devzvol_vnodeops_tbl, &devzvol_vnodeops,
devzvol_validate, SDEV_ZONED | SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR },
- { "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE },
+ { "zcons", NULL, NULL, NULL, SDEV_NO_NCACHE },
- { "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate,
- SDEV_DYNAMIC | SDEV_VTOR },
+ { "net", devnet_vnodeops_tbl, &devnet_vnodeops, devnet_validate,
+ SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR },
- { "ipnet", devipnet_vnodeops_tbl, NULL, &devipnet_vnodeops,
+ { "ipnet", devipnet_vnodeops_tbl, &devipnet_vnodeops,
devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE },
/*
@@ -547,132 +528,14 @@ static struct sdev_vop_table vtab[] =
* preventing a mkdir.
*/
- { "lofi", NULL, NULL, NULL, NULL,
+ { "lofi", NULL, NULL, NULL,
SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },
- { "rlofi", NULL, NULL, NULL, NULL,
+ { "rlofi", NULL, NULL, NULL,
SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },
- { NULL, NULL, NULL, NULL, NULL, 0}
+ { NULL, NULL, NULL, NULL, 0}
};
-/*
- * We need to match off of the sdev_path, not the sdev_name. We are only allowed
- * to exist directly under /dev.
- */
-struct sdev_vop_table *
-sdev_match(struct sdev_node *dv)
-{
- int vlen;
- int i;
- const char *path;
-
- if (strlen(dv->sdev_path) <= 5)
- return (NULL);
-
- if (strncmp(dv->sdev_path, "/dev/", 5) != 0)
- return (NULL);
- path = dv->sdev_path + 5;
-
- for (i = 0; vtab[i].vt_name; i++) {
- if (strcmp(vtab[i].vt_name, path) == 0)
- return (&vtab[i]);
- if (vtab[i].vt_flags & SDEV_SUBDIR) {
- vlen = strlen(vtab[i].vt_name);
- if ((strncmp(vtab[i].vt_name, path,
- vlen - 1) == 0) && path[vlen] == '/')
- return (&vtab[i]);
- }
-
- }
- return (NULL);
-}
-
-/*
- * sets a directory's vnodeops if the directory is in the vtab;
- */
-static struct vnodeops *
-sdev_get_vop(struct sdev_node *dv)
-{
- struct sdev_vop_table *vtp;
- char *path;
-
- path = dv->sdev_path;
- ASSERT(path);
-
- /* gets the relative path to /dev/ */
- path += 5;
-
- /* gets the vtab entry it matches */
- if ((vtp = sdev_match(dv)) != NULL) {
- dv->sdev_flags |= vtp->vt_flags;
- if (SDEV_IS_PERSIST(dv->sdev_dotdot) &&
- (SDEV_IS_PERSIST(dv) || !SDEV_IS_DYNAMIC(dv)))
- dv->sdev_flags |= SDEV_PERSIST;
-
- if (vtp->vt_vops) {
- if (vtp->vt_global_vops)
- *(vtp->vt_global_vops) = vtp->vt_vops;
-
- return (vtp->vt_vops);
- }
-
- if (vtp->vt_service) {
- fs_operation_def_t *templ;
- templ = sdev_merge_vtab(vtp->vt_service);
- if (vn_make_ops(vtp->vt_name,
- (const fs_operation_def_t *)templ,
- &vtp->vt_vops) != 0) {
- cmn_err(CE_PANIC, "%s: malformed vnode ops\n",
- vtp->vt_name);
- /*NOTREACHED*/
- }
- if (vtp->vt_global_vops) {
- *(vtp->vt_global_vops) = vtp->vt_vops;
- }
- sdev_free_vtab(templ);
-
- return (vtp->vt_vops);
- }
-
- return (sdev_vnodeops);
- }
-
- /* child inherits the persistence of the parent */
- if (SDEV_IS_PERSIST(dv->sdev_dotdot))
- dv->sdev_flags |= SDEV_PERSIST;
-
- return (sdev_vnodeops);
-}
-
-static void
-sdev_set_no_negcache(struct sdev_node *dv)
-{
- int i;
- char *path;
-
- ASSERT(dv->sdev_path);
- path = dv->sdev_path + strlen("/dev/");
-
- for (i = 0; vtab[i].vt_name; i++) {
- if (strcmp(vtab[i].vt_name, path) == 0) {
- if (vtab[i].vt_flags & SDEV_NO_NCACHE)
- dv->sdev_flags |= SDEV_NO_NCACHE;
- break;
- }
- }
-}
-
-void *
-sdev_get_vtor(struct sdev_node *dv)
-{
- struct sdev_vop_table *vtp;
-
- vtp = sdev_match(dv);
- if (vtp)
- return ((void *)vtp->vt_vtor);
- else
- return (NULL);
-}
/*
* Build the base root inode
@@ -952,8 +815,11 @@ sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
dv->sdev_path = NULL;
}
- if (!SDEV_IS_GLOBAL(dv))
+ if (!SDEV_IS_GLOBAL(dv)) {
sdev_prof_free(dv);
+ if (dv->sdev_vnode->v_type != VLNK && dv->sdev_origin != NULL)
+ SDEV_RELE(dv->sdev_origin);
+ }
if (SDEVTOV(dv)->v_type == VDIR) {
ASSERT(SDEV_FIRST_ENTRY(dv) == NULL);
@@ -967,6 +833,7 @@ sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
(void) memset((void *)&dv->sdev_instance_data, 0,
sizeof (dv->sdev_instance_data));
vn_invalid(SDEVTOV(dv));
+ dv->sdev_private = NULL;
kmem_cache_free(sdev_node_cache, dv);
}
@@ -2948,46 +2815,6 @@ sdev_modctl_devexists(const char *path)
return (error);
}
-extern int sdev_vnodeops_tbl_size;
-
-/*
- * construct a new template with overrides from vtab
- */
-static fs_operation_def_t *
-sdev_merge_vtab(const fs_operation_def_t tab[])
-{
- fs_operation_def_t *new;
- const fs_operation_def_t *tab_entry;
-
- /* make a copy of standard vnode ops table */
- new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP);
- bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size);
-
- /* replace the overrides from tab */
- for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) {
- fs_operation_def_t *std_entry = new;
- while (std_entry->name) {
- if (strcmp(tab_entry->name, std_entry->name) == 0) {
- std_entry->func = tab_entry->func;
- break;
- }
- std_entry++;
- }
- if (std_entry->name == NULL)
- cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.",
- tab_entry->name);
- }
-
- return (new);
-}
-
-/* free memory allocated by sdev_merge_vtab */
-static void
-sdev_free_vtab(fs_operation_def_t *new)
-{
- kmem_free(new, sdev_vnodeops_tbl_size);
-}
-
/*
* a generic setattr() function
*
diff --git a/usr/src/uts/common/fs/dev/sdev_vfsops.c b/usr/src/uts/common/fs/dev/sdev_vfsops.c
index ea9cb6374a..6f32f47635 100644
--- a/usr/src/uts/common/fs/dev/sdev_vfsops.c
+++ b/usr/src/uts/common/fs/dev/sdev_vfsops.c
@@ -169,7 +169,13 @@ devinit(int fstype, char *name)
if ((devmajor = getudev()) == (major_t)-1) {
cmn_err(CE_WARN, "%s: can't get unique dev", sdev_vfssw.name);
- return (1);
+ return (ENXIO);
+ }
+
+ if (sdev_plugin_init() != 0) {
+ cmn_err(CE_WARN, "%s: failed to set init plugin subsystem",
+ sdev_vfssw.name);
+ return (EIO);
}
/* initialize negative cache */
@@ -332,6 +338,7 @@ sdev_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap,
ASSERT(sdev_origins);
dv->sdev_flags &= ~SDEV_GLOBAL;
dv->sdev_origin = sdev_origins->sdev_root;
+ SDEV_HOLD(dv->sdev_origin);
} else {
sdev_ncache_setup();
rw_enter(&dv->sdev_contents, RW_WRITER);
@@ -504,3 +511,17 @@ sdev_mntinfo_rele(struct sdev_data *mntinfo)
SDEVTOV(mntinfo->sdev_root)->v_count--;
mutex_exit(&sdev_lock);
}
+
+void
+sdev_mnt_walk(void (*func)(struct sdev_node *, void *), void *arg)
+{
+ struct sdev_data *mntinfo;
+
+ mutex_enter(&sdev_lock);
+ mntinfo = sdev_mntinfo;
+ while (mntinfo != NULL) {
+ func(mntinfo->sdev_root, arg);
+ mntinfo = mntinfo->sdev_next;
+ }
+ mutex_exit(&sdev_lock);
+}
diff --git a/usr/src/uts/common/fs/fifofs/fifosubr.c b/usr/src/uts/common/fs/fifofs/fifosubr.c
index 6e56000ffe..56204c6741 100644
--- a/usr/src/uts/common/fs/fifofs/fifosubr.c
+++ b/usr/src/uts/common/fs/fifofs/fifosubr.c
@@ -614,9 +614,12 @@ fifo_stropen(vnode_t **vpp, int flag, cred_t *crp, int dotwist, int lockheld)
/*
* The other end of the pipe is almost closed so
* reject any other open on this end of the pipe
- * This only happens with a pipe mounted under namefs
+ * This normally only happens with a pipe mounted under namefs, but
+ * we can also see an open via proc/fd, which should still succeed.
+ * To indicate the proc/fd case the FKLYR flag is passed.
*/
- if ((fnp->fn_flag & (FIFOCLOSE|ISPIPE)) == (FIFOCLOSE|ISPIPE)) {
+ if ((fnp->fn_flag & (FIFOCLOSE|ISPIPE)) == (FIFOCLOSE|ISPIPE) &&
+ (flag & FKLYR) == 0) {
fifo_cleanup(oldvp, flag);
cv_broadcast(&fnp->fn_wait_cv);
if (!lockheld)
diff --git a/usr/src/uts/common/fs/fifofs/fifovnops.c b/usr/src/uts/common/fs/fifofs/fifovnops.c
index ac89e430c7..fee2924093 100644
--- a/usr/src/uts/common/fs/fifofs/fifovnops.c
+++ b/usr/src/uts/common/fs/fifofs/fifovnops.c
@@ -27,7 +27,9 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
/*
* FIFOFS file system vnode operations. This file system
@@ -1832,17 +1834,16 @@ fifo_poll(vnode_t *vp, short events, int anyyet, short *reventsp,
}
/*
- * if we happened to get something, return
+ * if we happened to get something and we're not edge-triggered, return
*/
-
- if ((*reventsp = (short)retevents) != 0) {
+ if ((*reventsp = (short)retevents) != 0 && !(events & POLLET)) {
mutex_exit(&fnp->fn_lock->flk_lock);
return (0);
}
/*
- * If poll() has not found any events yet, set up event cell
- * to wake up the poll if a requested event occurs on this
+ * If poll() has not found any events yet or we're edge-triggered, set
+ * up event cell to wake up the poll if a requested event occurs on this
* pipe/fifo.
*/
if (!anyyet) {
diff --git a/usr/src/uts/common/fs/hsfs/hsfs_vnops.c b/usr/src/uts/common/fs/hsfs/hsfs_vnops.c
index 66d4b6cfef..5375f883ae 100644
--- a/usr/src/uts/common/fs/hsfs/hsfs_vnops.c
+++ b/usr/src/uts/common/fs/hsfs/hsfs_vnops.c
@@ -1585,8 +1585,7 @@ hsfs_getpage(
/* does not support write */
if (rw == S_WRITE) {
- panic("write attempt on READ ONLY HSFS");
- /*NOTREACHED*/
+ return (EROFS);
}
if (vp->v_flag & VNOMAP) {
@@ -1798,6 +1797,9 @@ hsfs_map(
if (vp->v_flag & VNOMAP)
return (ENOSYS);
+ if (prot & PROT_WRITE)
+ return (ENOSYS);
+
if (off > HS_MAXFILEOFF || off < 0 ||
(off + len) < 0 || (off + len) > HS_MAXFILEOFF)
return (ENXIO);
diff --git a/usr/src/uts/common/fs/hyprlofs/hyprlofs_dir.c b/usr/src/uts/common/fs/hyprlofs/hyprlofs_dir.c
new file mode 100644
index 0000000000..05ee2c6e09
--- /dev/null
+++ b/usr/src/uts/common/fs/hyprlofs/hyprlofs_dir.c
@@ -0,0 +1,640 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2012, Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/vfs.h>
+#include <sys/vnode.h>
+#include <sys/errno.h>
+#include <sys/cmn_err.h>
+#include <sys/cred.h>
+#include <sys/stat.h>
+#include <sys/policy.h>
+#include <sys/fs/hyprlofs_info.h>
+
+static int hldir_make_hlnode(hlnode_t *, hlfsmount_t *, vattr_t *, enum de_op,
+ vnode_t *, hlnode_t **, cred_t *);
+static int hldiraddentry(hlnode_t *, hlnode_t *, char *);
+
+
+#define HL_HASH_SIZE 8192 /* must be power of 2 */
+#define HL_MUTEX_SIZE 64
+
+static hldirent_t *hl_hashtable[HL_HASH_SIZE];
+static kmutex_t hl_hashmutex[HL_MUTEX_SIZE];
+
+#define HL_HASH_INDEX(a) ((a) & (HL_HASH_SIZE-1))
+#define HL_MUTEX_INDEX(a) ((a) & (HL_MUTEX_SIZE-1))
+
+#define HYPRLOFS_HASH(tp, name, hash) \
+ { \
+ char Xc, *Xcp; \
+ hash = (uint_t)(uintptr_t)(tp) >> 8; \
+ for (Xcp = (name); (Xc = *Xcp) != 0; Xcp++) \
+ hash = (hash << 4) + hash + (uint_t)Xc; \
+ }
+
+void
+hyprlofs_hash_init(void)
+{
+ int ix;
+
+ for (ix = 0; ix < HL_MUTEX_SIZE; ix++)
+ mutex_init(&hl_hashmutex[ix], NULL, MUTEX_DEFAULT, NULL);
+}
+
+static void
+hyprlofs_hash_in(hldirent_t *h)
+{
+ uint_t hash;
+ hldirent_t **prevpp;
+ kmutex_t *hmtx;
+
+ HYPRLOFS_HASH(h->hld_parent, h->hld_name, hash);
+ h->hld_hash = hash;
+ prevpp = &hl_hashtable[HL_HASH_INDEX(hash)];
+ hmtx = &hl_hashmutex[HL_MUTEX_INDEX(hash)];
+ mutex_enter(hmtx);
+ h->hld_link = *prevpp;
+ *prevpp = h;
+ mutex_exit(hmtx);
+}
+
+/* Remove hldirent *h from the hash list. */
+static void
+hyprlofs_hash_out(hldirent_t *h)
+{
+ uint_t hash;
+ hldirent_t **prevpp;
+ kmutex_t *hmtx;
+
+ hash = h->hld_hash;
+ prevpp = &hl_hashtable[HL_HASH_INDEX(hash)];
+ hmtx = &hl_hashmutex[HL_MUTEX_INDEX(hash)];
+ mutex_enter(hmtx);
+ while (*prevpp != h)
+ prevpp = &(*prevpp)->hld_link;
+ *prevpp = h->hld_link;
+ mutex_exit(hmtx);
+}
+
+static hldirent_t *
+hyprlofs_hash_lookup(char *name, hlnode_t *parent, uint_t hold,
+ hlnode_t **found)
+{
+ hldirent_t *l;
+ uint_t hash;
+ kmutex_t *hmtx;
+ hlnode_t *hnp;
+
+ HYPRLOFS_HASH(parent, name, hash);
+ hmtx = &hl_hashmutex[HL_MUTEX_INDEX(hash)];
+ mutex_enter(hmtx);
+ l = hl_hashtable[HL_HASH_INDEX(hash)];
+ while (l) {
+ if (l->hld_hash == hash && l->hld_parent == parent &&
+ strcmp(l->hld_name, name) == 0) {
+ /*
+ * Ensure that the hlnode that we put a hold on is the
+ * same one that we pass back. Thus the temp. var
+ * hnp is necessary.
+ */
+ hnp = l->hld_hlnode;
+ if (hold) {
+ ASSERT(hnp);
+ hlnode_hold(hnp);
+ }
+ if (found)
+ *found = hnp;
+ mutex_exit(hmtx);
+ return (l);
+ } else {
+ l = l->hld_link;
+ }
+ }
+ mutex_exit(hmtx);
+ return (NULL);
+}
+
+/*
+ * Search directory 'parent' for entry 'name'.
+ *
+ * The calling thread can't hold the write version of the rwlock for the
+ * directory being searched
+ *
+ * On success *foundtp points to the found hlnode with its vnode held.
+ */
+int
+hyprlofs_dirlookup(hlnode_t *parent, char *name, hlnode_t **foundtp, cred_t *cr)
+{
+ int error;
+
+ *foundtp = NULL;
+ if (parent->hln_type != VDIR)
+ return (ENOTDIR);
+
+ if ((error = hyprlofs_taccess(parent, VEXEC, cr)))
+ return (error);
+
+ if (*name == '\0') {
+ hlnode_hold(parent);
+ *foundtp = parent;
+ return (0);
+ }
+
+ /*
+ * Search the directory for the matching name. We need the lock
+ * protecting the hln_dir list so that it doesn't change out from
+ * underneath us. hyprlofs_hash_lookup() will pass back the hlnode
+ * with a hold on it.
+ */
+ if (hyprlofs_hash_lookup(name, parent, 1, foundtp) != NULL) {
+ ASSERT(*foundtp);
+ return (0);
+ }
+
+ return (ENOENT);
+}
+
+/*
+ * Enter a directory entry (either a file or subdir, depending on op) for
+ * 'name' and 'hp' into directory 'dir'
+ */
+int
+hyprlofs_direnter(
+ hlfsmount_t *hm,
+ hlnode_t *dir, /* target directory to make entry in */
+ char *name, /* name of entry */
+ enum de_op op, /* entry operation */
+ vnode_t *realvp, /* real vnode */
+ vattr_t *va,
+ hlnode_t **hpp, /* return hlnode */
+ cred_t *cr)
+{
+ hldirent_t *hdp;
+ hlnode_t *found = NULL;
+ hlnode_t *hp;
+ int error = 0;
+ char *s;
+
+ /* hln_rwlock is held to serialize direnter and dirdeletes */
+ ASSERT(RW_WRITE_HELD(&dir->hln_rwlock));
+ ASSERT(dir->hln_type == VDIR);
+
+ /* Don't allow '/' characters in pathname component */
+ for (s = name; *s; s++)
+ if (*s == '/')
+ return (EACCES);
+
+ if (name[0] == '\0')
+ panic("hyprlofs_direnter: NULL name");
+
+ /*
+ * This might be a "dangling detached directory". It could have been
+ * removed, but a reference to it kept in u_cwd. Don't bother searching
+ * it, and with any luck the user will get tired of dealing with us and
+ * cd to some absolute pathway. This is in ufs, too.
+ */
+ if (dir->hln_nlink == 0) {
+ return (ENOENT);
+ }
+
+ /* Search for the entry. Return "found" if it exists. */
+ hdp = hyprlofs_hash_lookup(name, dir, 1, &found);
+
+ if (hdp) {
+ ASSERT(found);
+ switch (op) {
+ case DE_CREATE:
+ case DE_MKDIR:
+ if (hpp) {
+ *hpp = found;
+ error = EEXIST;
+ } else {
+ hlnode_rele(found);
+ }
+ break;
+ }
+ } else {
+
+ /*
+ * The entry does not exist. Check write perms in dir to see if
+ * entry can be created.
+ */
+ if ((error = hyprlofs_taccess(dir, VWRITE, cr)))
+ return (error);
+
+ /* Make new hlnode and directory entry as required. */
+ if ((error = hldir_make_hlnode(dir, hm, va, op, realvp, &hp,
+ cr)))
+ return (error);
+
+ if ((error = hldiraddentry(dir, hp, name))) {
+ /* Unmake the inode we just made. */
+ rw_enter(&hp->hln_rwlock, RW_WRITER);
+ if ((hp->hln_type) == VDIR) {
+ ASSERT(hdp == NULL);
+ /* cleanup allocs made by hyprlofs_dirinit() */
+ hyprlofs_dirtrunc(hp);
+ }
+ mutex_enter(&hp->hln_tlock);
+ hp->hln_nlink = 0;
+ mutex_exit(&hp->hln_tlock);
+ gethrestime(&hp->hln_ctime);
+ rw_exit(&hp->hln_rwlock);
+ hlnode_rele(hp);
+ hp = NULL;
+ } else if (hpp) {
+ *hpp = hp;
+ } else {
+ hlnode_rele(hp);
+ }
+ }
+
+ return (error);
+}
+
+/*
+ * Delete entry hp of name "nm" from dir. Free dir entry space and decrement
+ * link count on hlnode(s).
+ */
+int
+hyprlofs_dirdelete(hlnode_t *dir, hlnode_t *hp, char *nm, enum dr_op op,
+ cred_t *cr)
+{
+ hldirent_t *hpdp;
+ int error;
+ size_t namelen;
+ hlnode_t *hnp;
+ timestruc_t now;
+
+ ASSERT(RW_WRITE_HELD(&dir->hln_rwlock));
+ ASSERT(RW_WRITE_HELD(&hp->hln_rwlock));
+ ASSERT(dir->hln_type == VDIR);
+
+ if (nm[0] == '\0')
+ panic("hyprlofs_dirdelete: NULL name for %p", (void *)hp);
+
+ /* return error if removing . or .. */
+ if (nm[0] == '.') {
+ if (nm[1] == '\0')
+ return (EINVAL);
+ if (nm[1] == '.' && nm[2] == '\0')
+ return (EEXIST); /* thus in ufs */
+ }
+
+ if ((error = hyprlofs_taccess(dir, VEXEC|VWRITE, cr)) != 0)
+ return (error);
+
+ if (dir->hln_dir == NULL)
+ return (ENOENT);
+
+ hpdp = hyprlofs_hash_lookup(nm, dir, 0, &hnp);
+ if (hpdp == NULL) {
+ /*
+ * If it is gone, some other thread got here first!
+ * Return error ENOENT.
+ */
+ return (ENOENT);
+ }
+
+ /*
+ * If the hlnode in the hldirent changed (shouldn't happen since we
+ * don't support rename) then original is gone, so return that status
+ * (same as UFS).
+ */
+ if (hp != hnp)
+ return (ENOENT);
+
+ hyprlofs_hash_out(hpdp);
+
+ /* Take hpdp out of the directory list. */
+ ASSERT(hpdp->hld_next != hpdp);
+ ASSERT(hpdp->hld_prev != hpdp);
+ if (hpdp->hld_prev) {
+ hpdp->hld_prev->hld_next = hpdp->hld_next;
+ }
+ if (hpdp->hld_next) {
+ hpdp->hld_next->hld_prev = hpdp->hld_prev;
+ }
+
+ /*
+ * If the roving slot pointer happens to match hpdp, point it at the
+ * previous dirent.
+ */
+ if (dir->hln_dir->hld_prev == hpdp) {
+ dir->hln_dir->hld_prev = hpdp->hld_prev;
+ }
+ ASSERT(hpdp->hld_next != hpdp);
+ ASSERT(hpdp->hld_prev != hpdp);
+
+ /* hpdp points to the correct directory entry */
+ namelen = strlen(hpdp->hld_name) + 1;
+
+ kmem_free(hpdp, sizeof (hldirent_t) + namelen);
+ dir->hln_size -= (sizeof (hldirent_t) + namelen);
+ dir->hln_dirents--;
+
+ gethrestime(&now);
+ dir->hln_mtime = now;
+ dir->hln_ctime = now;
+ hp->hln_ctime = now;
+
+ ASSERT(hp->hln_nlink > 0);
+ DECR_COUNT(&hp->hln_nlink, &hp->hln_tlock);
+ if (op == DR_RMDIR && hp->hln_type == VDIR) {
+ hyprlofs_dirtrunc(hp);
+ ASSERT(hp->hln_nlink == 0);
+ }
+ return (0);
+}
+
+/*
+ * hyprlofs_dirinit initializes a dir with '.' and '..' entries without
+ * checking perms and locking
+ */
+void
+hyprlofs_dirinit(
+ hlnode_t *parent, /* parent of directory to initialize */
+ hlnode_t *dir) /* the new directory */
+{
+ hldirent_t *dot, *dotdot;
+ timestruc_t now;
+
+ ASSERT(RW_WRITE_HELD(&parent->hln_rwlock));
+ ASSERT(dir->hln_type == VDIR);
+
+ dot = kmem_zalloc(sizeof (hldirent_t) + 2, KM_SLEEP);
+ dotdot = kmem_zalloc(sizeof (hldirent_t) + 3, KM_SLEEP);
+
+ /* Initialize the entries */
+ dot->hld_hlnode = dir;
+ dot->hld_offset = 0;
+ dot->hld_name = (char *)dot + sizeof (hldirent_t);
+ dot->hld_name[0] = '.';
+ dot->hld_parent = dir;
+ hyprlofs_hash_in(dot);
+
+ dotdot->hld_hlnode = parent;
+ dotdot->hld_offset = 1;
+ dotdot->hld_name = (char *)dotdot + sizeof (hldirent_t);
+ dotdot->hld_name[0] = '.';
+ dotdot->hld_name[1] = '.';
+ dotdot->hld_parent = dir;
+ hyprlofs_hash_in(dotdot);
+
+ /* Initialize directory entry list. */
+ dot->hld_next = dotdot;
+ dot->hld_prev = dotdot;
+ dotdot->hld_next = NULL;
+ dotdot->hld_prev = dot;
+
+ gethrestime(&now);
+ dir->hln_mtime = now;
+ dir->hln_ctime = now;
+
+ /*
+ * Since hyprlofs_dirinit is called with both dir and parent being the
+ * same for the root vnode, we need to increment this before we set
+ * hln_nlink = 2 below.
+ */
+ INCR_COUNT(&parent->hln_nlink, &parent->hln_tlock);
+ parent->hln_ctime = now;
+
+ dir->hln_dir = dot;
+ dir->hln_size = 2 * sizeof (hldirent_t) + 5; /* dot and dotdot */
+ dir->hln_dirents = 2;
+ dir->hln_nlink = 2;
+}
+
+
+/*
+ * hyprlofs_dirtrunc removes all dir entries under this dir.
+ */
+void
+hyprlofs_dirtrunc(hlnode_t *dir)
+{
+ hldirent_t *hdp;
+ hlnode_t *tp;
+ size_t namelen;
+ timestruc_t now;
+
+ ASSERT(RW_WRITE_HELD(&dir->hln_rwlock));
+ ASSERT(dir->hln_type == VDIR);
+
+ if (dir->hln_looped)
+ return;
+
+ for (hdp = dir->hln_dir; hdp; hdp = dir->hln_dir) {
+ ASSERT(hdp->hld_next != hdp);
+ ASSERT(hdp->hld_prev != hdp);
+ ASSERT(hdp->hld_hlnode);
+
+ dir->hln_dir = hdp->hld_next;
+ namelen = strlen(hdp->hld_name) + 1;
+
+ /*
+ * Adjust the link counts to account for this dir entry removal.
+ */
+ tp = hdp->hld_hlnode;
+
+ ASSERT(tp->hln_nlink > 0);
+ DECR_COUNT(&tp->hln_nlink, &tp->hln_tlock);
+
+ hyprlofs_hash_out(hdp);
+
+ kmem_free(hdp, sizeof (hldirent_t) + namelen);
+ dir->hln_size -= (sizeof (hldirent_t) + namelen);
+ dir->hln_dirents--;
+ }
+
+ gethrestime(&now);
+ dir->hln_mtime = now;
+ dir->hln_ctime = now;
+
+ ASSERT(dir->hln_dir == NULL);
+ ASSERT(dir->hln_size == 0);
+ ASSERT(dir->hln_dirents == 0);
+}
+
+static int
+hldiraddentry(
+ hlnode_t *dir, /* target directory to make entry in */
+ hlnode_t *hp, /* new hlnode */
+ char *name)
+{
+ hldirent_t *hdp, *hpdp;
+ size_t namelen, alloc_size;
+ timestruc_t now;
+
+ /*
+ * Make sure the parent dir wasn't removed from underneath the caller.
+ */
+ if (dir->hln_dir == NULL)
+ return (ENOENT);
+
+ /* Check that everything is on the same FS. */
+ if (hp->hln_vnode->v_vfsp != dir->hln_vnode->v_vfsp)
+ return (EXDEV);
+
+ /* Alloc and init dir entry */
+ namelen = strlen(name) + 1;
+ alloc_size = namelen + sizeof (hldirent_t);
+ hdp = kmem_zalloc(alloc_size, KM_NORMALPRI | KM_NOSLEEP);
+ if (hdp == NULL)
+ return (ENOSPC);
+
+ dir->hln_size += alloc_size;
+ dir->hln_dirents++;
+ hdp->hld_hlnode = hp;
+ hdp->hld_parent = dir;
+
+ /* The dir entry and its name were allocated sequentially. */
+ hdp->hld_name = (char *)hdp + sizeof (hldirent_t);
+ (void) strcpy(hdp->hld_name, name);
+
+ hyprlofs_hash_in(hdp);
+
+ /*
+ * Some utilities expect the size of a directory to remain fairly
+ * static. For example, a routine which unlinks files between calls to
+ * readdir(); the size of the dir changes from underneath it and so the
+ * real dir offset in bytes is invalid. To circumvent this problem, we
+ * initialize a dir entry with a phony offset, and use this offset to
+ * determine end of file in hyprlofs_readdir.
+ */
+ hpdp = dir->hln_dir->hld_prev;
+ /*
+ * Install at first empty "slot" in directory list.
+ */
+ while (hpdp->hld_next != NULL && (hpdp->hld_next->hld_offset -
+ hpdp->hld_offset) <= 1) {
+ ASSERT(hpdp->hld_next != hpdp);
+ ASSERT(hpdp->hld_prev != hpdp);
+ ASSERT(hpdp->hld_next->hld_offset > hpdp->hld_offset);
+ hpdp = hpdp->hld_next;
+ }
+ hdp->hld_offset = hpdp->hld_offset + 1;
+
+ /*
+ * If we're at the end of the dirent list and the offset (which is
+ * necessarily the largest offset in this dir) is more than twice the
+ * number of dirents, that means the dir is 50% holes. At this point
+ * we reset the slot pointer back to the beginning of the dir so we
+ * start using the holes. The idea is that if there are N dirents,
+ * there must also be N holes, so we can satisfy the next N creates by
+ * walking at most 2N entries; thus the average cost of a create is
+ * constant. Note that we use the first dirent's hld_prev as the roving
+ * slot pointer. This saves a word in every dirent.
+ */
+ if (hpdp->hld_next == NULL && hpdp->hld_offset > 2 * dir->hln_dirents)
+ dir->hln_dir->hld_prev = dir->hln_dir->hld_next;
+ else
+ dir->hln_dir->hld_prev = hdp;
+
+ ASSERT(hpdp->hld_next != hpdp);
+ ASSERT(hpdp->hld_prev != hpdp);
+
+ hdp->hld_next = hpdp->hld_next;
+ if (hdp->hld_next) {
+ hdp->hld_next->hld_prev = hdp;
+ }
+ hdp->hld_prev = hpdp;
+ hpdp->hld_next = hdp;
+
+ ASSERT(hdp->hld_next != hdp);
+ ASSERT(hdp->hld_prev != hdp);
+ ASSERT(hpdp->hld_next != hpdp);
+ ASSERT(hpdp->hld_prev != hpdp);
+
+ gethrestime(&now);
+ dir->hln_mtime = now;
+ dir->hln_ctime = now;
+
+ return (0);
+}
+
+static int
+hldir_make_hlnode(hlnode_t *dir, hlfsmount_t *hm, vattr_t *va, enum de_op op,
+ vnode_t *realvp, hlnode_t **newnode, cred_t *cr)
+{
+ hlnode_t *hp;
+ enum vtype type;
+
+ ASSERT(va != NULL);
+ ASSERT(op == DE_CREATE || op == DE_MKDIR);
+ if (((va->va_mask & AT_ATIME) && TIMESPEC_OVERFLOW(&va->va_atime)) ||
+ ((va->va_mask & AT_MTIME) && TIMESPEC_OVERFLOW(&va->va_mtime)))
+ return (EOVERFLOW);
+ type = va->va_type;
+ hp = kmem_zalloc(sizeof (hlnode_t), KM_SLEEP);
+ hyprlofs_node_init(hm, hp, va, cr);
+
+ hp->hln_vnode->v_rdev = hp->hln_rdev = NODEV;
+ hp->hln_vnode->v_type = type;
+ hp->hln_uid = crgetuid(cr);
+
+ /*
+ * To determine the gid of the created file:
+ * If the directory's set-gid bit is set, set the gid to the gid
+ * of the parent dir, otherwise, use the process's gid.
+ */
+ if (dir->hln_mode & VSGID)
+ hp->hln_gid = dir->hln_gid;
+ else
+ hp->hln_gid = crgetgid(cr);
+
+ /*
+ * If we're creating a dir and the parent dir has the set-GID bit set,
+ * set it on the new dir. Otherwise, if the user is neither privileged
+ * nor a member of the file's new group, clear the file's set-GID bit.
+ */
+ if (dir->hln_mode & VSGID && type == VDIR)
+ hp->hln_mode |= VSGID;
+ else {
+ if ((hp->hln_mode & VSGID) &&
+ secpolicy_vnode_setids_setgids(cr, hp->hln_gid) != 0)
+ hp->hln_mode &= ~VSGID;
+ }
+
+ if (va->va_mask & AT_ATIME)
+ hp->hln_atime = va->va_atime;
+ if (va->va_mask & AT_MTIME)
+ hp->hln_mtime = va->va_mtime;
+
+ if (op == DE_MKDIR) {
+ hyprlofs_dirinit(dir, hp);
+ hp->hln_looped = 0;
+ } else {
+ hp->hln_realvp = realvp;
+ hp->hln_size = va->va_size;
+ hp->hln_looped = 1;
+ }
+
+ *newnode = hp;
+ return (0);
+}
diff --git a/usr/src/uts/common/fs/hyprlofs/hyprlofs_subr.c b/usr/src/uts/common/fs/hyprlofs/hyprlofs_subr.c
new file mode 100644
index 0000000000..1d857309f3
--- /dev/null
+++ b/usr/src/uts/common/fs/hyprlofs/hyprlofs_subr.c
@@ -0,0 +1,127 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/param.h>
+#include <sys/t_lock.h>
+#include <sys/systm.h>
+#include <sys/sysmacros.h>
+#include <sys/debug.h>
+#include <sys/time.h>
+#include <sys/cmn_err.h>
+#include <sys/vnode.h>
+#include <sys/stat.h>
+#include <sys/mode.h>
+#include <sys/vfs.h>
+#include <sys/cred.h>
+#include <sys/kmem.h>
+#include <sys/atomic.h>
+#include <sys/policy.h>
+#include <sys/fs/hyprlofs_info.h>
+
+#define MODESHIFT 3
+
+/* Initialize a hlnode and add it to file list under mount point. */
+void
+hyprlofs_node_init(hlfsmount_t *hm, hlnode_t *h, vattr_t *vap, cred_t *cr)
+{
+ vnode_t *vp;
+ timestruc_t now;
+
+ ASSERT(vap != NULL);
+
+ rw_init(&h->hln_rwlock, NULL, RW_DEFAULT, NULL);
+ mutex_init(&h->hln_tlock, NULL, MUTEX_DEFAULT, NULL);
+ h->hln_mode = MAKEIMODE(vap->va_type, vap->va_mode);
+ h->hln_mask = 0;
+ h->hln_type = vap->va_type;
+ h->hln_nodeid = (ino64_t)(uint32_t)((uintptr_t)h >> 3);
+ h->hln_nlink = 1;
+ h->hln_size = 0;
+
+ if (cr == NULL) {
+ h->hln_uid = vap->va_uid;
+ h->hln_gid = vap->va_gid;
+ } else {
+ h->hln_uid = crgetuid(cr);
+ h->hln_gid = crgetgid(cr);
+ }
+
+ h->hln_fsid = hm->hlm_dev;
+ h->hln_rdev = vap->va_rdev;
+ h->hln_blksize = PAGESIZE;
+ h->hln_nblocks = 0;
+ gethrestime(&now);
+ h->hln_atime = now;
+ h->hln_mtime = now;
+ h->hln_ctime = now;
+ h->hln_seq = 0;
+ h->hln_dir = NULL;
+
+ h->hln_vnode = vn_alloc(KM_SLEEP);
+ vp = HLNTOV(h);
+ vn_setops(vp, hyprlofs_vnodeops);
+ vp->v_vfsp = hm->hlm_vfsp;
+ vp->v_type = vap->va_type;
+ vp->v_rdev = vap->va_rdev;
+ vp->v_data = (caddr_t)h;
+ mutex_enter(&hm->hlm_contents);
+ /*
+ * Increment the pseudo generation number for this hlnode. Since
+ * hlnodes are allocated and freed, there really is no particular
+ * generation number for a new hlnode. Just fake it by using a
+ * counter in each file system.
+ */
+ h->hln_gen = hm->hlm_gen++;
+
+ /*
+ * Add new hlnode to end of linked list of hlnodes for this hyprlofs
+ * Root dir is handled specially in hyprlofs_mount.
+ */
+ if (hm->hlm_rootnode != (hlnode_t *)NULL) {
+ h->hln_forw = NULL;
+ h->hln_back = hm->hlm_rootnode->hln_back;
+ h->hln_back->hln_forw = hm->hlm_rootnode->hln_back = h;
+ }
+ mutex_exit(&hm->hlm_contents);
+ vn_exists(vp);
+}
+
+int
+hyprlofs_taccess(void *vtp, int mode, cred_t *cr)
+{
+ hlnode_t *hp = vtp;
+ int shift = 0;
+
+ /* Check access based on owner, group and public perms in hlnode. */
+ if (crgetuid(cr) != hp->hln_uid) {
+ shift += MODESHIFT;
+ if (groupmember(hp->hln_gid, cr) == 0)
+ shift += MODESHIFT;
+ }
+
+ return (secpolicy_vnode_access2(cr, HLNTOV(hp), hp->hln_uid,
+ hp->hln_mode << shift, mode));
+}
diff --git a/usr/src/uts/common/fs/hyprlofs/hyprlofs_vfsops.c b/usr/src/uts/common/fs/hyprlofs/hyprlofs_vfsops.c
new file mode 100644
index 0000000000..c582a8cac2
--- /dev/null
+++ b/usr/src/uts/common/fs/hyprlofs/hyprlofs_vfsops.c
@@ -0,0 +1,614 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Hyperlofs is a hybrid file system combining features of the tmpfs(7FS) and
+ * lofs(7FS) file systems. It is modeled on code from both of these file
+ * systems.
+ *
+ * The purpose is to create a high performance name space for files on which
+ * applications will compute. Given a large number of data files with various
+ * owners, we want to construct a view onto those files such that only a subset
+ * is visible to the applications and such that the view can be changed very
+ * quickly as compute progresses. Entries in the name space are not mounts and
+ * thus do not appear in the mnttab. Entries in the name space are allowed to
+ * refer to files on different backing file systems. Intermediate directories
+ * in the name space exist only in-memory, ala tmpfs. There are no leaf nodes
+ * in the name space except for entries that refer to backing files ala lofs.
+ *
+ * The name space is managed via ioctls issued on the mounted file system and
+ * is mostly read-only for the compute applications. That is, applications
+ * cannot create new files in the name space. If a file is unlinked by an
+ * application, that only removes the file from the name space, the backing
+ * file remains in place. It is possible for applications to write-through to
+ * the backing files if the file system is mounted read-write.
+ *
+ * The name space is managed via the HYPRLOFS_ADD_ENTRIES, HYPRLOFS_RM_ENTRIES,
+ * and HYPRLOFS_RM_ALL ioctls on the top-level mount.
+ *
+ * The HYPRLOFS_ADD_ENTRIES ioctl specifies path(s) to the backing file(s) and
+ * the name(s) for the file(s) in the name space. The name(s) may be path(s)
+ * which will be relative to the root of the mount and thus cannot begin with
+ * a /. If the name is a path, it does not have to correspond to any backing
+ * path. The intermediate directories will only exist in the name space. The
+ * entry(ies) will be added to the name space.
+ *
+ * The HYPRLOFS_RM_ENTRIES ioctl specifies the name(s) of the file(s) in the
+ * name space which should be removed. The name(s) may be path(s) which will
+ * be relative to the root of the mount and thus cannot begin with a /. The
+ * named entry(ies) will be removed.
+ *
+ * The HYPRLOFS_RM_ALL ioctl will remove all mappings from the name space.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/kmem.h>
+#include <sys/time.h>
+#include <sys/pathname.h>
+#include <sys/vfs.h>
+#include <sys/vfs_opreg.h>
+#include <sys/vnode.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+#include <sys/stat.h>
+#include <sys/errno.h>
+#include <sys/cmn_err.h>
+#include <sys/cred.h>
+#include <sys/statvfs.h>
+#include <sys/mount.h>
+#include <sys/debug.h>
+#include <sys/systm.h>
+#include <sys/mntent.h>
+#include <fs/fs_subr.h>
+#include <vm/page.h>
+#include <vm/anon.h>
+#include <sys/model.h>
+#include <sys/policy.h>
+
+#include <sys/fs/swapnode.h>
+#include <sys/fs/hyprlofs_info.h>
+
+static int hyprlofsfstype;
+
+/*
+ * hyprlofs vfs operations.
+ */
+static int hyprlofsinit(int, char *);
+static int hyprlofs_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *);
+static int hyprlofs_unmount(vfs_t *, int, cred_t *);
+static int hyprlofs_root(vfs_t *, vnode_t **);
+static int hyprlofs_statvfs(vfs_t *, struct statvfs64 *);
+static int hyprlofs_vget(vfs_t *, vnode_t **, struct fid *);
+
+/*
+ * Loadable module wrapper
+ */
+#include <sys/modctl.h>
+
+static mntopts_t hyprlofs_mntopts;
+
+static vfsdef_t vfw = {
+ VFSDEF_VERSION,
+ "hyprlofs",
+ hyprlofsinit,
+ VSW_HASPROTO|VSW_CANREMOUNT|VSW_STATS|VSW_ZMOUNT,
+ &hyprlofs_mntopts
+};
+
+static mntopts_t hyprlofs_mntopts = {
+ 0, NULL
+};
+
+/*
+ * Module linkage information
+ */
+static struct modlfs modlfs = {
+ &mod_fsops, "filesystem for hyprlofs", &vfw
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, &modlfs, NULL
+};
+
+int
+_init()
+{
+ return (mod_install(&modlinkage));
+}
+
+int
+_fini()
+{
+ int error;
+
+ error = mod_remove(&modlinkage);
+ if (error)
+ return (error);
+ /*
+ * Tear down the operations vectors
+ */
+ (void) vfs_freevfsops_by_type(hyprlofsfstype);
+ vn_freevnodeops(hyprlofs_vnodeops);
+ return (0);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+/*
+ * The following are patchable variables limiting the amount of system
+ * resources hyprlofs can use.
+ *
+ * hyprlofs_maxkmem limits the amount of kernel kmem_alloc memory hyprlofs can
+ * use for it's data structures (e.g. hlnodes, directory entries). It is set
+ * as a percentage of physical memory which is determined when hyprlofs is
+ * first used in the system.
+ *
+ * hyprlofs_minfree is the minimum amount of swap space that hyprlofs leaves for
+ * the rest of the system. If the amount of free swap space in the system
+ * (i.e. anoninfo.ani_free) drops below hyprlofs_minfree, hyprlofs anon
+ * allocations will fail.
+ */
+size_t hyprlofs_maxkmem = 0;
+size_t hyprlofs_minfree = 0;
+size_t hyprlofs_kmemspace; /* bytes of kernel heap used by all hyprlofs */
+
+static major_t hyprlofs_major;
+static minor_t hyprlofs_minor;
+static kmutex_t hyprlofs_minor_lock;
+
+/*
+ * initialize global hyprlofs locks and hashes when loading hyprlofs module
+ */
+static int
+hyprlofsinit(int fstype, char *name)
+{
+ static const fs_operation_def_t hl_vfsops_template[] = {
+ VFSNAME_MOUNT, { .vfs_mount = hyprlofs_mount },
+ VFSNAME_UNMOUNT, { .vfs_unmount = hyprlofs_unmount },
+ VFSNAME_ROOT, { .vfs_root = hyprlofs_root },
+ VFSNAME_STATVFS, { .vfs_statvfs = hyprlofs_statvfs },
+ VFSNAME_VGET, { .vfs_vget = hyprlofs_vget },
+ NULL, NULL
+ };
+ int error;
+ extern void hyprlofs_hash_init();
+
+ hyprlofs_hash_init();
+ hyprlofsfstype = fstype;
+ ASSERT(hyprlofsfstype != 0);
+
+ error = vfs_setfsops(fstype, hl_vfsops_template, NULL);
+ if (error != 0) {
+ cmn_err(CE_WARN, "hyprlofsinit: bad vfs ops template");
+ return (error);
+ }
+
+ error = vn_make_ops(name, hyprlofs_vnodeops_template,
+ &hyprlofs_vnodeops);
+ if (error != 0) {
+ (void) vfs_freevfsops_by_type(fstype);
+ cmn_err(CE_WARN, "hyprlofsinit: bad vnode ops template");
+ return (error);
+ }
+
+ /*
+ * hyprlofs_minfree is an absolute limit of swap space which still
+ * allows other processes to execute. Set it if its not patched.
+ */
+ if (hyprlofs_minfree == 0)
+ hyprlofs_minfree = btopr(HYPRLOFSMINFREE);
+
+ if ((hyprlofs_major = getudev()) == (major_t)-1) {
+ cmn_err(CE_WARN,
+ "hyprlofsinit: Can't get unique device number.");
+ hyprlofs_major = 0;
+ }
+ mutex_init(&hyprlofs_minor_lock, NULL, MUTEX_DEFAULT, NULL);
+ return (0);
+}
+
+static int
+hyprlofs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
+{
+ hlfsmount_t *hm = NULL;
+ hlnode_t *hp;
+ struct pathname dpn;
+ int error;
+ vattr_t rattr;
+ int got_attrs;
+
+ if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
+ return (error);
+ if (secpolicy_hyprlofs_control(cr) != 0)
+ return (EPERM);
+
+ if (mvp->v_type != VDIR)
+ return (ENOTDIR);
+
+ if (uap->flags & MS_REMOUNT)
+ return (EBUSY);
+
+ mutex_enter(&mvp->v_lock);
+ if ((uap->flags & MS_OVERLAY) == 0 &&
+ (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
+ mutex_exit(&mvp->v_lock);
+ return (EBUSY);
+ }
+ mutex_exit(&mvp->v_lock);
+
+ /* Having the resource be anything but "swap" doesn't make sense. */
+ vfs_setresource(vfsp, "swap", 0);
+
+ if ((error = pn_get(uap->dir,
+ (uap->flags & MS_SYSSPACE) ? UIO_SYSSPACE : UIO_USERSPACE,
+ &dpn)) != 0)
+ goto out;
+
+ if ((hm = kmem_zalloc(sizeof (hlfsmount_t),
+ KM_NORMALPRI | KM_NOSLEEP)) == NULL) {
+ pn_free(&dpn);
+ error = ENOMEM;
+ goto out;
+ }
+
+ /* Get an available minor device number for this mount */
+ mutex_enter(&hyprlofs_minor_lock);
+ do {
+ hyprlofs_minor = (hyprlofs_minor + 1) & L_MAXMIN32;
+ hm->hlm_dev = makedevice(hyprlofs_major, hyprlofs_minor);
+ } while (vfs_devismounted(hm->hlm_dev));
+ mutex_exit(&hyprlofs_minor_lock);
+
+ /*
+ * Set but don't bother entering the mutex since hlfsmount is not on
+ * the mount list yet.
+ */
+ mutex_init(&hm->hlm_contents, NULL, MUTEX_DEFAULT, NULL);
+
+ hm->hlm_vfsp = vfsp;
+
+ vfsp->vfs_data = (caddr_t)hm;
+ vfsp->vfs_fstype = hyprlofsfstype;
+ vfsp->vfs_dev = hm->hlm_dev;
+ vfsp->vfs_bsize = PAGESIZE;
+ vfsp->vfs_flag |= VFS_NOTRUNC;
+ vfs_make_fsid(&vfsp->vfs_fsid, hm->hlm_dev, hyprlofsfstype);
+ hm->hlm_mntpath = kmem_zalloc(dpn.pn_pathlen + 1, KM_SLEEP);
+ (void) strcpy(hm->hlm_mntpath, dpn.pn_path);
+
+ /* allocate and initialize root hlnode structure */
+ bzero(&rattr, sizeof (vattr_t));
+ rattr.va_mode = (mode_t)(S_IFDIR | 0777);
+ rattr.va_type = VDIR;
+ rattr.va_rdev = 0;
+ hp = kmem_zalloc(sizeof (hlnode_t), KM_SLEEP);
+ hyprlofs_node_init(hm, hp, &rattr, cr);
+
+ /* Get the mode, uid, and gid from the underlying mount point. */
+ rattr.va_mask = AT_MODE|AT_UID|AT_GID;
+ got_attrs = VOP_GETATTR(mvp, &rattr, 0, cr, NULL);
+
+ rw_enter(&hp->hln_rwlock, RW_WRITER);
+ HLNTOV(hp)->v_flag |= VROOT;
+
+ /*
+ * If the getattr succeeded, use its results, otherwise allow the
+ * previously set defaults to prevail.
+ */
+ if (got_attrs == 0) {
+ hp->hln_mode = rattr.va_mode;
+ hp->hln_uid = rattr.va_uid;
+ hp->hln_gid = rattr.va_gid;
+ }
+
+ /*
+ * Initialize linked list of hlnodes so that the back pointer of the
+ * root hlnode always points to the last one on the list and the
+ * forward pointer of the last node is null
+ */
+ hp->hln_back = hp;
+ hp->hln_forw = NULL;
+ hp->hln_nlink = 0;
+ hm->hlm_rootnode = hp;
+
+ hyprlofs_dirinit(hp, hp);
+
+ rw_exit(&hp->hln_rwlock);
+
+ pn_free(&dpn);
+ error = 0;
+
+out:
+ return (error);
+}
+
+static int
+hyprlofs_unmount(vfs_t *vfsp, int flag, cred_t *cr)
+{
+ hlfsmount_t *hm = (hlfsmount_t *)VFSTOHLM(vfsp);
+ hlnode_t *hnp, *cancel;
+ vnode_t *vp;
+ int error;
+
+ if ((error = secpolicy_fs_unmount(cr, vfsp)) != 0)
+ return (error);
+ if (secpolicy_hyprlofs_control(cr) != 0)
+ return (EPERM);
+
+ /*
+ * forced unmount is not supported by this file system
+ * and thus, ENOTSUP, is being returned.
+ */
+ if (flag & MS_FORCE)
+ return (ENOTSUP);
+
+ mutex_enter(&hm->hlm_contents);
+
+ /*
+ * If there are no open files, only the root node should have a ref cnt.
+ * With hlm_contents held, nothing can be added or removed. There may
+ * be some dirty pages. To prevent fsflush from disrupting the unmount,
+ * put a hold on each node while scanning. If we find a previously
+ * referenced node, undo the holds we have placed and fail EBUSY.
+ */
+ hnp = hm->hlm_rootnode;
+ if (HLNTOV(hnp)->v_count > 1) {
+ mutex_exit(&hm->hlm_contents);
+ return (EBUSY);
+ }
+
+ for (hnp = hnp->hln_forw; hnp; hnp = hnp->hln_forw) {
+ if ((vp = HLNTOV(hnp))->v_count > 0) {
+ cancel = hm->hlm_rootnode->hln_forw;
+ while (cancel != hnp) {
+ vp = HLNTOV(cancel);
+ ASSERT(vp->v_count > 0);
+ VN_RELE(vp);
+ cancel = cancel->hln_forw;
+ }
+ mutex_exit(&hm->hlm_contents);
+ return (EBUSY);
+ }
+ VN_HOLD(vp);
+ }
+
+ /* We can drop the mutex now because no one can find this mount */
+ mutex_exit(&hm->hlm_contents);
+
+ /*
+ * Free all alloc'd memory associated with this FS. To do this, we go
+ * through the file list twice, once to remove all the dir entries, and
+ * then to remove all the files.
+ */
+
+ /* Remove all directory entries */
+ for (hnp = hm->hlm_rootnode; hnp; hnp = hnp->hln_forw) {
+ rw_enter(&hnp->hln_rwlock, RW_WRITER);
+ if (hnp->hln_type == VDIR)
+ hyprlofs_dirtrunc(hnp);
+ rw_exit(&hnp->hln_rwlock);
+ }
+
+ ASSERT(hm->hlm_rootnode);
+
+ /*
+ * All links are gone, v_count is keeping nodes in place. VN_RELE
+ * should make the node disappear, unless somebody is holding pages
+ * against it. Wait and retry until it disappears.
+ *
+ * We re-acquire the lock to prevent others who have a HOLD on a hlnode
+ * from blowing it away (in hyprlofs_inactive) while we're trying to
+ * get to it here. Once we have a HOLD on it we know it'll stick around.
+ */
+ mutex_enter(&hm->hlm_contents);
+
+ /* Remove all the files (except the rootnode) backwards. */
+ while ((hnp = hm->hlm_rootnode->hln_back) != hm->hlm_rootnode) {
+ mutex_exit(&hm->hlm_contents);
+ /* Note we handled the link count in pass 2 above. */
+ vp = HLNTOV(hnp);
+ VN_RELE(vp);
+ mutex_enter(&hm->hlm_contents);
+ /*
+ * It's still there after the RELE. Someone else like pageout
+ * has a hold on it so wait a bit and then try again.
+ */
+ if (hnp == hm->hlm_rootnode->hln_back) {
+ VN_HOLD(vp);
+ mutex_exit(&hm->hlm_contents);
+ delay(hz / 4);
+ mutex_enter(&hm->hlm_contents);
+ }
+ }
+ mutex_exit(&hm->hlm_contents);
+
+ VN_RELE(HLNTOV(hm->hlm_rootnode));
+
+ ASSERT(hm->hlm_mntpath);
+
+ kmem_free(hm->hlm_mntpath, strlen(hm->hlm_mntpath) + 1);
+
+ mutex_destroy(&hm->hlm_contents);
+ kmem_free(hm, sizeof (hlfsmount_t));
+
+ return (0);
+}
+
+/* Return root hlnode for given vnode */
+static int
+hyprlofs_root(vfs_t *vfsp, vnode_t **vpp)
+{
+ hlfsmount_t *hm = (hlfsmount_t *)VFSTOHLM(vfsp);
+ hlnode_t *hp = hm->hlm_rootnode;
+ vnode_t *vp;
+
+ ASSERT(hp);
+
+ vp = HLNTOV(hp);
+ VN_HOLD(vp);
+ *vpp = vp;
+ return (0);
+}
+
+static int
+hyprlofs_statvfs(vfs_t *vfsp, struct statvfs64 *sbp)
+{
+ hlfsmount_t *hm = (hlfsmount_t *)VFSTOHLM(vfsp);
+ ulong_t blocks;
+ dev32_t d32;
+ zoneid_t eff_zid;
+ struct zone *zp;
+
+ /*
+ * The FS may have been mounted by the GZ on behalf of the NGZ. In
+ * that case, the hlfsmount zone_id will be the global zone. We want
+ * to show the swap cap inside the zone in this case, even though the
+ * FS was mounted by the GZ.
+ */
+ if (curproc->p_zone->zone_id != GLOBAL_ZONEUNIQID)
+ zp = curproc->p_zone;
+ else
+ zp = hm->hlm_vfsp->vfs_zone;
+
+ if (zp == NULL)
+ eff_zid = GLOBAL_ZONEUNIQID;
+ else
+ eff_zid = zp->zone_id;
+
+ sbp->f_bsize = PAGESIZE;
+ sbp->f_frsize = PAGESIZE;
+
+ /*
+ * Find the amount of available physical and memory swap
+ */
+ mutex_enter(&anoninfo_lock);
+ ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);
+ blocks = (ulong_t)CURRENT_TOTAL_AVAILABLE_SWAP;
+ mutex_exit(&anoninfo_lock);
+
+ if (blocks > hyprlofs_minfree)
+ sbp->f_bfree = blocks - hyprlofs_minfree;
+ else
+ sbp->f_bfree = 0;
+
+ sbp->f_bavail = sbp->f_bfree;
+
+ /*
+ * Total number of blocks is what's available plus what's been used
+ */
+ sbp->f_blocks = (fsblkcnt64_t)(sbp->f_bfree);
+
+ if (eff_zid != GLOBAL_ZONEUNIQID &&
+ zp->zone_max_swap_ctl != UINT64_MAX) {
+ /*
+ * If the fs is used by a NGZ with a swap cap, then report the
+ * capped size.
+ */
+ rctl_qty_t cap, used;
+ pgcnt_t pgcap, pgused;
+
+ mutex_enter(&zp->zone_mem_lock);
+ cap = zp->zone_max_swap_ctl;
+ used = zp->zone_max_swap;
+ mutex_exit(&zp->zone_mem_lock);
+
+ pgcap = btop(cap);
+ pgused = btop(used);
+
+ sbp->f_bfree = MIN(pgcap - pgused, sbp->f_bfree);
+ sbp->f_bavail = sbp->f_bfree;
+ sbp->f_blocks = MIN(pgcap, sbp->f_blocks);
+ }
+
+ /*
+ * This is fairly inaccurate since it doesn't take into account the
+ * names stored in the directory entries.
+ */
+ sbp->f_ffree = sbp->f_files = ptob(availrmem) /
+ (sizeof (hlnode_t) + sizeof (hldirent_t));
+
+ sbp->f_favail = (fsfilcnt64_t)(sbp->f_ffree);
+ (void) cmpldev(&d32, vfsp->vfs_dev);
+ sbp->f_fsid = d32;
+ (void) strcpy(sbp->f_basetype, vfssw[hyprlofsfstype].vsw_name);
+ (void) strncpy(sbp->f_fstr, hm->hlm_mntpath, sizeof (sbp->f_fstr));
+ /*
+ * ensure null termination
+ */
+ sbp->f_fstr[sizeof (sbp->f_fstr) - 1] = '\0';
+ sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
+ sbp->f_namemax = MAXNAMELEN - 1;
+ return (0);
+}
+
+static int
+hyprlofs_vget(vfs_t *vfsp, vnode_t **vpp, struct fid *fidp)
+{
+ hlfid_t *hfid;
+ hlfsmount_t *hm = (hlfsmount_t *)VFSTOHLM(vfsp);
+ hlnode_t *hp = NULL;
+
+ hfid = (hlfid_t *)fidp;
+ *vpp = NULL;
+
+ mutex_enter(&hm->hlm_contents);
+ for (hp = hm->hlm_rootnode; hp; hp = hp->hln_forw) {
+ mutex_enter(&hp->hln_tlock);
+ if (hp->hln_nodeid == hfid->hlfid_ino) {
+ /*
+ * If the gen numbers don't match we know the file
+ * won't be found since only one hlnode can have this
+ * number at a time.
+ */
+ if (hp->hln_gen != hfid->hlfid_gen ||
+ hp->hln_nlink == 0) {
+ mutex_exit(&hp->hln_tlock);
+ mutex_exit(&hm->hlm_contents);
+ return (0);
+ }
+ *vpp = (vnode_t *)HLNTOV(hp);
+
+ VN_HOLD(*vpp);
+
+ if ((hp->hln_mode & S_ISVTX) &&
+ !(hp->hln_mode & (S_IXUSR | S_IFDIR))) {
+ mutex_enter(&(*vpp)->v_lock);
+ (*vpp)->v_flag |= VISSWAP;
+ mutex_exit(&(*vpp)->v_lock);
+ }
+ mutex_exit(&hp->hln_tlock);
+ mutex_exit(&hm->hlm_contents);
+ return (0);
+ }
+ mutex_exit(&hp->hln_tlock);
+ }
+ mutex_exit(&hm->hlm_contents);
+ return (0);
+}
diff --git a/usr/src/uts/common/fs/hyprlofs/hyprlofs_vnops.c b/usr/src/uts/common/fs/hyprlofs/hyprlofs_vnops.c
new file mode 100644
index 0000000000..10b226ea8b
--- /dev/null
+++ b/usr/src/uts/common/fs/hyprlofs/hyprlofs_vnops.c
@@ -0,0 +1,1426 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/t_lock.h>
+#include <sys/systm.h>
+#include <sys/sysmacros.h>
+#include <sys/user.h>
+#include <sys/time.h>
+#include <sys/vfs.h>
+#include <sys/vfs_opreg.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/fcntl.h>
+#include <sys/flock.h>
+#include <sys/kmem.h>
+#include <sys/errno.h>
+#include <sys/stat.h>
+#include <sys/cred.h>
+#include <sys/dirent.h>
+#include <sys/pathname.h>
+#include <sys/fs/hyprlofs.h>
+#include <sys/fs/hyprlofs_info.h>
+#include <sys/mman.h>
+#include <vm/pvn.h>
+#include <sys/cmn_err.h>
+#include <sys/buf.h>
+#include <sys/policy.h>
+#include <fs/fs_subr.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+
+static int hyprlofs_add_entry(vnode_t *, char *, char *, cred_t *,
+ caller_context_t *);
+static int hyprlofs_rm_entry(vnode_t *, char *, cred_t *, caller_context_t *,
+ int);
+static int hyprlofs_rm_all(vnode_t *, cred_t *, caller_context_t *, int);
+static int hyprlofs_remove(vnode_t *, char *, cred_t *, caller_context_t *,
+ int);
+static int hyprlofs_get_all(vnode_t *, intptr_t, cred_t *, caller_context_t *,
+ int);
+
+/*
+ * This is a somewhat arbitrary upper limit on the number of entries we can
+ * pass in on a single add/rm ioctl call. This is only used to validate that
+ * the input list looks sane.
+ */
+#define MAX_IOCTL_PARAMS 100000
+
+static int
+hyprlofs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
+{
+ vnode_t *rvp;
+ int error;
+
+ rvp = REALVP(*vpp);
+
+ if (VTOHLN(*vpp)->hln_looped == 0)
+ return (0);
+
+ /*
+ * looped back, pass through to real vnode. Need to hold new reference
+ * to vp since VOP_OPEN() may decide to release it.
+ */
+ VN_HOLD(rvp);
+ error = VOP_OPEN(&rvp, flag, cr, ct);
+ ASSERT(rvp->v_count > 1);
+ VN_RELE(rvp);
+
+ return (error);
+}
+
+static int
+hyprlofs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
+ caller_context_t *ct)
+{
+ if (VTOHLN(vp)->hln_looped == 0) {
+ cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
+ cleanshares(vp, ttoproc(curthread)->p_pid);
+ return (0);
+ }
+
+ return (VOP_CLOSE(REALVP(vp), flag, count, offset, cr, ct));
+}
+
+static int
+hyprlofs_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
+ caller_context_t *ct)
+{
+ if (vp->v_type == VDIR)
+ return (EISDIR);
+ return (VOP_READ(REALVP(vp), uiop, ioflag, cr, ct));
+}
+
+static int
+hyprlofs_write(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
+ caller_context_t *ct)
+{
+ /* We don't support writing to non-regular files */
+ if (vp->v_type != VREG)
+ return (EINVAL);
+
+ if (vn_is_readonly(vp))
+ return (EROFS);
+
+ return (VOP_WRITE(REALVP(vp), uiop, ioflag, cr, ct));
+}
+
+/* ARGSUSED */
+static int
+hyprlofs_ioctl(vnode_t *vp, int cmd, intptr_t data, int flag,
+ cred_t *cr, int *rvalp, caller_context_t *ct)
+{
+ int len, cnt, error;
+ int i;
+ model_t model;
+ char path[MAXPATHLEN];
+ char nm[MAXPATHLEN];
+
+ /* We only support the hyprlofs ioctls on the root vnode */
+ if (!(vp->v_flag & VROOT))
+ return (ENOTTY);
+
+ /*
+ * Check if managing hyprlofs is allowed.
+ */
+ if (secpolicy_hyprlofs_control(cr) != 0)
+ return (EPERM);
+
+ if (cmd == HYPRLOFS_ADD_ENTRIES || cmd == HYPRLOFS_RM_ENTRIES) {
+ model = get_udatamodel();
+
+ if (model == DATAMODEL_NATIVE) {
+ hyprlofs_entries_t ebuf;
+ hyprlofs_entry_t *e;
+
+ if (copyin((void *)data, &ebuf, sizeof (ebuf)))
+ return (EFAULT);
+ cnt = ebuf.hle_len;
+ if (cnt > MAX_IOCTL_PARAMS)
+ return (EINVAL);
+ len = sizeof (hyprlofs_entry_t) * cnt;
+
+ e = kmem_alloc(len, KM_SLEEP);
+ if (copyin((void *)(ebuf.hle_entries), e, len)) {
+ kmem_free(e, len);
+ return (EFAULT);
+ }
+
+ for (i = 0; i < cnt; i++) {
+ if (e[i].hle_nlen == 0 ||
+ e[i].hle_nlen > MAXPATHLEN)
+ return (EINVAL);
+
+ if (copyin(e[i].hle_name, nm, e[i].hle_nlen)
+ != 0) {
+ kmem_free(e, len);
+ return (EFAULT);
+ }
+ nm[e[i].hle_nlen] = '\0';
+
+ if (cmd == HYPRLOFS_ADD_ENTRIES) {
+ if (e[i].hle_plen == 0 ||
+ e[i].hle_plen > MAXPATHLEN)
+ return (EINVAL);
+
+ if (copyin(e[i].hle_path, path,
+ e[i].hle_plen) != 0) {
+ kmem_free(e, len);
+ return (EFAULT);
+ }
+ path[e[i].hle_plen] = '\0';
+
+ if ((error = hyprlofs_add_entry(vp,
+ path, nm, cr, ct)) != 0) {
+ kmem_free(e, len);
+ return (error);
+ }
+ } else {
+ if ((error = hyprlofs_rm_entry(vp, nm,
+ cr, ct, flag)) != 0) {
+ kmem_free(e, len);
+ return (error);
+ }
+ }
+ }
+
+ kmem_free(e, len);
+ return (0);
+
+ } else {
+ hyprlofs_entries32_t ebuf32;
+ hyprlofs_entry32_t *e32;
+
+ if (copyin((void *)data, &ebuf32, sizeof (ebuf32)))
+ return (EFAULT);
+
+ cnt = ebuf32.hle_len;
+ if (cnt > MAX_IOCTL_PARAMS)
+ return (EINVAL);
+ len = sizeof (hyprlofs_entry32_t) * cnt;
+
+ e32 = kmem_alloc(len, KM_SLEEP);
+ if (copyin((void *)(unsigned long)(ebuf32.hle_entries),
+ e32, len)) {
+ kmem_free(e32, len);
+ return (EFAULT);
+ }
+
+ for (i = 0; i < cnt; i++) {
+ if (e32[i].hle_nlen == 0 ||
+ e32[i].hle_nlen > MAXPATHLEN)
+ return (EINVAL);
+
+ if (copyin((void *)(unsigned long)
+ e32[i].hle_name, nm,
+ e32[i].hle_nlen) != 0) {
+ kmem_free(e32, len);
+ return (EFAULT);
+ }
+ nm[e32[i].hle_nlen] = '\0';
+
+ if (cmd == HYPRLOFS_ADD_ENTRIES) {
+ if (e32[i].hle_plen == 0 ||
+ e32[i].hle_plen > MAXPATHLEN)
+ return (EINVAL);
+
+ if (copyin((void *)(unsigned long)
+ e32[i].hle_path, path,
+ e32[i].hle_plen) != 0) {
+ kmem_free(e32, len);
+ return (EFAULT);
+ }
+ path[e32[i].hle_plen] = '\0';
+
+ if ((error = hyprlofs_add_entry(vp,
+ path, nm, cr, ct)) != 0) {
+ kmem_free(e32, len);
+ return (error);
+ }
+ } else {
+ if ((error = hyprlofs_rm_entry(vp, nm,
+ cr, ct, flag)) != 0) {
+ kmem_free(e32, len);
+ return (error);
+ }
+ }
+ }
+
+ kmem_free(e32, len);
+ return (0);
+ }
+ }
+
+ if (cmd == HYPRLOFS_RM_ALL) {
+ return (hyprlofs_rm_all(vp, cr, ct, flag));
+ }
+
+ if (cmd == HYPRLOFS_GET_ENTRIES) {
+ return (hyprlofs_get_all(vp, data, cr, ct, flag));
+ }
+
+ return (ENOTTY);
+}
+
+/*ARGSUSED2*/
+static int
+hyprlofs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
+ caller_context_t *ct)
+{
+ hlnode_t *tp = (hlnode_t *)VTOHLN(vp);
+
+ mutex_enter(&tp->hln_tlock);
+ vap->va_type = vp->v_type;
+ vap->va_mode = tp->hln_mode & MODEMASK;
+ vap->va_uid = tp->hln_uid;
+ vap->va_gid = tp->hln_gid;
+ vap->va_fsid = tp->hln_fsid;
+ vap->va_nodeid = (ino64_t)tp->hln_nodeid;
+ vap->va_nlink = tp->hln_nlink;
+ vap->va_size = (u_offset_t)tp->hln_size;
+ vap->va_atime = tp->hln_atime;
+ vap->va_mtime = tp->hln_mtime;
+ vap->va_ctime = tp->hln_ctime;
+ vap->va_blksize = PAGESIZE;
+ vap->va_rdev = tp->hln_rdev;
+ vap->va_seq = tp->hln_seq;
+
+ vap->va_nblocks = (fsblkcnt64_t)btodb(ptob(btopr(vap->va_size)));
+ mutex_exit(&tp->hln_tlock);
+ return (0);
+}
+
+/*ARGSUSED4*/
+static int
+hyprlofs_setattr(vnode_t *vp, vattr_t *vap, int flags,
+ cred_t *cr, caller_context_t *ct)
+{
+ hlnode_t *tp = (hlnode_t *)VTOHLN(vp);
+ int error = 0;
+ vattr_t *get;
+ long mask;
+
+ /*
+ * Cannot set these attributes
+ */
+ if ((vap->va_mask & AT_NOSET) || (vap->va_mask & AT_XVATTR))
+ return (EINVAL);
+
+ mutex_enter(&tp->hln_tlock);
+
+ get = &tp->hln_attr;
+ /*
+ * Change file access modes. Must be owner or have sufficient
+ * privileges.
+ */
+ error = secpolicy_vnode_setattr(cr, vp, vap, get, flags,
+ hyprlofs_taccess, tp);
+
+ if (error)
+ goto out;
+
+ mask = vap->va_mask;
+
+ if (mask & AT_MODE) {
+ get->va_mode &= S_IFMT;
+ get->va_mode |= vap->va_mode & ~S_IFMT;
+ }
+
+ if (mask & AT_UID)
+ get->va_uid = vap->va_uid;
+ if (mask & AT_GID)
+ get->va_gid = vap->va_gid;
+ if (mask & AT_ATIME)
+ get->va_atime = vap->va_atime;
+ if (mask & AT_MTIME)
+ get->va_mtime = vap->va_mtime;
+
+ if (mask & (AT_UID | AT_GID | AT_MODE | AT_MTIME))
+ gethrestime(&tp->hln_ctime);
+
+out:
+ mutex_exit(&tp->hln_tlock);
+ return (error);
+}
+
+static int
+hyprlofs_access(vnode_t *vp, int mode, int flags, cred_t *cr,
+ caller_context_t *ct)
+{
+ hlnode_t *tp = (hlnode_t *)VTOHLN(vp);
+ int error;
+
+ if (mode & VWRITE) {
+ if (vp->v_type == VREG && vn_is_readonly(vp))
+ return (EROFS);
+ }
+ if (VTOHLN(vp)->hln_looped == 1)
+ return (VOP_ACCESS(REALVP(vp), mode, flags, cr, ct));
+
+ mutex_enter(&tp->hln_tlock);
+ error = hyprlofs_taccess(tp, mode, cr);
+ mutex_exit(&tp->hln_tlock);
+ return (error);
+}
+
+/* ARGSUSED3 */
+static int
+hyprlofs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
+ int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
+ int *direntflags, pathname_t *realpnp)
+{
+ hlnode_t *tp = (hlnode_t *)VTOHLN(dvp);
+ hlnode_t *ntp = NULL;
+ int error;
+
+ if (VTOHLN(dvp)->hln_looped == 1)
+ return (VOP_LOOKUP(REALVP(dvp), nm, vpp, pnp, flags, rdir,
+ cr, ct, direntflags, realpnp));
+
+ if (flags & LOOKUP_XATTR)
+ return (EINVAL);
+
+ /* Null component name is a synonym for directory being searched. */
+ if (*nm == '\0') {
+ VN_HOLD(dvp);
+ *vpp = dvp;
+ return (0);
+ }
+ ASSERT(tp);
+
+ if ((error = hyprlofs_dirlookup(tp, nm, &ntp, cr)) == 0) {
+ ASSERT(ntp);
+ *vpp = HLNTOV(ntp);
+ }
+ return (error);
+}
+
+/*
+ * Create the loopback from the hyprlofs vnode to the real vnode.
+ */
+static int
+hyprlofs_loopback(vnode_t *dvp, vnode_t *rvp, char *nm, vattr_t *vap,
+ int mode, cred_t *cr, caller_context_t *ct)
+{
+ hlnode_t *parent;
+ hlfsmount_t *tm;
+ int error;
+ hlnode_t *oldtp;
+ vnode_t *vp;
+
+ parent = (hlnode_t *)VTOHLN(dvp);
+ tm = (hlfsmount_t *)VTOHLM(dvp);
+ error = 0;
+ oldtp = NULL;
+
+ if (vap->va_type == VREG && (vap->va_mode & VSVTX)) {
+ /* we don't support the sticky bit */
+ vap->va_mode &= ~VSVTX;
+ } else if (vap->va_type == VNON) {
+ return (EINVAL);
+ }
+
+ /* Null component name is a synonym for directory being searched. */
+ if (*nm == '\0') {
+ VN_HOLD(dvp);
+ oldtp = parent;
+ } else {
+ error = hyprlofs_dirlookup(parent, nm, &oldtp, cr);
+ }
+
+ if (error == 0) { /* name found */
+ ASSERT(oldtp);
+
+ rw_enter(&oldtp->hln_rwlock, RW_WRITER);
+
+ /*
+ * if create/read-only an existing directory, allow it
+ */
+ if ((oldtp->hln_type == VDIR) && (mode & VWRITE))
+ error = EISDIR;
+ else {
+ error = hyprlofs_taccess(oldtp, mode, cr);
+ }
+
+ if (error) {
+ rw_exit(&oldtp->hln_rwlock);
+ hlnode_rele(oldtp);
+ return (error);
+ }
+
+ vp = HLNTOV(oldtp);
+ rw_exit(&oldtp->hln_rwlock);
+
+ if (vp->v_type == VREG) {
+ hlnode_rele(oldtp);
+ return (EEXIST);
+ }
+
+ vnevent_create(vp, ct);
+ return (0);
+ }
+
+ if (error != ENOENT)
+ return (error);
+
+ rw_enter(&parent->hln_rwlock, RW_WRITER);
+ error = hyprlofs_direnter(tm, parent, nm, DE_CREATE, rvp, vap, NULL,
+ cr);
+ rw_exit(&parent->hln_rwlock);
+
+ return (error);
+}
+
+/*
+ * Create an in-memory directory based on the add-entry ioctl name.
+ * If the dir exists, return EEXIST but still also return node in vpp.
+ */
+static int
+hyprlofs_mkdir(vnode_t *dvp, char *nm, vattr_t *va, vnode_t **vpp, cred_t *cr)
+{
+ hlnode_t *parent = (hlnode_t *)VTOHLN(dvp);
+ hlnode_t *self = NULL;
+ hlfsmount_t *tm = (hlfsmount_t *)VTOHLM(dvp);
+ int error;
+
+ /*
+ * Might be dangling directory. Catch it here, because a ENOENT return
+ * from hyprlofs_dirlookup() is a valid return.
+ */
+ if (parent->hln_nlink == 0)
+ return (ENOENT);
+
+ error = hyprlofs_dirlookup(parent, nm, &self, cr);
+ if (error == 0) {
+ ASSERT(self);
+ hlnode_rele(self);
+ /* We can't loop in under a looped in directory */
+ if (self->hln_looped)
+ return (EACCES);
+ *vpp = HLNTOV(self);
+ return (EEXIST);
+ }
+ if (error != ENOENT)
+ return (error);
+
+ rw_enter(&parent->hln_rwlock, RW_WRITER);
+ error = hyprlofs_direnter(tm, parent, nm, DE_MKDIR, (vnode_t *)NULL,
+ va, &self, cr);
+ rw_exit(&parent->hln_rwlock);
+
+ if (error == 0 || error == EEXIST) {
+ hlnode_rele(self);
+ *vpp = HLNTOV(self);
+ }
+
+ return (error);
+}
+
+/*
+ * Loop in a file or directory into the namespace.
+ */
+static int
+hyprlofs_add_entry(vnode_t *vp, char *fspath, char *fsname,
+ cred_t *cr, caller_context_t *ct)
+{
+ int error;
+ char *p, *pnm;
+ vnode_t *realvp, *dvp;
+ vattr_t va;
+
+ /*
+ * Get vnode for the real file/dir. We'll have a hold on realvp which
+ * we won't vn_rele until hyprlofs_inactive.
+ */
+ if ((error = lookupname(fspath, UIO_SYSSPACE, FOLLOW, NULLVPP,
+ &realvp)) != 0)
+ return (error);
+
+ /* no devices allowed */
+ if (IS_DEVVP(realvp)) {
+ VN_RELE(realvp);
+ return (ENODEV);
+ }
+
+ /*
+ * realvp may be an AUTOFS node, in which case we perform a VOP_ACCESS
+ * to trigger the mount of the intended filesystem. This causes a
+ * loopback mount of the intended filesystem instead of the AUTOFS
+ * filesystem.
+ */
+ if ((error = VOP_ACCESS(realvp, 0, 0, cr, NULL)) != 0) {
+ VN_RELE(realvp);
+ return (error);
+ }
+
+ /*
+ * We're interested in the top most filesystem. This is specially
+ * important when fspath is a trigger AUTOFS node, since we're really
+ * interested in mounting the filesystem AUTOFS mounted as result of
+ * the VOP_ACCESS() call not the AUTOFS node itself.
+ */
+ if (vn_mountedvfs(realvp) != NULL) {
+ if ((error = traverse(&realvp)) != 0) {
+ VN_RELE(realvp);
+ return (error);
+ }
+ }
+
+ va.va_type = VNON;
+ /*
+ * If the target name is a path, make sure we have all of the
+ * intermediate directories, creating them if necessary.
+ */
+ dvp = vp;
+ pnm = p = fsname;
+
+ /* path cannot be absolute */
+ if (*p == '/') {
+ VN_RELE(realvp);
+ return (EINVAL);
+ }
+
+ for (p = strchr(pnm, '/'); p != NULL; p = strchr(pnm, '/')) {
+ if (va.va_type == VNON)
+ /* use the top-level dir as the template va for mkdir */
+ if ((error = VOP_GETATTR(vp, &va, 0, cr, NULL)) != 0) {
+ VN_RELE(realvp);
+ return (error);
+ }
+
+ *p = '\0';
+
+ /* Path component cannot be empty or relative */
+ if (pnm[0] == '\0' || (pnm[0] == '.' && pnm[1] == '.')) {
+ VN_RELE(realvp);
+ return (EINVAL);
+ }
+
+ if ((error = hyprlofs_mkdir(dvp, pnm, &va, &dvp, cr)) != 0 &&
+ error != EEXIST) {
+ VN_RELE(realvp);
+ return (error);
+ }
+
+ *p = '/';
+ pnm = p + 1;
+ }
+
+ /* The file name is required */
+ if (pnm[0] == '\0') {
+ VN_RELE(realvp);
+ return (EINVAL);
+ }
+
+ /* Now use the real file's va as the template va */
+ if ((error = VOP_GETATTR(realvp, &va, 0, cr, NULL)) != 0) {
+ VN_RELE(realvp);
+ return (error);
+ }
+
+ /* Make the vnode */
+ error = hyprlofs_loopback(dvp, realvp, pnm, &va, va.va_mode, cr, ct);
+ if (error != 0)
+ VN_RELE(realvp);
+ return (error);
+}
+
+/*
+ * Remove a looped in file from the namespace.
+ */
+static int
+hyprlofs_rm_entry(vnode_t *dvp, char *fsname, cred_t *cr, caller_context_t *ct,
+ int flags)
+{
+ int error;
+ char *p, *pnm;
+ hlnode_t *parent;
+ hlnode_t *fndtp;
+
+ pnm = p = fsname;
+
+ /* path cannot be absolute */
+ if (*p == '/')
+ return (EINVAL);
+
+ /*
+ * If the target name is a path, get the containing dir and simple
+ * file name.
+ */
+ parent = (hlnode_t *)VTOHLN(dvp);
+ for (p = strchr(pnm, '/'); p != NULL; p = strchr(pnm, '/')) {
+ *p = '\0';
+
+ /* Path component cannot be empty or relative */
+ if (pnm[0] == '\0' || (pnm[0] == '.' && pnm[1] == '.'))
+ return (EINVAL);
+
+ if ((error = hyprlofs_dirlookup(parent, pnm, &fndtp, cr)) != 0)
+ return (error);
+
+ dvp = HLNTOV(fndtp);
+ parent = fndtp;
+ pnm = p + 1;
+ }
+
+ /* The file name is required */
+ if (pnm[0] == '\0')
+ return (EINVAL);
+
+ /* Remove the entry from the parent dir */
+ return (hyprlofs_remove(dvp, pnm, cr, ct, flags));
+}
+
+/*
+ * Remove all looped in files from the namespace.
+ */
+static int
+hyprlofs_rm_all(vnode_t *dvp, cred_t *cr, caller_context_t *ct,
+ int flags)
+{
+ int error = 0;
+ hlnode_t *hp = (hlnode_t *)VTOHLN(dvp);
+ hldirent_t *hdp;
+
+ hlnode_hold(hp);
+
+ /*
+ * There's a window here where someone could have removed
+ * all the entries in the directory after we put a hold on the
+ * vnode but before we grabbed the rwlock. Just return.
+ */
+ if (hp->hln_dir == NULL) {
+ if (hp->hln_nlink) {
+ panic("empty directory 0x%p", (void *)hp);
+ /*NOTREACHED*/
+ }
+ goto done;
+ }
+
+ hdp = hp->hln_dir;
+ while (hdp) {
+ hlnode_t *fndhp;
+
+ if (strcmp(hdp->hld_name, ".") == 0 ||
+ strcmp(hdp->hld_name, "..") == 0) {
+ hdp = hdp->hld_next;
+ continue;
+ }
+
+ /* This holds the fndhp vnode */
+ error = hyprlofs_dirlookup(hp, hdp->hld_name, &fndhp, cr);
+ if (error != 0)
+ goto done;
+ hlnode_rele(fndhp);
+
+ if (fndhp->hln_looped == 0) {
+ /* recursively remove contents of this subdir */
+ if (fndhp->hln_type == VDIR) {
+ vnode_t *tvp = HLNTOV(fndhp);
+
+ error = hyprlofs_rm_all(tvp, cr, ct, flags);
+ if (error != 0)
+ goto done;
+ }
+ }
+
+ /* remove the entry */
+ error = hyprlofs_remove(dvp, hdp->hld_name, cr, ct, flags);
+ if (error != 0)
+ goto done;
+
+ hdp = hp->hln_dir;
+ }
+
+done:
+ hlnode_rele(hp);
+ return (error);
+}
+
+/*
+ * Get a list of all looped in files in the namespace.
+ */
+static int
+hyprlofs_get_all_entries(vnode_t *dvp, hyprlofs_curr_entry_t *hcp,
+ char *prefix, int *pcnt, int n_max,
+ cred_t *cr, caller_context_t *ct, int flags)
+{
+ int error = 0;
+ int too_big = 0;
+ int cnt;
+ int len;
+ hlnode_t *hp = (hlnode_t *)VTOHLN(dvp);
+ hldirent_t *hdp;
+ char *path;
+
+ cnt = *pcnt;
+ path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+ hlnode_hold(hp);
+
+ /*
+ * There's a window here where someone could have removed
+ * all the entries in the directory after we put a hold on the
+ * vnode but before we grabbed the rwlock. Just return.
+ */
+ if (hp->hln_dir == NULL) {
+ if (hp->hln_nlink) {
+ panic("empty directory 0x%p", (void *)hp);
+ /*NOTREACHED*/
+ }
+ goto done;
+ }
+
+ hdp = hp->hln_dir;
+ while (hdp) {
+ hlnode_t *fndhp;
+ vnode_t *tvp;
+
+ if (strcmp(hdp->hld_name, ".") == 0 ||
+ strcmp(hdp->hld_name, "..") == 0) {
+ hdp = hdp->hld_next;
+ continue;
+ }
+
+ /* This holds the fndhp vnode */
+ error = hyprlofs_dirlookup(hp, hdp->hld_name, &fndhp, cr);
+ if (error != 0)
+ goto done;
+ hlnode_rele(fndhp);
+
+ if (fndhp->hln_looped == 0) {
+ /* recursively get contents of this subdir */
+ VERIFY(fndhp->hln_type == VDIR);
+ tvp = HLNTOV(fndhp);
+
+ if (*prefix == '\0')
+ (void) strlcpy(path, hdp->hld_name, MAXPATHLEN);
+ else
+ (void) snprintf(path, MAXPATHLEN, "%s/%s",
+ prefix, hdp->hld_name);
+
+ error = hyprlofs_get_all_entries(tvp, hcp, path,
+ &cnt, n_max, cr, ct, flags);
+
+ if (error == E2BIG) {
+ too_big = 1;
+ error = 0;
+ }
+ if (error != 0)
+ goto done;
+ } else {
+ if (cnt < n_max) {
+ char *p;
+
+ if (*prefix == '\0')
+ (void) strlcpy(path, hdp->hld_name,
+ MAXPATHLEN);
+ else
+ (void) snprintf(path, MAXPATHLEN,
+ "%s/%s", prefix, hdp->hld_name);
+
+ len = strlen(path);
+ ASSERT(len <= MAXPATHLEN);
+ if (copyout(path, (void *)(hcp[cnt].hce_name),
+ len)) {
+ error = EFAULT;
+ goto done;
+ }
+
+ tvp = REALVP(HLNTOV(fndhp));
+ if (tvp->v_path == NULL) {
+ p = "<unknown>";
+ } else {
+ p = tvp->v_path;
+ }
+ len = strlen(p);
+ ASSERT(len <= MAXPATHLEN);
+ if (copyout(p, (void *)(hcp[cnt].hce_path),
+ len)) {
+ error = EFAULT;
+ goto done;
+ }
+ }
+
+ cnt++;
+ if (cnt > n_max)
+ too_big = 1;
+ }
+
+ hdp = hdp->hld_next;
+ }
+
+done:
+ hlnode_rele(hp);
+ kmem_free(path, MAXPATHLEN);
+
+ *pcnt = cnt;
+ if (error == 0 && too_big == 1)
+ error = E2BIG;
+
+ return (error);
+}
+
+/*
+ * Return a list of all looped in files in the namespace.
+ */
+static int
+hyprlofs_get_all(vnode_t *dvp, intptr_t data, cred_t *cr, caller_context_t *ct,
+ int flags)
+{
+ int limit, cnt, error;
+ model_t model;
+ hyprlofs_curr_entry_t *e;
+
+ model = get_udatamodel();
+
+ if (model == DATAMODEL_NATIVE) {
+ hyprlofs_curr_entries_t ebuf;
+
+ if (copyin((void *)data, &ebuf, sizeof (ebuf)))
+ return (EFAULT);
+ limit = ebuf.hce_cnt;
+ e = ebuf.hce_entries;
+ if (limit > MAX_IOCTL_PARAMS)
+ return (EINVAL);
+
+ } else {
+ hyprlofs_curr_entries32_t ebuf32;
+
+ if (copyin((void *)data, &ebuf32, sizeof (ebuf32)))
+ return (EFAULT);
+
+ limit = ebuf32.hce_cnt;
+ e = (hyprlofs_curr_entry_t *)(unsigned long)
+ (ebuf32.hce_entries);
+ if (limit > MAX_IOCTL_PARAMS)
+ return (EINVAL);
+ }
+
+ cnt = 0;
+ error = hyprlofs_get_all_entries(dvp, e, "", &cnt, limit, cr, ct,
+ flags);
+
+ if (error == 0 || error == E2BIG) {
+ if (model == DATAMODEL_NATIVE) {
+ hyprlofs_curr_entries_t ebuf;
+
+ ebuf.hce_cnt = cnt;
+ if (copyout(&ebuf, (void *)data, sizeof (ebuf)))
+ return (EFAULT);
+
+ } else {
+ hyprlofs_curr_entries32_t ebuf32;
+
+ ebuf32.hce_cnt = cnt;
+ if (copyout(&ebuf32, (void *)data, sizeof (ebuf32)))
+ return (EFAULT);
+ }
+ }
+
+ return (error);
+}
+
+/* ARGSUSED3 */
+static int
+hyprlofs_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
+ int flags)
+{
+ hlnode_t *parent = (hlnode_t *)VTOHLN(dvp);
+ int error;
+ hlnode_t *hp = NULL;
+
+ /* This holds the hp vnode */
+ error = hyprlofs_dirlookup(parent, nm, &hp, cr);
+ if (error)
+ return (error);
+
+ ASSERT(hp);
+ rw_enter(&parent->hln_rwlock, RW_WRITER);
+ rw_enter(&hp->hln_rwlock, RW_WRITER);
+
+ error = hyprlofs_dirdelete(parent, hp, nm, DR_REMOVE, cr);
+
+ rw_exit(&hp->hln_rwlock);
+ rw_exit(&parent->hln_rwlock);
+ vnevent_remove(HLNTOV(hp), dvp, nm, ct);
+
+ /*
+ * We've now dropped the dir link so by rele-ing our vnode we should
+ * clean up in hyprlofs_inactive.
+ */
+ hlnode_rele(hp);
+
+ return (error);
+}
+
+/* ARGSUSED4 */
+static int
+hyprlofs_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
+ caller_context_t *ct, int flags)
+{
+ hlnode_t *parent = (hlnode_t *)VTOHLN(dvp);
+ hlnode_t *self = NULL;
+ vnode_t *vp;
+ int error = 0;
+
+ /* Return error if removing . or .. */
+ if (strcmp(nm, ".") == 0)
+ return (EINVAL);
+ if (strcmp(nm, "..") == 0)
+ return (EEXIST); /* Should be ENOTEMPTY */
+ error = hyprlofs_dirlookup(parent, nm, &self, cr);
+ if (error)
+ return (error);
+
+ rw_enter(&parent->hln_rwlock, RW_WRITER);
+ rw_enter(&self->hln_rwlock, RW_WRITER);
+
+ vp = HLNTOV(self);
+ if (vp == dvp || vp == cdir) {
+ error = EINVAL;
+ goto done1;
+ }
+ if (self->hln_type != VDIR) {
+ error = ENOTDIR;
+ goto done1;
+ }
+
+ /*
+ * When a dir is looped in, we only remove the in-memory dir, not the
+ * backing dir.
+ */
+ if (self->hln_looped == 0) {
+ mutex_enter(&self->hln_tlock);
+ if (self->hln_nlink > 2) {
+ mutex_exit(&self->hln_tlock);
+ error = EEXIST;
+ goto done1;
+ }
+ mutex_exit(&self->hln_tlock);
+
+ if (vn_vfswlock(vp)) {
+ error = EBUSY;
+ goto done1;
+ }
+ if (vn_mountedvfs(vp) != NULL) {
+ error = EBUSY;
+ goto done;
+ }
+
+ /*
+ * Check for an empty directory, i.e. only includes entries for
+ * "." and ".."
+ */
+ if (self->hln_dirents > 2) {
+ error = EEXIST; /* SIGH should be ENOTEMPTY */
+ /*
+ * Update atime because checking hln_dirents is
+ * equivalent to reading the directory
+ */
+ gethrestime(&self->hln_atime);
+ goto done;
+ }
+
+ error = hyprlofs_dirdelete(parent, self, nm, DR_RMDIR, cr);
+ } else {
+ error = hyprlofs_dirdelete(parent, self, nm, DR_REMOVE, cr);
+ }
+
+done:
+ if (self->hln_looped == 0)
+ vn_vfsunlock(vp);
+done1:
+ rw_exit(&self->hln_rwlock);
+ rw_exit(&parent->hln_rwlock);
+ vnevent_rmdir(HLNTOV(self), dvp, nm, ct);
+
+ /*
+ * We've now dropped the dir link so by rele-ing our vnode we should
+ * clean up in hyprlofs_inactive.
+ */
+ hlnode_rele(self);
+
+ return (error);
+}
+
+static int
+hyprlofs_readdir(vnode_t *vp, struct uio *uiop, cred_t *cr, int *eofp,
+ caller_context_t *ct, int flags)
+{
+ hlnode_t *hp = (hlnode_t *)VTOHLN(vp);
+ hldirent_t *hdp;
+ int error = 0;
+ size_t namelen;
+ struct dirent64 *dp;
+ ulong_t offset;
+ ulong_t total_bytes_wanted;
+ long outcount = 0;
+ long bufsize;
+ int reclen;
+ caddr_t outbuf;
+
+ if (VTOHLN(vp)->hln_looped == 1)
+ return (VOP_READDIR(REALVP(vp), uiop, cr, eofp, ct, flags));
+
+ if (uiop->uio_loffset >= MAXOFF_T) {
+ if (eofp)
+ *eofp = 1;
+ return (0);
+ }
+ /* assuming syscall has already called hln_rwlock */
+ ASSERT(RW_READ_HELD(&hp->hln_rwlock));
+
+ if (uiop->uio_iovcnt != 1)
+ return (EINVAL);
+
+ if (vp->v_type != VDIR)
+ return (ENOTDIR);
+
+ /*
+ * There's a window here where someone could have removed
+ * all the entries in the directory after we put a hold on the
+ * vnode but before we grabbed the rwlock. Just return.
+ */
+ if (hp->hln_dir == NULL) {
+ if (hp->hln_nlink) {
+ panic("empty directory 0x%p", (void *)hp);
+ /*NOTREACHED*/
+ }
+ return (0);
+ }
+
+ /* Get space for multiple dir entries */
+ total_bytes_wanted = uiop->uio_iov->iov_len;
+ bufsize = total_bytes_wanted + sizeof (struct dirent64);
+ outbuf = kmem_alloc(bufsize, KM_SLEEP);
+
+ dp = (struct dirent64 *)((uintptr_t)outbuf);
+
+ offset = 0;
+ hdp = hp->hln_dir;
+ while (hdp) {
+ namelen = strlen(hdp->hld_name); /* no +1 needed */
+ offset = hdp->hld_offset;
+ if (offset >= uiop->uio_offset) {
+ reclen = (int)DIRENT64_RECLEN(namelen);
+ if (outcount + reclen > total_bytes_wanted) {
+ if (!outcount)
+ /* Buffer too small for any entries. */
+ error = EINVAL;
+ break;
+ }
+ ASSERT(hdp->hld_hlnode != NULL);
+
+ /* zero out uninitialized bytes */
+ (void) strncpy(dp->d_name, hdp->hld_name,
+ DIRENT64_NAMELEN(reclen));
+ dp->d_reclen = (ushort_t)reclen;
+ dp->d_ino = (ino64_t)hdp->hld_hlnode->hln_nodeid;
+ dp->d_off = (offset_t)hdp->hld_offset + 1;
+ dp = (struct dirent64 *)
+ ((uintptr_t)dp + dp->d_reclen);
+ outcount += reclen;
+ ASSERT(outcount <= bufsize);
+ }
+ hdp = hdp->hld_next;
+ }
+
+ if (!error)
+ error = uiomove(outbuf, outcount, UIO_READ, uiop);
+
+ if (!error) {
+ /*
+ * If we reached the end of the list our offset should now be
+ * just past the end.
+ */
+ if (!hdp) {
+ offset += 1;
+ if (eofp)
+ *eofp = 1;
+ } else if (eofp)
+ *eofp = 0;
+ uiop->uio_offset = offset;
+ }
+ gethrestime(&hp->hln_atime);
+ kmem_free(outbuf, bufsize);
+ return (error);
+}
+
+static int
+hyprlofs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
+{
+ if (VTOHLN(vp)->hln_looped == 1)
+ return (VOP_FSYNC(REALVP(vp), syncflag, cr, ct));
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+hyprlofs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
+{
+ hlnode_t *hp = (hlnode_t *)VTOHLN(vp);
+ hlfsmount_t *hm = (hlfsmount_t *)VFSTOHLM(vp->v_vfsp);
+
+ rw_enter(&hp->hln_rwlock, RW_WRITER);
+
+ mutex_enter(&hp->hln_tlock);
+ mutex_enter(&vp->v_lock);
+ ASSERT(vp->v_count >= 1);
+
+ /*
+ * If we don't have the last hold or the link count is non-zero,
+ * there's nothing to do except drop our hold.
+ */
+ if (vp->v_count > 1 || hp->hln_nlink != 0) {
+ vp->v_count--;
+ mutex_exit(&vp->v_lock);
+ mutex_exit(&hp->hln_tlock);
+ rw_exit(&hp->hln_rwlock);
+ return;
+ }
+
+ mutex_exit(&vp->v_lock);
+ mutex_exit(&hp->hln_tlock);
+
+ /* release hold on the real vnode now */
+ if (hp->hln_looped == 1 && hp->hln_realvp != NULL)
+ VN_RELE(hp->hln_realvp);
+
+ /* Here's our chance to send invalid event while we're between locks */
+ vn_invalid(HLNTOV(hp));
+
+ mutex_enter(&hm->hlm_contents);
+ if (hp->hln_forw == NULL)
+ hm->hlm_rootnode->hln_back = hp->hln_back;
+ else
+ hp->hln_forw->hln_back = hp->hln_back;
+ hp->hln_back->hln_forw = hp->hln_forw;
+ mutex_exit(&hm->hlm_contents);
+ rw_exit(&hp->hln_rwlock);
+ rw_destroy(&hp->hln_rwlock);
+ mutex_destroy(&hp->hln_tlock);
+ vn_free(HLNTOV(hp));
+ kmem_free(hp, sizeof (hlnode_t));
+}
+
+static int
+hyprlofs_fid(vnode_t *vp, struct fid *fidp, caller_context_t *ct)
+{
+ hlnode_t *hp = (hlnode_t *)VTOHLN(vp);
+ hlfid_t *hfid;
+
+ if (VTOHLN(vp)->hln_looped == 1)
+ return (VOP_FID(REALVP(vp), fidp, ct));
+
+ if (fidp->fid_len < (sizeof (hlfid_t) - sizeof (ushort_t))) {
+ fidp->fid_len = sizeof (hlfid_t) - sizeof (ushort_t);
+ return (ENOSPC);
+ }
+
+ hfid = (hlfid_t *)fidp;
+ bzero(hfid, sizeof (hlfid_t));
+ hfid->hlfid_len = (int)sizeof (hlfid_t) - sizeof (ushort_t);
+
+ hfid->hlfid_ino = hp->hln_nodeid;
+ hfid->hlfid_gen = hp->hln_gen;
+
+ return (0);
+}
+
+static int
+hyprlofs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
+ page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw,
+ cred_t *cr, caller_context_t *ct)
+{
+ /* return EACCES to be consistent with mmap */
+ if (VTOHLN(vp)->hln_looped != 1)
+ return (EACCES);
+ return (VOP_GETPAGE(REALVP(vp), off, len, protp, pl, plsz, seg, addr,
+ rw, cr, ct));
+}
+
+int
+hyprlofs_putpage(vnode_t *vp, offset_t off, size_t len, int flags,
+ cred_t *cr, caller_context_t *ct)
+{
+ /* return EACCES to be consistent with mmap */
+ if (VTOHLN(vp)->hln_looped != 1)
+ return (EACCES);
+ return (VOP_PUTPAGE(REALVP(vp), off, len, flags, cr, ct));
+}
+
+static int
+hyprlofs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp,
+ size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
+ caller_context_t *ct)
+{
+ /* return EACCES to be consistent with mmap */
+ if (VTOHLN(vp)->hln_looped != 1)
+ return (EACCES);
+ return (VOP_MAP(REALVP(vp), off, as, addrp, len, prot, maxprot, flags,
+ cr, ct));
+}
+
+static int
+hyprlofs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
+ size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
+ caller_context_t *ct)
+{
+ /* return EACCES to be consistent with mmap */
+ if (VTOHLN(vp)->hln_looped != 1)
+ return (EACCES);
+ return (VOP_ADDMAP(REALVP(vp), off, as, addr, len, prot, maxprot,
+ flags, cr, ct));
+}
+
+static int
+hyprlofs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
+ size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr,
+ caller_context_t *ct)
+{
+ /* return EACCES to be consistent with mmap */
+ if (VTOHLN(vp)->hln_looped != 1)
+ return (EACCES);
+ return (VOP_DELMAP(REALVP(vp), off, as, addr, len, prot, maxprot,
+ flags, cr, ct));
+}
+
+static int
+hyprlofs_space(vnode_t *vp, int cmd, struct flock64 *bfp, int flag,
+ offset_t offset, cred_t *cr, caller_context_t *ct)
+{
+ /* return EACCES to be consistent with mmap */
+ if (VTOHLN(vp)->hln_looped != 1)
+ return (EACCES);
+ return (VOP_SPACE(REALVP(vp), cmd, bfp, flag, offset, cr, ct));
+}
+
+static int
+hyprlofs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp,
+ caller_context_t *ct)
+{
+ if (VTOHLN(vp)->hln_looped == 0)
+ return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
+
+ return (VOP_SEEK(REALVP(vp), ooff, noffp, ct));
+}
+
+static int
+hyprlofs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ct)
+{
+ hlnode_t *hp = VTOHLN(vp);
+
+ if (hp->hln_looped == 1)
+ return (VOP_RWLOCK(REALVP(vp), write_lock, ct));
+
+ if (write_lock) {
+ rw_enter(&hp->hln_rwlock, RW_WRITER);
+ } else {
+ rw_enter(&hp->hln_rwlock, RW_READER);
+ }
+ return (write_lock);
+}
+
+static void
+hyprlofs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ct)
+{
+ hlnode_t *hp = VTOHLN(vp);
+
+ if (hp->hln_looped == 1) {
+ VOP_RWUNLOCK(REALVP(vp), write_lock, ct);
+ return;
+ }
+
+ rw_exit(&hp->hln_rwlock);
+}
+
+static int
+hyprlofs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
+ caller_context_t *ct)
+{
+ int error;
+
+ if (VTOHLN(vp)->hln_looped == 1)
+ return (VOP_PATHCONF(REALVP(vp), cmd, valp, cr, ct));
+
+ switch (cmd) {
+ case _PC_XATTR_ENABLED:
+ case _PC_XATTR_EXISTS:
+ case _PC_SATTR_ENABLED:
+ case _PC_SATTR_EXISTS:
+ error = EINVAL;
+ break;
+ case _PC_TIMESTAMP_RESOLUTION:
+ /* nanosecond timestamp resolution */
+ *valp = 1L;
+ error = 0;
+ break;
+ default:
+ error = fs_pathconf(vp, cmd, valp, cr, ct);
+ }
+ return (error);
+}
+
+
+struct vnodeops *hyprlofs_vnodeops;
+
+const fs_operation_def_t hyprlofs_vnodeops_template[] = {
+ VOPNAME_OPEN, { .vop_open = hyprlofs_open },
+ VOPNAME_CLOSE, { .vop_close = hyprlofs_close },
+ VOPNAME_READ, { .vop_read = hyprlofs_read },
+ VOPNAME_WRITE, { .vop_write = hyprlofs_write },
+ VOPNAME_IOCTL, { .vop_ioctl = hyprlofs_ioctl },
+ VOPNAME_GETATTR, { .vop_getattr = hyprlofs_getattr },
+ VOPNAME_SETATTR, { .vop_setattr = hyprlofs_setattr },
+ VOPNAME_ACCESS, { .vop_access = hyprlofs_access },
+ VOPNAME_LOOKUP, { .vop_lookup = hyprlofs_lookup },
+ VOPNAME_CREATE, { .error = fs_error },
+ VOPNAME_REMOVE, { .vop_remove = hyprlofs_remove },
+ VOPNAME_LINK, { .error = fs_error },
+ VOPNAME_RENAME, { .error = fs_error },
+ VOPNAME_MKDIR, { .error = fs_error },
+ VOPNAME_RMDIR, { .vop_rmdir = hyprlofs_rmdir },
+ VOPNAME_READDIR, { .vop_readdir = hyprlofs_readdir },
+ VOPNAME_SYMLINK, { .error = fs_error },
+ VOPNAME_READLINK, { .error = fs_error },
+ VOPNAME_FSYNC, { .vop_fsync = hyprlofs_fsync },
+ VOPNAME_INACTIVE, { .vop_inactive = hyprlofs_inactive },
+ VOPNAME_FID, { .vop_fid = hyprlofs_fid },
+ VOPNAME_RWLOCK, { .vop_rwlock = hyprlofs_rwlock },
+ VOPNAME_RWUNLOCK, { .vop_rwunlock = hyprlofs_rwunlock },
+ VOPNAME_SEEK, { .vop_seek = hyprlofs_seek },
+ VOPNAME_SPACE, { .vop_space = hyprlofs_space },
+ VOPNAME_GETPAGE, { .vop_getpage = hyprlofs_getpage },
+ VOPNAME_PUTPAGE, { .vop_putpage = hyprlofs_putpage },
+ VOPNAME_MAP, { .vop_map = hyprlofs_map },
+ VOPNAME_ADDMAP, { .vop_addmap = hyprlofs_addmap },
+ VOPNAME_DELMAP, { .vop_delmap = hyprlofs_delmap },
+ VOPNAME_PATHCONF, { .vop_pathconf = hyprlofs_pathconf },
+ VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support },
+ NULL, NULL
+};
diff --git a/usr/src/uts/common/fs/lxproc/lxpr_subr.c b/usr/src/uts/common/fs/lxproc/lxpr_subr.c
new file mode 100644
index 0000000000..3c1405d4af
--- /dev/null
+++ b/usr/src/uts/common/fs/lxproc/lxpr_subr.c
@@ -0,0 +1,524 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/varargs.h>
+#include <sys/cpuvar.h>
+#include <sys/mman.h>
+#include <sys/vmsystm.h>
+#include <sys/prsystm.h>
+
+#include "lxproc.h"
+
+#define LXPRCACHE_NAME "lxpr_cache"
+
+static int lxpr_node_constructor(void *, void *, int);
+static void lxpr_node_destructor(void *, void *);
+
+static kmem_cache_t *lxpr_node_cache;
+
+struct lxpr_uiobuf {
+ uio_t *uiop;
+ char *buffer;
+ uint32_t buffsize;
+ char *pos;
+ size_t beg;
+ int error;
+};
+
+int lxpr_bufsize = 4000;
+
+struct lxpr_uiobuf *
+lxpr_uiobuf_new(uio_t *uiop)
+{
+ /* Allocate memory for both lxpr_uiobuf and output buffer */
+ int bufsize = lxpr_bufsize;
+ struct lxpr_uiobuf *uiobuf =
+ kmem_alloc(sizeof (struct lxpr_uiobuf) + bufsize, KM_SLEEP);
+
+ uiobuf->uiop = uiop;
+ uiobuf->buffer = (char *)&uiobuf[1];
+ uiobuf->buffsize = bufsize;
+ uiobuf->pos = uiobuf->buffer;
+ uiobuf->beg = 0;
+ uiobuf->error = 0;
+
+ return (uiobuf);
+}
+
+void
+lxpr_uiobuf_free(struct lxpr_uiobuf *uiobuf)
+{
+ ASSERT(uiobuf != NULL);
+ ASSERT(uiobuf->pos == uiobuf->buffer);
+
+ kmem_free(uiobuf, sizeof (struct lxpr_uiobuf) + uiobuf->buffsize);
+}
+
+void
+lxpr_uiobuf_seek(struct lxpr_uiobuf *uiobuf, offset_t offset)
+{
+ uiobuf->uiop->uio_offset = (off_t)offset;
+}
+
+void
+lxpr_uiobuf_seterr(struct lxpr_uiobuf *uiobuf, int err)
+{
+ ASSERT(uiobuf->error == 0);
+
+ uiobuf->error = err;
+}
+
+int
+lxpr_uiobuf_flush(struct lxpr_uiobuf *uiobuf)
+{
+ off_t off = uiobuf->uiop->uio_offset;
+ caddr_t uaddr = uiobuf->buffer;
+ size_t beg = uiobuf->beg;
+ size_t size = (uintptr_t)uiobuf->pos - (uintptr_t)uaddr;
+
+ if (uiobuf->error == 0 && uiobuf->uiop->uio_resid != 0) {
+ ASSERT(off >= beg);
+
+ if (beg + size > off && off >= 0)
+ uiobuf->error =
+ uiomove(uaddr + (off - beg), size - (off - beg),
+ UIO_READ, uiobuf->uiop);
+
+ uiobuf->beg += size;
+ }
+
+ uiobuf->pos = uaddr;
+
+ return (uiobuf->error);
+}
+
+void
+lxpr_uiobuf_write(struct lxpr_uiobuf *uiobuf, const char *buf, size_t size)
+{
+ /* While we can still carry on */
+ while (uiobuf->error == 0 && uiobuf->uiop->uio_resid != 0) {
+ uintptr_t remain = (uintptr_t)uiobuf->buffsize -
+ ((uintptr_t)uiobuf->pos - (uintptr_t)uiobuf->buffer);
+
+ /* Enough space in buffer? */
+ if (remain >= size) {
+ bcopy(buf, uiobuf->pos, size);
+ uiobuf->pos += size;
+ return;
+ }
+
+ /* Not enough space, so copy all we can and try again */
+ bcopy(buf, uiobuf->pos, remain);
+ uiobuf->pos += remain;
+ (void) lxpr_uiobuf_flush(uiobuf);
+ buf += remain;
+ size -= remain;
+ }
+}
+
+#define TYPBUFFSIZE 256
+
+void
+lxpr_uiobuf_printf(struct lxpr_uiobuf *uiobuf, const char *fmt, ...)
+{
+ va_list args;
+ char buff[TYPBUFFSIZE];
+ int len;
+ char *buffer;
+
+ /* Can we still do any output */
+ if (uiobuf->error != 0 || uiobuf->uiop->uio_resid == 0)
+ return;
+
+ va_start(args, fmt);
+
+ /* Try using stack allocated buffer */
+ len = vsnprintf(buff, TYPBUFFSIZE, fmt, args);
+ if (len < TYPBUFFSIZE) {
+ va_end(args);
+ lxpr_uiobuf_write(uiobuf, buff, len);
+ return;
+ }
+
+ /* Not enough space in pre-allocated buffer */
+ buffer = kmem_alloc(len + 1, KM_SLEEP);
+
+ /*
+ * We know we allocated the correct amount of space
+ * so no check on the return value
+ */
+ (void) vsnprintf(buffer, len+1, fmt, args);
+ lxpr_uiobuf_write(uiobuf, buffer, len);
+ va_end(args);
+ kmem_free(buffer, len+1);
+}
+
+/*
+ * lxpr_lock():
+ *
+ * Lookup process from pid and return with p_plock and P_PR_LOCK held.
+ */
+proc_t *
+lxpr_lock(pid_t pid)
+{
+ proc_t *p;
+ kmutex_t *mp;
+
+ ASSERT(!MUTEX_HELD(&pidlock));
+
+ for (;;) {
+ mutex_enter(&pidlock);
+
+ /*
+ * If the pid is 1, we really want the zone's init process
+ */
+ p = prfind((pid == 1) ?
+ curproc->p_zone->zone_proc_initpid : pid);
+
+ if (p == NULL || p->p_stat == SIDL) {
+ mutex_exit(&pidlock);
+ return (NULL);
+ }
+
+ /*
+ * p_lock is persistent, but p itself is not -- it could
+ * vanish during cv_wait(). Load p->p_lock now so we can
+ * drop it after cv_wait() without referencing p.
+ */
+ mp = &p->p_lock;
+ mutex_enter(mp);
+
+ mutex_exit(&pidlock);
+
+ if (p->p_flag & SEXITING) {
+ /*
+ * This process is exiting -- let it go.
+ */
+ mutex_exit(mp);
+ return (NULL);
+ }
+
+ if (!(p->p_proc_flag & P_PR_LOCK))
+ break;
+
+ cv_wait(&pr_pid_cv[p->p_slot], mp);
+ mutex_exit(mp);
+ }
+
+ p->p_proc_flag |= P_PR_LOCK;
+ THREAD_KPRI_REQUEST();
+ return (p);
+}
+
+/*
+ * lxpr_unlock()
+ *
+ * Unlock locked process
+ */
+void
+lxpr_unlock(proc_t *p)
+{
+ ASSERT(p->p_proc_flag & P_PR_LOCK);
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ ASSERT(!MUTEX_HELD(&pidlock));
+
+ cv_signal(&pr_pid_cv[p->p_slot]);
+ p->p_proc_flag &= ~P_PR_LOCK;
+ mutex_exit(&p->p_lock);
+ THREAD_KPRI_RELEASE();
+}
+
+void
+lxpr_initnodecache()
+{
+ lxpr_node_cache = kmem_cache_create(LXPRCACHE_NAME,
+ sizeof (lxpr_node_t), 0,
+ lxpr_node_constructor, lxpr_node_destructor, NULL, NULL, NULL, 0);
+}
+
+void
+lxpr_fininodecache()
+{
+ kmem_cache_destroy(lxpr_node_cache);
+}
+
+/* ARGSUSED */
+static int
+lxpr_node_constructor(void *buf, void *un, int kmflags)
+{
+ lxpr_node_t *lxpnp = buf;
+ vnode_t *vp;
+
+ vp = lxpnp->lxpr_vnode = vn_alloc(kmflags);
+ if (vp == NULL)
+ return (-1);
+
+ (void) vn_setops(vp, lxpr_vnodeops);
+ vp->v_data = lxpnp;
+
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+lxpr_node_destructor(void *buf, void *un)
+{
+ lxpr_node_t *lxpnp = buf;
+
+ vn_free(LXPTOV(lxpnp));
+}
+
+/*
+ * Calculate an inode number
+ *
+ * This takes various bits of info and munges them
+ * to give the inode number for an lxproc node
+ */
+ino_t
+lxpr_inode(lxpr_nodetype_t type, pid_t pid, int fd)
+{
+ if (pid == 1)
+ pid = curproc->p_zone->zone_proc_initpid;
+
+ switch (type) {
+ case LXPR_PIDDIR:
+ return (pid + 1);
+ case LXPR_PROCDIR:
+ return (maxpid + 2);
+ case LXPR_PID_FD_FD:
+ return (maxpid + 2 +
+ (pid * (LXPR_FD_PERPROC + LXPR_NFILES)) +
+ LXPR_NFILES + fd);
+ default:
+ return (maxpid + 2 +
+ (pid * (LXPR_FD_PERPROC + LXPR_NFILES)) +
+ type);
+ }
+}
+
+/*
+ * Return inode number of parent (directory)
+ */
+ino_t
+lxpr_parentinode(lxpr_node_t *lxpnp)
+{
+ /*
+ * If the input node is the root then the parent inode
+ * is the mounted on inode so just return our inode number
+ */
+ if (lxpnp->lxpr_type != LXPR_PROCDIR)
+ return (VTOLXP(lxpnp->lxpr_parent)->lxpr_ino);
+ else
+ return (lxpnp->lxpr_ino);
+}
+
+/*
+ * Allocate a new lxproc node
+ *
+ * This also allocates the vnode associated with it
+ */
+lxpr_node_t *
+lxpr_getnode(vnode_t *dp, lxpr_nodetype_t type, proc_t *p, int fd)
+{
+ lxpr_node_t *lxpnp;
+ vnode_t *vp;
+ user_t *up;
+ timestruc_t now;
+
+ /*
+ * Allocate a new node. It is deallocated in vop_innactive
+ */
+ lxpnp = kmem_cache_alloc(lxpr_node_cache, KM_SLEEP);
+
+ /*
+ * Set defaults (may be overridden below)
+ */
+ gethrestime(&now);
+ lxpnp->lxpr_type = type;
+ lxpnp->lxpr_realvp = NULL;
+ lxpnp->lxpr_parent = dp;
+ VN_HOLD(dp);
+ if (p != NULL) {
+ lxpnp->lxpr_pid = ((p->p_pid ==
+ curproc->p_zone->zone_proc_initpid) ? 1 : p->p_pid);
+
+ lxpnp->lxpr_time = PTOU(p)->u_start;
+ lxpnp->lxpr_uid = crgetruid(p->p_cred);
+ lxpnp->lxpr_gid = crgetrgid(p->p_cred);
+ lxpnp->lxpr_ino = lxpr_inode(type, p->p_pid, fd);
+ } else {
+ /* Pretend files without a proc belong to sched */
+ lxpnp->lxpr_pid = 0;
+ lxpnp->lxpr_time = now;
+ lxpnp->lxpr_uid = lxpnp->lxpr_gid = 0;
+ lxpnp->lxpr_ino = lxpr_inode(type, 0, 0);
+ }
+
+ /* initialize the vnode data */
+ vp = lxpnp->lxpr_vnode;
+ vn_reinit(vp);
+ vp->v_flag = VNOCACHE|VNOMAP|VNOSWAP|VNOMOUNT;
+ vp->v_vfsp = dp->v_vfsp;
+
+ /*
+ * Do node specific stuff
+ */
+ switch (type) {
+ case LXPR_PROCDIR:
+ vp->v_flag |= VROOT;
+ vp->v_type = VDIR;
+ lxpnp->lxpr_mode = 0555; /* read-search by everyone */
+ break;
+
+ case LXPR_PID_CURDIR:
+ ASSERT(p != NULL);
+
+ /*
+ * Zombie check. p_stat is officially protected by pidlock,
+ * but we can't grab pidlock here because we already hold
+ * p_lock. Luckily if we look at the process exit code
+ * we see that p_stat only transisions from SRUN to SZOMB
+ * while p_lock is held. Aside from this, the only other
+ * p_stat transition that we need to be aware about is
+ * SIDL to SRUN, but that's not a problem since lxpr_lock()
+ * ignores nodes in the SIDL state so we'll never get a node
+ * that isn't already in the SRUN state.
+ */
+ if (p->p_stat == SZOMB) {
+ lxpnp->lxpr_realvp = NULL;
+ } else {
+ up = PTOU(p);
+ lxpnp->lxpr_realvp = up->u_cdir;
+ ASSERT(lxpnp->lxpr_realvp != NULL);
+ VN_HOLD(lxpnp->lxpr_realvp);
+ }
+ vp->v_type = VLNK;
+ lxpnp->lxpr_mode = 0777; /* anyone does anything ! */
+ break;
+
+ case LXPR_PID_ROOTDIR:
+ ASSERT(p != NULL);
+ /* Zombie check. see locking comment above */
+ if (p->p_stat == SZOMB) {
+ lxpnp->lxpr_realvp = NULL;
+ } else {
+ up = PTOU(p);
+ lxpnp->lxpr_realvp =
+ up->u_rdir != NULL ? up->u_rdir : rootdir;
+ ASSERT(lxpnp->lxpr_realvp != NULL);
+ VN_HOLD(lxpnp->lxpr_realvp);
+ }
+ vp->v_type = VLNK;
+ lxpnp->lxpr_mode = 0777; /* anyone does anything ! */
+ break;
+
+ case LXPR_PID_EXE:
+ ASSERT(p != NULL);
+ lxpnp->lxpr_realvp = p->p_exec;
+ if (lxpnp->lxpr_realvp != NULL) {
+ VN_HOLD(lxpnp->lxpr_realvp);
+ }
+ vp->v_type = VLNK;
+ lxpnp->lxpr_mode = 0777;
+ break;
+
+ case LXPR_SELF:
+ vp->v_type = VLNK;
+ lxpnp->lxpr_mode = 0777; /* anyone does anything ! */
+ break;
+
+ case LXPR_PID_FD_FD:
+ ASSERT(p != NULL);
+ /* lxpr_realvp is set after we return */
+ vp->v_type = VLNK;
+ lxpnp->lxpr_mode = 0700; /* read-write-exe owner only */
+ break;
+
+ case LXPR_PID_FDDIR:
+ ASSERT(p != NULL);
+ vp->v_type = VDIR;
+ lxpnp->lxpr_mode = 0500; /* read-search by owner only */
+ break;
+
+ case LXPR_PIDDIR:
+ ASSERT(p != NULL);
+ vp->v_type = VDIR;
+ lxpnp->lxpr_mode = 0511;
+ break;
+
+ case LXPR_NETDIR:
+ vp->v_type = VDIR;
+ lxpnp->lxpr_mode = 0555; /* read-search by all */
+ break;
+
+ case LXPR_PID_ENV:
+ case LXPR_PID_MEM:
+ ASSERT(p != NULL);
+ /*FALLTHRU*/
+ case LXPR_KCORE:
+ vp->v_type = VREG;
+ lxpnp->lxpr_mode = 0400; /* read-only by owner only */
+ break;
+
+ default:
+ vp->v_type = VREG;
+ lxpnp->lxpr_mode = 0444; /* read-only by all */
+ break;
+ }
+
+ return (lxpnp);
+}
+
+
+/*
+ * Free the storage obtained from lxpr_getnode().
+ */
+void
+lxpr_freenode(lxpr_node_t *lxpnp)
+{
+ ASSERT(lxpnp != NULL);
+ ASSERT(LXPTOV(lxpnp) != NULL);
+
+ /*
+ * delete any association with realvp
+ */
+ if (lxpnp->lxpr_realvp != NULL)
+ VN_RELE(lxpnp->lxpr_realvp);
+
+ /*
+ * delete any association with parent vp
+ */
+ if (lxpnp->lxpr_parent != NULL)
+ VN_RELE(lxpnp->lxpr_parent);
+
+ /*
+ * Release the lxprnode.
+ */
+ kmem_cache_free(lxpr_node_cache, lxpnp);
+}
diff --git a/usr/src/uts/common/fs/lxproc/lxpr_vfsops.c b/usr/src/uts/common/fs/lxproc/lxpr_vfsops.c
new file mode 100644
index 0000000000..1bb7bd3823
--- /dev/null
+++ b/usr/src/uts/common/fs/lxproc/lxpr_vfsops.c
@@ -0,0 +1,367 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/cmn_err.h>
+#include <sys/cred.h>
+#include <sys/debug.h>
+#include <sys/errno.h>
+#include <sys/proc.h>
+#include <sys/stat.h>
+#include <sys/statvfs.h>
+#include <sys/sysmacros.h>
+#include <sys/systm.h>
+#include <sys/var.h>
+#include <sys/vfs.h>
+#include <sys/vfs_opreg.h>
+#include <sys/vnode.h>
+#include <sys/mode.h>
+#include <sys/signal.h>
+#include <sys/user.h>
+#include <sys/mount.h>
+#include <sys/bitmap.h>
+#include <sys/kmem.h>
+#include <sys/policy.h>
+#include <sys/modctl.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+
+#include "lxproc.h"
+
+/* Module level parameters */
+static int lxprocfstype;
+static dev_t lxprocdev;
+static kmutex_t lxpr_mount_lock;
+
+int nproc_highbit; /* highbit(v.v_nproc) */
+
+static int lxpr_mount(vfs_t *, vnode_t *, mounta_t *, cred_t *);
+static int lxpr_unmount(vfs_t *, int, cred_t *);
+static int lxpr_root(vfs_t *, vnode_t **);
+static int lxpr_statvfs(vfs_t *, statvfs64_t *);
+static int lxpr_init(int, char *);
+
+static vfsdef_t vfw = {
+ VFSDEF_VERSION,
+ "lxproc",
+ lxpr_init,
+ VSW_ZMOUNT,
+ NULL
+};
+
+/*
+ * Module linkage information for the kernel.
+ */
+extern struct mod_ops mod_fsops;
+
+static struct modlfs modlfs = {
+ &mod_fsops, "generic linux procfs", &vfw
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, (void *)&modlfs, NULL
+};
+
+int
+_init(void)
+{
+ return (mod_install(&modlinkage));
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ int retval;
+
+ /*
+ * attempt to unload the module
+ */
+ if ((retval = mod_remove(&modlinkage)) != 0)
+ goto done;
+
+ /*
+ * destroy lxpr_node cache
+ */
+ lxpr_fininodecache();
+
+ /*
+ * clean out the vfsops and vnodeops
+ */
+ (void) vfs_freevfsops_by_type(lxprocfstype);
+ vn_freevnodeops(lxpr_vnodeops);
+
+ mutex_destroy(&lxpr_mount_lock);
+done:
+ return (retval);
+}
+
+static int
+lxpr_init(int fstype, char *name)
+{
+ static const fs_operation_def_t lxpr_vfsops_template[] = {
+ VFSNAME_MOUNT, { .vfs_mount = lxpr_mount },
+ VFSNAME_UNMOUNT, { .vfs_unmount = lxpr_unmount },
+ VFSNAME_ROOT, { .vfs_root = lxpr_root },
+ VFSNAME_STATVFS, { .vfs_statvfs = lxpr_statvfs },
+ NULL, NULL
+ };
+ extern const fs_operation_def_t lxpr_vnodeops_template[];
+ int error;
+ major_t dev;
+
+ nproc_highbit = highbit(v.v_proc);
+ lxprocfstype = fstype;
+ ASSERT(lxprocfstype != 0);
+
+ mutex_init(&lxpr_mount_lock, NULL, MUTEX_DEFAULT, NULL);
+
+ /*
+ * Associate VFS ops vector with this fstype.
+ */
+ error = vfs_setfsops(fstype, lxpr_vfsops_template, NULL);
+ if (error != 0) {
+ cmn_err(CE_WARN, "lxpr_init: bad vfs ops template");
+ return (error);
+ }
+
+ /*
+ * Set up vnode ops vector too.
+ */
+ error = vn_make_ops(name, lxpr_vnodeops_template, &lxpr_vnodeops);
+ if (error != 0) {
+ (void) vfs_freevfsops_by_type(fstype);
+ cmn_err(CE_WARN, "lxpr_init: bad vnode ops template");
+ return (error);
+ }
+
+ /*
+ * Assign a unique "device" number (used by stat(2)).
+ */
+ if ((dev = getudev()) == (major_t)-1) {
+ cmn_err(CE_WARN, "lxpr_init: can't get unique device number");
+ dev = 0;
+ }
+
+ /*
+ * Make the pseudo device
+ */
+ lxprocdev = makedevice(dev, 0);
+
+ /*
+ * Initialize cache for lxpr_nodes
+ */
+ lxpr_initnodecache();
+
+ return (0);
+}
+
+static int
+lxpr_mount(vfs_t *vfsp, vnode_t *mvp, mounta_t *uap, cred_t *cr)
+{
+ lxpr_mnt_t *lxpr_mnt;
+ zone_t *zone = curproc->p_zone;
+ ldi_ident_t li;
+ int err;
+
+ /*
+ * must be root to mount
+ */
+ if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
+ return (EPERM);
+
+ /*
+ * mount point must be a directory
+ */
+ if (mvp->v_type != VDIR)
+ return (ENOTDIR);
+
+ if (zone == global_zone) {
+ zone_t *mntzone;
+
+ mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
+ zone_rele(mntzone);
+ if (zone != mntzone)
+ return (EBUSY);
+ }
+
+ /*
+ * Having the resource be anything but "lxproc" doesn't make sense
+ */
+ vfs_setresource(vfsp, "lxproc", 0);
+
+ lxpr_mnt = kmem_alloc(sizeof (*lxpr_mnt), KM_SLEEP);
+
+ if ((err = ldi_ident_from_mod(&modlinkage, &li)) != 0) {
+ kmem_free(lxpr_mnt, sizeof (*lxpr_mnt));
+ return (err);
+ }
+
+ lxpr_mnt->lxprm_li = li;
+
+ mutex_enter(&lxpr_mount_lock);
+
+ /*
+ * Ensure we don't allow overlaying mounts
+ */
+ mutex_enter(&mvp->v_lock);
+ if ((uap->flags & MS_OVERLAY) == 0 &&
+ (mvp->v_count > 1 || (mvp->v_flag & VROOT))) {
+ mutex_exit(&mvp->v_lock);
+ mutex_exit(&lxpr_mount_lock);
+ kmem_free(lxpr_mnt, sizeof ((*lxpr_mnt)));
+ return (EBUSY);
+ }
+ mutex_exit(&mvp->v_lock);
+
+ /*
+ * allocate the first vnode
+ */
+ zone_hold(lxpr_mnt->lxprm_zone = zone);
+
+ /* Arbitrarily set the parent vnode to the mounted over directory */
+ lxpr_mnt->lxprm_node = lxpr_getnode(mvp, LXPR_PROCDIR, NULL, 0);
+
+ /* Correctly set the fs for the root node */
+ lxpr_mnt->lxprm_node->lxpr_vnode->v_vfsp = vfsp;
+
+ vfs_make_fsid(&vfsp->vfs_fsid, lxprocdev, lxprocfstype);
+ vfsp->vfs_bsize = DEV_BSIZE;
+ vfsp->vfs_fstype = lxprocfstype;
+ vfsp->vfs_data = (caddr_t)lxpr_mnt;
+ vfsp->vfs_dev = lxprocdev;
+
+ mutex_exit(&lxpr_mount_lock);
+
+ return (0);
+}
+
+static int
+lxpr_unmount(vfs_t *vfsp, int flag, cred_t *cr)
+{
+ lxpr_mnt_t *lxpr_mnt = (lxpr_mnt_t *)vfsp->vfs_data;
+ vnode_t *vp;
+ int count;
+
+ ASSERT(lxpr_mnt != NULL);
+ vp = LXPTOV(lxpr_mnt->lxprm_node);
+
+ mutex_enter(&lxpr_mount_lock);
+
+ /*
+ * must be root to unmount
+ */
+ if (secpolicy_fs_unmount(cr, vfsp) != 0) {
+ mutex_exit(&lxpr_mount_lock);
+ return (EPERM);
+ }
+
+ /*
+ * forced unmount is not supported by this file system
+ */
+ if (flag & MS_FORCE) {
+ mutex_exit(&lxpr_mount_lock);
+ return (ENOTSUP);
+ }
+
+ /*
+ * Ensure that no vnodes are in use on this mount point.
+ */
+ mutex_enter(&vp->v_lock);
+ count = vp->v_count;
+ mutex_exit(&vp->v_lock);
+ if (count > 1) {
+ mutex_exit(&lxpr_mount_lock);
+ return (EBUSY);
+ }
+
+ /*
+ * purge the dnlc cache for vnode entries
+ * associated with this file system
+ */
+ count = dnlc_purge_vfsp(vfsp, 0);
+
+ /*
+ * free up the lxprnode
+ */
+ lxpr_freenode(lxpr_mnt->lxprm_node);
+ zone_rele(lxpr_mnt->lxprm_zone);
+ kmem_free(lxpr_mnt, sizeof (*lxpr_mnt));
+
+ mutex_exit(&lxpr_mount_lock);
+
+ return (0);
+}
+
+static int
+lxpr_root(vfs_t *vfsp, vnode_t **vpp)
+{
+ lxpr_node_t *lxpnp = ((lxpr_mnt_t *)vfsp->vfs_data)->lxprm_node;
+ vnode_t *vp = LXPTOV(lxpnp);
+
+ VN_HOLD(vp);
+ *vpp = vp;
+ return (0);
+}
+
+static int
+lxpr_statvfs(vfs_t *vfsp, statvfs64_t *sp)
+{
+ int n;
+ dev32_t d32;
+ extern uint_t nproc;
+
+ n = v.v_proc - nproc;
+
+ bzero((caddr_t)sp, sizeof (*sp));
+ sp->f_bsize = DEV_BSIZE;
+ sp->f_frsize = DEV_BSIZE;
+ sp->f_blocks = (fsblkcnt64_t)0;
+ sp->f_bfree = (fsblkcnt64_t)0;
+ sp->f_bavail = (fsblkcnt64_t)0;
+ sp->f_files = (fsfilcnt64_t)v.v_proc + 2;
+ sp->f_ffree = (fsfilcnt64_t)n;
+ sp->f_favail = (fsfilcnt64_t)n;
+ (void) cmpldev(&d32, vfsp->vfs_dev);
+ sp->f_fsid = d32;
+ /* It is guaranteed that vsw_name will fit in f_basetype */
+ (void) strcpy(sp->f_basetype, vfssw[lxprocfstype].vsw_name);
+ sp->f_flag = vf_to_stf(vfsp->vfs_flag);
+ sp->f_namemax = 64; /* quite arbitrary */
+
+ (void) strcpy(sp->f_fstr, "lxproc");
+
+ return (0);
+}
diff --git a/usr/src/uts/common/fs/lxproc/lxpr_vnops.c b/usr/src/uts/common/fs/lxproc/lxpr_vnops.c
new file mode 100644
index 0000000000..e13e26093e
--- /dev/null
+++ b/usr/src/uts/common/fs/lxproc/lxpr_vnops.c
@@ -0,0 +1,3094 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * lxproc -- a loosely Linux-compatible /proc
+ *
+ * The aspiration here is to provide something that sufficiently approximates
+ * the Linux /proc implementation for purposes of offering some compatibility
+ * for simple Linux /proc readers (e.g., ps/top/htop). However, it is not
+ * intended to exactly mimic Linux semantics; when choosing between offering
+ * compatibility and telling the truth, we emphatically pick the truth. A
+ * particular glaring example of this is the Linux notion of "tasks" (that is,
+ * threads), which -- due to historical misadventures on Linux -- allocate their
+ * identifiers from the process identifier space. (That is, each thread has in
+ * effect a pid.) Some Linux /proc readers have come to depend on this
+ * attribute, and become confused when threads appear with proper identifiers,
+ * so we simply opt for the pre-2.6 behavior, and do not present the tasks
+ * directory at all. Similarly, when choosing between offering compatibility
+ * and remaining consistent with our broader security model, we (obviously)
+ * choose security over compatibility. In short, this is meant to be a best
+ * effort -- no more.
+ */
+
+#include <sys/cpupart.h>
+#include <sys/cpuvar.h>
+#include <sys/session.h>
+#include <sys/vmparam.h>
+#include <sys/mman.h>
+#include <vm/rm.h>
+#include <vm/seg_vn.h>
+#include <sys/sdt.h>
+#include <sys/strlog.h>
+#include <sys/stropts.h>
+#include <sys/cmn_err.h>
+#include <sys/x86_archext.h>
+#include <sys/archsystm.h>
+#include <sys/fp.h>
+#include <sys/pool_pset.h>
+#include <sys/pset.h>
+#include <sys/zone.h>
+#include <sys/pghw.h>
+#include <sys/vfs_opreg.h>
+
+/* Dependent on procfs */
+extern kthread_t *prchoose(proc_t *);
+
+#include "lxproc.h"
+
+extern pgcnt_t swapfs_minfree;
+extern time_t boot_time;
+
+/*
+ * Pointer to the vnode ops vector for this fs.
+ * This is instantiated in lxprinit() in lxpr_vfsops.c
+ */
+vnodeops_t *lxpr_vnodeops;
+
+static int lxpr_open(vnode_t **, int, cred_t *, caller_context_t *);
+static int lxpr_close(vnode_t *, int, int, offset_t, cred_t *,
+ caller_context_t *);
+static int lxpr_read(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
+static int lxpr_getattr(vnode_t *, vattr_t *, int, cred_t *,
+ caller_context_t *);
+static int lxpr_access(vnode_t *, int, int, cred_t *, caller_context_t *);
+static int lxpr_lookup(vnode_t *, char *, vnode_t **,
+ pathname_t *, int, vnode_t *, cred_t *, caller_context_t *, int *,
+ pathname_t *);
+static int lxpr_readdir(vnode_t *, uio_t *, cred_t *, int *,
+ caller_context_t *, int);
+static int lxpr_readlink(vnode_t *, uio_t *, cred_t *, caller_context_t *);
+static int lxpr_cmp(vnode_t *, vnode_t *, caller_context_t *);
+static int lxpr_realvp(vnode_t *, vnode_t **, caller_context_t *);
+static int lxpr_sync(void);
+static void lxpr_inactive(vnode_t *, cred_t *, caller_context_t *);
+
+static vnode_t *lxpr_lookup_procdir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_piddir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_not_a_dir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_fddir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_netdir(vnode_t *, char *);
+
+static int lxpr_readdir_procdir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_piddir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_not_a_dir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_fddir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_netdir(lxpr_node_t *, uio_t *, int *);
+
+static void lxpr_read_invalid(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_empty(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_cpuinfo(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_isdir(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_fd(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_kmsg(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_loadavg(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_meminfo(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_mounts(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_partitions(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_stat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_uptime(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_version(lxpr_node_t *, lxpr_uiobuf_t *);
+
+static void lxpr_read_pid_cmdline(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_maps(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_stat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_statm(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_status(lxpr_node_t *, lxpr_uiobuf_t *);
+
+static void lxpr_read_net_arp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_dev(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_dev_mcast(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_igmp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_ip_mr_cache(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_ip_mr_vif(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_mcfilter(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_netstat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_raw(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_route(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_rpc(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_rt_cache(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_sockstat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_snmp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_stat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_tcp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_udp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_unix(lxpr_node_t *, lxpr_uiobuf_t *);
+
+/*
+ * Simple conversion
+ */
+#define btok(x) ((x) >> 10) /* bytes to kbytes */
+#define ptok(x) ((x) << (PAGESHIFT - 10)) /* pages to kbytes */
+
+/*
+ * The lxproc vnode operations vector
+ */
+const fs_operation_def_t lxpr_vnodeops_template[] = {
+ VOPNAME_OPEN, { .vop_open = lxpr_open },
+ VOPNAME_CLOSE, { .vop_close = lxpr_close },
+ VOPNAME_READ, { .vop_read = lxpr_read },
+ VOPNAME_GETATTR, { .vop_getattr = lxpr_getattr },
+ VOPNAME_ACCESS, { .vop_access = lxpr_access },
+ VOPNAME_LOOKUP, { .vop_lookup = lxpr_lookup },
+ VOPNAME_READDIR, { .vop_readdir = lxpr_readdir },
+ VOPNAME_READLINK, { .vop_readlink = lxpr_readlink },
+ VOPNAME_FSYNC, { .error = lxpr_sync },
+ VOPNAME_SEEK, { .error = lxpr_sync },
+ VOPNAME_INACTIVE, { .vop_inactive = lxpr_inactive },
+ VOPNAME_CMP, { .vop_cmp = lxpr_cmp },
+ VOPNAME_REALVP, { .vop_realvp = lxpr_realvp },
+ NULL, NULL
+};
+
+/*
+ * file contents of an lxproc directory.
+ */
+static lxpr_dirent_t lxpr_dir[] = {
+ { LXPR_CMDLINE, "cmdline" },
+ { LXPR_CPUINFO, "cpuinfo" },
+ { LXPR_DEVICES, "devices" },
+ { LXPR_DMA, "dma" },
+ { LXPR_FILESYSTEMS, "filesystems" },
+ { LXPR_INTERRUPTS, "interrupts" },
+ { LXPR_IOPORTS, "ioports" },
+ { LXPR_KCORE, "kcore" },
+ { LXPR_KMSG, "kmsg" },
+ { LXPR_LOADAVG, "loadavg" },
+ { LXPR_MEMINFO, "meminfo" },
+ { LXPR_MOUNTS, "mounts" },
+ { LXPR_NETDIR, "net" },
+ { LXPR_PARTITIONS, "partitions" },
+ { LXPR_SELF, "self" },
+ { LXPR_STAT, "stat" },
+ { LXPR_UPTIME, "uptime" },
+ { LXPR_VERSION, "version" }
+};
+
+#define PROCDIRFILES (sizeof (lxpr_dir) / sizeof (lxpr_dir[0]))
+
+/*
+ * Contents of an /lxproc/<pid> directory.
+ */
+static lxpr_dirent_t piddir[] = {
+ { LXPR_PID_CMDLINE, "cmdline" },
+ { LXPR_PID_CPU, "cpu" },
+ { LXPR_PID_CURDIR, "cwd" },
+ { LXPR_PID_ENV, "environ" },
+ { LXPR_PID_EXE, "exe" },
+ { LXPR_PID_MAPS, "maps" },
+ { LXPR_PID_MEM, "mem" },
+ { LXPR_PID_ROOTDIR, "root" },
+ { LXPR_PID_STAT, "stat" },
+ { LXPR_PID_STATM, "statm" },
+ { LXPR_PID_STATUS, "status" },
+ { LXPR_PID_FDDIR, "fd" }
+};
+
+#define PIDDIRFILES (sizeof (piddir) / sizeof (piddir[0]))
+
+/*
+ * contents of /lxproc/net directory
+ */
+static lxpr_dirent_t netdir[] = {
+ { LXPR_NET_ARP, "arp" },
+ { LXPR_NET_DEV, "dev" },
+ { LXPR_NET_DEV_MCAST, "dev_mcast" },
+ { LXPR_NET_IGMP, "igmp" },
+ { LXPR_NET_IP_MR_CACHE, "ip_mr_cache" },
+ { LXPR_NET_IP_MR_VIF, "ip_mr_vif" },
+ { LXPR_NET_MCFILTER, "mcfilter" },
+ { LXPR_NET_NETSTAT, "netstat" },
+ { LXPR_NET_RAW, "raw" },
+ { LXPR_NET_ROUTE, "route" },
+ { LXPR_NET_RPC, "rpc" },
+ { LXPR_NET_RT_CACHE, "rt_cache" },
+ { LXPR_NET_SOCKSTAT, "sockstat" },
+ { LXPR_NET_SNMP, "snmp" },
+ { LXPR_NET_STAT, "stat" },
+ { LXPR_NET_TCP, "tcp" },
+ { LXPR_NET_UDP, "udp" },
+ { LXPR_NET_UNIX, "unix" }
+};
+
+#define NETDIRFILES (sizeof (netdir) / sizeof (netdir[0]))
+
+/*
+ * These are the major signal number differences between Linux and native:
+ *
+ * ====================================
+ * | Number | Linux | Native |
+ * | ====== | ========= | ========== |
+ * | 7 | SIGBUS | SIGEMT |
+ * | 10 | SIGUSR1 | SIGBUS |
+ * | 12 | SIGUSR2 | SIGSYS |
+ * | 16 | SIGSTKFLT | SIGUSR1 |
+ * | 17 | SIGCHLD | SIGUSR2 |
+ * | 18 | SIGCONT | SIGCHLD |
+ * | 19 | SIGSTOP | SIGPWR |
+ * | 20 | SIGTSTP | SIGWINCH |
+ * | 21 | SIGTTIN | SIGURG |
+ * | 22 | SIGTTOU | SIGPOLL |
+ * | 23 | SIGURG | SIGSTOP |
+ * | 24 | SIGXCPU | SIGTSTP |
+ * | 25 | SIGXFSZ | SIGCONT |
+ * | 26 | SIGVTALARM | SIGTTIN |
+ * | 27 | SIGPROF | SIGTTOU |
+ * | 28 | SIGWINCH | SIGVTALARM |
+ * | 29 | SIGPOLL | SIGPROF |
+ * | 30 | SIGPWR | SIGXCPU |
+ * | 31 | SIGSYS | SIGXFSZ |
+ * ====================================
+ *
+ * Not every Linux signal maps to a native signal, nor does every native
+ * signal map to a Linux counterpart. However, when signals do map, the
+ * mapping is unique.
+ */
+static int
+lxpr_sigmap[NSIG] = {
+ 0,
+ LX_SIGHUP,
+ LX_SIGINT,
+ LX_SIGQUIT,
+ LX_SIGILL,
+ LX_SIGTRAP,
+ LX_SIGABRT,
+ LX_SIGSTKFLT,
+ LX_SIGFPE,
+ LX_SIGKILL,
+ LX_SIGBUS,
+ LX_SIGSEGV,
+ LX_SIGSYS,
+ LX_SIGPIPE,
+ LX_SIGALRM,
+ LX_SIGTERM,
+ LX_SIGUSR1,
+ LX_SIGUSR2,
+ LX_SIGCHLD,
+ LX_SIGPWR,
+ LX_SIGWINCH,
+ LX_SIGURG,
+ LX_SIGPOLL,
+ LX_SIGSTOP,
+ LX_SIGTSTP,
+ LX_SIGCONT,
+ LX_SIGTTIN,
+ LX_SIGTTOU,
+ LX_SIGVTALRM,
+ LX_SIGPROF,
+ LX_SIGXCPU,
+ LX_SIGXFSZ,
+ -1, /* 32: illumos SIGWAITING */
+ -1, /* 33: illumos SIGLWP */
+ -1, /* 34: illumos SIGFREEZE */
+ -1, /* 35: illumos SIGTHAW */
+ -1, /* 36: illumos SIGCANCEL */
+ -1, /* 37: illumos SIGLOST */
+ -1, /* 38: illumos SIGXRES */
+ -1, /* 39: illumos SIGJVM1 */
+ -1, /* 40: illumos SIGJVM2 */
+ -1, /* 41: illumos SIGINFO */
+ LX_SIGRTMIN, /* 42: illumos _SIGRTMIN */
+ LX_SIGRTMIN + 1,
+ LX_SIGRTMIN + 2,
+ LX_SIGRTMIN + 3,
+ LX_SIGRTMIN + 4,
+ LX_SIGRTMIN + 5,
+ LX_SIGRTMIN + 6,
+ LX_SIGRTMIN + 7,
+ LX_SIGRTMIN + 8,
+ LX_SIGRTMIN + 9,
+ LX_SIGRTMIN + 10,
+ LX_SIGRTMIN + 11,
+ LX_SIGRTMIN + 12,
+ LX_SIGRTMIN + 13,
+ LX_SIGRTMIN + 14,
+ LX_SIGRTMIN + 15,
+ LX_SIGRTMIN + 16,
+ LX_SIGRTMIN + 17,
+ LX_SIGRTMIN + 18,
+ LX_SIGRTMIN + 19,
+ LX_SIGRTMIN + 20,
+ LX_SIGRTMIN + 21,
+ LX_SIGRTMIN + 22,
+ LX_SIGRTMIN + 23,
+ LX_SIGRTMIN + 24,
+ LX_SIGRTMIN + 25,
+ LX_SIGRTMIN + 26,
+ LX_SIGRTMIN + 27,
+ LX_SIGRTMIN + 28,
+ LX_SIGRTMIN + 29,
+ LX_SIGRTMIN + 30,
+ LX_SIGRTMAX
+};
+
+/*
+ * lxpr_open(): Vnode operation for VOP_OPEN()
+ */
+static int
+lxpr_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
+{
+ vnode_t *vp = *vpp;
+ lxpr_node_t *lxpnp = VTOLXP(vp);
+ lxpr_nodetype_t type = lxpnp->lxpr_type;
+ vnode_t *rvp;
+ int error = 0;
+
+ /*
+ * We only allow reading in this file systrem
+ */
+ if (flag & FWRITE)
+ return (EROFS);
+
+ /*
+ * If we are opening an underlying file only allow regular files
+ * reject the open for anything but a regular file.
+ * Just do it if we are opening the current or root directory.
+ */
+ if (lxpnp->lxpr_realvp != NULL) {
+ rvp = lxpnp->lxpr_realvp;
+
+ if (type == LXPR_PID_FD_FD && rvp->v_type != VREG)
+ error = EACCES;
+ else {
+ /*
+ * Need to hold rvp since VOP_OPEN() may release it.
+ */
+ VN_HOLD(rvp);
+ error = VOP_OPEN(&rvp, flag, cr, ct);
+ if (error) {
+ VN_RELE(rvp);
+ } else {
+ *vpp = rvp;
+ VN_RELE(vp);
+ }
+ }
+ }
+
+ return (error);
+}
+
+
+/*
+ * lxpr_close(): Vnode operation for VOP_CLOSE()
+ */
+/* ARGSUSED */
+static int
+lxpr_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
+ caller_context_t *ct)
+{
+ lxpr_node_t *lxpr = VTOLXP(vp);
+ lxpr_nodetype_t type = lxpr->lxpr_type;
+
+ /*
+ * we should never get here because the close is done on the realvp
+ * for these nodes
+ */
+ ASSERT(type != LXPR_PID_FD_FD &&
+ type != LXPR_PID_CURDIR &&
+ type != LXPR_PID_ROOTDIR &&
+ type != LXPR_PID_EXE);
+
+ return (0);
+}
+
+static void (*lxpr_read_function[LXPR_NFILES])() = {
+ lxpr_read_isdir, /* /proc */
+ lxpr_read_isdir, /* /proc/<pid> */
+ lxpr_read_pid_cmdline, /* /proc/<pid>/cmdline */
+ lxpr_read_empty, /* /proc/<pid>/cpu */
+ lxpr_read_invalid, /* /proc/<pid>/cwd */
+ lxpr_read_empty, /* /proc/<pid>/environ */
+ lxpr_read_invalid, /* /proc/<pid>/exe */
+ lxpr_read_pid_maps, /* /proc/<pid>/maps */
+ lxpr_read_empty, /* /proc/<pid>/mem */
+ lxpr_read_invalid, /* /proc/<pid>/root */
+ lxpr_read_pid_stat, /* /proc/<pid>/stat */
+ lxpr_read_pid_statm, /* /proc/<pid>/statm */
+ lxpr_read_pid_status, /* /proc/<pid>/status */
+ lxpr_read_isdir, /* /proc/<pid>/fd */
+ lxpr_read_fd, /* /proc/<pid>/fd/nn */
+ lxpr_read_empty, /* /proc/cmdline */
+ lxpr_read_cpuinfo, /* /proc/cpuinfo */
+ lxpr_read_empty, /* /proc/devices */
+ lxpr_read_empty, /* /proc/dma */
+ lxpr_read_empty, /* /proc/filesystems */
+ lxpr_read_empty, /* /proc/interrupts */
+ lxpr_read_empty, /* /proc/ioports */
+ lxpr_read_empty, /* /proc/kcore */
+ lxpr_read_kmsg, /* /proc/kmsg */
+ lxpr_read_loadavg, /* /proc/loadavg */
+ lxpr_read_meminfo, /* /proc/meminfo */
+ lxpr_read_mounts, /* /proc/mounts */
+ lxpr_read_isdir, /* /proc/net */
+ lxpr_read_net_arp, /* /proc/net/arp */
+ lxpr_read_net_dev, /* /proc/net/dev */
+ lxpr_read_net_dev_mcast, /* /proc/net/dev_mcast */
+ lxpr_read_net_igmp, /* /proc/net/igmp */
+ lxpr_read_net_ip_mr_cache, /* /proc/net/ip_mr_cache */
+ lxpr_read_net_ip_mr_vif, /* /proc/net/ip_mr_vif */
+ lxpr_read_net_mcfilter, /* /proc/net/mcfilter */
+ lxpr_read_net_netstat, /* /proc/net/netstat */
+ lxpr_read_net_raw, /* /proc/net/raw */
+ lxpr_read_net_route, /* /proc/net/route */
+ lxpr_read_net_rpc, /* /proc/net/rpc */
+ lxpr_read_net_rt_cache, /* /proc/net/rt_cache */
+ lxpr_read_net_sockstat, /* /proc/net/sockstat */
+ lxpr_read_net_snmp, /* /proc/net/snmp */
+ lxpr_read_net_stat, /* /proc/net/stat */
+ lxpr_read_net_tcp, /* /proc/net/tcp */
+ lxpr_read_net_udp, /* /proc/net/udp */
+ lxpr_read_net_unix, /* /proc/net/unix */
+ lxpr_read_partitions, /* /proc/partitions */
+ lxpr_read_invalid, /* /proc/self */
+ lxpr_read_stat, /* /proc/stat */
+ lxpr_read_uptime, /* /proc/uptime */
+ lxpr_read_version, /* /proc/version */
+};
+
+/*
+ * Array of lookup functions, indexed by /lxproc file type.
+ */
+static vnode_t *(*lxpr_lookup_function[LXPR_NFILES])() = {
+ lxpr_lookup_procdir, /* /proc */
+ lxpr_lookup_piddir, /* /proc/<pid> */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/cmdline */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/cpu */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/cwd */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/environ */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/exe */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/maps */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/mem */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/root */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/stat */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/statm */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/status */
+ lxpr_lookup_fddir, /* /proc/<pid>/fd */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/fd/nn */
+ lxpr_lookup_not_a_dir, /* /proc/cmdline */
+ lxpr_lookup_not_a_dir, /* /proc/cpuinfo */
+ lxpr_lookup_not_a_dir, /* /proc/devices */
+ lxpr_lookup_not_a_dir, /* /proc/dma */
+ lxpr_lookup_not_a_dir, /* /proc/filesystems */
+ lxpr_lookup_not_a_dir, /* /proc/interrupts */
+ lxpr_lookup_not_a_dir, /* /proc/ioports */
+ lxpr_lookup_not_a_dir, /* /proc/kcore */
+ lxpr_lookup_not_a_dir, /* /proc/kmsg */
+ lxpr_lookup_not_a_dir, /* /proc/loadavg */
+ lxpr_lookup_not_a_dir, /* /proc/meminfo */
+ lxpr_lookup_not_a_dir, /* /proc/mounts */
+ lxpr_lookup_netdir, /* /proc/net */
+ lxpr_lookup_not_a_dir, /* /proc/net/arp */
+ lxpr_lookup_not_a_dir, /* /proc/net/dev */
+ lxpr_lookup_not_a_dir, /* /proc/net/dev_mcast */
+ lxpr_lookup_not_a_dir, /* /proc/net/igmp */
+ lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_cache */
+ lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_vif */
+ lxpr_lookup_not_a_dir, /* /proc/net/mcfilter */
+ lxpr_lookup_not_a_dir, /* /proc/net/netstat */
+ lxpr_lookup_not_a_dir, /* /proc/net/raw */
+ lxpr_lookup_not_a_dir, /* /proc/net/route */
+ lxpr_lookup_not_a_dir, /* /proc/net/rpc */
+ lxpr_lookup_not_a_dir, /* /proc/net/rt_cache */
+ lxpr_lookup_not_a_dir, /* /proc/net/sockstat */
+ lxpr_lookup_not_a_dir, /* /proc/net/snmp */
+ lxpr_lookup_not_a_dir, /* /proc/net/stat */
+ lxpr_lookup_not_a_dir, /* /proc/net/tcp */
+ lxpr_lookup_not_a_dir, /* /proc/net/udp */
+ lxpr_lookup_not_a_dir, /* /proc/net/unix */
+ lxpr_lookup_not_a_dir, /* /proc/partitions */
+ lxpr_lookup_not_a_dir, /* /proc/self */
+ lxpr_lookup_not_a_dir, /* /proc/stat */
+ lxpr_lookup_not_a_dir, /* /proc/uptime */
+ lxpr_lookup_not_a_dir, /* /proc/version */
+};
+
+/*
+ * Array of readdir functions, indexed by /proc file type.
+ */
+static int (*lxpr_readdir_function[LXPR_NFILES])() = {
+ lxpr_readdir_procdir, /* /proc */
+ lxpr_readdir_piddir, /* /proc/<pid> */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/cmdline */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/cpu */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/cwd */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/environ */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/exe */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/maps */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/mem */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/root */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/stat */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/statm */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/status */
+ lxpr_readdir_fddir, /* /proc/<pid>/fd */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/fd/nn */
+ lxpr_readdir_not_a_dir, /* /proc/cmdline */
+ lxpr_readdir_not_a_dir, /* /proc/cpuinfo */
+ lxpr_readdir_not_a_dir, /* /proc/devices */
+ lxpr_readdir_not_a_dir, /* /proc/dma */
+ lxpr_readdir_not_a_dir, /* /proc/filesystems */
+ lxpr_readdir_not_a_dir, /* /proc/interrupts */
+ lxpr_readdir_not_a_dir, /* /proc/ioports */
+ lxpr_readdir_not_a_dir, /* /proc/kcore */
+ lxpr_readdir_not_a_dir, /* /proc/kmsg */
+ lxpr_readdir_not_a_dir, /* /proc/loadavg */
+ lxpr_readdir_not_a_dir, /* /proc/meminfo */
+ lxpr_readdir_not_a_dir, /* /proc/mounts */
+ lxpr_readdir_netdir, /* /proc/net */
+ lxpr_readdir_not_a_dir, /* /proc/net/arp */
+ lxpr_readdir_not_a_dir, /* /proc/net/dev */
+ lxpr_readdir_not_a_dir, /* /proc/net/dev_mcast */
+ lxpr_readdir_not_a_dir, /* /proc/net/igmp */
+ lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_cache */
+ lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_vif */
+ lxpr_readdir_not_a_dir, /* /proc/net/mcfilter */
+ lxpr_readdir_not_a_dir, /* /proc/net/netstat */
+ lxpr_readdir_not_a_dir, /* /proc/net/raw */
+ lxpr_readdir_not_a_dir, /* /proc/net/route */
+ lxpr_readdir_not_a_dir, /* /proc/net/rpc */
+ lxpr_readdir_not_a_dir, /* /proc/net/rt_cache */
+ lxpr_readdir_not_a_dir, /* /proc/net/sockstat */
+ lxpr_readdir_not_a_dir, /* /proc/net/snmp */
+ lxpr_readdir_not_a_dir, /* /proc/net/stat */
+ lxpr_readdir_not_a_dir, /* /proc/net/tcp */
+ lxpr_readdir_not_a_dir, /* /proc/net/udp */
+ lxpr_readdir_not_a_dir, /* /proc/net/unix */
+ lxpr_readdir_not_a_dir, /* /proc/partitions */
+ lxpr_readdir_not_a_dir, /* /proc/self */
+ lxpr_readdir_not_a_dir, /* /proc/stat */
+ lxpr_readdir_not_a_dir, /* /proc/uptime */
+ lxpr_readdir_not_a_dir, /* /proc/version */
+};
+
+
+/*
+ * lxpr_read(): Vnode operation for VOP_READ()
+ *
+ * As the format of all the files that can be read in lxproc is human readable
+ * and not binary structures there do not have to be different read variants
+ * depending on whether the reading process model is 32- or 64-bit.
+ */
+/* ARGSUSED */
+static int
+lxpr_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
+ caller_context_t *ct)
+{
+ lxpr_node_t *lxpnp = VTOLXP(vp);
+ lxpr_nodetype_t type = lxpnp->lxpr_type;
+ lxpr_uiobuf_t *uiobuf = lxpr_uiobuf_new(uiop);
+ int error;
+
+ ASSERT(type < LXPR_NFILES);
+
+ if (type == LXPR_KMSG) {
+ ldi_ident_t li = VTOLXPM(vp)->lxprm_li;
+ struct strioctl str;
+ int rv;
+
+ /*
+ * Open the zone's console device using the layered driver
+ * interface.
+ */
+ if ((error = ldi_open_by_name("/dev/log", FREAD, cr,
+ &lxpnp->lxpr_cons_ldih, li)) != 0)
+ return (error);
+
+ /*
+ * Send an ioctl to the underlying console device, letting it
+ * know we're interested in getting console messages.
+ */
+ str.ic_cmd = I_CONSLOG;
+ str.ic_timout = 0;
+ str.ic_len = 0;
+ str.ic_dp = NULL;
+ if ((error = ldi_ioctl(lxpnp->lxpr_cons_ldih, I_STR,
+ (intptr_t)&str, FKIOCTL, cr, &rv)) != 0)
+ return (error);
+ }
+
+ lxpr_read_function[type](lxpnp, uiobuf);
+
+ if (type == LXPR_KMSG) {
+ if ((error = ldi_close(lxpnp->lxpr_cons_ldih, FREAD, cr)) != 0)
+ return (error);
+ }
+
+ error = lxpr_uiobuf_flush(uiobuf);
+ lxpr_uiobuf_free(uiobuf);
+
+ return (error);
+}
+
+/*
+ * lxpr_read_invalid(), lxpr_read_isdir(), lxpr_read_empty()
+ *
+ * Various special case reads:
+ * - trying to read a directory
+ * - invalid file (used to mean a file that should be implemented,
+ * but isn't yet)
+ * - empty file
+ * - wait to be able to read a file that will never have anything to read
+ */
+/* ARGSUSED */
+static void
+lxpr_read_isdir(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_uiobuf_seterr(uiobuf, EISDIR);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_invalid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_empty(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_pid_cmdline():
+ *
+ * This is not precisely compatible with Linux: the Linux cmdline returns argv
+ * with the correct separation using \0 between the arguments, but we cannot do
+ * that without copying the real argv from the correct process context. This
+ * is too difficult to attempt so we pretend that the entire cmdline is just
+ * argv[0]. This is good enough for ps and htop to display correctly, but might
+ * cause some other things not to work correctly.
+ */
+static void
+lxpr_read_pid_cmdline(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ char *buf;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_CMDLINE);
+
+ p = lxpr_lock(lxpnp->lxpr_pid);
+ if (p == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ return;
+ }
+
+ buf = PTOU(p)->u_argv != 0 ? PTOU(p)->u_psargs : PTOU(p)->u_comm;
+
+ lxpr_uiobuf_write(uiobuf, buf, strlen(buf) + 1);
+ lxpr_unlock(p);
+}
+
+/*
+ * lxpr_read_pid_maps(): memory map file
+ */
+static void
+lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ struct as *as;
+ struct seg *seg;
+ char *buf;
+ int buflen = MAXPATHLEN;
+ struct print_data {
+ caddr_t saddr;
+ caddr_t eaddr;
+ int type;
+ char prot[5];
+ uint32_t offset;
+ vnode_t *vp;
+ struct print_data *next;
+ } *print_head = NULL;
+ struct print_data **print_tail = &print_head;
+ struct print_data *pbuf;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_MAPS);
+
+ p = lxpr_lock(lxpnp->lxpr_pid);
+ if (p == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ return;
+ }
+
+ as = p->p_as;
+
+ if (as == &kas) {
+ lxpr_unlock(p);
+ return;
+ }
+
+ mutex_exit(&p->p_lock);
+
+ /* Iterate over all segments in the address space */
+ AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
+ for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
+ vnode_t *vp;
+ uint_t protbits;
+
+ pbuf = kmem_alloc(sizeof (*pbuf), KM_SLEEP);
+
+ pbuf->saddr = seg->s_base;
+ pbuf->eaddr = seg->s_base+seg->s_size;
+ pbuf->type = SEGOP_GETTYPE(seg, seg->s_base);
+
+ /*
+ * Cheat and only use the protection bits of the first page
+ * in the segment
+ */
+ (void) strncpy(pbuf->prot, "----", sizeof (pbuf->prot));
+ (void) SEGOP_GETPROT(seg, seg->s_base, 0, &protbits);
+
+ if (protbits & PROT_READ) pbuf->prot[0] = 'r';
+ if (protbits & PROT_WRITE) pbuf->prot[1] = 'w';
+ if (protbits & PROT_EXEC) pbuf->prot[2] = 'x';
+ if (pbuf->type & MAP_SHARED) pbuf->prot[3] = 's';
+ else if (pbuf->type & MAP_PRIVATE) pbuf->prot[3] = 'p';
+
+ if (seg->s_ops == &segvn_ops &&
+ SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
+ vp != NULL && vp->v_type == VREG) {
+ VN_HOLD(vp);
+ pbuf->vp = vp;
+ } else {
+ pbuf->vp = NULL;
+ }
+
+ pbuf->offset = (uint32_t)SEGOP_GETOFFSET(seg, pbuf->saddr);
+
+ pbuf->next = NULL;
+ *print_tail = pbuf;
+ print_tail = &pbuf->next;
+ }
+ AS_LOCK_EXIT(as, &as->a_lock);
+ mutex_enter(&p->p_lock);
+ lxpr_unlock(p);
+
+ buf = kmem_alloc(buflen, KM_SLEEP);
+
+ /* print the data we've extracted */
+ pbuf = print_head;
+ while (pbuf != NULL) {
+ struct print_data *pbuf_next;
+ vattr_t vattr;
+
+ int maj = 0;
+ int min = 0;
+ u_longlong_t inode = 0;
+
+ *buf = '\0';
+ if (pbuf->vp != NULL) {
+ vattr.va_mask = AT_FSID | AT_NODEID;
+ if (VOP_GETATTR(pbuf->vp, &vattr, 0, CRED(),
+ NULL) == 0) {
+ maj = getmajor(vattr.va_fsid);
+ min = getminor(vattr.va_fsid);
+ inode = vattr.va_nodeid;
+ }
+ (void) vnodetopath(NULL, pbuf->vp, buf, buflen, CRED());
+ VN_RELE(pbuf->vp);
+ }
+
+ if (*buf != '\0') {
+ lxpr_uiobuf_printf(uiobuf,
+ "%08x-%08x %s %08x %02d:%03d %lld %s\n",
+ pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
+ maj, min, inode, buf);
+ } else {
+ lxpr_uiobuf_printf(uiobuf,
+ "%08x-%08x %s %08x %02d:%03d %lld\n",
+ pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
+ maj, min, inode);
+ }
+
+ pbuf_next = pbuf->next;
+ kmem_free(pbuf, sizeof (*pbuf));
+ pbuf = pbuf_next;
+ }
+
+ kmem_free(buf, buflen);
+}
+
+/*
+ * lxpr_read_pid_statm(): memory status file
+ */
+static void
+lxpr_read_pid_statm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ struct as *as;
+ size_t vsize;
+ size_t rss;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_STATM);
+
+ p = lxpr_lock(lxpnp->lxpr_pid);
+ if (p == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ return;
+ }
+
+ as = p->p_as;
+
+ mutex_exit(&p->p_lock);
+
+ AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
+ vsize = btopr(as->a_resvsize);
+ rss = rm_asrss(as);
+ AS_LOCK_EXIT(as, &as->a_lock);
+
+ mutex_enter(&p->p_lock);
+ lxpr_unlock(p);
+
+ lxpr_uiobuf_printf(uiobuf,
+ "%lu %lu %lu %lu %lu %lu %lu\n",
+ vsize, rss, 0l, rss, 0l, 0l, 0l);
+}
+
+/*
+ * lxpr_read_pid_status(): status file
+ */
+static void
+lxpr_read_pid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ kthread_t *t;
+ user_t *up;
+ cred_t *cr;
+ const gid_t *groups;
+ int ngroups;
+ struct as *as;
+ char *status;
+ pid_t pid, ppid;
+ size_t vsize;
+ size_t rss;
+ k_sigset_t current, ignore, handle;
+ int i, lx_sig;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_STATUS);
+
+ p = lxpr_lock(lxpnp->lxpr_pid);
+ if (p == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ return;
+ }
+
+ pid = p->p_pid;
+
+ /*
+ * Convert pid to the Linux default of 1 if we're the zone's init
+ * process
+ */
+ if (pid == curproc->p_zone->zone_proc_initpid) {
+ pid = 1;
+ ppid = 0; /* parent pid for init is 0 */
+ } else {
+ /*
+ * Make sure not to reference parent PIDs that reside outside
+ * the zone
+ */
+ ppid = ((p->p_flag & SZONETOP)
+ ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
+
+ /*
+ * Convert ppid to the Linux default of 1 if our parent is the
+ * zone's init process
+ */
+ if (ppid == curproc->p_zone->zone_proc_initpid)
+ ppid = 1;
+ }
+
+ t = prchoose(p);
+ if (t != NULL) {
+ switch (t->t_state) {
+ case TS_SLEEP:
+ status = "S (sleeping)";
+ break;
+ case TS_RUN:
+ case TS_ONPROC:
+ status = "R (running)";
+ break;
+ case TS_ZOMB:
+ status = "Z (zombie)";
+ break;
+ case TS_STOPPED:
+ status = "T (stopped)";
+ break;
+ default:
+ status = "! (unknown)";
+ break;
+ }
+ thread_unlock(t);
+ } else {
+ /*
+ * there is a hole in the exit code, where a proc can have
+ * no threads but it is yet to be flagged SZOMB. We will
+ * assume we are about to become a zombie
+ */
+ status = "Z (zombie)";
+ }
+
+ up = PTOU(p);
+ mutex_enter(&p->p_crlock);
+ crhold(cr = p->p_cred);
+ mutex_exit(&p->p_crlock);
+
+ lxpr_uiobuf_printf(uiobuf,
+ "Name:\t%s\n"
+ "State:\t%s\n"
+ "Tgid:\t%d\n"
+ "Pid:\t%d\n"
+ "PPid:\t%d\n"
+ "TracerPid:\t%d\n"
+ "Uid:\t%u\t%u\t%u\t%u\n"
+ "Gid:\t%u\t%u\t%u\t%u\n"
+ "FDSize:\t%d\n"
+ "Groups:\t",
+ up->u_comm,
+ status,
+ pid, /* thread group id - same as pid */
+ pid,
+ ppid,
+ 0,
+ crgetruid(cr), crgetuid(cr), crgetsuid(cr), crgetuid(cr),
+ crgetrgid(cr), crgetgid(cr), crgetsgid(cr), crgetgid(cr),
+ p->p_fno_ctl);
+
+ ngroups = crgetngroups(cr);
+ groups = crgetgroups(cr);
+ for (i = 0; i < ngroups; i++) {
+ lxpr_uiobuf_printf(uiobuf,
+ "%u ",
+ groups[i]);
+ }
+ crfree(cr);
+
+ as = p->p_as;
+ if ((p->p_stat != SZOMB) && !(p->p_flag & SSYS) && (as != &kas)) {
+ mutex_exit(&p->p_lock);
+ AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
+ vsize = as->a_resvsize;
+ rss = rm_asrss(as);
+ AS_LOCK_EXIT(as, &as->a_lock);
+ mutex_enter(&p->p_lock);
+
+ lxpr_uiobuf_printf(uiobuf,
+ "\n"
+ "VmSize:\t%8lu kB\n"
+ "VmLck:\t%8lu kB\n"
+ "VmRSS:\t%8lu kB\n"
+ "VmData:\t%8lu kB\n"
+ "VmStk:\t%8lu kB\n"
+ "VmExe:\t%8lu kB\n"
+ "VmLib:\t%8lu kB",
+ btok(vsize),
+ 0l,
+ ptok(rss),
+ 0l,
+ btok(p->p_stksize),
+ ptok(rss),
+ 0l);
+ }
+
+ sigemptyset(&current);
+ sigemptyset(&ignore);
+ sigemptyset(&handle);
+
+ for (i = 1; i < NSIG; i++) {
+ lx_sig = lxpr_sigmap[i];
+
+ if ((lx_sig > 0) && (lx_sig <= LX_NSIG)) {
+ if (sigismember(&p->p_sig, i))
+ sigaddset(&current, lx_sig);
+
+ if (up->u_signal[i - 1] == SIG_IGN)
+ sigaddset(&ignore, lx_sig);
+ else if (up->u_signal[i - 1] != SIG_DFL)
+ sigaddset(&handle, lx_sig);
+ }
+ }
+
+ lxpr_uiobuf_printf(uiobuf,
+ "\n"
+ "SigPnd:\t%08x%08x\n"
+ "SigBlk:\t%08x%08x\n"
+ "SigIgn:\t%08x%08x\n"
+ "SigCgt:\t%08x%08x\n"
+ "CapInh:\t%016x\n"
+ "CapPrm:\t%016x\n"
+ "CapEff:\t%016x\n",
+ current.__sigbits[1], current.__sigbits[0],
+ 0, 0, /* signals blocked on per thread basis */
+ ignore.__sigbits[1], ignore.__sigbits[0],
+ handle.__sigbits[1], handle.__sigbits[0],
+ /* Can't do anything with linux capabilities */
+ 0,
+ 0,
+ 0);
+
+ lxpr_unlock(p);
+}
+
+
+/*
+ * lxpr_read_pid_stat(): pid stat file
+ */
+static void
+lxpr_read_pid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ kthread_t *t;
+ struct as *as;
+ char stat;
+ pid_t pid, ppid, pgpid, spid;
+ gid_t psgid;
+ dev_t psdev;
+ size_t rss, vsize;
+ int nice, pri;
+ caddr_t wchan;
+ processorid_t cpu;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_STAT);
+
+ p = lxpr_lock(lxpnp->lxpr_pid);
+ if (p == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ return;
+ }
+
+ pid = p->p_pid;
+
+ /*
+ * Set Linux defaults if we're the zone's init process
+ */
+ if (pid == curproc->p_zone->zone_proc_initpid) {
+ pid = 1; /* PID for init */
+ ppid = 0; /* parent PID for init is 0 */
+ pgpid = 0; /* process group for init is 0 */
+ psgid = (gid_t)-1; /* credential GID for init is -1 */
+ spid = 0; /* session id for init is 0 */
+ psdev = 0; /* session device for init is 0 */
+ } else {
+ /*
+ * Make sure not to reference parent PIDs that reside outside
+ * the zone
+ */
+ ppid = ((p->p_flag & SZONETOP) ?
+ curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
+
+ /*
+ * Convert ppid to the Linux default of 1 if our parent is the
+ * zone's init process
+ */
+ if (ppid == curproc->p_zone->zone_proc_initpid)
+ ppid = 1;
+
+ pgpid = p->p_pgrp;
+
+ mutex_enter(&p->p_splock);
+ mutex_enter(&p->p_sessp->s_lock);
+ spid = p->p_sessp->s_sid;
+ psdev = p->p_sessp->s_dev;
+ if (p->p_sessp->s_cred)
+ psgid = crgetgid(p->p_sessp->s_cred);
+ else
+ psgid = crgetgid(p->p_cred);
+
+ mutex_exit(&p->p_sessp->s_lock);
+ mutex_exit(&p->p_splock);
+ }
+
+ t = prchoose(p);
+ if (t != NULL) {
+ switch (t->t_state) {
+ case TS_SLEEP:
+ stat = 'S'; break;
+ case TS_RUN:
+ case TS_ONPROC:
+ stat = 'R'; break;
+ case TS_ZOMB:
+ stat = 'Z'; break;
+ case TS_STOPPED:
+ stat = 'T'; break;
+ default:
+ stat = '!'; break;
+ }
+
+ if (CL_DONICE(t, NULL, 0, &nice) != 0)
+ nice = 0;
+
+ pri = t->t_pri;
+ wchan = t->t_wchan;
+ cpu = t->t_cpu->cpu_id;
+ thread_unlock(t);
+ } else {
+ /* Only zombies have no threads */
+ stat = 'Z';
+ nice = 0;
+ pri = 0;
+ wchan = 0;
+ cpu = 0;
+ }
+ as = p->p_as;
+ mutex_exit(&p->p_lock);
+ AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
+ vsize = as->a_resvsize;
+ rss = rm_asrss(as);
+ AS_LOCK_EXIT(as, &as->a_lock);
+ mutex_enter(&p->p_lock);
+
+ lxpr_uiobuf_printf(uiobuf,
+ "%d (%s) %c %d %d %d %d %d "
+ "%lu %lu %lu %lu %lu "
+ "%lu %lu %ld %ld "
+ "%d %d %d "
+ "%lu "
+ "%lu "
+ "%lu %ld %llu "
+ "%lu %lu %u "
+ "%lu %lu "
+ "%lu %lu %lu %lu "
+ "%lu "
+ "%lu %lu "
+ "%d "
+ "%d"
+ "\n",
+ pid, PTOU(p)->u_comm, stat, ppid, pgpid, spid, psdev, psgid,
+ 0l, 0l, 0l, 0l, 0l, /* flags, minflt, cminflt, majflt, cmajflt */
+ p->p_utime, p->p_stime, p->p_cutime, p->p_cstime,
+ pri, nice, p->p_lwpcnt,
+ 0l, /* itrealvalue (time before next SIGALRM) */
+ PTOU(p)->u_ticks,
+ vsize, rss, p->p_vmem_ctl,
+ 0l, 0l, USRSTACK, /* startcode, endcode, startstack */
+ 0l, 0l, /* kstkesp, kstkeip */
+ 0l, 0l, 0l, 0l, /* signal, blocked, sigignore, sigcatch */
+ wchan,
+ 0l, 0l, /* nswap, cnswap */
+ 0, /* exit_signal */
+ cpu);
+
+ lxpr_unlock(p);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_arp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_dev(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_uiobuf_printf(uiobuf, "Inter-| Receive "
+ " | Transmit\n");
+ lxpr_uiobuf_printf(uiobuf, " face |bytes packets errs drop fifo"
+ " frame compressed multicast|bytes packets errs drop fifo"
+ " colls carrier compressed\n");
+
+ /*
+ * Data about each interface should go here, but that shouldn't be added
+ * unless there is an lxproc reader that actually makes use of it (and
+ * doesn't need anything else that we refuse to provide)...
+ */
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_dev_mcast(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_igmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_ip_mr_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_ip_mr_vif(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_mcfilter(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_netstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_raw(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_rpc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_rt_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_sockstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_snmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_tcp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_udp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_unix(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_kmsg(): read the contents of the kernel message queue. We
+ * translate this into the reception of console messages for this zone; each
+ * read copies out a single zone console message, or blocks until the next one
+ * is produced.
+ */
+
+#define LX_KMSG_PRI "<0>"
+
+static void
+lxpr_read_kmsg(lxpr_node_t *lxpnp, struct lxpr_uiobuf *uiobuf)
+{
+ ldi_handle_t lh = lxpnp->lxpr_cons_ldih;
+ mblk_t *mp;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_KMSG);
+
+ if (ldi_getmsg(lh, &mp, NULL) == 0) {
+ /*
+ * lxproc doesn't like successive reads to the same file
+ * descriptor unless we do an explicit rewind each time.
+ */
+ lxpr_uiobuf_seek(uiobuf, 0);
+
+ lxpr_uiobuf_printf(uiobuf, "%s%s", LX_KMSG_PRI,
+ mp->b_cont->b_rptr);
+
+ freemsg(mp);
+ }
+}
+
+/*
+ * lxpr_read_loadavg(): read the contents of the "loadavg" file. We do just
+ * enough for uptime and other simple lxproc readers to work
+ */
+extern int nthread;
+
+static void
+lxpr_read_loadavg(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ ulong_t avenrun1;
+ ulong_t avenrun5;
+ ulong_t avenrun15;
+ ulong_t avenrun1_cs;
+ ulong_t avenrun5_cs;
+ ulong_t avenrun15_cs;
+ int loadavg[3];
+ int *loadbuf;
+ cpupart_t *cp;
+ zone_t *zone = LXPTOZ(lxpnp);
+
+ uint_t nrunnable = 0;
+ rctl_qty_t nlwps;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_LOADAVG);
+
+ mutex_enter(&cpu_lock);
+
+ /*
+ * Need to add up values over all CPU partitions. If pools are active,
+ * only report the values of the zone's partition, which by definition
+ * includes the current CPU.
+ */
+ if (pool_pset_enabled()) {
+ psetid_t psetid = zone_pset_get(curproc->p_zone);
+
+ ASSERT(curproc->p_zone != &zone0);
+ cp = CPU->cpu_part;
+
+ nrunnable = cp->cp_nrunning + cp->cp_nrunnable;
+ (void) cpupart_get_loadavg(psetid, &loadavg[0], 3);
+ loadbuf = &loadavg[0];
+ } else {
+ cp = cp_list_head;
+ do {
+ nrunnable += cp->cp_nrunning + cp->cp_nrunnable;
+ } while ((cp = cp->cp_next) != cp_list_head);
+
+ loadbuf = zone == global_zone ?
+ &avenrun[0] : zone->zone_avenrun;
+ }
+
+ /*
+ * If we're in the non-global zone, we'll report the total number of
+ * LWPs in the zone for the "nproc" parameter of /proc/loadavg,
+ * otherwise will just use nthread (which will include kernel threads,
+ * but should be good enough for lxproc).
+ */
+ nlwps = zone == global_zone ? nthread : zone->zone_nlwps;
+
+ mutex_exit(&cpu_lock);
+
+ avenrun1 = loadbuf[0] >> FSHIFT;
+ avenrun1_cs = ((loadbuf[0] & (FSCALE-1)) * 100) >> FSHIFT;
+ avenrun5 = loadbuf[1] >> FSHIFT;
+ avenrun5_cs = ((loadbuf[1] & (FSCALE-1)) * 100) >> FSHIFT;
+ avenrun15 = loadbuf[2] >> FSHIFT;
+ avenrun15_cs = ((loadbuf[2] & (FSCALE-1)) * 100) >> FSHIFT;
+
+ lxpr_uiobuf_printf(uiobuf,
+ "%ld.%02d %ld.%02d %ld.%02d %d/%d %d\n",
+ avenrun1, avenrun1_cs,
+ avenrun5, avenrun5_cs,
+ avenrun15, avenrun15_cs,
+ nrunnable, nlwps, 0);
+}
+
+/*
+ * lxpr_read_meminfo(): read the contents of the "meminfo" file.
+ */
+static void
+lxpr_read_meminfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ zone_t *zone = LXPTOZ(lxpnp);
+ int global = zone == global_zone;
+ long total_mem, free_mem, total_swap, used_swap;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_MEMINFO);
+
+ if (global || zone->zone_phys_mem_ctl == UINT64_MAX) {
+ total_mem = physmem * PAGESIZE;
+ free_mem = freemem * PAGESIZE;
+ } else {
+ total_mem = zone->zone_phys_mem_ctl;
+ free_mem = zone->zone_phys_mem_ctl - zone->zone_phys_mem;
+ }
+
+ if (global || zone->zone_max_swap_ctl == UINT64_MAX) {
+ total_swap = k_anoninfo.ani_max * PAGESIZE;
+ used_swap = k_anoninfo.ani_phys_resv * PAGESIZE;
+ } else {
+ mutex_enter(&zone->zone_mem_lock);
+ total_swap = zone->zone_max_swap_ctl;
+ used_swap = zone->zone_max_swap;
+ mutex_exit(&zone->zone_mem_lock);
+ }
+
+ lxpr_uiobuf_printf(uiobuf,
+ " total: used: free: shared: buffers: cached:\n"
+ "Mem: %8lu %8lu %8lu %8u %8u %8u\n"
+ "Swap: %8lu %8lu %8lu\n"
+ "MemTotal: %8lu kB\n"
+ "MemFree: %8lu kB\n"
+ "MemShared: %8u kB\n"
+ "Buffers: %8u kB\n"
+ "Cached: %8u kB\n"
+ "SwapCached:%8u kB\n"
+ "Active: %8u kB\n"
+ "Inactive: %8u kB\n"
+ "HighTotal: %8u kB\n"
+ "HighFree: %8u kB\n"
+ "LowTotal: %8u kB\n"
+ "LowFree: %8u kB\n"
+ "SwapTotal: %8lu kB\n"
+ "SwapFree: %8lu kB\n",
+ total_mem, total_mem - free_mem, free_mem, 0, 0, 0,
+ total_swap, used_swap, total_swap - used_swap,
+ btok(total_mem), /* MemTotal */
+ btok(free_mem), /* MemFree */
+ 0, /* MemShared */
+ 0, /* Buffers */
+ 0, /* Cached */
+ 0, /* SwapCached */
+ 0, /* Active */
+ 0, /* Inactive */
+ 0, /* HighTotal */
+ 0, /* HighFree */
+ btok(total_mem), /* LowTotal */
+ btok(free_mem), /* LowFree */
+ btok(total_swap), /* SwapTotal */
+ btok(total_swap - used_swap)); /* SwapFree */
+}
+
+/*
+ * lxpr_read_mounts():
+ */
+/* ARGSUSED */
+static void
+lxpr_read_mounts(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ struct vfs *vfsp;
+ struct vfs *vfslist;
+ zone_t *zone = LXPTOZ(lxpnp);
+ struct print_data {
+ refstr_t *vfs_mntpt;
+ refstr_t *vfs_resource;
+ uint_t vfs_flag;
+ int vfs_fstype;
+ struct print_data *next;
+ } *print_head = NULL;
+ struct print_data **print_tail = &print_head;
+ struct print_data *printp;
+
+ vfs_list_read_lock();
+
+ if (zone == global_zone) {
+ vfsp = vfslist = rootvfs;
+ } else {
+ vfsp = vfslist = zone->zone_vfslist;
+ /*
+ * If the zone has a root entry, it will be the first in
+ * the list. If it doesn't, we conjure one up.
+ */
+ if (vfslist == NULL || strcmp(refstr_value(vfsp->vfs_mntpt),
+ zone->zone_rootpath) != 0) {
+ struct vfs *tvfsp;
+ /*
+ * The root of the zone is not a mount point. The vfs
+ * we want to report is that of the zone's root vnode.
+ */
+ tvfsp = zone->zone_rootvp->v_vfsp;
+
+ lxpr_uiobuf_printf(uiobuf,
+ "/ / %s %s 0 0\n",
+ vfssw[tvfsp->vfs_fstype].vsw_name,
+ tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
+
+ }
+ if (vfslist == NULL) {
+ vfs_list_unlock();
+ return;
+ }
+ }
+
+ /*
+ * Later on we have to do a lookupname, which can end up causing
+ * another vfs_list_read_lock() to be called. Which can lead to a
+ * deadlock. To avoid this, we extract the data we need into a local
+ * list, then we can run this list without holding vfs_list_read_lock()
+ * We keep the list in the same order as the vfs_list
+ */
+ do {
+ /* Skip mounts we shouldn't show */
+ if (vfsp->vfs_flag & VFS_NOMNTTAB) {
+ goto nextfs;
+ }
+
+ printp = kmem_alloc(sizeof (*printp), KM_SLEEP);
+ refstr_hold(vfsp->vfs_mntpt);
+ printp->vfs_mntpt = vfsp->vfs_mntpt;
+ refstr_hold(vfsp->vfs_resource);
+ printp->vfs_resource = vfsp->vfs_resource;
+ printp->vfs_flag = vfsp->vfs_flag;
+ printp->vfs_fstype = vfsp->vfs_fstype;
+ printp->next = NULL;
+
+ *print_tail = printp;
+ print_tail = &printp->next;
+
+nextfs:
+ vfsp = (zone == global_zone) ?
+ vfsp->vfs_next : vfsp->vfs_zone_next;
+
+ } while (vfsp != vfslist);
+
+ vfs_list_unlock();
+
+ /*
+ * now we can run through what we've extracted without holding
+ * vfs_list_read_lock()
+ */
+ printp = print_head;
+ while (printp != NULL) {
+ struct print_data *printp_next;
+ const char *resource;
+ char *mntpt;
+ struct vnode *vp;
+ int error;
+
+ mntpt = (char *)refstr_value(printp->vfs_mntpt);
+ resource = refstr_value(printp->vfs_resource);
+
+ if (mntpt != NULL && mntpt[0] != '\0')
+ mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
+ else
+ mntpt = "-";
+
+ error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
+
+ if (error != 0)
+ goto nextp;
+
+ if (!(vp->v_flag & VROOT)) {
+ VN_RELE(vp);
+ goto nextp;
+ }
+ VN_RELE(vp);
+
+ if (resource != NULL && resource[0] != '\0') {
+ if (resource[0] == '/') {
+ resource = ZONE_PATH_VISIBLE(resource, zone) ?
+ ZONE_PATH_TRANSLATE(resource, zone) :
+ mntpt;
+ }
+ } else {
+ resource = "-";
+ }
+
+ lxpr_uiobuf_printf(uiobuf,
+ "%s %s %s %s 0 0\n",
+ resource, mntpt, vfssw[printp->vfs_fstype].vsw_name,
+ printp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
+
+nextp:
+ printp_next = printp->next;
+ refstr_rele(printp->vfs_mntpt);
+ refstr_rele(printp->vfs_resource);
+ kmem_free(printp, sizeof (*printp));
+ printp = printp_next;
+
+ }
+}
+
+/*
+ * lxpr_read_partitions():
+ *
+ * We don't support partitions in a local zone because it requires access to
+ * physical devices. But we need to fake up enough of the file to show that we
+ * have no partitions.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_partitions(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_uiobuf_printf(uiobuf,
+ "major minor #blocks name rio rmerge rsect ruse "
+ "wio wmerge wsect wuse running use aveq\n\n");
+}
+
+/*
+ * lxpr_read_version(): read the contents of the "version" file. Note that
+ * we don't lie here -- we don't pretend that we're Linux. If lxproc is to
+ * be used in a Linux-branded zone, there will need to be a mount option to
+ * indicate that Linux should be more fully mimicked.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_version(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_uiobuf_printf(uiobuf,
+ "%s version %s (%s version %d.%d.%d) "
+ "#%s SMP %s\n",
+ utsname.sysname, utsname.release,
+#if defined(__GNUC__)
+ "gcc",
+ __GNUC__,
+ __GNUC_MINOR__,
+ __GNUC_PATCHLEVEL__,
+#else
+ "Sun C",
+ __SUNPRO_C / 0x100,
+ (__SUNPRO_C & 0xff) / 0x10,
+ __SUNPRO_C & 0xf,
+#endif
+ utsname.version,
+ "00:00:00 00/00/00");
+}
+
+/*
+ * lxpr_read_stat(): read the contents of the "stat" file.
+ *
+ */
+/* ARGSUSED */
+static void
+lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ cpu_t *cp, *cpstart;
+ int pools_enabled;
+ ulong_t idle_cum = 0;
+ ulong_t sys_cum = 0;
+ ulong_t user_cum = 0;
+ ulong_t irq_cum = 0;
+ ulong_t cpu_nrunnable_cum = 0;
+ ulong_t w_io_cum = 0;
+
+ ulong_t pgpgin_cum = 0;
+ ulong_t pgpgout_cum = 0;
+ ulong_t pgswapout_cum = 0;
+ ulong_t pgswapin_cum = 0;
+ ulong_t intr_cum = 0;
+ ulong_t pswitch_cum = 0;
+ ulong_t forks_cum = 0;
+ hrtime_t msnsecs[NCMSTATES];
+
+ /* temporary variable since scalehrtime modifies data in place */
+ hrtime_t tmptime;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_STAT);
+
+ mutex_enter(&cpu_lock);
+ pools_enabled = pool_pset_enabled();
+
+ /* Calculate cumulative stats */
+ cp = cpstart = CPU->cpu_part->cp_cpulist;
+ do {
+ int i;
+
+ /*
+ * Don't count CPUs that aren't even in the system
+ * or aren't up yet.
+ */
+ if ((cp->cpu_flags & CPU_EXISTS) == 0) {
+ continue;
+ }
+
+ get_cpu_mstate(cp, msnsecs);
+
+ idle_cum += NSEC_TO_TICK(msnsecs[CMS_IDLE]);
+ sys_cum += NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
+ user_cum += NSEC_TO_TICK(msnsecs[CMS_USER]);
+
+ pgpgin_cum += CPU_STATS(cp, vm.pgpgin);
+ pgpgout_cum += CPU_STATS(cp, vm.pgpgout);
+ pgswapin_cum += CPU_STATS(cp, vm.pgswapin);
+ pgswapout_cum += CPU_STATS(cp, vm.pgswapout);
+
+ cpu_nrunnable_cum += cp->cpu_disp->disp_nrunnable;
+ w_io_cum += CPU_STATS(cp, sys.iowait);
+ for (i = 0; i < NCMSTATES; i++) {
+ tmptime = cp->cpu_intracct[i];
+ scalehrtime(&tmptime);
+ irq_cum += NSEC_TO_TICK(tmptime);
+ }
+
+ for (i = 0; i < PIL_MAX; i++)
+ intr_cum += CPU_STATS(cp, sys.intr[i]);
+
+ pswitch_cum += CPU_STATS(cp, sys.pswitch);
+ forks_cum += CPU_STATS(cp, sys.sysfork);
+ forks_cum += CPU_STATS(cp, sys.sysvfork);
+
+ if (pools_enabled)
+ cp = cp->cpu_next_part;
+ else
+ cp = cp->cpu_next;
+ } while (cp != cpstart);
+
+ lxpr_uiobuf_printf(uiobuf, "cpu %lu %lu %lu %lu %lu %lu %lu\n",
+ user_cum, 0L, sys_cum, idle_cum, 0L, irq_cum, 0L);
+
+ /* Do per processor stats */
+ do {
+ int i;
+
+ ulong_t idle_ticks;
+ ulong_t sys_ticks;
+ ulong_t user_ticks;
+ ulong_t irq_ticks = 0;
+
+ /*
+ * Don't count CPUs that aren't even in the system
+ * or aren't up yet.
+ */
+ if ((cp->cpu_flags & CPU_EXISTS) == 0) {
+ continue;
+ }
+
+ get_cpu_mstate(cp, msnsecs);
+
+ idle_ticks = NSEC_TO_TICK(msnsecs[CMS_IDLE]);
+ sys_ticks = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
+ user_ticks = NSEC_TO_TICK(msnsecs[CMS_USER]);
+
+ for (i = 0; i < NCMSTATES; i++) {
+ tmptime = cp->cpu_intracct[i];
+ scalehrtime(&tmptime);
+ irq_ticks += NSEC_TO_TICK(tmptime);
+ }
+
+ lxpr_uiobuf_printf(uiobuf,
+ "cpu%d %lu %lu %lu %lu %lu %lu %lu\n",
+ cp->cpu_id, user_ticks, 0L, sys_ticks, idle_ticks,
+ 0L, irq_ticks, 0L);
+
+ if (pools_enabled)
+ cp = cp->cpu_next_part;
+ else
+ cp = cp->cpu_next;
+ } while (cp != cpstart);
+
+ mutex_exit(&cpu_lock);
+
+ lxpr_uiobuf_printf(uiobuf,
+ "page %lu %lu\n"
+ "swap %lu %lu\n"
+ "intr %lu\n"
+ "ctxt %lu\n"
+ "btime %lu\n"
+ "processes %lu\n"
+ "procs_running %lu\n"
+ "procs_blocked %lu\n",
+ pgpgin_cum, pgpgout_cum,
+ pgswapin_cum, pgswapout_cum,
+ intr_cum,
+ pswitch_cum,
+ boot_time,
+ forks_cum,
+ cpu_nrunnable_cum,
+ w_io_cum);
+}
+
+/*
+ * lxpr_read_uptime(): read the contents of the "uptime" file.
+ *
+ * format is: "%.2lf, %.2lf",uptime_secs, idle_secs
+ * Use fixed point arithmetic to get 2 decimal places
+ */
+/* ARGSUSED */
+static void
+lxpr_read_uptime(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ cpu_t *cp, *cpstart;
+ int pools_enabled;
+ ulong_t idle_cum = 0;
+ ulong_t cpu_count = 0;
+ ulong_t idle_s;
+ ulong_t idle_cs;
+ ulong_t up_s;
+ ulong_t up_cs;
+ hrtime_t birthtime;
+ hrtime_t centi_sec = 10000000; /* 10^7 */
+
+ ASSERT(lxpnp->lxpr_type == LXPR_UPTIME);
+
+ /* Calculate cumulative stats */
+ mutex_enter(&cpu_lock);
+ pools_enabled = pool_pset_enabled();
+
+ cp = cpstart = CPU;
+ do {
+ /*
+ * Don't count CPUs that aren't even in the system
+ * or aren't up yet.
+ */
+ if ((cp->cpu_flags & CPU_EXISTS) == 0) {
+ continue;
+ }
+
+ idle_cum += CPU_STATS(cp, sys.cpu_ticks_idle);
+ idle_cum += CPU_STATS(cp, sys.cpu_ticks_wait);
+ cpu_count += 1;
+
+ if (pools_enabled)
+ cp = cp->cpu_next_part;
+ else
+ cp = cp->cpu_next;
+ } while (cp != cpstart);
+ mutex_exit(&cpu_lock);
+
+ /* Getting the Zone zsched process startup time */
+ birthtime = LXPTOZ(lxpnp)->zone_zsched->p_mstart;
+ up_cs = (gethrtime() - birthtime) / centi_sec;
+ up_s = up_cs / 100;
+ up_cs %= 100;
+
+ ASSERT(cpu_count > 0);
+ idle_cum /= cpu_count;
+ idle_s = idle_cum / hz;
+ idle_cs = idle_cum % hz;
+ idle_cs *= 100;
+ idle_cs /= hz;
+
+ lxpr_uiobuf_printf(uiobuf,
+ "%ld.%02d %ld.%02d\n", up_s, up_cs, idle_s, idle_cs);
+}
+
+static const char *amd_x_edx[] = {
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, "syscall",
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, "mp",
+ "nx", NULL, "mmxext", NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, "lm", "3dnowext", "3dnow"
+};
+
+static const char *amd_x_ecx[] = {
+ "lahf_lm", NULL, "svm", NULL,
+ "altmovcr8"
+};
+
+static const char *tm_x_edx[] = {
+ "recovery", "longrun", NULL, "lrti"
+};
+
+/*
+ * Intel calls no-execute "xd" in its docs, but Linux still reports it as "nx."
+ */
+static const char *intc_x_edx[] = {
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, "syscall",
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ "nx", NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, "lm", NULL, NULL
+};
+
+static const char *intc_edx[] = {
+ "fpu", "vme", "de", "pse",
+ "tsc", "msr", "pae", "mce",
+ "cx8", "apic", NULL, "sep",
+ "mtrr", "pge", "mca", "cmov",
+ "pat", "pse36", "pn", "clflush",
+ NULL, "dts", "acpi", "mmx",
+ "fxsr", "sse", "sse2", "ss",
+ "ht", "tm", "ia64", "pbe"
+};
+
+/*
+ * "sse3" on linux is called "pni" (Prescott New Instructions).
+ */
+static const char *intc_ecx[] = {
+ "pni", NULL, NULL, "monitor",
+ "ds_cpl", NULL, NULL, "est",
+ "tm2", NULL, "cid", NULL,
+ NULL, "cx16", "xtpr"
+};
+
+static void
+lxpr_read_cpuinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ int i;
+ uint32_t bits;
+ cpu_t *cp, *cpstart;
+ int pools_enabled;
+ const char **fp;
+ char brandstr[CPU_IDSTRLEN];
+ struct cpuid_regs cpr;
+ int maxeax;
+ int std_ecx, std_edx, ext_ecx, ext_edx;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_CPUINFO);
+
+ mutex_enter(&cpu_lock);
+ pools_enabled = pool_pset_enabled();
+
+ cp = cpstart = CPU;
+ do {
+ /*
+ * This returns the maximum eax value for standard cpuid
+ * functions in eax.
+ */
+ cpr.cp_eax = 0;
+ (void) cpuid_insn(cp, &cpr);
+ maxeax = cpr.cp_eax;
+
+ /*
+ * Get standard x86 feature flags.
+ */
+ cpr.cp_eax = 1;
+ (void) cpuid_insn(cp, &cpr);
+ std_ecx = cpr.cp_ecx;
+ std_edx = cpr.cp_edx;
+
+ /*
+ * Now get extended feature flags.
+ */
+ cpr.cp_eax = 0x80000001;
+ (void) cpuid_insn(cp, &cpr);
+ ext_ecx = cpr.cp_ecx;
+ ext_edx = cpr.cp_edx;
+
+ (void) cpuid_getbrandstr(cp, brandstr, CPU_IDSTRLEN);
+
+ lxpr_uiobuf_printf(uiobuf,
+ "processor\t: %d\n"
+ "vendor_id\t: %s\n"
+ "cpu family\t: %d\n"
+ "model\t\t: %d\n"
+ "model name\t: %s\n"
+ "stepping\t: %d\n"
+ "cpu MHz\t\t: %u.%03u\n",
+ cp->cpu_id, cpuid_getvendorstr(cp), cpuid_getfamily(cp),
+ cpuid_getmodel(cp), brandstr, cpuid_getstep(cp),
+ (uint32_t)(cpu_freq_hz / 1000000),
+ ((uint32_t)(cpu_freq_hz / 1000)) % 1000);
+
+ lxpr_uiobuf_printf(uiobuf, "cache size\t: %u KB\n",
+ getl2cacheinfo(cp, NULL, NULL, NULL) / 1024);
+
+ if (is_x86_feature(x86_featureset, X86FSET_HTT)) {
+ /*
+ * 'siblings' is used for HT-style threads
+ */
+ lxpr_uiobuf_printf(uiobuf,
+ "physical id\t: %lu\n"
+ "siblings\t: %u\n",
+ pg_plat_hw_instance_id(cp, PGHW_CHIP),
+ cpuid_get_ncpu_per_chip(cp));
+ }
+
+ /*
+ * Since we're relatively picky about running on older hardware,
+ * we can be somewhat cavalier about the answers to these ones.
+ *
+ * In fact, given the hardware we support, we just say:
+ *
+ * fdiv_bug : no (if we're on a 64-bit kernel)
+ * hlt_bug : no
+ * f00f_bug : no
+ * coma_bug : no
+ * wp : yes (write protect in supervsr mode)
+ */
+ lxpr_uiobuf_printf(uiobuf,
+ "fdiv_bug\t: %s\n"
+ "hlt_bug \t: no\n"
+ "f00f_bug\t: no\n"
+ "coma_bug\t: no\n"
+ "fpu\t\t: %s\n"
+ "fpu_exception\t: %s\n"
+ "cpuid level\t: %d\n"
+ "flags\t\t:",
+#if defined(__i386)
+ fpu_pentium_fdivbug ? "yes" : "no",
+#else
+ "no",
+#endif /* __i386 */
+ fpu_exists ? "yes" : "no", fpu_exists ? "yes" : "no",
+ maxeax);
+
+ for (bits = std_edx, fp = intc_edx, i = 0;
+ i < sizeof (intc_edx) / sizeof (intc_edx[0]); fp++, i++)
+ if ((bits & (1 << i)) != 0 && *fp)
+ lxpr_uiobuf_printf(uiobuf, " %s", *fp);
+
+ /*
+ * name additional features where appropriate
+ */
+ switch (x86_vendor) {
+ case X86_VENDOR_Intel:
+ for (bits = ext_edx, fp = intc_x_edx, i = 0;
+ i < sizeof (intc_x_edx) / sizeof (intc_x_edx[0]);
+ fp++, i++)
+ if ((bits & (1 << i)) != 0 && *fp)
+ lxpr_uiobuf_printf(uiobuf, " %s", *fp);
+ break;
+
+ case X86_VENDOR_AMD:
+ for (bits = ext_edx, fp = amd_x_edx, i = 0;
+ i < sizeof (amd_x_edx) / sizeof (amd_x_edx[0]);
+ fp++, i++)
+ if ((bits & (1 << i)) != 0 && *fp)
+ lxpr_uiobuf_printf(uiobuf, " %s", *fp);
+
+ for (bits = ext_ecx, fp = amd_x_ecx, i = 0;
+ i < sizeof (amd_x_ecx) / sizeof (amd_x_ecx[0]);
+ fp++, i++)
+ if ((bits & (1 << i)) != 0 && *fp)
+ lxpr_uiobuf_printf(uiobuf, " %s", *fp);
+ break;
+
+ case X86_VENDOR_TM:
+ for (bits = ext_edx, fp = tm_x_edx, i = 0;
+ i < sizeof (tm_x_edx) / sizeof (tm_x_edx[0]);
+ fp++, i++)
+ if ((bits & (1 << i)) != 0 && *fp)
+ lxpr_uiobuf_printf(uiobuf, " %s", *fp);
+ break;
+ default:
+ break;
+ }
+
+ for (bits = std_ecx, fp = intc_ecx, i = 0;
+ i < sizeof (intc_ecx) / sizeof (intc_ecx[0]); fp++, i++)
+ if ((bits & (1 << i)) != 0 && *fp)
+ lxpr_uiobuf_printf(uiobuf, " %s", *fp);
+
+ lxpr_uiobuf_printf(uiobuf, "\n\n");
+
+ if (pools_enabled)
+ cp = cp->cpu_next_part;
+ else
+ cp = cp->cpu_next;
+ } while (cp != cpstart);
+
+ mutex_exit(&cpu_lock);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_fd(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_FD_FD);
+ lxpr_uiobuf_seterr(uiobuf, EFAULT);
+}
+
+/*
+ * lxpr_getattr(): Vnode operation for VOP_GETATTR()
+ */
+static int
+lxpr_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
+ caller_context_t *ct)
+{
+ register lxpr_node_t *lxpnp = VTOLXP(vp);
+ lxpr_nodetype_t type = lxpnp->lxpr_type;
+ extern uint_t nproc;
+ int error;
+
+ /*
+ * Return attributes of underlying vnode if ATTR_REAL
+ *
+ * but keep fd files with the symlink permissions
+ */
+ if (lxpnp->lxpr_realvp != NULL && (flags & ATTR_REAL)) {
+ vnode_t *rvp = lxpnp->lxpr_realvp;
+
+ /*
+ * withold attribute information to owner or root
+ */
+ if ((error = VOP_ACCESS(rvp, 0, 0, cr, ct)) != 0) {
+ return (error);
+ }
+
+ /*
+ * now its attributes
+ */
+ if ((error = VOP_GETATTR(rvp, vap, flags, cr, ct)) != 0) {
+ return (error);
+ }
+
+ /*
+ * if it's a file in lx /proc/pid/fd/xx then set its
+ * mode and keep it looking like a symlink
+ */
+ if (type == LXPR_PID_FD_FD) {
+ vap->va_mode = lxpnp->lxpr_mode;
+ vap->va_type = vp->v_type;
+ vap->va_size = 0;
+ vap->va_nlink = 1;
+ }
+ return (0);
+ }
+
+ /* Default attributes, that may be overridden below */
+ bzero(vap, sizeof (*vap));
+ vap->va_atime = vap->va_mtime = vap->va_ctime = lxpnp->lxpr_time;
+ vap->va_nlink = 1;
+ vap->va_type = vp->v_type;
+ vap->va_mode = lxpnp->lxpr_mode;
+ vap->va_fsid = vp->v_vfsp->vfs_dev;
+ vap->va_blksize = DEV_BSIZE;
+ vap->va_uid = lxpnp->lxpr_uid;
+ vap->va_gid = lxpnp->lxpr_gid;
+ vap->va_nodeid = lxpnp->lxpr_ino;
+
+ switch (type) {
+ case LXPR_PROCDIR:
+ vap->va_nlink = nproc + 2 + PROCDIRFILES;
+ vap->va_size = (nproc + 2 + PROCDIRFILES) * LXPR_SDSIZE;
+ break;
+ case LXPR_PIDDIR:
+ vap->va_nlink = PIDDIRFILES;
+ vap->va_size = PIDDIRFILES * LXPR_SDSIZE;
+ break;
+ case LXPR_SELF:
+ vap->va_uid = crgetruid(curproc->p_cred);
+ vap->va_gid = crgetrgid(curproc->p_cred);
+ break;
+ default:
+ break;
+ }
+
+ vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size);
+ return (0);
+}
+
+/*
+ * lxpr_access(): Vnode operation for VOP_ACCESS()
+ */
+static int
+lxpr_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
+{
+ lxpr_node_t *lxpnp = VTOLXP(vp);
+ int shift = 0;
+ proc_t *tp;
+
+ /* lx /proc is a read only file system */
+ if (mode & VWRITE)
+ return (EROFS);
+
+ /*
+ * If this is a restricted file, check access permissions.
+ */
+ switch (lxpnp->lxpr_type) {
+ case LXPR_PIDDIR:
+ return (0);
+ case LXPR_PID_CURDIR:
+ case LXPR_PID_ENV:
+ case LXPR_PID_EXE:
+ case LXPR_PID_MAPS:
+ case LXPR_PID_MEM:
+ case LXPR_PID_ROOTDIR:
+ case LXPR_PID_FDDIR:
+ case LXPR_PID_FD_FD:
+ if ((tp = lxpr_lock(lxpnp->lxpr_pid)) == NULL)
+ return (ENOENT);
+ if (tp != curproc && secpolicy_proc_access(cr) != 0 &&
+ priv_proc_cred_perm(cr, tp, NULL, mode) != 0) {
+ lxpr_unlock(tp);
+ return (EACCES);
+ }
+ lxpr_unlock(tp);
+ default:
+ break;
+ }
+
+ if (lxpnp->lxpr_realvp != NULL) {
+ /*
+ * For these we use the underlying vnode's accessibility.
+ */
+ return (VOP_ACCESS(lxpnp->lxpr_realvp, mode, flags, cr, ct));
+ }
+
+ /* If user is root allow access regardless of permission bits */
+ if (secpolicy_proc_access(cr) == 0)
+ return (0);
+
+ /*
+ * Access check is based on only one of owner, group, public. If not
+ * owner, then check group. If not a member of the group, then check
+ * public access.
+ */
+ if (crgetuid(cr) != lxpnp->lxpr_uid) {
+ shift += 3;
+ if (!groupmember((uid_t)lxpnp->lxpr_gid, cr))
+ shift += 3;
+ }
+
+ mode &= ~(lxpnp->lxpr_mode << shift);
+
+ if (mode == 0)
+ return (0);
+
+ return (EACCES);
+}
+
+/* ARGSUSED */
+static vnode_t *
+lxpr_lookup_not_a_dir(vnode_t *dp, char *comp)
+{
+ return (NULL);
+}
+
+/*
+ * lxpr_lookup(): Vnode operation for VOP_LOOKUP()
+ */
+/* ARGSUSED */
+static int
+lxpr_lookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pathp,
+ int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
+ int *direntflags, pathname_t *realpnp)
+{
+ lxpr_node_t *lxpnp = VTOLXP(dp);
+ lxpr_nodetype_t type = lxpnp->lxpr_type;
+ int error;
+
+ ASSERT(dp->v_type == VDIR);
+ ASSERT(type < LXPR_NFILES);
+
+ /*
+ * we should never get here because the lookup
+ * is done on the realvp for these nodes
+ */
+ ASSERT(type != LXPR_PID_FD_FD &&
+ type != LXPR_PID_CURDIR &&
+ type != LXPR_PID_ROOTDIR);
+
+ /*
+ * restrict lookup permission to owner or root
+ */
+ if ((error = lxpr_access(dp, VEXEC, 0, cr, ct)) != 0) {
+ return (error);
+ }
+
+ /*
+ * Just return the parent vnode if that's where we are trying to go.
+ */
+ if (strcmp(comp, "..") == 0) {
+ VN_HOLD(lxpnp->lxpr_parent);
+ *vpp = lxpnp->lxpr_parent;
+ return (0);
+ }
+
+ /*
+ * Special handling for directory searches. Note: null component name
+ * denotes that the current directory is being searched.
+ */
+ if ((dp->v_type == VDIR) && (*comp == '\0' || strcmp(comp, ".") == 0)) {
+ VN_HOLD(dp);
+ *vpp = dp;
+ return (0);
+ }
+
+ *vpp = (lxpr_lookup_function[type](dp, comp));
+ return ((*vpp == NULL) ? ENOENT : 0);
+}
+
+/*
+ * Do a sequential search on the given directory table
+ */
+static vnode_t *
+lxpr_lookup_common(vnode_t *dp, char *comp, proc_t *p,
+ lxpr_dirent_t *dirtab, int dirtablen)
+{
+ lxpr_node_t *lxpnp;
+ int count;
+
+ for (count = 0; count < dirtablen; count++) {
+ if (strcmp(dirtab[count].d_name, comp) == 0) {
+ lxpnp = lxpr_getnode(dp, dirtab[count].d_type, p, 0);
+ dp = LXPTOV(lxpnp);
+ ASSERT(dp != NULL);
+ return (dp);
+ }
+ }
+ return (NULL);
+}
+
+static vnode_t *
+lxpr_lookup_piddir(vnode_t *dp, char *comp)
+{
+ proc_t *p;
+
+ ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PIDDIR);
+
+ p = lxpr_lock(VTOLXP(dp)->lxpr_pid);
+ if (p == NULL)
+ return (NULL);
+
+ dp = lxpr_lookup_common(dp, comp, p, piddir, PIDDIRFILES);
+
+ lxpr_unlock(p);
+
+ return (dp);
+}
+
+/*
+ * Lookup one of the process's open files.
+ */
+static vnode_t *
+lxpr_lookup_fddir(vnode_t *dp, char *comp)
+{
+ lxpr_node_t *dlxpnp = VTOLXP(dp);
+ lxpr_node_t *lxpnp;
+ vnode_t *vp = NULL;
+ proc_t *p;
+ file_t *fp;
+ uint_t fd;
+ int c;
+ uf_entry_t *ufp;
+ uf_info_t *fip;
+
+ ASSERT(dlxpnp->lxpr_type == LXPR_PID_FDDIR);
+
+ /*
+ * convert the string rendition of the filename
+ * to a file descriptor
+ */
+ fd = 0;
+ while ((c = *comp++) != '\0') {
+ int ofd;
+ if (c < '0' || c > '9')
+ return (NULL);
+
+ ofd = fd;
+ fd = 10*fd + c - '0';
+ /* integer overflow */
+ if (fd / 10 != ofd)
+ return (NULL);
+ }
+
+ /*
+ * get the proc to work with and lock it
+ */
+ p = lxpr_lock(dlxpnp->lxpr_pid);
+ if ((p == NULL))
+ return (NULL);
+
+ /*
+ * If the process is a zombie or system process
+ * it can't have any open files.
+ */
+ if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) {
+ lxpr_unlock(p);
+ return (NULL);
+ }
+
+ /*
+ * get us a fresh node/vnode
+ */
+ lxpnp = lxpr_getnode(dp, LXPR_PID_FD_FD, p, fd);
+
+ /*
+ * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from
+ * going away while we dereference into fi_list.
+ */
+ mutex_exit(&p->p_lock);
+
+ /*
+ * get open file info
+ */
+ fip = (&(p)->p_user.u_finfo);
+ mutex_enter(&fip->fi_lock);
+
+ if (fd < fip->fi_nfiles) {
+ UF_ENTER(ufp, fip, fd);
+ /*
+ * ensure the fd is still kosher.
+ * it may have gone between the readdir and
+ * the lookup
+ */
+ if (fip->fi_list[fd].uf_file == NULL) {
+ mutex_exit(&fip->fi_lock);
+ UF_EXIT(ufp);
+ mutex_enter(&p->p_lock);
+ lxpr_unlock(p);
+ lxpr_freenode(lxpnp);
+ return (NULL);
+ }
+
+ if ((fp = ufp->uf_file) != NULL)
+ vp = fp->f_vnode;
+ UF_EXIT(ufp);
+ }
+ mutex_exit(&fip->fi_lock);
+
+ if (vp == NULL) {
+ mutex_enter(&p->p_lock);
+ lxpr_unlock(p);
+ lxpr_freenode(lxpnp);
+ return (NULL);
+ } else {
+ /*
+ * Fill in the lxpr_node so future references will be able to
+ * find the underlying vnode. The vnode is held on the realvp.
+ */
+ lxpnp->lxpr_realvp = vp;
+ VN_HOLD(lxpnp->lxpr_realvp);
+ }
+
+ mutex_enter(&p->p_lock);
+ lxpr_unlock(p);
+ dp = LXPTOV(lxpnp);
+ ASSERT(dp != NULL);
+
+ return (dp);
+}
+
+static vnode_t *
+lxpr_lookup_netdir(vnode_t *dp, char *comp)
+{
+ ASSERT(VTOLXP(dp)->lxpr_type == LXPR_NETDIR);
+
+ dp = lxpr_lookup_common(dp, comp, NULL, netdir, NETDIRFILES);
+
+ return (dp);
+}
+
+static vnode_t *
+lxpr_lookup_procdir(vnode_t *dp, char *comp)
+{
+ ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PROCDIR);
+
+ /*
+ * We know all the names of files & dirs in our file system structure
+ * except those that are pid names. These change as pids are created/
+ * deleted etc., so we just look for a number as the first char to see
+ * if we are we doing pid lookups.
+ *
+ * Don't need to check for "self" as it is implemented as a symlink
+ */
+ if (*comp >= '0' && *comp <= '9') {
+ pid_t pid = 0;
+ lxpr_node_t *lxpnp = NULL;
+ proc_t *p;
+ int c;
+
+ while ((c = *comp++) != '\0')
+ pid = 10 * pid + c - '0';
+
+ /*
+ * Can't continue if the process is still loading or it doesn't
+ * really exist yet (or maybe it just died!)
+ */
+ p = lxpr_lock(pid);
+ if (p == NULL)
+ return (NULL);
+
+ if (secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
+ lxpr_unlock(p);
+ return (NULL);
+ }
+
+ /*
+ * allocate and fill in a new lxpr node
+ */
+ lxpnp = lxpr_getnode(dp, LXPR_PIDDIR, p, 0);
+
+ lxpr_unlock(p);
+
+ dp = LXPTOV(lxpnp);
+ ASSERT(dp != NULL);
+
+ return (dp);
+ }
+
+ /* Lookup fixed names */
+ return (lxpr_lookup_common(dp, comp, NULL, lxpr_dir, PROCDIRFILES));
+}
+
+/*
+ * lxpr_readdir(): Vnode operation for VOP_READDIR()
+ */
+/* ARGSUSED */
+static int
+lxpr_readdir(vnode_t *dp, uio_t *uiop, cred_t *cr, int *eofp,
+ caller_context_t *ct, int flags)
+{
+ lxpr_node_t *lxpnp = VTOLXP(dp);
+ lxpr_nodetype_t type = lxpnp->lxpr_type;
+ ssize_t uresid;
+ off_t uoffset;
+ int error;
+
+ ASSERT(dp->v_type == VDIR);
+ ASSERT(type < LXPR_NFILES);
+
+ /*
+ * we should never get here because the readdir
+ * is done on the realvp for these nodes
+ */
+ ASSERT(type != LXPR_PID_FD_FD &&
+ type != LXPR_PID_CURDIR &&
+ type != LXPR_PID_ROOTDIR);
+
+ /*
+ * restrict readdir permission to owner or root
+ */
+ if ((error = lxpr_access(dp, VREAD, 0, cr, ct)) != 0)
+ return (error);
+
+ uoffset = uiop->uio_offset;
+ uresid = uiop->uio_resid;
+
+ /* can't do negative reads */
+ if (uoffset < 0 || uresid <= 0)
+ return (EINVAL);
+
+ /* can't read directory entries that don't exist! */
+ if (uoffset % LXPR_SDSIZE)
+ return (ENOENT);
+
+ return (lxpr_readdir_function[lxpnp->lxpr_type](lxpnp, uiop, eofp));
+}
+
+/* ARGSUSED */
+static int
+lxpr_readdir_not_a_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ return (ENOTDIR);
+}
+
+/*
+ * This has the common logic for returning directory entries
+ */
+static int
+lxpr_readdir_common(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp,
+ lxpr_dirent_t *dirtab, int dirtablen)
+{
+ /* bp holds one dirent64 structure */
+ longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
+ dirent64_t *dirent = (dirent64_t *)bp;
+ ssize_t oresid; /* save a copy for testing later */
+ ssize_t uresid;
+
+ oresid = uiop->uio_resid;
+
+ /* clear out the dirent buffer */
+ bzero(bp, sizeof (bp));
+
+ /*
+ * Satisfy user request
+ */
+ while ((uresid = uiop->uio_resid) > 0) {
+ int dirindex;
+ off_t uoffset;
+ int reclen;
+ int error;
+
+ uoffset = uiop->uio_offset;
+ dirindex = (uoffset / LXPR_SDSIZE) - 2;
+
+ if (uoffset == 0) {
+
+ dirent->d_ino = lxpnp->lxpr_ino;
+ dirent->d_name[0] = '.';
+ dirent->d_name[1] = '\0';
+ reclen = DIRENT64_RECLEN(1);
+
+ } else if (uoffset == LXPR_SDSIZE) {
+
+ dirent->d_ino = lxpr_parentinode(lxpnp);
+ dirent->d_name[0] = '.';
+ dirent->d_name[1] = '.';
+ dirent->d_name[2] = '\0';
+ reclen = DIRENT64_RECLEN(2);
+
+ } else if (dirindex < dirtablen) {
+ int slen = strlen(dirtab[dirindex].d_name);
+
+ dirent->d_ino = lxpr_inode(dirtab[dirindex].d_type,
+ lxpnp->lxpr_pid, 0);
+
+ ASSERT(slen < LXPNSIZ);
+ (void) strcpy(dirent->d_name, dirtab[dirindex].d_name);
+ reclen = DIRENT64_RECLEN(slen);
+
+ } else {
+ /* Run out of table entries */
+ if (eofp) {
+ *eofp = 1;
+ }
+ return (0);
+ }
+
+ dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
+ dirent->d_reclen = (ushort_t)reclen;
+
+ /*
+ * if the size of the data to transfer is greater
+ * that that requested then we can't do it this transfer.
+ */
+ if (reclen > uresid) {
+ /*
+ * Error if no entries have been returned yet.
+ */
+ if (uresid == oresid) {
+ return (EINVAL);
+ }
+ break;
+ }
+
+ /*
+ * uiomove() updates both uiop->uio_resid and uiop->uio_offset
+ * by the same amount. But we want uiop->uio_offset to change
+ * in increments of LXPR_SDSIZE, which is different from the
+ * number of bytes being returned to the user. So we set
+ * uiop->uio_offset separately, ignoring what uiomove() does.
+ */
+ if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
+ uiop)) != 0)
+ return (error);
+
+ uiop->uio_offset = uoffset + LXPR_SDSIZE;
+ }
+
+ /* Have run out of space, but could have just done last table entry */
+ if (eofp) {
+ *eofp =
+ (uiop->uio_offset >= ((dirtablen+2) * LXPR_SDSIZE)) ? 1 : 0;
+ }
+ return (0);
+}
+
+
+static int
+lxpr_readdir_procdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ /* bp holds one dirent64 structure */
+ longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
+ dirent64_t *dirent = (dirent64_t *)bp;
+ ssize_t oresid; /* save a copy for testing later */
+ ssize_t uresid;
+ off_t uoffset;
+ zoneid_t zoneid;
+ pid_t pid;
+ int error;
+ int ceof;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PROCDIR);
+
+ oresid = uiop->uio_resid;
+ zoneid = LXPTOZ(lxpnp)->zone_id;
+
+ /*
+ * We return directory entries in the order: "." and ".." then the
+ * unique lxproc files, then the directories corresponding to the
+ * running processes. We have defined this as the ordering because
+ * it allows us to more easily keep track of where we are betwen calls
+ * to getdents(). If the number of processes changes between calls
+ * then we can't lose track of where we are in the lxproc files.
+ */
+
+ /* Do the fixed entries */
+ error = lxpr_readdir_common(lxpnp, uiop, &ceof, lxpr_dir,
+ PROCDIRFILES);
+
+ /* Finished if we got an error or if we couldn't do all the table */
+ if (error != 0 || ceof == 0)
+ return (error);
+
+ /* clear out the dirent buffer */
+ bzero(bp, sizeof (bp));
+
+ /* Do the process entries */
+ while ((uresid = uiop->uio_resid) > 0) {
+ proc_t *p;
+ int len;
+ int reclen;
+ int i;
+
+ uoffset = uiop->uio_offset;
+
+ /*
+ * Stop when entire proc table has been examined.
+ */
+ i = (uoffset / LXPR_SDSIZE) - 2 - PROCDIRFILES;
+ if (i >= v.v_proc) {
+ /* Run out of table entries */
+ if (eofp) {
+ *eofp = 1;
+ }
+ return (0);
+ }
+ mutex_enter(&pidlock);
+
+ /*
+ * Skip indices for which there is no pid_entry, PIDs for
+ * which there is no corresponding process, a PID of 0,
+ * and anything the security policy doesn't allow
+ * us to look at.
+ */
+ if ((p = pid_entry(i)) == NULL || p->p_stat == SIDL ||
+ p->p_pid == 0 ||
+ secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
+ mutex_exit(&pidlock);
+ goto next;
+ }
+ mutex_exit(&pidlock);
+
+ /*
+ * Convert pid to the Linux default of 1 if we're the zone's
+ * init process, otherwise use the value from the proc
+ * structure
+ */
+ pid = ((p->p_pid != curproc->p_zone->zone_proc_initpid) ?
+ p->p_pid : 1);
+
+ /*
+ * If this /proc was mounted in the global zone, view
+ * all procs; otherwise, only view zone member procs.
+ */
+ if (zoneid != GLOBAL_ZONEID && p->p_zone->zone_id != zoneid) {
+ goto next;
+ }
+
+ ASSERT(p->p_stat != 0);
+
+ dirent->d_ino = lxpr_inode(LXPR_PIDDIR, pid, 0);
+ len = snprintf(dirent->d_name, LXPNSIZ, "%d", pid);
+ ASSERT(len < LXPNSIZ);
+ reclen = DIRENT64_RECLEN(len);
+
+ dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
+ dirent->d_reclen = (ushort_t)reclen;
+
+ /*
+ * if the size of the data to transfer is greater
+ * that that requested then we can't do it this transfer.
+ */
+ if (reclen > uresid) {
+ /*
+ * Error if no entries have been returned yet.
+ */
+ if (uresid == oresid)
+ return (EINVAL);
+ break;
+ }
+
+ /*
+ * uiomove() updates both uiop->uio_resid and uiop->uio_offset
+ * by the same amount. But we want uiop->uio_offset to change
+ * in increments of LXPR_SDSIZE, which is different from the
+ * number of bytes being returned to the user. So we set
+ * uiop->uio_offset separately, in the increment of this for
+ * the loop, ignoring what uiomove() does.
+ */
+ if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
+ uiop)) != 0)
+ return (error);
+next:
+ uiop->uio_offset = uoffset + LXPR_SDSIZE;
+ }
+
+ if (eofp != NULL) {
+ *eofp = (uiop->uio_offset >=
+ ((v.v_proc + PROCDIRFILES + 2) * LXPR_SDSIZE)) ? 1 : 0;
+ }
+
+ return (0);
+}
+
+static int
+lxpr_readdir_piddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ proc_t *p;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PIDDIR);
+
+ /* can't read its contents if it died */
+ mutex_enter(&pidlock);
+
+ p = prfind((lxpnp->lxpr_pid == 1) ?
+ curproc->p_zone->zone_proc_initpid : lxpnp->lxpr_pid);
+
+ if (p == NULL || p->p_stat == SIDL) {
+ mutex_exit(&pidlock);
+ return (ENOENT);
+ }
+ mutex_exit(&pidlock);
+
+ return (lxpr_readdir_common(lxpnp, uiop, eofp, piddir, PIDDIRFILES));
+}
+
+static int
+lxpr_readdir_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_NETDIR);
+ return (lxpr_readdir_common(lxpnp, uiop, eofp, netdir, NETDIRFILES));
+}
+
+static int
+lxpr_readdir_fddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ /* bp holds one dirent64 structure */
+ longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
+ dirent64_t *dirent = (dirent64_t *)bp;
+ ssize_t oresid; /* save a copy for testing later */
+ ssize_t uresid;
+ off_t uoffset;
+ int error;
+ int ceof;
+ proc_t *p;
+ int fddirsize = -1;
+ uf_info_t *fip;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_FDDIR);
+
+ oresid = uiop->uio_resid;
+
+ /* can't read its contents if it died */
+ p = lxpr_lock(lxpnp->lxpr_pid);
+ if (p == NULL)
+ return (ENOENT);
+
+ if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas))
+ fddirsize = 0;
+
+ /*
+ * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from
+ * going away while we iterate over its fi_list.
+ */
+ mutex_exit(&p->p_lock);
+
+ /* Get open file info */
+ fip = (&(p)->p_user.u_finfo);
+ mutex_enter(&fip->fi_lock);
+
+ if (fddirsize == -1)
+ fddirsize = fip->fi_nfiles;
+
+ /* Do the fixed entries (in this case just "." & "..") */
+ error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0);
+
+ /* Finished if we got an error or if we couldn't do all the table */
+ if (error != 0 || ceof == 0)
+ goto out;
+
+ /* clear out the dirent buffer */
+ bzero(bp, sizeof (bp));
+
+ /*
+ * Loop until user's request is satisfied or until
+ * all file descriptors have been examined.
+ */
+ for (; (uresid = uiop->uio_resid) > 0;
+ uiop->uio_offset = uoffset + LXPR_SDSIZE) {
+ int reclen;
+ int fd;
+ int len;
+
+ uoffset = uiop->uio_offset;
+
+ /*
+ * Stop at the end of the fd list
+ */
+ fd = (uoffset / LXPR_SDSIZE) - 2;
+ if (fd >= fddirsize) {
+ if (eofp) {
+ *eofp = 1;
+ }
+ goto out;
+ }
+
+ if (fip->fi_list[fd].uf_file == NULL)
+ continue;
+
+ dirent->d_ino = lxpr_inode(LXPR_PID_FD_FD, lxpnp->lxpr_pid, fd);
+ len = snprintf(dirent->d_name, LXPNSIZ, "%d", fd);
+ ASSERT(len < LXPNSIZ);
+ reclen = DIRENT64_RECLEN(len);
+
+ dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
+ dirent->d_reclen = (ushort_t)reclen;
+
+ if (reclen > uresid) {
+ /*
+ * Error if no entries have been returned yet.
+ */
+ if (uresid == oresid)
+ error = EINVAL;
+ goto out;
+ }
+
+ if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
+ uiop)) != 0)
+ goto out;
+ }
+
+ if (eofp != NULL) {
+ *eofp =
+ (uiop->uio_offset >= ((fddirsize+2) * LXPR_SDSIZE)) ? 1 : 0;
+ }
+
+out:
+ mutex_exit(&fip->fi_lock);
+ mutex_enter(&p->p_lock);
+ lxpr_unlock(p);
+ return (error);
+}
+
+
+/*
+ * lxpr_readlink(): Vnode operation for VOP_READLINK()
+ */
+/* ARGSUSED */
+static int
+lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
+{
+ char bp[MAXPATHLEN + 1];
+ size_t buflen = sizeof (bp);
+ lxpr_node_t *lxpnp = VTOLXP(vp);
+ vnode_t *rvp = lxpnp->lxpr_realvp;
+ pid_t pid;
+ int error = 0;
+
+ /* must be a symbolic link file */
+ if (vp->v_type != VLNK)
+ return (EINVAL);
+
+ /* Try to produce a symlink name for anything that has a realvp */
+ if (rvp != NULL) {
+ if ((error = lxpr_access(vp, VREAD, 0, CRED(), ct)) != 0)
+ return (error);
+ if ((error = vnodetopath(NULL, rvp, bp, buflen, CRED())) != 0)
+ return (error);
+ } else {
+ switch (lxpnp->lxpr_type) {
+ case LXPR_SELF:
+ /*
+ * Convert pid to the Linux default of 1 if we're the
+ * zone's init process
+ */
+ pid = ((curproc->p_pid !=
+ curproc->p_zone->zone_proc_initpid)
+ ? curproc->p_pid : 1);
+
+ /*
+ * Don't need to check result as every possible int
+ * will fit within MAXPATHLEN bytes.
+ */
+ (void) snprintf(bp, buflen, "%d", pid);
+ break;
+ case LXPR_PID_CURDIR:
+ case LXPR_PID_ROOTDIR:
+ case LXPR_PID_EXE:
+ return (EACCES);
+ default:
+ /*
+ * Need to return error so that nothing thinks
+ * that the symlink is empty and hence "."
+ */
+ return (EINVAL);
+ }
+ }
+
+ /* copy the link data to user space */
+ return (uiomove(bp, strlen(bp), UIO_READ, uiop));
+}
+
+/*
+ * lxpr_inactive(): Vnode operation for VOP_INACTIVE()
+ * Vnode is no longer referenced, deallocate the file
+ * and all its resources.
+ */
+/* ARGSUSED */
+static void
+lxpr_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
+{
+ lxpr_freenode(VTOLXP(vp));
+}
+
+/*
+ * lxpr_sync(): Vnode operation for VOP_SYNC()
+ */
+static int
+lxpr_sync()
+{
+ /*
+ * Nothing to sync but this function must never fail
+ */
+ return (0);
+}
+
+/*
+ * lxpr_cmp(): Vnode operation for VOP_CMP()
+ */
+static int
+lxpr_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
+{
+ vnode_t *rvp;
+
+ while (vn_matchops(vp1, lxpr_vnodeops) &&
+ (rvp = VTOLXP(vp1)->lxpr_realvp) != NULL) {
+ vp1 = rvp;
+ }
+
+ while (vn_matchops(vp2, lxpr_vnodeops) &&
+ (rvp = VTOLXP(vp2)->lxpr_realvp) != NULL) {
+ vp2 = rvp;
+ }
+
+ if (vn_matchops(vp1, lxpr_vnodeops) || vn_matchops(vp2, lxpr_vnodeops))
+ return (vp1 == vp2);
+
+ return (VOP_CMP(vp1, vp2, ct));
+}
+
+/*
+ * lxpr_realvp(): Vnode operation for VOP_REALVP()
+ */
+static int
+lxpr_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
+{
+ vnode_t *rvp;
+
+ if ((rvp = VTOLXP(vp)->lxpr_realvp) != NULL) {
+ vp = rvp;
+ if (VOP_REALVP(vp, &rvp, ct) == 0)
+ vp = rvp;
+ }
+
+ *vpp = vp;
+ return (0);
+}
diff --git a/usr/src/uts/common/fs/lxproc/lxproc.h b/usr/src/uts/common/fs/lxproc/lxproc.h
new file mode 100644
index 0000000000..50cafdac0a
--- /dev/null
+++ b/usr/src/uts/common/fs/lxproc/lxproc.h
@@ -0,0 +1,273 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _LXPROC_H
+#define _LXPROC_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * lxproc.h: declarations, data structures and macros for lxprocfs
+ */
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/policy.h>
+#include <sys/debug.h>
+#include <sys/dirent.h>
+#include <sys/errno.h>
+#include <sys/file.h>
+#include <sys/kmem.h>
+#include <sys/pathname.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/var.h>
+#include <sys/user.h>
+#include <sys/t_lock.h>
+#include <sys/sysmacros.h>
+#include <sys/cred.h>
+#include <sys/priv.h>
+#include <sys/vnode.h>
+#include <sys/vfs.h>
+#include <sys/statvfs.h>
+#include <sys/cmn_err.h>
+#include <sys/zone.h>
+#include <sys/uio.h>
+#include <sys/utsname.h>
+#include <sys/dnlc.h>
+#include <sys/atomic.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+#include <vm/as.h>
+#include <vm/anon.h>
+
+#define LX_SIGHUP 1
+#define LX_SIGINT 2
+#define LX_SIGQUIT 3
+#define LX_SIGILL 4
+#define LX_SIGTRAP 5
+#define LX_SIGABRT 6
+#define LX_SIGIOT 6
+#define LX_SIGBUS 7
+#define LX_SIGFPE 8
+#define LX_SIGKILL 9
+#define LX_SIGUSR1 10
+#define LX_SIGSEGV 11
+#define LX_SIGUSR2 12
+#define LX_SIGPIPE 13
+#define LX_SIGALRM 14
+#define LX_SIGTERM 15
+#define LX_SIGSTKFLT 16
+#define LX_SIGCHLD 17
+#define LX_SIGCONT 18
+#define LX_SIGSTOP 19
+#define LX_SIGTSTP 20
+#define LX_SIGTTIN 21
+#define LX_SIGTTOU 22
+#define LX_SIGURG 23
+#define LX_SIGXCPU 24
+#define LX_SIGXFSZ 25
+#define LX_SIGVTALRM 26
+#define LX_SIGPROF 27
+#define LX_SIGWINCH 28
+#define LX_SIGIO 29
+#define LX_SIGPOLL LX_SIGIO
+#define LX_SIGPWR 30
+#define LX_SIGSYS 31
+#define LX_SIGUNUSED 31
+
+#define LX_NSIG 64 /* Linux _NSIG */
+
+#define LX_SIGRTMIN 32
+#define LX_SIGRTMAX LX_NSIG
+
+/*
+ * Convert a vnode into an lxpr_mnt_t
+ */
+#define VTOLXPM(vp) ((lxpr_mnt_t *)(vp)->v_vfsp->vfs_data)
+
+/*
+ * convert a vnode into an lxpr_node
+ */
+#define VTOLXP(vp) ((lxpr_node_t *)(vp)->v_data)
+
+/*
+ * convert a lxprnode into a vnode
+ */
+#define LXPTOV(lxpnp) ((lxpnp)->lxpr_vnode)
+
+/*
+ * convert a lxpr_node into zone for fs
+ */
+#define LXPTOZ(lxpnp) \
+ (((lxpr_mnt_t *)(lxpnp)->lxpr_vnode->v_vfsp->vfs_data)->lxprm_zone)
+
+#define LXPNSIZ 256 /* max size of lx /proc file name entries */
+
+/*
+ * Pretend that a directory entry takes 16 bytes
+ */
+#define LXPR_SDSIZE 16
+
+/*
+ * Node/file types for lx /proc files
+ * (directories and files contained therein).
+ */
+typedef enum lxpr_nodetype {
+ LXPR_PROCDIR, /* /proc */
+ LXPR_PIDDIR, /* /proc/<pid> */
+ LXPR_PID_CMDLINE, /* /proc/<pid>/cmdline */
+ LXPR_PID_CPU, /* /proc/<pid>/cpu */
+ LXPR_PID_CURDIR, /* /proc/<pid>/cwd */
+ LXPR_PID_ENV, /* /proc/<pid>/environ */
+ LXPR_PID_EXE, /* /proc/<pid>/exe */
+ LXPR_PID_MAPS, /* /proc/<pid>/maps */
+ LXPR_PID_MEM, /* /proc/<pid>/mem */
+ LXPR_PID_ROOTDIR, /* /proc/<pid>/root */
+ LXPR_PID_STAT, /* /proc/<pid>/stat */
+ LXPR_PID_STATM, /* /proc/<pid>/statm */
+ LXPR_PID_STATUS, /* /proc/<pid>/status */
+ LXPR_PID_FDDIR, /* /proc/<pid>/fd */
+ LXPR_PID_FD_FD, /* /proc/<pid>/fd/nn */
+ LXPR_CMDLINE, /* /proc/cmdline */
+ LXPR_CPUINFO, /* /proc/cpuinfo */
+ LXPR_DEVICES, /* /proc/devices */
+ LXPR_DMA, /* /proc/dma */
+ LXPR_FILESYSTEMS, /* /proc/filesystems */
+ LXPR_INTERRUPTS, /* /proc/interrupts */
+ LXPR_IOPORTS, /* /proc/ioports */
+ LXPR_KCORE, /* /proc/kcore */
+ LXPR_KMSG, /* /proc/kmsg */
+ LXPR_LOADAVG, /* /proc/loadavg */
+ LXPR_MEMINFO, /* /proc/meminfo */
+ LXPR_MOUNTS, /* /proc/mounts */
+ LXPR_NETDIR, /* /proc/net */
+ LXPR_NET_ARP, /* /proc/net/arp */
+ LXPR_NET_DEV, /* /proc/net/dev */
+ LXPR_NET_DEV_MCAST, /* /proc/net/dev_mcast */
+ LXPR_NET_IGMP, /* /proc/net/igmp */
+ LXPR_NET_IP_MR_CACHE, /* /proc/net/ip_mr_cache */
+ LXPR_NET_IP_MR_VIF, /* /proc/net/ip_mr_vif */
+ LXPR_NET_MCFILTER, /* /proc/net/mcfilter */
+ LXPR_NET_NETSTAT, /* /proc/net/netstat */
+ LXPR_NET_RAW, /* /proc/net/raw */
+ LXPR_NET_ROUTE, /* /proc/net/route */
+ LXPR_NET_RPC, /* /proc/net/rpc */
+ LXPR_NET_RT_CACHE, /* /proc/net/rt_cache */
+ LXPR_NET_SOCKSTAT, /* /proc/net/sockstat */
+ LXPR_NET_SNMP, /* /proc/net/snmp */
+ LXPR_NET_STAT, /* /proc/net/stat */
+ LXPR_NET_TCP, /* /proc/net/tcp */
+ LXPR_NET_UDP, /* /proc/net/udp */
+ LXPR_NET_UNIX, /* /proc/net/unix */
+ LXPR_PARTITIONS, /* /proc/partitions */
+ LXPR_SELF, /* /proc/self */
+ LXPR_STAT, /* /proc/stat */
+ LXPR_UPTIME, /* /proc/uptime */
+ LXPR_VERSION, /* /proc/version */
+ LXPR_NFILES /* number of lx /proc file types */
+} lxpr_nodetype_t;
+
+/*
+ * Number of fds allowed for in the inode number calculation
+ * per process (if a process has more fds then inode numbers
+ * may be duplicated)
+ */
+#define LXPR_FD_PERPROC 2000
+
+/*
+ * external dirent characteristics
+ */
+#define LXPRMAXNAMELEN 14
+typedef struct {
+ lxpr_nodetype_t d_type;
+ char d_name[LXPRMAXNAMELEN];
+} lxpr_dirent_t;
+
+/*
+ * This is the lxprocfs private data object
+ * which is attached to v_data in the vnode structure
+ */
+typedef struct lxpr_node {
+ lxpr_nodetype_t lxpr_type; /* type of this node */
+ vnode_t *lxpr_vnode; /* vnode for the node */
+ vnode_t *lxpr_parent; /* parent directory */
+ vnode_t *lxpr_realvp; /* real vnode, file in dirs */
+ timestruc_t lxpr_time; /* creation etc time for file */
+ mode_t lxpr_mode; /* file mode bits */
+ uid_t lxpr_uid; /* file owner */
+ gid_t lxpr_gid; /* file group owner */
+ pid_t lxpr_pid; /* pid of proc referred to */
+ ino_t lxpr_ino; /* node id */
+ ldi_handle_t lxpr_cons_ldih; /* ldi handle for console device */
+} lxpr_node_t;
+
+struct zone; /* forward declaration */
+
+/*
+ * This is the lxprocfs private data object
+ * which is attached to vfs_data in the vfs structure
+ */
+typedef struct lxpr_mnt {
+ lxpr_node_t *lxprm_node; /* node at root of proc mount */
+ struct zone *lxprm_zone; /* zone for this mount */
+ ldi_ident_t lxprm_li; /* ident for ldi */
+} lxpr_mnt_t;
+
+extern vnodeops_t *lxpr_vnodeops;
+extern int nproc_highbit; /* highbit(v.v_nproc) */
+
+typedef struct mounta mounta_t;
+
+extern void lxpr_initnodecache();
+extern void lxpr_fininodecache();
+extern void lxpr_initrootnode(lxpr_node_t **, vfs_t *);
+extern ino_t lxpr_inode(lxpr_nodetype_t, pid_t, int);
+extern ino_t lxpr_parentinode(lxpr_node_t *);
+extern lxpr_node_t *lxpr_getnode(vnode_t *, lxpr_nodetype_t, proc_t *, int);
+extern void lxpr_freenode(lxpr_node_t *);
+
+typedef struct lxpr_uiobuf lxpr_uiobuf_t;
+extern lxpr_uiobuf_t *lxpr_uiobuf_new(uio_t *);
+extern void lxpr_uiobuf_free(lxpr_uiobuf_t *);
+extern int lxpr_uiobuf_flush(lxpr_uiobuf_t *);
+extern void lxpr_uiobuf_seek(lxpr_uiobuf_t *, offset_t);
+extern void lxpr_uiobuf_write(lxpr_uiobuf_t *, const char *, size_t);
+extern void lxpr_uiobuf_printf(lxpr_uiobuf_t *, const char *, ...);
+extern void lxpr_uiobuf_seterr(lxpr_uiobuf_t *, int);
+
+proc_t *lxpr_lock(pid_t);
+void lxpr_unlock(proc_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LXPROC_H */
diff --git a/usr/src/uts/common/fs/nfs/nfs3_vfsops.c b/usr/src/uts/common/fs/nfs/nfs3_vfsops.c
index 207a708771..2176dcb9de 100644
--- a/usr/src/uts/common/fs/nfs/nfs3_vfsops.c
+++ b/usr/src/uts/common/fs/nfs/nfs3_vfsops.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2013, Joyent, Inc. All rights reserved.
*/
/*
diff --git a/usr/src/uts/common/fs/nfs/nfs3_vnops.c b/usr/src/uts/common/fs/nfs/nfs3_vnops.c
index 291e5cd337..450cc22683 100644
--- a/usr/src/uts/common/fs/nfs/nfs3_vnops.c
+++ b/usr/src/uts/common/fs/nfs/nfs3_vnops.c
@@ -29,7 +29,7 @@
*/
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#include <sys/param.h>
@@ -3352,10 +3352,9 @@ nfs3rename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr,
if (nvp)
vnevent_rename_dest(nvp, ndvp, nnm, ct);
- if (odvp != ndvp)
- vnevent_rename_dest_dir(ndvp, ct);
ASSERT(ovp != NULL);
vnevent_rename_src(ovp, odvp, onm, ct);
+ vnevent_rename_dest_dir(ndvp, ovp, nnm, ct);
}
if (nvp) {
diff --git a/usr/src/uts/common/fs/nfs/nfs4_vfsops.c b/usr/src/uts/common/fs/nfs/nfs4_vfsops.c
index 1752a28542..2350454d9c 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_vfsops.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_vfsops.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2013, Joyent, Inc. All rights reserved.
*/
/*
diff --git a/usr/src/uts/common/fs/nfs/nfs4_vnops.c b/usr/src/uts/common/fs/nfs/nfs4_vnops.c
index b9ba9a6ead..6b1be81ca8 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_vnops.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_vnops.c
@@ -32,7 +32,7 @@
*/
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#include <sys/param.h>
@@ -8059,8 +8059,9 @@ link_call:
* vnode if it already existed.
*/
if (error == 0) {
- vnode_t *tvp;
+ vnode_t *tvp, *tovp;
rnode4_t *trp;
+
/*
* Notify the vnode. Each links is represented by
* a different vnode, in nfsv4.
@@ -8073,23 +8074,20 @@ link_call:
vnevent_rename_dest(tvp, ndvp, nnm, ct);
}
- /*
- * if the source and destination directory are not the
- * same notify the destination directory.
- */
- if (VTOR4(odvp) != VTOR4(ndvp)) {
- trp = VTOR4(ndvp);
- tvp = ndvp;
- if (IS_SHADOW(ndvp, trp))
- tvp = RTOV4(trp);
- vnevent_rename_dest_dir(tvp, ct);
- }
-
trp = VTOR4(ovp);
- tvp = ovp;
+ tovp = ovp;
if (IS_SHADOW(ovp, trp))
+ tovp = RTOV4(trp);
+
+ vnevent_rename_src(tovp, odvp, onm, ct);
+
+ trp = VTOR4(ndvp);
+ tvp = ndvp;
+
+ if (IS_SHADOW(ndvp, trp))
tvp = RTOV4(trp);
- vnevent_rename_src(tvp, odvp, onm, ct);
+
+ vnevent_rename_dest_dir(tvp, tovp, nnm, ct);
}
if (nvp) {
diff --git a/usr/src/uts/common/fs/nfs/nfs_server.c b/usr/src/uts/common/fs/nfs/nfs_server.c
index 7e94c62734..93ccd28122 100644
--- a/usr/src/uts/common/fs/nfs/nfs_server.c
+++ b/usr/src/uts/common/fs/nfs/nfs_server.c
@@ -23,6 +23,7 @@
* Copyright (c) 2011 Bayard G. Bell. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2012 Joyent, Inc. All rights reserved.
*/
/*
@@ -2570,6 +2571,9 @@ nfs_srvinit(void)
{
int error;
+ if (getzoneid() != GLOBAL_ZONEID)
+ return (EACCES);
+
error = nfs_exportinit();
if (error != 0)
return (error);
diff --git a/usr/src/uts/common/fs/nfs/nfs_vfsops.c b/usr/src/uts/common/fs/nfs/nfs_vfsops.c
index 57b21778b4..ffd5380a86 100644
--- a/usr/src/uts/common/fs/nfs/nfs_vfsops.c
+++ b/usr/src/uts/common/fs/nfs/nfs_vfsops.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2013, Joyent, Inc. All rights reserved.
*
* Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
* All rights reserved.
diff --git a/usr/src/uts/common/fs/nfs/nfs_vnops.c b/usr/src/uts/common/fs/nfs/nfs_vnops.c
index 4ac6450381..77d3541208 100644
--- a/usr/src/uts/common/fs/nfs/nfs_vnops.c
+++ b/usr/src/uts/common/fs/nfs/nfs_vnops.c
@@ -26,7 +26,7 @@
*/
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#include <sys/param.h>
@@ -2687,11 +2687,9 @@ nfsrename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr,
if (nvp)
vnevent_rename_dest(nvp, ndvp, nnm, ct);
- if (odvp != ndvp)
- vnevent_rename_dest_dir(ndvp, ct);
-
ASSERT(ovp != NULL);
vnevent_rename_src(ovp, odvp, onm, ct);
+ vnevent_rename_dest_dir(ndvp, ovp, nnm, ct);
}
if (nvp) {
diff --git a/usr/src/uts/common/fs/pcfs/pc_dir.c b/usr/src/uts/common/fs/pcfs/pc_dir.c
index a9ee604b7c..21a0b1a4bd 100644
--- a/usr/src/uts/common/fs/pcfs/pc_dir.c
+++ b/usr/src/uts/common/fs/pcfs/pc_dir.c
@@ -24,6 +24,10 @@
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/systm.h>
@@ -822,10 +826,10 @@ top:
return (error);
}
}
-out:
- vnevent_rename_src(PCTOV(pcp), PCTOV(dp), snm, ctp);
- if (dp != tdp) {
- vnevent_rename_dest_dir(PCTOV(tdp), ctp);
+
+ if (error == 0) {
+ vnevent_rename_src(PCTOV(pcp), PCTOV(dp), snm, ctp);
+ vnevent_rename_dest_dir(PCTOV(tdp), PCTOV(pcp), tnm, ctp);
}
VN_RELE(PCTOV(pcp));
diff --git a/usr/src/uts/common/fs/portfs/port_vnops.c b/usr/src/uts/common/fs/portfs/port_vnops.c
index b2f5088e06..ab95c0a1f8 100644
--- a/usr/src/uts/common/fs/portfs/port_vnops.c
+++ b/usr/src/uts/common/fs/portfs/port_vnops.c
@@ -24,6 +24,10 @@
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
#include <sys/types.h>
#include <sys/vnode.h>
#include <sys/vfs_opreg.h>
@@ -294,14 +298,10 @@ port_poll(vnode_t *vp, short events, int anyyet, short *reventsp,
levents |= POLLOUT;
levents &= events;
*reventsp = levents;
- if (levents == 0) {
- if (!anyyet) {
- *phpp = &pp->port_pollhd;
- portq->portq_flags |=
- events & POLLIN ? PORTQ_POLLIN : 0;
- portq->portq_flags |=
- events & POLLOUT ? PORTQ_POLLOUT : 0;
- }
+ if ((levents == 0 && !anyyet) || (events & POLLET)) {
+ *phpp = &pp->port_pollhd;
+ portq->portq_flags |= events & POLLIN ? PORTQ_POLLIN : 0;
+ portq->portq_flags |= events & POLLOUT ? PORTQ_POLLOUT : 0;
}
mutex_exit(&portq->portq_mutex);
return (0);
diff --git a/usr/src/uts/common/fs/proc/prcontrol.c b/usr/src/uts/common/fs/proc/prcontrol.c
index a73a64a4a4..a5679a8afb 100644
--- a/usr/src/uts/common/fs/proc/prcontrol.c
+++ b/usr/src/uts/common/fs/proc/prcontrol.c
@@ -2276,9 +2276,17 @@ pr_szoneid(proc_t *p, zoneid_t zoneid, cred_t *cr)
return (EPERM);
if (zoneid != GLOBAL_ZONEID && zoneid != p->p_zone->zone_id)
return (EINVAL);
- if ((zptr = zone_find_by_id(zoneid)) == NULL)
- return (EINVAL);
+ /*
+ * We cannot hold p_lock when we call zone_find_by_id since that can
+ * lead to a deadlock. zone_find_by_id() takes zonehash_lock.
+ * zone_enter() can hold the zonehash_lock and needs p_lock when it
+ * calls task_join.
+ */
mutex_exit(&p->p_lock);
+ if ((zptr = zone_find_by_id(zoneid)) == NULL) {
+ mutex_enter(&p->p_lock);
+ return (EINVAL);
+ }
mutex_enter(&p->p_crlock);
oldcred = p->p_cred;
crhold(oldcred);
diff --git a/usr/src/uts/common/fs/proc/prdata.h b/usr/src/uts/common/fs/proc/prdata.h
index 8ea516bf82..d3898ca9ea 100644
--- a/usr/src/uts/common/fs/proc/prdata.h
+++ b/usr/src/uts/common/fs/proc/prdata.h
@@ -23,6 +23,10 @@
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ */
+
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
diff --git a/usr/src/uts/common/fs/proc/prvnops.c b/usr/src/uts/common/fs/proc/prvnops.c
index 411c9b8b0b..2c562d80eb 100644
--- a/usr/src/uts/common/fs/proc/prvnops.c
+++ b/usr/src/uts/common/fs/proc/prvnops.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -2686,6 +2686,103 @@ prread(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, caller_context_t *ct)
#endif
}
+/*
+ * We make pr_write_psinfo_fname() somewhat simpler by asserting at compile
+ * time that PRFNSZ has the same definition as MAXCOMLEN.
+ */
+#if PRFNSZ != MAXCOMLEN
+#error PRFNSZ/MAXCOMLEN mismatch
+#endif
+
+static int
+pr_write_psinfo_fname(prnode_t *pnp, uio_t *uiop)
+{
+ char fname[PRFNSZ];
+ int offset = offsetof(psinfo_t, pr_fname), error;
+
+#ifdef _SYSCALL32_IMPL
+ if (curproc->p_model != DATAMODEL_LP64)
+ offset = offsetof(psinfo32_t, pr_fname);
+#endif
+
+ /*
+ * If this isn't a write to pr_fname (or if the size doesn't match
+ * PRFNSZ) return.
+ */
+ if (uiop->uio_offset != offset || uiop->uio_resid != PRFNSZ)
+ return (0);
+
+ if ((error = uiomove(fname, PRFNSZ, UIO_WRITE, uiop)) != 0)
+ return (error);
+
+ fname[PRFNSZ - 1] = '\0';
+
+ if ((error = prlock(pnp, ZNO)) != 0)
+ return (error);
+
+ bcopy(fname, pnp->pr_common->prc_proc->p_user.u_comm, PRFNSZ);
+
+ prunlock(pnp);
+
+ return (0);
+}
+
+/*
+ * We make pr_write_psinfo_psargs() somewhat simpler by asserting at compile
+ * time that PRARGSZ has the same definition as PSARGSZ.
+ */
+#if PRARGSZ != PSARGSZ
+#error PRARGSZ/PSARGSZ mismatch
+#endif
+
+static int
+pr_write_psinfo_psargs(prnode_t *pnp, uio_t *uiop)
+{
+ char psargs[PRARGSZ];
+ int offset = offsetof(psinfo_t, pr_psargs), error;
+
+#ifdef _SYSCALL32_IMPL
+ if (curproc->p_model != DATAMODEL_LP64)
+ offset = offsetof(psinfo32_t, pr_psargs);
+#endif
+
+ /*
+ * If this isn't a write to pr_psargs (or if the size doesn't match
+ * PRARGSZ) return.
+ */
+ if (uiop->uio_offset != offset || uiop->uio_resid != PRARGSZ)
+ return (0);
+
+ if ((error = uiomove(psargs, PRARGSZ, UIO_WRITE, uiop)) != 0)
+ return (error);
+
+ psargs[PRARGSZ - 1] = '\0';
+
+ if ((error = prlock(pnp, ZNO)) != 0)
+ return (error);
+
+ bcopy(psargs, pnp->pr_common->prc_proc->p_user.u_psargs, PRARGSZ);
+
+ prunlock(pnp);
+
+ return (0);
+}
+
+int
+pr_write_psinfo(prnode_t *pnp, uio_t *uiop)
+{
+ int error;
+
+ if ((error = pr_write_psinfo_fname(pnp, uiop)) != 0)
+ return (error);
+
+ if ((error = pr_write_psinfo_psargs(pnp, uiop)) != 0)
+ return (error);
+
+ return (0);
+}
+
+
/* ARGSUSED */
static int
prwrite(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, caller_context_t *ct)
@@ -2764,6 +2861,9 @@ prwrite(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, caller_context_t *ct)
uiop->uio_resid = resid;
return (error);
+ case PR_PSINFO:
+ return (pr_write_psinfo(pnp, uiop));
+
default:
return ((vp->v_type == VDIR)? EISDIR : EBADF);
}
@@ -4546,6 +4646,9 @@ prgetnode(vnode_t *dp, prnodetype_t type)
break;
case PR_PSINFO:
+ pnp->pr_mode = 0644; /* readable by all + owner can write */
+ break;
+
case PR_LPSINFO:
case PR_LWPSINFO:
case PR_USAGE:
@@ -6010,7 +6113,7 @@ prpoll(vnode_t *vp, short events, int anyyet, short *reventsp,
}
*reventsp = revents;
- if (!anyyet && revents == 0) {
+ if ((!anyyet && revents == 0) || (events & POLLET)) {
/*
* Arrange to wake up the polling lwp when
* the target process/lwp stops or terminates
diff --git a/usr/src/uts/common/fs/sockfs/sockcommon_sops.c b/usr/src/uts/common/fs/sockfs/sockcommon_sops.c
index 0be628f329..d3ff264eef 100644
--- a/usr/src/uts/common/fs/sockfs/sockcommon_sops.c
+++ b/usr/src/uts/common/fs/sockfs/sockcommon_sops.c
@@ -953,6 +953,13 @@ so_poll(struct sonode *so, short events, int anyyet, short *reventsp,
if (!list_is_empty(&so->so_acceptq_list))
*reventsp |= (POLLIN|POLLRDNORM) & events;
+ /*
+ * If we're looking for POLLRDHUP, indicate it if we have sent the
+ * last rx signal for the socket.
+ */
+ if ((events & POLLRDHUP) && (state & SS_SENTLASTREADSIG))
+ *reventsp |= POLLRDHUP;
+
/* Data */
/* so_downcalls is null for sctp */
if (so->so_downcalls != NULL && so->so_downcalls->sd_poll != NULL) {
@@ -988,14 +995,18 @@ so_poll(struct sonode *so, short events, int anyyet, short *reventsp,
*reventsp |= POLLHUP;
}
- if (!*reventsp && !anyyet) {
+ if ((!*reventsp && !anyyet) || (events & POLLET)) {
/* Check for read events again, but this time under lock */
if (events & (POLLIN|POLLRDNORM)) {
mutex_enter(&so->so_lock);
if (SO_HAVE_DATA(so) ||
!list_is_empty(&so->so_acceptq_list)) {
+ if (events & POLLET)
+ so->so_pollev |= SO_POLLEV_IN;
+
mutex_exit(&so->so_lock);
*reventsp |= (POLLIN|POLLRDNORM) & events;
+
return (0);
} else {
so->so_pollev |= SO_POLLEV_IN;
diff --git a/usr/src/uts/common/fs/sockfs/socknotify.c b/usr/src/uts/common/fs/sockfs/socknotify.c
index 3d5ba2a7e8..3f858afecc 100644
--- a/usr/src/uts/common/fs/sockfs/socknotify.c
+++ b/usr/src/uts/common/fs/sockfs/socknotify.c
@@ -377,7 +377,7 @@ i_so_notify_last_rx(struct sonode *so, int *pollev, int *sigev)
so->so_state |= SS_SENTLASTREADSIG;
so->so_pollev &= ~SO_POLLEV_IN;
- *pollev |= POLLIN|POLLRDNORM;
+ *pollev |= POLLIN|POLLRDNORM|POLLRDHUP;
*sigev |= SOCKETSIG_READ;
return (1);
diff --git a/usr/src/uts/common/fs/sockfs/socksyscalls.c b/usr/src/uts/common/fs/sockfs/socksyscalls.c
index a86cda937c..e8aba2ad64 100644
--- a/usr/src/uts/common/fs/sockfs/socksyscalls.c
+++ b/usr/src/uts/common/fs/sockfs/socksyscalls.c
@@ -21,10 +21,10 @@
/*
* Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
-/* Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved. */
-
#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
@@ -83,10 +83,12 @@ extern void nl7c_init(void);
extern int sockfs_defer_nl7c_init;
/*
- * Note: DEF_IOV_MAX is defined and used as it is in "fs/vncalls.c"
- * as there isn't a formal definition of IOV_MAX ???
+ * For dated (and stupid) reasons, MSG_MAXIOVLEN is 16 -- but it's 1024 on most
+ * other systems. This becomes a problem when running binaries from these
+ * other systems, and we therefore need to support an iov length of up to 1024,
+ * regardless of the advertised constant for maximum length.
*/
-#define MSG_MAXIOVLEN 16
+#define MSG_MAXMAXIOVLEN 1024
/*
* Kernel component of socket creation.
@@ -1023,9 +1025,10 @@ recvmsg(int sock, struct nmsghdr *msg, int flags)
STRUCT_HANDLE(nmsghdr, umsgptr);
struct nmsghdr lmsg;
struct uio auio;
- struct iovec aiov[MSG_MAXIOVLEN];
+ struct iovec buf[MSG_MAXIOVLEN], *aiov = buf;
+ ssize_t iovsize = 0;
int iovcnt;
- ssize_t len;
+ ssize_t len, rval;
int i;
int *flagsp;
model_t model;
@@ -1068,22 +1071,36 @@ recvmsg(int sock, struct nmsghdr *msg, int flags)
iovcnt = lmsg.msg_iovlen;
- if (iovcnt <= 0 || iovcnt > MSG_MAXIOVLEN) {
+ if (iovcnt <= 0 || iovcnt > MSG_MAXMAXIOVLEN) {
return (set_errno(EMSGSIZE));
}
+ if (iovcnt > MSG_MAXIOVLEN) {
+ iovsize = MSG_MAXMAXIOVLEN * sizeof (struct iovec);
+ aiov = kmem_alloc(iovsize, KM_SLEEP);
+ }
+
#ifdef _SYSCALL32_IMPL
/*
* 32-bit callers need to have their iovec expanded, while ensuring
* that they can't move more than 2Gbytes of data in a single call.
*/
if (model == DATAMODEL_ILP32) {
- struct iovec32 aiov32[MSG_MAXIOVLEN];
+ struct iovec32 buf32[MSG_MAXIOVLEN], *aiov32 = buf32;
ssize32_t count32;
+ if (iovsize != 0)
+ aiov32 = kmem_alloc(iovsize, KM_SLEEP);
+
if (copyin((struct iovec32 *)lmsg.msg_iov, aiov32,
- iovcnt * sizeof (struct iovec32)))
+ iovcnt * sizeof (struct iovec32))) {
+ if (iovsize != 0) {
+ kmem_free(aiov32, iovsize);
+ kmem_free(aiov, iovsize);
+ }
+
return (set_errno(EFAULT));
+ }
count32 = 0;
for (i = 0; i < iovcnt; i++) {
@@ -1091,15 +1108,28 @@ recvmsg(int sock, struct nmsghdr *msg, int flags)
iovlen32 = aiov32[i].iov_len;
count32 += iovlen32;
- if (iovlen32 < 0 || count32 < 0)
+ if (iovlen32 < 0 || count32 < 0) {
+ if (iovsize != 0) {
+ kmem_free(aiov32, iovsize);
+ kmem_free(aiov, iovsize);
+ }
+
return (set_errno(EINVAL));
+ }
+
aiov[i].iov_len = iovlen32;
aiov[i].iov_base =
(caddr_t)(uintptr_t)aiov32[i].iov_base;
}
+
+ if (iovsize != 0)
+ kmem_free(aiov32, iovsize);
} else
#endif /* _SYSCALL32_IMPL */
if (copyin(lmsg.msg_iov, aiov, iovcnt * sizeof (struct iovec))) {
+ if (iovsize != 0)
+ kmem_free(aiov, iovsize);
+
return (set_errno(EFAULT));
}
len = 0;
@@ -1107,6 +1137,9 @@ recvmsg(int sock, struct nmsghdr *msg, int flags)
ssize_t iovlen = aiov[i].iov_len;
len += iovlen;
if (iovlen < 0 || len < 0) {
+ if (iovsize != 0)
+ kmem_free(aiov, iovsize);
+
return (set_errno(EINVAL));
}
}
@@ -1121,12 +1154,20 @@ recvmsg(int sock, struct nmsghdr *msg, int flags)
(do_useracc == 0 ||
useracc(lmsg.msg_control, lmsg.msg_controllen,
B_WRITE) != 0)) {
+ if (iovsize != 0)
+ kmem_free(aiov, iovsize);
+
return (set_errno(EFAULT));
}
- return (recvit(sock, &lmsg, &auio, flags,
+ rval = recvit(sock, &lmsg, &auio, flags,
STRUCT_FADDR(umsgptr, msg_namelen),
- STRUCT_FADDR(umsgptr, msg_controllen), flagsp));
+ STRUCT_FADDR(umsgptr, msg_controllen), flagsp);
+
+ if (iovsize != 0)
+ kmem_free(aiov, iovsize);
+
+ return (rval);
}
/*
@@ -1264,9 +1305,10 @@ sendmsg(int sock, struct nmsghdr *msg, int flags)
struct nmsghdr lmsg;
STRUCT_DECL(nmsghdr, u_lmsg);
struct uio auio;
- struct iovec aiov[MSG_MAXIOVLEN];
+ struct iovec buf[MSG_MAXIOVLEN], *aiov = buf;
+ ssize_t iovsize = 0;
int iovcnt;
- ssize_t len;
+ ssize_t len, rval;
int i;
model_t model;
@@ -1309,7 +1351,7 @@ sendmsg(int sock, struct nmsghdr *msg, int flags)
iovcnt = lmsg.msg_iovlen;
- if (iovcnt <= 0 || iovcnt > MSG_MAXIOVLEN) {
+ if (iovcnt <= 0 || iovcnt > MSG_MAXMAXIOVLEN) {
/*
* Unless this is XPG 4.2 we allow iovcnt == 0 to
* be compatible with SunOS 4.X and 4.4BSD.
@@ -1318,19 +1360,33 @@ sendmsg(int sock, struct nmsghdr *msg, int flags)
return (set_errno(EMSGSIZE));
}
+ if (iovcnt > MSG_MAXIOVLEN) {
+ iovsize = MSG_MAXMAXIOVLEN * sizeof (struct iovec);
+ aiov = kmem_alloc(iovsize, KM_SLEEP);
+ }
+
#ifdef _SYSCALL32_IMPL
/*
* 32-bit callers need to have their iovec expanded, while ensuring
* that they can't move more than 2Gbytes of data in a single call.
*/
if (model == DATAMODEL_ILP32) {
- struct iovec32 aiov32[MSG_MAXIOVLEN];
+ struct iovec32 buf32[MSG_MAXIOVLEN], *aiov32 = buf32;
ssize32_t count32;
+ if (iovsize != 0)
+ aiov32 = kmem_alloc(iovsize, KM_SLEEP);
+
if (iovcnt != 0 &&
copyin((struct iovec32 *)lmsg.msg_iov, aiov32,
- iovcnt * sizeof (struct iovec32)))
+ iovcnt * sizeof (struct iovec32))) {
+ if (iovsize != 0) {
+ kmem_free(aiov32, iovsize);
+ kmem_free(aiov, iovsize);
+ }
+
return (set_errno(EFAULT));
+ }
count32 = 0;
for (i = 0; i < iovcnt; i++) {
@@ -1338,17 +1394,30 @@ sendmsg(int sock, struct nmsghdr *msg, int flags)
iovlen32 = aiov32[i].iov_len;
count32 += iovlen32;
- if (iovlen32 < 0 || count32 < 0)
+ if (iovlen32 < 0 || count32 < 0) {
+ if (iovsize != 0) {
+ kmem_free(aiov32, iovsize);
+ kmem_free(aiov, iovsize);
+ }
+
return (set_errno(EINVAL));
+ }
+
aiov[i].iov_len = iovlen32;
aiov[i].iov_base =
(caddr_t)(uintptr_t)aiov32[i].iov_base;
}
+
+ if (iovsize != 0)
+ kmem_free(aiov32, iovsize);
} else
#endif /* _SYSCALL32_IMPL */
if (iovcnt != 0 &&
copyin(lmsg.msg_iov, aiov,
(unsigned)iovcnt * sizeof (struct iovec))) {
+ if (iovsize != 0)
+ kmem_free(aiov, iovsize);
+
return (set_errno(EFAULT));
}
len = 0;
@@ -1356,6 +1425,9 @@ sendmsg(int sock, struct nmsghdr *msg, int flags)
ssize_t iovlen = aiov[i].iov_len;
len += iovlen;
if (iovlen < 0 || len < 0) {
+ if (iovsize != 0)
+ kmem_free(aiov, iovsize);
+
return (set_errno(EINVAL));
}
}
@@ -1366,7 +1438,12 @@ sendmsg(int sock, struct nmsghdr *msg, int flags)
auio.uio_segflg = UIO_USERSPACE;
auio.uio_limit = 0;
- return (sendit(sock, &lmsg, &auio, flags));
+ rval = sendit(sock, &lmsg, &auio, flags);
+
+ if (iovsize != 0)
+ kmem_free(aiov, iovsize);
+
+ return (rval);
}
ssize_t
diff --git a/usr/src/uts/common/fs/sockfs/socktpi.c b/usr/src/uts/common/fs/sockfs/socktpi.c
index 582db26448..43bc503ed1 100644
--- a/usr/src/uts/common/fs/sockfs/socktpi.c
+++ b/usr/src/uts/common/fs/sockfs/socktpi.c
@@ -6271,6 +6271,13 @@ sotpi_poll(
if (sti->sti_conn_ind_head != NULL)
*reventsp |= (POLLIN|POLLRDNORM) & events;
+ if (so->so_state & SS_CANTRCVMORE) {
+ *reventsp |= POLLRDHUP & events;
+
+ if (so->so_state & SS_CANTSENDMORE)
+ *reventsp |= POLLHUP;
+ }
+
if (so->so_state & SS_OOBPEND)
*reventsp |= POLLRDBAND & events;
diff --git a/usr/src/uts/common/fs/swapfs/swap_subr.c b/usr/src/uts/common/fs/swapfs/swap_subr.c
index 74c4302da9..a4d983665b 100644
--- a/usr/src/uts/common/fs/swapfs/swap_subr.c
+++ b/usr/src/uts/common/fs/swapfs/swap_subr.c
@@ -110,9 +110,11 @@ swapfs_recalc(pgcnt_t pgs)
* memory that can be used as swap space should do so by
* setting swapfs_desfree at boot time, not swapfs_minfree.
* However, swapfs_minfree is tunable by install as a
- * workaround for bugid 1147463.
+ * workaround for bugid 1147463. Note swapfs_minfree is set
+ * to 1/8th of memory, but clamped at the limit of 256 MB.
*/
- new_swapfs_minfree = MAX(btopr(2 * 1024 * 1024), pgs >> 3);
+ new_swapfs_minfree = MIN(MAX(btopr(2 * 1024 * 1024), pgs >> 3),
+ btopr(256 * 1024 * 1024));
}
/*
diff --git a/usr/src/uts/common/fs/tmpfs/tmp_dir.c b/usr/src/uts/common/fs/tmpfs/tmp_dir.c
index f6621c8097..387cc6ae54 100644
--- a/usr/src/uts/common/fs/tmpfs/tmp_dir.c
+++ b/usr/src/uts/common/fs/tmpfs/tmp_dir.c
@@ -516,7 +516,7 @@ tdirdelete(
*/
namelen = strlen(tpdp->td_name) + 1;
- tmp_memfree(tpdp, sizeof (struct tdirent) + namelen);
+ kmem_free(tpdp, sizeof (struct tdirent) + namelen);
dir->tn_size -= (sizeof (struct tdirent) + namelen);
dir->tn_dirents--;
@@ -549,8 +549,8 @@ tdirinit(
ASSERT(RW_WRITE_HELD(&parent->tn_rwlock));
ASSERT(dir->tn_type == VDIR);
- dot = tmp_memalloc(sizeof (struct tdirent) + 2, TMP_MUSTHAVE);
- dotdot = tmp_memalloc(sizeof (struct tdirent) + 3, TMP_MUSTHAVE);
+ dot = kmem_zalloc(sizeof (struct tdirent) + 2, KM_SLEEP);
+ dotdot = kmem_zalloc(sizeof (struct tdirent) + 3, KM_SLEEP);
/*
* Initialize the entries
@@ -650,7 +650,7 @@ tdirtrunc(struct tmpnode *dir)
tmpfs_hash_out(tdp);
- tmp_memfree(tdp, sizeof (struct tdirent) + namelen);
+ kmem_free(tdp, sizeof (struct tdirent) + namelen);
dir->tn_size -= (sizeof (struct tdirent) + namelen);
dir->tn_dirents--;
}
@@ -925,7 +925,7 @@ tdiraddentry(
*/
namelen = strlen(name) + 1;
alloc_size = namelen + sizeof (struct tdirent);
- tdp = tmp_memalloc(alloc_size, 0);
+ tdp = kmem_zalloc(alloc_size, KM_NOSLEEP | KM_NORMALPRI);
if (tdp == NULL)
return (ENOSPC);
@@ -1025,7 +1025,7 @@ tdirmaketnode(
((va->va_mask & AT_MTIME) && TIMESPEC_OVERFLOW(&va->va_mtime)))
return (EOVERFLOW);
type = va->va_type;
- tp = tmp_memalloc(sizeof (struct tmpnode), TMP_MUSTHAVE);
+ tp = kmem_zalloc(sizeof (struct tmpnode), KM_SLEEP);
tmpnode_init(tm, tp, va, cred);
/* setup normal file/dir's extended attribute directory */
diff --git a/usr/src/uts/common/fs/tmpfs/tmp_subr.c b/usr/src/uts/common/fs/tmpfs/tmp_subr.c
index 2e59d28d80..bc768466cb 100644
--- a/usr/src/uts/common/fs/tmpfs/tmp_subr.c
+++ b/usr/src/uts/common/fs/tmpfs/tmp_subr.c
@@ -89,40 +89,6 @@ tmp_sticky_remove_access(struct tmpnode *dir, struct tmpnode *entry,
}
/*
- * Allocate zeroed memory if tmpfs_maxkmem has not been exceeded
- * or the 'musthave' flag is set. 'musthave' allocations should
- * always be subordinate to normal allocations so that tmpfs_maxkmem
- * can't be exceeded by more than a few KB. Example: when creating
- * a new directory, the tmpnode is a normal allocation; if that
- * succeeds, the dirents for "." and ".." are 'musthave' allocations.
- */
-void *
-tmp_memalloc(size_t size, int musthave)
-{
- static time_t last_warning;
- time_t now;
-
- if (atomic_add_long_nv(&tmp_kmemspace, size) < tmpfs_maxkmem ||
- musthave)
- return (kmem_zalloc(size, KM_SLEEP));
-
- atomic_add_long(&tmp_kmemspace, -size);
- now = gethrestime_sec();
- if (last_warning != now) {
- last_warning = now;
- cmn_err(CE_WARN, "tmp_memalloc: tmpfs over memory limit");
- }
- return (NULL);
-}
-
-void
-tmp_memfree(void *cp, size_t size)
-{
- kmem_free(cp, size);
- atomic_add_long(&tmp_kmemspace, -size);
-}
-
-/*
* Convert a string containing a number (number of bytes) to a pgcnt_t,
* containing the corresponding number of pages. On 32-bit kernels, the
* maximum value encoded in 'str' is PAGESIZE * ULONG_MAX, while the value
diff --git a/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c b/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c
index f8a36a528f..91d953f88a 100644
--- a/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c
+++ b/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -55,6 +56,15 @@
static int tmpfsfstype;
/*
+ * tmpfs_mountcount is used to prevent module unloads while there is still
+ * state from a former mount hanging around. With forced umount support, the
+ * filesystem module must not be allowed to go away before the last
+ * VFS_FREEVFS() call has been made. Since this is just an atomic counter,
+ * there's no need for locking.
+ */
+static uint32_t tmpfs_mountcount;
+
+/*
* tmpfs vfs operations.
*/
static int tmpfsinit(int, char *);
@@ -64,6 +74,7 @@ static int tmp_unmount(struct vfs *, int, struct cred *);
static int tmp_root(struct vfs *, struct vnode **);
static int tmp_statvfs(struct vfs *, struct statvfs64 *);
static int tmp_vget(struct vfs *, struct vnode **, struct fid *);
+static void tmp_freevfs(vfs_t *vfsp);
/*
* Loadable module wrapper
@@ -76,7 +87,7 @@ static vfsdef_t vfw = {
VFSDEF_VERSION,
"tmpfs",
tmpfsinit,
- VSW_HASPROTO|VSW_STATS|VSW_ZMOUNT,
+ VSW_HASPROTO|VSW_CANREMOUNT|VSW_STATS|VSW_ZMOUNT,
&tmpfs_proto_opttbl
};
@@ -121,6 +132,14 @@ _fini()
{
int error;
+ /*
+ * If a forceably unmounted instance is still hanging around, we cannot
+ * allow the module to be unloaded because that would cause panics once
+ * the VFS framework decides it's time to call into VFS_FREEVFS().
+ */
+ if (tmpfs_mountcount)
+ return (EBUSY);
+
error = mod_remove(&modlinkage);
if (error)
return (error);
@@ -139,14 +158,6 @@ _info(struct modinfo *modinfop)
}
/*
- * The following are patchable variables limiting the amount of system
- * resources tmpfs can use.
- *
- * tmpfs_maxkmem limits the amount of kernel kmem_alloc memory
- * tmpfs can use for it's data structures (e.g. tmpnodes, directory entries)
- * It is not determined by setting a hard limit but rather as a percentage of
- * physical memory which is determined when tmpfs is first used in the system.
- *
* tmpfs_minfree is the minimum amount of swap space that tmpfs leaves for
* the rest of the system. In other words, if the amount of free swap space
* in the system (i.e. anoninfo.ani_free) drops below tmpfs_minfree, tmpfs
@@ -155,9 +166,7 @@ _info(struct modinfo *modinfop)
* There is also a per mount limit on the amount of swap space
* (tmount.tm_anonmax) settable via a mount option.
*/
-size_t tmpfs_maxkmem = 0;
size_t tmpfs_minfree = 0;
-size_t tmp_kmemspace; /* bytes of kernel heap used by all tmpfs */
static major_t tmpfs_major;
static minor_t tmpfs_minor;
@@ -176,6 +185,7 @@ tmpfsinit(int fstype, char *name)
VFSNAME_ROOT, { .vfs_root = tmp_root },
VFSNAME_STATVFS, { .vfs_statvfs = tmp_statvfs },
VFSNAME_VGET, { .vfs_vget = tmp_vget },
+ VFSNAME_FREEVFS, { .vfs_freevfs = tmp_freevfs },
NULL, NULL
};
int error;
@@ -210,18 +220,12 @@ tmpfsinit(int fstype, char *name)
tmpfs_minfree = btopr(TMPMINFREE);
}
- /*
- * The maximum amount of space tmpfs can allocate is
- * TMPMAXPROCKMEM percent of kernel memory
- */
- if (tmpfs_maxkmem == 0)
- tmpfs_maxkmem = MAX(PAGESIZE, kmem_maxavail() / TMPMAXFRACKMEM);
-
if ((tmpfs_major = getudev()) == (major_t)-1) {
cmn_err(CE_WARN, "tmpfsinit: Can't get unique device number.");
tmpfs_major = 0;
}
mutex_init(&tmpfs_minor_lock, NULL, MUTEX_DEFAULT, NULL);
+ tmpfs_mountcount = 0;
return (0);
}
@@ -249,7 +253,7 @@ tmp_mount(
return (ENOTDIR);
mutex_enter(&mvp->v_lock);
- if ((uap->flags & MS_OVERLAY) == 0 &&
+ if ((uap->flags & MS_REMOUNT) == 0 && (uap->flags & MS_OVERLAY) == 0 &&
(mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
mutex_exit(&mvp->v_lock);
return (EBUSY);
@@ -286,7 +290,23 @@ tmp_mount(
(uap->flags & MS_SYSSPACE) ? UIO_SYSSPACE : UIO_USERSPACE, &dpn))
goto out;
- if ((tm = tmp_memalloc(sizeof (struct tmount), 0)) == NULL) {
+ if (uap->flags & MS_REMOUNT) {
+ tm = (struct tmount *)VFSTOTM(vfsp);
+
+ /*
+ * If we change the size so its less than what is currently
+ * being used, we allow that. The file system will simply be
+ * full until enough files have been removed to get below the
+ * new max.
+ */
+ mutex_enter(&tm->tm_contents);
+ tm->tm_anonmax = anonmax;
+ mutex_exit(&tm->tm_contents);
+ goto out;
+ }
+
+ if ((tm = kmem_zalloc(sizeof (struct tmount),
+ KM_NOSLEEP | KM_NORMALPRI)) == NULL) {
pn_free(&dpn);
error = ENOMEM;
goto out;
@@ -318,7 +338,7 @@ tmp_mount(
vfsp->vfs_bsize = PAGESIZE;
vfsp->vfs_flag |= VFS_NOTRUNC;
vfs_make_fsid(&vfsp->vfs_fsid, tm->tm_dev, tmpfsfstype);
- tm->tm_mntpath = tmp_memalloc(dpn.pn_pathlen + 1, TMP_MUSTHAVE);
+ tm->tm_mntpath = kmem_zalloc(dpn.pn_pathlen + 1, KM_SLEEP);
(void) strcpy(tm->tm_mntpath, dpn.pn_path);
/*
@@ -328,7 +348,7 @@ tmp_mount(
rattr.va_mode = (mode_t)(S_IFDIR | 0777); /* XXX modes */
rattr.va_type = VDIR;
rattr.va_rdev = 0;
- tp = tmp_memalloc(sizeof (struct tmpnode), TMP_MUSTHAVE);
+ tp = kmem_zalloc(sizeof (struct tmpnode), KM_SLEEP);
tmpnode_init(tm, tp, &rattr, cr);
/*
@@ -366,6 +386,7 @@ tmp_mount(
pn_free(&dpn);
error = 0;
+ atomic_inc_32(&tmpfs_mountcount);
out:
if (error == 0)
@@ -381,36 +402,107 @@ tmp_unmount(struct vfs *vfsp, int flag, struct cred *cr)
struct tmpnode *tnp, *cancel;
struct vnode *vp;
int error;
+ uint_t cnt;
+ int i;
if ((error = secpolicy_fs_unmount(cr, vfsp)) != 0)
return (error);
- /*
- * forced unmount is not supported by this file system
- * and thus, ENOTSUP, is being returned.
- */
- if (flag & MS_FORCE)
- return (ENOTSUP);
-
mutex_enter(&tm->tm_contents);
/*
- * If there are no open files, only the root node should have
- * a reference count.
+ * In the normal unmount case (non-forced unmount), if there are no
+ * open files, only the root node should have a reference count.
+ *
* With tm_contents held, nothing can be added or removed.
* There may be some dirty pages. To prevent fsflush from
* disrupting the unmount, put a hold on each node while scanning.
* If we find a previously referenced node, undo the holds we have
* placed and fail EBUSY.
+ *
+ * However, in the case of a forced umount, things are a bit different.
+ * An additional VFS_HOLD is added for each outstanding VN_HOLD to
+ * ensure that the file system is not cleaned up (tmp_freevfs) until
+ * the last vfs hold is dropped. This happens in tmp_inactive as the
+ * vnodes are released. Also, we can't add an additional VN_HOLD in
+ * this case since that would prevent tmp_inactive from ever being
+ * called. Finally, we do need to drop the zone ref now (zone_rele_ref)
+ * so that the zone is not blocked waiting for the final file system
+ * cleanup.
*/
tnp = tm->tm_rootnode;
- if (TNTOV(tnp)->v_count > 1) {
+
+ vp = TNTOV(tnp);
+ mutex_enter(&vp->v_lock);
+ cnt = vp->v_count;
+ if (flag & MS_FORCE) {
+ vfsp->vfs_flag |= VFS_UNMOUNTED;
+ /* Extra hold which we rele below when we drop the zone ref */
+ VFS_HOLD(vfsp);
+
+ for (i = 1; i < cnt; i++)
+ VFS_HOLD(vfsp);
+
+ /* drop the mutex now because no one can find this mount */
+ mutex_exit(&tm->tm_contents);
+ } else if (cnt > 1) {
+ mutex_exit(&vp->v_lock);
mutex_exit(&tm->tm_contents);
return (EBUSY);
}
+ mutex_exit(&vp->v_lock);
+ /*
+ * Check for open files. An open file causes everything to unwind
+ * unless this is a forced umount.
+ */
for (tnp = tnp->tn_forw; tnp; tnp = tnp->tn_forw) {
- if ((vp = TNTOV(tnp))->v_count > 0) {
+ vp = TNTOV(tnp);
+ mutex_enter(&vp->v_lock);
+ cnt = vp->v_count;
+ if (flag & MS_FORCE) {
+ for (i = 0; i < cnt; i++)
+ VFS_HOLD(vfsp);
+
+ /*
+ * In the case of a forced umount don't add an
+ * additional VN_HOLD on the already held vnodes, like
+ * we do in the non-forced unmount case. If the
+ * cnt > 0, then the vnode already has at least one
+ * hold and we need tmp_inactive to get called when the
+ * last pre-existing hold on the node is released so
+ * that we can VFS_RELE the VFS holds we just added.
+ */
+ if (cnt == 0) {
+ /* directly add VN_HOLD since have the lock */
+ vp->v_count++;
+ }
+
+ mutex_exit(&vp->v_lock);
+
+ /*
+ * If the tmpnode has any pages associated with it
+ * (i.e. if it's a normal file with non-zero size), the
+ * tmpnode could still be discovered by pageout or
+ * fsflush via the page vnode pointers. To prevent this
+ * from interfering with the tmp_freevfs, truncate the
+ * tmpnode now.
+ */
+ if (tnp->tn_size != 0 && tnp->tn_type == VREG) {
+ rw_enter(&tnp->tn_rwlock, RW_WRITER);
+ rw_enter(&tnp->tn_contents, RW_WRITER);
+
+ (void) tmpnode_trunc(tm, tnp, 0);
+
+ rw_exit(&tnp->tn_contents);
+ rw_exit(&tnp->tn_rwlock);
+
+ ASSERT(tnp->tn_size == 0);
+ ASSERT(tnp->tn_nblocks == 0);
+ }
+ } else if (cnt > 0) {
+ /* An open file; unwind the holds we've been adding. */
+ mutex_exit(&vp->v_lock);
cancel = tm->tm_rootnode->tn_forw;
while (cancel != tnp) {
vp = TNTOV(cancel);
@@ -420,14 +512,50 @@ tmp_unmount(struct vfs *vfsp, int flag, struct cred *cr)
}
mutex_exit(&tm->tm_contents);
return (EBUSY);
+ } else {
+ /* directly add a VN_HOLD since we have the lock */
+ vp->v_count++;
+ mutex_exit(&vp->v_lock);
}
- VN_HOLD(vp);
}
- /*
- * We can drop the mutex now because no one can find this mount
- */
- mutex_exit(&tm->tm_contents);
+ if (flag & MS_FORCE) {
+ /*
+ * Drop the zone ref now since we don't know how long it will
+ * be until the final vfs_rele is called by tmp_inactive.
+ */
+ if (vfsp->vfs_zone) {
+ zone_rele_ref(&vfsp->vfs_implp->vi_zone_ref,
+ ZONE_REF_VFS);
+ vfsp->vfs_zone = 0;
+ }
+ /* We can now drop the extra hold we added above. */
+ VFS_RELE(vfsp);
+ } else {
+ /*
+ * For the non-forced case, we can drop the mutex now because
+ * no one can find this mount anymore
+ */
+ vfsp->vfs_flag |= VFS_UNMOUNTED;
+ mutex_exit(&tm->tm_contents);
+ }
+
+ return (0);
+}
+
+/*
+ * Implementation of VFS_FREEVFS() to support forced umounts. This is called by
+ * the vfs framework after umount and the last VFS_RELE, to trigger the release
+ * of any resources still associated with the given vfs_t. We only add
+ * additional VFS_HOLDs during the forced umount case, so this is normally
+ * called immediately after tmp_umount.
+ */
+void
+tmp_freevfs(vfs_t *vfsp)
+{
+ struct tmount *tm = (struct tmount *)VFSTOTM(vfsp);
+ struct tmpnode *tnp;
+ struct vnode *vp;
/*
* Free all kmemalloc'd and anonalloc'd memory associated with
@@ -437,6 +565,16 @@ tmp_unmount(struct vfs *vfsp, int flag, struct cred *cr)
* tmpnode_free which assumes that the directory entry has been
* removed before the file.
*/
+
+ /*
+ * Now that we are tearing ourselves down we need to remove the
+ * UNMOUNTED flag. If we don't, we'll later hit a VN_RELE when we remove
+ * files from the system causing us to have a negative value. Doing this
+ * seems a bit better than trying to set a flag on the tmount that says
+ * we're tearing down.
+ */
+ vfsp->vfs_flag &= ~VFS_UNMOUNTED;
+
/*
* Remove all directory entries
*/
@@ -503,15 +641,16 @@ tmp_unmount(struct vfs *vfsp, int flag, struct cred *cr)
ASSERT(tm->tm_mntpath);
- tmp_memfree(tm->tm_mntpath, strlen(tm->tm_mntpath) + 1);
+ kmem_free(tm->tm_mntpath, strlen(tm->tm_mntpath) + 1);
ASSERT(tm->tm_anonmem == 0);
mutex_destroy(&tm->tm_contents);
mutex_destroy(&tm->tm_renamelck);
- tmp_memfree(tm, sizeof (struct tmount));
+ kmem_free(tm, sizeof (struct tmount));
- return (0);
+ /* Allow _fini() to succeed now */
+ atomic_dec_32(&tmpfs_mountcount);
}
/*
@@ -614,13 +753,7 @@ tmp_statvfs(struct vfs *vfsp, struct statvfs64 *sbp)
* available to tmpfs. This is fairly inaccurate since it doesn't
* take into account the names stored in the directory entries.
*/
- if (tmpfs_maxkmem > tmp_kmemspace)
- sbp->f_ffree = (tmpfs_maxkmem - tmp_kmemspace) /
- (sizeof (struct tmpnode) + sizeof (struct tdirent));
- else
- sbp->f_ffree = 0;
-
- sbp->f_files = tmpfs_maxkmem /
+ sbp->f_ffree = sbp->f_files = ptob(availrmem) /
(sizeof (struct tmpnode) + sizeof (struct tdirent));
sbp->f_favail = (fsfilcnt64_t)(sbp->f_ffree);
(void) cmpldev(&d32, vfsp->vfs_dev);
diff --git a/usr/src/uts/common/fs/tmpfs/tmp_vnops.c b/usr/src/uts/common/fs/tmpfs/tmp_vnops.c
index e44bba1252..c404433edd 100644
--- a/usr/src/uts/common/fs/tmpfs/tmp_vnops.c
+++ b/usr/src/uts/common/fs/tmpfs/tmp_vnops.c
@@ -25,7 +25,7 @@
*/
/*
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -583,6 +583,10 @@ tmp_read(struct vnode *vp, struct uio *uiop, int ioflag, cred_t *cred,
struct tmount *tm = (struct tmount *)VTOTM(vp);
int error;
+ /* If the filesystem was umounted by force, return immediately. */
+ if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
+ return (EIO);
+
/*
* We don't currently support reading non-regular files
*/
@@ -612,6 +616,10 @@ tmp_write(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cred,
struct tmount *tm = (struct tmount *)VTOTM(vp);
int error;
+ /* If the filesystem was umounted by force, return immediately. */
+ if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
+ return (EIO);
+
/*
* We don't currently support writing to non-regular files
*/
@@ -832,6 +840,9 @@ tmp_lookup(
struct tmpnode *ntp = NULL;
int error;
+ /* If the filesystem was umounted by force, return immediately. */
+ if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
+ return (EIO);
/* allow cd into @ dir */
if (flags & LOOKUP_XATTR) {
@@ -870,8 +881,7 @@ tmp_lookup(
return (error);
}
- xdp = tmp_memalloc(sizeof (struct tmpnode),
- TMP_MUSTHAVE);
+ xdp = kmem_zalloc(sizeof (struct tmpnode), KM_SLEEP);
tm = VTOTM(dvp);
tmpnode_init(tm, xdp, &tp->tn_attr, NULL);
/*
@@ -1185,7 +1195,7 @@ tmp_rename(
struct tmpnode *toparent;
struct tmpnode *fromtp = NULL; /* source tmpnode */
struct tmount *tm = (struct tmount *)VTOTM(odvp);
- int error;
+ int error, err;
int samedir = 0; /* set if odvp == ndvp */
struct vnode *realvp;
@@ -1261,15 +1271,6 @@ tmp_rename(
error = 0;
goto done;
}
- vnevent_rename_src(TNTOV(fromtp), odvp, onm, ct);
-
- /*
- * Notify the target directory if not same as
- * source directory.
- */
- if (ndvp != odvp) {
- vnevent_rename_dest_dir(ndvp, ct);
- }
/*
* Unlink from source.
@@ -1277,7 +1278,7 @@ tmp_rename(
rw_enter(&fromparent->tn_rwlock, RW_WRITER);
rw_enter(&fromtp->tn_rwlock, RW_WRITER);
- error = tdirdelete(fromparent, fromtp, onm, DR_RENAME, cred);
+ error = err = tdirdelete(fromparent, fromtp, onm, DR_RENAME, cred);
/*
* The following handles the case where our source tmpnode was
@@ -1292,6 +1293,12 @@ tmp_rename(
rw_exit(&fromtp->tn_rwlock);
rw_exit(&fromparent->tn_rwlock);
+
+ if (err == 0) {
+ vnevent_rename_src(TNTOV(fromtp), odvp, onm, ct);
+ vnevent_rename_dest_dir(ndvp, TNTOV(fromtp), nnm, ct);
+ }
+
done:
tmpnode_rele(fromtp);
mutex_exit(&tm->tm_renamelck);
@@ -1458,6 +1465,10 @@ tmp_readdir(
int reclen;
caddr_t outbuf;
+ /* If the filesystem was umounted by force, return immediately. */
+ if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
+ return (EIO);
+
if (uiop->uio_loffset >= MAXOFF_T) {
if (eofp)
*eofp = 1;
@@ -1596,7 +1607,7 @@ tmp_symlink(
return (error);
}
len = strlen(tnm) + 1;
- cp = tmp_memalloc(len, 0);
+ cp = kmem_alloc(len, KM_NOSLEEP | KM_NORMALPRI);
if (cp == NULL) {
tmpnode_rele(self);
return (ENOSPC);
@@ -1661,10 +1672,27 @@ top:
* there's little to do -- just drop our hold.
*/
if (vp->v_count > 1 || tp->tn_nlink != 0) {
- vp->v_count--;
+ if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED) {
+ /*
+ * Since the file system was forcibly unmounted, we can
+ * have a case (v_count == 1, tn_nlink != 0) where this
+ * file was open so we didn't add an extra hold on the
+ * file in tmp_unmount. We are counting on the
+ * interaction of the hold made in tmp_unmount and
+ * rele-ed in tmp_vfsfree so we need to be sure we
+ * don't decrement in this case.
+ */
+ if (vp->v_count > 1)
+ vp->v_count--;
+ } else {
+ vp->v_count--;
+ }
mutex_exit(&vp->v_lock);
mutex_exit(&tp->tn_tlock);
rw_exit(&tp->tn_rwlock);
+ /* If the filesystem was umounted by force, rele the vfs ref */
+ if (tm->tm_vfsp->vfs_flag & VFS_UNMOUNTED)
+ VFS_RELE(tm->tm_vfsp);
return;
}
@@ -1689,7 +1717,7 @@ top:
goto top;
}
if (tp->tn_type == VLNK)
- tmp_memfree(tp->tn_symlink, tp->tn_size + 1);
+ kmem_free(tp->tn_symlink, tp->tn_size + 1);
}
/*
@@ -1723,7 +1751,11 @@ top:
rw_destroy(&tp->tn_rwlock);
mutex_destroy(&tp->tn_tlock);
vn_free(TNTOV(tp));
- tmp_memfree(tp, sizeof (struct tmpnode));
+ kmem_free(tp, sizeof (struct tmpnode));
+
+ /* If the filesystem was umounted by force, rele the vfs ref */
+ if (tm->tm_vfsp->vfs_flag & VFS_UNMOUNTED)
+ VFS_RELE(tm->tm_vfsp);
}
/* ARGSUSED2 */
@@ -1849,6 +1881,10 @@ tmp_getapage(
struct vnode *pvp;
u_offset_t poff;
+ /* If the filesystem was umounted by force, return immediately. */
+ if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
+ return (EIO);
+
if (protp != NULL)
*protp = PROT_ALL;
again:
@@ -2070,6 +2106,10 @@ tmp_putapage(
u_offset_t offset;
u_offset_t tmpoff;
+ /* If the filesystem was umounted by force, return immediately. */
+ if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
+ return (EIO);
+
ASSERT(PAGE_LOCKED(pp));
/* Kluster in tmp_klustsize chunks */
diff --git a/usr/src/uts/common/fs/udfs/udf_dir.c b/usr/src/uts/common/fs/udfs/udf_dir.c
index c1e2c74a87..def046a0bf 100644
--- a/usr/src/uts/common/fs/udfs/udf_dir.c
+++ b/usr/src/uts/common/fs/udfs/udf_dir.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -562,9 +563,8 @@ out:
namep, ctp);
}
- if (sdp != tdp) {
- vnevent_rename_dest_dir(ITOV(tdp), ctp);
- }
+ vnevent_rename_dest_dir(ITOV(tdp), ITOV(tip),
+ namep, ctp);
}
/*
diff --git a/usr/src/uts/common/fs/udfs/udf_vnops.c b/usr/src/uts/common/fs/udfs/udf_vnops.c
index 307d3987ed..2d8de23399 100644
--- a/usr/src/uts/common/fs/udfs/udf_vnops.c
+++ b/usr/src/uts/common/fs/udfs/udf_vnops.c
@@ -911,7 +911,7 @@ udf_rename(
caller_context_t *ct,
int flags)
{
- int32_t error = 0;
+ int32_t error = 0, err;
struct udf_vfs *udf_vfsp;
struct ud_inode *sip; /* source inode */
struct ud_inode *sdp, *tdp; /* source and target parent inode */
@@ -995,7 +995,6 @@ udf_rename(
rw_exit(&tdp->i_rwlock);
goto errout;
}
- vnevent_rename_src(ITOV(sip), sdvp, snm, ct);
rw_exit(&tdp->i_rwlock);
rw_enter(&sdp->i_rwlock, RW_WRITER);
@@ -1006,11 +1005,15 @@ udf_rename(
* If the entry has changed just forget about it. Release
* the source inode.
*/
- if ((error = ud_dirremove(sdp, snm, sip, (struct vnode *)0,
+ if ((error = err = ud_dirremove(sdp, snm, sip, (struct vnode *)0,
DR_RENAME, cr, ct)) == ENOENT) {
error = 0;
}
rw_exit(&sdp->i_rwlock);
+
+ if (err == 0)
+ vnevent_rename_src(ITOV(sip), sdvp, snm, ct);
+
errout:
ITIMES(sdp);
ITIMES(tdp);
diff --git a/usr/src/uts/common/fs/ufs/ufs_vnops.c b/usr/src/uts/common/fs/ufs/ufs_vnops.c
index fcffd952ed..c77872b11d 100644
--- a/usr/src/uts/common/fs/ufs/ufs_vnops.c
+++ b/usr/src/uts/common/fs/ufs/ufs_vnops.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1984, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
@@ -3367,11 +3367,10 @@ ufs_rename(
struct inode *ip = NULL; /* check inode */
struct inode *sdp; /* old (source) parent inode */
struct inode *tdp; /* new (target) parent inode */
- struct vnode *svp = NULL; /* source vnode */
struct vnode *tvp = NULL; /* target vnode, if it exists */
struct vnode *realvp;
struct ufsvfs *ufsvfsp;
- struct ulockfs *ulp;
+ struct ulockfs *ulp = NULL;
struct ufs_slot slot;
timestruc_t now;
int error;
@@ -3380,7 +3379,7 @@ ufs_rename(
krwlock_t *first_lock;
krwlock_t *second_lock;
krwlock_t *reverse_lock;
- int serr, terr;
+ int terr;
sdp = VTOI(sdvp);
slot.fbp = NULL;
@@ -3389,34 +3388,13 @@ ufs_rename(
if (VOP_REALVP(tdvp, &realvp, ct) == 0)
tdvp = realvp;
+ /* Must do this before taking locks in case of DNLC miss */
terr = ufs_eventlookup(tdvp, tnm, cr, &tvp);
- serr = ufs_eventlookup(sdvp, snm, cr, &svp);
-
- if ((serr == 0) && ((terr == 0) || (terr == ENOENT))) {
- if (tvp != NULL)
- vnevent_rename_dest(tvp, tdvp, tnm, ct);
-
- /*
- * Notify the target directory of the rename event
- * if source and target directories are not the same.
- */
- if (sdvp != tdvp)
- vnevent_rename_dest_dir(tdvp, ct);
-
- if (svp != NULL)
- vnevent_rename_src(svp, sdvp, snm, ct);
- }
-
- if (tvp != NULL)
- VN_RELE(tvp);
-
- if (svp != NULL)
- VN_RELE(svp);
retry_rename:
error = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_RENAME_MASK);
if (error)
- goto out;
+ goto unlock;
if (ulp)
TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_RENAME,
@@ -3712,6 +3690,9 @@ retry_firstlock:
goto errout;
}
+ if (terr == 0 && tvp != NULL)
+ vnevent_rename_dest(tvp, tdvp, tnm, ct);
+
/*
* Unlink the source.
* Remove the source entry. ufs_dirremove() checks that the entry
@@ -3723,6 +3704,9 @@ retry_firstlock:
DR_RENAME, cr)) == ENOENT)
error = 0;
+ vnevent_rename_src(ITOV(sip), sdvp, snm, ct);
+ vnevent_rename_dest_dir(tdvp, ITOV(sip), tnm, ct);
+
errout:
if (slot.fbp)
fbrelse(slot.fbp, S_OTHER);
@@ -3732,15 +3716,17 @@ errout:
rw_exit(&sdp->i_rwlock);
}
- VN_RELE(ITOV(sip));
-
unlock:
+ if (tvp != NULL)
+ VN_RELE(tvp);
+ if (sip != NULL)
+ VN_RELE(ITOV(sip));
+
if (ulp) {
TRANS_END_CSYNC(ufsvfsp, error, issync, TOP_RENAME, trans_size);
ufs_lockfs_end(ulp);
}
-out:
return (error);
}
diff --git a/usr/src/uts/common/fs/vfs.c b/usr/src/uts/common/fs/vfs.c
index 409e69a00d..3f14df9add 100644
--- a/usr/src/uts/common/fs/vfs.c
+++ b/usr/src/uts/common/fs/vfs.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
@@ -904,6 +905,7 @@ vfs_mountroot(void)
vfs_mountfs("mntfs", "/etc/mnttab", "/etc/mnttab");
vfs_mountfs("tmpfs", "/etc/svc/volatile", "/etc/svc/volatile");
vfs_mountfs("objfs", "objfs", OBJFS_ROOT);
+ vfs_mountfs("bootfs", "bootfs", "/system/boot");
if (getzoneid() == GLOBAL_ZONEID) {
vfs_mountfs("sharefs", "sharefs", "/etc/dfs/sharetab");
@@ -1129,6 +1131,7 @@ domount(char *fsname, struct mounta *uap, vnode_t *vp, struct cred *credp,
struct pathname pn, rpn;
vsk_anchor_t *vskap;
char fstname[FSTYPSZ];
+ zone_t *zone;
/*
* The v_flag value for the mount point vp is permanently set
@@ -1590,9 +1593,24 @@ domount(char *fsname, struct mounta *uap, vnode_t *vp, struct cred *credp,
}
/*
- * Serialize with zone creations.
+ * Serialize with zone state transitions.
+ * See vfs_list_add; zone mounted into is:
+ * zone_find_by_path(refstr_value(vfsp->vfs_mntpt))
+ * not the zone doing the mount (curproc->p_zone), but if we're already
+ * inside a NGZ, then we know what zone we are.
*/
- mount_in_progress();
+ if (INGLOBALZONE(curproc)) {
+ zone = zone_find_by_path(mountpt);
+ ASSERT(zone != NULL);
+ } else {
+ zone = curproc->p_zone;
+ /*
+ * zone_find_by_path does a hold, so do one here too so that
+ * we can do a zone_rele after mount_completed.
+ */
+ zone_hold(zone);
+ }
+ mount_in_progress(zone);
/*
* Instantiate (or reinstantiate) the file system. If appropriate,
* splice it into the file system name space.
@@ -1761,7 +1779,8 @@ domount(char *fsname, struct mounta *uap, vnode_t *vp, struct cred *credp,
vfs_unlock(vfsp);
}
- mount_completed();
+ mount_completed(zone);
+ zone_rele(zone);
if (splice)
vn_vfsunlock(vp);
@@ -3881,6 +3900,8 @@ vfs_to_modname(const char *vfstype)
vfstype = "fdfs";
} else if (strncmp(vfstype, "nfs", 3) == 0) {
vfstype = "nfs";
+ } else if (strcmp(vfstype, "lxproc") == 0) {
+ vfstype = "lxprocfs";
}
return (vfstype);
diff --git a/usr/src/uts/common/fs/vnode.c b/usr/src/uts/common/fs/vnode.c
index 4abb040de0..561fb1bd02 100644
--- a/usr/src/uts/common/fs/vnode.c
+++ b/usr/src/uts/common/fs/vnode.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
@@ -66,6 +66,7 @@
#include <fs/fs_subr.h>
#include <sys/taskq.h>
#include <fs/fs_reparse.h>
+#include <sys/time.h>
/* Determine if this vnode is a file that is read-only */
#define ISROFILE(vp) \
@@ -200,6 +201,11 @@ static void (**vsd_destructor)(void *);
cr = crgetmapped(cr); \
}
+#define VOP_LATENCY_10MS 10000000
+#define VOP_LATENCY_100MS 100000000
+#define VOP_LATENCY_1S 1000000000
+#define VOP_LATENCY_10S 10000000000
+
/*
* Convert stat(2) formats to vnode types and vice versa. (Knows about
* numerical order of S_IFMT and vnode types.)
@@ -2516,6 +2522,7 @@ vnevent_rename_src(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
if (vp == NULL || vp->v_femhead == NULL) {
return;
}
+ (void) VOP_VNEVENT(dvp, VE_RENAME_SRC_DIR, vp, name, ct);
(void) VOP_VNEVENT(vp, VE_RENAME_SRC, dvp, name, ct);
}
@@ -2530,12 +2537,13 @@ vnevent_rename_dest(vnode_t *vp, vnode_t *dvp, char *name,
}
void
-vnevent_rename_dest_dir(vnode_t *vp, caller_context_t *ct)
+vnevent_rename_dest_dir(vnode_t *vp, vnode_t *nvp, char *name,
+ caller_context_t *ct)
{
if (vp == NULL || vp->v_femhead == NULL) {
return;
}
- (void) VOP_VNEVENT(vp, VE_RENAME_DEST_DIR, NULL, NULL, ct);
+ (void) VOP_VNEVENT(vp, VE_RENAME_DEST_DIR, nvp, name, ct);
}
void
@@ -3231,14 +3239,57 @@ fop_read(
cred_t *cr,
caller_context_t *ct)
{
- int err;
ssize_t resid_start = uiop->uio_resid;
+ zone_t *zonep = curzone;
+ zone_vfs_kstat_t *zvp = zonep->zone_vfs_stats;
+
+ hrtime_t start, lat;
+ ssize_t len;
+ int err;
+
+ if (vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VBLK) {
+ start = gethrtime();
+
+ mutex_enter(&zonep->zone_vfs_lock);
+ kstat_runq_enter(&zonep->zone_vfs_rwstats);
+ mutex_exit(&zonep->zone_vfs_lock);
+ }
VOPXID_MAP_CR(vp, cr);
err = (*(vp)->v_op->vop_read)(vp, uiop, ioflag, cr, ct);
- VOPSTATS_UPDATE_IO(vp, read,
- read_bytes, (resid_start - uiop->uio_resid));
+ len = resid_start - uiop->uio_resid;
+
+ VOPSTATS_UPDATE_IO(vp, read, read_bytes, len);
+
+ if (vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VBLK) {
+ mutex_enter(&zonep->zone_vfs_lock);
+ zonep->zone_vfs_rwstats.reads++;
+ zonep->zone_vfs_rwstats.nread += len;
+ kstat_runq_exit(&zonep->zone_vfs_rwstats);
+ mutex_exit(&zonep->zone_vfs_lock);
+
+ lat = gethrtime() - start;
+
+ if (lat >= VOP_LATENCY_10MS) {
+ if (lat < VOP_LATENCY_100MS)
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ else if (lat < VOP_LATENCY_1S) {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ } else if (lat < VOP_LATENCY_10S) {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+ } else {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_10s_ops.value.ui64);
+ }
+ }
+ }
+
return (err);
}
@@ -3250,14 +3301,62 @@ fop_write(
cred_t *cr,
caller_context_t *ct)
{
- int err;
ssize_t resid_start = uiop->uio_resid;
+ zone_t *zonep = curzone;
+ zone_vfs_kstat_t *zvp = zonep->zone_vfs_stats;
+
+ hrtime_t start, lat;
+ ssize_t len;
+ int err;
+
+ /*
+ * For the purposes of VFS kstat consumers, the "waitq" calculation is
+ * repurposed as the active queue for VFS write operations. There's no
+ * actual wait queue for VFS operations.
+ */
+ if (vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VBLK) {
+ start = gethrtime();
+
+ mutex_enter(&zonep->zone_vfs_lock);
+ kstat_waitq_enter(&zonep->zone_vfs_rwstats);
+ mutex_exit(&zonep->zone_vfs_lock);
+ }
VOPXID_MAP_CR(vp, cr);
err = (*(vp)->v_op->vop_write)(vp, uiop, ioflag, cr, ct);
- VOPSTATS_UPDATE_IO(vp, write,
- write_bytes, (resid_start - uiop->uio_resid));
+ len = resid_start - uiop->uio_resid;
+
+ VOPSTATS_UPDATE_IO(vp, write, write_bytes, len);
+
+ if (vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VBLK) {
+ mutex_enter(&zonep->zone_vfs_lock);
+ zonep->zone_vfs_rwstats.writes++;
+ zonep->zone_vfs_rwstats.nwritten += len;
+ kstat_waitq_exit(&zonep->zone_vfs_rwstats);
+ mutex_exit(&zonep->zone_vfs_lock);
+
+ lat = gethrtime() - start;
+
+ if (lat >= VOP_LATENCY_10MS) {
+ if (lat < VOP_LATENCY_100MS)
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ else if (lat < VOP_LATENCY_1S) {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ } else if (lat < VOP_LATENCY_10S) {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+ } else {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_10s_ops.value.ui64);
+ }
+ }
+ }
+
return (err);
}
diff --git a/usr/src/uts/common/fs/zfs/arc.c b/usr/src/uts/common/fs/zfs/arc.c
index 435096becc..efd0eba5ff 100644
--- a/usr/src/uts/common/fs/zfs/arc.c
+++ b/usr/src/uts/common/fs/zfs/arc.c
@@ -129,6 +129,7 @@
#include <sys/vdev.h>
#include <sys/vdev_impl.h>
#include <sys/dsl_pool.h>
+#include <sys/zfs_zone.h>
#ifdef _KERNEL
#include <sys/vmsystm.h>
#include <vm/anon.h>
@@ -280,6 +281,7 @@ typedef struct arc_stats {
* not from the spa we're trying to evict from.
*/
kstat_named_t arcstat_evict_skip;
+ kstat_named_t arcstat_evict_allocfail;
kstat_named_t arcstat_evict_l2_cached;
kstat_named_t arcstat_evict_l2_eligible;
kstat_named_t arcstat_evict_l2_ineligible;
@@ -347,6 +349,7 @@ static arc_stats_t arc_stats = {
{ "recycle_miss", KSTAT_DATA_UINT64 },
{ "mutex_miss", KSTAT_DATA_UINT64 },
{ "evict_skip", KSTAT_DATA_UINT64 },
+ { "evict_allocfail", KSTAT_DATA_UINT64 },
{ "evict_l2_cached", KSTAT_DATA_UINT64 },
{ "evict_l2_eligible", KSTAT_DATA_UINT64 },
{ "evict_l2_ineligible", KSTAT_DATA_UINT64 },
@@ -2539,15 +2542,27 @@ arc_get_data_buf(arc_buf_t *buf)
*/
if (!arc_evict_needed(type)) {
if (type == ARC_BUFC_METADATA) {
- buf->b_data = zio_buf_alloc(size);
- arc_space_consume(size, ARC_SPACE_DATA);
+ buf->b_data = zio_buf_alloc_canfail(size);
+ if (buf->b_data != NULL) {
+ arc_space_consume(size, ARC_SPACE_DATA);
+ goto out;
+ }
} else {
ASSERT(type == ARC_BUFC_DATA);
- buf->b_data = zio_data_buf_alloc(size);
- ARCSTAT_INCR(arcstat_data_size, size);
- atomic_add_64(&arc_size, size);
+ buf->b_data = zio_data_buf_alloc_canfail(size);
+ if (buf->b_data != NULL) {
+ ARCSTAT_INCR(arcstat_data_size, size);
+ atomic_add_64(&arc_size, size);
+ goto out;
+ }
}
- goto out;
+
+ /*
+ * Memory allocation failed, presumably due to excessive
+ * fragmentation; we'll bump a counter and drop into the ARC
+ * eviction case.
+ */
+ ARCSTAT_BUMP(arcstat_evict_allocfail);
}
/*
@@ -3212,6 +3227,14 @@ top:
rzio = zio_read(pio, spa, bp, buf->b_data, size,
arc_read_done, buf, priority, zio_flags, zb);
+ /*
+ * At this point, this read I/O has already missed in the ARC
+ * and will be going through to the disk. The I/O throttle
+ * should delay this I/O if this zone is using more than its I/O
+ * priority allows.
+ */
+ zfs_zone_io_throttle(ZFS_ZONE_IOP_READ);
+
if (*arc_flags & ARC_FLAG_WAIT)
return (zio_wait(rzio));
diff --git a/usr/src/uts/common/fs/zfs/dbuf.c b/usr/src/uts/common/fs/zfs/dbuf.c
index fd539c838d..83ff69d438 100644
--- a/usr/src/uts/common/fs/zfs/dbuf.c
+++ b/usr/src/uts/common/fs/zfs/dbuf.c
@@ -526,8 +526,19 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
arc_space_consume(DN_MAX_BONUSLEN, ARC_SPACE_OTHER);
if (bonuslen < DN_MAX_BONUSLEN)
bzero(db->db.db_data, DN_MAX_BONUSLEN);
- if (bonuslen)
- bcopy(DN_BONUS(dn->dn_phys), db->db.db_data, bonuslen);
+
+ if (bonuslen) {
+ /*
+ * Absent byzantine on-disk corruption, we fully expect
+ * our bonuslen to be no more than DN_MAX_BONUSLEN --
+ * but we nonetheless explicitly clamp it on the bcopy()
+ * to prevent any on-disk corruption from becoming
+ * rampant in-kernel corruption.
+ */
+ bcopy(DN_BONUS(dn->dn_phys), db->db.db_data,
+ MIN(bonuslen, DN_MAX_BONUSLEN));
+ }
+
DB_DNODE_EXIT(db);
db->db_state = DB_CACHED;
mutex_exit(&db->db_mtx);
diff --git a/usr/src/uts/common/fs/zfs/dmu.c b/usr/src/uts/common/fs/zfs/dmu.c
index c541e04d54..dca7aaf36c 100644
--- a/usr/src/uts/common/fs/zfs/dmu.c
+++ b/usr/src/uts/common/fs/zfs/dmu.c
@@ -1777,7 +1777,6 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
if (!zio_checksum_table[checksum].ci_dedup)
dedup_verify = B_TRUE;
}
-
/*
* Enable nopwrite if we have a cryptographically secure
* checksum that has no known collisions (i.e. SHA-256)
diff --git a/usr/src/uts/common/fs/zfs/dmu_tx.c b/usr/src/uts/common/fs/zfs/dmu_tx.c
index 56ce2f0c27..c2b1dca1c0 100644
--- a/usr/src/uts/common/fs/zfs/dmu_tx.c
+++ b/usr/src/uts/common/fs/zfs/dmu_tx.c
@@ -38,11 +38,11 @@
#include <sys/sa_impl.h>
#include <sys/zfs_context.h>
#include <sys/varargs.h>
+#include <sys/zfs_zone.h>
typedef void (*dmu_tx_hold_func_t)(dmu_tx_t *tx, struct dnode *dn,
uint64_t arg1, uint64_t arg2);
-
dmu_tx_t *
dmu_tx_create_dd(dsl_dir_t *dd)
{
@@ -223,6 +223,8 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
if (len == 0)
return;
+ zfs_zone_io_throttle(ZFS_ZONE_IOP_LOGICAL_WRITE);
+
min_bs = SPA_MINBLOCKSHIFT;
max_bs = highbit64(txh->txh_tx->tx_objset->os_recordsize) - 1;
min_ibs = DN_MIN_INDBLKSHIFT;
diff --git a/usr/src/uts/common/fs/zfs/dsl_dir.c b/usr/src/uts/common/fs/zfs/dsl_dir.c
index f495841683..7aba754cab 100644
--- a/usr/src/uts/common/fs/zfs/dsl_dir.c
+++ b/usr/src/uts/common/fs/zfs/dsl_dir.c
@@ -40,6 +40,7 @@
#include <sys/zio.h>
#include <sys/arc.h>
#include <sys/sunddi.h>
+#include <sys/zfs_zone.h>
#include <sys/zfeature.h>
#include <sys/policy.h>
#include <sys/zfs_znode.h>
@@ -1247,7 +1248,7 @@ dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize,
* locks are held.
*/
txg_delay(dd->dd_pool, tx->tx_txg,
- MSEC2NSEC(10), MSEC2NSEC(10));
+ zfs_zone_txg_delay(), MSEC2NSEC(10));
err = SET_ERROR(ERESTART);
}
}
diff --git a/usr/src/uts/common/fs/zfs/dsl_pool.c b/usr/src/uts/common/fs/zfs/dsl_pool.c
index 39a797c279..ab1bcb4c63 100644
--- a/usr/src/uts/common/fs/zfs/dsl_pool.c
+++ b/usr/src/uts/common/fs/zfs/dsl_pool.c
@@ -41,6 +41,7 @@
#include <sys/zfs_znode.h>
#include <sys/spa_impl.h>
#include <sys/dsl_deadlist.h>
+#include <sys/zfs_zone.h>
#include <sys/bptree.h>
#include <sys/zfeature.h>
#include <sys/zil_impl.h>
diff --git a/usr/src/uts/common/fs/zfs/metaslab.c b/usr/src/uts/common/fs/zfs/metaslab.c
index 9ba9fd3841..02cccae29d 100644
--- a/usr/src/uts/common/fs/zfs/metaslab.c
+++ b/usr/src/uts/common/fs/zfs/metaslab.c
@@ -63,6 +63,11 @@ uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1; /* force gang blocks */
int zfs_condense_pct = 200;
/*
+ * Never condense any space map. This is for debugging/recovery only.
+ */
+int zfs_condense_never = 0;
+
+/*
* Condensing a metaslab is not guaranteed to actually reduce the amount of
* space used on disk. In particular, a space map uses data in increments of
* MAX(1 << ashift, space_map_blksize), so a metaslab might use the
@@ -1656,6 +1661,9 @@ metaslab_should_condense(metaslab_t *msp)
ASSERT(MUTEX_HELD(&msp->ms_lock));
ASSERT(msp->ms_loaded);
+ if (zfs_condense_never != 0)
+ return (B_FALSE);
+
/*
* Use the ms_size_tree range tree, which is ordered by size, to
* obtain the largest segment in the free tree. We always condense
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
index 6d9bcb17d0..e717f131cc 100644
--- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
@@ -114,6 +114,7 @@ struct vdev_queue {
vdev_queue_class_t vq_class[ZIO_PRIORITY_NUM_QUEUEABLE];
avl_tree_t vq_active_tree;
uint64_t vq_last_offset;
+ zoneid_t vq_last_zone_id;
hrtime_t vq_io_complete_ts; /* time last i/o completed */
kmutex_t vq_lock;
};
diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_zone.h b/usr/src/uts/common/fs/zfs/sys/zfs_zone.h
new file mode 100644
index 0000000000..4f9383ffdb
--- /dev/null
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_zone.h
@@ -0,0 +1,62 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2013, Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_FS_ZFS_ZONE_H
+#define _SYS_FS_ZFS_ZONE_H
+
+#ifdef _KERNEL
+#include <sys/isa_defs.h>
+#include <sys/types32.h>
+#include <sys/vdev_impl.h>
+#include <sys/zio.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum {
+ ZFS_ZONE_IOP_READ = 0,
+ ZFS_ZONE_IOP_WRITE,
+ ZFS_ZONE_IOP_LOGICAL_WRITE,
+} zfs_zone_iop_type_t;
+
+extern void zfs_zone_io_throttle(zfs_zone_iop_type_t);
+
+extern void zfs_zone_zio_init(zio_t *);
+extern void zfs_zone_zio_start(zio_t *);
+extern void zfs_zone_zio_done(zio_t *);
+extern void zfs_zone_zio_dequeue(zio_t *);
+extern void zfs_zone_zio_enqueue(zio_t *);
+extern void zfs_zone_report_txg_sync(void *);
+extern hrtime_t zfs_zone_txg_delay();
+#ifdef _KERNEL
+extern zio_t *zfs_zone_schedule(vdev_queue_t *, zio_priority_t, avl_index_t);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_FS_ZFS_ZONE_H */
diff --git a/usr/src/uts/common/fs/zfs/sys/zio.h b/usr/src/uts/common/fs/zfs/sys/zio.h
index f6cf259bf7..6c1da96fe5 100644
--- a/usr/src/uts/common/fs/zfs/sys/zio.h
+++ b/usr/src/uts/common/fs/zfs/sys/zio.h
@@ -417,7 +417,8 @@ struct zio {
const zio_vsd_ops_t *io_vsd_ops;
uint64_t io_offset;
- hrtime_t io_timestamp;
+ hrtime_t io_timestamp; /* time I/O entered zio pipeline */
+ hrtime_t io_dispatched; /* time I/O was dispatched to disk */
avl_node_t io_queue_node;
/* Internal pipeline state */
@@ -445,6 +446,7 @@ struct zio {
zio_cksum_report_t *io_cksum_report;
uint64_t io_ena;
+ zoneid_t io_zoneid; /* zone which originated this I/O */
/* Taskq dispatching state */
taskq_ent_t io_tqent;
};
@@ -511,8 +513,10 @@ extern zio_t *zio_unique_parent(zio_t *cio);
extern void zio_add_child(zio_t *pio, zio_t *cio);
extern void *zio_buf_alloc(size_t size);
+extern void *zio_buf_alloc_canfail(size_t size);
extern void zio_buf_free(void *buf, size_t size);
extern void *zio_data_buf_alloc(size_t size);
+extern void *zio_data_buf_alloc_canfail(size_t size);
extern void zio_data_buf_free(void *buf, size_t size);
extern void zio_resubmit_stage_async(void *);
diff --git a/usr/src/uts/common/fs/zfs/txg.c b/usr/src/uts/common/fs/zfs/txg.c
index 191259e75b..915c9bb4b2 100644
--- a/usr/src/uts/common/fs/zfs/txg.c
+++ b/usr/src/uts/common/fs/zfs/txg.c
@@ -31,6 +31,7 @@
#include <sys/dsl_pool.h>
#include <sys/dsl_scan.h>
#include <sys/callb.h>
+#include <sys/zfs_zone.h>
/*
* ZFS Transaction Groups
@@ -506,6 +507,8 @@ txg_sync_thread(dsl_pool_t *dp)
txg, tx->tx_quiesce_txg_waiting, tx->tx_sync_txg_waiting);
mutex_exit(&tx->tx_sync_lock);
+ zfs_zone_report_txg_sync(dp);
+
start = ddi_get_lbolt();
spa_sync(spa, txg);
delta = ddi_get_lbolt() - start;
diff --git a/usr/src/uts/common/fs/zfs/vdev_disk.c b/usr/src/uts/common/fs/zfs/vdev_disk.c
index ed4a8b773b..e4cc42452a 100644
--- a/usr/src/uts/common/fs/zfs/vdev_disk.c
+++ b/usr/src/uts/common/fs/zfs/vdev_disk.c
@@ -21,11 +21,13 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013 Joyent, Inc. All rights reserved.
*/
#include <sys/zfs_context.h>
+#include <sys/zfs_zone.h>
#include <sys/spa_impl.h>
#include <sys/refcount.h>
#include <sys/vdev_disk.h>
@@ -44,6 +46,11 @@ extern ldi_ident_t zfs_li;
static void vdev_disk_close(vdev_t *);
+typedef struct vdev_disk_buf {
+ buf_t vdb_buf;
+ zio_t *vdb_io;
+} vdev_disk_buf_t;
+
typedef struct vdev_disk_ldi_cb {
list_node_t lcb_next;
ldi_callback_id_t lcb_id;
@@ -127,6 +134,8 @@ vdev_disk_off_finalize(ldi_handle_t lh, ldi_ev_cookie_t ecookie,
int ldi_result, void *arg, void *ev_data)
{
vdev_t *vd = (vdev_t *)arg;
+ vdev_disk_t *dvd = vd->vdev_tsd;
+ vdev_disk_ldi_cb_t *lcb;
/*
* Ignore events other than offline.
@@ -586,6 +595,7 @@ static void
vdev_disk_close(vdev_t *vd)
{
vdev_disk_t *dvd = vd->vdev_tsd;
+ vdev_disk_ldi_cb_t *lcb;
if (vd->vdev_reopening || dvd == NULL)
return;
@@ -812,6 +822,8 @@ vdev_disk_io_start(zio_t *zio)
bp->b_bufsize = zio->io_size;
bp->b_iodone = (int (*)())vdev_disk_io_intr;
+ zfs_zone_zio_start(zio);
+
/* ldi_strategy() will return non-zero only on programming errors */
VERIFY(ldi_strategy(dvd->vd_lh, bp) == 0);
}
@@ -821,6 +833,8 @@ vdev_disk_io_done(zio_t *zio)
{
vdev_t *vd = zio->io_vd;
+ zfs_zone_zio_done(zio);
+
/*
* If the device returned EIO, then attempt a DKIOCSTATE ioctl to see if
* the device has been removed. If this is the case, then we trigger an
diff --git a/usr/src/uts/common/fs/zfs/vdev_queue.c b/usr/src/uts/common/fs/zfs/vdev_queue.c
index f47c4cd1e2..00246fff22 100644
--- a/usr/src/uts/common/fs/zfs/vdev_queue.c
+++ b/usr/src/uts/common/fs/zfs/vdev_queue.c
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2013, Joyent, Inc. All rights reserved.
*/
/*
@@ -33,6 +34,7 @@
#include <sys/zio.h>
#include <sys/avl.h>
#include <sys/dsl_pool.h>
+#include <sys/zfs_zone.h>
/*
* ZFS I/O Scheduler
@@ -141,7 +143,7 @@ uint32_t zfs_vdev_sync_write_min_active = 10;
uint32_t zfs_vdev_sync_write_max_active = 10;
uint32_t zfs_vdev_async_read_min_active = 1;
uint32_t zfs_vdev_async_read_max_active = 3;
-uint32_t zfs_vdev_async_write_min_active = 1;
+uint32_t zfs_vdev_async_write_min_active = 3;
uint32_t zfs_vdev_async_write_max_active = 10;
uint32_t zfs_vdev_scrub_min_active = 1;
uint32_t zfs_vdev_scrub_max_active = 2;
@@ -215,6 +217,8 @@ vdev_queue_init(vdev_t *vd)
avl_create(&vq->vq_active_tree, vdev_queue_offset_compare,
sizeof (zio_t), offsetof(struct zio, io_queue_node));
+ vq->vq_last_zone_id = 0;
+
for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
/*
* The synchronous i/o queues are FIFO rather than LBA ordered.
@@ -248,6 +252,7 @@ vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio)
{
spa_t *spa = zio->io_spa;
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
+ zfs_zone_zio_enqueue(zio);
avl_add(&vq->vq_class[zio->io_priority].vqc_queued_tree, zio);
mutex_enter(&spa->spa_iokstat_lock);
@@ -262,6 +267,7 @@ vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio)
{
spa_t *spa = zio->io_spa;
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
+ zfs_zone_zio_dequeue(zio);
avl_remove(&vq->vq_class[zio->io_priority].vqc_queued_tree, zio);
mutex_enter(&spa->spa_iokstat_lock);
@@ -630,7 +636,11 @@ again:
search.io_timestamp = 0;
search.io_offset = vq->vq_last_offset + 1;
VERIFY3P(avl_find(&vqc->vqc_queued_tree, &search, &idx), ==, NULL);
+#ifdef _KERNEL
+ zio = zfs_zone_schedule(vq, p, idx);
+#else
zio = avl_nearest(&vqc->vqc_queued_tree, idx, AVL_AFTER);
+#endif
if (zio == NULL)
zio = avl_first(&vqc->vqc_queued_tree);
ASSERT3U(zio->io_priority, ==, p);
diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c
index 20a64666a7..43f5c0d824 100644
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c
@@ -612,9 +612,10 @@ zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
* Check permissions for special properties.
*/
switch (prop) {
+ case ZFS_PROP_DEDUP:
case ZFS_PROP_ZONED:
/*
- * Disallow setting of 'zoned' from within a local zone.
+ * Disallow setting these properties from within a local zone.
*/
if (!INGLOBALZONE(curproc))
return (SET_ERROR(EPERM));
@@ -2035,7 +2036,8 @@ zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
}
static int
-zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
+zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os,
+ boolean_t cachedpropsonly)
{
int error = 0;
nvlist_t *nv;
@@ -2053,7 +2055,8 @@ zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
* XXX reading with out owning
*/
if (!zc->zc_objset_stats.dds_inconsistent &&
- dmu_objset_type(os) == DMU_OST_ZVOL) {
+ dmu_objset_type(os) == DMU_OST_ZVOL &&
+ !cachedpropsonly) {
error = zvol_get_stats(os, nv);
if (error == EIO)
return (error);
@@ -2080,11 +2083,24 @@ static int
zfs_ioc_objset_stats(zfs_cmd_t *zc)
{
objset_t *os;
+ nvlist_t *nvl = NULL;
+ boolean_t cachedpropsonly = B_FALSE;
int error;
+ if (zc->zc_nvlist_src != NULL &&
+ (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
+ zc->zc_iflags, &nvl) != 0))
+ return (error);
+
+ if (nvl != NULL) {
+ (void) nvlist_lookup_boolean_value(nvl, "cachedpropsonly",
+ &cachedpropsonly);
+ nvlist_free(nvl);
+ }
+
error = dmu_objset_hold(zc->zc_name, FTAG, &os);
if (error == 0) {
- error = zfs_ioc_objset_stats_impl(zc, os);
+ error = zfs_ioc_objset_stats_impl(zc, os, cachedpropsonly);
dmu_objset_rele(os, FTAG);
}
@@ -2278,8 +2294,21 @@ static int
zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
{
objset_t *os;
+ nvlist_t *nvl = NULL;
+ boolean_t cachedpropsonly = B_FALSE;
int error;
+ if (zc->zc_nvlist_src != NULL &&
+ (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
+ zc->zc_iflags, &nvl) != 0))
+ return (error);
+
+ if (nvl != NULL) {
+ (void) nvlist_lookup_boolean_value(nvl, "cachedpropsonly",
+ &cachedpropsonly);
+ nvlist_free(nvl);
+ }
+
error = dmu_objset_hold(zc->zc_name, FTAG, &os);
if (error != 0) {
return (error == ENOENT ? ESRCH : error);
@@ -2308,8 +2337,10 @@ zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
objset_t *ossnap;
error = dmu_objset_from_ds(ds, &ossnap);
- if (error == 0)
- error = zfs_ioc_objset_stats_impl(zc, ossnap);
+ if (error == 0) {
+ error = zfs_ioc_objset_stats_impl(zc,
+ ossnap, cachedpropsonly);
+ }
dsl_dataset_rele(ds, FTAG);
}
} else if (error == ENOENT) {
@@ -3019,6 +3050,7 @@ zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
uint64_t sense = ZFS_PROP_UNDEFINED;
uint64_t norm = ZFS_PROP_UNDEFINED;
uint64_t u8 = ZFS_PROP_UNDEFINED;
+ int error;
ASSERT(zplprops != NULL);
@@ -3062,8 +3094,9 @@ zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
VERIFY(nvlist_add_uint64(zplprops,
zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
- if (norm == ZFS_PROP_UNDEFINED)
- VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
+ if (norm == ZFS_PROP_UNDEFINED &&
+ (error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm)) != 0)
+ return (error);
VERIFY(nvlist_add_uint64(zplprops,
zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
@@ -3072,13 +3105,15 @@ zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
*/
if (norm)
u8 = 1;
- if (u8 == ZFS_PROP_UNDEFINED)
- VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
+ if (u8 == ZFS_PROP_UNDEFINED &&
+ (error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8)) != 0)
+ return (error);
VERIFY(nvlist_add_uint64(zplprops,
zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
- if (sense == ZFS_PROP_UNDEFINED)
- VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
+ if (sense == ZFS_PROP_UNDEFINED &&
+ (error = zfs_get_zplprop(os, ZFS_PROP_CASE, &sense)) != 0)
+ return (error);
VERIFY(nvlist_add_uint64(zplprops,
zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
diff --git a/usr/src/uts/common/fs/zfs/zfs_vfsops.c b/usr/src/uts/common/fs/zfs/zfs_vfsops.c
index 8cf83b399a..f9b7986c56 100644
--- a/usr/src/uts/common/fs/zfs/zfs_vfsops.c
+++ b/usr/src/uts/common/fs/zfs/zfs_vfsops.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -1950,6 +1951,17 @@ zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr)
if (zfsvfs->z_ctldir != NULL)
zfsctl_destroy(zfsvfs);
+ /*
+ * If we're doing a forced unmount on a dataset which still has
+ * references and is in a zone, then we need to cleanup the zone
+ * reference at this point or else the zone will never be able to
+ * shutdown.
+ */
+ if ((fflag & MS_FORCE) && vfsp->vfs_count > 1 && vfsp->vfs_zone) {
+ zone_rele_ref(&vfsp->vfs_implp->vi_zone_ref, ZONE_REF_VFS);
+ vfsp->vfs_zone = NULL;
+ }
+
return (0);
}
diff --git a/usr/src/uts/common/fs/zfs/zfs_vnops.c b/usr/src/uts/common/fs/zfs/zfs_vnops.c
index 5060412afd..58468f2cbd 100644
--- a/usr/src/uts/common/fs/zfs/zfs_vnops.c
+++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c
@@ -22,11 +22,16 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
*/
/* Portions Copyright 2007 Jeremy Teo */
/* Portions Copyright 2010 Robert Milkowski */
+/*
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ */
+
#include <sys/types.h>
#include <sys/param.h>
#include <sys/time.h>
@@ -636,6 +641,17 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
limit = MAXOFFSET_T;
+ /*
+ * Pre-fault the pages to ensure slow (eg NFS) pages
+ * don't hold up txg.
+ * Skip this if uio contains loaned arc_buf.
+ */
+ if ((uio->uio_extflg == UIO_XUIO) &&
+ (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY))
+ xuio = (xuio_t *)uio;
+ else
+ uio_prefaultpages(n, uio);
+
ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
@@ -688,17 +704,6 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
}
/*
- * Pre-fault the pages to ensure slow (eg NFS) pages
- * don't hold up txg.
- * Skip this if uio contains loaned arc_buf.
- */
- if ((uio->uio_extflg == UIO_XUIO) &&
- (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY))
- xuio = (xuio_t *)uio;
- else
- uio_prefaultpages(MIN(n, max_blksz), uio);
-
- /*
* If in append mode, set the io offset pointer to eof.
*/
if (ioflag & FAPPEND) {
@@ -938,9 +943,6 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
break;
ASSERT(tx_bytes == nbytes);
n -= nbytes;
-
- if (!xuio && n > 0)
- uio_prefaultpages(MIN(n, max_blksz), uio);
}
zfs_range_unlock(rl);
@@ -3612,18 +3614,6 @@ top:
}
}
- vnevent_rename_src(ZTOV(szp), sdvp, snm, ct);
- if (tzp)
- vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct);
-
- /*
- * notify the target directory if it is not the same
- * as source directory.
- */
- if (tdvp != sdvp) {
- vnevent_rename_dest_dir(tdvp, ct);
- }
-
tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
@@ -3664,8 +3654,12 @@ top:
return (error);
}
- if (tzp) /* Attempt to remove the existing target */
+ if (tzp) {
+ /* Attempt to remove the existing target */
error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL);
+ if (error == 0)
+ vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct);
+ }
if (error == 0) {
error = zfs_link_create(tdl, szp, tx, ZRENAMING);
@@ -3707,6 +3701,11 @@ top:
}
dmu_tx_commit(tx);
+
+ if (error == 0) {
+ vnevent_rename_src(ZTOV(szp), sdvp, snm, ct);
+ vnevent_rename_dest_dir(tdvp, ZTOV(szp), tnm, ct);
+ }
out:
if (zl != NULL)
zfs_rename_unlock(&zl);
@@ -4197,6 +4196,8 @@ zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp,
&zp->z_pflags, 8);
zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
B_TRUE);
+ err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
+
zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0);
}
dmu_tx_commit(tx);
@@ -4732,10 +4733,6 @@ zfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
ASSERT3U(VTOZ(vp)->z_mapcnt, >=, pages);
atomic_add_64(&VTOZ(vp)->z_mapcnt, -pages);
- if ((flags & MAP_SHARED) && (prot & PROT_WRITE) &&
- vn_has_cached_data(vp))
- (void) VOP_PUTPAGE(vp, off, len, B_ASYNC, cr, ct);
-
return (0);
}
diff --git a/usr/src/uts/common/fs/zfs/zfs_zone.c b/usr/src/uts/common/fs/zfs/zfs_zone.c
new file mode 100644
index 0000000000..6ad5e974eb
--- /dev/null
+++ b/usr/src/uts/common/fs/zfs/zfs_zone.c
@@ -0,0 +1,1334 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014, Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * The ZFS/Zone I/O throttle and scheduler attempts to ensure fair access to
+ * ZFS I/O resources for each zone.
+ *
+ * I/O contention can be major pain point on a multi-tenant system. A single
+ * zone can issue a stream of I/O operations, usually synchronous writes, which
+ * disrupt I/O performance for all other zones. This problem is further
+ * exacerbated by ZFS, which buffers all asynchronous writes in a single TXG,
+ * a set of blocks which are atomically synced to disk. The process of
+ * syncing a TXG can occupy all of a device's I/O bandwidth, thereby starving
+ * out any pending read operations.
+ *
+ * There are two facets to this capability; the throttle and the scheduler.
+ *
+ * Throttle
+ *
+ * The requirements on the throttle are:
+ *
+ * 1) Ensure consistent and predictable I/O latency across all zones.
+ * 2) Sequential and random workloads have very different characteristics,
+ * so it is a non-starter to track IOPS or throughput.
+ * 3) A zone should be able to use the full disk bandwidth if no other zone
+ * is actively using the disk.
+ *
+ * The throttle has two components: one to track and account for each zone's
+ * I/O requests, and another to throttle each zone's operations when it
+ * exceeds its fair share of disk I/O. When the throttle detects that a zone is
+ * consuming more than is appropriate, each read or write system call is
+ * delayed by up to 100 microseconds, which we've found is sufficient to allow
+ * other zones to interleave I/O requests during those delays.
+ *
+ * Note: The throttle will delay each logical I/O (as opposed to the physical
+ * I/O which will likely be issued asynchronously), so it may be easier to
+ * think of the I/O throttle delaying each read/write syscall instead of the
+ * actual I/O operation. For each zone, the throttle tracks an ongoing average
+ * of read and write operations performed to determine the overall I/O
+ * utilization for each zone.
+ *
+ * The throttle calculates a I/O utilization metric for each zone using the
+ * following formula:
+ *
+ * (# of read syscalls) x (Average read latency) +
+ * (# of write syscalls) x (Average write latency)
+ *
+ * Once each zone has its utilization metric, the I/O throttle will compare I/O
+ * utilization across all zones, and if a zone has a higher-than-average I/O
+ * utilization, system calls from that zone are throttled. That is, if one
+ * zone has a much higher utilization, that zone's delay is increased by 5
+ * microseconds, up to a maximum of 100 microseconds. Conversely, if a zone is
+ * already throttled and has a lower utilization than average, its delay will
+ * be lowered by 5 microseconds.
+ *
+ * The throttle calculation is driven by IO activity, but since IO does not
+ * happen at fixed intervals, timestamps are used to track when the last update
+ * was made and to drive recalculation.
+ *
+ * The throttle recalculates each zone's I/O usage and throttle delay (if any)
+ * on the zfs_zone_adjust_time interval. Overall I/O latency is maintained as
+ * a decayed average which is updated on the zfs_zone_sys_avg_cycle interval.
+ *
+ * Scheduler
+ *
+ * The I/O scheduler manages the vdev queues – the queues of pending I/Os to
+ * issue to the disks. It only makes scheduling decisions for the two
+ * synchronous I/O queues (read & write).
+ *
+ * The scheduler maintains how many I/Os in the queue are from each zone, and
+ * if one zone has a disproportionately large number of I/Os in the queue, the
+ * scheduler will allow certain I/Os from the underutilized zones to be "bumped"
+ * and pulled from the middle of the queue. This bump allows zones with a small
+ * number of I/Os (so small they may not even be taken into account by the
+ * throttle) to complete quickly instead of waiting behind dozens of I/Os from
+ * other zones.
+ */
+
+#include <sys/spa.h>
+#include <sys/vdev_impl.h>
+#include <sys/zfs_zone.h>
+
+#ifndef _KERNEL
+
+/*
+ * Stubs for when compiling for user-land.
+ */
+
+void
+zfs_zone_io_throttle(zfs_zone_iop_type_t type)
+{
+}
+
+void
+zfs_zone_zio_init(zio_t *zp)
+{
+}
+
+void
+zfs_zone_zio_start(zio_t *zp)
+{
+}
+
+void
+zfs_zone_zio_done(zio_t *zp)
+{
+}
+
+void
+zfs_zone_zio_dequeue(zio_t *zp)
+{
+}
+
+void
+zfs_zone_zio_enqueue(zio_t *zp)
+{
+}
+
+/*ARGSUSED*/
+void
+zfs_zone_report_txg_sync(void *dp)
+{
+}
+
+hrtime_t
+zfs_zone_txg_delay()
+{
+ return (MSEC2NSEC(10));
+}
+
+#else
+
+/*
+ * The real code.
+ */
+
+#include <sys/systm.h>
+#include <sys/thread.h>
+#include <sys/proc.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/atomic.h>
+#include <sys/zio.h>
+#include <sys/zone.h>
+#include <sys/avl.h>
+#include <sys/sdt.h>
+#include <sys/ddi.h>
+
+/*
+ * The zone throttle delays read and write operations from certain zones based
+ * on each zone's IO utilitzation. Once a cycle (defined by zfs_zone_cycle_time
+ * below), the delays for each zone are recalculated based on the utilization
+ * over the previous window.
+ */
+boolean_t zfs_zone_delay_enable = B_TRUE; /* enable IO throttle */
+uint16_t zfs_zone_delay_step = 5; /* usec amnt to change delay */
+uint16_t zfs_zone_delay_ceiling = 100; /* usec delay max */
+
+boolean_t zfs_zone_priority_enable = B_TRUE; /* enable IO priority */
+
+/*
+ * For certain workloads, one zone may be issuing primarily sequential I/O and
+ * another primarily random I/O. The sequential I/O will complete much more
+ * quickly than the random I/O, driving the average system latency for those
+ * operations way down. As a result, the random I/O may be throttled back, even
+ * though the sequential I/O should be throttled to allow the random I/O more
+ * access to the disk.
+ *
+ * This tunable limits the discrepancy between the read and write system
+ * latency. If one becomes excessively high, this tunable prevents the I/O
+ * throttler from exacerbating the imbalance.
+ */
+uint_t zfs_zone_rw_lat_limit = 10;
+
+/*
+ * The I/O throttle will only start delaying zones when it detects disk
+ * utilization has reached a certain level. This tunable controls the
+ * threshold at which the throttle will start delaying zones. When the number
+ * of vdevs is small, the calculation should correspond closely with the %b
+ * column from iostat -- but as the number of vdevs becomes large, it will
+ * correlate less and less to any single device (therefore making it a poor
+ * approximation for the actual I/O utilization on such systems). We
+ * therefore use our derived utilization conservatively: we know that low
+ * derived utilization does indeed correlate to low I/O use -- but that a high
+ * rate of derived utilization does not necesarily alone denote saturation;
+ * where we see a high rate of utilization, we also look for laggard I/Os to
+ * attempt to detect saturation.
+ */
+uint_t zfs_zone_util_threshold = 80;
+uint_t zfs_zone_underutil_threshold = 60;
+
+/*
+ * There are three important tunables here: zfs_zone_laggard_threshold denotes
+ * the threshold at which an I/O is considered to be of notably high latency;
+ * zfs_zone_laggard_recent denotes the number of microseconds before the
+ * current time after which the last laggard is considered to be sufficiently
+ * recent to merit increasing the throttle; zfs_zone_laggard_ancient denotes
+ * the microseconds before the current time before which the last laggard is
+ * considered to be sufficiently old to merit decreasing the throttle. The
+ * most important tunable of these three is the zfs_zone_laggard_threshold: in
+ * modeling data from a large public cloud, this tunable was found to have a
+ * much greater effect on the throttle than the two time-based thresholds.
+ * This must be set high enough to not result in spurious throttling, but not
+ * so high as to allow pathological I/O to persist in the system.
+ */
+uint_t zfs_zone_laggard_threshold = 50000; /* 50 ms */
+uint_t zfs_zone_laggard_recent = 1000000; /* 1000 ms */
+uint_t zfs_zone_laggard_ancient = 5000000; /* 5000 ms */
+
+/*
+ * Throughout this subsystem, our timestamps are in microseconds. Our system
+ * average cycle is one second or 1 million microseconds. Our zone counter
+ * update cycle is two seconds or 2 million microseconds. We use a longer
+ * duration for that cycle because some ops can see a little over two seconds of
+ * latency when they are being starved by another zone.
+ */
+uint_t zfs_zone_sys_avg_cycle = 1000000; /* 1 s */
+uint_t zfs_zone_cycle_time = 2000000; /* 2 s */
+
+/*
+ * How often the I/O throttle will reevaluate each zone's utilization, in
+ * microseconds. Default is 1/4 sec.
+ */
+uint_t zfs_zone_adjust_time = 250000; /* 250 ms */
+
+typedef struct {
+ hrtime_t cycle_start;
+ int cycle_cnt;
+ hrtime_t cycle_lat;
+ hrtime_t sys_avg_lat;
+} sys_lat_cycle_t;
+
+typedef struct {
+ hrtime_t zi_now;
+ uint_t zi_avgrlat;
+ uint_t zi_avgwlat;
+ uint64_t zi_totpri;
+ uint64_t zi_totutil;
+ int zi_active;
+ uint_t zi_diskutil;
+ boolean_t zi_underutil;
+ boolean_t zi_overutil;
+} zoneio_stats_t;
+
+static sys_lat_cycle_t rd_lat;
+static sys_lat_cycle_t wr_lat;
+
+/*
+ * Some basic disk stats to determine disk utilization. The utilization info
+ * for all disks on the system is aggregated into these values.
+ *
+ * Overall disk utilization for the current cycle is calculated as:
+ *
+ * ((zfs_disk_rtime - zfs_disk_last_rtime) * 100)
+ * ----------------------------------------------
+ * ((now - zfs_zone_last_checked) * 1000);
+ */
+kmutex_t zfs_disk_lock; /* protects the following: */
+uint_t zfs_disk_rcnt; /* Number of outstanding IOs */
+hrtime_t zfs_disk_rtime = 0; /* cummulative sum of time performing IO */
+hrtime_t zfs_disk_rlastupdate = 0; /* time last IO dispatched */
+
+hrtime_t zfs_disk_last_rtime = 0; /* prev. cycle's zfs_disk_rtime val */
+/* time that we last updated per-zone throttle info */
+hrtime_t zfs_zone_last_checked = 0;
+hrtime_t zfs_disk_last_laggard = 0;
+
+/*
+ * Data used to keep track of how often txg sync is running.
+ */
+extern int zfs_txg_timeout;
+static uint_t txg_last_check;
+static uint_t txg_cnt;
+static uint_t txg_sync_rate;
+
+boolean_t zfs_zone_schedule_enable = B_TRUE; /* enable IO sched. */
+/*
+ * Threshold for when zio scheduling should kick in.
+ *
+ * This threshold is based on the zfs_vdev_sync_read_max_active value for the
+ * number of I/Os that can be pending on a device. If there are more than the
+ * max_active ops already queued up, beyond those already issued to the vdev,
+ * then use zone-based scheduling to get the next synchronous zio.
+ */
+uint32_t zfs_zone_schedule_thresh = 10;
+
+/*
+ * On each pass of the scheduler we increment the zone's weight (up to this
+ * maximum). The weight is used by the scheduler to prevent starvation so
+ * that zones which haven't been able to do any IO over many iterations
+ * will max out thier weight to this value.
+ */
+#define SCHED_WEIGHT_MAX 20
+
+/*
+ * Tunables for delay throttling when TXG sync is occurring.
+ *
+ * If the zone is performing a write and we're doing above normal TXG syncing,
+ * then throttle for longer than normal. The zone's wait time is multiplied
+ * by the scale (zfs_zone_txg_throttle_scale).
+ */
+int zfs_zone_txg_throttle_scale = 2;
+hrtime_t zfs_zone_txg_delay_nsec = MSEC2NSEC(20);
+
+typedef struct {
+ int zq_qdepth;
+ zio_priority_t zq_queue;
+ int zq_priority;
+ int zq_wt;
+ zoneid_t zq_zoneid;
+} zone_q_bump_t;
+
+/*
+ * This uses gethrtime() but returns a value in usecs.
+ */
+#define GET_USEC_TIME (gethrtime() / 1000)
+#define NANO_TO_MICRO(x) (x / (NANOSEC / MICROSEC))
+
+/*
+ * Keep track of the zone's ZFS IOPs.
+ *
+ * See the comment on the zfs_zone_io_throttle function for which/how IOPs are
+ * accounted for.
+ *
+ * If the number of ops is >1 then we can just use that value. However,
+ * if the number of ops is <2 then we might have a zone which is trying to do
+ * IO but is not able to get any ops through the system. We don't want to lose
+ * track of this zone so we factor in its decayed count into the current count.
+ *
+ * Each cycle (zfs_zone_sys_avg_cycle) we want to update the decayed count.
+ * However, since this calculation is driven by IO activity and since IO does
+ * not happen at fixed intervals, we use a timestamp to see when the last update
+ * was made. If it was more than one cycle ago, then we need to decay the
+ * historical count by the proper number of additional cycles in which no IO was
+ * performed.
+ *
+ * Return a time delta indicating how far into the current cycle we are or 0
+ * if the last IO was more than a cycle ago.
+ */
+static hrtime_t
+compute_historical_zone_cnt(hrtime_t unow, sys_zio_cntr_t *cp)
+{
+ hrtime_t delta;
+ int gen_cnt;
+
+ /*
+ * Check if its time to recompute a new zone count.
+ * If we're still collecting data for the current cycle, return false.
+ */
+ delta = unow - cp->cycle_start;
+ if (delta < zfs_zone_cycle_time)
+ return (delta);
+
+ /* A previous cycle is past, compute the new zone count. */
+
+ /*
+ * Figure out how many generations we have to decay the historical
+ * count, since multiple cycles may have elapsed since our last IO.
+ * We depend on int rounding here.
+ */
+ gen_cnt = (int)(delta / zfs_zone_cycle_time);
+
+ /* If more than 5 cycles since last the IO, reset count. */
+ if (gen_cnt > 5) {
+ cp->zone_avg_cnt = 0;
+ } else {
+ /* Update the count. */
+ int i;
+
+ /*
+ * If the zone did more than 1 IO, just use its current count
+ * as the historical value, otherwise decay the historical
+ * count and factor that into the new historical count. We
+ * pick a threshold > 1 so that we don't lose track of IO due
+ * to int rounding.
+ */
+ if (cp->cycle_cnt > 1)
+ cp->zone_avg_cnt = cp->cycle_cnt;
+ else
+ cp->zone_avg_cnt = cp->cycle_cnt +
+ (cp->zone_avg_cnt / 2);
+
+ /*
+ * If more than one generation has elapsed since the last
+ * update, decay the values further.
+ */
+ for (i = 1; i < gen_cnt; i++)
+ cp->zone_avg_cnt = cp->zone_avg_cnt / 2;
+ }
+
+ /* A new cycle begins. */
+ cp->cycle_start = unow;
+ cp->cycle_cnt = 0;
+
+ return (0);
+}
+
+/*
+ * Add IO op data to the zone.
+ */
+static void
+add_zone_iop(zone_t *zonep, hrtime_t unow, zfs_zone_iop_type_t op)
+{
+ switch (op) {
+ case ZFS_ZONE_IOP_READ:
+ (void) compute_historical_zone_cnt(unow, &zonep->zone_rd_ops);
+ zonep->zone_rd_ops.cycle_cnt++;
+ break;
+ case ZFS_ZONE_IOP_WRITE:
+ (void) compute_historical_zone_cnt(unow, &zonep->zone_wr_ops);
+ zonep->zone_wr_ops.cycle_cnt++;
+ break;
+ case ZFS_ZONE_IOP_LOGICAL_WRITE:
+ (void) compute_historical_zone_cnt(unow, &zonep->zone_lwr_ops);
+ zonep->zone_lwr_ops.cycle_cnt++;
+ break;
+ }
+}
+
+/*
+ * Use a decaying average to keep track of the overall system latency.
+ *
+ * We want to have the recent activity heavily weighted, but if the
+ * activity decreases or stops, then the average should quickly decay
+ * down to the new value.
+ *
+ * Each cycle (zfs_zone_sys_avg_cycle) we want to update the decayed average.
+ * However, since this calculation is driven by IO activity and since IO does
+ * not happen at fixed intervals, we use a timestamp to see when the last
+ * update was made. If it was more than one cycle ago, then we need to decay
+ * the average by the proper number of additional cycles in which no IO was
+ * performed.
+ *
+ * Return true if we actually computed a new system average.
+ * If we're still within an active cycle there is nothing to do, return false.
+ */
+static boolean_t
+compute_new_sys_avg(hrtime_t unow, sys_lat_cycle_t *cp)
+{
+ hrtime_t delta;
+ int gen_cnt;
+
+ /*
+ * Check if its time to recompute a new average.
+ * If we're still collecting data for the current cycle, return false.
+ */
+ delta = unow - cp->cycle_start;
+ if (delta < zfs_zone_sys_avg_cycle)
+ return (B_FALSE);
+
+ /* A previous cycle is past, compute a new system average. */
+
+ /*
+ * Figure out how many generations we have to decay, since multiple
+ * cycles may have elapsed since our last IO.
+ * We count on int rounding here.
+ */
+ gen_cnt = (int)(delta / zfs_zone_sys_avg_cycle);
+
+ /* If more than 5 cycles since last the IO, reset average. */
+ if (gen_cnt > 5) {
+ cp->sys_avg_lat = 0;
+ } else {
+ /* Update the average. */
+ int i;
+
+ cp->sys_avg_lat =
+ (cp->sys_avg_lat + cp->cycle_lat) / (1 + cp->cycle_cnt);
+
+ /*
+ * If more than one generation has elapsed since the last
+ * update, decay the values further.
+ */
+ for (i = 1; i < gen_cnt; i++)
+ cp->sys_avg_lat = cp->sys_avg_lat / 2;
+ }
+
+ /* A new cycle begins. */
+ cp->cycle_start = unow;
+ cp->cycle_cnt = 0;
+ cp->cycle_lat = 0;
+
+ return (B_TRUE);
+}
+
+static void
+add_sys_iop(hrtime_t unow, int op, int lat)
+{
+ switch (op) {
+ case ZFS_ZONE_IOP_READ:
+ (void) compute_new_sys_avg(unow, &rd_lat);
+ rd_lat.cycle_cnt++;
+ rd_lat.cycle_lat += lat;
+ break;
+ case ZFS_ZONE_IOP_WRITE:
+ (void) compute_new_sys_avg(unow, &wr_lat);
+ wr_lat.cycle_cnt++;
+ wr_lat.cycle_lat += lat;
+ break;
+ }
+}
+
+/*
+ * Get the zone IO counts.
+ */
+static uint_t
+calc_zone_cnt(hrtime_t unow, sys_zio_cntr_t *cp)
+{
+ hrtime_t delta;
+ uint_t cnt;
+
+ if ((delta = compute_historical_zone_cnt(unow, cp)) == 0) {
+ /*
+ * No activity in the current cycle, we already have the
+ * historical data so we'll use that.
+ */
+ cnt = cp->zone_avg_cnt;
+ } else {
+ /*
+ * If we're less than half way through the cycle then use
+ * the current count plus half the historical count, otherwise
+ * just use the current count.
+ */
+ if (delta < (zfs_zone_cycle_time / 2))
+ cnt = cp->cycle_cnt + (cp->zone_avg_cnt / 2);
+ else
+ cnt = cp->cycle_cnt;
+ }
+
+ return (cnt);
+}
+
+/*
+ * Get the average read/write latency in usecs for the system.
+ */
+static uint_t
+calc_avg_lat(hrtime_t unow, sys_lat_cycle_t *cp)
+{
+ if (compute_new_sys_avg(unow, cp)) {
+ /*
+ * No activity in the current cycle, we already have the
+ * historical data so we'll use that.
+ */
+ return (cp->sys_avg_lat);
+ } else {
+ /*
+ * We're within a cycle; weight the current activity higher
+ * compared to the historical data and use that.
+ */
+ DTRACE_PROBE3(zfs__zone__calc__wt__avg,
+ uintptr_t, cp->sys_avg_lat,
+ uintptr_t, cp->cycle_lat,
+ uintptr_t, cp->cycle_cnt);
+
+ return ((cp->sys_avg_lat + (cp->cycle_lat * 8)) /
+ (1 + (cp->cycle_cnt * 8)));
+ }
+}
+
+/*
+ * Account for the current IOP on the zone and for the system as a whole.
+ * The latency parameter is in usecs.
+ */
+static void
+add_iop(zone_t *zonep, hrtime_t unow, zfs_zone_iop_type_t op, hrtime_t lat)
+{
+ /* Add op to zone */
+ add_zone_iop(zonep, unow, op);
+
+ /* Track system latency */
+ if (op != ZFS_ZONE_IOP_LOGICAL_WRITE)
+ add_sys_iop(unow, op, lat);
+}
+
+/*
+ * Calculate and return the total number of read ops, write ops and logical
+ * write ops for the given zone. If the zone has issued operations of any type
+ * return a non-zero value, otherwise return 0.
+ */
+static int
+get_zone_io_cnt(hrtime_t unow, zone_t *zonep, uint_t *rops, uint_t *wops,
+ uint_t *lwops)
+{
+ *rops = calc_zone_cnt(unow, &zonep->zone_rd_ops);
+ *wops = calc_zone_cnt(unow, &zonep->zone_wr_ops);
+ *lwops = calc_zone_cnt(unow, &zonep->zone_lwr_ops);
+
+ DTRACE_PROBE4(zfs__zone__io__cnt, uintptr_t, zonep->zone_id,
+ uintptr_t, *rops, uintptr_t, *wops, uintptr_t, *lwops);
+
+ return (*rops | *wops | *lwops);
+}
+
+/*
+ * Get the average read/write latency in usecs for the system.
+ */
+static void
+get_sys_avg_lat(hrtime_t unow, uint_t *rlat, uint_t *wlat)
+{
+ *rlat = calc_avg_lat(unow, &rd_lat);
+ *wlat = calc_avg_lat(unow, &wr_lat);
+
+ /*
+ * In an attempt to improve the accuracy of the throttling algorithm,
+ * assume that IO operations can't have zero latency. Instead, assume
+ * a reasonable lower bound for each operation type. If the actual
+ * observed latencies are non-zero, use those latency values instead.
+ */
+ if (*rlat == 0)
+ *rlat = 1000;
+ if (*wlat == 0)
+ *wlat = 1000;
+
+ DTRACE_PROBE2(zfs__zone__sys__avg__lat, uintptr_t, *rlat,
+ uintptr_t, *wlat);
+}
+
+/*
+ * Find disk utilization for each zone and average utilization for all active
+ * zones.
+ */
+static int
+zfs_zone_wait_adjust_calculate_cb(zone_t *zonep, void *arg)
+{
+ zoneio_stats_t *sp = arg;
+ uint_t rops, wops, lwops;
+
+ if (zonep->zone_id == GLOBAL_ZONEID ||
+ get_zone_io_cnt(sp->zi_now, zonep, &rops, &wops, &lwops) == 0) {
+ zonep->zone_io_util = 0;
+ return (0);
+ }
+
+ zonep->zone_io_util = (rops * sp->zi_avgrlat) +
+ (wops * sp->zi_avgwlat) + (lwops * sp->zi_avgwlat);
+ sp->zi_totutil += zonep->zone_io_util;
+
+ if (zonep->zone_io_util > 0) {
+ sp->zi_active++;
+ sp->zi_totpri += zonep->zone_zfs_io_pri;
+ }
+
+ /*
+ * sdt:::zfs-zone-utilization
+ *
+ * arg0: zone ID
+ * arg1: read operations observed during time window
+ * arg2: physical write operations observed during time window
+ * arg3: logical write ops observed during time window
+ * arg4: calculated utilization given read and write ops
+ * arg5: I/O priority assigned to this zone
+ */
+ DTRACE_PROBE6(zfs__zone__utilization, uint_t, zonep->zone_id,
+ uint_t, rops, uint_t, wops, uint_t, lwops,
+ uint_t, zonep->zone_io_util, uint_t, zonep->zone_zfs_io_pri);
+
+ return (0);
+}
+
+static void
+zfs_zone_delay_inc(zone_t *zonep)
+{
+ if (zonep->zone_io_delay < zfs_zone_delay_ceiling)
+ zonep->zone_io_delay += zfs_zone_delay_step;
+}
+
+static void
+zfs_zone_delay_dec(zone_t *zonep)
+{
+ if (zonep->zone_io_delay > 0)
+ zonep->zone_io_delay -= zfs_zone_delay_step;
+}
+
+/*
+ * For all zones "far enough" away from the average utilization, increase that
+ * zones delay. Otherwise, reduce its delay.
+ */
+static int
+zfs_zone_wait_adjust_delay_cb(zone_t *zonep, void *arg)
+{
+ zoneio_stats_t *sp = arg;
+ uint16_t delay = zonep->zone_io_delay;
+ uint_t fairutil = 0;
+
+ zonep->zone_io_util_above_avg = B_FALSE;
+
+ /*
+ * Given the calculated total utilitzation for all zones, calculate the
+ * fair share of I/O for this zone.
+ */
+ if (zfs_zone_priority_enable && sp->zi_totpri > 0) {
+ fairutil = (sp->zi_totutil * zonep->zone_zfs_io_pri) /
+ sp->zi_totpri;
+ } else if (sp->zi_active > 0) {
+ fairutil = sp->zi_totutil / sp->zi_active;
+ }
+
+ /*
+ * Adjust each IO's delay. If the overall delay becomes too high, avoid
+ * increasing beyond the ceiling value.
+ */
+ if (zonep->zone_io_util > fairutil && sp->zi_overutil) {
+ zonep->zone_io_util_above_avg = B_TRUE;
+
+ if (sp->zi_active > 1)
+ zfs_zone_delay_inc(zonep);
+ } else if (zonep->zone_io_util < fairutil || sp->zi_underutil ||
+ sp->zi_active <= 1) {
+ zfs_zone_delay_dec(zonep);
+ }
+
+ /*
+ * sdt:::zfs-zone-throttle
+ *
+ * arg0: zone ID
+ * arg1: old delay for this zone
+ * arg2: new delay for this zone
+ * arg3: calculated fair I/O utilization
+ * arg4: actual I/O utilization
+ */
+ DTRACE_PROBE5(zfs__zone__throttle, uintptr_t, zonep->zone_id,
+ uintptr_t, delay, uintptr_t, zonep->zone_io_delay,
+ uintptr_t, fairutil, uintptr_t, zonep->zone_io_util);
+
+ return (0);
+}
+
+/*
+ * Examine the utilization between different zones, and adjust the delay for
+ * each zone appropriately.
+ */
+static void
+zfs_zone_wait_adjust(hrtime_t unow, hrtime_t last_checked)
+{
+ zoneio_stats_t stats;
+ hrtime_t laggard_udelta = 0;
+
+ (void) bzero(&stats, sizeof (stats));
+
+ stats.zi_now = unow;
+ get_sys_avg_lat(unow, &stats.zi_avgrlat, &stats.zi_avgwlat);
+
+ if (stats.zi_avgrlat > stats.zi_avgwlat * zfs_zone_rw_lat_limit)
+ stats.zi_avgrlat = stats.zi_avgwlat * zfs_zone_rw_lat_limit;
+ else if (stats.zi_avgrlat * zfs_zone_rw_lat_limit < stats.zi_avgwlat)
+ stats.zi_avgwlat = stats.zi_avgrlat * zfs_zone_rw_lat_limit;
+
+ if (zone_walk(zfs_zone_wait_adjust_calculate_cb, &stats) != 0)
+ return;
+
+ /*
+ * Calculate disk utilization for the most recent period.
+ */
+ if (zfs_disk_last_rtime == 0 || unow - last_checked <= 0) {
+ stats.zi_diskutil = 0;
+ } else {
+ stats.zi_diskutil =
+ ((zfs_disk_rtime - zfs_disk_last_rtime) * 100) /
+ ((unow - last_checked) * 1000);
+ }
+ zfs_disk_last_rtime = zfs_disk_rtime;
+
+ if (unow > zfs_disk_last_laggard)
+ laggard_udelta = unow - zfs_disk_last_laggard;
+
+ /*
+ * To minimize porpoising, we have three separate states for our
+ * assessment of I/O performance: overutilized, underutilized, and
+ * neither overutilized nor underutilized. We will increment the
+ * throttle if a zone is using more than its fair share _and_ I/O
+ * is overutilized; we will decrement the throttle if a zone is using
+ * less than its fair share _or_ I/O is underutilized.
+ */
+ stats.zi_underutil = stats.zi_diskutil < zfs_zone_underutil_threshold ||
+ laggard_udelta > zfs_zone_laggard_ancient;
+
+ stats.zi_overutil = stats.zi_diskutil > zfs_zone_util_threshold &&
+ laggard_udelta < zfs_zone_laggard_recent;
+
+ /*
+ * sdt:::zfs-zone-stats
+ *
+ * Statistics observed over the last period:
+ *
+ * arg0: average system read latency
+ * arg1: average system write latency
+ * arg2: number of active zones
+ * arg3: total I/O 'utilization' for all zones
+ * arg4: total I/O priority of all active zones
+ * arg5: calculated disk utilization
+ */
+ DTRACE_PROBE6(zfs__zone__stats, uintptr_t, stats.zi_avgrlat,
+ uintptr_t, stats.zi_avgwlat, uintptr_t, stats.zi_active,
+ uintptr_t, stats.zi_totutil, uintptr_t, stats.zi_totpri,
+ uintptr_t, stats.zi_diskutil);
+
+ (void) zone_walk(zfs_zone_wait_adjust_delay_cb, &stats);
+}
+
+/*
+ * Callback used to calculate a zone's IO schedule priority.
+ *
+ * We scan the zones looking for ones with ops in the queue. Out of those,
+ * we pick the one that calculates to the highest schedule priority.
+ */
+static int
+get_sched_pri_cb(zone_t *zonep, void *arg)
+{
+ int pri;
+ uint_t cnt;
+ zone_q_bump_t *qbp = arg;
+ zio_priority_t p = qbp->zq_queue;
+
+ cnt = zonep->zone_zfs_queued[p];
+ if (cnt == 0) {
+ zonep->zone_zfs_weight = 0;
+ return (0);
+ }
+
+ /*
+ * On each pass, increment the zone's weight. We use this as input
+ * to the calculation to prevent starvation. The value is reset
+ * each time we issue an IO for this zone so zones which haven't
+ * done any IO over several iterations will see their weight max
+ * out.
+ */
+ if (zonep->zone_zfs_weight < SCHED_WEIGHT_MAX)
+ zonep->zone_zfs_weight++;
+
+ /*
+ * This zone's IO priority is the inverse of the number of IOs
+ * the zone has enqueued * zone's configured priority * weight.
+ * The queue depth has already been scaled by 10 to avoid problems
+ * with int rounding.
+ *
+ * This means that zones with fewer IOs in the queue will get
+ * preference unless other zone's assigned priority pulls them
+ * ahead. The weight is factored in to help ensure that zones
+ * which haven't done IO in a while aren't getting starved.
+ */
+ pri = (qbp->zq_qdepth / cnt) *
+ zonep->zone_zfs_io_pri * zonep->zone_zfs_weight;
+
+ /*
+ * If this zone has a higher priority than what we found so far,
+ * it becomes the new leading contender.
+ */
+ if (pri > qbp->zq_priority) {
+ qbp->zq_zoneid = zonep->zone_id;
+ qbp->zq_priority = pri;
+ qbp->zq_wt = zonep->zone_zfs_weight;
+ }
+ return (0);
+}
+
+/*
+ * See if we need to bump a zone's zio to the head of the queue. This is only
+ * done on the two synchronous I/O queues (see the block comment on the
+ * zfs_zone_schedule function). We get the correct vdev_queue_class_t and
+ * queue depth from our caller.
+ *
+ * For single-threaded synchronous processes a zone cannot get more than
+ * 1 op into the queue at a time unless the zone is running multiple processes
+ * in parallel. This can cause an imbalance in performance if there are zones
+ * with many parallel processes (and ops in the queue) vs. other zones which
+ * are doing simple single-threaded processes, such as interactive tasks in the
+ * shell. These zones can get backed up behind a deep queue and their IO
+ * performance will appear to be very poor as a result. This can make the
+ * zone work badly for interactive behavior.
+ *
+ * The scheduling algorithm kicks in once we start to get a deeper queue.
+ * Once that occurs, we look at all of the zones to see which one calculates
+ * to the highest priority. We bump that zone's first zio to the head of the
+ * queue.
+ *
+ * We use a counter on the zone so that we can quickly find how many ops each
+ * zone has in the queue without having to search the entire queue itself.
+ * This scales better since the number of zones is expected to be on the
+ * order of 10-100 whereas the queue depth can be in the range of 50-2000.
+ * In addition, since the zio's in the queue only have the zoneid, we would
+ * have to look up the zone for each zio enqueued and that means the overhead
+ * for scanning the queue each time would be much higher.
+ *
+ * In all cases, we fall back to simply pulling the next op off the queue
+ * if something should go wrong.
+ */
+static zio_t *
+get_next_zio(vdev_queue_class_t *vqc, int qdepth, zio_priority_t p)
+{
+ zone_q_bump_t qbump;
+ zio_t *zp = NULL, *zphead;
+ int cnt = 0;
+
+ /* To avoid problems with int rounding, scale the queue depth by 10 */
+ qbump.zq_qdepth = qdepth * 10;
+ qbump.zq_priority = 0;
+ qbump.zq_zoneid = 0;
+ qbump.zq_queue = p;
+ (void) zone_walk(get_sched_pri_cb, &qbump);
+
+ zphead = avl_first(&vqc->vqc_queued_tree);
+
+ /* Check if the scheduler didn't pick a zone for some reason!? */
+ if (qbump.zq_zoneid != 0) {
+ for (zp = avl_first(&vqc->vqc_queued_tree); zp != NULL;
+ zp = avl_walk(&vqc->vqc_queued_tree, zp, AVL_AFTER)) {
+ if (zp->io_zoneid == qbump.zq_zoneid)
+ break;
+ cnt++;
+ }
+ }
+
+ if (zp == NULL) {
+ zp = zphead;
+ } else if (zp != zphead) {
+ /*
+ * Only fire the probe if we actually picked a different zio
+ * than the one already at the head of the queue.
+ */
+ DTRACE_PROBE4(zfs__zone__sched__bump, uint_t, zp->io_zoneid,
+ uint_t, cnt, int, qbump.zq_priority, int, qbump.zq_wt);
+ }
+
+ return (zp);
+}
+
+/*
+ * Add our zone ID to the zio so we can keep track of which zones are doing
+ * what, even when the current thread processing the zio is not associated
+ * with the zone (e.g. the kernel taskq which pushes out TX groups).
+ */
+void
+zfs_zone_zio_init(zio_t *zp)
+{
+ zone_t *zonep = curzone;
+
+ zp->io_zoneid = zonep->zone_id;
+}
+
+/*
+ * Track and throttle IO operations per zone. Called from:
+ * - dmu_tx_count_write for (logical) write ops (both dataset and zvol writes
+ * go through this path)
+ * - arc_read for read ops that miss the ARC (both dataset and zvol)
+ * For each operation, increment that zone's counter based on the type of
+ * operation, then delay the operation, if necessary.
+ *
+ * There are three basic ways that we can see write ops:
+ * 1) An application does write syscalls. Those ops go into a TXG which
+ * we'll count here. Sometime later a kernel taskq thread (we'll see the
+ * vdev IO as zone 0) will perform some number of physical writes to commit
+ * the TXG to disk. Those writes are not associated with the zone which
+ * made the write syscalls and the number of operations is not correlated
+ * between the taskq and the zone. We only see logical writes in this
+ * function, we see the physcial writes in the zfs_zone_zio_start and
+ * zfs_zone_zio_done functions.
+ * 2) An application opens a file with O_SYNC. Each write will result in
+ * an operation which we'll see here plus a low-level vdev write from
+ * that zone.
+ * 3) An application does write syscalls followed by an fsync(). We'll
+ * count the writes going into a TXG here. We'll also see some number
+ * (usually much smaller, maybe only 1) of low-level vdev writes from this
+ * zone when the fsync is performed, plus some other low-level vdev writes
+ * from the taskq in zone 0 (are these metadata writes?).
+ *
+ * 4) In addition to the above, there are misc. system-level writes, such as
+ * writing out dirty pages to swap, or sync(2) calls, which will be handled
+ * by the global zone and which we count but don't generally worry about.
+ *
+ * Because of the above, we can see writes twice; first because this function
+ * is always called by a zone thread for logical writes, but then we also will
+ * count the physical writes that are performed at a low level via
+ * zfs_zone_zio_start. Without this, it can look like a non-global zone never
+ * writes (case 1). Depending on when the TXG is synced, the counts may be in
+ * the same sample bucket or in a different one.
+ *
+ * Tracking read operations is simpler due to their synchronous semantics. The
+ * zfs_read function -- called as a result of a read(2) syscall -- will always
+ * retrieve the data to be read through arc_read and we only come into this
+ * function when we have an arc miss.
+ */
+void
+zfs_zone_io_throttle(zfs_zone_iop_type_t type)
+{
+ zone_t *zonep = curzone;
+ hrtime_t unow, last_checked;
+ uint16_t wait;
+
+ unow = GET_USEC_TIME;
+
+ /*
+ * Only bump the counter for logical writes here. The counters for
+ * tracking physical IO operations are handled in zfs_zone_zio_done.
+ */
+ if (type == ZFS_ZONE_IOP_LOGICAL_WRITE) {
+ mutex_enter(&zonep->zone_stg_io_lock);
+ add_iop(zonep, unow, type, 0);
+ mutex_exit(&zonep->zone_stg_io_lock);
+ }
+
+ if (!zfs_zone_delay_enable)
+ return;
+
+ /*
+ * If the zone's I/O priority is set to zero, don't throttle that zone's
+ * operations at all.
+ */
+ if (zonep->zone_zfs_io_pri == 0)
+ return;
+
+ /*
+ * XXX There's a potential race here in that more than one thread may
+ * update the zone delays concurrently. The worst outcome is corruption
+ * of our data to track each zone's IO, so the algorithm may make
+ * incorrect throttling decisions until the data is refreshed.
+ */
+ last_checked = zfs_zone_last_checked;
+ if ((unow - last_checked) > zfs_zone_adjust_time) {
+ zfs_zone_last_checked = unow;
+ zfs_zone_wait_adjust(unow, last_checked);
+ }
+
+ if ((wait = zonep->zone_io_delay) > 0) {
+ /*
+ * If this is a write and we're doing above normal TXG
+ * syncing, then throttle for longer than normal.
+ */
+ if (type == ZFS_ZONE_IOP_LOGICAL_WRITE &&
+ (txg_cnt > 1 || txg_sync_rate > 1))
+ wait *= zfs_zone_txg_throttle_scale;
+
+ /*
+ * sdt:::zfs-zone-wait
+ *
+ * arg0: zone ID
+ * arg1: type of IO operation
+ * arg2: time to delay (in us)
+ */
+ DTRACE_PROBE3(zfs__zone__wait, uintptr_t, zonep->zone_id,
+ uintptr_t, type, uintptr_t, wait);
+
+ drv_usecwait(wait);
+
+ if (zonep->zone_vfs_stats != NULL) {
+ atomic_inc_64(&zonep->zone_vfs_stats->
+ zv_delay_cnt.value.ui64);
+ atomic_add_64(&zonep->zone_vfs_stats->
+ zv_delay_time.value.ui64, wait);
+ }
+ }
+}
+
+/*
+ * XXX Ignore the pool pointer parameter for now.
+ *
+ * Keep track to see if the TXG sync rate is running above the expected rate.
+ * If so, this implies that we are filling TXG's at a high rate due to a heavy
+ * write workload. We use this as input into the zone throttle.
+ *
+ * This function is called every 5 seconds (zfs_txg_timeout) under a normal
+ * write load. In this case, the sync rate is going to be 1. When there
+ * is a heavy write load, TXG's fill up fast and the sync thread will write
+ * the TXG more frequently (perhaps once a second). In this case the rate
+ * will be > 1. The sync rate is a lagging indicator since it can be up
+ * to 5 seconds old. We use the txg_cnt to keep track of the rate in the
+ * current 5 second interval and txg_sync_rate to keep track of the previous
+ * 5 second interval. In that way we don't have a period (1 or more seconds)
+ * where the txg_cnt == 0 and we cut back on throttling even though the rate
+ * is still high.
+ */
+/*ARGSUSED*/
+void
+zfs_zone_report_txg_sync(void *dp)
+{
+ uint_t now;
+
+ txg_cnt++;
+ now = (uint_t)(gethrtime() / NANOSEC);
+ if ((now - txg_last_check) >= zfs_txg_timeout) {
+ txg_sync_rate = txg_cnt / 2;
+ txg_cnt = 0;
+ txg_last_check = now;
+ }
+}
+
+hrtime_t
+zfs_zone_txg_delay()
+{
+ if (curzone->zone_io_util_above_avg)
+ return (zfs_zone_txg_delay_nsec);
+
+ return (MSEC2NSEC(10));
+}
+
+/*
+ * Called from vdev_disk_io_start when an IO hits the end of the zio pipeline
+ * and is issued.
+ * Keep track of start time for latency calculation in zfs_zone_zio_done.
+ */
+void
+zfs_zone_zio_start(zio_t *zp)
+{
+ zone_t *zonep;
+
+ /*
+ * I/Os of type ZIO_TYPE_IOCTL are used to flush the disk cache, not for
+ * an actual I/O operation. Ignore those operations as they relate to
+ * throttling and scheduling.
+ */
+ if (zp->io_type == ZIO_TYPE_IOCTL)
+ return;
+
+ if ((zonep = zone_find_by_id(zp->io_zoneid)) == NULL)
+ return;
+
+ mutex_enter(&zonep->zone_zfs_lock);
+ if (zp->io_type == ZIO_TYPE_READ)
+ kstat_runq_enter(&zonep->zone_zfs_rwstats);
+ zonep->zone_zfs_weight = 0;
+ mutex_exit(&zonep->zone_zfs_lock);
+
+ mutex_enter(&zfs_disk_lock);
+ zp->io_dispatched = gethrtime();
+
+ if (zfs_disk_rcnt++ != 0)
+ zfs_disk_rtime += (zp->io_dispatched - zfs_disk_rlastupdate);
+ zfs_disk_rlastupdate = zp->io_dispatched;
+ mutex_exit(&zfs_disk_lock);
+
+ zone_rele(zonep);
+}
+
+/*
+ * Called from vdev_disk_io_done when an IO completes.
+ * Increment our counter for zone ops.
+ * Calculate the IO latency avg. for this zone.
+ */
+void
+zfs_zone_zio_done(zio_t *zp)
+{
+ zone_t *zonep;
+ hrtime_t now, unow, udelta;
+
+ if (zp->io_type == ZIO_TYPE_IOCTL)
+ return;
+
+ if (zp->io_dispatched == 0)
+ return;
+
+ if ((zonep = zone_find_by_id(zp->io_zoneid)) == NULL)
+ return;
+
+ now = gethrtime();
+ unow = NANO_TO_MICRO(now);
+ udelta = unow - NANO_TO_MICRO(zp->io_dispatched);
+
+ mutex_enter(&zonep->zone_zfs_lock);
+
+ /*
+ * To calculate the wsvc_t average, keep a cumulative sum of all the
+ * wait time before each I/O was dispatched. Since most writes are
+ * asynchronous, only track the wait time for read I/Os.
+ */
+ if (zp->io_type == ZIO_TYPE_READ) {
+ zonep->zone_zfs_rwstats.reads++;
+ zonep->zone_zfs_rwstats.nread += zp->io_size;
+
+ zonep->zone_zfs_stats->zz_waittime.value.ui64 +=
+ zp->io_dispatched - zp->io_timestamp;
+
+ kstat_runq_exit(&zonep->zone_zfs_rwstats);
+ } else {
+ zonep->zone_zfs_rwstats.writes++;
+ zonep->zone_zfs_rwstats.nwritten += zp->io_size;
+ }
+
+ mutex_exit(&zonep->zone_zfs_lock);
+
+ mutex_enter(&zfs_disk_lock);
+ zfs_disk_rcnt--;
+ zfs_disk_rtime += (now - zfs_disk_rlastupdate);
+ zfs_disk_rlastupdate = now;
+
+ if (udelta > zfs_zone_laggard_threshold)
+ zfs_disk_last_laggard = unow;
+
+ mutex_exit(&zfs_disk_lock);
+
+ if (zfs_zone_delay_enable) {
+ mutex_enter(&zonep->zone_stg_io_lock);
+ add_iop(zonep, unow, zp->io_type == ZIO_TYPE_READ ?
+ ZFS_ZONE_IOP_READ : ZFS_ZONE_IOP_WRITE, udelta);
+ mutex_exit(&zonep->zone_stg_io_lock);
+ }
+
+ zone_rele(zonep);
+
+ /*
+ * sdt:::zfs-zone-latency
+ *
+ * arg0: zone ID
+ * arg1: type of I/O operation
+ * arg2: I/O latency (in us)
+ */
+ DTRACE_PROBE3(zfs__zone__latency, uintptr_t, zp->io_zoneid,
+ uintptr_t, zp->io_type, uintptr_t, udelta);
+}
+
+void
+zfs_zone_zio_dequeue(zio_t *zp)
+{
+ zio_priority_t p;
+ zone_t *zonep;
+
+ p = zp->io_priority;
+ if (p != ZIO_PRIORITY_SYNC_READ && p != ZIO_PRIORITY_SYNC_WRITE)
+ return;
+
+ /* We depend on p being defined as either 0 or 1 */
+ ASSERT(p < 2);
+
+ if ((zonep = zone_find_by_id(zp->io_zoneid)) == NULL)
+ return;
+
+ mutex_enter(&zonep->zone_stg_io_lock);
+ ASSERT(zonep->zone_zfs_queued[p] > 0);
+ if (zonep->zone_zfs_queued[p] == 0)
+ cmn_err(CE_WARN, "zfs_zone_zio_dequeue: count==0");
+ else
+ zonep->zone_zfs_queued[p]--;
+ mutex_exit(&zonep->zone_stg_io_lock);
+ zone_rele(zonep);
+}
+
+void
+zfs_zone_zio_enqueue(zio_t *zp)
+{
+ zio_priority_t p;
+ zone_t *zonep;
+
+ p = zp->io_priority;
+ if (p != ZIO_PRIORITY_SYNC_READ && p != ZIO_PRIORITY_SYNC_WRITE)
+ return;
+
+ /* We depend on p being defined as either 0 or 1 */
+ ASSERT(p < 2);
+
+ if ((zonep = zone_find_by_id(zp->io_zoneid)) == NULL)
+ return;
+
+ mutex_enter(&zonep->zone_stg_io_lock);
+ zonep->zone_zfs_queued[p]++;
+ mutex_exit(&zonep->zone_stg_io_lock);
+ zone_rele(zonep);
+}
+
+/*
+ * Called from vdev_queue_io_to_issue. That function is where zio's are listed
+ * in FIFO order on one of the sync queues, then pulled off (by
+ * vdev_queue_io_remove) and issued. We potentially do zone-based scheduling
+ * here to find a zone's zio deeper in the sync queue and issue that instead
+ * of simply doing FIFO.
+ *
+ * We only do zone-based zio scheduling for the two synchronous I/O queues
+ * (read & write). These queues are normally serviced in FIFO order but we
+ * may decide to move a zone's zio to the head of the line. A typical I/O
+ * load will be mostly synchronous reads and some asynchronous writes (which
+ * are scheduled differently due to transaction groups). There will also be
+ * some synchronous writes for those apps which want to ensure their data is on
+ * disk. We want to make sure that a zone with a single-threaded app (e.g. the
+ * shell) that is doing synchronous I/O (typically reads) isn't penalized by
+ * other zones which are doing lots of synchronous I/O because they have many
+ * running threads.
+ *
+ * The vq->vq_lock mutex is held when we're executing this function so we
+ * can safely access the "last zone" variable on the queue.
+ */
+zio_t *
+zfs_zone_schedule(vdev_queue_t *vq, zio_priority_t p, avl_index_t idx)
+{
+ vdev_queue_class_t *vqc = &vq->vq_class[p];
+ uint_t cnt;
+ zoneid_t last_zone;
+ zio_t *zio;
+
+ ASSERT(MUTEX_HELD(&vq->vq_lock));
+
+ /* Don't change the order on the LBA ordered queues. */
+ if (p != ZIO_PRIORITY_SYNC_READ && p != ZIO_PRIORITY_SYNC_WRITE)
+ return (avl_nearest(&vqc->vqc_queued_tree, idx, AVL_AFTER));
+
+ /* We depend on p being defined as either 0 or 1 */
+ ASSERT(p < 2);
+
+ cnt = avl_numnodes(&vqc->vqc_queued_tree);
+ last_zone = vq->vq_last_zone_id;
+
+ /*
+ * If there are only a few zios in the queue then just issue the head.
+ * If there are more than a few zios already queued up, then use
+ * scheduling to get the next zio.
+ */
+ if (!zfs_zone_schedule_enable || cnt < zfs_zone_schedule_thresh)
+ zio = avl_nearest(&vqc->vqc_queued_tree, idx, AVL_AFTER);
+ else
+ zio = get_next_zio(vqc, cnt, p);
+
+ vq->vq_last_zone_id = zio->io_zoneid;
+
+ /*
+ * Probe with 4 args; the number of IOs in the queue, the zone that
+ * was last scheduled off this queue, the zone that was associated
+ * with the next IO that is scheduled, and which queue (priority).
+ */
+ DTRACE_PROBE4(zfs__zone__sched, uint_t, cnt, uint_t, last_zone,
+ uint_t, zio->io_zoneid, uint_t, p);
+
+ return (zio);
+}
+
+#endif
diff --git a/usr/src/uts/common/fs/zfs/zil.c b/usr/src/uts/common/fs/zfs/zil.c
index 53944165ef..13105e246a 100644
--- a/usr/src/uts/common/fs/zfs/zil.c
+++ b/usr/src/uts/common/fs/zfs/zil.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -1802,9 +1803,18 @@ zil_close(zilog_t *zilog)
if (lwb != NULL)
txg = lwb->lwb_max_txg;
mutex_exit(&zilog->zl_lock);
- if (txg)
+
+ if (zilog_is_dirty(zilog)) {
+ /*
+ * If we're dirty, always wait for the current transaction --
+ * our lwb_max_txg may be in the past.
+ */
+ txg_wait_synced(zilog->zl_dmu_pool, 0);
+ } else if (txg) {
txg_wait_synced(zilog->zl_dmu_pool, txg);
- ASSERT(!zilog_is_dirty(zilog));
+ }
+
+ VERIFY(!zilog_is_dirty(zilog));
taskq_destroy(zilog->zl_clean_taskq);
zilog->zl_clean_taskq = NULL;
diff --git a/usr/src/uts/common/fs/zfs/zio.c b/usr/src/uts/common/fs/zfs/zio.c
index d0a42e9af1..066c58cca8 100644
--- a/usr/src/uts/common/fs/zfs/zio.c
+++ b/usr/src/uts/common/fs/zfs/zio.c
@@ -22,6 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
*/
#include <sys/sysmacros.h>
@@ -39,6 +40,7 @@
#include <sys/ddt.h>
#include <sys/blkptr.h>
#include <sys/zfeature.h>
+#include <sys/zfs_zone.h>
/*
* ==========================================================================
@@ -222,6 +224,20 @@ zio_buf_alloc(size_t size)
}
/*
+ * Same as zio_buf_alloc, but won't sleep in case memory cannot be allocated
+ * and will instead return immediately with a failure.
+ */
+void *
+zio_buf_alloc_canfail(size_t size)
+{
+ size_t c = (size - 1) >> SPA_MINBLOCKSHIFT;
+
+ ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
+
+ return (kmem_cache_alloc(zio_buf_cache[c], KM_NOSLEEP | KM_NORMALPRI));
+}
+
+/*
* Use zio_data_buf_alloc to allocate data. The data will not appear in a
* crashdump if the kernel panics. This exists so that we will limit the amount
* of ZFS data that shows up in a kernel crashdump. (Thus reducing the amount
@@ -237,6 +253,21 @@ zio_data_buf_alloc(size_t size)
return (kmem_cache_alloc(zio_data_buf_cache[c], KM_PUSHPAGE));
}
+/*
+ * Same as zio_data_buf_alloc, but won't sleep in case memory cannot be
+ * allocated and will instead return immediately with a failure.
+ */
+void *
+zio_data_buf_alloc_canfail(size_t size)
+{
+ size_t c = (size - 1) >> SPA_MINBLOCKSHIFT;
+
+ ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
+
+ return (kmem_cache_alloc(zio_data_buf_cache[c],
+ KM_NOSLEEP | KM_NORMALPRI));
+}
+
void
zio_buf_free(void *buf, size_t size)
{
@@ -557,11 +588,14 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
zio->io_bookmark = *zb;
if (pio != NULL) {
+ zio->io_zoneid = pio->io_zoneid;
if (zio->io_logical == NULL)
zio->io_logical = pio->io_logical;
if (zio->io_child_type == ZIO_CHILD_GANG)
zio->io_gang_leader = pio->io_gang_leader;
zio_add_child(pio, zio);
+ } else {
+ zfs_zone_zio_init(zio);
}
return (zio);
diff --git a/usr/src/uts/common/fs/zfs/zvol.c b/usr/src/uts/common/fs/zfs/zvol.c
index 80888103fe..f681b1dc65 100644
--- a/usr/src/uts/common/fs/zfs/zvol.c
+++ b/usr/src/uts/common/fs/zfs/zvol.c
@@ -25,7 +25,7 @@
*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
/*
@@ -83,6 +83,7 @@
#include <sys/zvol.h>
#include <sys/dumphdr.h>
#include <sys/zil_impl.h>
+#include <sys/sdt.h>
#include <sys/dbuf.h>
#include <sys/dmu_tx.h>
#include <sys/zfeature.h>
@@ -137,6 +138,11 @@ typedef struct zvol_state {
#define ZVOL_EXCL 0x4
#define ZVOL_WCE 0x8
+#define VOP_LATENCY_10MS 10000000
+#define VOP_LATENCY_100MS 100000000
+#define VOP_LATENCY_1S 1000000000
+#define VOP_LATENCY_10S 10000000000
+
/*
* zvol maximum transfer in one DMU tx.
*/
@@ -1378,6 +1384,9 @@ zvol_read(dev_t dev, uio_t *uio, cred_t *cr)
uint64_t volsize;
rl_t *rl;
int error = 0;
+ zone_t *zonep = curzone;
+ uint64_t tot_bytes;
+ hrtime_t start, lat;
zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
if (zv == NULL)
@@ -1394,6 +1403,14 @@ zvol_read(dev_t dev, uio_t *uio, cred_t *cr)
return (error);
}
+ DTRACE_PROBE3(zvol__uio__start, dev_t, dev, uio_t *, uio, int, 0);
+
+ mutex_enter(&zonep->zone_vfs_lock);
+ kstat_runq_enter(&zonep->zone_vfs_rwstats);
+ mutex_exit(&zonep->zone_vfs_lock);
+ start = gethrtime();
+ tot_bytes = 0;
+
rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid,
RL_READER);
while (uio->uio_resid > 0 && uio->uio_loffset < volsize) {
@@ -1403,6 +1420,7 @@ zvol_read(dev_t dev, uio_t *uio, cred_t *cr)
if (bytes > volsize - uio->uio_loffset)
bytes = volsize - uio->uio_loffset;
+ tot_bytes += bytes;
error = dmu_read_uio(zv->zv_objset, ZVOL_OBJ, uio, bytes);
if (error) {
/* convert checksum errors into IO errors */
@@ -1412,6 +1430,39 @@ zvol_read(dev_t dev, uio_t *uio, cred_t *cr)
}
}
zfs_range_unlock(rl);
+
+ mutex_enter(&zonep->zone_vfs_lock);
+ zonep->zone_vfs_rwstats.reads++;
+ zonep->zone_vfs_rwstats.nread += tot_bytes;
+ kstat_runq_exit(&zonep->zone_vfs_rwstats);
+ mutex_exit(&zonep->zone_vfs_lock);
+
+ lat = gethrtime() - start;
+
+ if (lat >= VOP_LATENCY_10MS) {
+ zone_vfs_kstat_t *zvp;
+
+ zvp = zonep->zone_vfs_stats;
+ if (lat < VOP_LATENCY_100MS) {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ } else if (lat < VOP_LATENCY_1S) {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ } else if (lat < VOP_LATENCY_10S) {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+ } else {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_10s_ops.value.ui64);
+ }
+ }
+
+ DTRACE_PROBE4(zvol__uio__done, dev_t, dev, uio_t *, uio, int, 0, int,
+ error);
+
return (error);
}
@@ -1425,6 +1476,9 @@ zvol_write(dev_t dev, uio_t *uio, cred_t *cr)
rl_t *rl;
int error = 0;
boolean_t sync;
+ zone_t *zonep = curzone;
+ uint64_t tot_bytes;
+ hrtime_t start, lat;
zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
if (zv == NULL)
@@ -1441,6 +1495,19 @@ zvol_write(dev_t dev, uio_t *uio, cred_t *cr)
return (error);
}
+ DTRACE_PROBE3(zvol__uio__start, dev_t, dev, uio_t *, uio, int, 1);
+
+ /*
+ * For the purposes of VFS kstat consumers, the "waitq" calculation is
+ * repurposed as the active queue for zvol write operations. There's no
+ * actual wait queue for zvol operations.
+ */
+ mutex_enter(&zonep->zone_vfs_lock);
+ kstat_waitq_enter(&zonep->zone_vfs_rwstats);
+ mutex_exit(&zonep->zone_vfs_lock);
+ start = gethrtime();
+ tot_bytes = 0;
+
sync = !(zv->zv_flags & ZVOL_WCE) ||
(zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS);
@@ -1454,6 +1521,7 @@ zvol_write(dev_t dev, uio_t *uio, cred_t *cr)
if (bytes > volsize - off) /* don't write past the end */
bytes = volsize - off;
+ tot_bytes += bytes;
dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes);
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
@@ -1471,6 +1539,39 @@ zvol_write(dev_t dev, uio_t *uio, cred_t *cr)
zfs_range_unlock(rl);
if (sync)
zil_commit(zv->zv_zilog, ZVOL_OBJ);
+
+ DTRACE_PROBE4(zvol__uio__done, dev_t, dev, uio_t *, uio, int, 1, int,
+ error);
+
+ mutex_enter(&zonep->zone_vfs_lock);
+ zonep->zone_vfs_rwstats.writes++;
+ zonep->zone_vfs_rwstats.nwritten += tot_bytes;
+ kstat_waitq_exit(&zonep->zone_vfs_rwstats);
+ mutex_exit(&zonep->zone_vfs_lock);
+
+ lat = gethrtime() - start;
+
+ if (lat >= VOP_LATENCY_10MS) {
+ zone_vfs_kstat_t *zvp;
+
+ zvp = zonep->zone_vfs_stats;
+ if (lat < VOP_LATENCY_100MS) {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ } else if (lat < VOP_LATENCY_1S) {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ } else if (lat < VOP_LATENCY_10S) {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+ } else {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_10s_ops.value.ui64);
+ }
+ }
+
return (error);
}
diff --git a/usr/src/uts/common/inet/ilb/ilb_conn.c b/usr/src/uts/common/inet/ilb/ilb_conn.c
index 5029552f19..7f79d41dd6 100644
--- a/usr/src/uts/common/inet/ilb/ilb_conn.c
+++ b/usr/src/uts/common/inet/ilb/ilb_conn.c
@@ -22,6 +22,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
*/
#include <sys/sysmacros.h>
@@ -365,6 +366,7 @@ ilb_conn_hash_fini(ilb_stack_t *ilbs)
{
uint32_t i;
ilb_conn_t *connp;
+ ilb_conn_hash_t *hash;
if (ilbs->ilbs_c2s_conn_hash == NULL) {
ASSERT(ilbs->ilbs_s2c_conn_hash == NULL);
@@ -388,10 +390,10 @@ ilb_conn_hash_fini(ilb_stack_t *ilbs)
ilbs->ilbs_conn_taskq = NULL;
/* Then remove all the conns. */
+ hash = ilbs->ilbs_s2c_conn_hash;
for (i = 0; i < ilbs->ilbs_conn_hash_size; i++) {
- while ((connp = ilbs->ilbs_s2c_conn_hash->ilb_connp) != NULL) {
- ilbs->ilbs_s2c_conn_hash->ilb_connp =
- connp->conn_s2c_next;
+ while ((connp = hash[i].ilb_connp) != NULL) {
+ hash[i].ilb_connp = connp->conn_s2c_next;
ILB_SERVER_REFRELE(connp->conn_server);
if (connp->conn_rule_cache.topo == ILB_TOPO_IMPL_NAT) {
ilb_nat_src_entry_t *ent;
diff --git a/usr/src/uts/common/inet/ip/ip_attr.c b/usr/src/uts/common/inet/ip/ip_attr.c
index 85ee142dfc..c350d67c2d 100644
--- a/usr/src/uts/common/inet/ip/ip_attr.c
+++ b/usr/src/uts/common/inet/ip/ip_attr.c
@@ -909,6 +909,11 @@ ixa_safe_copy(ip_xmit_attr_t *src, ip_xmit_attr_t *ixa)
*/
if (ixa->ixa_free_flags & IXA_FREE_CRED)
crhold(ixa->ixa_cred);
+
+ /*
+ * There is no cleanup in progress on this new copy.
+ */
+ ixa->ixa_tcpcleanup = IXATC_IDLE;
}
/*
diff --git a/usr/src/uts/common/inet/ip/ip_if.c b/usr/src/uts/common/inet/ip/ip_if.c
index d15d86d248..e31f96ebb8 100644
--- a/usr/src/uts/common/inet/ip/ip_if.c
+++ b/usr/src/uts/common/inet/ip/ip_if.c
@@ -24,6 +24,9 @@
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2014, OmniTI Computer Consulting, Inc. All rights reserved.
*/
+/*
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ */
/*
* This file contains the interface control functions for IP.
@@ -224,6 +227,8 @@ static void ipif_trace_cleanup(const ipif_t *);
static void ill_dlpi_clear_deferred(ill_t *ill);
+static void phyint_flags_init(phyint_t *, t_uscalar_t);
+
/*
* if we go over the memory footprint limit more than once in this msec
* interval, we'll start pruning aggressively.
@@ -282,7 +287,6 @@ static ip_m_t ip_m_tbl[] = {
ip_nodef_v6intfid }
};
-static ill_t ill_null; /* Empty ILL for init. */
char ipif_loopback_name[] = "lo0";
/* These are used by all IP network modules. */
@@ -3331,50 +3335,42 @@ ipsq_init(ill_t *ill, boolean_t enter)
}
/*
- * ill_init is called by ip_open when a device control stream is opened.
- * It does a few initializations, and shoots a DL_INFO_REQ message down
- * to the driver. The response is later picked up in ip_rput_dlpi and
- * used to set up default mechanisms for talking to the driver. (Always
- * called as writer.)
- *
- * If this function returns error, ip_open will call ip_close which in
- * turn will call ill_delete to clean up any memory allocated here that
- * is not yet freed.
+ * Here we perform initialisation of the ill_t common to both regular
+ * interface ILLs and the special loopback ILL created by ill_lookup_on_name.
*/
-int
-ill_init(queue_t *q, ill_t *ill)
+static int
+ill_init_common(ill_t *ill, queue_t *q, boolean_t isv6, boolean_t is_loopback,
+ boolean_t ipsq_enter)
{
- int count;
- dl_info_req_t *dlir;
- mblk_t *info_mp;
+ int count;
uchar_t *frag_ptr;
- /*
- * The ill is initialized to zero by mi_alloc*(). In addition
- * some fields already contain valid values, initialized in
- * ip_open(), before we reach here.
- */
mutex_init(&ill->ill_lock, NULL, MUTEX_DEFAULT, 0);
mutex_init(&ill->ill_saved_ire_lock, NULL, MUTEX_DEFAULT, NULL);
ill->ill_saved_ire_cnt = 0;
- ill->ill_rq = q;
- ill->ill_wq = WR(q);
+ if (is_loopback) {
+ ill->ill_max_frag = isv6 ? ip_loopback_mtu_v6plus :
+ ip_loopback_mtuplus;
+ /*
+ * No resolver here.
+ */
+ ill->ill_net_type = IRE_LOOPBACK;
+ } else {
+ ill->ill_rq = q;
+ ill->ill_wq = WR(q);
+ ill->ill_ppa = UINT_MAX;
+ }
- info_mp = allocb(MAX(sizeof (dl_info_req_t), sizeof (dl_info_ack_t)),
- BPRI_HI);
- if (info_mp == NULL)
- return (ENOMEM);
+ ill->ill_isv6 = isv6;
/*
* Allocate sufficient space to contain our fragment hash table and
* the device name.
*/
frag_ptr = (uchar_t *)mi_zalloc(ILL_FRAG_HASH_TBL_SIZE + 2 * LIFNAMSIZ);
- if (frag_ptr == NULL) {
- freemsg(info_mp);
+ if (frag_ptr == NULL)
return (ENOMEM);
- }
ill->ill_frag_ptr = frag_ptr;
ill->ill_frag_free_num_pkts = 0;
ill->ill_last_frag_clean_time = 0;
@@ -3387,35 +3383,30 @@ ill_init(queue_t *q, ill_t *ill)
ill->ill_phyint = (phyint_t *)mi_zalloc(sizeof (phyint_t));
if (ill->ill_phyint == NULL) {
- freemsg(info_mp);
mi_free(frag_ptr);
return (ENOMEM);
}
mutex_init(&ill->ill_phyint->phyint_lock, NULL, MUTEX_DEFAULT, 0);
- /*
- * For now pretend this is a v4 ill. We need to set phyint_ill*
- * at this point because of the following reason. If we can't
- * enter the ipsq at some point and cv_wait, the writer that
- * wakes us up tries to locate us using the list of all phyints
- * in an ipsq and the ills from the phyint thru the phyint_ill*.
- * If we don't set it now, we risk a missed wakeup.
- */
- ill->ill_phyint->phyint_illv4 = ill;
- ill->ill_ppa = UINT_MAX;
+ if (isv6) {
+ ill->ill_phyint->phyint_illv6 = ill;
+ } else {
+ ill->ill_phyint->phyint_illv4 = ill;
+ }
+ if (is_loopback) {
+ phyint_flags_init(ill->ill_phyint, DL_LOOP);
+ }
+
list_create(&ill->ill_nce, sizeof (nce_t), offsetof(nce_t, nce_node));
ill_set_inputfn(ill);
- if (!ipsq_init(ill, B_TRUE)) {
- freemsg(info_mp);
+ if (!ipsq_init(ill, ipsq_enter)) {
mi_free(frag_ptr);
mi_free(ill->ill_phyint);
return (ENOMEM);
}
- ill->ill_state_flags |= ILL_LL_SUBNET_PENDING;
-
/* Frag queue limit stuff */
ill->ill_frag_count = 0;
ill->ill_ipf_gen = 0;
@@ -3440,6 +3431,53 @@ ill_init(queue_t *q, ill_t *ill)
ill->ill_max_buf = ND_MAX_Q;
ill->ill_refcnt = 0;
+ return (0);
+}
+
+/*
+ * ill_init is called by ip_open when a device control stream is opened.
+ * It does a few initializations, and shoots a DL_INFO_REQ message down
+ * to the driver. The response is later picked up in ip_rput_dlpi and
+ * used to set up default mechanisms for talking to the driver. (Always
+ * called as writer.)
+ *
+ * If this function returns error, ip_open will call ip_close which in
+ * turn will call ill_delete to clean up any memory allocated here that
+ * is not yet freed.
+ *
+ * Note: ill_ipst and ill_zoneid must be set before calling ill_init.
+ */
+int
+ill_init(queue_t *q, ill_t *ill)
+{
+ int ret;
+ dl_info_req_t *dlir;
+ mblk_t *info_mp;
+
+ info_mp = allocb(MAX(sizeof (dl_info_req_t), sizeof (dl_info_ack_t)),
+ BPRI_HI);
+ if (info_mp == NULL)
+ return (ENOMEM);
+
+ /*
+ * The ill is initialized to zero by mi_alloc*(). In addition
+ * some fields already contain valid values, initialized in
+ * ip_open(), before we reach here.
+ *
+ * For now pretend this is a v4 ill. We need to set phyint_ill*
+ * at this point because of the following reason. If we can't
+ * enter the ipsq at some point and cv_wait, the writer that
+ * wakes us up tries to locate us using the list of all phyints
+ * in an ipsq and the ills from the phyint thru the phyint_ill*.
+ * If we don't set it now, we risk a missed wakeup.
+ */
+ if ((ret = ill_init_common(ill, q, B_FALSE, B_FALSE, B_TRUE)) != 0) {
+ freemsg(info_mp);
+ return (ret);
+ }
+
+ ill->ill_state_flags |= ILL_LL_SUBNET_PENDING;
+
/* Send down the Info Request to the driver. */
info_mp->b_datap->db_type = M_PCPROTO;
dlir = (dl_info_req_t *)info_mp->b_rptr;
@@ -3687,10 +3725,8 @@ ill_lookup_on_name(char *name, boolean_t do_alloc, boolean_t isv6,
if (ill == NULL)
goto done;
- *ill = ill_null;
- mutex_init(&ill->ill_lock, NULL, MUTEX_DEFAULT, NULL);
+ bzero(ill, sizeof (*ill));
ill->ill_ipst = ipst;
- list_create(&ill->ill_nce, sizeof (nce_t), offsetof(nce_t, nce_node));
netstack_hold(ipst->ips_netstack);
/*
* For exclusive stacks we set the zoneid to zero
@@ -3698,25 +3734,12 @@ ill_lookup_on_name(char *name, boolean_t do_alloc, boolean_t isv6,
*/
ill->ill_zoneid = GLOBAL_ZONEID;
- ill->ill_phyint = (phyint_t *)mi_zalloc(sizeof (phyint_t));
- if (ill->ill_phyint == NULL)
+ if (ill_init_common(ill, NULL, isv6, B_TRUE, B_FALSE) != 0)
goto done;
- if (isv6)
- ill->ill_phyint->phyint_illv6 = ill;
- else
- ill->ill_phyint->phyint_illv4 = ill;
- mutex_init(&ill->ill_phyint->phyint_lock, NULL, MUTEX_DEFAULT, 0);
- phyint_flags_init(ill->ill_phyint, DL_LOOP);
-
- if (isv6) {
- ill->ill_isv6 = B_TRUE;
- ill->ill_max_frag = ip_loopback_mtu_v6plus;
- } else {
- ill->ill_max_frag = ip_loopback_mtuplus;
- }
if (!ill_allocate_mibs(ill))
goto done;
+
ill->ill_current_frag = ill->ill_max_frag;
ill->ill_mtu = ill->ill_max_frag; /* Initial value */
ill->ill_mc_mtu = ill->ill_mtu;
@@ -3732,21 +3755,6 @@ ill_lookup_on_name(char *name, boolean_t do_alloc, boolean_t isv6,
/* Set ill_dlpi_pending for ipsq_current_finish() to work properly */
ill->ill_dlpi_pending = DL_PRIM_INVAL;
- rw_init(&ill->ill_mcast_lock, NULL, RW_DEFAULT, NULL);
- mutex_init(&ill->ill_mcast_serializer, NULL, MUTEX_DEFAULT, NULL);
- ill->ill_global_timer = INFINITY;
- ill->ill_mcast_v1_time = ill->ill_mcast_v2_time = 0;
- ill->ill_mcast_v1_tset = ill->ill_mcast_v2_tset = 0;
- ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
- ill->ill_mcast_qi = MCAST_DEF_QUERY_INTERVAL;
-
- /* No resolver here. */
- ill->ill_net_type = IRE_LOOPBACK;
-
- /* Initialize the ipsq */
- if (!ipsq_init(ill, B_FALSE))
- goto done;
-
ipif = ipif_allocate(ill, 0L, IRE_LOOPBACK, B_TRUE, B_TRUE, NULL);
if (ipif == NULL)
goto done;
@@ -3775,17 +3783,10 @@ ill_lookup_on_name(char *name, boolean_t do_alloc, boolean_t isv6,
* Chain us in at the end of the ill list. hold the ill
* before we make it globally visible. 1 for the lookup.
*/
- ill->ill_refcnt = 0;
ill_refhold(ill);
- ill->ill_frag_count = 0;
- ill->ill_frag_free_num_pkts = 0;
- ill->ill_last_frag_clean_time = 0;
-
ipsq = ill->ill_phyint->phyint_ipsq;
- ill_set_inputfn(ill);
-
if (ill_glist_insert(ill, "lo", isv6) != 0)
cmn_err(CE_PANIC, "cannot insert loopback interface");
diff --git a/usr/src/uts/common/inet/ip/ip_squeue.c b/usr/src/uts/common/inet/ip/ip_squeue.c
index 33a2fa5935..dedb4dadcc 100644
--- a/usr/src/uts/common/inet/ip/ip_squeue.c
+++ b/usr/src/uts/common/inet/ip/ip_squeue.c
@@ -163,7 +163,7 @@ ip_squeue_create(pri_t pri)
{
squeue_t *sqp;
- sqp = squeue_create(ip_squeue_worker_wait, pri);
+ sqp = squeue_create(ip_squeue_worker_wait, pri, B_TRUE);
ASSERT(sqp != NULL);
if (ip_squeue_create_callback != NULL)
ip_squeue_create_callback(sqp);
diff --git a/usr/src/uts/common/inet/ip/ip_tunables.c b/usr/src/uts/common/inet/ip/ip_tunables.c
index 58e3b59ff3..dda05a316d 100644
--- a/usr/src/uts/common/inet/ip/ip_tunables.c
+++ b/usr/src/uts/common/inet/ip/ip_tunables.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
diff --git a/usr/src/uts/common/inet/ip/ipsecesp.c b/usr/src/uts/common/inet/ip/ipsecesp.c
index c325e8dc26..2ca770ebe9 100644
--- a/usr/src/uts/common/inet/ip/ipsecesp.c
+++ b/usr/src/uts/common/inet/ip/ipsecesp.c
@@ -234,8 +234,7 @@ esp_kstat_init(ipsecesp_stack_t *espstack, netstackid_t stackid)
{
espstack->esp_ksp = kstat_create_netstack("ipsecesp", 0, "esp_stat",
"net", KSTAT_TYPE_NAMED,
- sizeof (esp_kstats_t) / sizeof (kstat_named_t),
- KSTAT_FLAG_PERSISTENT, stackid);
+ sizeof (esp_kstats_t) / sizeof (kstat_named_t), 0, stackid);
if (espstack->esp_ksp == NULL || espstack->esp_ksp->ks_data == NULL)
return (B_FALSE);
diff --git a/usr/src/uts/common/inet/ipf/fil.c b/usr/src/uts/common/inet/ipf/fil.c
index 3955d881e8..f96d2780e6 100644
--- a/usr/src/uts/common/inet/ipf/fil.c
+++ b/usr/src/uts/common/inet/ipf/fil.c
@@ -4,6 +4,8 @@
* See the IPFILTER.LICENCE file for details on licencing.
*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#if defined(KERNEL) || defined(_KERNEL)
@@ -134,6 +136,9 @@ struct file;
# endif
#endif
#include "netinet/ipl.h"
+#if defined(SOLARIS) && defined(_KERNEL)
+#include <sys/sunddi.h>
+#endif
/* END OF INCLUDES */
#if !defined(lint)
@@ -5696,6 +5701,47 @@ static int fr_objbytes[NUM_OBJ_TYPES][2] = {
/* ------------------------------------------------------------------------ */
+/* Function: fr_getzoneid */
+/* Returns: int - 0 = success, else failure */
+/* Parameters: idsp(I) - pointer to ipf_devstate_t */
+/* data(I) - pointer to ioctl data */
+/* */
+/* Set the zone ID in idsp based on the zone name in ipfzoneobj. Further */
+/* ioctls will act on the IPF stack for that zone ID. */
+/* ------------------------------------------------------------------------ */
+#if defined(SOLARIS) && defined(_KERNEL)
+int fr_setzoneid(idsp, data)
+ipf_devstate_t *idsp;
+void *data;
+{
+ int error = 0;
+ ipfzoneobj_t ipfzo;
+ zone_t *zone;
+
+ error = BCOPYIN(data, &ipfzo, sizeof(ipfzo));
+ if (error != 0)
+ return EFAULT;
+
+ if (memchr(ipfzo.ipfz_zonename, '\0', ZONENAME_MAX) == NULL)
+ return EFAULT;
+
+ if ((zone = zone_find_by_name(ipfzo.ipfz_zonename)) == NULL)
+ return ENODEV;
+
+ /*
+ * Store the zone ID that to control, and whether it's the
+ * GZ-controlled stack that's wanted
+ */
+ idsp->ipfs_zoneid = zone->zone_id;
+ idsp->ipfs_gz = (ipfzo.ipfz_gz == 1) ? B_TRUE : B_FALSE;
+ zone_rele(zone);
+
+ return error;
+}
+#endif
+
+
+/* ------------------------------------------------------------------------ */
/* Function: fr_inobj */
/* Returns: int - 0 = success, else failure */
/* Parameters: data(I) - pointer to ioctl data */
diff --git a/usr/src/uts/common/inet/ipf/ip_fil_solaris.c b/usr/src/uts/common/inet/ipf/ip_fil_solaris.c
index d6475d85b3..7ece4fead6 100644
--- a/usr/src/uts/common/inet/ipf/ip_fil_solaris.c
+++ b/usr/src/uts/common/inet/ipf/ip_fil_solaris.c
@@ -4,6 +4,8 @@
* See the IPFILTER.LICENCE file for details on licencing.
*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#if !defined(lint)
@@ -81,6 +83,14 @@ static int ipf_hook6_loop_out __P((hook_event_token_t, hook_data_t,
static int ipf_hook6_loop_in __P((hook_event_token_t, hook_data_t,
void *));
static int ipf_hook6 __P((hook_data_t, int, int, void *));
+static int ipf_hookvndl3v4_in __P((hook_event_token_t, hook_data_t,
+ void *));
+static int ipf_hookvndl3v6_in __P((hook_event_token_t, hook_data_t,
+ void *));
+static int ipf_hookvndl3v4_out __P((hook_event_token_t, hook_data_t,
+ void *));
+static int ipf_hookvndl3v6_out __P((hook_event_token_t, hook_data_t,
+ void *));
extern int ipf_geniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
extern int ipf_frruleiter __P((void *, int, void *, ipf_stack_t *));
@@ -101,6 +111,64 @@ u_long *ip_forwarding = NULL;
#endif
#endif
+vmem_t *ipf_minor; /* minor number arena */
+void *ipf_state; /* DDI state */
+
+/*
+ * GZ and per-zone stacks:
+ *
+ * For each non-global zone, we create two ipf stacks: the per-zone stack and
+ * the GZ-controlled stack. The per-zone stack can be controlled and observed
+ * from inside the zone or from the global zone. The GZ-controlled stack can
+ * only be controlled and observed from the global zone (though the rules
+ * still only affect that non-global zone).
+ *
+ * The two hooks are always arranged so that the GZ-controlled stack is always
+ * "outermost" with respect to the zone. The traffic flow then looks like
+ * this:
+ *
+ * Inbound:
+ *
+ * nic ---> [ GZ-controlled rules ] ---> [ per-zone rules ] ---> zone
+ *
+ * Outbound:
+ *
+ * nic <--- [ GZ-controlled rules ] <--- [ per-zone rules ] <--- zone
+ */
+
+/* IPv4 hook names */
+char *hook4_nicevents = "ipfilter_hook4_nicevents";
+char *hook4_nicevents_gz = "ipfilter_hook4_nicevents_gz";
+char *hook4_in = "ipfilter_hook4_in";
+char *hook4_in_gz = "ipfilter_hook4_in_gz";
+char *hook4_out = "ipfilter_hook4_out";
+char *hook4_out_gz = "ipfilter_hook4_out_gz";
+char *hook4_loop_in = "ipfilter_hook4_loop_in";
+char *hook4_loop_in_gz = "ipfilter_hook4_loop_in_gz";
+char *hook4_loop_out = "ipfilter_hook4_loop_out";
+char *hook4_loop_out_gz = "ipfilter_hook4_loop_out_gz";
+
+/* IPv6 hook names */
+char *hook6_nicevents = "ipfilter_hook6_nicevents";
+char *hook6_nicevents_gz = "ipfilter_hook6_nicevents_gz";
+char *hook6_in = "ipfilter_hook6_in";
+char *hook6_in_gz = "ipfilter_hook6_in_gz";
+char *hook6_out = "ipfilter_hook6_out";
+char *hook6_out_gz = "ipfilter_hook6_out_gz";
+char *hook6_loop_in = "ipfilter_hook6_loop_in";
+char *hook6_loop_in_gz = "ipfilter_hook6_loop_in_gz";
+char *hook6_loop_out = "ipfilter_hook6_loop_out";
+char *hook6_loop_out_gz = "ipfilter_hook6_loop_out_gz";
+
+/* vnd IPv4/v6 hook names */
+char *hook4_vnd_in = "ipfilter_hookvndl3v4_in";
+char *hook4_vnd_in_gz = "ipfilter_hookvndl3v4_in_gz";
+char *hook6_vnd_in = "ipfilter_hookvndl3v6_in";
+char *hook6_vnd_in_gz = "ipfilter_hookvndl3v6_in_gz";
+char *hook4_vnd_out = "ipfilter_hookvndl3v4_out";
+char *hook4_vnd_out_gz = "ipfilter_hookvndl3v4_out_gz";
+char *hook6_vnd_out = "ipfilter_hookvndl3v6_out";
+char *hook6_vnd_out_gz = "ipfilter_hookvndl3v6_out_gz";
/* ------------------------------------------------------------------------ */
/* Function: ipldetach */
@@ -117,7 +185,7 @@ int ipldetach(ifs)
ipf_stack_t *ifs;
{
- ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0);
+ ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk));
#if SOLARIS2 < 10
@@ -141,11 +209,13 @@ ipf_stack_t *ifs;
#define UNDO_HOOK(_f, _b, _e, _h) \
do { \
+ int tmp; \
if (ifs->_f != NULL) { \
if (ifs->_b) { \
- ifs->_b = (net_hook_unregister(ifs->_f, \
- _e, ifs->_h) != 0); \
- if (!ifs->_b) { \
+ tmp = net_hook_unregister(ifs->_f, \
+ _e, ifs->_h); \
+ ifs->_b = (tmp != 0 && tmp != ENXIO); \
+ if (!ifs->_b && ifs->_h != NULL) { \
hook_free(ifs->_h); \
ifs->_h = NULL; \
} \
@@ -197,6 +267,31 @@ ipf_stack_t *ifs;
ifs->ifs_ipf_ipv4 = NULL;
}
+ /*
+ * Remove VND hooks
+ */
+ if (ifs->ifs_ipf_vndl3v4 != NULL) {
+ UNDO_HOOK(ifs_ipf_vndl3v4, ifs_hookvndl3v4_physical_in,
+ NH_PHYSICAL_IN, ifs_ipfhookvndl3v4_in);
+ UNDO_HOOK(ifs_ipf_vndl3v4, ifs_hookvndl3v4_physical_out,
+ NH_PHYSICAL_OUT, ifs_ipfhookvndl3v4_out);
+
+ if (net_protocol_release(ifs->ifs_ipf_vndl3v4) != 0)
+ goto detach_failed;
+ ifs->ifs_ipf_vndl3v4 = NULL;
+ }
+
+ if (ifs->ifs_ipf_vndl3v6 != NULL) {
+ UNDO_HOOK(ifs_ipf_vndl3v6, ifs_hookvndl3v6_physical_in,
+ NH_PHYSICAL_IN, ifs_ipfhookvndl3v6_in);
+ UNDO_HOOK(ifs_ipf_vndl3v6, ifs_hookvndl3v6_physical_out,
+ NH_PHYSICAL_OUT, ifs_ipfhookvndl3v6_out);
+
+ if (net_protocol_release(ifs->ifs_ipf_vndl3v6) != 0)
+ goto detach_failed;
+ ifs->ifs_ipf_vndl3v6 = NULL;
+ }
+
#undef UNDO_HOOK
#ifdef IPFDEBUG
@@ -243,7 +338,7 @@ ipf_stack_t *ifs;
cmn_err(CE_CONT, "iplattach()\n");
#endif
- ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0);
+ ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk));
ifs->ifs_fr_flags = IPF_LOGGING;
#ifdef _KERNEL
ifs->ifs_fr_update_ipid = 0;
@@ -268,16 +363,35 @@ ipf_stack_t *ifs;
if (fr_initialise(ifs) < 0)
return -1;
- HOOK_INIT(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4,
- "ipfilter_hook4_nicevents", ifs);
- HOOK_INIT(ifs->ifs_ipfhook4_in, ipf_hook4_in,
- "ipfilter_hook4_in", ifs);
- HOOK_INIT(ifs->ifs_ipfhook4_out, ipf_hook4_out,
- "ipfilter_hook4_out", ifs);
- HOOK_INIT(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in,
- "ipfilter_hook4_loop_in", ifs);
- HOOK_INIT(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out,
- "ipfilter_hook4_loop_out", ifs);
+ /*
+ * For incoming packets, we want the GZ hooks to run before the
+ * per-zone hooks, regardless of what order they're are installed.
+ */
+#define HOOK_INIT_GZ_BEFORE(x, fn, n, gzn, a) \
+ HOOK_INIT(x, fn, ifs->ifs_gz ? gzn : n, ifs); \
+ (x)->h_hint = ifs->ifs_gz ? HH_BEFORE : HH_AFTER; \
+ (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz ? n : gzn);
+
+ HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4,
+ hook4_nicevents, hook4_nicevents_gz, ifs);
+ HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_in, ipf_hook4_in,
+ hook4_in, hook4_in_gz, ifs);
+ HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in,
+ hook4_loop_in, hook4_loop_in_gz, ifs);
+
+ /*
+ * For outgoing packets, we want the GZ hooks to run after the
+ * per-zone hooks, regardless of what order they're are installed.
+ */
+#define HOOK_INIT_GZ_AFTER(x, fn, n, gzn, a) \
+ HOOK_INIT(x, fn, ifs->ifs_gz ? gzn : n, ifs); \
+ (x)->h_hint = ifs->ifs_gz ? HH_AFTER : HH_BEFORE; \
+ (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz ? n : gzn);
+
+ HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_out, ipf_hook4_out,
+ hook4_out, hook4_out_gz, ifs);
+ HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out,
+ hook4_loop_out, hook4_loop_out_gz, ifs);
/*
* If we hold this lock over all of the net_hook_register calls, we
@@ -322,6 +436,7 @@ ipf_stack_t *ifs;
if (!ifs->ifs_hook4_loopback_out)
goto hookup_failed;
}
+
/*
* Add IPv6 hooks
*/
@@ -329,16 +444,16 @@ ipf_stack_t *ifs;
if (ifs->ifs_ipf_ipv6 == NULL)
goto hookup_failed;
- HOOK_INIT(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6,
- "ipfilter_hook6_nicevents", ifs);
- HOOK_INIT(ifs->ifs_ipfhook6_in, ipf_hook6_in,
- "ipfilter_hook6_in", ifs);
- HOOK_INIT(ifs->ifs_ipfhook6_out, ipf_hook6_out,
- "ipfilter_hook6_out", ifs);
- HOOK_INIT(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in,
- "ipfilter_hook6_loop_in", ifs);
- HOOK_INIT(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out,
- "ipfilter_hook6_loop_out", ifs);
+ HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6,
+ hook6_nicevents, hook6_nicevents_gz, ifs);
+ HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_in, ipf_hook6_in,
+ hook6_in, hook6_in_gz, ifs);
+ HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in,
+ hook6_loop_in, hook6_loop_in_gz, ifs);
+ HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_out, ipf_hook6_out,
+ hook6_out, hook6_out_gz, ifs);
+ HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out,
+ hook6_loop_out, hook6_loop_out_gz, ifs);
ifs->ifs_hook6_nic_events = (net_hook_register(ifs->ifs_ipf_ipv6,
NH_NIC_EVENTS, ifs->ifs_ipfhook6_nicevents) == 0);
@@ -370,6 +485,48 @@ ipf_stack_t *ifs;
}
/*
+ * Add VND INET hooks
+ */
+ ifs->ifs_ipf_vndl3v4 = net_protocol_lookup(id, NHF_VND_INET);
+ if (ifs->ifs_ipf_vndl3v4 == NULL)
+ goto hookup_failed;
+
+ HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhookvndl3v4_in, ipf_hookvndl3v4_in,
+ hook4_vnd_in, hook4_vnd_in_gz, ifs);
+ HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhookvndl3v4_out, ipf_hookvndl3v4_out,
+ hook4_vnd_out, hook4_vnd_out_gz, ifs);
+ ifs->ifs_hookvndl3v4_physical_in = (net_hook_register(ifs->ifs_ipf_vndl3v4,
+ NH_PHYSICAL_IN, ifs->ifs_ipfhookvndl3v4_in) == 0);
+ if (!ifs->ifs_hookvndl3v4_physical_in)
+ goto hookup_failed;
+
+ ifs->ifs_hookvndl3v4_physical_out = (net_hook_register(ifs->ifs_ipf_vndl3v4,
+ NH_PHYSICAL_OUT, ifs->ifs_ipfhookvndl3v4_out) == 0);
+ if (!ifs->ifs_hookvndl3v4_physical_out)
+ goto hookup_failed;
+
+
+ /*
+ * VND INET6 hooks
+ */
+ ifs->ifs_ipf_vndl3v6 = net_protocol_lookup(id, NHF_VND_INET6);
+ if (ifs->ifs_ipf_vndl3v6 == NULL)
+ goto hookup_failed;
+
+ HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhookvndl3v6_in, ipf_hookvndl3v6_in,
+ hook6_vnd_in, hook6_vnd_in_gz, ifs);
+ HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhookvndl3v6_out, ipf_hookvndl3v6_out,
+ hook6_vnd_out, hook6_vnd_out_gz, ifs);
+ ifs->ifs_hookvndl3v6_physical_in = (net_hook_register(ifs->ifs_ipf_vndl3v6,
+ NH_PHYSICAL_IN, ifs->ifs_ipfhookvndl3v6_in) == 0);
+ if (!ifs->ifs_hookvndl3v6_physical_in)
+ goto hookup_failed;
+
+ ifs->ifs_hookvndl3v6_physical_out = (net_hook_register(ifs->ifs_ipf_vndl3v6,
+ NH_PHYSICAL_OUT, ifs->ifs_ipfhookvndl3v6_out) == 0);
+ if (!ifs->ifs_hookvndl3v6_physical_out)
+ goto hookup_failed;
+ /*
* Reacquire ipf_global, now it is safe.
*/
WRITE_ENTER(&ifs->ifs_ipf_global);
@@ -518,35 +675,74 @@ int *rp;
minor_t unit;
u_int enable;
ipf_stack_t *ifs;
+ zoneid_t zid;
+ ipf_devstate_t *isp;
+ boolean_t gz_stack;
#ifdef IPFDEBUG
cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n",
dev, cmd, data, mode, cp, rp);
#endif
unit = getminor(dev);
- if (IPL_LOGMAX < unit)
+
+ isp = ddi_get_soft_state(ipf_state, unit);
+ if (isp == NULL)
return ENXIO;
+ unit = isp->ipfs_minor;
+
+ zid = crgetzoneid(cp);
+ if (cmd == SIOCIPFZONESET) {
+ if (zid == GLOBAL_ZONEID)
+ return fr_setzoneid(isp, (caddr_t) data);
+ return EACCES;
+ }
+
+ /*
+ * If we're in the GZ, determine if we're acting on a zone's stack,
+ * and whether or not that stack is the GZ-controlled or in-zone
+ * one. See the "GZ and per-zone stacks" note at the top of this
+ * file.
+ */
+ if (zid == GLOBAL_ZONEID && (isp->ipfs_zoneid != -1)) {
+ /* Global zone, and we've set the zoneid for this fd already */
+
+ if (zid == isp->ipfs_zoneid) {
+ /* There's only a per-zone stack for the GZ */
+ gz_stack = B_FALSE;
+ } else {
+ gz_stack = isp->ipfs_gz;
+ }
+
+ zid = isp->ipfs_zoneid;
+ } else {
+ /*
+ * Non-global zone or GZ without having set a zoneid: act on
+ * the per-zone stack of the zone that this ioctl originated
+ * from.
+ */
+ gz_stack = B_FALSE;
+ }
/*
- * As we're calling ipf_find_stack in user space, from a given zone
- * to find the stack pointer for this zone, there is no need to have
- * a hold/refence count here.
+ * ipf_find_stack returns with a read lock on ifs_ipf_global
*/
- ifs = ipf_find_stack(crgetzoneid(cp));
- ASSERT(ifs != NULL);
+ ifs = ipf_find_stack(zid, gz_stack);
+ if (ifs == NULL)
+ return ENXIO;
if (ifs->ifs_fr_running <= 0) {
if (unit != IPL_LOGIPF) {
+ RWLOCK_EXIT(&ifs->ifs_ipf_global);
return EIO;
}
if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET &&
cmd != SIOCIPFSET && cmd != SIOCFRENB &&
cmd != SIOCGETFS && cmd != SIOCGETFF) {
+ RWLOCK_EXIT(&ifs->ifs_ipf_global);
return EIO;
}
}
- READ_ENTER(&ifs->ifs_ipf_global);
if (ifs->ifs_fr_enable_active != 0) {
RWLOCK_EXIT(&ifs->ifs_ipf_global);
return EBUSY;
@@ -847,7 +1043,9 @@ dev_t *devp;
int flags, otype;
cred_t *cred;
{
+ ipf_devstate_t *isp;
minor_t min = getminor(*devp);
+ minor_t minor;
#ifdef IPFDEBUG
cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred);
@@ -855,8 +1053,25 @@ cred_t *cred;
if (!(otype & OTYP_CHR))
return ENXIO;
- min = (IPL_LOGMAX < min) ? ENXIO : 0;
- return min;
+ if (IPL_LOGMAX < min)
+ return ENXIO;
+
+ minor = (minor_t)(uintptr_t)vmem_alloc(ipf_minor, 1,
+ VM_BESTFIT | VM_SLEEP);
+
+ if (ddi_soft_state_zalloc(ipf_state, minor) != 0) {
+ vmem_free(ipf_minor, (void *)(uintptr_t)minor, 1);
+ return ENXIO;
+ }
+
+ *devp = makedevice(getmajor(*devp), minor);
+ isp = ddi_get_soft_state(ipf_state, minor);
+ VERIFY(isp != NULL);
+
+ isp->ipfs_minor = min;
+ isp->ipfs_zoneid = -1;
+
+ return 0;
}
@@ -872,8 +1087,13 @@ cred_t *cred;
cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred);
#endif
- min = (IPL_LOGMAX < min) ? ENXIO : 0;
- return min;
+ if (IPL_LOGMAX < min)
+ return ENXIO;
+
+ ddi_soft_state_free(ipf_state, min);
+ vmem_free(ipf_minor, (void *)(uintptr_t)min, 1);
+
+ return 0;
}
#ifdef IPFILTER_LOG
@@ -891,30 +1111,70 @@ cred_t *cp;
{
ipf_stack_t *ifs;
int ret;
+ minor_t unit;
+ zoneid_t zid;
+ ipf_devstate_t *isp;
+ boolean_t gz_stack;
+
+ unit = getminor(dev);
+ isp = ddi_get_soft_state(ipf_state, unit);
+ if (isp == NULL)
+ return ENXIO;
+ unit = isp->ipfs_minor;
+
+ zid = crgetzoneid(cp);
+
+ /*
+ * If we're in the GZ, determine if we're acting on a zone's stack,
+ * and whether or not that stack is the GZ-controlled or in-zone
+ * one. See the "GZ and per-zone stacks" note at the top of this
+ * file.
+ */
+ if (zid == GLOBAL_ZONEID && (isp->ipfs_zoneid != -1)) {
+ /* Global zone, and we've set the zoneid for this fd already */
+
+ if (zid == isp->ipfs_zoneid) {
+ /* There's only a per-zone stack for the GZ */
+ gz_stack = B_FALSE;
+ } else {
+ gz_stack = isp->ipfs_gz;
+ }
+
+ zid = isp->ipfs_zoneid;
+ } else {
+ /*
+ * Non-global zone or GZ without having set a zoneid: act on
+ * the per-zone stack of the zone that this ioctl originated
+ * from.
+ */
+ gz_stack = B_FALSE;
+ }
/*
- * As we're calling ipf_find_stack in user space, from a given zone
- * to find the stack pointer for this zone, there is no need to have
- * a hold/refence count here.
+ * ipf_find_stack returns with a read lock on ifs_ipf_global
*/
- ifs = ipf_find_stack(crgetzoneid(cp));
- ASSERT(ifs != NULL);
+ ifs = ipf_find_stack(zid, gz_stack);
+ if (ifs == NULL)
+ return ENXIO;
# ifdef IPFDEBUG
cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp);
# endif
if (ifs->ifs_fr_running < 1) {
+ RWLOCK_EXIT(&ifs->ifs_ipf_global);
return EIO;
}
# ifdef IPFILTER_SYNC
- if (getminor(dev) == IPL_LOGSYNC) {
+ if (unit == IPL_LOGSYNC) {
+ RWLOCK_EXIT(&ifs->ifs_ipf_global);
return ipfsync_read(uio);
}
# endif
- ret = ipflog_read(getminor(dev), uio, ifs);
+ ret = ipflog_read(unit, uio, ifs);
+ RWLOCK_EXIT(&ifs->ifs_ipf_global);
return ret;
}
#endif /* IPFILTER_LOG */
@@ -932,30 +1192,71 @@ register struct uio *uio;
cred_t *cp;
{
ipf_stack_t *ifs;
+ minor_t unit;
+ zoneid_t zid;
+ ipf_devstate_t *isp;
+ boolean_t gz_stack;
+
+ unit = getminor(dev);
+ isp = ddi_get_soft_state(ipf_state, unit);
+ if (isp == NULL)
+ return ENXIO;
+ unit = isp->ipfs_minor;
/*
- * As we're calling ipf_find_stack in user space, from a given zone
- * to find the stack pointer for this zone, there is no need to have
- * a hold/refence count here.
+ * ipf_find_stack returns with a read lock on ifs_ipf_global
*/
- ifs = ipf_find_stack(crgetzoneid(cp));
- ASSERT(ifs != NULL);
+ zid = crgetzoneid(cp);
+
+ /*
+ * If we're in the GZ, determine if we're acting on a zone's stack,
+ * and whether or not that stack is the GZ-controlled or in-zone
+ * one. See the "GZ and per-zone stacks" note at the top of this
+ * file.
+ */
+ if (zid == GLOBAL_ZONEID && (isp->ipfs_zoneid != -1)) {
+ /* Global zone, and we've set the zoneid for this fd already */
+
+ if (zid == isp->ipfs_zoneid) {
+ /* There's only a per-zone stack for the GZ */
+ gz_stack = B_FALSE;
+ } else {
+ gz_stack = isp->ipfs_gz;
+ }
+
+ zid = isp->ipfs_zoneid;
+ } else {
+ /*
+ * Non-global zone or GZ without having set a zoneid: act on
+ * the per-zone stack of the zone that this ioctl originated
+ * from.
+ */
+ gz_stack = B_FALSE;
+ }
+
+ ifs = ipf_find_stack(zid, gz_stack);
+ if (ifs == NULL)
+ return ENXIO;
#ifdef IPFDEBUG
cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp);
#endif
if (ifs->ifs_fr_running < 1) {
+ RWLOCK_EXIT(&ifs->ifs_ipf_global);
return EIO;
}
#ifdef IPFILTER_SYNC
- if (getminor(dev) == IPL_LOGSYNC)
+ if (getminor(dev) == IPL_LOGSYNC) {
+ RWLOCK_EXIT(&ifs->ifs_ipf_global);
return ipfsync_write(uio);
+ }
#endif /* IPFILTER_SYNC */
dev = dev; /* LINT */
uio = uio; /* LINT */
cp = cp; /* LINT */
+ RWLOCK_EXIT(&ifs->ifs_ipf_global);
return ENXIO;
}
@@ -1882,6 +2183,42 @@ int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
}
/* ------------------------------------------------------------------------ */
+/* Function: ipf_hookvndl3_in */
+/* Returns: int - 0 == packet ok, else problem, free packet if not done */
+/* Parameters: event(I) - pointer to event */
+/* info(I) - pointer to hook information for firewalling */
+/* */
+/* The vnd hooks are private hooks to ON. They represents a layer 2 */
+/* datapath generally used to implement virtual machines. The driver sends */
+/* along L3 packets of either type IP or IPv6. The ethertype to distinguish */
+/* them is in the upper 16 bits while the remaining bits are the */
+/* traditional packet hook flags. */
+/* */
+/* They end up calling the appropriate traditional ip hooks. */
+/* ------------------------------------------------------------------------ */
+/*ARGSUSED*/
+int ipf_hookvndl3v4_in(hook_event_token_t token, hook_data_t info, void *arg)
+{
+ return ipf_hook4_in(token, info, arg);
+}
+
+int ipf_hookvndl3v6_in(hook_event_token_t token, hook_data_t info, void *arg)
+{
+ return ipf_hook6_in(token, info, arg);
+}
+
+/*ARGSUSED*/
+int ipf_hookvndl3v4_out(hook_event_token_t token, hook_data_t info, void *arg)
+{
+ return ipf_hook4_out(token, info, arg);
+}
+
+int ipf_hookvndl3v6_out(hook_event_token_t token, hook_data_t info, void *arg)
+{
+ return ipf_hook6_out(token, info, arg);
+}
+
+/* ------------------------------------------------------------------------ */
/* Function: ipf_hook4_loop_in */
/* Returns: int - 0 == packet ok, else problem, free packet if not done */
/* Parameters: event(I) - pointer to event */
@@ -1997,7 +2334,6 @@ int ipf_hook6(hook_data_t info, int out, int loopback, void *arg)
fw->hpe_mb = qpi.qpi_m;
fw->hpe_hdr = qpi.qpi_data;
return rval;
-
}
diff --git a/usr/src/uts/common/inet/ipf/ip_log.c b/usr/src/uts/common/inet/ipf/ip_log.c
index 1fe54f3a07..5d40498671 100644
--- a/usr/src/uts/common/inet/ipf/ip_log.c
+++ b/usr/src/uts/common/inet/ipf/ip_log.c
@@ -7,6 +7,8 @@
*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#include <sys/param.h>
@@ -572,10 +574,16 @@ ipf_stack_t *ifs;
while (ifs->ifs_iplt[unit] == NULL) {
# if SOLARIS && defined(_KERNEL)
+ /*
+ * Prevent a deadlock with ipldetach()
+ */
+ RWLOCK_EXIT(&ifs->ifs_ipf_global);
if (!cv_wait_sig(&ifs->ifs_iplwait, &ifs->ifs_ipl_mutex.ipf_lk)) {
+ READ_ENTER(&ifs->ifs_ipf_global);
MUTEX_EXIT(&ifs->ifs_ipl_mutex);
return EINTR;
}
+ READ_ENTER(&ifs->ifs_ipf_global);
# else
# if defined(__hpux) && defined(_KERNEL)
lock_t *l;
diff --git a/usr/src/uts/common/inet/ipf/ip_state.c b/usr/src/uts/common/inet/ipf/ip_state.c
index c92262b3ad..adf9ff9382 100644
--- a/usr/src/uts/common/inet/ipf/ip_state.c
+++ b/usr/src/uts/common/inet/ipf/ip_state.c
@@ -1285,6 +1285,7 @@ u_int flags;
switch (ic->icmp_type)
{
case ICMP_ECHO :
+ case ICMP_ECHOREPLY :
case ICMP_TSTAMP :
case ICMP_IREQ :
case ICMP_MASKREQ :
diff --git a/usr/src/uts/common/inet/ipf/ipf.conf b/usr/src/uts/common/inet/ipf/ipf.conf
index 6b36f9fdbf..f49e024a72 100644
--- a/usr/src/uts/common/inet/ipf/ipf.conf
+++ b/usr/src/uts/common/inet/ipf/ipf.conf
@@ -1,3 +1,8 @@
#
#
name="ipf" parent="pseudo" instance=0;
+
+# Increase the state table limits. fr_statemax should be ~70% of fr_statesize,
+# and both should be prime numbers
+fr_statesize=151007;
+fr_statemax=113279;
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_fil.h b/usr/src/uts/common/inet/ipf/netinet/ip_fil.h
index 0a2d6431d8..3de0047f3b 100644
--- a/usr/src/uts/common/inet/ipf/netinet/ip_fil.h
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_fil.h
@@ -7,6 +7,8 @@
* $Id: ip_fil.h,v 2.170.2.22 2005/07/16 05:55:35 darrenr Exp $
*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
#ifndef __IP_FIL_H__
@@ -107,6 +109,10 @@
#define SIOCDELFR SIOCRMAFR
#define SIOCINSFR SIOCINAFR
+#ifdef SOLARIS
+# define SIOCIPFZONESET _IOWR('r', 97, struct ipfzoneobj)
+#endif
+
/*
* What type of table is getting flushed?
*/
@@ -1165,6 +1171,25 @@ typedef struct ipfobj {
u_char ipfo_xxxpad[32]; /* reserved for future use */
} ipfobj_t;
+#if defined(SOLARIS)
+
+#include <sys/zone.h>
+
+typedef struct ipfzoneobj {
+ u_32_t ipfz_gz; /* GZ stack */
+ char ipfz_zonename[ZONENAME_MAX]; /* zone to act on */
+} ipfzoneobj_t;
+
+#if defined(_KERNEL)
+typedef struct ipf_devstate {
+ zoneid_t ipfs_zoneid;
+ minor_t ipfs_minor;
+ boolean_t ipfs_gz;
+} ipf_devstate_t;
+#endif
+
+#endif
+
#define IPFOBJ_FRENTRY 0 /* struct frentry */
#define IPFOBJ_IPFSTAT 1 /* struct friostat */
#define IPFOBJ_IPFINFO 2 /* struct fr_info */
@@ -1352,7 +1377,7 @@ extern void ipfilterattach __P((int));
extern int ipl_enable __P((void));
extern int ipl_disable __P((void));
# ifdef MENTAT
-extern ipf_stack_t *ipf_find_stack(const zoneid_t zone);
+extern ipf_stack_t *ipf_find_stack(const zoneid_t zone, boolean_t gz);
extern int fr_check __P((struct ip *, int, void *, int, void *,
mblk_t **, ipf_stack_t *));
# if SOLARIS
@@ -1576,6 +1601,10 @@ extern int ipf_earlydrop __P((int, ipftq_t *, int, ipf_stack_t *));
extern u_32_t ipf_random __P((void));
#endif
+#if defined(SOLARIS) && defined(_KERNEL)
+extern int fr_setzoneid __P((ipf_devstate_t *, void *));
+#endif
+
extern char ipfilter_version[];
#ifdef USE_INET6
extern int icmptoicmp6types[ICMP_MAXTYPE+1];
diff --git a/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h b/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h
index 75703994a5..f13d363bbc 100644
--- a/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h
+++ b/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h
@@ -5,6 +5,8 @@
*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright 2014 Joyent, Inc. All rights reserved.
*/
#ifndef __IPF_STACK_H__
@@ -41,8 +43,10 @@
struct ipf_stack {
struct ipf_stack *ifs_next;
struct ipf_stack **ifs_pnext;
+ struct ipf_stack *ifs_pgz;
netid_t ifs_netid;
zoneid_t ifs_zone;
+ boolean_t ifs_gz;
/* ipf module */
fr_info_t ifs_frcache[2][8];
@@ -121,6 +125,10 @@ struct ipf_stack {
hook_t *ifs_ipfhook6_loop_in;
hook_t *ifs_ipfhook6_loop_out;
hook_t *ifs_ipfhook6_nicevents;
+ hook_t *ifs_ipfhookvndl3v4_in;
+ hook_t *ifs_ipfhookvndl3v6_in;
+ hook_t *ifs_ipfhookvndl3v4_out;
+ hook_t *ifs_ipfhookvndl3v6_out;
/* flags to indicate whether hooks are registered. */
boolean_t ifs_hook4_physical_in;
@@ -133,10 +141,16 @@ struct ipf_stack {
boolean_t ifs_hook6_nic_events;
boolean_t ifs_hook6_loopback_in;
boolean_t ifs_hook6_loopback_out;
+ boolean_t ifs_hookvndl3v4_physical_in;
+ boolean_t ifs_hookvndl3v6_physical_in;
+ boolean_t ifs_hookvndl3v4_physical_out;
+ boolean_t ifs_hookvndl3v6_physical_out;
int ifs_ipf_loopback;
net_handle_t ifs_ipf_ipv4;
net_handle_t ifs_ipf_ipv6;
+ net_handle_t ifs_ipf_vndl3v4;
+ net_handle_t ifs_ipf_vndl3v6;
/* ip_auth.c */
int ifs_fr_authsize;
diff --git a/usr/src/uts/common/inet/ipf/solaris.c b/usr/src/uts/common/inet/ipf/solaris.c
index 1a1d94def5..093a599649 100644
--- a/usr/src/uts/common/inet/ipf/solaris.c
+++ b/usr/src/uts/common/inet/ipf/solaris.c
@@ -5,6 +5,50 @@
*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * ipfilter kernel module mutexes and locking:
+ *
+ * Enabling ipfilter creates a per-netstack ipf_stack_t object that is
+ * stored in the ipf_stacks list, which is protected by ipf_stack_lock.
+ * ipf_stack_t objects are accessed in three contexts:
+ *
+ * 1) administering that filter (eg: ioctls handled with iplioctl())
+ * 2) reading log data (eg: iplread() / iplwrite())
+ * 3) filtering packets (eg: ipf_hook4_* and ipf_hook6_* pfhooks
+ * functions)
+ *
+ * Each ipf_stack_t has a RW lock, ifs_ipf_global, protecting access to the
+ * whole structure. The structure also has locks protecting the various
+ * data structures used for filtering. The following guidelines should be
+ * followed for ipf_stack_t locks:
+ *
+ * - ipf_stack_lock must be held when accessing the ipf_stacks list
+ * - ipf_stack_lock should be held before acquiring ifs_ipf_global for
+ * a stack (the exception to this is ipf_stack_destroy(), which removes
+ * the ipf_stack_t from the list, then drops ipf_stack_lock before
+ * acquiring ifs_ipf_global)
+ * - ifs_ipf_global must be held when accessing an ipf_stack_t in that list:
+ * - The write lock is held only during stack creation / destruction
+ * - The read lock should be held for all other accesses
+ * - To alter the filtering data in the administrative context, one must:
+ * - acquire the read lock for ifs_ipf_global
+ * - then acquire the write lock for the data in question
+ * - In the filtering path, the read lock needs to be held for each type of
+ * filtering data used
+ * - ifs_ipf_global does not need to be held in the filtering path:
+ * - The filtering hooks don't need to modify the stack itself
+ * - The ipf_stack_t will not be destroyed until the hooks are unregistered.
+ * This requires a write lock on the hook, ensuring that no active hooks
+ * (eg: the filtering path) are running, and that the hooks won't be run
+ * afterward.
+ *
+ * Note that there is a deadlock possible when calling net_hook_register()
+ * or net_hook_unregister() with ifs_ipf_global held: see the comments in
+ * iplattach() and ipldetach() for details.
*/
#include <sys/systm.h>
@@ -73,7 +117,8 @@ static int ipf_property_g_update __P((dev_info_t *));
static char *ipf_devfiles[] = { IPL_NAME, IPNAT_NAME, IPSTATE_NAME,
IPAUTH_NAME, IPSYNC_NAME, IPSCAN_NAME,
IPLOOKUP_NAME, NULL };
-
+extern void *ipf_state; /* DDI state */
+extern vmem_t *ipf_minor; /* minor number arena */
static struct cb_ops ipf_cb_ops = {
iplopen,
@@ -221,10 +266,10 @@ static const filter_kstats_t ipf_kstat_tmp = {
static int ipf_kstat_update(kstat_t *ksp, int rwflag);
static void
-ipf_kstat_init(ipf_stack_t *ifs)
+ipf_kstat_init(ipf_stack_t *ifs, boolean_t from_gz)
{
ifs->ifs_kstatp[0] = net_kstat_create(ifs->ifs_netid, "ipf", 0,
- "inbound", "net", KSTAT_TYPE_NAMED,
+ (from_gz ? "inbound_gz" : "inbound"), "net", KSTAT_TYPE_NAMED,
sizeof (filter_kstats_t) / sizeof (kstat_named_t), 0);
if (ifs->ifs_kstatp[0] != NULL) {
bcopy(&ipf_kstat_tmp, ifs->ifs_kstatp[0]->ks_data,
@@ -235,7 +280,7 @@ ipf_kstat_init(ipf_stack_t *ifs)
}
ifs->ifs_kstatp[1] = net_kstat_create(ifs->ifs_netid, "ipf", 0,
- "outbound", "net", KSTAT_TYPE_NAMED,
+ (from_gz ? "outbound_gz" : "outbound"), "net", KSTAT_TYPE_NAMED,
sizeof (filter_kstats_t) / sizeof (kstat_named_t), 0);
if (ifs->ifs_kstatp[1] != NULL) {
bcopy(&ipf_kstat_tmp, ifs->ifs_kstatp[1]->ks_data,
@@ -369,12 +414,14 @@ dev_info_t *dip;
* Initialize things for IPF for each stack instance
*/
static void *
-ipf_stack_create(const netid_t id)
+ipf_stack_create_one(const netid_t id, const zoneid_t zid, boolean_t from_gz,
+ ipf_stack_t *ifs_gz)
{
ipf_stack_t *ifs;
#ifdef IPFDEBUG
- cmn_err(CE_NOTE, "IP Filter:stack_create id=%d", id);
+ cmn_err(CE_NOTE, "IP Filter:stack_create_one id=%d global=%d", id,
+ global);
#endif
ifs = (ipf_stack_t *)kmem_alloc(sizeof (*ifs), KM_SLEEP);
@@ -398,8 +445,11 @@ ipf_stack_create(const netid_t id)
RWLOCK_INIT(&ifs->ifs_ipf_mutex, "ipf filter rwlock");
RWLOCK_INIT(&ifs->ifs_ipf_frcache, "ipf cache rwlock");
ifs->ifs_netid = id;
- ifs->ifs_zone = net_getzoneidbynetid(id);
- ipf_kstat_init(ifs);
+ ifs->ifs_zone = zid;
+ ifs->ifs_gz = from_gz;
+ ifs->ifs_pgz = ifs_gz;
+
+ ipf_kstat_init(ifs, from_gz);
#ifdef IPFDEBUG
cmn_err(CE_CONT, "IP Filter:stack_create zone=%d", ifs->ifs_zone);
@@ -427,26 +477,43 @@ ipf_stack_create(const netid_t id)
return (ifs);
}
+static void *
+ipf_stack_create(const netid_t id)
+{
+ ipf_stack_t *ifs = NULL;
+ zoneid_t zid = net_getzoneidbynetid(id);
+
+ /*
+ * Create two ipfilter stacks for a zone - the first can only be
+ * controlled from the global zone, and the second is owned by
+ * the zone itself. There is no need to create a GZ-controlled
+ * stack for the global zone, since we're already in the global
+ * zone.
+ */
+ if (zid != GLOBAL_ZONEID)
+ ifs = ipf_stack_create_one(id, zid, B_TRUE, NULL);
+
+ return ipf_stack_create_one(id, zid, B_FALSE, ifs);
+}
/*
- * This function should only ever be used to find the pointer to the
- * ipfilter stack structure for the zone that is currently being
- * executed... so if you're running in the context of zone 1, you
- * should not attempt to find the ipf_stack_t for zone 0 or 2 or
- * anything else but 1. In that way, the returned pointer is safe
- * as it will only be nuked when the instance is destroyed as part
- * of the final shutdown of a zone.
+ * This function returns with the ipf_stack_t's ifs_ipf_global
+ * read lock held (if the stack is found).
*/
ipf_stack_t *
-ipf_find_stack(const zoneid_t zone)
+ipf_find_stack(const zoneid_t zone, const boolean_t gz)
{
ipf_stack_t *ifs;
mutex_enter(&ipf_stack_lock);
for (ifs = ipf_stacks; ifs != NULL; ifs = ifs->ifs_next) {
- if (ifs->ifs_zone == zone)
+ if (ifs->ifs_zone == zone && ifs->ifs_gz == gz)
break;
}
+
+ if (ifs != NULL) {
+ READ_ENTER(&ifs->ifs_ipf_global);
+ }
mutex_exit(&ipf_stack_lock);
return (ifs);
}
@@ -495,7 +562,8 @@ static int ipf_detach_check_all()
/*
- * Destroy things for ipf for one stack.
+ * Remove ipf kstats for both the per-zone ipf stack and the
+ * GZ-controlled stack for the same zone, if it exists.
*/
/* ARGSUSED */
static void
@@ -503,6 +571,15 @@ ipf_stack_shutdown(const netid_t id, void *arg)
{
ipf_stack_t *ifs = (ipf_stack_t *)arg;
+ /*
+ * The GZ-controlled stack
+ */
+ if (ifs->ifs_pgz != NULL)
+ ipf_kstat_fini(ifs->ifs_pgz);
+
+ /*
+ * The per-zone stack
+ */
ipf_kstat_fini(ifs);
}
@@ -510,15 +587,13 @@ ipf_stack_shutdown(const netid_t id, void *arg)
/*
* Destroy things for ipf for one stack.
*/
-/* ARGSUSED */
static void
-ipf_stack_destroy(const netid_t id, void *arg)
+ipf_stack_destroy_one(const netid_t id, ipf_stack_t *ifs)
{
- ipf_stack_t *ifs = (ipf_stack_t *)arg;
timeout_id_t tid;
#ifdef IPFDEBUG
- (void) printf("ipf_stack_destroy(%p)\n", (void *)ifs);
+ (void) printf("ipf_stack_destroy_one(%p)\n", (void *)ifs);
#endif
/*
@@ -546,7 +621,7 @@ ipf_stack_destroy(const netid_t id, void *arg)
WRITE_ENTER(&ifs->ifs_ipf_global);
if (ipldetach(ifs) != 0) {
- printf("ipf_stack_destroy: ipldetach failed\n");
+ printf("ipf_stack_destroy_one: ipldetach failed\n");
}
ipftuneable_free(ifs);
@@ -560,6 +635,29 @@ ipf_stack_destroy(const netid_t id, void *arg)
}
+/*
+ * Destroy things for ipf for both the per-zone ipf stack and the
+ * GZ-controlled stack for the same zone, if it exists.
+ */
+/* ARGSUSED */
+static void
+ipf_stack_destroy(const netid_t id, void *arg)
+{
+ ipf_stack_t *ifs = (ipf_stack_t *)arg;
+
+ /*
+ * The GZ-controlled stack
+ */
+ if (ifs->ifs_pgz != NULL)
+ ipf_stack_destroy_one(id, ifs->ifs_pgz);
+
+ /*
+ * The per-zone stack
+ */
+ ipf_stack_destroy_one(id, ifs);
+}
+
+
static int ipf_attach(dip, cmd)
dev_info_t *dip;
ddi_attach_cmd_t cmd;
@@ -586,27 +684,39 @@ ddi_attach_cmd_t cmd;
(void) ipf_property_g_update(dip);
+ if (ddi_soft_state_init(&ipf_state, sizeof (ipf_devstate_t), 1)
+ != 0) {
+ ddi_prop_remove_all(dip);
+ return (DDI_FAILURE);
+ }
+
for (i = 0; ((s = ipf_devfiles[i]) != NULL); i++) {
s = strrchr(s, '/');
if (s == NULL)
continue;
s++;
if (ddi_create_minor_node(dip, s, S_IFCHR, i,
- DDI_PSEUDO, 0) ==
- DDI_FAILURE) {
- ddi_remove_minor_node(dip, NULL);
+ DDI_PSEUDO, 0) == DDI_FAILURE)
goto attach_failed;
- }
}
ipf_dev_info = dip;
ipfncb = net_instance_alloc(NETINFO_VERSION);
+ if (ipfncb == NULL)
+ goto attach_failed;
+
ipfncb->nin_name = "ipf";
ipfncb->nin_create = ipf_stack_create;
ipfncb->nin_destroy = ipf_stack_destroy;
ipfncb->nin_shutdown = ipf_stack_shutdown;
- i = net_instance_register(ipfncb);
+ if (net_instance_register(ipfncb) == DDI_FAILURE) {
+ net_instance_free(ipfncb);
+ goto attach_failed;
+ }
+
+ ipf_minor = vmem_create("ipf_minor", (void *)1, UINT32_MAX - 1,
+ 1, NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
#ifdef IPFDEBUG
cmn_err(CE_CONT, "IP Filter:stack_create callback_reg=%d", i);
@@ -619,7 +729,9 @@ ddi_attach_cmd_t cmd;
}
attach_failed:
+ ddi_remove_minor_node(dip, NULL);
ddi_prop_remove_all(dip);
+ ddi_soft_state_fini(&ipf_state);
return (DDI_FAILURE);
}
@@ -652,6 +764,9 @@ ddi_detach_cmd_t cmd;
return (DDI_FAILURE);
}
+ vmem_destroy(ipf_minor);
+ ddi_soft_state_fini(&ipf_state);
+
(void) net_instance_unregister(ipfncb);
net_instance_free(ipfncb);
diff --git a/usr/src/uts/common/inet/squeue.c b/usr/src/uts/common/inet/squeue.c
index 2e08dc359b..1009f0700f 100644
--- a/usr/src/uts/common/inet/squeue.c
+++ b/usr/src/uts/common/inet/squeue.c
@@ -23,7 +23,7 @@
*/
/*
- * Copyright 2012 Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
*/
/*
@@ -61,6 +61,10 @@
* connection are processed on that squeue. The connection ("conn") to
* squeue mapping is stored in "conn_t" member "conn_sqp".
*
+ * If the squeue is not related to TCP/IP, then the value of sqp->sq_isip is
+ * false and it will not have an associated conn_t, which means many aspects of
+ * the system, such as polling and swtiching squeues will not be used.
+ *
* Since the processing of the connection cuts across multiple layers
* but still allows packets for different connnection to be processed on
* other CPU/squeues, squeues are also termed as "Vertical Perimeter" or
@@ -244,7 +248,7 @@ squeue_init(void)
/* ARGSUSED */
squeue_t *
-squeue_create(clock_t wait, pri_t pri)
+squeue_create(clock_t wait, pri_t pri, boolean_t isip)
{
squeue_t *sqp = kmem_cache_alloc(squeue_cache, KM_SLEEP);
@@ -260,11 +264,36 @@ squeue_create(clock_t wait, pri_t pri)
sqp->sq_enter = squeue_enter;
sqp->sq_drain = squeue_drain;
+ sqp->sq_isip = isip;
return (sqp);
}
/*
+ * We need to kill the threads and then clean up. We should VERIFY that
+ * polling is disabled so we don't have to worry about disassociating from
+ * MAC/IP/etc.
+ */
+void
+squeue_destroy(squeue_t *sqp)
+{
+ kt_did_t worker, poll;
+ mutex_enter(&sqp->sq_lock);
+ VERIFY(!(sqp->sq_state & (SQS_POLL_THR_QUIESCED |
+ SQS_POLL_QUIESCE_DONE | SQS_PAUSE | SQS_EXIT)));
+ worker = sqp->sq_worker->t_did;
+ poll = sqp->sq_poll_thr->t_did;
+ sqp->sq_state |= SQS_EXIT;
+ cv_signal(&sqp->sq_poll_cv);
+ cv_signal(&sqp->sq_worker_cv);
+ mutex_exit(&sqp->sq_lock);
+
+ thread_join(poll);
+ thread_join(worker);
+ kmem_cache_free(squeue_cache, sqp);
+}
+
+/*
* Bind squeue worker thread to the specified CPU, given by CPU id.
* If the CPU id value is -1, bind the worker thread to the value
* specified in sq_bind field. If a thread is already bound to a
@@ -475,18 +504,21 @@ squeue_enter(squeue_t *sqp, mblk_t *mp, mblk_t *tail, uint32_t cnt,
* Handle squeue switching. More details in the
* block comment at the top of the file
*/
- if (connp->conn_sqp == sqp) {
+ if (sqp->sq_isip == B_FALSE || connp->conn_sqp == sqp) {
SQUEUE_DBG_SET(sqp, mp, proc, connp,
tag);
- connp->conn_on_sqp = B_TRUE;
+ if (sqp->sq_isip == B_TRUE)
+ connp->conn_on_sqp = B_TRUE;
DTRACE_PROBE3(squeue__proc__start, squeue_t *,
sqp, mblk_t *, mp, conn_t *, connp);
(*proc)(connp, mp, sqp, ira);
DTRACE_PROBE2(squeue__proc__end, squeue_t *,
sqp, conn_t *, connp);
- connp->conn_on_sqp = B_FALSE;
+ if (sqp->sq_isip == B_TRUE) {
+ connp->conn_on_sqp = B_FALSE;
+ CONN_DEC_REF(connp);
+ }
SQUEUE_DBG_CLEAR(sqp);
- CONN_DEC_REF(connp);
} else {
SQUEUE_ENTER_ONE(connp->conn_sqp, mp, proc,
connp, ira, SQ_FILL, SQTAG_SQUEUE_CHANGE);
@@ -513,7 +545,7 @@ squeue_enter(squeue_t *sqp, mblk_t *mp, mblk_t *tail, uint32_t cnt,
return;
}
} else {
- if (ira != NULL) {
+ if (sqp->sq_isip == B_TRUE && ira != NULL) {
mblk_t *attrmp;
ASSERT(cnt == 1);
@@ -587,7 +619,8 @@ squeue_enter(squeue_t *sqp, mblk_t *mp, mblk_t *tail, uint32_t cnt,
if (!(sqp->sq_state & SQS_REENTER) &&
(process_flag != SQ_FILL) && (sqp->sq_first == NULL) &&
(sqp->sq_run == curthread) && (cnt == 1) &&
- (connp->conn_on_sqp == B_FALSE)) {
+ (sqp->sq_isip == B_FALSE ||
+ connp->conn_on_sqp == B_FALSE)) {
sqp->sq_state |= SQS_REENTER;
mutex_exit(&sqp->sq_lock);
@@ -602,15 +635,21 @@ squeue_enter(squeue_t *sqp, mblk_t *mp, mblk_t *tail, uint32_t cnt,
* Handle squeue switching. More details in the
* block comment at the top of the file
*/
- if (connp->conn_sqp == sqp) {
- connp->conn_on_sqp = B_TRUE;
+ if (sqp->sq_isip == B_FALSE || connp->conn_sqp == sqp) {
+ SQUEUE_DBG_SET(sqp, mp, proc, connp,
+ tag);
+ if (sqp->sq_isip == B_TRUE)
+ connp->conn_on_sqp = B_TRUE;
DTRACE_PROBE3(squeue__proc__start, squeue_t *,
sqp, mblk_t *, mp, conn_t *, connp);
(*proc)(connp, mp, sqp, ira);
DTRACE_PROBE2(squeue__proc__end, squeue_t *,
sqp, conn_t *, connp);
- connp->conn_on_sqp = B_FALSE;
- CONN_DEC_REF(connp);
+ if (sqp->sq_isip == B_TRUE) {
+ connp->conn_on_sqp = B_FALSE;
+ CONN_DEC_REF(connp);
+ }
+ SQUEUE_DBG_CLEAR(sqp);
} else {
SQUEUE_ENTER_ONE(connp->conn_sqp, mp, proc,
connp, ira, SQ_FILL, SQTAG_SQUEUE_CHANGE);
@@ -631,7 +670,7 @@ squeue_enter(squeue_t *sqp, mblk_t *mp, mblk_t *tail, uint32_t cnt,
#ifdef DEBUG
mp->b_tag = tag;
#endif
- if (ira != NULL) {
+ if (sqp->sq_isip && ira != NULL) {
mblk_t *attrmp;
ASSERT(cnt == 1);
@@ -779,7 +818,7 @@ again:
mp->b_prev = NULL;
/* Is there an ip_recv_attr_t to handle? */
- if (ip_recv_attr_is_mblk(mp)) {
+ if (sqp->sq_isip == B_TRUE && ip_recv_attr_is_mblk(mp)) {
mblk_t *attrmp = mp;
ASSERT(attrmp->b_cont != NULL);
@@ -804,20 +843,25 @@ again:
/*
- * Handle squeue switching. More details in the
- * block comment at the top of the file
+ * Handle squeue switching. More details in the block comment at
+ * the top of the file. non-IP squeues cannot switch, as there
+ * is no conn_t.
*/
- if (connp->conn_sqp == sqp) {
+ if (sqp->sq_isip == B_FALSE || connp->conn_sqp == sqp) {
SQUEUE_DBG_SET(sqp, mp, proc, connp,
mp->b_tag);
- connp->conn_on_sqp = B_TRUE;
+ if (sqp->sq_isip == B_TRUE)
+ connp->conn_on_sqp = B_TRUE;
DTRACE_PROBE3(squeue__proc__start, squeue_t *,
sqp, mblk_t *, mp, conn_t *, connp);
(*proc)(connp, mp, sqp, ira);
DTRACE_PROBE2(squeue__proc__end, squeue_t *,
sqp, conn_t *, connp);
- connp->conn_on_sqp = B_FALSE;
- CONN_DEC_REF(connp);
+ if (sqp->sq_isip == B_TRUE) {
+ connp->conn_on_sqp = B_FALSE;
+ CONN_DEC_REF(connp);
+ }
+ SQUEUE_DBG_CLEAR(sqp);
} else {
SQUEUE_ENTER_ONE(connp->conn_sqp, mp, proc, connp, ira,
SQ_FILL, SQTAG_SQUEUE_CHANGE);
@@ -1051,6 +1095,11 @@ squeue_polling_thread(squeue_t *sqp)
cv_wait(async, lock);
CALLB_CPR_SAFE_END(&cprinfo, lock);
+ if (sqp->sq_state & SQS_EXIT) {
+ mutex_exit(lock);
+ thread_exit();
+ }
+
ctl_state = sqp->sq_state & (SQS_POLL_THR_CONTROL |
SQS_POLL_THR_QUIESCED);
if (ctl_state != 0) {
@@ -1076,6 +1125,9 @@ squeue_polling_thread(squeue_t *sqp)
(SQS_PROC|SQS_POLLING|SQS_GET_PKTS)) ==
(SQS_PROC|SQS_POLLING|SQS_GET_PKTS));
+ /* Only IP related squeues should reach this point */
+ VERIFY(sqp->sq_isip == B_TRUE);
+
poll_again:
sq_rx_ring = sqp->sq_rx_ring;
sq_get_pkts = sq_rx_ring->rr_rx;
@@ -1205,6 +1257,7 @@ squeue_worker_thr_control(squeue_t *sqp)
ill_rx_ring_t *rx_ring;
ASSERT(MUTEX_HELD(&sqp->sq_lock));
+ VERIFY(sqp->sq_isip == B_TRUE);
if (sqp->sq_state & SQS_POLL_RESTART) {
/* Restart implies a previous quiesce. */
@@ -1316,6 +1369,11 @@ squeue_worker(squeue_t *sqp)
for (;;) {
for (;;) {
+ if (sqp->sq_state & SQS_EXIT) {
+ mutex_exit(lock);
+ thread_exit();
+ }
+
/*
* If the poll thread has handed control to us
* we need to break out of the wait.
@@ -1412,6 +1470,7 @@ squeue_synch_enter(conn_t *connp, mblk_t *use_mp)
again:
sqp = connp->conn_sqp;
+ VERIFY(sqp->sq_isip == B_TRUE);
mutex_enter(&sqp->sq_lock);
if (sqp->sq_first == NULL && !(sqp->sq_state & SQS_PROC)) {
@@ -1487,6 +1546,7 @@ void
squeue_synch_exit(conn_t *connp)
{
squeue_t *sqp = connp->conn_sqp;
+ VERIFY(sqp->sq_isip == B_TRUE);
mutex_enter(&sqp->sq_lock);
if (sqp->sq_run == curthread) {
diff --git a/usr/src/uts/common/inet/tcp_impl.h b/usr/src/uts/common/inet/tcp_impl.h
index 1b20e40aca..3488630f2c 100644
--- a/usr/src/uts/common/inet/tcp_impl.h
+++ b/usr/src/uts/common/inet/tcp_impl.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, Joyent Inc. All rights reserved.
+ * Copyright (c) 2013, Joyent Inc. All rights reserved.
* Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
* Copyright (c) 2013, 2014 by Delphix. All rights reserved.
*/
@@ -61,9 +61,9 @@ extern sock_downcalls_t sock_tcp_downcalls;
* by setting it to 0.
*/
#define TCP_XMIT_LOWATER 4096
-#define TCP_XMIT_HIWATER 49152
+#define TCP_XMIT_HIWATER 128000
#define TCP_RECV_LOWATER 2048
-#define TCP_RECV_HIWATER 128000
+#define TCP_RECV_HIWATER 1048576
/*
* Bind hash list size and has function. It has to be a power of 2 for
diff --git a/usr/src/uts/common/io/1394/targets/av1394/av1394_async.c b/usr/src/uts/common/io/1394/targets/av1394/av1394_async.c
index 94323582d6..c2c16e848b 100644
--- a/usr/src/uts/common/io/1394/targets/av1394/av1394_async.c
+++ b/usr/src/uts/common/io/1394/targets/av1394/av1394_async.c
@@ -24,7 +24,9 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
/*
* av1394 asynchronous module
@@ -359,9 +361,10 @@ av1394_async_poll(av1394_inst_t *avp, short events, int anyyet, short *reventsp,
AV1394_TNF_ENTER(av1394_async_poll);
if (events & POLLIN) {
- if (av1394_peekq(rq)) {
+ if (av1394_peekq(rq))
*reventsp |= POLLIN;
- } else if (!anyyet) {
+
+ if ((!*reventsp && !anyyet) || (events & POLLET)) {
mutex_enter(&ap->a_mutex);
ap->a_pollevents |= POLLIN;
*phpp = &ap->a_pollhead;
diff --git a/usr/src/uts/common/io/aggr/aggr_port.c b/usr/src/uts/common/io/aggr/aggr_port.c
index 00545d2c03..a39110255a 100644
--- a/usr/src/uts/common/io/aggr/aggr_port.c
+++ b/usr/src/uts/common/io/aggr/aggr_port.c
@@ -21,6 +21,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 OmniTI Computer Consulting, Inc All rights reserved.
*/
/*
@@ -528,8 +529,13 @@ aggr_port_promisc(aggr_port_t *port, boolean_t on)
if (on) {
mac_rx_clear(port->lp_mch);
+ /* We use the promisc callback because without hardware
+ * rings, we deliver through flows that will cause duplicate
+ * delivery of packets when we've flipped into this mode
+ * to compensate for the lack of hardware MAC matching
+ */
rc = mac_promisc_add(port->lp_mch, MAC_CLIENT_PROMISC_ALL,
- aggr_recv_cb, port, &port->lp_mphp,
+ aggr_recv_promisc_cb, port, &port->lp_mphp,
MAC_PROMISC_FLAGS_NO_TX_LOOP);
if (rc != 0) {
mac_rx_set(port->lp_mch, aggr_recv_cb, port);
diff --git a/usr/src/uts/common/io/aggr/aggr_recv.c b/usr/src/uts/common/io/aggr/aggr_recv.c
index 2bdb7872e3..0dfe234b70 100644
--- a/usr/src/uts/common/io/aggr/aggr_recv.c
+++ b/usr/src/uts/common/io/aggr/aggr_recv.c
@@ -21,6 +21,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 OmniTI Computer Consulting, Inc All rights reserved.
*/
/*
@@ -68,16 +69,27 @@ aggr_recv_lacp(aggr_port_t *port, mac_resource_handle_t mrh, mblk_t *mp)
/*
* Callback function invoked by MAC service module when packets are
- * made available by a MAC port.
+ * made available by a MAC port, both in promisc_on mode and not.
*/
/* ARGSUSED */
-void
-aggr_recv_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
- boolean_t loopback)
+static void
+aggr_recv_path_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
+ boolean_t loopback, boolean_t promisc_path)
{
aggr_port_t *port = (aggr_port_t *)arg;
aggr_grp_t *grp = port->lp_grp;
+ /* In the case where lp_promisc_on has been turned on to
+ * compensate for insufficient hardware MAC matching and
+ * hardware rings are not in use we will fall back to
+ * using flows for delivery which can result in duplicates
+ * pushed up the stack. Only respect the chosen path.
+ */
+ if (port->lp_promisc_on != promisc_path) {
+ freemsgchain(mp);
+ return;
+ }
+
if (grp->lg_lacp_mode == AGGR_LACP_OFF) {
aggr_mac_rx(grp->lg_mh, mrh, mp);
} else {
@@ -161,3 +173,19 @@ aggr_recv_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
}
}
}
+
+/* ARGSUSED */
+void
+aggr_recv_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
+ boolean_t loopback)
+{
+ aggr_recv_path_cb(arg, mrh, mp, loopback, B_FALSE);
+}
+
+/* ARGSUSED */
+void
+aggr_recv_promisc_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
+ boolean_t loopback)
+{
+ aggr_recv_path_cb(arg, mrh, mp, loopback, B_TRUE);
+}
diff --git a/usr/src/uts/common/io/axf/ax88172reg.h b/usr/src/uts/common/io/axf/ax88172reg.h
new file mode 100644
index 0000000000..8ca6ebc187
--- /dev/null
+++ b/usr/src/uts/common/io/axf/ax88172reg.h
@@ -0,0 +1,163 @@
+/*
+ * @(#)ax88172reg.h 1.1 09/06/15
+ * Macro definitions for ASIX AX88172 USB to fast ethernet controler
+ * based on ASIX AX88172/88772 data sheet
+ * This file is public domain. Coded by M.Murayama (KHF04453@nifty.com)
+ */
+
+#ifndef __AX88172_H__
+#define __AX88172_H__
+
+/*
+ * Vendor command definitions
+ */
+#define VCMD_READ_SRAM 0x02
+#define VCMD_WRITE_RXSRAM 0x03
+#define VCMD_WRITE_TXSRAM 0x04
+#define VCMD_SOFTWARE_MII_OP 0x06
+#define VCMD_READ_MII_REG 0x07
+#define VCMD_WRITE_MII_REG 0x08
+#define VCMD_READ_MII_OPMODE 0x09
+#define VCMD_HARDWARE_MII_OP 0x0a
+#define VCMD_READ_SROM 0x0b
+#define VCMD_WRITE_SROM 0x0c
+#define VCMD_WRITE_SROM_ENABLE 0x0d
+#define VCMD_WRITE_SROM_DISABLE 0x0e
+#define VCMD_READ_RXCTRL 0x0f
+#define VCMD_WRITE_RXCTRL 0x10
+#define VCMD_READ_IPGS 0x11
+#define VCMD_WRITE_IPG 0x12
+#define VCMD_WRITE_IPG1 0x13
+#define VCMD_WRITE_IPG2 0x14
+#define VCMD_READ_MCAST_FILTER 0x15
+#define VCMD_WRITE_MCAST_FILTER 0x16
+#define VCMD_READ_NODE_ID 0x17
+#define VCMD_READ_PHY_IDS 0x19
+#define VCMD_READ_MEDIUM_STATUS 0x1a
+#define VCMD_WRITE_MEDIUM_STATUS 0x1b
+#define VCMD_SET_MONITOR_MODE 0x1c
+#define VCMD_GET_MONITOR_MODE 0x1d
+#define VCMD_READ_GPIO 0x1e
+#define VCMD_WRITE_GPIO 0x1f
+
+/* ax88772 only, currently not supported */
+#define VCMD_WRITE_IPGS_88772 0x12
+#define VCMD_READ_NODE_ID_88772 0x13
+#define VCMD_WRITE_NODE_ID_88772 0x14
+#define VCMD_WRITE_TEST_REG_88772 0x17
+#define VCMD_SOFTWARE_RESET_88772 0x20
+#define VCMD_READ_PHY_SELECT_88772 0x21
+#define VCMD_WRITE_PHY_SELECT_88772 0x22
+
+
+/*
+ * Register definitions
+ */
+
+/* Rx control register */
+#define RCR_SO 0x80 /* Start Operation */
+#define RCR_AP_88772 0x20 /* accept physical address from mcast filter */
+#define RCR_AM 0x10 /* accept multicast address */
+#define RCR_AB 0x08 /* accept broadcast address */
+#define RCR_SEP 0x04 /* save error packet */
+#define RCR_AMALL 0x02 /* accept all multicast address */
+#define RCR_PRO 0x01 /* promiscious, all frames received */
+
+#define RCR_MFB 0x0300
+#define RCR_MFB_SHIFT 8
+#define RCR_MFB_2K (0U << RCR_MFB_SHIFT)
+#define RCR_MFB_4K (1U << RCR_MFB_SHIFT)
+#define RCR_MFB_8K (2U << RCR_MFB_SHIFT)
+#define RCR_MFB_16K (3U << RCR_MFB_SHIFT)
+
+#define RCR_BITS \
+ "\020" \
+ "\010SO" \
+ "\006AP" \
+ "\005AM" \
+ "\004AB" \
+ "\003SEP" \
+ "\002AMALL" \
+ "\001PRO"
+
+/* Medium status register */
+#define MSR_SM 0x1000 /* super mac support */
+#define MSR_SBP 0x0800 /* stop backpressure */
+#define MSR_PS 0x0200 /* port speed in mii mode */
+#define MSR_RE 0x0100 /* rx enable */
+#define MSR_PF 0x0080 /* check only length/type for pause frame */
+#define MSR_JFE 0x0040 /* jumbo frame enable */
+#define MSR_TFC 0x0020 /* tx flow control enable */
+#define MSR_RFC 0x0010 /* rx flow control enable (178) */
+#define MSR_FCEN 0x0010 /* flow control enable (172/772) */
+#define MSR_ENCK 0x0008 /* Enable GTX_CLK and TXC clock output (178) */
+#define MSR_TXABT 0x0004 /* Tx abort allow, always set */
+#define MSR_FDPX 0x0002 /* full duplex */
+#define MSR_GM 0x0001 /* Gigabit mode (178) */
+
+#define MSR_BITS \
+ "\020" \
+ "\015SM" \
+ "\014SBP" \
+ "\012PS" \
+ "\011RE" \
+ "\005FCEN" \
+ "\004ENCK" \
+ "\003TXABT" \
+ "\002FDPX" \
+ "\001GM"
+
+/* monitor mode register */
+#define MMR_RWMP 0x04 /* remote wakeup by magic pkt */
+#define MMR_RWLU 0x02 /* remote wakeup by linkup */
+#define MMR_MOM 0x01 /* monitor mode 1:en, 0:dis */
+
+#define MMR_BITS \
+ "\020" \
+ "\003RWMP" \
+ "\002RWLU" \
+ "\001MOM"
+
+/* GPIO register */
+#define GPIO_RSE 0x80 /* reload serial eeprom (88772)*/
+#define GPIO_DATA2 0x20
+#define GPIO_EN2 0x10
+#define GPIO_DATA1 0x08
+#define GPIO_EN1 0x04
+#define GPIO_DATA0 0x02
+#define GPIO_EN0 0x01
+
+#define GPIO_BITS \
+ "\020" \
+ "\010RSE" \
+ "\006DATA2" \
+ "\005EN2" \
+ "\004DATA1" \
+ "\003EN1" \
+ "\002DATA0" \
+ "\001EN0"
+
+/* Software reset register */
+#define SWRST_IPPD 0x40 /* internal phy power down control */
+#define SWRST_IPRL 0x20 /* internal phy reset control */
+#define SWRST_BZ 0x10 /* force Bulk In to return zero-length pkt */
+#define SWRST_PRL 0x08 /* external phy reset pin level */
+#define SWRST_PRTE 0x04 /* external phy tri-state enable */
+#define SWRST_RT 0x02 /* clear frame length error for Bulk-Out */
+#define SWRST_RR 0x01 /* clear frame length error for Bulk-In */
+
+#define SWRST_BITS \
+ "\020" \
+ "\007IPPD" \
+ "\006IPRL" \
+ "\005BZ" \
+ "\004PRL" \
+ "\003PRTE" \
+ "\002RT" \
+ "\001RR"
+
+/* Software PHY Select Status register */
+#define SPSS_ASEL 0x02 /* 1:auto select 0:manual select */
+#define SPSS_PSEL 0x01 /* 1:intenal phy, 0:external (when ASEL=0) */
+
+#endif /* __AX88172_H__ */
diff --git a/usr/src/uts/common/io/axf/axf_usbgem.c b/usr/src/uts/common/io/axf/axf_usbgem.c
new file mode 100644
index 0000000000..28963f6849
--- /dev/null
+++ b/usr/src/uts/common/io/axf/axf_usbgem.c
@@ -0,0 +1,1539 @@
+/*
+ * axf_usbgem.c : ASIX AX88172/772 USB to Fast Ethernet Driver for Solaris
+ *
+ * Copyright (c) 2004-2012 Masayuki Murayama. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the author nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+#pragma ident "@(#)axf_usbgem.c 1.3 12/02/09"
+
+/*
+ * Changelog:
+ */
+
+/*
+ * TODO
+ * handle RXMODE_ENABLE in set_rx_filter()
+ */
+/* ======================================================= */
+
+/*
+ * Solaris system header files and macros
+ */
+
+/* minimum kernel headers for drivers */
+#include <sys/types.h>
+#include <sys/conf.h>
+#include <sys/debug.h>
+#include <sys/kmem.h>
+#include <sys/modctl.h>
+#include <sys/errno.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/byteorder.h>
+
+/* ethernet stuff */
+#include <sys/ethernet.h>
+
+/* interface card depend stuff */
+#include <sys/stropts.h>
+#include <sys/stream.h>
+#include <sys/strlog.h>
+#include <sys/usb/usba.h>
+#include "usbgem.h"
+
+/* hardware stuff */
+#include "usbgem_mii.h"
+#include "ax88172reg.h"
+
+char ident[] = "ax88x72 usbnic driver v" VERSION;
+
+/*
+ * Useful macros
+ */
+#define CHECK_AND_JUMP(err, label) if (err != USB_SUCCESS) goto label
+#define LE16P(p) ((((uint8_t *)(p))[1] << 8) | ((uint8_t *)(p))[0])
+
+#define AX88172(dp) \
+ (((struct axf_dev *)(dp)->private)->chip->type == CHIP_TYPE_AX88172)
+
+#define AX88772(dp) \
+ (((struct axf_dev *)(dp)->private)->chip->type == CHIP_TYPE_AX88772)
+
+/*
+ * Debugging
+ */
+#ifdef DEBUG_LEVEL
+static int axf_debug = DEBUG_LEVEL;
+#define DPRINTF(n, args) if (axf_debug > (n)) cmn_err args
+#else
+#define DPRINTF(n, args)
+#endif
+
+/*
+ * Our configration for ax88172
+ */
+/* timeouts */
+#define ONESEC (drv_usectohz(1*1000000))
+
+/*
+ * RX/TX buffer size
+ */
+
+/*
+ * Local device definitions
+ */
+struct chip_info {
+ uint16_t vid; /* usb vendor id */
+ uint16_t pid; /* usb product id */
+ int type;
+ uint8_t gpio_reset[2];
+ uint8_t gpio_speed[2];
+ uint8_t gpio_duplex[2];
+ char *name;
+#define CHIP_TYPE_AX88172 0
+#define CHIP_TYPE_AX88772 1
+#define CHIP_TYPE_AX88178 2
+};
+
+#define GPIO_DEFAULT {0x00, 0x15}, {0, 0}, {0, 0}
+struct chip_info chiptbl_88x7x[] = {
+/* AX88172 */
+{
+ /* Planex UE2-100TX, Hawking UF200, TrendNet TU2-ET100 */
+ 0x07b8, 0x420a, CHIP_TYPE_AX88172,
+
+ /*
+ * the default setting covers below:
+ * gpio bit2 has to be 0 and gpio bit0 has to be 1
+ */
+ {0, 0},
+ {GPIO_EN1, GPIO_DATA1 | GPIO_EN1},
+ {0, 0},
+ "Planex UE2-100TX", /* tested */
+},
+{
+ 0x2001, 0x1a00, CHIP_TYPE_AX88172,
+ {0x9f, 0x9e}, {0, 0}, {0, 0},
+ "D-Link dube100", /* XXX */
+},
+{
+ 0x077b, 0x2226, CHIP_TYPE_AX88172,
+ GPIO_DEFAULT,
+ "Linksys USB200M",
+},
+{
+ 0x0846, 0x1040, CHIP_TYPE_AX88172,
+ GPIO_DEFAULT,
+ "Netgear FA120",
+},
+{
+ 0x0b95, 0x1720, CHIP_TYPE_AX88172,
+ GPIO_DEFAULT,
+ "Intellinet, ST Lab USB Ethernet",
+},
+{
+ 0x08dd, 0x90ff, CHIP_TYPE_AX88172,
+ GPIO_DEFAULT,
+ "Billionton Systems, USB2AR",
+},
+{
+ 0x0557, 0x2009, CHIP_TYPE_AX88172,
+ GPIO_DEFAULT,
+ "ATEN UC210T",
+},
+{
+ 0x0411, 0x003d, CHIP_TYPE_AX88172,
+ GPIO_DEFAULT,
+ "Buffalo LUA-U2-KTX",
+},
+{
+ 0x6189, 0x182d, CHIP_TYPE_AX88172,
+ GPIO_DEFAULT,
+ "Sitecom LN-029 USB 2.0 10/100 Ethernet adapter",
+},
+{
+ 0x07aa, 0x0017, CHIP_TYPE_AX88172,
+ GPIO_DEFAULT,
+ "corega FEther USB2-TX",
+},
+{
+ 0x1189, 0x0893, CHIP_TYPE_AX88172,
+ GPIO_DEFAULT,
+ "Surecom EP-1427X-2",
+},
+{
+ 0x1631, 0x6200, CHIP_TYPE_AX88172,
+ GPIO_DEFAULT,
+ "goodway corp usb gwusb2e",
+},
+/* AX88772 and AX88178 */
+{
+ 0x13b1, 0x0018, CHIP_TYPE_AX88772,
+ {0, 0}, {0, 0}, {0, 0},
+ "Linksys USB200M rev.2",
+},
+{
+ 0x1557, 0x7720, CHIP_TYPE_AX88772,
+ {0, 0}, {0, 0}, {0, 0},
+ "0Q0 cable ethernet",
+},
+{
+ 0x07d1, 0x3c05, CHIP_TYPE_AX88772,
+ {0, 0}, {0, 0}, {0, 0},
+ "DLink DUB E100 ver B1",
+},
+{
+ 0x2001, 0x3c05, CHIP_TYPE_AX88772,
+ {0, 0}, {0, 0}, {0, 0},
+ "DLink DUB E100 ver B1(2)",
+},
+{
+ 0x05ac, 0x1402, CHIP_TYPE_AX88772,
+ {0, 0}, {0, 0}, {0, 0},
+ "Apple Ethernet USB Adapter",
+},
+{
+ 0x1737, 0x0039, CHIP_TYPE_AX88178,
+ {0, 0}, {0, 0}, {0, 0},
+ "Linksys USB1000",
+},
+{
+ 0x0411, 0x006e, CHIP_TYPE_AX88178,
+ {0, 0}, {0, 0}, {0, 0},
+ "Buffalo LUA-U2-KGT/LUA-U2-GT",
+},
+{
+ 0x04bb, 0x0930, CHIP_TYPE_AX88178,
+ {0, 0}, {0, 0}, {0, 0},
+ "I/O DATA ETG-US2",
+},
+{
+ 0x050d, 0x5055, CHIP_TYPE_AX88178,
+ {0, 0}, {0, 0}, {0, 0},
+ "Belkin F5D5055",
+},
+{
+ /* generic ax88772 must be the last entry */
+ /* planex UE-200TX-G */
+ 0x0b95, 0x7720, CHIP_TYPE_AX88772,
+ {0, 0}, {0, 0}, {0, 0},
+ "ASIX AX88772/AX88178", /* tested */
+},
+};
+
+#define CHIPTABLESIZE (sizeof (chiptbl_88x7x) / sizeof (struct chip_info))
+
+struct axf_dev {
+ /*
+ * Misc HW information
+ */
+ struct chip_info *chip;
+ uint8_t ipg[3];
+ uint8_t gpio;
+ uint16_t rcr;
+ uint16_t msr;
+ uint8_t last_link_state;
+ boolean_t phy_has_reset;
+};
+
+/*
+ * private functions
+ */
+
+/* mii operations */
+static uint16_t axf_mii_read(struct usbgem_dev *, uint_t, int *errp);
+static void axf_mii_write(struct usbgem_dev *, uint_t, uint16_t, int *errp);
+
+/* nic operations */
+static int axf_reset_chip(struct usbgem_dev *);
+static int axf_init_chip(struct usbgem_dev *);
+static int axf_start_chip(struct usbgem_dev *);
+static int axf_stop_chip(struct usbgem_dev *);
+static int axf_set_media(struct usbgem_dev *);
+static int axf_set_rx_filter(struct usbgem_dev *);
+static int axf_get_stats(struct usbgem_dev *);
+static void axf_interrupt(struct usbgem_dev *, mblk_t *);
+
+/* packet operations */
+static mblk_t *axf_tx_make_packet(struct usbgem_dev *, mblk_t *);
+static mblk_t *axf_rx_make_packet(struct usbgem_dev *, mblk_t *);
+
+/* =============================================================== */
+/*
+ * I/O functions
+ */
+/* =============================================================== */
+#define OUT(dp, req, val, ix, len, buf, errp, label) \
+ if ((*(errp) = usbgem_ctrl_out((dp), \
+ /* bmRequestType */ USB_DEV_REQ_HOST_TO_DEV \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ (req), \
+ /* wValue */ (val), \
+ /* wIndex */ (ix), \
+ /* wLength */ (len), \
+ /* value */ (buf), \
+ /* size */ (len))) != USB_SUCCESS) goto label
+
+#define IN(dp, req, val, ix, len, buf, errp, label) \
+ if ((*(errp) = usbgem_ctrl_in((dp), \
+ /* bmRequestType */ USB_DEV_REQ_DEV_TO_HOST \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ (req), \
+ /* wValue */ (val), \
+ /* wIndex */ (ix), \
+ /* wLength */ (len), \
+ /* valuep */ (buf), \
+ /* size */ (len))) != USB_SUCCESS) goto label
+
+/* =============================================================== */
+/*
+ * Hardware manupilation
+ */
+/* =============================================================== */
+static int
+axf_reset_phy(struct usbgem_dev *dp)
+{
+ uint8_t phys[2];
+ uint8_t val8;
+ int err;
+ struct axf_dev *lp = dp->private;
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ if (AX88172(dp)) {
+ delay(drv_usectohz(5000));
+ IN(dp, VCMD_READ_GPIO, 0, 0, 1, &val8, &err, usberr);
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: gpio 0x%b",
+ dp->name, __func__, val8, GPIO_BITS));
+
+ /* reset MII PHY */
+ val8 = lp->chip->gpio_reset[1]
+ | lp->chip->gpio_speed[dp->speed]
+ | lp->chip->gpio_duplex[dp->full_duplex];
+
+ OUT(dp, VCMD_WRITE_GPIO,
+ val8, 0, 0, NULL, &err, usberr);
+ delay(drv_usectohz(5000));
+
+ val8 = lp->chip->gpio_reset[0]
+ | lp->chip->gpio_speed[dp->speed]
+ | lp->chip->gpio_duplex[dp->full_duplex];
+
+ OUT(dp, VCMD_WRITE_GPIO,
+ val8, 0, 0, NULL, &err, usberr);
+ delay(drv_usectohz(5000));
+ } else {
+ lp->gpio = GPIO_RSE | GPIO_DATA2 | GPIO_EN2;
+ OUT(dp, VCMD_WRITE_GPIO, lp->gpio, 0,
+ 0, NULL, &err, usberr);
+ drv_usecwait(1000);
+
+ OUT(dp, VCMD_WRITE_PHY_SELECT_88772,
+ dp->mii_phy_addr == 16 ? 1 : 0, 0, 0, NULL, &err, usberr);
+
+ OUT(dp, VCMD_SOFTWARE_RESET_88772,
+ SWRST_IPPD | SWRST_PRL, 0, 0, NULL, &err, usberr);
+ delay(drv_usectohz(150*1000));
+ OUT(dp, VCMD_SOFTWARE_RESET_88772,
+ 0, 0, 0, NULL, &err, usberr);
+
+ OUT(dp, VCMD_SOFTWARE_RESET_88772,
+ dp->mii_phy_addr == 16 ? SWRST_IPRL : SWRST_PRTE,
+ 0, 0, NULL, &err, usberr);
+ delay(drv_usectohz(150*1000));
+ }
+
+
+ return (USB_SUCCESS);
+
+usberr:
+ return (USB_FAILURE);
+}
+
+static int
+axf_reset_chip(struct usbgem_dev *dp)
+{
+ int err = USB_SUCCESS;
+
+ if (AX88172(dp)) {
+ /* there are no ways to reset nic */
+ return (USB_SUCCESS);
+ }
+#ifdef NEVER
+ OUT(dp, VCMD_SOFTWARE_RESET_88772,
+ SWRST_RR | SWRST_RT, 0, 0, NULL, &err, usberr);
+ OUT(dp, VCMD_SOFTWARE_RESET_88772,
+ 0, 0, 0, NULL, &err, usberr);
+usberr:
+#endif
+ return (err);
+}
+
+/*
+ * Setup ax88172
+ */
+static int
+axf_init_chip(struct usbgem_dev *dp)
+{
+ int i;
+ uint32_t val;
+ int err = USB_SUCCESS;
+ uint16_t reg;
+ uint8_t buf[2];
+ uint16_t tmp16;
+ struct axf_dev *lp = dp->private;
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /* rx conrol register: read default value */
+ if (!AX88172(dp)) {
+ /* clear rx control */
+ OUT(dp, VCMD_WRITE_RXCTRL, 0, 0, 0, NULL, &err, usberr);
+ }
+
+ IN(dp, VCMD_READ_RXCTRL, 0, 0, 2, buf, &err, usberr);
+ lp->rcr = LE16P(buf);
+ DPRINTF(0, (CE_CONT, "!%s: %s: rcr(default):%b",
+ dp->name, __func__, lp->rcr, RCR_BITS));
+
+ lp->rcr &= ~RCR_SO;
+
+ /* Media status register */
+ if (AX88172(dp)) {
+#ifdef notdef
+ lp->msr = MSR_TXABT;
+#else
+ lp->msr = 0;
+#endif
+ } else {
+ lp->msr = MSR_RE | MSR_TXABT;
+ }
+ DPRINTF(0, (CE_CONT, "!%s: %s: msr:%b",
+ dp->name, __func__, lp->msr, MSR_BITS));
+ err = axf_set_media(dp);
+ CHECK_AND_JUMP(err, usberr);
+
+ /* write IPG0-2 registers */
+ if (AX88172(dp)) {
+ OUT(dp, VCMD_WRITE_IPG, lp->ipg[0], 0, 0, NULL, &err, usberr);
+ OUT(dp, VCMD_WRITE_IPG1, lp->ipg[1], 0, 0, NULL, &err, usberr);
+ OUT(dp, VCMD_WRITE_IPG2, lp->ipg[2], 0, 0, NULL, &err, usberr);
+ } else {
+ /* EMPTY */
+ }
+#ifdef ENABLE_RX_IN_INIT_CHIP
+ /* enable Rx */
+ lp->rcr |= RCR_SO;
+ OUT(dp, VCMD_WRITE_RXCTRL, lp->rcr, 0, 0, NULL, &err, usberr);
+#endif
+usberr:
+ DPRINTF(2, (CE_CONT, "!%s: %s: end (%s)",
+ dp->name, __func__,
+ err, err == USB_SUCCESS ? "success" : "error"));
+ return (err);
+}
+
+static int
+axf_start_chip(struct usbgem_dev *dp)
+{
+ int err = USB_SUCCESS;
+ struct axf_dev *lp = dp->private;
+#ifndef ENABLE_RX_IN_INIT_CHIP
+ /* enable Rx */
+ lp->rcr |= RCR_SO;
+ OUT(dp, VCMD_WRITE_RXCTRL, lp->rcr, 0, 0, NULL, &err, usberr);
+
+usberr:
+ DPRINTF(2, (CE_CONT, "!%s: %s: end (%s)",
+ dp->name, __func__,
+ err, err == USB_SUCCESS ? "success" : "error"));
+#endif
+ return (err);
+}
+
+static int
+axf_stop_chip(struct usbgem_dev *dp)
+{
+ int err = USB_SUCCESS;
+ struct axf_dev *lp = dp->private;
+
+ /* Disable Rx */
+ lp->rcr &= ~RCR_SO;
+ OUT(dp, VCMD_WRITE_RXCTRL, lp->rcr, 0, 0, NULL, &err, usberr);
+
+ /*
+ * Restore factory mac address
+ * if we have changed current mac address
+ */
+ if (!AX88172(dp) &&
+ bcmp(dp->dev_addr.ether_addr_octet,
+ dp->cur_addr.ether_addr_octet,
+ ETHERADDRL) != 0) {
+ OUT(dp, VCMD_WRITE_NODE_ID_88772, 0, 0,
+ ETHERADDRL, dp->cur_addr.ether_addr_octet, &err, usberr);
+ }
+usberr:
+ return (axf_reset_chip(dp));
+}
+
+static int
+axf_get_stats(struct usbgem_dev *dp)
+{
+ /* EMPTY */
+ return (USB_SUCCESS);
+}
+
+static uint_t
+axf_mcast_hash(struct usbgem_dev *dp, const uint8_t *addr)
+{
+ return (usbgem_ether_crc_be(addr) >> (32 - 6));
+}
+
+static int
+axf_set_rx_filter(struct usbgem_dev *dp)
+{
+ int i;
+ uint8_t mode;
+ uint8_t mhash[8];
+ uint8_t buf[2];
+ uint_t h;
+ int err = USB_SUCCESS;
+ struct axf_dev *lp = dp->private;
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: called, rxmode:%x",
+ dp->name, __func__, dp->rxmode));
+
+ if (lp->rcr & RCR_SO) {
+ /* set promiscuous mode before changing it. */
+ OUT(dp, VCMD_WRITE_RXCTRL,
+ lp->rcr | RCR_PRO, 0, 0, NULL, &err, usberr);
+ }
+
+ lp->rcr &= ~(RCR_AP_88772 | RCR_AM | RCR_SEP | RCR_AMALL | RCR_PRO);
+ mode = RCR_AB; /* accept broadcast packets */
+
+ bzero(mhash, sizeof (mhash));
+
+ if (dp->rxmode & RXMODE_PROMISC) {
+ /* promiscious mode implies all multicast and all physical */
+ mode |= RCR_PRO;
+ } else if ((dp->rxmode & RXMODE_ALLMULTI) || dp->mc_count > 32) {
+ /* accept all multicast packets */
+ mode |= RCR_AMALL;
+ } else if (dp->mc_count > 0) {
+ /*
+ * make hash table to select interresting
+ * multicast address only.
+ */
+ mode |= RCR_AM;
+ for (i = 0; i < dp->mc_count; i++) {
+ h = dp->mc_list[i].hash;
+ mhash[h / 8] |= 1 << (h % 8);
+ }
+ }
+ if (AX88172(dp)) {
+ if (bcmp(dp->dev_addr.ether_addr_octet,
+ dp->cur_addr.ether_addr_octet, ETHERADDRL) != 0) {
+ /*
+ * we use promiscious mode instead of changing the
+ * mac address in ax88172
+ */
+ mode |= RCR_PRO;
+ }
+ } else {
+ OUT(dp, VCMD_WRITE_NODE_ID_88772, 0, 0,
+ ETHERADDRL, dp->cur_addr.ether_addr_octet, &err, usberr);
+ }
+ lp->rcr |= mode;
+
+ /* set multicast hash table */
+ if (mode & RCR_AM) {
+ /* need to set up multicast hash table */
+ OUT(dp, VCMD_WRITE_MCAST_FILTER, 0, 0,
+ sizeof (mhash), mhash, &err, usberr);
+ }
+
+ /* update rcr */
+ OUT(dp, VCMD_WRITE_RXCTRL, lp->rcr, 0,
+ 0, NULL, &err, usberr);
+
+#if DEBUG_LEVEL > 1
+ /* verify rxctrl reg */
+ IN(dp, VCMD_READ_RXCTRL, 0, 0, 2, buf, &err, usberr);
+ cmn_err(CE_CONT, "!%s: %s: rcr:%b returned",
+ dp->name, __func__, LE16P(buf), RCR_BITS);
+#endif
+usberr:
+ DPRINTF(2, (CE_CONT, "!%s: %s: end (%s)",
+ dp->name, __func__,
+ err, err == USB_SUCCESS ? "success" : "error"));
+ return (err);
+}
+
+static int
+axf_set_media(struct usbgem_dev *dp)
+{
+ uint8_t val8;
+ uint8_t gpio;
+ uint8_t gpio_old;
+ int err = USB_SUCCESS;
+ uint16_t msr;
+ struct axf_dev *lp = dp->private;
+
+ IN(dp, VCMD_READ_GPIO, 0, 0, 1, &gpio, &err, usberr);
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called, gpio:%b",
+ dp->name, __func__, gpio, GPIO_BITS));
+
+ msr = lp->msr;
+ gpio_old = gpio;
+ gpio = lp->chip->gpio_reset[0];
+
+ /* setup speed */
+ if (AX88172(dp)) {
+ /* EMPTY */
+ } else {
+ msr &= ~(MSR_PS | MSR_GM | MSR_ENCK);
+
+ switch (dp->speed) {
+ case USBGEM_SPD_1000:
+ msr |= MSR_GM | MSR_ENCK;
+ break;
+
+ case USBGEM_SPD_100:
+ msr |= MSR_PS;
+ break;
+
+ case USBGEM_SPD_10:
+ break;
+ }
+ }
+ gpio |= lp->chip->gpio_speed[dp->speed == USBGEM_SPD_100 ? 1 : 0];
+
+ /* select duplex */
+ msr &= ~MSR_FDPX;
+ if (dp->full_duplex) {
+ msr |= MSR_FDPX;
+
+ /* select flow control */
+ if (AX88172(dp)) {
+ msr &= ~MSR_FCEN;
+ switch (dp->flow_control) {
+ case FLOW_CONTROL_TX_PAUSE:
+ case FLOW_CONTROL_SYMMETRIC:
+ case FLOW_CONTROL_RX_PAUSE:
+ msr |= MSR_FCEN;
+ break;
+ }
+ } else {
+ msr &= ~(MSR_RFC | MSR_TFC);
+ switch (dp->flow_control) {
+ case FLOW_CONTROL_TX_PAUSE:
+ msr |= MSR_TFC;
+ break;
+
+ case FLOW_CONTROL_SYMMETRIC:
+ msr |= MSR_TFC | MSR_RFC;
+ break;
+
+ case FLOW_CONTROL_RX_PAUSE:
+ msr |= MSR_RFC;
+ break;
+ }
+ }
+ }
+ gpio |= lp->chip->gpio_duplex[dp->full_duplex ? 1 : 0];
+
+ /* update medium status register */
+ lp->msr = msr;
+ OUT(dp, VCMD_WRITE_MEDIUM_STATUS, lp->msr, 0,
+ 0, NULL, &err, usberr);
+
+ if (gpio != gpio_old) {
+ /* LED control required for some products */
+ OUT(dp, VCMD_WRITE_GPIO,
+ gpio, 0, 0, NULL, &err, usberr);
+ }
+
+usberr:
+ DPRINTF(2, (CE_CONT, "!%s: %s: end (%s)",
+ dp->name, __func__,
+ err, err == USB_SUCCESS ? "success" : "error"));
+ return (err);
+}
+
+#define FILL_PKT_HEADER(bp, len) { \
+ (bp)[0] = (uint8_t)(len); \
+ (bp)[1] = (uint8_t)((len) >> 8); \
+ (bp)[2] = (uint8_t)(~(len)); \
+ (bp)[3] = (uint8_t)((~(len)) >> 8); \
+}
+
+#define PKT_HEADER_SIZE 4
+
+/*
+ * send/receive packet check
+ */
+static mblk_t *
+axf_tx_make_packet(struct usbgem_dev *dp, mblk_t *mp)
+{
+ int n;
+ size_t len;
+ size_t pkt_size;
+ mblk_t *new;
+ mblk_t *tp;
+ uint8_t *bp;
+ uint8_t *last_pos;
+ uint_t align_mask;
+ size_t header_size;
+ int pad_size;
+
+ len = msgdsize(mp);
+
+ if (AX88172(dp)) {
+#ifdef notdef
+ align_mask = 63;
+#else
+ align_mask = 511;
+#endif
+ header_size = 0;
+
+ if (len >= ETHERMIN && mp->b_cont == NULL &&
+ (len & align_mask) != 0) {
+ /* use the mp "as is" */
+ return (mp);
+ }
+ } else {
+ align_mask = 511;
+ header_size = PKT_HEADER_SIZE;
+ }
+
+ /*
+ * re-allocate the mp
+ */
+ /* minimum ethernet packet size of ETHERMIN */
+ pkt_size = max(len, ETHERMIN);
+
+ if (((pkt_size + header_size) & align_mask) == 0) {
+ /* padding is required in usb communication */
+ pad_size = PKT_HEADER_SIZE;
+ } else {
+ pad_size = 0;
+ }
+
+ if ((new = allocb(header_size + pkt_size + pad_size, 0)) == NULL) {
+ return (NULL);
+ }
+
+ bp = new->b_rptr;
+ if (header_size) {
+ uint16_t tmp;
+
+ /* add a header */
+ tmp = (uint16_t)pkt_size;
+ FILL_PKT_HEADER(bp, tmp);
+ bp += header_size;
+ }
+
+ /* copy contents of the buffer */
+ for (tp = mp; tp; tp = tp->b_cont) {
+ n = tp->b_wptr - tp->b_rptr;
+ bcopy(tp->b_rptr, bp, n);
+ bp += n;
+ }
+
+ /* add pads for ethernet packets */
+ last_pos = new->b_rptr + header_size + pkt_size;
+ while (bp < last_pos) {
+ *bp++ = 0;
+ }
+
+ /* add a zero-length pad segment for usb communications */
+ if (pad_size) {
+ /* add a dummy header for zero-length packet */
+ FILL_PKT_HEADER(bp, 0);
+ bp += pad_size;
+ }
+
+ /* close the payload of the packet */
+ new->b_wptr = bp;
+
+ return (new);
+}
+
+static void
+axf_dump_packet(struct usbgem_dev *dp, uint8_t *bp, int n)
+{
+ int i;
+
+ for (i = 0; i < n; i += 8, bp += 8) {
+ cmn_err(CE_CONT, "%02x %02x %02x %02x %02x %02x %02x %02x",
+ bp[0], bp[1], bp[2], bp[3], bp[4], bp[5], bp[6], bp[7]);
+ }
+}
+
+static mblk_t *
+axf_rx_make_packet(struct usbgem_dev *dp, mblk_t *mp)
+{
+ mblk_t *tp;
+ int rest;
+
+ if (AX88172(dp)) {
+ return (mp);
+ }
+
+ tp = mp;
+ rest = tp->b_wptr - tp->b_rptr;
+
+ if (rest <= PKT_HEADER_SIZE) {
+ /*
+ * the usb bulk-in frame doesn't include any valid
+ * ethernet packets.
+ */
+ return (NULL);
+ }
+
+ for (; ; ) {
+ uint16_t len;
+ uint16_t cksum;
+
+ /* analyse the header of the received usb frame */
+ len = LE16P(tp->b_rptr + 0);
+ cksum = LE16P(tp->b_rptr + 2);
+
+ /* test if the header is valid */
+ if (len + cksum != 0xffff) {
+ /* discard whole the packet */
+ cmn_err(CE_WARN,
+ "!%s: %s: corrupted header:%04x %04x",
+ dp->name, __func__, len, cksum);
+ return (NULL);
+ }
+#if DEBUG_LEVEL > 0
+ if (len < ETHERMIN || len > ETHERMAX) {
+ cmn_err(CE_NOTE,
+ "!%s: %s: incorrect pktsize:%d",
+ dp->name, __func__, len);
+ }
+#endif
+ /* extract a ethernet packet from the bulk-in frame */
+ tp->b_rptr += PKT_HEADER_SIZE;
+ tp->b_wptr = tp->b_rptr + len;
+
+ if (len & 1) {
+ /*
+ * skip a tailing pad byte if the packet
+ * length is odd
+ */
+ len++;
+ }
+ rest -= len + PKT_HEADER_SIZE;
+
+ if (rest <= PKT_HEADER_SIZE) {
+ /* no more vaild ethernet packets */
+ break;
+ }
+
+#if DEBUG_LEVEL > 10
+ axf_dump_packet(dp, tp->b_wptr, 18);
+#endif
+ /* allocate a mblk_t header for the next ethernet packet */
+ tp->b_next = dupb(mp);
+ tp->b_next->b_rptr = tp->b_rptr + len;
+ tp = tp->b_next;
+ }
+
+ return (mp);
+}
+
+/*
+ * MII Interfaces
+ */
+static uint16_t
+axf_mii_read(struct usbgem_dev *dp, uint_t index, int *errp)
+{
+ uint8_t buf[2];
+ uint16_t val;
+
+ DPRINTF(4, (CE_CONT, "!%s: %s: called, ix:%d",
+ dp->name, __func__, index));
+
+ /* switch to software MII operation mode */
+ OUT(dp, VCMD_SOFTWARE_MII_OP, 0, 0, 0, NULL, errp, usberr);
+
+ /* Read MII register */
+ IN(dp, VCMD_READ_MII_REG, dp->mii_phy_addr, index,
+ 2, buf, errp, usberr);
+
+ /* switch to hardware MII operation mode */
+ OUT(dp, VCMD_HARDWARE_MII_OP, 0, 0, 0, NULL, errp, usberr);
+
+ return (LE16P(buf));
+
+usberr:
+ cmn_err(CE_CONT,
+ "!%s: %s: usberr(%d) detected", dp->name, __func__, *errp);
+ return (0);
+}
+
+static void
+axf_mii_write(struct usbgem_dev *dp, uint_t index, uint16_t val, int *errp)
+{
+ uint8_t buf[2];
+
+ DPRINTF(4, (CE_CONT, "!%s: %s called, reg:%x val:%x",
+ dp->name, __func__, index, val));
+
+ /* switch software MII operation mode */
+ OUT(dp, VCMD_SOFTWARE_MII_OP, 0, 0, 0, NULL, errp, usberr);
+
+ /* Write to the specified MII register */
+ buf[0] = (uint8_t)val;
+ buf[1] = (uint8_t)(val >> 8);
+ OUT(dp, VCMD_WRITE_MII_REG, dp->mii_phy_addr, index,
+ 2, buf, errp, usberr);
+
+ /* switch to hardware MII operation mode */
+ OUT(dp, VCMD_HARDWARE_MII_OP, 0, 0, 0, NULL, errp, usberr);
+
+usberr:
+ ;
+}
+
+static void
+axf_interrupt(struct usbgem_dev *dp, mblk_t *mp)
+{
+ uint8_t *bp;
+ struct axf_dev *lp = dp->private;
+
+ bp = mp->b_rptr;
+
+ DPRINTF(2, (CE_CONT,
+ "!%s: %s: size:%d, %02x %02x %02x %02x %02x %02x %02x %02x",
+ dp->name, __func__, mp->b_wptr - mp->b_rptr,
+ bp[0], bp[1], bp[2], bp[3], bp[4], bp[5], bp[6], bp[7]));
+
+ if (lp->last_link_state ^ bp[2]) {
+ usbgem_mii_update_link(dp);
+ }
+
+ lp->last_link_state = bp[2];
+}
+
+/* ======================================================== */
+/*
+ * OS depend (device driver DKI) routine
+ */
+/* ======================================================== */
+#ifdef DEBUG_LEVEL
+static void
+axf_eeprom_dump(struct usbgem_dev *dp, int size)
+{
+ int i;
+ int err;
+ uint8_t w0[2], w1[2], w2[2], w3[2];
+
+ cmn_err(CE_CONT, "!%s: eeprom dump:", dp->name);
+
+ err = USB_SUCCESS;
+
+ for (i = 0; i < size; i += 4) {
+ IN(dp, VCMD_READ_SROM, i + 0, 0, 2, w0, &err, usberr);
+ IN(dp, VCMD_READ_SROM, i + 1, 0, 2, w1, &err, usberr);
+ IN(dp, VCMD_READ_SROM, i + 2, 0, 2, w2, &err, usberr);
+ IN(dp, VCMD_READ_SROM, i + 3, 0, 2, w3, &err, usberr);
+ cmn_err(CE_CONT, "!0x%02x: 0x%04x 0x%04x 0x%04x 0x%04x",
+ i,
+ (w0[1] << 8) | w0[0],
+ (w1[1] << 8) | w1[0],
+ (w2[1] << 8) | w2[0],
+ (w3[1] << 8) | w3[0]);
+ }
+usberr:
+ ;
+}
+#endif
+
+static int
+axf_attach_chip(struct usbgem_dev *dp)
+{
+ uint8_t phys[2];
+ int err;
+ uint_t vcmd;
+ int ret;
+#ifdef CONFIG_FULLSIZE_VLAN
+ uint8_t maxpktsize[2];
+ uint16_t vlan_pktsize;
+#endif
+#ifdef DEBUG_LEVEL
+ uint8_t val8;
+#endif
+ struct axf_dev *lp = dp->private;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s enter", dp->name, __func__));
+
+ ret = USB_SUCCESS;
+ /*
+ * mac address in EEPROM has loaded to ID registers.
+ */
+ vcmd = AX88172(dp) ? VCMD_READ_NODE_ID : VCMD_READ_NODE_ID_88772;
+ IN(dp, vcmd, 0, 0,
+ ETHERADDRL, dp->dev_addr.ether_addr_octet, &err, usberr);
+
+ /*
+ * setup IPG values
+ */
+ lp->ipg[0] = 0x15;
+ lp->ipg[1] = 0x0c;
+ lp->ipg[2] = 0x12;
+
+ /*
+ * We cannot scan phy because the nic returns undefined
+ * value, i.e. remained garbage, when MII phy is not at the
+ * specified index.
+ */
+#ifdef DEBUG_LEVELx
+ if (lp->chip->vid == 0x07b8 && lp->chip->pid == 0x420a) {
+ /*
+ * restore the original phy address of brain
+ * damaged Planex UE2-100TX
+ */
+ OUT(dp, VCMD_WRITE_SROM_ENABLE, 0, 0, 0, NULL, &err, usberr);
+ OUT(dp, VCMD_WRITE_SROM, 0x11, 0xe004, 0, NULL, &err, usberr);
+ OUT(dp, VCMD_WRITE_SROM_DISABLE, 0, 0, 0, NULL, &err, usberr);
+ }
+#endif
+ if (AX88172(dp)) {
+ IN(dp, VCMD_READ_PHY_IDS, 0, 0, 2, &phys, &err, usberr);
+ dp->mii_phy_addr = phys[1];
+ DPRINTF(0, (CE_CONT, "!%s: %s: phys_addr:%d %d",
+ dp->name, __func__, phys[0], phys[1]));
+ } else {
+ /* use built-in phy */
+ dp->mii_phy_addr = 0x10;
+ }
+
+ dp->misc_flag |= USBGEM_VLAN;
+#ifdef CONFIG_FULLSIZE_VLAN
+ if (AX88172(dp) || AX88772(dp)) {
+ /* check max packet size in srom */
+ IN(dp, VCMD_READ_SROM, 0x10, 0, 2, maxpktsize, &err, usberr);
+ vlan_pktsize = ETHERMAX + ETHERFCSL + 4 /* VTAG_SIZE */;
+
+ if (LE16P(maxpktsize) < vlan_pktsize) {
+ cmn_err(CE_CONT,
+ "!%s: %s: max packet size in srom is too small, "
+ "changing %d -> %d, do power cycle for the device",
+ dp->name, __func__,
+ LE16P(maxpktsize), vlan_pktsize);
+ OUT(dp, VCMD_WRITE_SROM_ENABLE,
+ 0, 0, 0, NULL, &err, usberr);
+ OUT(dp, VCMD_WRITE_SROM, 0x10,
+ vlan_pktsize, 0, NULL, &err, usberr);
+ OUT(dp, VCMD_WRITE_SROM_DISABLE,
+ 0, 0, 0, NULL, &err, usberr);
+
+ /* need to power off the device */
+ ret = USB_FAILURE;
+ }
+ }
+#endif
+#ifdef DEBUG_LEVEL
+ IN(dp, VCMD_READ_GPIO, 0, 0, 1, &val8, &err, usberr);
+ cmn_err(CE_CONT,
+ "!%s: %s: ipg 0x%02x 0x%02x 0x%02x, gpio 0x%b",
+ dp->name, __func__, lp->ipg[0], lp->ipg[1], lp->ipg[2],
+ val8, GPIO_BITS);
+#endif
+ /* fix rx buffer size */
+ if (!AX88172(dp)) {
+ dp->rx_buf_len = 2048;
+ }
+
+#if DEBUG_LEVEL > 0
+ axf_eeprom_dump(dp, 0x20);
+#endif
+ return (ret);
+
+usberr:
+ cmn_err(CE_WARN, "%s: %s: usb error detected (%d)",
+ dp->name, __func__, err);
+ return (USB_FAILURE);
+}
+
+static boolean_t
+axf_scan_phy(struct usbgem_dev *dp)
+{
+ int i;
+ int err;
+ uint16_t val;
+ int phy_addr_saved;
+ struct axf_dev *lp = dp->private;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ phy_addr_saved = dp->mii_phy_addr;
+
+ /* special probe routine for unreliable MII addr */
+#define PROBE_PAT \
+ (MII_ABILITY_100BASE_TX_FD | \
+ MII_ABILITY_100BASE_TX | \
+ MII_ABILITY_10BASE_T_FD | \
+ MII_ABILITY_10BASE_T)
+
+ for (i = 0; i < 32; i++) {
+ dp->mii_phy_addr = i;
+ axf_mii_write(dp, MII_AN_ADVERT, 0, &err);
+ if (err != USBGEM_SUCCESS) {
+ break;
+ }
+ val = axf_mii_read(dp, MII_AN_ADVERT, &err);
+ if (err != USBGEM_SUCCESS) {
+ break;
+ }
+ if (val != 0) {
+ DPRINTF(0, (CE_CONT, "!%s: %s: index:%d, val %b != 0",
+ dp->name, __func__, i, val, MII_ABILITY_BITS));
+ continue;
+ }
+
+ axf_mii_write(dp, MII_AN_ADVERT, PROBE_PAT, &err);
+ if (err != USBGEM_SUCCESS) {
+ break;
+ }
+ val = axf_mii_read(dp, MII_AN_ADVERT, &err);
+ if (err != USBGEM_SUCCESS) {
+ break;
+ }
+ if ((val & MII_ABILITY_TECH) != PROBE_PAT) {
+ DPRINTF(0, (CE_CONT, "!%s: %s: "
+ "index:%d, pat:%x != val:%b",
+ dp->name, __func__, i,
+ PROBE_PAT, val, MII_ABILITY_BITS));
+ continue;
+ }
+
+ /* found */
+ dp->mii_phy_addr = phy_addr_saved;
+ return (i);
+ }
+#undef PROBE_PAT
+ if (i == 32) {
+ cmn_err(CE_CONT, "!%s: %s: no mii phy found",
+ dp->name, __func__);
+ } else {
+ cmn_err(CE_CONT, "!%s: %s: i/o error while scanning phy",
+ dp->name, __func__);
+ }
+ dp->mii_phy_addr = phy_addr_saved;
+ return (-1);
+}
+
+static int
+axf_mii_probe(struct usbgem_dev *dp)
+{
+ int my_guess;
+ int err;
+ uint8_t old_11th[2];
+ uint8_t new_11th[2];
+ struct axf_dev *lp = dp->private;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+ (void) axf_reset_phy(dp);
+ lp->phy_has_reset = B_TRUE;
+
+ if (AX88172(dp)) {
+ my_guess = axf_scan_phy(dp);
+ if (my_guess >= 0 && my_guess < 32 &&
+ my_guess != dp->mii_phy_addr) {
+ /*
+ * phy addr in srom is wrong, need to fix it
+ */
+ IN(dp, VCMD_READ_SROM,
+ 0x11, 0, 2, old_11th, &err, usberr);
+
+ new_11th[0] = my_guess;
+ new_11th[1] = old_11th[1];
+
+ OUT(dp, VCMD_WRITE_SROM_ENABLE,
+ 0, 0, 0, NULL, &err, usberr);
+ OUT(dp, VCMD_WRITE_SROM,
+ 0x11, LE16P(new_11th), 0, NULL, &err, usberr);
+ OUT(dp, VCMD_WRITE_SROM_DISABLE,
+ 0, 0, 0, NULL, &err, usberr);
+#if 1
+ /* XXX - read back, but it doesn't work, why? */
+ delay(drv_usectohz(1000*1000));
+ IN(dp, VCMD_READ_SROM,
+ 0x11, 0, 2, new_11th, &err, usberr);
+#endif
+ cmn_err(CE_NOTE, "!%s: %s: phy addr in srom fixed: "
+ "%04x -> %04x",
+ dp->name, __func__,
+ LE16P(old_11th), LE16P(new_11th));
+ return (USBGEM_FAILURE);
+usberr:
+ cmn_err(CE_NOTE,
+ "!%s: %s: failed to patch phy addr, "
+ "current: %04x",
+ dp->name, __func__, LE16P(old_11th));
+ return (USBGEM_FAILURE);
+ }
+ }
+ return (usbgem_mii_probe_default(dp));
+}
+
+static int
+axf_mii_init(struct usbgem_dev *dp)
+{
+ struct axf_dev *lp = dp->private;
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ if (!lp->phy_has_reset) {
+ (void) axf_reset_phy(dp);
+ }
+
+ /* prepare to reset phy on the next reconnect or resume */
+ lp->phy_has_reset = B_FALSE;
+
+ return (USB_SUCCESS);
+}
+
+static int
+axfattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int i;
+ ddi_iblock_cookie_t c;
+ int ret;
+ int revid;
+ int unit;
+ int vid;
+ int pid;
+ struct chip_info *p;
+ int len;
+ const char *drv_name;
+ struct usbgem_dev *dp;
+ void *base;
+ struct usbgem_conf *ugcp;
+ struct axf_dev *lp;
+
+ unit = ddi_get_instance(dip);
+ drv_name = ddi_driver_name(dip);
+
+ DPRINTF(3, (CE_CONT, "!%s%d: %s: called, cmd:%d",
+ drv_name, unit, __func__, cmd));
+
+ if (cmd == DDI_ATTACH) {
+ /*
+ * Check if the chip is supported.
+ */
+ vid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
+ "usb-vendor-id", -1);
+ pid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
+ "usb-product-id", -1);
+ revid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
+ "usb-revision-id", -1);
+
+ for (i = 0, p = chiptbl_88x7x; i < CHIPTABLESIZE; i++, p++) {
+ if (p->vid == vid && p->pid == pid) {
+ /* found */
+ cmn_err(CE_CONT, "!%s%d: %s "
+ "(vid: 0x%04x, did: 0x%04x, revid: 0x%02x)",
+ drv_name, unit, p->name, vid, pid, revid);
+ goto chip_found;
+ }
+ }
+
+ /* Not found */
+ cmn_err(CE_WARN, "!%s: %s: wrong usb venid/prodid (0x%x, 0x%x)",
+ drv_name, __func__, vid, pid);
+
+ /* assume 88772 */
+ p = &chiptbl_88x7x[CHIPTABLESIZE - 1];
+chip_found:
+ /*
+ * construct usbgem configration
+ */
+ ugcp = kmem_zalloc(sizeof (*ugcp), KM_SLEEP);
+
+ /* name */
+ /*
+ * softmac requires that ppa is the instance number
+ * of the device, otherwise it hangs in seaching the device.
+ */
+ sprintf(ugcp->usbgc_name, "%s%d", drv_name, unit);
+ ugcp->usbgc_ppa = unit;
+
+ ugcp->usbgc_ifnum = 0;
+ ugcp->usbgc_alt = 0;
+
+ ugcp->usbgc_tx_list_max = 64;
+
+ ugcp->usbgc_rx_header_len = 0;
+ ugcp->usbgc_rx_list_max = 64;
+
+ /* time out parameters */
+ ugcp->usbgc_tx_timeout = USBGEM_TX_TIMEOUT;
+ ugcp->usbgc_tx_timeout_interval = USBGEM_TX_TIMEOUT_INTERVAL;
+
+ /* flow control */
+ /*
+ * XXX - flow control caused link down frequently under
+ * heavy traffic
+ */
+ ugcp->usbgc_flow_control = FLOW_CONTROL_RX_PAUSE;
+
+ /* MII timeout parameters */
+ ugcp->usbgc_mii_link_watch_interval = ONESEC;
+ ugcp->usbgc_mii_an_watch_interval = ONESEC/5;
+ ugcp->usbgc_mii_reset_timeout = MII_RESET_TIMEOUT; /* 1 sec */
+ ugcp->usbgc_mii_an_timeout = MII_AN_TIMEOUT; /* 5 sec */
+ ugcp->usbgc_mii_an_wait = 0;
+ ugcp->usbgc_mii_linkdown_timeout = MII_LINKDOWN_TIMEOUT;
+
+ ugcp->usbgc_mii_an_delay = ONESEC/10;
+ ugcp->usbgc_mii_linkdown_action = MII_ACTION_RSA;
+ ugcp->usbgc_mii_linkdown_timeout_action = MII_ACTION_RESET;
+ ugcp->usbgc_mii_dont_reset = B_FALSE;
+ ugcp->usbgc_mii_hw_link_detection = B_TRUE;
+ ugcp->usbgc_mii_stop_mac_on_linkdown = B_FALSE;
+
+ /* I/O methods */
+
+ /* mac operation */
+ ugcp->usbgc_attach_chip = &axf_attach_chip;
+ ugcp->usbgc_reset_chip = &axf_reset_chip;
+ ugcp->usbgc_init_chip = &axf_init_chip;
+ ugcp->usbgc_start_chip = &axf_start_chip;
+ ugcp->usbgc_stop_chip = &axf_stop_chip;
+ ugcp->usbgc_multicast_hash = &axf_mcast_hash;
+
+ ugcp->usbgc_set_rx_filter = &axf_set_rx_filter;
+ ugcp->usbgc_set_media = &axf_set_media;
+ ugcp->usbgc_get_stats = &axf_get_stats;
+ ugcp->usbgc_interrupt = &axf_interrupt;
+
+ /* packet operation */
+ ugcp->usbgc_tx_make_packet = &axf_tx_make_packet;
+ ugcp->usbgc_rx_make_packet = &axf_rx_make_packet;
+
+ /* mii operations */
+ ugcp->usbgc_mii_probe = &axf_mii_probe;
+ ugcp->usbgc_mii_init = &axf_mii_init;
+ ugcp->usbgc_mii_config = &usbgem_mii_config_default;
+ ugcp->usbgc_mii_read = &axf_mii_read;
+ ugcp->usbgc_mii_write = &axf_mii_write;
+
+ /* mtu */
+ ugcp->usbgc_min_mtu = ETHERMTU;
+ ugcp->usbgc_max_mtu = ETHERMTU;
+ ugcp->usbgc_default_mtu = ETHERMTU;
+
+ lp = kmem_zalloc(sizeof (struct axf_dev), KM_SLEEP);
+ lp->chip = p;
+ lp->last_link_state = 0;
+ lp->phy_has_reset = B_FALSE;
+
+ dp = usbgem_do_attach(dip, ugcp, lp, sizeof (struct axf_dev));
+
+ kmem_free(ugcp, sizeof (*ugcp));
+
+ if (dp != NULL) {
+ return (DDI_SUCCESS);
+ }
+
+err_free_mem:
+ kmem_free(lp, sizeof (struct axf_dev));
+err_close_pipe:
+err:
+ return (DDI_FAILURE);
+ }
+
+ if (cmd == DDI_RESUME) {
+ return (usbgem_resume(dip));
+ }
+
+ return (DDI_FAILURE);
+}
+
+static int
+axfdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ int ret;
+
+ if (cmd == DDI_DETACH) {
+ ret = usbgem_do_detach(dip);
+ if (ret != DDI_SUCCESS) {
+ return (DDI_FAILURE);
+ }
+ return (DDI_SUCCESS);
+ }
+ if (cmd == DDI_SUSPEND) {
+ return (usbgem_suspend(dip));
+ }
+ return (DDI_FAILURE);
+}
+
+/* ======================================================== */
+/*
+ * OS depend (loadable streams driver) routine
+ */
+/* ======================================================== */
+#ifdef USBGEM_CONFIG_GLDv3
+USBGEM_STREAM_OPS(axf_ops, axfattach, axfdetach);
+#else
+static struct module_info axfminfo = {
+ 0, /* mi_idnum */
+ "axf", /* mi_idname */
+ 0, /* mi_minpsz */
+ ETHERMTU, /* mi_maxpsz */
+ ETHERMTU*128, /* mi_hiwat */
+ 1, /* mi_lowat */
+};
+
+static struct qinit axfrinit = {
+ (int (*)()) NULL, /* qi_putp */
+ usbgem_rsrv, /* qi_srvp */
+ usbgem_open, /* qi_qopen */
+ usbgem_close, /* qi_qclose */
+ (int (*)()) NULL, /* qi_qadmin */
+ &axfminfo, /* qi_minfo */
+ NULL /* qi_mstat */
+};
+
+static struct qinit axfwinit = {
+ usbgem_wput, /* qi_putp */
+ usbgem_wsrv, /* qi_srvp */
+ (int (*)()) NULL, /* qi_qopen */
+ (int (*)()) NULL, /* qi_qclose */
+ (int (*)()) NULL, /* qi_qadmin */
+ &axfminfo, /* qi_minfo */
+ NULL /* qi_mstat */
+};
+
+static struct streamtab axf_info = {
+ &axfrinit, /* st_rdinit */
+ &axfwinit, /* st_wrinit */
+ NULL, /* st_muxrinit */
+ NULL /* st_muxwrinit */
+};
+
+static struct cb_ops cb_axf_ops = {
+ nulldev, /* cb_open */
+ nulldev, /* cb_close */
+ nodev, /* cb_strategy */
+ nodev, /* cb_print */
+ nodev, /* cb_dump */
+ nodev, /* cb_read */
+ nodev, /* cb_write */
+ nodev, /* cb_ioctl */
+ nodev, /* cb_devmap */
+ nodev, /* cb_mmap */
+ nodev, /* cb_segmap */
+ nochpoll, /* cb_chpoll */
+ ddi_prop_op, /* cb_prop_op */
+ &axf_info, /* cb_stream */
+ D_NEW|D_MP /* cb_flag */
+};
+
+static struct dev_ops axf_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* devo_refcnt */
+ usbgem_getinfo, /* devo_getinfo */
+ nulldev, /* devo_identify */
+ nulldev, /* devo_probe */
+ axfattach, /* devo_attach */
+ axfdetach, /* devo_detach */
+ nodev, /* devo_reset */
+ &cb_axf_ops, /* devo_cb_ops */
+ NULL, /* devo_bus_ops */
+ usbgem_power, /* devo_power */
+#if DEVO_REV >= 4
+ usbgem_quiesce, /* devo_quiesce */
+#endif
+};
+#endif
+
+static struct modldrv modldrv = {
+ &mod_driverops, /* Type of module. This one is a driver */
+ ident,
+ &axf_ops, /* driver ops */
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, &modldrv, NULL
+};
+
+/* ======================================================== */
+/*
+ * _init : done
+ */
+/* ======================================================== */
+int
+_init(void)
+{
+ int status;
+
+ DPRINTF(2, (CE_CONT, "!axf: _init: called"));
+
+ status = usbgem_mod_init(&axf_ops, "axf");
+ if (status != DDI_SUCCESS) {
+ return (status);
+ }
+ status = mod_install(&modlinkage);
+ if (status != DDI_SUCCESS) {
+ usbgem_mod_fini(&axf_ops);
+ }
+ return (status);
+}
+
+/*
+ * _fini : done
+ */
+int
+_fini(void)
+{
+ int status;
+
+ DPRINTF(2, (CE_CONT, "!axf: _fini: called"));
+ status = mod_remove(&modlinkage);
+ if (status == DDI_SUCCESS) {
+ usbgem_mod_fini(&axf_ops);
+ }
+ return (status);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
diff --git a/usr/src/uts/common/io/bge/bge_chip2.c b/usr/src/uts/common/io/bge/bge_chip2.c
index f687ce4892..a459f867f3 100644
--- a/usr/src/uts/common/io/bge/bge_chip2.c
+++ b/usr/src/uts/common/io/bge/bge_chip2.c
@@ -24,7 +24,7 @@
*/
/*
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2011, 2012 Nexenta Systems, Inc. All rights reserved.
*/
#include "bge_impl.h"
@@ -363,7 +363,34 @@ bge_chip_cfg_init(bge_t *bgep, chip_id_t *cidp, boolean_t enable_dma)
if (DEVICE_5717_SERIES_CHIPSETS(bgep))
pci_config_put32(handle, PCI_CONF_BGE_MHCR, 0);
mhcr = pci_config_get32(handle, PCI_CONF_BGE_MHCR);
- cidp->asic_rev = mhcr & MHCR_CHIP_REV_MASK;
+ cidp->asic_rev = (mhcr & MHCR_CHIP_REV_MASK) >> MHCR_CHIP_REV_SHIFT;
+ if (MHCR_CHIP_ASIC_REV(cidp->asic_rev) == MHCR_CHIP_ASIC_REV_PRODID) {
+ uint32_t reg;
+ switch (cidp->device) {
+ case DEVICE_ID_5717:
+ case DEVICE_ID_5718:
+ case DEVICE_ID_5719:
+ case DEVICE_ID_5720:
+ reg = PCI_CONF_GEN2_PRODID_ASICREV;
+ break;
+ case DEVICE_ID_57781:
+ case DEVICE_ID_57785:
+ case DEVICE_ID_57761:
+ case DEVICE_ID_57765:
+ case DEVICE_ID_57791:
+ case DEVICE_ID_57795:
+ case DEVICE_ID_57762:
+ case DEVICE_ID_57766:
+ case DEVICE_ID_57782:
+ case DEVICE_ID_57786:
+ reg = PCI_CONF_GEN15_PRODID_ASICREV;
+ break;
+ default:
+ reg = PCI_CONF_PRODID_ASICREV;
+ break;
+ }
+ cidp->asic_rev = pci_config_get32(handle, reg);
+ }
cidp->businfo = pci_config_get32(handle, PCI_CONF_BGE_PCISTATE);
cidp->command = pci_config_get16(handle, PCI_CONF_COMM);
@@ -386,6 +413,45 @@ bge_chip_cfg_init(bge_t *bgep, chip_id_t *cidp, boolean_t enable_dma)
BGE_DEBUG(("bge_chip_cfg_init: clsize %d latency %d command 0x%x",
cidp->clsize, cidp->latency, cidp->command));
+ cidp->chip_type = 0;
+ if (MHCR_CHIP_ASIC_REV(cidp->asic_rev) == MHCR_CHIP_ASIC_REV_5717 ||
+ MHCR_CHIP_ASIC_REV(cidp->asic_rev) == MHCR_CHIP_ASIC_REV_5719 ||
+ MHCR_CHIP_ASIC_REV(cidp->asic_rev) == MHCR_CHIP_ASIC_REV_5720)
+ cidp->chip_type |= CHIP_TYPE_5717_PLUS;
+
+ if (MHCR_CHIP_ASIC_REV(cidp->asic_rev) == MHCR_CHIP_ASIC_REV_57765 ||
+ MHCR_CHIP_ASIC_REV(cidp->asic_rev) == MHCR_CHIP_ASIC_REV_57766)
+ cidp->chip_type |= CHIP_TYPE_57765_CLASS;
+
+ if (cidp->chip_type & CHIP_TYPE_57765_CLASS ||
+ cidp->chip_type & CHIP_TYPE_5717_PLUS)
+ cidp->chip_type |= CHIP_TYPE_57765_PLUS;
+
+ /* Intentionally exclude ASIC_REV_5906 */
+ if (MHCR_CHIP_ASIC_REV(cidp->asic_rev) == MHCR_CHIP_ASIC_REV_5755 ||
+ MHCR_CHIP_ASIC_REV(cidp->asic_rev) == MHCR_CHIP_ASIC_REV_5787 ||
+ MHCR_CHIP_ASIC_REV(cidp->asic_rev) == MHCR_CHIP_ASIC_REV_5784 ||
+ MHCR_CHIP_ASIC_REV(cidp->asic_rev) == MHCR_CHIP_ASIC_REV_5761 ||
+ MHCR_CHIP_ASIC_REV(cidp->asic_rev) == MHCR_CHIP_ASIC_REV_5785 ||
+ MHCR_CHIP_ASIC_REV(cidp->asic_rev) == MHCR_CHIP_ASIC_REV_57780 ||
+ cidp->chip_type & CHIP_TYPE_57765_PLUS)
+ cidp->chip_type |= CHIP_TYPE_5755_PLUS;
+
+ if (MHCR_CHIP_ASIC_REV(cidp->asic_rev) == MHCR_CHIP_ASIC_REV_5780 ||
+ MHCR_CHIP_ASIC_REV(cidp->asic_rev) == MHCR_CHIP_ASIC_REV_5714)
+ cidp->chip_type |= CHIP_TYPE_5780_CLASS;
+
+ if (MHCR_CHIP_ASIC_REV(cidp->asic_rev) == MHCR_CHIP_ASIC_REV_5750 ||
+ MHCR_CHIP_ASIC_REV(cidp->asic_rev) == MHCR_CHIP_ASIC_REV_5752 ||
+ MHCR_CHIP_ASIC_REV(cidp->asic_rev) == MHCR_CHIP_ASIC_REV_5906 ||
+ cidp->chip_type & CHIP_TYPE_5755_PLUS ||
+ cidp->chip_type & CHIP_TYPE_5780_CLASS)
+ cidp->chip_type |= CHIP_TYPE_5750_PLUS;
+
+ if (MHCR_CHIP_ASIC_REV(cidp->asic_rev) == MHCR_CHIP_ASIC_REV_5705 ||
+ cidp->chip_type & CHIP_TYPE_5750_PLUS)
+ cidp->chip_type |= CHIP_TYPE_5705_PLUS;
+
/*
* Step 2 (also step 6): disable and clear interrupts.
* Steps 11-13: configure PIO endianness options, and enable
@@ -445,8 +511,9 @@ bge_chip_cfg_init(bge_t *bgep, chip_id_t *cidp, boolean_t enable_dma)
* see whether the host is truly up to date, and regenerate
* its interrupt if not.
*/
- mhcr = MHCR_ENABLE_INDIRECT_ACCESS |
+ mhcr = MHCR_ENABLE_INDIRECT_ACCESS |
MHCR_ENABLE_TAGGED_STATUS_MODE |
+ MHCR_ENABLE_PCI_STATE_WRITE |
MHCR_MASK_INTERRUPT_MODE |
MHCR_CLEAR_INTERRUPT_INTA;
@@ -1896,10 +1963,16 @@ bge_nvmem_id(bge_t *bgep)
case DEVICE_ID_5705_2:
case DEVICE_ID_5717:
case DEVICE_ID_5718:
+ case DEVICE_ID_5719:
+ case DEVICE_ID_5720:
case DEVICE_ID_5724:
+ case DEVICE_ID_57760:
case DEVICE_ID_57780:
+ case DEVICE_ID_57788:
+ case DEVICE_ID_57790:
case DEVICE_ID_5780:
case DEVICE_ID_5782:
+ case DEVICE_ID_5784M:
case DEVICE_ID_5785:
case DEVICE_ID_5787:
case DEVICE_ID_5787M:
@@ -1918,6 +1991,8 @@ bge_nvmem_id(bge_t *bgep)
case DEVICE_ID_5723:
case DEVICE_ID_5761:
case DEVICE_ID_5761E:
+ case DEVICE_ID_5761S:
+ case DEVICE_ID_5761SE:
case DEVICE_ID_5764:
case DEVICE_ID_5714C:
case DEVICE_ID_5714S:
@@ -2023,14 +2098,35 @@ bge_chip_id_init(bge_t *bgep)
cidp->msi_enabled = B_FALSE;
+ if (MHCR_CHIP_ASIC_REV(bgep->chipid.asic_rev) >
+ MHCR_CHIP_ASIC_REV_PRODID ||
+ MHCR_CHIP_ASIC_REV(bgep->chipid.asic_rev) ==
+ MHCR_CHIP_ASIC_REV_5906 ||
+ MHCR_CHIP_ASIC_REV(bgep->chipid.asic_rev) ==
+ MHCR_CHIP_ASIC_REV_5700 ||
+ MHCR_CHIP_ASIC_REV(bgep->chipid.asic_rev) ==
+ MHCR_CHIP_ASIC_REV_5701 ||
+ MHCR_CHIP_ASIC_REV(bgep->chipid.asic_rev) ==
+ MHCR_CHIP_ASIC_REV_5750)
+ /*
+ * Just a plain reset; the "check" code breaks these chips
+ */
+ cidp->flags |= CHIP_FLAG_NO_CHECK_RESET;
+
switch (cidp->device) {
case DEVICE_ID_5717:
case DEVICE_ID_5718:
+ case DEVICE_ID_5719:
+ case DEVICE_ID_5720:
case DEVICE_ID_5724:
if (cidp->device == DEVICE_ID_5717)
cidp->chip_label = 5717;
else if (cidp->device == DEVICE_ID_5718)
cidp->chip_label = 5718;
+ else if (cidp->device == DEVICE_ID_5719)
+ cidp->chip_label = 5719;
+ else if (cidp->device == DEVICE_ID_5720)
+ cidp->chip_label = 5720;
else
cidp->chip_label = 5724;
cidp->msi_enabled = bge_enable_msi;
@@ -2044,7 +2140,7 @@ bge_chip_id_init(bge_t *bgep)
cidp->mbuf_hi_water = MBUF_HIWAT_5717;
cidp->mbuf_base = bge_mbuf_pool_base_5705;
cidp->mbuf_length = bge_mbuf_pool_len_5705;
- cidp->recv_slots = BGE_RECV_SLOTS_5705;
+ cidp->recv_slots = BGE_RECV_SLOTS_5717;
cidp->bge_mlcr_default = MLCR_DEFAULT_5717;
cidp->rx_rings = BGE_RECV_RINGS_MAX_5705;
cidp->tx_rings = BGE_SEND_RINGS_MAX_5705;
@@ -2220,7 +2316,13 @@ bge_chip_id_init(bge_t *bgep)
case DEVICE_ID_5723:
case DEVICE_ID_5761:
case DEVICE_ID_5761E:
+ case DEVICE_ID_5761S:
+ case DEVICE_ID_5761SE:
+ case DEVICE_ID_5784M:
+ case DEVICE_ID_57760:
case DEVICE_ID_57780:
+ case DEVICE_ID_57788:
+ case DEVICE_ID_57790:
cidp->msi_enabled = bge_enable_msi;
/*
* We don't use MSI for BCM5764 and BCM5785, as the
@@ -2234,10 +2336,18 @@ bge_chip_id_init(bge_t *bgep)
cidp->chip_label = 5723;
else if (cidp->device == DEVICE_ID_5764)
cidp->chip_label = 5764;
+ else if (cidp->device == DEVICE_ID_5784M)
+ cidp->chip_label = 5784;
else if (cidp->device == DEVICE_ID_5785)
cidp->chip_label = 5785;
+ else if (cidp->device == DEVICE_ID_57760)
+ cidp->chip_label = 57760;
else if (cidp->device == DEVICE_ID_57780)
cidp->chip_label = 57780;
+ else if (cidp->device == DEVICE_ID_57788)
+ cidp->chip_label = 57788;
+ else if (cidp->device == DEVICE_ID_57790)
+ cidp->chip_label = 57790;
else
cidp->chip_label = 5761;
cidp->bge_dma_rwctrl = bge_dma_rwctrl_5721;
@@ -3401,18 +3511,27 @@ bge_chip_reset(bge_t *bgep, boolean_t enable_dma)
mhcr = MHCR_ENABLE_INDIRECT_ACCESS |
MHCR_ENABLE_TAGGED_STATUS_MODE |
MHCR_MASK_INTERRUPT_MODE |
- MHCR_MASK_PCI_INT_OUTPUT |
MHCR_CLEAR_INTERRUPT_INTA |
MHCR_ENABLE_ENDIAN_WORD_SWAP |
MHCR_ENABLE_ENDIAN_BYTE_SWAP;
+
+ if (bgep->intr_type == DDI_INTR_TYPE_FIXED)
+ mhcr |= MHCR_MASK_PCI_INT_OUTPUT;
+
if (DEVICE_5717_SERIES_CHIPSETS(bgep))
pci_config_put32(bgep->cfg_handle, PCI_CONF_BGE_MHCR,
0);
+#else
+ mhcr = MHCR_ENABLE_INDIRECT_ACCESS |
+ MHCR_ENABLE_TAGGED_STATUS_MODE |
+ MHCR_MASK_INTERRUPT_MODE |
+ MHCR_MASK_PCI_INT_OUTPUT |
+ MHCR_CLEAR_INTERRUPT_INTA;
+#endif
pci_config_put32(bgep->cfg_handle, PCI_CONF_BGE_MHCR, mhcr);
bge_reg_put32(bgep, MEMORY_ARBITER_MODE_REG,
bge_reg_get32(bgep, MEMORY_ARBITER_MODE_REG) |
MEMORY_ARBITER_ENABLE);
-#endif
if (asf_mode == ASF_MODE_INIT) {
bge_asf_pre_reset_operations(bgep, BGE_INIT_RESET);
} else if (asf_mode == ASF_MODE_SHUTDOWN) {
@@ -3436,9 +3555,13 @@ bge_chip_reset(bge_t *bgep, boolean_t enable_dma)
mhcr = MHCR_ENABLE_INDIRECT_ACCESS |
MHCR_ENABLE_TAGGED_STATUS_MODE |
+ MHCR_ENABLE_PCI_STATE_WRITE |
MHCR_MASK_INTERRUPT_MODE |
- MHCR_MASK_PCI_INT_OUTPUT |
MHCR_CLEAR_INTERRUPT_INTA;
+
+ if (bgep->intr_type == DDI_INTR_TYPE_FIXED)
+ mhcr |= MHCR_MASK_PCI_INT_OUTPUT;
+
#ifdef _BIG_ENDIAN
mhcr |= MHCR_ENABLE_ENDIAN_WORD_SWAP | MHCR_ENABLE_ENDIAN_BYTE_SWAP;
#endif /* _BIG_ENDIAN */
@@ -3449,6 +3572,12 @@ bge_chip_reset(bge_t *bgep, boolean_t enable_dma)
if (bgep->asf_enabled)
bgep->asf_wordswapped = B_FALSE;
#endif
+
+ if (DEVICE_IS_5755_PLUS(bgep) ||
+ MHCR_CHIP_ASIC_REV(bgep->chipid.asic_rev) ==
+ MHCR_CHIP_ASIC_REV_5752)
+ bge_reg_put32(bgep, GRC_FASTBOOT_PC, 0);
+
/*
* NVRAM Corruption Workaround
*/
@@ -3508,6 +3637,11 @@ bge_chip_reset(bge_t *bgep, boolean_t enable_dma)
#else
modeflags = MODE_WORD_SWAP_FRAME | MODE_BYTE_SWAP_FRAME;
#endif /* _BIG_ENDIAN */
+ if (MHCR_CHIP_ASIC_REV(bgep->chipid.asic_rev) ==
+ MHCR_CHIP_ASIC_REV_5720)
+ modeflags |=
+ MODE_BYTE_SWAP_B2HRX_DATA | MODE_WORD_SWAP_B2HRX_DATA |
+ MODE_B2HRX_ENABLE | MODE_HTX2B_ENABLE;
#ifdef BGE_IPMI_ASF
if (bgep->asf_enabled)
modeflags |= MODE_HOST_STACK_UP;
@@ -3592,6 +3726,13 @@ bge_chip_reset(bge_t *bgep, boolean_t enable_dma)
*/
bge_reg_put32(bgep, ETHERNET_MAC_MODE_REG, 0);
+ if (MHCR_CHIP_ASIC_REV(bgep->chipid.asic_rev) ==
+ MHCR_CHIP_ASIC_REV_5720) {
+ uint32_t regval = bge_reg_get32(bgep, CPMU_CLCK_ORIDE_REG);
+ bge_reg_put32(bgep, CPMU_CLCK_ORIDE_REG,
+ regval & ~CPMU_CLCK_ORIDE_MAC_ORIDE_EN);
+ }
+
/*
* Step 21: restore cache-line-size, latency timer, and
* subsystem ID registers to their original values (not
@@ -3818,8 +3959,17 @@ bge_chip_start(bge_t *bgep, boolean_t reset_phys)
/*
* Steps 34-36: enable buffer manager & internal h/w queues
*/
- if (!bge_chip_enable_engine(bgep, BUFFER_MANAGER_MODE_REG,
- STATE_MACHINE_ATTN_ENABLE_BIT))
+
+ regval = STATE_MACHINE_ATTN_ENABLE_BIT;
+ if (MHCR_CHIP_ASIC_REV(bgep->chipid.asic_rev) ==
+ MHCR_CHIP_ASIC_REV_5719)
+ regval |= BUFF_MGR_NO_TX_UNDERRUN;
+ if (MHCR_CHIP_ASIC_REV(bgep->chipid.asic_rev) ==
+ MHCR_CHIP_ASIC_REV_5717 ||
+ bgep->chipid.asic_rev == MHCR_CHIP_REV_5719_A0 ||
+ bgep->chipid.asic_rev == MHCR_CHIP_REV_5720_A0)
+ regval |= BUFF_MGR_MBUF_LOW_ATTN_ENABLE;
+ if (!bge_chip_enable_engine(bgep, BUFFER_MANAGER_MODE_REG, regval))
retval = DDI_FAILURE;
if (!bge_chip_enable_engine(bgep, FTQ_RESET_REG, 0))
retval = DDI_FAILURE;
@@ -3913,7 +4063,13 @@ bge_chip_start(bge_t *bgep, boolean_t reset_phys)
/*
* Step 50: configure the IPG et al
*/
- bge_reg_put32(bgep, MAC_TX_LENGTHS_REG, MAC_TX_LENGTHS_DEFAULT);
+ regval = MAC_TX_LENGTHS_DEFAULT;
+ if (MHCR_CHIP_ASIC_REV(bgep->chipid.asic_rev)
+ == MHCR_CHIP_ASIC_REV_5720)
+ regval |= bge_reg_get32(bgep, MAC_TX_LENGTHS_REG) &
+ (MAC_TX_LENGTHS_JMB_FRM_LEN_MSK |
+ MAC_TX_LENGTHS_CNT_DWN_VAL_MSK);
+ bge_reg_put32(bgep, MAC_TX_LENGTHS_REG, regval);
/*
* Step 51: configure the default Rx Return Ring
@@ -4068,22 +4224,45 @@ bge_chip_start(bge_t *bgep, boolean_t reset_phys)
retval = DDI_FAILURE;
dma_wrprio = (bge_dma_wrprio << DMA_PRIORITY_SHIFT) |
ALL_DMA_ATTN_BITS;
- if ((MHCR_CHIP_ASIC_REV(bgep->chipid.asic_rev) ==
- MHCR_CHIP_ASIC_REV_5755) ||
- (MHCR_CHIP_ASIC_REV(bgep->chipid.asic_rev) ==
- MHCR_CHIP_ASIC_REV_5723) ||
- (MHCR_CHIP_ASIC_REV(bgep->chipid.asic_rev) ==
- MHCR_CHIP_ASIC_REV_5906)) {
+ if (DEVICE_IS_5755_PLUS(bgep))
dma_wrprio |= DMA_STATUS_TAG_FIX_CQ12384;
- }
if (!bge_chip_enable_engine(bgep, WRITE_DMA_MODE_REG,
dma_wrprio))
retval = DDI_FAILURE;
+ if (MHCR_CHIP_ASIC_REV(bgep->chipid.asic_rev) ==
+ MHCR_CHIP_ASIC_REV_5761 ||
+ MHCR_CHIP_ASIC_REV(bgep->chipid.asic_rev) ==
+ MHCR_CHIP_ASIC_REV_5784 ||
+ MHCR_CHIP_ASIC_REV(bgep->chipid.asic_rev) ==
+ MHCR_CHIP_ASIC_REV_5785 ||
+ MHCR_CHIP_ASIC_REV(bgep->chipid.asic_rev) ==
+ MHCR_CHIP_ASIC_REV_57780 ||
+ DEVICE_IS_57765_PLUS(bgep)) {
+ regval = bge_reg_get32(bgep, READ_DMA_RESERVED_CONTROL_REG);
+ if (MHCR_CHIP_ASIC_REV(bgep->chipid.asic_rev) ==
+ MHCR_CHIP_ASIC_REV_5719 ||
+ MHCR_CHIP_ASIC_REV(bgep->chipid.asic_rev) ==
+ MHCR_CHIP_ASIC_REV_5720) {
+ regval &= ~(RDMA_RSRVCTRL_TXMRGN_MASK |
+ RDMA_RSRVCTRL_FIFO_LWM_MASK |
+ RDMA_RSRVCTRL_FIFO_HWM_MASK);
+ regval |= RDMA_RSRVCTRL_TXMRGN_320B |
+ RDMA_RSRVCTRL_FIFO_LWM_1_5K |
+ RDMA_RSRVCTRL_FIFO_HWM_1_5K;
+ }
+ bge_reg_put32(bgep, READ_DMA_RESERVED_CONTROL_REG,
+ regval | RDMA_RSRVCTRL_FIFO_OFLW_FIX);
+ }
if (DEVICE_5723_SERIES_CHIPSETS(bgep) ||
DEVICE_5717_SERIES_CHIPSETS(bgep))
bge_dma_rdprio = 0;
+ regval = bge_dma_rdprio << DMA_PRIORITY_SHIFT;
+ if (MHCR_CHIP_ASIC_REV(bgep->chipid.asic_rev) ==
+ MHCR_CHIP_ASIC_REV_5720)
+ regval |= bge_reg_get32(bgep, READ_DMA_MODE_REG) &
+ DMA_H2BNC_VLAN_DET;
if (!bge_chip_enable_engine(bgep, READ_DMA_MODE_REG,
- (bge_dma_rdprio << DMA_PRIORITY_SHIFT) | ALL_DMA_ATTN_BITS))
+ regval | ALL_DMA_ATTN_BITS))
retval = DDI_FAILURE;
if (!bge_chip_enable_engine(bgep, RCV_DATA_COMPLETION_MODE_REG,
STATE_MACHINE_ATTN_ENABLE_BIT))
@@ -4116,7 +4295,23 @@ bge_chip_start(bge_t *bgep, boolean_t reset_phys)
* Step 88: download firmware -- doesn't apply
* Steps 89-90: enable Transmit & Receive MAC Engines
*/
- if (!bge_chip_enable_engine(bgep, TRANSMIT_MAC_MODE_REG, 0))
+ if (DEVICE_IS_5755_PLUS(bgep) ||
+ MHCR_CHIP_ASIC_REV(bgep->chipid.asic_rev) ==
+ MHCR_CHIP_ASIC_REV_5906) {
+ regval = bge_reg_get32(bgep, TRANSMIT_MAC_MODE_REG);
+ regval |= TRANSMIT_MODE_MBUF_LOCKUP_FIX;
+ } else {
+ regval = 0;
+ }
+ if (MHCR_CHIP_ASIC_REV(bgep->chipid.asic_rev) ==
+ MHCR_CHIP_ASIC_REV_5720) {
+ regval &= ~(TRANSMIT_MODE_HTX2B_JMB_FRM_LEN |
+ TRANSMIT_MODE_HTX2B_CNT_DN_MODE);
+ regval |= bge_reg_get32(bgep, TRANSMIT_MAC_MODE_REG) &
+ (TRANSMIT_MODE_HTX2B_JMB_FRM_LEN |
+ TRANSMIT_MODE_HTX2B_CNT_DN_MODE);
+ }
+ if (!bge_chip_enable_engine(bgep, TRANSMIT_MAC_MODE_REG, regval))
retval = DDI_FAILURE;
#ifdef BGE_IPMI_ASF
if (!bgep->asf_enabled) {
@@ -4219,7 +4414,6 @@ bge_chip_start(bge_t *bgep, boolean_t reset_phys)
if (bgep->intr_type == DDI_INTR_TYPE_FIXED)
bge_cfg_clr32(bgep, PCI_CONF_BGE_MHCR,
bgep->chipid.mask_pci_int);
-
/*
* All done!
*/
diff --git a/usr/src/uts/common/io/bge/bge_hw.h b/usr/src/uts/common/io/bge/bge_hw.h
index f8e6c4d09a..cfcae929dd 100644
--- a/usr/src/uts/common/io/bge/bge_hw.h
+++ b/usr/src/uts/common/io/bge/bge_hw.h
@@ -23,6 +23,10 @@
* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
*/
+/*
+ * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
+ */
+
#ifndef _BGE_HW_H
#define _BGE_HW_H
@@ -68,9 +72,12 @@ extern "C" {
#define DEVICE_ID_5724 0x165c
#define DEVICE_ID_5705M 0x165d
#define DEVICE_ID_5705MA3 0x165e
+#define DEVICE_ID_5719 0x1657
+#define DEVICE_ID_5720 0x165f
#define DEVICE_ID_5705F 0x166e
#define DEVICE_ID_5780 0x166a
#define DEVICE_ID_5782 0x1696
+#define DEVICE_ID_5784M 0x1698
#define DEVICE_ID_5785 0x1699
#define DEVICE_ID_5787 0x169b
#define DEVICE_ID_5787M 0x1693
@@ -92,12 +99,27 @@ extern "C" {
#define DEVICE_ID_5714S 0x1669
#define DEVICE_ID_5715C 0x1678
#define DEVICE_ID_5715S 0x1679
-#define DEVICE_ID_5761E 0x1680
#define DEVICE_ID_5761 0x1681
+#define DEVICE_ID_5761E 0x1680
+#define DEVICE_ID_5761S 0x1688
+#define DEVICE_ID_5761SE 0x1689
#define DEVICE_ID_5764 0x1684
#define DEVICE_ID_5906 0x1712
#define DEVICE_ID_5906M 0x1713
+#define DEVICE_ID_57760 0x1690
#define DEVICE_ID_57780 0x1692
+#define DEVICE_ID_57788 0x1691
+#define DEVICE_ID_57790 0x1694
+#define DEVICE_ID_57781 0x16b1
+#define DEVICE_ID_57785 0x16b5
+#define DEVICE_ID_57761 0x16b0
+#define DEVICE_ID_57765 0x16b4
+#define DEVICE_ID_57791 0x16b2
+#define DEVICE_ID_57795 0x16b6
+#define DEVICE_ID_57762 0x1682
+#define DEVICE_ID_57766 0x1686
+#define DEVICE_ID_57786 0x16b3
+#define DEVICE_ID_57782 0x16b7
#define REVISION_ID_5700_B0 0x10
#define REVISION_ID_5700_B2 0x12
@@ -189,15 +211,23 @@ extern "C" {
#define DEVICE_5717_SERIES_CHIPSETS(bgep) \
(bgep->chipid.device == DEVICE_ID_5717) ||\
(bgep->chipid.device == DEVICE_ID_5718) ||\
+ (bgep->chipid.device == DEVICE_ID_5719) ||\
+ (bgep->chipid.device == DEVICE_ID_5720) ||\
(bgep->chipid.device == DEVICE_ID_5724)
#define DEVICE_5723_SERIES_CHIPSETS(bgep) \
((bgep->chipid.device == DEVICE_ID_5723) ||\
(bgep->chipid.device == DEVICE_ID_5761) ||\
(bgep->chipid.device == DEVICE_ID_5761E) ||\
+ (bgep->chipid.device == DEVICE_ID_5761S) ||\
+ (bgep->chipid.device == DEVICE_ID_5761SE) ||\
(bgep->chipid.device == DEVICE_ID_5764) ||\
+ (bgep->chipid.device == DEVICE_ID_5784M) ||\
(bgep->chipid.device == DEVICE_ID_5785) ||\
- (bgep->chipid.device == DEVICE_ID_57780))
+ (bgep->chipid.device == DEVICE_ID_57760) ||\
+ (bgep->chipid.device == DEVICE_ID_57780) ||\
+ (bgep->chipid.device == DEVICE_ID_57788) ||\
+ (bgep->chipid.device == DEVICE_ID_57790))
#define DEVICE_5714_SERIES_CHIPSETS(bgep) \
((bgep->chipid.device == DEVICE_ID_5714C) ||\
@@ -209,6 +239,20 @@ extern "C" {
((bgep->chipid.device == DEVICE_ID_5906) ||\
(bgep->chipid.device == DEVICE_ID_5906M))
+
+#define CHIP_TYPE_5705_PLUS (1 << 0)
+#define CHIP_TYPE_5750_PLUS (1 << 1)
+#define CHIP_TYPE_5780_CLASS (1 << 2)
+#define CHIP_TYPE_5755_PLUS (1 << 3)
+#define CHIP_TYPE_57765_CLASS (1 << 4)
+#define CHIP_TYPE_57765_PLUS (1 << 5)
+#define CHIP_TYPE_5717_PLUS (1 << 6)
+
+#define DEVICE_IS_57765_PLUS(bgep) \
+ (bgep->chipid.chip_type & CHIP_TYPE_57765_PLUS)
+#define DEVICE_IS_5755_PLUS(bgep) \
+ (bgep->chipid.chip_type & CHIP_TYPE_5755_PLUS)
+
/*
* Second section:
* Offsets of important registers & definitions for bits therein
@@ -225,6 +269,7 @@ extern "C" {
*/
#define PCI_CONF_BGE_MHCR 0x68
#define MHCR_CHIP_REV_MASK 0xffff0000
+#define MHCR_CHIP_REV_SHIFT 16
#define MHCR_ENABLE_TAGGED_STATUS_MODE 0x00000200
#define MHCR_MASK_INTERRUPT_MODE 0x00000100
#define MHCR_ENABLE_INDIRECT_ACCESS 0x00000080
@@ -236,95 +281,38 @@ extern "C" {
#define MHCR_MASK_PCI_INT_OUTPUT 0x00000002
#define MHCR_CLEAR_INTERRUPT_INTA 0x00000001
-#define MHCR_CHIP_REV_5700_B0 0x71000000
-#define MHCR_CHIP_REV_5700_B2 0x71020000
-#define MHCR_CHIP_REV_5700_B3 0x71030000
-#define MHCR_CHIP_REV_5700_C0 0x72000000
-#define MHCR_CHIP_REV_5700_C1 0x72010000
-#define MHCR_CHIP_REV_5700_C2 0x72020000
-
-#define MHCR_CHIP_REV_5701_A0 0x00000000
-#define MHCR_CHIP_REV_5701_A2 0x00020000
-#define MHCR_CHIP_REV_5701_A3 0x00030000
-#define MHCR_CHIP_REV_5701_A5 0x01050000
-
-#define MHCR_CHIP_REV_5702_A0 0x10000000
-#define MHCR_CHIP_REV_5702_A1 0x10010000
-#define MHCR_CHIP_REV_5702_A2 0x10020000
-
-#define MHCR_CHIP_REV_5703_A0 0x10000000
-#define MHCR_CHIP_REV_5703_A1 0x10010000
-#define MHCR_CHIP_REV_5703_A2 0x10020000
-#define MHCR_CHIP_REV_5703_B0 0x11000000
-#define MHCR_CHIP_REV_5703_B1 0x11010000
-
-#define MHCR_CHIP_REV_5704_A0 0x20000000
-#define MHCR_CHIP_REV_5704_A1 0x20010000
-#define MHCR_CHIP_REV_5704_A2 0x20020000
-#define MHCR_CHIP_REV_5704_A3 0x20030000
-#define MHCR_CHIP_REV_5704_B0 0x21000000
-
-#define MHCR_CHIP_REV_5705_A0 0x30000000
-#define MHCR_CHIP_REV_5705_A1 0x30010000
-#define MHCR_CHIP_REV_5705_A2 0x30020000
-#define MHCR_CHIP_REV_5705_A3 0x30030000
-#define MHCR_CHIP_REV_5705_A5 0x30050000
-
-#define MHCR_CHIP_REV_5782_A0 0x30030000
-#define MHCR_CHIP_REV_5782_A1 0x30030088
-
-#define MHCR_CHIP_REV_5788_A1 0x30050000
-
-#define MHCR_CHIP_REV_5751_A0 0x40000000
-#define MHCR_CHIP_REV_5751_A1 0x40010000
-
-#define MHCR_CHIP_REV_5721_A0 0x41000000
-#define MHCR_CHIP_REV_5721_A1 0x41010000
-
-#define MHCR_CHIP_REV_5714_A0 0x50000000
-#define MHCR_CHIP_REV_5714_A1 0x90010000
-
-#define MHCR_CHIP_REV_5715_A0 0x50000000
-#define MHCR_CHIP_REV_5715_A1 0x90010000
-
-#define MHCR_CHIP_REV_5715S_A0 0x50000000
-#define MHCR_CHIP_REV_5715S_A1 0x90010000
-
-#define MHCR_CHIP_REV_5754_A0 0xb0000000
-#define MHCR_CHIP_REV_5754_A1 0xb0010000
-
-#define MHCR_CHIP_REV_5787_A0 0xb0000000
-#define MHCR_CHIP_REV_5787_A1 0xb0010000
-#define MHCR_CHIP_REV_5787_A2 0xb0020000
-
-#define MHCR_CHIP_REV_5755_A0 0xa0000000
-#define MHCR_CHIP_REV_5755_A1 0xa0010000
-
-#define MHCR_CHIP_REV_5906_A0 0xc0000000
-#define MHCR_CHIP_REV_5906_A1 0xc0010000
-#define MHCR_CHIP_REV_5906_A2 0xc0020000
-
-#define MHCR_CHIP_REV_5723_A0 0xf0000000
-#define MHCR_CHIP_REV_5723_A1 0xf0010000
-#define MHCR_CHIP_REV_5723_A2 0xf0020000
-#define MHCR_CHIP_REV_5723_B0 0xf1000000
-
-#define MHCR_CHIP_ASIC_REV(ChipRevId) ((ChipRevId) & 0xf0000000)
-#define MHCR_CHIP_ASIC_REV_5700 (0x7 << 28)
-#define MHCR_CHIP_ASIC_REV_5701 (0x0 << 28)
-#define MHCR_CHIP_ASIC_REV_5703 (0x1 << 28)
-#define MHCR_CHIP_ASIC_REV_5704 (0x2 << 28)
-#define MHCR_CHIP_ASIC_REV_5705 (0x3 << 28)
-#define MHCR_CHIP_ASIC_REV_5721_5751 (0x4 << 28)
-#define MHCR_CHIP_ASIC_REV_5714 (0x5 << 28)
-#define MHCR_CHIP_ASIC_REV_5752 (0x6 << 28)
-#define MHCR_CHIP_ASIC_REV_5754 (0xb << 28)
-#define MHCR_CHIP_ASIC_REV_5787 ((uint32_t)0xb << 28)
-#define MHCR_CHIP_ASIC_REV_5755 ((uint32_t)0xa << 28)
-#define MHCR_CHIP_ASIC_REV_5715 ((uint32_t)0x9 << 28)
-#define MHCR_CHIP_ASIC_REV_5906 ((uint32_t)0xc << 28)
-#define MHCR_CHIP_ASIC_REV_5723 ((uint32_t)0xf << 28)
-
+#define MHCR_CHIP_REV_5703_A0 0x1000
+#define MHCR_CHIP_REV_5704_A0 0x2000
+#define MHCR_CHIP_REV_5751_A0 0x4000
+#define MHCR_CHIP_REV_5721_A0 0x4100
+#define MHCR_CHIP_REV_5755_A0 0xa000
+#define MHCR_CHIP_REV_5755_A1 0xa001
+#define MHCR_CHIP_REV_5719_A0 0x05719000
+#define MHCR_CHIP_REV_5720_A0 0x05720000
+
+#define MHCR_CHIP_ASIC_REV(ChipRevId) ((ChipRevId) >> 12)
+#define MHCR_CHIP_ASIC_REV_5700 0x07
+#define MHCR_CHIP_ASIC_REV_5701 0x00
+#define MHCR_CHIP_ASIC_REV_5703 0x01
+#define MHCR_CHIP_ASIC_REV_5704 0x02
+#define MHCR_CHIP_ASIC_REV_5705 0x03
+#define MHCR_CHIP_ASIC_REV_5750 0x04
+#define MHCR_CHIP_ASIC_REV_5752 0x06
+#define MHCR_CHIP_ASIC_REV_5780 0x08
+#define MHCR_CHIP_ASIC_REV_5714 0x09
+#define MHCR_CHIP_ASIC_REV_5755 0x0a
+#define MHCR_CHIP_ASIC_REV_5787 0x0b
+#define MHCR_CHIP_ASIC_REV_5906 0x0c
+#define MHCR_CHIP_ASIC_REV_PRODID 0x0f
+#define MHCR_CHIP_ASIC_REV_5784 0x5784
+#define MHCR_CHIP_ASIC_REV_5761 0x5761
+#define MHCR_CHIP_ASIC_REV_5785 0x5785
+#define MHCR_CHIP_ASIC_REV_5717 0x5717
+#define MHCR_CHIP_ASIC_REV_5719 0x5719
+#define MHCR_CHIP_ASIC_REV_5720 0x5720
+#define MHCR_CHIP_ASIC_REV_57780 0x57780
+#define MHCR_CHIP_ASIC_REV_57765 0x57785
+#define MHCR_CHIP_ASIC_REV_57766 0x57766
/*
* PCI DMA read/write Control Register, in PCI config space
@@ -466,6 +454,10 @@ extern "C" {
#define PCI_CONF_DEV_STUS_5723 0xd6
#define DEVICE_ERROR_STUS 0xf
+#define PCI_CONF_PRODID_ASICREV 0x000000bc
+#define PCI_CONF_GEN2_PRODID_ASICREV 0x000000f4
+#define PCI_CONF_GEN15_PRODID_ASICREV 0x000000fc
+
#define NIC_MEM_WINDOW_OFFSET 0x00008000 /* 32k */
/*
@@ -541,6 +533,7 @@ extern "C" {
#define MEMORY_ARBITER_MODE_REG 0x4000
#define BUFFER_MANAGER_MODE_REG 0x4400
#define READ_DMA_MODE_REG 0x4800
+#define READ_DMA_RESERVED_CONTROL_REG 0x4900
#define WRITE_DMA_MODE_REG 0x4c00
#define DMA_COMPLETION_MODE_REG 0x6400
@@ -552,6 +545,9 @@ extern "C" {
* Transmit MAC Mode Register
* (TRANSMIT_MAC_MODE_REG, 0x045c)
*/
+#define TRANSMIT_MODE_HTX2B_CNT_DN_MODE 0x00800000
+#define TRANSMIT_MODE_HTX2B_JMB_FRM_LEN 0x00400000
+#define TRANSMIT_MODE_MBUF_LOCKUP_FIX 0x00000100
#define TRANSMIT_MODE_LONG_PAUSE 0x00000040
#define TRANSMIT_MODE_BIG_BACKOFF 0x00000020
#define TRANSMIT_MODE_FLOW_CONTROL 0x00000010
@@ -619,12 +615,14 @@ extern "C" {
*/
#define BUFF_MGR_TEST_MODE 0x00000008
#define BUFF_MGR_MBUF_LOW_ATTN_ENABLE 0x00000010
+#define BUFF_MGR_NO_TX_UNDERRUN 0x80000000
#define BUFF_MGR_ALL_ATTN_BITS 0x00000014
/*
* Read and Write DMA Mode Registers (READ_DMA_MODE_REG,
- * 0x4800 and WRITE_DMA_MODE_REG, 0x4c00)
+ * 0x4800, READ_DMA_RESERVED_CONTROL_REG, 0x4900,
+ * WRITE_DMA_MODE_REG, 0x4c00)
*
* These registers each contain a 2-bit priority field, which controls
* the relative priority of that type of DMA (read vs. write vs. MSI),
@@ -635,6 +633,15 @@ extern "C" {
#define DMA_PRIORITY_SHIFT 30
#define ALL_DMA_ATTN_BITS 0x000003fc
+#define RDMA_RSRVCTRL_FIFO_OFLW_FIX 0x00000004
+#define RDMA_RSRVCTRL_FIFO_LWM_1_5K 0x00000c00
+#define RDMA_RSRVCTRL_FIFO_LWM_MASK 0x00000ff0
+#define RDMA_RSRVCTRL_FIFO_HWM_1_5K 0x000c0000
+#define RDMA_RSRVCTRL_FIFO_HWM_MASK 0x000ff000
+#define RDMA_RSRVCTRL_TXMRGN_320B 0x28000000
+#define RDMA_RSRVCTRL_TXMRGN_MASK 0xffe00000
+
+
/*
* BCM5755, 5755M, 5906, 5906M only
* 1 - Enable Fix. Device will send out the status block before
@@ -644,6 +651,10 @@ extern "C" {
*/
#define DMA_STATUS_TAG_FIX_CQ12384 0x20000000
+/* 5720 only */
+#define DMA_H2BNC_VLAN_DET 0x20000000
+
+
/*
* End of state machine control register definitions
*/
@@ -781,6 +792,8 @@ extern "C" {
#define MAC_RX_MTU_DEFAULT 0x000005f2 /* 1522 */
#define MAC_TX_LENGTHS_REG 0x0464
#define MAC_TX_LENGTHS_DEFAULT 0x00002620
+#define MAC_TX_LENGTHS_JMB_FRM_LEN_MSK 0x00ff0000
+#define MAC_TX_LENGTHS_CNT_DWN_VAL_MSK 0xff000000
/*
* MII access registers
@@ -1069,10 +1082,16 @@ extern "C" {
#define JUMBO_RCV_BD_REPLENISH_DEFAULT 0x00000020 /* 32 */
/*
- * CPMU registers (5717/5718 only)
+ * CPMU registers (5717/5718/5719/5720 only)
*/
-#define CPMU_STATUS_REG 0x362c
-#define CPMU_STATUS_FUN_NUM 0x20000000
+#define CPMU_CLCK_ORIDE_REG 0x3624
+#define CPMU_CLCK_ORIDE_MAC_ORIDE_EN 0x80000000
+
+#define CPMU_STATUS_REG 0x362c
+#define CPMU_STATUS_FUN_NUM_5717 0x20000000
+#define CPMU_STATUS_FUN_NUM_5719 0xc0000000
+#define CPMU_STATUS_FUN_NUM_5719_SHIFT 30
+
/*
* Host Coalescing Engine Control Registers
@@ -1191,6 +1210,8 @@ extern "C" {
#define VCPU_EXT_CTL 0x6890
#define VCPU_EXT_CTL_HALF 0x00400000
+#define GRC_FASTBOOT_PC 0x6894
+
#define FTQ_RESET_REG 0x5c00
#define MSI_MODE_REG 0x6000
@@ -1210,14 +1231,18 @@ extern "C" {
#define MODE_INT_ON_TXRISC_ATTN 0x01000000
#define MODE_RECV_NO_PSEUDO_HDR_CSUM 0x00800000
#define MODE_SEND_NO_PSEUDO_HDR_CSUM 0x00100000
+#define MODE_HTX2B_ENABLE 0x00040000
#define MODE_HOST_SEND_BDS 0x00020000
#define MODE_HOST_STACK_UP 0x00010000
#define MODE_FORCE_32_BIT_PCI 0x00008000
+#define MODE_B2HRX_ENABLE 0x00008000
#define MODE_NO_INT_ON_RECV 0x00004000
#define MODE_NO_INT_ON_SEND 0x00002000
#define MODE_ALLOW_BAD_FRAMES 0x00000800
#define MODE_NO_CRC 0x00000400
#define MODE_NO_FRAME_CRACKING 0x00000200
+#define MODE_WORD_SWAP_B2HRX_DATA 0x00000080
+#define MODE_BYTE_SWAP_B2HRX_DATA 0x00000040
#define MODE_WORD_SWAP_FRAME 0x00000020
#define MODE_BYTE_SWAP_FRAME 0x00000010
#define MODE_WORD_SWAP_NONFRAME 0x00000004
@@ -1246,7 +1271,7 @@ extern "C" {
*/
#define CORE_CLOCK_MHZ 66
#define MISC_CONFIG_REG 0x6804
-#define MISC_CONFIG_GRC_RESET_DISABLE 0x20000000
+#define MISC_CONFIG_GRC_RESET_DISABLE 0x20000000
#define MISC_CONFIG_GPHY_POWERDOWN_OVERRIDE 0x04000000
#define MISC_CONFIG_POWERDOWN 0x00100000
#define MISC_CONFIG_POWER_STATE 0x00060000
@@ -1567,6 +1592,7 @@ extern "C" {
#define BGE_MINI_SLOTS_MAX 1024
#define BGE_RECV_SLOTS_MAX 2048
#define BGE_RECV_SLOTS_5705 512
+#define BGE_RECV_SLOTS_5717 1024
#define BGE_RECV_SLOTS_5782 512
#define BGE_RECV_SLOTS_5721 512
diff --git a/usr/src/uts/common/io/bge/bge_impl.h b/usr/src/uts/common/io/bge/bge_impl.h
index 772c989092..0c51c2bc8e 100644
--- a/usr/src/uts/common/io/bge/bge_impl.h
+++ b/usr/src/uts/common/io/bge/bge_impl.h
@@ -23,6 +23,10 @@
* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
*/
+/*
+ * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
+ */
+
#ifndef _BGE_IMPL_H
#define _BGE_IMPL_H
@@ -605,6 +609,7 @@ typedef struct {
uint8_t latency; /* latency-timer */
uint8_t flags;
+ uint32_t chip_type; /* see CHIP_TYPE_ in bge_hw.h */
uint16_t chip_label; /* numeric part only */
/* (e.g. 5703/5794/etc) */
uint32_t mbuf_base; /* Mbuf pool parameters */
@@ -640,10 +645,11 @@ typedef struct {
uint32_t mask_pci_int;
} chip_id_t;
-#define CHIP_FLAG_SUPPORTED 0x80
-#define CHIP_FLAG_SERDES 0x40
-#define CHIP_FLAG_PARTIAL_CSUM 0x20
-#define CHIP_FLAG_NO_JUMBO 0x1
+#define CHIP_FLAG_SUPPORTED 0x80
+#define CHIP_FLAG_SERDES 0x40
+#define CHIP_FLAG_PARTIAL_CSUM 0x20
+#define CHIP_FLAG_NO_CHECK_RESET 0x2
+#define CHIP_FLAG_NO_JUMBO 0x1
/*
* Collection of physical-layer functions to:
diff --git a/usr/src/uts/common/io/bge/bge_main2.c b/usr/src/uts/common/io/bge/bge_main2.c
index f191f313c0..d0f309730d 100644
--- a/usr/src/uts/common/io/bge/bge_main2.c
+++ b/usr/src/uts/common/io/bge/bge_main2.c
@@ -23,6 +23,10 @@
* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
*/
+/*
+ * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
+ */
+
#include "bge_impl.h"
#include <sys/sdt.h>
#include <sys/mac_provider.h>
@@ -3211,13 +3215,17 @@ bge_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
*/
if (DEVICE_5717_SERIES_CHIPSETS(bgep))
pci_config_put32(bgep->cfg_handle, PCI_CONF_BGE_MHCR, 0);
+#else
+ mhcrValue = MHCR_ENABLE_INDIRECT_ACCESS |
+ MHCR_ENABLE_TAGGED_STATUS_MODE |
+ MHCR_MASK_INTERRUPT_MODE |
+ MHCR_MASK_PCI_INT_OUTPUT |
+ MHCR_CLEAR_INTERRUPT_INTA;
+#endif
pci_config_put32(bgep->cfg_handle, PCI_CONF_BGE_MHCR, mhcrValue);
bge_ind_put32(bgep, MEMORY_ARBITER_MODE_REG,
bge_ind_get32(bgep, MEMORY_ARBITER_MODE_REG) |
MEMORY_ARBITER_ENABLE);
-#else
- mhcrValue = pci_config_get32(bgep->cfg_handle, PCI_CONF_BGE_MHCR);
-#endif
if (mhcrValue & MHCR_ENABLE_ENDIAN_WORD_SWAP) {
bgep->asf_wordswapped = B_TRUE;
} else {
diff --git a/usr/src/uts/common/io/bge/bge_mii.c b/usr/src/uts/common/io/bge/bge_mii.c
index f24b6a3f16..b47c043d8c 100644
--- a/usr/src/uts/common/io/bge/bge_mii.c
+++ b/usr/src/uts/common/io/bge/bge_mii.c
@@ -23,6 +23,10 @@
* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
*/
+/*
+ * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
+ */
+
#include "bge_impl.h"
/*
@@ -207,6 +211,7 @@ bge_phy_reset(bge_t *bgep)
{
uint16_t control;
uint_t count;
+ boolean_t ret = B_FALSE;
BGE_TRACE(("bge_phy_reset($%p)", (void *)bgep));
@@ -221,22 +226,26 @@ bge_phy_reset(bge_t *bgep)
}
/*
- * Set the PHY RESET bit, then wait up to 5 ms for it to self-clear
+ * Set the PHY RESET bit, then wait up to 50 ms for it to self-clear
*/
bge_mii_put16(bgep, MII_CONTROL, MII_CONTROL_RESET);
- for (count = 0; ++count < 1000; ) {
- drv_usecwait(5);
+ for (count = 0; ++count < 5000; ) {
control = bge_mii_get16(bgep, MII_CONTROL);
- if (BIC(control, MII_CONTROL_RESET))
- return (B_TRUE);
+ if (BIC(control, MII_CONTROL_RESET)) {
+ drv_usecwait(40);
+ ret = B_TRUE;
+ break;
+ }
+ drv_usecwait(10);
}
- if (DEVICE_5906_SERIES_CHIPSETS(bgep))
+ if (ret == B_TRUE && DEVICE_5906_SERIES_CHIPSETS(bgep))
(void) bge_adj_volt_5906(bgep);
- BGE_DEBUG(("bge_phy_reset: FAILED, control now 0x%x", control));
+ if (ret == B_FALSE)
+ BGE_DEBUG(("bge_phy_reset: FAILED, control now 0x%x", control));
- return (B_FALSE);
+ return (ret);
}
/*
@@ -541,34 +550,14 @@ bge_restart_copper(bge_t *bgep, boolean_t powerdown)
ASSERT(mutex_owned(bgep->genlock));
- switch (MHCR_CHIP_ASIC_REV(bgep->chipid.asic_rev)) {
- default:
- /*
- * Shouldn't happen; it means we don't recognise this chip.
- * It's probably a new one, so we'll try our best anyway ...
- */
- case MHCR_CHIP_ASIC_REV_5703:
- case MHCR_CHIP_ASIC_REV_5704:
- case MHCR_CHIP_ASIC_REV_5705:
- case MHCR_CHIP_ASIC_REV_5752:
- case MHCR_CHIP_ASIC_REV_5714:
- case MHCR_CHIP_ASIC_REV_5715:
- reset_ok = bge_phy_reset_and_check(bgep);
- break;
-
- case MHCR_CHIP_ASIC_REV_5906:
- case MHCR_CHIP_ASIC_REV_5700:
- case MHCR_CHIP_ASIC_REV_5701:
- case MHCR_CHIP_ASIC_REV_5723:
- case MHCR_CHIP_ASIC_REV_5721_5751:
- /*
- * Just a plain reset; the "check" code breaks these chips
- */
+ if (bgep->chipid.flags & CHIP_FLAG_NO_CHECK_RESET) {
reset_ok = bge_phy_reset(bgep);
if (!reset_ok)
bge_fm_ereport(bgep, DDI_FM_DEVICE_NO_RESPONSE);
- break;
+ } else {
+ reset_ok = bge_phy_reset_and_check(bgep);
}
+
if (!reset_ok) {
BGE_REPORT((bgep, "PHY failed to reset correctly"));
return (DDI_FAILURE);
@@ -590,7 +579,7 @@ bge_restart_copper(bge_t *bgep, boolean_t powerdown)
switch (MHCR_CHIP_ASIC_REV(bgep->chipid.asic_rev)) {
case MHCR_CHIP_ASIC_REV_5705:
- case MHCR_CHIP_ASIC_REV_5721_5751:
+ case MHCR_CHIP_ASIC_REV_5750:
bge_phy_bit_err_fix(bgep);
break;
}
@@ -1507,14 +1496,22 @@ bge_phys_init(bge_t *bgep)
*/
bgep->phy_mii_addr = 1;
if (DEVICE_5717_SERIES_CHIPSETS(bgep)) {
- int regval = bge_reg_get32(bgep, CPMU_STATUS_REG);
- if (regval & CPMU_STATUS_FUN_NUM)
- bgep->phy_mii_addr += 1;
+ uint32_t regval = bge_reg_get32(bgep, CPMU_STATUS_REG);
+ if (MHCR_CHIP_ASIC_REV(bgep->chipid.asic_rev) ==
+ MHCR_CHIP_ASIC_REV_5719 ||
+ MHCR_CHIP_ASIC_REV(bgep->chipid.asic_rev) ==
+ MHCR_CHIP_ASIC_REV_5720) {
+ bgep->phy_mii_addr +=
+ (regval & CPMU_STATUS_FUN_NUM_5719) >>
+ CPMU_STATUS_FUN_NUM_5719_SHIFT;
+ } else {
+ bgep->phy_mii_addr +=
+ (regval & CPMU_STATUS_FUN_NUM_5717) ? 1 : 0;
+ }
regval = bge_reg_get32(bgep, SGMII_STATUS_REG);
if (regval & MEDIA_SELECTION_MODE)
bgep->phy_mii_addr += 7;
}
-
if (bge_phy_probe(bgep)) {
bgep->chipid.flags &= ~CHIP_FLAG_SERDES;
bgep->physops = &copper_ops;
diff --git a/usr/src/uts/common/io/devpoll.c b/usr/src/uts/common/io/devpoll.c
index a3fcbbba03..5be9dc6166 100644
--- a/usr/src/uts/common/io/devpoll.c
+++ b/usr/src/uts/common/io/devpoll.c
@@ -25,6 +25,7 @@
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -45,6 +46,8 @@
#include <sys/devpoll.h>
#include <sys/rctl.h>
#include <sys/resource.h>
+#include <sys/schedctl.h>
+#include <sys/epoll.h>
#define RESERVED 1
@@ -237,7 +240,8 @@ dpinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
* stale entries!
*/
static int
-dp_pcache_poll(pollfd_t *pfdp, pollcache_t *pcp, nfds_t nfds, int *fdcntp)
+dp_pcache_poll(dp_entry_t *dpep, void *dpbuf,
+ pollcache_t *pcp, nfds_t nfds, int *fdcntp)
{
int start, ostart, end;
int fdcnt, fd;
@@ -247,7 +251,10 @@ dp_pcache_poll(pollfd_t *pfdp, pollcache_t *pcp, nfds_t nfds, int *fdcntp)
boolean_t no_wrap;
pollhead_t *php;
polldat_t *pdp;
+ pollfd_t *pfdp;
+ epoll_event_t *epoll;
int error = 0;
+ short mask = POLLRDHUP | POLLWRBAND;
ASSERT(MUTEX_HELD(&pcp->pc_lock));
if (pcp->pc_bitmap == NULL) {
@@ -257,6 +264,14 @@ dp_pcache_poll(pollfd_t *pfdp, pollcache_t *pcp, nfds_t nfds, int *fdcntp)
*/
return (error);
}
+
+ if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) {
+ pfdp = NULL;
+ epoll = (epoll_event_t *)dpbuf;
+ } else {
+ pfdp = (pollfd_t *)dpbuf;
+ epoll = NULL;
+ }
retry:
start = ostart = pcp->pc_mapstart;
end = pcp->pc_mapend;
@@ -316,11 +331,32 @@ repoll:
* polling a closed fd. Hope this will remind
* user to do a POLLREMOVE.
*/
- pfdp[fdcnt].fd = fd;
- pfdp[fdcnt].revents = POLLNVAL;
- fdcnt++;
+ if (pfdp != NULL) {
+ pfdp[fdcnt].fd = fd;
+ pfdp[fdcnt].revents = POLLNVAL;
+ fdcnt++;
+ continue;
+ }
+
+ /*
+ * In the epoll compatibility case, we actually
+ * perform the implicit removal to remain
+ * closer to the epoll semantics.
+ */
+ ASSERT(epoll != NULL);
+
+ pdp->pd_fp = NULL;
+ pdp->pd_events = 0;
+
+ if (php != NULL) {
+ pollhead_delete(php, pdp);
+ pdp->pd_php = NULL;
+ }
+
+ BT_CLEAR(pcp->pc_bitmap, fd);
continue;
}
+
if (fp != pdp->pd_fp) {
/*
* user is polling on a cached fd which was
@@ -376,9 +412,69 @@ repoll:
}
if (revent != 0) {
- pfdp[fdcnt].fd = fd;
- pfdp[fdcnt].events = pdp->pd_events;
- pfdp[fdcnt].revents = revent;
+ if (pfdp != NULL) {
+ pfdp[fdcnt].fd = fd;
+ pfdp[fdcnt].events = pdp->pd_events;
+ pfdp[fdcnt].revents = revent;
+ } else {
+ epoll_event_t *ep = &epoll[fdcnt];
+
+ ASSERT(epoll != NULL);
+ ep->data.u64 = pdp->pd_epolldata;
+
+ /*
+ * If any of the event bits are set for
+ * which poll and epoll representations
+ * differ, swizzle in the native epoll
+ * values.
+ */
+ if (revent & mask) {
+ ep->events = (revent & ~mask) |
+ ((revent & POLLRDHUP) ?
+ EPOLLRDHUP : 0) |
+ ((revent & POLLWRBAND) ?
+ EPOLLWRBAND : 0);
+ } else {
+ ep->events = revent;
+ }
+
+ /*
+ * We define POLLWRNORM to be POLLOUT,
+ * but epoll has separate definitions
+ * for them; if POLLOUT is set and the
+ * user has asked for EPOLLWRNORM, set
+ * that as well.
+ */
+ if ((revent & POLLOUT) &&
+ (pdp->pd_events & EPOLLWRNORM)) {
+ ep->events |= EPOLLWRNORM;
+ }
+ }
+
+ /*
+ * If POLLET is set, clear the bit in the
+ * bitmap -- which effectively latches the
+ * edge on a pollwakeup() from the driver.
+ */
+ if (pdp->pd_events & POLLET)
+ BT_CLEAR(pcp->pc_bitmap, fd);
+
+ /*
+ * If POLLONESHOT is set, perform the implicit
+ * POLLREMOVE.
+ */
+ if (pdp->pd_events & POLLONESHOT) {
+ pdp->pd_fp = NULL;
+ pdp->pd_events = 0;
+
+ if (php != NULL) {
+ pollhead_delete(php, pdp);
+ pdp->pd_php = NULL;
+ }
+
+ BT_CLEAR(pcp->pc_bitmap, fd);
+ }
+
fdcnt++;
} else if (php != NULL) {
/*
@@ -392,7 +488,7 @@ repoll:
* in bitmap.
*/
if ((pdp->pd_php != NULL) &&
- ((pcp->pc_flag & T_POLLWAKE) == 0)) {
+ ((pcp->pc_flag & PC_POLLWAKE) == 0)) {
BT_CLEAR(pcp->pc_bitmap, fd);
}
if (pdp->pd_php == NULL) {
@@ -473,11 +569,15 @@ dpopen(dev_t *devp, int flag, int otyp, cred_t *credp)
/*
* allocate a pollcache skeleton here. Delay allocating bitmap
* structures until dpwrite() time, since we don't know the
- * optimal size yet.
+ * optimal size yet. We also delay setting the pid until either
+ * dpwrite() or attempt to poll on the instance, allowing parents
+ * to create instances of /dev/poll for their children. (In the
+ * epoll compatibility case, this check isn't performed to maintain
+ * semantic compatibility.)
*/
pcp = pcache_alloc();
dpep->dpe_pcache = pcp;
- pcp->pc_pid = curproc->p_pid;
+ pcp->pc_pid = -1;
*devp = makedevice(getmajor(*devp), minordev); /* clone the driver */
mutex_enter(&devpoll_lock);
ASSERT(minordev < dptblsize);
@@ -499,7 +599,9 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp)
dp_entry_t *dpep;
pollcache_t *pcp;
pollfd_t *pollfdp, *pfdp;
- int error;
+ dvpoll_epollfd_t *epfdp;
+ uintptr_t limit;
+ int error, size;
ssize_t uiosize;
nfds_t pollfdnum;
struct pollhead *php = NULL;
@@ -515,11 +617,23 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp)
ASSERT(dpep != NULL);
mutex_exit(&devpoll_lock);
pcp = dpep->dpe_pcache;
- if (curproc->p_pid != pcp->pc_pid) {
- return (EACCES);
+
+ if (!(dpep->dpe_flag & DP_ISEPOLLCOMPAT) &&
+ curproc->p_pid != pcp->pc_pid) {
+ if (pcp->pc_pid != -1)
+ return (EACCES);
+
+ pcp->pc_pid = curproc->p_pid;
}
+
+ if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) {
+ size = sizeof (dvpoll_epollfd_t);
+ } else {
+ size = sizeof (pollfd_t);
+ }
+
uiosize = uiop->uio_resid;
- pollfdnum = uiosize / sizeof (pollfd_t);
+ pollfdnum = uiosize / size;
mutex_enter(&curproc->p_lock);
if (pollfdnum > (uint_t)rctl_enforced_value(
rctlproc_legacy[RLIMIT_NOFILE], curproc->p_rctls, curproc)) {
@@ -534,6 +648,7 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp)
* each polled fd to the cached set.
*/
pollfdp = kmem_alloc(uiosize, KM_SLEEP);
+ limit = (uintptr_t)pollfdp + (pollfdnum * size);
/*
* Although /dev/poll uses the write(2) interface to cache fds, it's
@@ -555,9 +670,27 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp)
mutex_enter(&dpep->dpe_lock);
dpep->dpe_writerwait++;
while (dpep->dpe_refcnt != 0) {
+ /*
+ * We need to do a bit of a dance here: we need to drop
+ * our dpe_lock and grab the pc_lock to broadcast the pc_cv to
+ * kick any DP_POLL/DP_PPOLL sleepers.
+ */
+ mutex_exit(&dpep->dpe_lock);
+ mutex_enter(&pcp->pc_lock);
+ pcp->pc_flag |= PC_WRITEWANTED;
+ cv_broadcast(&pcp->pc_cv);
+ mutex_exit(&pcp->pc_lock);
+ mutex_enter(&dpep->dpe_lock);
+
+ if (dpep->dpe_refcnt == 0)
+ break;
+
if (!cv_wait_sig_swap(&dpep->dpe_cv, &dpep->dpe_lock)) {
dpep->dpe_writerwait--;
mutex_exit(&dpep->dpe_lock);
+ mutex_enter(&pcp->pc_lock);
+ pcp->pc_flag &= ~PC_WRITEWANTED;
+ mutex_exit(&pcp->pc_lock);
kmem_free(pollfdp, uiosize);
return (set_errno(EINTR));
}
@@ -565,13 +698,17 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp)
dpep->dpe_writerwait--;
dpep->dpe_flag |= DP_WRITER_PRESENT;
dpep->dpe_refcnt++;
+
mutex_exit(&dpep->dpe_lock);
mutex_enter(&pcp->pc_lock);
+ pcp->pc_flag &= ~PC_WRITEWANTED;
+
if (pcp->pc_bitmap == NULL) {
pcache_create(pcp, pollfdnum);
}
- for (pfdp = pollfdp; pfdp < pollfdp + pollfdnum; pfdp++) {
+ for (pfdp = pollfdp; (uintptr_t)pfdp < limit;
+ pfdp = (pollfd_t *)((uintptr_t)pfdp + size)) {
fd = pfdp->fd;
if ((uint_t)fd >= P_FINFO(curproc)->fi_nfiles)
continue;
@@ -582,7 +719,24 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp)
pdp->pd_fd = fd;
pdp->pd_pcache = pcp;
pcache_insert_fd(pcp, pdp, pollfdnum);
+ } else {
+ if ((dpep->dpe_flag & DP_ISEPOLLCOMPAT) &&
+ pdp->pd_fp != NULL) {
+ /*
+ * epoll semantics demand that we error
+ * out in this case.
+ */
+ error = EEXIST;
+ break;
+ }
+ }
+
+ if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) {
+ /* LINTED pointer alignment */
+ epfdp = (dvpoll_epollfd_t *)pfdp;
+ pdp->pd_epolldata = epfdp->dpep_data;
}
+
ASSERT(pdp->pd_fd == fd);
ASSERT(pdp->pd_pcache == pcp);
if (fd >= pcp->pc_mapsize) {
@@ -665,7 +819,17 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp)
}
releasef(fd);
} else {
- if (pdp == NULL) {
+ if (pdp == NULL || pdp->pd_fp == NULL) {
+ if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) {
+ /*
+ * As with the add case (above), epoll
+ * semantics demand that we error out
+ * in this case.
+ */
+ error = ENOENT;
+ break;
+ }
+
continue;
}
ASSERT(pdp->pd_fd == fd);
@@ -690,6 +854,17 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp)
return (error);
}
+#define DP_SIGMASK_RESTORE(ksetp) { \
+ if (ksetp != NULL) { \
+ mutex_enter(&p->p_lock); \
+ if (lwp->lwp_cursig == 0) { \
+ t->t_hold = lwp->lwp_sigoldmask; \
+ t->t_flag &= ~T_TOMASK; \
+ } \
+ mutex_exit(&p->p_lock); \
+ } \
+}
+
/*ARGSUSED*/
static int
dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
@@ -701,7 +876,7 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
int error = 0;
STRUCT_DECL(dvpoll, dvpoll);
- if (cmd == DP_POLL) {
+ if (cmd == DP_POLL || cmd == DP_PPOLL) {
/* do this now, before we sleep on DP_WRITER_PRESENT */
now = gethrtime();
}
@@ -713,10 +888,37 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
mutex_exit(&devpoll_lock);
ASSERT(dpep != NULL);
pcp = dpep->dpe_pcache;
- if (curproc->p_pid != pcp->pc_pid)
- return (EACCES);
mutex_enter(&dpep->dpe_lock);
+
+ if (cmd == DP_EPOLLCOMPAT) {
+ if (dpep->dpe_refcnt != 0) {
+ /*
+ * We can't turn on epoll compatibility while there
+ * are outstanding operations.
+ */
+ mutex_exit(&dpep->dpe_lock);
+ return (EBUSY);
+ }
+
+ /*
+ * epoll compatibility is a one-way street: there's no way
+ * to turn it off for a particular open.
+ */
+ dpep->dpe_flag |= DP_ISEPOLLCOMPAT;
+ mutex_exit(&dpep->dpe_lock);
+
+ return (0);
+ }
+
+ if (!(dpep->dpe_flag & DP_ISEPOLLCOMPAT) &&
+ curproc->p_pid != pcp->pc_pid) {
+ if (pcp->pc_pid != -1)
+ return (EACCES);
+
+ pcp->pc_pid = curproc->p_pid;
+ }
+
while ((dpep->dpe_flag & DP_WRITER_PRESENT) ||
(dpep->dpe_writerwait != 0)) {
if (!cv_wait_sig_swap(&dpep->dpe_cv, &dpep->dpe_lock)) {
@@ -729,15 +931,36 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
switch (cmd) {
case DP_POLL:
+ case DP_PPOLL:
{
pollstate_t *ps;
nfds_t nfds;
int fdcnt = 0;
+ size_t size, fdsize, dpsize;
hrtime_t deadline = 0;
+ k_sigset_t *ksetp = NULL;
+ k_sigset_t kset;
+ sigset_t set;
+ kthread_t *t = curthread;
+ klwp_t *lwp = ttolwp(t);
+ struct proc *p = ttoproc(curthread);
STRUCT_INIT(dvpoll, mode);
- error = copyin((caddr_t)arg, STRUCT_BUF(dvpoll),
- STRUCT_SIZE(dvpoll));
+
+ /*
+ * The dp_setp member is only required/consumed for DP_PPOLL,
+ * which otherwise uses the same structure as DP_POLL.
+ */
+ if (cmd == DP_POLL) {
+ dpsize = (uintptr_t)STRUCT_FADDR(dvpoll, dp_setp) -
+ (uintptr_t)STRUCT_FADDR(dvpoll, dp_fds);
+ } else {
+ ASSERT(cmd == DP_PPOLL);
+ dpsize = STRUCT_SIZE(dvpoll);
+ }
+
+ error = copyin((caddr_t)arg, STRUCT_BUF(dvpoll), dpsize);
+
if (error) {
DP_REFRELE(dpep);
return (EFAULT);
@@ -755,6 +978,52 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
deadline += now;
}
+ if (cmd == DP_PPOLL) {
+ void *setp = STRUCT_FGETP(dvpoll, dp_setp);
+
+ if (setp != NULL) {
+ if (copyin(setp, &set, sizeof (set))) {
+ DP_REFRELE(dpep);
+ return (EFAULT);
+ }
+
+ sigutok(&set, &kset);
+ ksetp = &kset;
+
+ mutex_enter(&p->p_lock);
+ schedctl_finish_sigblock(t);
+ lwp->lwp_sigoldmask = t->t_hold;
+ t->t_hold = *ksetp;
+ t->t_flag |= T_TOMASK;
+
+ /*
+ * Like ppoll() with a non-NULL sigset, we'll
+ * call cv_reltimedwait_sig() just to check for
+ * signals. This call will return immediately
+ * with either 0 (signalled) or -1 (no signal).
+ * There are some conditions whereby we can
+ * get 0 from cv_reltimedwait_sig() without
+ * a true signal (e.g., a directed stop), so
+ * we restore our signal mask in the unlikely
+ * event that lwp_cursig is 0.
+ */
+ if (!cv_reltimedwait_sig(&t->t_delay_cv,
+ &p->p_lock, 0, TR_CLOCK_TICK)) {
+ if (lwp->lwp_cursig == 0) {
+ t->t_hold = lwp->lwp_sigoldmask;
+ t->t_flag &= ~T_TOMASK;
+ }
+
+ mutex_exit(&p->p_lock);
+
+ DP_REFRELE(dpep);
+ return (EINTR);
+ }
+
+ mutex_exit(&p->p_lock);
+ }
+ }
+
if ((nfds = STRUCT_FGET(dvpoll, dp_nfds)) == 0) {
/*
* We are just using DP_POLL to sleep, so
@@ -762,17 +1031,29 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
* Do not check for signals if we have a zero timeout.
*/
DP_REFRELE(dpep);
- if (deadline == 0)
+ if (deadline == 0) {
+ DP_SIGMASK_RESTORE(ksetp);
return (0);
+ }
+
mutex_enter(&curthread->t_delay_lock);
while ((error =
cv_timedwait_sig_hrtime(&curthread->t_delay_cv,
&curthread->t_delay_lock, deadline)) > 0)
continue;
mutex_exit(&curthread->t_delay_lock);
+
+ DP_SIGMASK_RESTORE(ksetp);
+
return (error == 0 ? EINTR : 0);
}
+ if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) {
+ size = nfds * (fdsize = sizeof (epoll_event_t));
+ } else {
+ size = nfds * (fdsize = sizeof (pollfd_t));
+ }
+
/*
* XXX It would be nice not to have to alloc each time, but it
* requires another per thread structure hook. This can be
@@ -782,37 +1063,45 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
curthread->t_pollstate = pollstate_create();
ps = curthread->t_pollstate;
}
- if (ps->ps_dpbufsize < nfds) {
- struct proc *p = ttoproc(curthread);
+
+ if (ps->ps_dpbufsize < size) {
/*
- * The maximum size should be no large than
- * current maximum open file count.
+ * If nfds is larger than twice the current maximum
+ * open file count, we'll silently clamp it. This
+ * only limits our exposure to allocating an
+ * inordinate amount of kernel memory; it doesn't
+ * otherwise affect the semantics. (We have this
+ * check at twice the maximum instead of merely the
+ * maximum because some applications pass an nfds that
+ * is only slightly larger than their limit.)
*/
mutex_enter(&p->p_lock);
- if (nfds > p->p_fno_ctl) {
- mutex_exit(&p->p_lock);
- DP_REFRELE(dpep);
- return (EINVAL);
+ if ((nfds >> 1) > p->p_fno_ctl) {
+ nfds = p->p_fno_ctl;
+ size = nfds * fdsize;
}
mutex_exit(&p->p_lock);
- kmem_free(ps->ps_dpbuf, sizeof (pollfd_t) *
- ps->ps_dpbufsize);
- ps->ps_dpbuf = kmem_zalloc(sizeof (pollfd_t) *
- nfds, KM_SLEEP);
- ps->ps_dpbufsize = nfds;
+
+ if (ps->ps_dpbufsize < size) {
+ kmem_free(ps->ps_dpbuf, ps->ps_dpbufsize);
+ ps->ps_dpbuf = kmem_zalloc(size, KM_SLEEP);
+ ps->ps_dpbufsize = size;
+ }
}
mutex_enter(&pcp->pc_lock);
for (;;) {
- pcp->pc_flag = 0;
- error = dp_pcache_poll(ps->ps_dpbuf, pcp, nfds, &fdcnt);
+ pcp->pc_flag &= ~PC_POLLWAKE;
+
+ error = dp_pcache_poll(dpep, ps->ps_dpbuf,
+ pcp, nfds, &fdcnt);
if (fdcnt > 0 || error != 0)
break;
/*
* A pollwake has happened since we polled cache.
*/
- if (pcp->pc_flag & T_POLLWAKE)
+ if (pcp->pc_flag & PC_POLLWAKE)
continue;
/*
@@ -822,8 +1111,40 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
/* immediate timeout; do not check signals */
break;
}
- error = cv_timedwait_sig_hrtime(&pcp->pc_cv,
- &pcp->pc_lock, deadline);
+
+ if (!(pcp->pc_flag & PC_WRITEWANTED)) {
+ error = cv_timedwait_sig_hrtime(&pcp->pc_cv,
+ &pcp->pc_lock, deadline);
+ } else {
+ error = 1;
+ }
+
+ if (error > 0 && (pcp->pc_flag & PC_WRITEWANTED)) {
+ /*
+ * We've been kicked off of our cv because a
+ * writer wants in. We're going to drop our
+ * reference count and then wait until the
+ * writer is gone -- at which point we'll
+ * reacquire the pc_lock and call into
+ * dp_pcache_poll() to get the updated state.
+ */
+ mutex_exit(&pcp->pc_lock);
+
+ mutex_enter(&dpep->dpe_lock);
+ dpep->dpe_refcnt--;
+ cv_broadcast(&dpep->dpe_cv);
+
+ while ((dpep->dpe_flag & DP_WRITER_PRESENT) ||
+ (dpep->dpe_writerwait != 0)) {
+ error = cv_wait_sig_swap(&dpep->dpe_cv,
+ &dpep->dpe_lock);
+ }
+
+ dpep->dpe_refcnt++;
+ mutex_exit(&dpep->dpe_lock);
+ mutex_enter(&pcp->pc_lock);
+ }
+
/*
* If we were awakened by a signal or timeout
* then break the loop, else poll again.
@@ -837,9 +1158,11 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
}
mutex_exit(&pcp->pc_lock);
+ DP_SIGMASK_RESTORE(ksetp);
+
if (error == 0 && fdcnt > 0) {
- if (copyout(ps->ps_dpbuf, STRUCT_FGETP(dvpoll,
- dp_fds), sizeof (pollfd_t) * fdcnt)) {
+ if (copyout(ps->ps_dpbuf,
+ STRUCT_FGETP(dvpoll, dp_fds), fdcnt * fdsize)) {
DP_REFRELE(dpep);
return (EFAULT);
}
@@ -901,10 +1224,25 @@ static int
dppoll(dev_t dev, short events, int anyyet, short *reventsp,
struct pollhead **phpp)
{
+ minor_t minor;
+ dp_entry_t *dpep;
+
+ minor = getminor(dev);
+
+ mutex_enter(&devpoll_lock);
+ dpep = devpolltbl[minor];
+ ASSERT(dpep != NULL);
+ mutex_exit(&devpoll_lock);
+
/*
* Polling on a /dev/poll fd is not fully supported yet.
*/
- *reventsp = POLLERR;
+ if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) {
+ /* no error in epoll compat. mode */
+ *reventsp = 0;
+ } else {
+ *reventsp = POLLERR;
+ }
return (0);
}
diff --git a/usr/src/uts/common/io/dld/dld_drv.c b/usr/src/uts/common/io/dld/dld_drv.c
index 40cbe86170..2152ce0baa 100644
--- a/usr/src/uts/common/io/dld/dld_drv.c
+++ b/usr/src/uts/common/io/dld/dld_drv.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent Inc. All rights reserved.
*/
/*
@@ -701,7 +702,8 @@ drv_ioc_prop_common(dld_ioc_macprop_t *prop, intptr_t arg, boolean_t set,
err = EACCES;
goto done;
}
- err = dls_devnet_setzid(dlh, dzp->diz_zid);
+ err = dls_devnet_setzid(dlh, dzp->diz_zid,
+ dzp->diz_transient);
} else {
kprop->pr_perm_flags = MAC_PROP_PERM_RW;
(*(zoneid_t *)kprop->pr_val) = dls_devnet_getzid(dlh);
@@ -865,7 +867,7 @@ drv_ioc_rename(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp)
return (err);
if ((err = dls_devnet_rename(dir->dir_linkid1, dir->dir_linkid2,
- dir->dir_link)) != 0)
+ dir->dir_link, dir->dir_zoneinit)) != 0)
return (err);
if (dir->dir_linkid2 == DATALINK_INVALID_LINKID)
diff --git a/usr/src/uts/common/io/dld/dld_proto.c b/usr/src/uts/common/io/dld/dld_proto.c
index a438e43d91..79c3d8260a 100644
--- a/usr/src/uts/common/io/dld/dld_proto.c
+++ b/usr/src/uts/common/io/dld/dld_proto.c
@@ -41,7 +41,7 @@ static proto_reqfunc_t proto_info_req, proto_attach_req, proto_detach_req,
proto_bind_req, proto_unbind_req, proto_promiscon_req, proto_promiscoff_req,
proto_enabmulti_req, proto_disabmulti_req, proto_physaddr_req,
proto_setphysaddr_req, proto_udqos_req, proto_req, proto_capability_req,
- proto_notify_req, proto_passive_req;
+ proto_notify_req, proto_passive_req, proto_exclusive_req;
static void proto_capability_advertise(dld_str_t *, mblk_t *);
static int dld_capab_poll_disable(dld_str_t *, dld_capab_poll_t *);
@@ -121,6 +121,9 @@ dld_proto(dld_str_t *dsp, mblk_t *mp)
case DL_PASSIVE_REQ:
proto_passive_req(dsp, mp);
break;
+ case DL_EXCLUSIVE_REQ:
+ proto_exclusive_req(dsp, mp);
+ break;
default:
proto_req(dsp, mp);
break;
@@ -605,6 +608,10 @@ proto_promiscon_req(dld_str_t *dsp, mblk_t *mp)
new_flags |= DLS_PROMISC_PHYS;
break;
+ case DL_PROMISC_RX_ONLY:
+ new_flags |= DLS_PROMISC_RX_ONLY;
+ break;
+
default:
dl_err = DL_NOTSUPPORTED;
goto failed2;
@@ -692,12 +699,24 @@ proto_promiscoff_req(dld_str_t *dsp, mblk_t *mp)
new_flags &= ~DLS_PROMISC_PHYS;
break;
+ case DL_PROMISC_RX_ONLY:
+ if (!(dsp->ds_promisc & DLS_PROMISC_RX_ONLY)) {
+ dl_err = DL_NOTENAB;
+ goto failed;
+ }
+ new_flags &= ~DLS_PROMISC_RX_ONLY;
+ break;
+
default:
dl_err = DL_NOTSUPPORTED;
mac_perim_exit(mph);
goto failed;
}
+ /* DLS_PROMISC_RX_ONLY can't be a solo flag */
+ if (new_flags == DLS_PROMISC_RX_ONLY)
+ new_flags = 0;
+
/*
* Adjust channel promiscuity.
*/
@@ -1295,7 +1314,8 @@ proto_passive_req(dld_str_t *dsp, mblk_t *mp)
* If we've already become active by issuing an active primitive,
* then it's too late to try to become passive.
*/
- if (dsp->ds_passivestate == DLD_ACTIVE) {
+ if (dsp->ds_passivestate == DLD_ACTIVE ||
+ dsp->ds_passivestate == DLD_EXCLUSIVE) {
dl_err = DL_OUTSTATE;
goto failed;
}
@@ -1354,7 +1374,12 @@ dld_capab_direct(dld_str_t *dsp, void *data, uint_t flags)
dls_rx_set(dsp, (dls_rx_t)direct->di_rx_cf,
direct->di_rx_ch);
- direct->di_tx_df = (uintptr_t)str_mdata_fastpath_put;
+ if (direct->di_flags & DI_DIRECT_RAW) {
+ direct->di_tx_df =
+ (uintptr_t)str_mdata_raw_fastpath_put;
+ } else {
+ direct->di_tx_df = (uintptr_t)str_mdata_fastpath_put;
+ }
direct->di_tx_dh = dsp;
direct->di_tx_cb_df = (uintptr_t)mac_client_tx_notify;
direct->di_tx_cb_dh = dsp->ds_mch;
@@ -1516,8 +1541,9 @@ dld_capab(dld_str_t *dsp, uint_t type, void *data, uint_t flags)
* completes. So we limit the check to DLD_ENABLE case.
*/
if ((flags == DLD_ENABLE && type != DLD_CAPAB_PERIM) &&
- (dsp->ds_sap != ETHERTYPE_IP ||
- !check_mod_above(dsp->ds_rq, "ip"))) {
+ ((dsp->ds_sap != ETHERTYPE_IP ||
+ !check_mod_above(dsp->ds_rq, "ip")) &&
+ !check_mod_above(dsp->ds_rq, "vnd"))) {
return (ENOTSUP);
}
@@ -1599,9 +1625,15 @@ proto_capability_advertise(dld_str_t *dsp, mblk_t *mp)
}
/*
- * Direct capability negotiation interface between IP and DLD
+ * Direct capability negotiation interface between IP/VND and DLD. Note
+ * that for vnd we only allow the case where the media type is the
+ * native media type so we know that there are no transformations that
+ * would have to happen to the mac header that it receives.
*/
- if (dsp->ds_sap == ETHERTYPE_IP && check_mod_above(dsp->ds_rq, "ip")) {
+ if ((dsp->ds_sap == ETHERTYPE_IP &&
+ check_mod_above(dsp->ds_rq, "ip")) ||
+ (check_mod_above(dsp->ds_rq, "vnd") &&
+ dsp->ds_mip->mi_media == dsp->ds_mip->mi_nativemedia)) {
dld_capable = B_TRUE;
subsize += sizeof (dl_capability_sub_t) +
sizeof (dl_capab_dld_t);
@@ -1720,3 +1752,36 @@ dld_capabilities_disable(dld_str_t *dsp)
if (dsp->ds_polling)
(void) dld_capab_poll_disable(dsp, NULL);
}
+
+static void
+proto_exclusive_req(dld_str_t *dsp, mblk_t *mp)
+{
+ int ret = 0;
+ t_uscalar_t dl_err;
+ mac_perim_handle_t mph;
+
+ if (dsp->ds_passivestate != DLD_UNINITIALIZED) {
+ dl_err = DL_OUTSTATE;
+ goto failed;
+ }
+
+ if (MBLKL(mp) < DL_EXCLUSIVE_REQ_SIZE) {
+ dl_err = DL_BADPRIM;
+ goto failed;
+ }
+
+ mac_perim_enter_by_mh(dsp->ds_mh, &mph);
+ ret = dls_exclusive_set(dsp, B_TRUE);
+ mac_perim_exit(mph);
+
+ if (ret != 0) {
+ dl_err = DL_SYSERR;
+ goto failed;
+ }
+
+ dsp->ds_passivestate = DLD_EXCLUSIVE;
+ dlokack(dsp->ds_wq, mp, DL_EXCLUSIVE_REQ);
+ return;
+failed:
+ dlerrorack(dsp->ds_wq, mp, DL_EXCLUSIVE_REQ, dl_err, (t_uscalar_t)ret);
+}
diff --git a/usr/src/uts/common/io/dld/dld_str.c b/usr/src/uts/common/io/dld/dld_str.c
index 6f0d0b9a6c..f5308e70ff 100644
--- a/usr/src/uts/common/io/dld/dld_str.c
+++ b/usr/src/uts/common/io/dld/dld_str.c
@@ -854,6 +854,77 @@ i_dld_ether_header_update_tag(mblk_t *mp, uint_t pri, uint16_t vid,
return (mp);
}
+static boolean_t
+i_dld_raw_ether_check(dld_str_t *dsp, mac_header_info_t *mhip, mblk_t **mpp)
+{
+ mblk_t *mp = *mpp;
+ mblk_t *newmp;
+ uint_t pri, vid, dvid;
+
+ dvid = mac_client_vid(dsp->ds_mch);
+
+ /*
+ * Discard the packet if this is a VLAN stream but the VID in
+ * the packet is not correct.
+ */
+ vid = VLAN_ID(mhip->mhi_tci);
+ if ((dvid != VLAN_ID_NONE) && (vid != VLAN_ID_NONE))
+ return (B_FALSE);
+
+ /*
+ * Discard the packet if this packet is a tagged packet
+ * but both pri and VID are 0.
+ */
+ pri = VLAN_PRI(mhip->mhi_tci);
+ if (mhip->mhi_istagged && !mhip->mhi_ispvid && pri == 0 &&
+ vid == VLAN_ID_NONE)
+ return (B_FALSE);
+
+ /*
+ * Update the priority bits to the per-stream priority if
+ * priority is not set in the packet. Update the VID for
+ * packets on a VLAN stream.
+ */
+ pri = (pri == 0) ? dsp->ds_pri : 0;
+ if ((pri != 0) || (dvid != VLAN_ID_NONE)) {
+ if ((newmp = i_dld_ether_header_update_tag(mp, pri,
+ dvid, dsp->ds_dlp->dl_tagmode)) == NULL) {
+ return (B_FALSE);
+ }
+ *mpp = newmp;
+ }
+
+ return (B_TRUE);
+}
+
+mac_tx_cookie_t
+str_mdata_raw_fastpath_put(dld_str_t *dsp, mblk_t *mp, uintptr_t f_hint,
+ uint16_t flag)
+{
+ boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER);
+ mac_header_info_t mhi;
+ mac_tx_cookie_t cookie;
+
+ if (mac_vlan_header_info(dsp->ds_mh, mp, &mhi) != 0)
+ goto discard;
+
+ if (is_ethernet) {
+ if (i_dld_raw_ether_check(dsp, &mhi, &mp) == B_FALSE)
+ goto discard;
+ }
+
+ if ((cookie = DLD_TX(dsp, mp, f_hint, flag)) != NULL) {
+ DLD_SETQFULL(dsp);
+ }
+ return (cookie);
+discard:
+ /* TODO: bump kstat? */
+ freemsg(mp);
+ return (NULL);
+}
+
+
+
/*
* M_DATA put (IP fast-path mode)
*/
@@ -902,7 +973,6 @@ str_mdata_raw_put(dld_str_t *dsp, mblk_t *mp)
mblk_t *bp, *newmp;
size_t size;
mac_header_info_t mhi;
- uint_t pri, vid, dvid;
uint_t max_sdu;
/*
@@ -948,38 +1018,8 @@ str_mdata_raw_put(dld_str_t *dsp, mblk_t *mp)
goto discard;
if (is_ethernet) {
- dvid = mac_client_vid(dsp->ds_mch);
-
- /*
- * Discard the packet if this is a VLAN stream but the VID in
- * the packet is not correct.
- */
- vid = VLAN_ID(mhi.mhi_tci);
- if ((dvid != VLAN_ID_NONE) && (vid != VLAN_ID_NONE))
- goto discard;
-
- /*
- * Discard the packet if this packet is a tagged packet
- * but both pri and VID are 0.
- */
- pri = VLAN_PRI(mhi.mhi_tci);
- if (mhi.mhi_istagged && !mhi.mhi_ispvid && pri == 0 &&
- vid == VLAN_ID_NONE)
+ if (i_dld_raw_ether_check(dsp, &mhi, &mp) == B_FALSE)
goto discard;
-
- /*
- * Update the priority bits to the per-stream priority if
- * priority is not set in the packet. Update the VID for
- * packets on a VLAN stream.
- */
- pri = (pri == 0) ? dsp->ds_pri : 0;
- if ((pri != 0) || (dvid != VLAN_ID_NONE)) {
- if ((newmp = i_dld_ether_header_update_tag(mp, pri,
- dvid, dsp->ds_dlp->dl_tagmode)) == NULL) {
- goto discard;
- }
- mp = newmp;
- }
}
if (DLD_TX(dsp, mp, 0, 0) != NULL) {
diff --git a/usr/src/uts/common/io/dls/dls.c b/usr/src/uts/common/io/dls/dls.c
index 92993ada58..9fa649943c 100644
--- a/usr/src/uts/common/io/dls/dls.c
+++ b/usr/src/uts/common/io/dls/dls.c
@@ -25,7 +25,7 @@
*/
/*
- * Copyright (c) 2013 Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
*/
/*
@@ -248,19 +248,44 @@ dls_promisc(dld_str_t *dsp, uint32_t new_flags)
{
int err = 0;
uint32_t old_flags = dsp->ds_promisc;
+ uint32_t new_type = new_flags & ~DLS_PROMISC_RX_ONLY;
mac_client_promisc_type_t mptype = MAC_CLIENT_PROMISC_ALL;
+ uint16_t mac_flags = 0;
ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
ASSERT(!(new_flags & ~(DLS_PROMISC_SAP | DLS_PROMISC_MULTI |
- DLS_PROMISC_PHYS)));
+ DLS_PROMISC_PHYS | DLS_PROMISC_RX_ONLY)));
+
+ /*
+ * Asking us just to turn on DLS_PROMISC_RX_ONLY is not valid.
+ */
+ if (new_flags == DLS_PROMISC_RX_ONLY)
+ return (EINVAL);
/*
* If the user has only requested DLS_PROMISC_MULTI then we need to make
* sure that they don't see all packets.
*/
- if (new_flags == DLS_PROMISC_MULTI)
+ if (new_type == DLS_PROMISC_MULTI)
mptype = MAC_CLIENT_PROMISC_MULTI;
+ /*
+ * Look at new flags and figure out the correct mac promisc flags.
+ * If we've only requested DLS_PROMISC_SAP and not _MULTI or _PHYS,
+ * don't turn on physical promisc mode.
+ */
+ if (new_flags & DLS_PROMISC_RX_ONLY)
+ mac_flags |= MAC_PROMISC_FLAGS_NO_TX_LOOP;
+ if (new_type == DLS_PROMISC_SAP)
+ mac_flags |= MAC_PROMISC_FLAGS_NO_PHYS;
+
+ /*
+ * There are three cases we care about here with respect to MAC. Going
+ * from nothing to something, something to nothing, something to
+ * something where we need to change how we're getting stuff from mac.
+ * In the last case, as long as they're not equal, we need to assume
+ * something has changed and do something about it.
+ */
if (dsp->ds_promisc == 0 && new_flags != 0) {
/*
* If only DLS_PROMISC_SAP, we don't turn on the
@@ -268,9 +293,7 @@ dls_promisc(dld_str_t *dsp, uint32_t new_flags)
*/
dsp->ds_promisc = new_flags;
err = mac_promisc_add(dsp->ds_mch, mptype,
- dls_rx_promisc, dsp, &dsp->ds_mph,
- (new_flags != DLS_PROMISC_SAP) ? 0 :
- MAC_PROMISC_FLAGS_NO_PHYS);
+ dls_rx_promisc, dsp, &dsp->ds_mph, mac_flags);
if (err != 0) {
dsp->ds_promisc = old_flags;
return (err);
@@ -296,19 +319,13 @@ dls_promisc(dld_str_t *dsp, uint32_t new_flags)
MAC_CLIENT_PROMISC_ALL, dls_rx_vlan_promisc, dsp,
&dsp->ds_vlan_mph, MAC_PROMISC_FLAGS_NO_PHYS);
}
- } else if (dsp->ds_promisc == DLS_PROMISC_SAP && new_flags != 0 &&
- new_flags != dsp->ds_promisc) {
- /*
- * If the old flag is PROMISC_SAP, but the current flag has
- * changed to some new non-zero value, we need to turn the
- * physical promiscuous mode.
- */
+ } else if (new_flags != 0 && new_flags != old_flags) {
ASSERT(dsp->ds_mph != NULL);
mac_promisc_remove(dsp->ds_mph);
/* Honors both after-remove and before-add semantics! */
dsp->ds_promisc = new_flags;
err = mac_promisc_add(dsp->ds_mch, mptype,
- dls_rx_promisc, dsp, &dsp->ds_mph, 0);
+ dls_rx_promisc, dsp, &dsp->ds_mph, mac_flags);
if (err != 0)
dsp->ds_promisc = old_flags;
} else {
@@ -629,6 +646,22 @@ boolean_t
dls_accept_promisc(dld_str_t *dsp, mac_header_info_t *mhip, dls_rx_t *ds_rx,
void **ds_rx_arg, boolean_t loopback)
{
+ if (dsp->ds_promisc == 0) {
+ /*
+ * If there are active walkers of the mi_promisc_list when
+ * promiscuousness is disabled, ds_promisc will be cleared,
+ * but the DLS will remain on the mi_promisc_list until the
+ * walk is completed. If we do not recognize this case here,
+ * we won't properly execute the ds_promisc case in the common
+ * accept routine -- and we will potentially accept a packet
+ * that has originated with this DLS (which in turn can
+ * induce recursion and death by stack overflow). If
+ * ds_promisc is zero, we know that we are in this window --
+ * and we refuse to accept the packet.
+ */
+ return (B_FALSE);
+ }
+
return (dls_accept_common(dsp, mhip, ds_rx, ds_rx_arg, B_TRUE,
loopback));
}
@@ -659,7 +692,10 @@ dls_mac_active_set(dls_link_t *dlp)
* Set the function to start receiving packets.
*/
mac_rx_set(dlp->dl_mch, i_dls_link_rx, dlp);
+ } else if (dlp->dl_exclusive == B_TRUE) {
+ return (EBUSY);
}
+
dlp->dl_nactive++;
return (0);
}
@@ -685,7 +721,11 @@ dls_active_set(dld_str_t *dsp)
if (dsp->ds_passivestate == DLD_PASSIVE)
return (0);
- /* If we're already active, then there's nothing more to do. */
+ if (dsp->ds_dlp->dl_exclusive == B_TRUE &&
+ dsp->ds_passivestate != DLD_EXCLUSIVE)
+ return (EBUSY);
+
+ /* If we're already active, we need to check the link's exclusivity */
if ((dsp->ds_nactive == 0) &&
((err = dls_mac_active_set(dsp->ds_dlp)) != 0)) {
/* except for ENXIO all other errors are mapped to EBUSY */
@@ -694,7 +734,8 @@ dls_active_set(dld_str_t *dsp)
return (err);
}
- dsp->ds_passivestate = DLD_ACTIVE;
+ dsp->ds_passivestate = dsp->ds_dlp->dl_exclusive == B_TRUE ?
+ DLD_EXCLUSIVE : DLD_ACTIVE;
dsp->ds_nactive++;
return (0);
}
@@ -725,7 +766,32 @@ dls_active_clear(dld_str_t *dsp, boolean_t all)
if (dsp->ds_nactive != 0)
return;
- ASSERT(dsp->ds_passivestate == DLD_ACTIVE);
+ ASSERT(dsp->ds_passivestate == DLD_ACTIVE ||
+ dsp->ds_passivestate == DLD_EXCLUSIVE);
dls_mac_active_clear(dsp->ds_dlp);
+ /*
+ * We verify below to ensure that no other part of DLS has mucked with
+ * our exclusive state.
+ */
+ if (dsp->ds_passivestate == DLD_EXCLUSIVE)
+ VERIFY(dls_exclusive_set(dsp, B_FALSE) == 0);
dsp->ds_passivestate = DLD_UNINITIALIZED;
}
+
+int
+dls_exclusive_set(dld_str_t *dsp, boolean_t enable)
+{
+ ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
+
+ if (enable == B_FALSE) {
+ dsp->ds_dlp->dl_exclusive = B_FALSE;
+ return (0);
+ }
+
+ if (dsp->ds_dlp->dl_nactive != 0)
+ return (EBUSY);
+
+ dsp->ds_dlp->dl_exclusive = B_TRUE;
+
+ return (0);
+}
diff --git a/usr/src/uts/common/io/dls/dls_link.c b/usr/src/uts/common/io/dls/dls_link.c
index 6b92a81e77..c5363e8194 100644
--- a/usr/src/uts/common/io/dls/dls_link.c
+++ b/usr/src/uts/common/io/dls/dls_link.c
@@ -602,6 +602,7 @@ i_dls_link_destroy(dls_link_t *dlp)
dlp->dl_mip = NULL;
dlp->dl_unknowns = 0;
dlp->dl_nonip_cnt = 0;
+ dlp->dl_exclusive = B_FALSE;
kmem_cache_free(i_dls_link_cachep, dlp);
}
diff --git a/usr/src/uts/common/io/dls/dls_mgmt.c b/usr/src/uts/common/io/dls/dls_mgmt.c
index 049c4bd757..6111d62475 100644
--- a/usr/src/uts/common/io/dls/dls_mgmt.c
+++ b/usr/src/uts/common/io/dls/dls_mgmt.c
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright (c) 2013 Joyent, Inc. All rights reserved.
*/
/*
@@ -105,12 +106,13 @@ typedef struct dls_devnet_s {
zoneid_t dd_zid; /* current zone */
boolean_t dd_prop_loaded;
taskqid_t dd_prop_taskid;
+ boolean_t dd_transient; /* link goes away when zone does */
} dls_devnet_t;
static int i_dls_devnet_create_iptun(const char *, const char *,
datalink_id_t *);
static int i_dls_devnet_destroy_iptun(datalink_id_t);
-static int i_dls_devnet_setzid(dls_devnet_t *, zoneid_t, boolean_t);
+static int i_dls_devnet_setzid(dls_devnet_t *, zoneid_t, boolean_t, boolean_t);
static int dls_devnet_unset(const char *, datalink_id_t *, boolean_t);
/*ARGSUSED*/
@@ -145,7 +147,12 @@ dls_zone_remove(datalink_id_t linkid, void *arg)
dls_devnet_t *ddp;
if (dls_devnet_hold_tmp(linkid, &ddp) == 0) {
- (void) dls_devnet_setzid(ddp, GLOBAL_ZONEID);
+ /*
+ * Don't bother moving transient links back to the global zone
+ * since we will simply delete them in dls_devnet_unset.
+ */
+ if (!ddp->dd_transient)
+ (void) dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE);
dls_devnet_rele_tmp(ddp);
}
return (0);
@@ -526,6 +533,27 @@ dls_mgmt_get_linkid(const char *link, datalink_id_t *linkid)
getlinkid.ld_cmd = DLMGMT_CMD_GETLINKID;
(void) strlcpy(getlinkid.ld_link, link, MAXLINKNAMELEN);
+ getlinkid.ld_zoneid = getzoneid();
+
+ if ((err = i_dls_mgmt_upcall(&getlinkid, sizeof (getlinkid), &retval,
+ sizeof (retval))) == 0) {
+ *linkid = retval.lr_linkid;
+ }
+ return (err);
+}
+
+int
+dls_mgmt_get_linkid_in_zone(const char *link, datalink_id_t *linkid,
+ zoneid_t zid)
+{
+ dlmgmt_door_getlinkid_t getlinkid;
+ dlmgmt_getlinkid_retval_t retval;
+ int err;
+
+ ASSERT(getzoneid() == GLOBAL_ZONEID || zid == getzoneid());
+ getlinkid.ld_cmd = DLMGMT_CMD_GETLINKID;
+ (void) strlcpy(getlinkid.ld_link, link, MAXLINKNAMELEN);
+ getlinkid.ld_zoneid = zid;
if ((err = i_dls_mgmt_upcall(&getlinkid, sizeof (getlinkid), &retval,
sizeof (retval))) == 0) {
@@ -534,6 +562,7 @@ dls_mgmt_get_linkid(const char *link, datalink_id_t *linkid)
return (err);
}
+
datalink_id_t
dls_mgmt_get_next(datalink_id_t linkid, datalink_class_t class,
datalink_media_t dmedia, uint32_t flags)
@@ -740,12 +769,23 @@ dls_devnet_stat_update(kstat_t *ksp, int rw)
* Create the "link" kstats.
*/
static void
-dls_devnet_stat_create(dls_devnet_t *ddp, zoneid_t zoneid)
+dls_devnet_stat_create(dls_devnet_t *ddp, zoneid_t zoneid, zoneid_t newzoneid)
{
kstat_t *ksp;
+ char *nm;
+ char kname[MAXLINKNAMELEN];
+
+ if (zoneid != newzoneid) {
+ ASSERT(zoneid == GLOBAL_ZONEID);
+ (void) snprintf(kname, sizeof (kname), "z%d_%s", newzoneid,
+ ddp->dd_linkname);
+ nm = kname;
+ } else {
+ nm = ddp->dd_linkname;
+ }
- if (dls_stat_create("link", 0, ddp->dd_linkname, zoneid,
- dls_devnet_stat_update, ddp, &ksp) == 0) {
+ if (dls_stat_create("link", 0, nm, zoneid,
+ dls_devnet_stat_update, ddp, &ksp, newzoneid) == 0) {
ASSERT(ksp != NULL);
if (zoneid == ddp->dd_owner_zid) {
ASSERT(ddp->dd_ksp == NULL);
@@ -765,12 +805,12 @@ dls_devnet_stat_destroy(dls_devnet_t *ddp, zoneid_t zoneid)
{
if (zoneid == ddp->dd_owner_zid) {
if (ddp->dd_ksp != NULL) {
- kstat_delete(ddp->dd_ksp);
+ dls_stat_delete(ddp->dd_ksp);
ddp->dd_ksp = NULL;
}
} else {
if (ddp->dd_zone_ksp != NULL) {
- kstat_delete(ddp->dd_zone_ksp);
+ dls_stat_delete(ddp->dd_zone_ksp);
ddp->dd_zone_ksp = NULL;
}
}
@@ -781,15 +821,25 @@ dls_devnet_stat_destroy(dls_devnet_t *ddp, zoneid_t zoneid)
* and create the new set using the new name.
*/
static void
-dls_devnet_stat_rename(dls_devnet_t *ddp)
+dls_devnet_stat_rename(dls_devnet_t *ddp, boolean_t zoneinit)
{
if (ddp->dd_ksp != NULL) {
- kstat_delete(ddp->dd_ksp);
+ dls_stat_delete(ddp->dd_ksp);
ddp->dd_ksp = NULL;
}
- /* We can't rename a link while it's assigned to a non-global zone. */
+ if (zoneinit && ddp->dd_zone_ksp != NULL) {
+ dls_stat_delete(ddp->dd_zone_ksp);
+ ddp->dd_zone_ksp = NULL;
+ }
+ /*
+ * We can't rename a link while it's assigned to a non-global zone
+ * unless we're first initializing the zone while readying it.
+ */
ASSERT(ddp->dd_zone_ksp == NULL);
- dls_devnet_stat_create(ddp, ddp->dd_owner_zid);
+ dls_devnet_stat_create(ddp, ddp->dd_owner_zid,
+ (zoneinit ? ddp->dd_zid : ddp->dd_owner_zid));
+ if (zoneinit)
+ dls_devnet_stat_create(ddp, ddp->dd_zid, ddp->dd_zid);
}
/*
@@ -878,7 +928,8 @@ done:
rw_exit(&i_dls_devnet_lock);
if (err == 0) {
if (zoneid != GLOBAL_ZONEID &&
- (err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE)) != 0)
+ (err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE,
+ B_FALSE)) != 0)
(void) dls_devnet_unset(macname, &linkid, B_TRUE);
/*
* The kstat subsystem holds its own locks (rather perimeter)
@@ -887,7 +938,7 @@ done:
* lock hierarchy is kstat locks -> i_dls_devnet_lock.
*/
if (stat_create)
- dls_devnet_stat_create(ddp, zoneid);
+ dls_devnet_stat_create(ddp, zoneid, zoneid);
if (ddpp != NULL)
*ddpp = ddp;
}
@@ -924,17 +975,78 @@ dls_devnet_unset(const char *macname, datalink_id_t *id, boolean_t wait)
ASSERT(ddp->dd_ref != 0);
if ((ddp->dd_ref != 1) || (!wait &&
(ddp->dd_tref != 0 || ddp->dd_prop_taskid != NULL))) {
- mutex_exit(&ddp->dd_mutex);
- rw_exit(&i_dls_devnet_lock);
- return (EBUSY);
+ int zstatus = 0;
+
+ /*
+ * There are a couple of alternatives that might be going on
+ * here; a) the zone is shutting down and it has a transient
+ * link assigned, in which case we want to clean it up instead
+ * of moving it back to the global zone, or b) its possible
+ * that we're trying to clean up an orphaned vnic that was
+ * delegated to a zone and which wasn't cleaned up properly
+ * when the zone went away. Check for either of these cases
+ * before we simply return EBUSY.
+ *
+ * zstatus indicates which situation we are dealing with:
+ * 0 - means return EBUSY
+ * 1 - means case (a), cleanup transient link
+ * -1 - means case (b), orphained VNIC
+ */
+ if (ddp->dd_ref > 1 && ddp->dd_zid != GLOBAL_ZONEID) {
+ zone_t *zp;
+
+ if ((zp = zone_find_by_id(ddp->dd_zid)) == NULL) {
+ zstatus = -1;
+ } else {
+ if (ddp->dd_transient) {
+ zone_status_t s = zone_status_get(zp);
+
+ if (s >= ZONE_IS_SHUTTING_DOWN)
+ zstatus = 1;
+ }
+ zone_rele(zp);
+ }
+ }
+
+ if (zstatus == 0) {
+ mutex_exit(&ddp->dd_mutex);
+ rw_exit(&i_dls_devnet_lock);
+ return (EBUSY);
+ }
+
+ /*
+ * We want to delete the link, reset ref to 1;
+ */
+ if (zstatus == -1)
+ /* Log a warning, but continue in this case */
+ cmn_err(CE_WARN, "clear orphaned datalink: %s\n",
+ ddp->dd_linkname);
+ ddp->dd_ref = 1;
}
ddp->dd_flags |= DD_CONDEMNED;
ddp->dd_ref--;
*id = ddp->dd_linkid;
- if (ddp->dd_zid != GLOBAL_ZONEID)
- (void) i_dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE);
+ if (ddp->dd_zid != GLOBAL_ZONEID) {
+ /*
+ * We need to release the dd_mutex before we try and destroy the
+ * stat. When we destroy it, we'll need to grab the lock for the
+ * kstat but if there's a concurrent reader of the kstat, we'll
+ * be blocked on it. This will lead to deadlock because these
+ * kstats employ a ks_update function (dls_devnet_stat_update)
+ * which needs the dd_mutex that we currently hold.
+ *
+ * Because we've already flagged the dls_devnet_t as
+ * DD_CONDEMNED and we still have a write lock on
+ * i_dls_devnet_lock, we should be able to release the dd_mutex.
+ */
+ mutex_exit(&ddp->dd_mutex);
+ dls_devnet_stat_destroy(ddp, ddp->dd_zid);
+ mutex_enter(&ddp->dd_mutex);
+ (void) i_dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE,
+ B_FALSE);
+ }
/*
* Remove this dls_devnet_t from the hash table.
@@ -960,8 +1072,15 @@ dls_devnet_unset(const char *macname, datalink_id_t *id, boolean_t wait)
ASSERT(ddp->dd_tref == 0 && ddp->dd_prop_taskid == NULL);
}
- if (ddp->dd_linkid != DATALINK_INVALID_LINKID)
+ if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
+ /*
+ * See the earlier call in this function for an explanation.
+ */
+ mutex_exit(&ddp->dd_mutex);
dls_devnet_stat_destroy(ddp, ddp->dd_owner_zid);
+ mutex_enter(&ddp->dd_mutex);
+ }
+
ddp->dd_prop_loaded = B_FALSE;
ddp->dd_linkid = DATALINK_INVALID_LINKID;
@@ -1111,7 +1230,7 @@ dls_devnet_rele(dls_devnet_t *ddp)
}
static int
-dls_devnet_hold_by_name(const char *link, dls_devnet_t **ddpp)
+dls_devnet_hold_by_name(const char *link, dls_devnet_t **ddpp, zoneid_t zid)
{
char drv[MAXLINKNAMELEN];
uint_t ppa;
@@ -1121,7 +1240,7 @@ dls_devnet_hold_by_name(const char *link, dls_devnet_t **ddpp)
dls_dev_handle_t ddh;
int err;
- if ((err = dls_mgmt_get_linkid(link, &linkid)) == 0)
+ if ((err = dls_mgmt_get_linkid_in_zone(link, &linkid, zid)) == 0)
return (dls_devnet_hold(linkid, ddpp));
/*
@@ -1261,9 +1380,15 @@ dls_devnet_phydev(datalink_id_t vlanid, dev_t *devp)
*
* This case does not change the <link name, linkid> mapping, so the link's
* kstats need to be updated with using name associated the given id2.
+ *
+ * The zonename parameter is used to allow us to create a VNIC in the global
+ * zone which is assigned to a non-global zone. Since there is a race condition
+ * in the create process if two VNICs have the same name, we need to rename it
+ * after it has been assigned to the zone.
*/
int
-dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link)
+dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link,
+ boolean_t zoneinit)
{
dls_dev_handle_t ddh = NULL;
int err = 0;
@@ -1313,13 +1438,16 @@ dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link)
* is currently accessing the link kstats, or if the link is on-loan
* to a non-global zone. Then set the DD_KSTAT_CHANGING flag to
* prevent any access to the kstats while we delete and recreate
- * kstats below.
+ * kstats below. However, we skip this check if we're renaming the
+ * vnic as part of bringing it up for a zone.
*/
mutex_enter(&ddp->dd_mutex);
- if (ddp->dd_ref > 1) {
- mutex_exit(&ddp->dd_mutex);
- err = EBUSY;
- goto done;
+ if (!zoneinit) {
+ if (ddp->dd_ref > 1) {
+ mutex_exit(&ddp->dd_mutex);
+ err = EBUSY;
+ goto done;
+ }
}
ddp->dd_flags |= DD_KSTAT_CHANGING;
@@ -1333,7 +1461,15 @@ dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link)
/* rename mac client name and its flow if exists */
if ((err = mac_open(ddp->dd_mac, &mh)) != 0)
goto done;
- (void) mac_rename_primary(mh, link);
+ if (zoneinit) {
+ char tname[MAXLINKNAMELEN];
+
+ (void) snprintf(tname, sizeof (tname), "z%d_%s",
+ ddp->dd_zid, link);
+ (void) mac_rename_primary(mh, tname);
+ } else {
+ (void) mac_rename_primary(mh, link);
+ }
mac_close(mh);
goto done;
}
@@ -1406,7 +1542,7 @@ done:
*/
rw_exit(&i_dls_devnet_lock);
if (err == 0)
- dls_devnet_stat_rename(ddp);
+ dls_devnet_stat_rename(ddp, zoneinit);
if (clear_dd_flag) {
mutex_enter(&ddp->dd_mutex);
@@ -1421,7 +1557,8 @@ done:
}
static int
-i_dls_devnet_setzid(dls_devnet_t *ddp, zoneid_t new_zoneid, boolean_t setprop)
+i_dls_devnet_setzid(dls_devnet_t *ddp, zoneid_t new_zoneid, boolean_t setprop,
+ boolean_t transient)
{
int err;
mac_perim_handle_t mph;
@@ -1454,6 +1591,7 @@ i_dls_devnet_setzid(dls_devnet_t *ddp, zoneid_t new_zoneid, boolean_t setprop)
}
if ((err = dls_link_setzid(ddp->dd_mac, new_zoneid)) == 0) {
ddp->dd_zid = new_zoneid;
+ ddp->dd_transient = transient;
devnet_need_rebuild = B_TRUE;
}
@@ -1468,7 +1606,7 @@ done:
}
int
-dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid)
+dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid, boolean_t transient)
{
dls_devnet_t *ddp;
int err;
@@ -1490,7 +1628,7 @@ dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid)
refheld = B_TRUE;
}
- if ((err = i_dls_devnet_setzid(ddh, new_zid, B_TRUE)) != 0) {
+ if ((err = i_dls_devnet_setzid(ddh, new_zid, B_TRUE, transient)) != 0) {
if (refheld)
dls_devnet_rele(ddp);
return (err);
@@ -1507,7 +1645,7 @@ dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid)
if (old_zid != GLOBAL_ZONEID)
dls_devnet_stat_destroy(ddh, old_zid);
if (new_zid != GLOBAL_ZONEID)
- dls_devnet_stat_create(ddh, new_zid);
+ dls_devnet_stat_create(ddh, new_zid, new_zid);
return (0);
}
@@ -1545,15 +1683,19 @@ dls_devnet_islinkvisible(datalink_id_t linkid, zoneid_t zoneid)
* Access a vanity naming node.
*/
int
-dls_devnet_open(const char *link, dls_dl_handle_t *dhp, dev_t *devp)
+dls_devnet_open_in_zone(const char *link, dls_dl_handle_t *dhp, dev_t *devp,
+ zoneid_t zid)
{
dls_devnet_t *ddp;
dls_link_t *dlp;
- zoneid_t zid = getzoneid();
+ zoneid_t czid = getzoneid();
int err;
mac_perim_handle_t mph;
- if ((err = dls_devnet_hold_by_name(link, &ddp)) != 0)
+ if (czid != GLOBAL_ZONEID && czid != zid)
+ return (ENOENT);
+
+ if ((err = dls_devnet_hold_by_name(link, &ddp, zid)) != 0)
return (err);
dls_devnet_prop_task_wait(ddp);
@@ -1586,6 +1728,12 @@ dls_devnet_open(const char *link, dls_dl_handle_t *dhp, dev_t *devp)
return (0);
}
+int
+dls_devnet_open(const char *link, dls_dl_handle_t *dhp, dev_t *devp)
+{
+ return (dls_devnet_open_in_zone(link, dhp, devp, getzoneid()));
+}
+
/*
* Close access to a vanity naming node.
*/
diff --git a/usr/src/uts/common/io/dls/dls_stat.c b/usr/src/uts/common/io/dls/dls_stat.c
index 51e4be7260..82dceff278 100644
--- a/usr/src/uts/common/io/dls/dls_stat.c
+++ b/usr/src/uts/common/io/dls/dls_stat.c
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011 Joyent, Inc. All rights reserved.
*/
/*
@@ -30,30 +31,33 @@
#include <sys/dld_impl.h>
#include <sys/mac_ether.h>
-static mac_stat_info_t i_dls_si[] = {
- { MAC_STAT_IFSPEED, "ifspeed", KSTAT_DATA_UINT64, 0 },
- { MAC_STAT_MULTIRCV, "multircv", KSTAT_DATA_UINT32, 0 },
- { MAC_STAT_BRDCSTRCV, "brdcstrcv", KSTAT_DATA_UINT32, 0 },
- { MAC_STAT_MULTIXMT, "multixmt", KSTAT_DATA_UINT32, 0 },
- { MAC_STAT_BRDCSTXMT, "brdcstxmt", KSTAT_DATA_UINT32, 0 },
- { MAC_STAT_NORCVBUF, "norcvbuf", KSTAT_DATA_UINT32, 0 },
- { MAC_STAT_IERRORS, "ierrors", KSTAT_DATA_UINT32, 0 },
- { MAC_STAT_NOXMTBUF, "noxmtbuf", KSTAT_DATA_UINT32, 0 },
- { MAC_STAT_OERRORS, "oerrors", KSTAT_DATA_UINT32, 0 },
- { MAC_STAT_COLLISIONS, "collisions", KSTAT_DATA_UINT32, 0 },
- { MAC_STAT_RBYTES, "rbytes", KSTAT_DATA_UINT32, 0 },
- { MAC_STAT_IPACKETS, "ipackets", KSTAT_DATA_UINT32, 0 },
- { MAC_STAT_OBYTES, "obytes", KSTAT_DATA_UINT32, 0 },
- { MAC_STAT_OPACKETS, "opackets", KSTAT_DATA_UINT32, 0 },
- { MAC_STAT_RBYTES, "rbytes64", KSTAT_DATA_UINT64, 0 },
- { MAC_STAT_IPACKETS, "ipackets64", KSTAT_DATA_UINT64, 0 },
- { MAC_STAT_OBYTES, "obytes64", KSTAT_DATA_UINT64, 0 },
- { MAC_STAT_OPACKETS, "opackets64", KSTAT_DATA_UINT64, 0 },
- { MAC_STAT_LINK_STATE, "link_state", KSTAT_DATA_UINT32,
- (uint64_t)LINK_STATE_UNKNOWN}
-};
-
-#define STAT_INFO_COUNT (sizeof (i_dls_si) / sizeof (i_dls_si[0]))
+/*
+ * structure for link kstats
+ */
+typedef struct {
+ kstat_named_t dk_ifspeed;
+ kstat_named_t dk_multircv;
+ kstat_named_t dk_brdcstrcv;
+ kstat_named_t dk_multixmt;
+ kstat_named_t dk_brdcstxmt;
+ kstat_named_t dk_norcvbuf;
+ kstat_named_t dk_ierrors;
+ kstat_named_t dk_noxmtbuf;
+ kstat_named_t dk_oerrors;
+ kstat_named_t dk_collisions;
+ kstat_named_t dk_rbytes;
+ kstat_named_t dk_ipackets;
+ kstat_named_t dk_obytes;
+ kstat_named_t dk_opackets;
+ kstat_named_t dk_rbytes64;
+ kstat_named_t dk_ipackets64;
+ kstat_named_t dk_obytes64;
+ kstat_named_t dk_opackets64;
+ kstat_named_t dk_link_state;
+ kstat_named_t dk_link_duplex;
+ kstat_named_t dk_unknowns;
+ kstat_named_t dk_zonename;
+} dls_kstat_t;
/*
* Exported functions.
@@ -61,42 +65,54 @@ static mac_stat_info_t i_dls_si[] = {
int
dls_stat_update(kstat_t *ksp, dls_link_t *dlp, int rw)
{
- kstat_named_t *knp;
- uint_t i;
- uint64_t val;
+ dls_kstat_t *dkp = ksp->ks_data;
if (rw != KSTAT_READ)
return (EACCES);
- knp = (kstat_named_t *)ksp->ks_data;
- for (i = 0; i < STAT_INFO_COUNT; i++) {
- val = mac_stat_get(dlp->dl_mh, i_dls_si[i].msi_stat);
-
- switch (i_dls_si[i].msi_type) {
- case KSTAT_DATA_UINT64:
- knp->value.ui64 = val;
- break;
- case KSTAT_DATA_UINT32:
- knp->value.ui32 = (uint32_t)val;
- break;
- default:
- ASSERT(B_FALSE);
- }
-
- knp++;
- }
+ dkp->dk_ifspeed.value.ui64 = mac_stat_get(dlp->dl_mh, MAC_STAT_IFSPEED);
+ dkp->dk_multircv.value.ui32 = mac_stat_get(dlp->dl_mh,
+ MAC_STAT_MULTIRCV);
+ dkp->dk_brdcstrcv.value.ui32 = mac_stat_get(dlp->dl_mh,
+ MAC_STAT_BRDCSTRCV);
+ dkp->dk_multixmt.value.ui32 = mac_stat_get(dlp->dl_mh,
+ MAC_STAT_MULTIXMT);
+ dkp->dk_brdcstxmt.value.ui32 = mac_stat_get(dlp->dl_mh,
+ MAC_STAT_BRDCSTXMT);
+ dkp->dk_norcvbuf.value.ui32 = mac_stat_get(dlp->dl_mh,
+ MAC_STAT_NORCVBUF);
+ dkp->dk_ierrors.value.ui32 = mac_stat_get(dlp->dl_mh, MAC_STAT_IERRORS);
+ dkp->dk_noxmtbuf.value.ui32 = mac_stat_get(dlp->dl_mh,
+ MAC_STAT_NOXMTBUF);
+ dkp->dk_oerrors.value.ui32 = mac_stat_get(dlp->dl_mh, MAC_STAT_OERRORS);
+ dkp->dk_collisions.value.ui32 = mac_stat_get(dlp->dl_mh,
+ MAC_STAT_COLLISIONS);
+ dkp->dk_rbytes.value.ui32 = mac_stat_get(dlp->dl_mh, MAC_STAT_RBYTES);
+ dkp->dk_ipackets.value.ui32 = mac_stat_get(dlp->dl_mh,
+ MAC_STAT_IPACKETS);
+ dkp->dk_obytes.value.ui32 = mac_stat_get(dlp->dl_mh, MAC_STAT_OBYTES);
+ dkp->dk_opackets.value.ui32 = mac_stat_get(dlp->dl_mh,
+ MAC_STAT_OPACKETS);
+ dkp->dk_rbytes64.value.ui64 = mac_stat_get(dlp->dl_mh, MAC_STAT_RBYTES);
+ dkp->dk_ipackets64.value.ui64 = mac_stat_get(dlp->dl_mh,
+ MAC_STAT_IPACKETS);
+ dkp->dk_obytes64.value.ui64 = mac_stat_get(dlp->dl_mh, MAC_STAT_OBYTES);
+ dkp->dk_opackets64.value.ui64 = mac_stat_get(dlp->dl_mh,
+ MAC_STAT_OPACKETS);
+ dkp->dk_link_state.value.ui32 = mac_stat_get(dlp->dl_mh,
+ MAC_STAT_LINK_STATE);
/*
* Ethernet specific kstat "link_duplex"
*/
if (dlp->dl_mip->mi_nativemedia != DL_ETHER) {
- knp->value.ui32 = LINK_DUPLEX_UNKNOWN;
+ dkp->dk_link_duplex.value.ui32 = LINK_DUPLEX_UNKNOWN;
} else {
- val = mac_stat_get(dlp->dl_mh, ETHER_STAT_LINK_DUPLEX);
- knp->value.ui32 = (uint32_t)val;
+ dkp->dk_link_duplex.value.ui32 =
+ (uint32_t)mac_stat_get(dlp->dl_mh, ETHER_STAT_LINK_DUPLEX);
}
- knp++;
- knp->value.ui32 = dlp->dl_unknowns;
+
+ dkp->dk_unknowns.value.ui32 = dlp->dl_unknowns;
return (0);
}
@@ -104,30 +120,66 @@ dls_stat_update(kstat_t *ksp, dls_link_t *dlp, int rw)
int
dls_stat_create(const char *module, int instance, const char *name,
zoneid_t zoneid, int (*update)(struct kstat *, int), void *private,
- kstat_t **kspp)
+ kstat_t **kspp, zoneid_t newzoneid)
{
kstat_t *ksp;
- kstat_named_t *knp;
- uint_t i;
+ zone_t *zone;
+ dls_kstat_t *dkp;
if ((ksp = kstat_create_zone(module, instance, name, "net",
- KSTAT_TYPE_NAMED, STAT_INFO_COUNT + 2, 0, zoneid)) == NULL) {
+ KSTAT_TYPE_NAMED, sizeof (dls_kstat_t) / sizeof (kstat_named_t),
+ KSTAT_FLAG_VIRTUAL, zoneid)) == NULL) {
return (EINVAL);
}
ksp->ks_update = update;
ksp->ks_private = private;
+ dkp = ksp->ks_data = kmem_zalloc(sizeof (dls_kstat_t), KM_SLEEP);
+ if ((zone = zone_find_by_id(newzoneid)) != NULL) {
+ ksp->ks_data_size += strlen(zone->zone_name) + 1;
+ }
- knp = (kstat_named_t *)ksp->ks_data;
- for (i = 0; i < STAT_INFO_COUNT; i++) {
- kstat_named_init(knp, i_dls_si[i].msi_name,
- i_dls_si[i].msi_type);
- knp++;
+ kstat_named_init(&dkp->dk_ifspeed, "ifspeed", KSTAT_DATA_UINT64);
+ kstat_named_init(&dkp->dk_multircv, "multircv", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_brdcstrcv, "brdcstrcv", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_multixmt, "multixmt", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_brdcstxmt, "brdcstxmt", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_norcvbuf, "norcvbuf", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_ierrors, "ierrors", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_noxmtbuf, "noxmtbuf", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_oerrors, "oerrors", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_collisions, "collisions", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_rbytes, "rbytes", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_ipackets, "ipackets", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_obytes, "obytes", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_opackets, "opackets", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_rbytes64, "rbytes64", KSTAT_DATA_UINT64);
+ kstat_named_init(&dkp->dk_ipackets64, "ipackets64", KSTAT_DATA_UINT64);
+ kstat_named_init(&dkp->dk_obytes64, "obytes64", KSTAT_DATA_UINT64);
+ kstat_named_init(&dkp->dk_opackets64, "opackets64", KSTAT_DATA_UINT64);
+ kstat_named_init(&dkp->dk_link_state, "link_state", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_link_duplex, "link_duplex",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_unknowns, "unknowns", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_zonename, "zonename", KSTAT_DATA_STRING);
+
+ if (zone != NULL) {
+ kstat_named_setstr(&dkp->dk_zonename, zone->zone_name);
+ zone_rele(zone);
}
- kstat_named_init(knp++, "link_duplex", KSTAT_DATA_UINT32);
- kstat_named_init(knp, "unknowns", KSTAT_DATA_UINT32);
kstat_install(ksp);
*kspp = ksp;
return (0);
}
+
+void
+dls_stat_delete(kstat_t *ksp)
+{
+ void *data;
+ if (ksp != NULL) {
+ data = ksp->ks_data;
+ kstat_delete(ksp);
+ kmem_free(data, sizeof (dls_kstat_t));
+ }
+}
diff --git a/usr/src/uts/common/io/dr_sas/THIRDPARTYLICENSE b/usr/src/uts/common/io/dr_sas/THIRDPARTYLICENSE
new file mode 100644
index 0000000000..00aefb6f51
--- /dev/null
+++ b/usr/src/uts/common/io/dr_sas/THIRDPARTYLICENSE
@@ -0,0 +1,32 @@
+/*
+ * MegaRAID device driver for SAS2.0 controllers
+ * Copyright (c) 2009, LSI Logic Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the author nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
diff --git a/usr/src/uts/common/io/dr_sas/THIRDPARTYLICENSE.descrip b/usr/src/uts/common/io/dr_sas/THIRDPARTYLICENSE.descrip
new file mode 100644
index 0000000000..ac6d2d1b15
--- /dev/null
+++ b/usr/src/uts/common/io/dr_sas/THIRDPARTYLICENSE.descrip
@@ -0,0 +1 @@
+DR_SAS DRIVER
diff --git a/usr/src/uts/common/io/dr_sas/dr_sas.c b/usr/src/uts/common/io/dr_sas/dr_sas.c
new file mode 100644
index 0000000000..5b1dc82938
--- /dev/null
+++ b/usr/src/uts/common/io/dr_sas/dr_sas.c
@@ -0,0 +1,5506 @@
+/*
+ * dr_sas.c: source for dr_sas driver
+ *
+ * MegaRAID device driver for SAS2.0 controllers
+ * Copyright (c) 2008-2009, LSI Logic Corporation.
+ * All rights reserved.
+ *
+ * Version:
+ * Author:
+ * Arun Chandrashekhar
+ * Manju R
+ * Rajesh Prabhakaran
+ * Seokmann Ju
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the author nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/file.h>
+#include <sys/errno.h>
+#include <sys/open.h>
+#include <sys/cred.h>
+#include <sys/modctl.h>
+#include <sys/conf.h>
+#include <sys/devops.h>
+#include <sys/cmn_err.h>
+#include <sys/kmem.h>
+#include <sys/stat.h>
+#include <sys/mkdev.h>
+#include <sys/pci.h>
+#include <sys/scsi/scsi.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/atomic.h>
+#include <sys/signal.h>
+#include <sys/fs/dv_node.h> /* devfs_clean */
+
+#include "dr_sas.h"
+
+/*
+ * FMA header files
+ */
+#include <sys/ddifm.h>
+#include <sys/fm/protocol.h>
+#include <sys/fm/util.h>
+#include <sys/fm/io/ddi.h>
+
+/*
+ * Local static data
+ */
+static void *drsas_state = NULL;
+static int debug_level_g = CL_NONE;
+
+#pragma weak scsi_hba_open
+#pragma weak scsi_hba_close
+#pragma weak scsi_hba_ioctl
+
+static ddi_dma_attr_t drsas_generic_dma_attr = {
+ DMA_ATTR_V0, /* dma_attr_version */
+ 0, /* low DMA address range */
+ 0xFFFFFFFFU, /* high DMA address range */
+ 0xFFFFFFFFU, /* DMA counter register */
+ 8, /* DMA address alignment */
+ 0x07, /* DMA burstsizes */
+ 1, /* min DMA size */
+ 0xFFFFFFFFU, /* max DMA size */
+ 0xFFFFFFFFU, /* segment boundary */
+ DRSAS_MAX_SGE_CNT, /* dma_attr_sglen */
+ 512, /* granularity of device */
+ 0 /* bus specific DMA flags */
+};
+
+int32_t drsas_max_cap_maxxfer = 0x1000000;
+
+/*
+ * cb_ops contains base level routines
+ */
+static struct cb_ops drsas_cb_ops = {
+ drsas_open, /* open */
+ drsas_close, /* close */
+ nodev, /* strategy */
+ nodev, /* print */
+ nodev, /* dump */
+ nodev, /* read */
+ nodev, /* write */
+ drsas_ioctl, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ nodev, /* segmap */
+ nochpoll, /* poll */
+ nodev, /* cb_prop_op */
+ 0, /* streamtab */
+ D_NEW | D_HOTPLUG, /* cb_flag */
+ CB_REV, /* cb_rev */
+ nodev, /* cb_aread */
+ nodev /* cb_awrite */
+};
+
+/*
+ * dev_ops contains configuration routines
+ */
+static struct dev_ops drsas_ops = {
+ DEVO_REV, /* rev, */
+ 0, /* refcnt */
+ drsas_getinfo, /* getinfo */
+ nulldev, /* identify */
+ nulldev, /* probe */
+ drsas_attach, /* attach */
+ drsas_detach, /* detach */
+ drsas_reset, /* reset */
+ &drsas_cb_ops, /* char/block ops */
+ NULL, /* bus ops */
+ NULL, /* power */
+ ddi_quiesce_not_supported, /* quiesce */
+};
+
+char _depends_on[] = "misc/scsi";
+
+static struct modldrv modldrv = {
+ &mod_driverops, /* module type - driver */
+ DRSAS_VERSION,
+ &drsas_ops, /* driver ops */
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, /* ml_rev - must be MODREV_1 */
+ &modldrv, /* ml_linkage */
+ NULL /* end of driver linkage */
+};
+
+static struct ddi_device_acc_attr endian_attr = {
+ DDI_DEVICE_ATTR_V0,
+ DDI_STRUCTURE_LE_ACC,
+ DDI_STRICTORDER_ACC
+};
+
+
+/*
+ * ************************************************************************** *
+ * *
+ * common entry points - for loadable kernel modules *
+ * *
+ * ************************************************************************** *
+ */
+
+int
+_init(void)
+{
+ int ret;
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ ret = ddi_soft_state_init(&drsas_state,
+ sizeof (struct drsas_instance), 0);
+
+ if (ret != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN, "dr_sas: could not init state"));
+ return (ret);
+ }
+
+ if ((ret = scsi_hba_init(&modlinkage)) != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN, "dr_sas: could not init scsi hba"));
+ ddi_soft_state_fini(&drsas_state);
+ return (ret);
+ }
+
+ ret = mod_install(&modlinkage);
+
+ if (ret != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN, "dr_sas: mod_install failed"));
+ scsi_hba_fini(&modlinkage);
+ ddi_soft_state_fini(&drsas_state);
+ }
+
+ return (ret);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ return (mod_info(&modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ int ret;
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ if ((ret = mod_remove(&modlinkage)) != DDI_SUCCESS)
+ return (ret);
+
+ scsi_hba_fini(&modlinkage);
+
+ ddi_soft_state_fini(&drsas_state);
+
+ return (ret);
+}
+
+
+/*
+ * ************************************************************************** *
+ * *
+ * common entry points - for autoconfiguration *
+ * *
+ * ************************************************************************** *
+ */
+
+static int
+drsas_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int instance_no;
+ int nregs;
+ uint8_t added_isr_f = 0;
+ uint8_t added_soft_isr_f = 0;
+ uint8_t create_devctl_node_f = 0;
+ uint8_t create_scsi_node_f = 0;
+ uint8_t create_ioc_node_f = 0;
+ uint8_t tran_alloc_f = 0;
+ uint8_t irq;
+ uint16_t vendor_id;
+ uint16_t device_id;
+ uint16_t subsysvid;
+ uint16_t subsysid;
+ uint16_t command;
+ off_t reglength = 0;
+ int intr_types = 0;
+ char *data;
+ int msi_enable = 0;
+
+ scsi_hba_tran_t *tran;
+ ddi_dma_attr_t tran_dma_attr;
+ struct drsas_instance *instance;
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ /* CONSTCOND */
+ ASSERT(NO_COMPETING_THREADS);
+
+ instance_no = ddi_get_instance(dip);
+
+ /*
+ * check to see whether this device is in a DMA-capable slot.
+ */
+ if (ddi_slaveonly(dip) == DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas%d: Device in slave-only slot, unused",
+ instance_no));
+ return (DDI_FAILURE);
+ }
+
+ switch (cmd) {
+ case DDI_ATTACH:
+ con_log(CL_DLEVEL1, (CE_NOTE, "dr_sas: DDI_ATTACH"));
+ /* allocate the soft state for the instance */
+ if (ddi_soft_state_zalloc(drsas_state, instance_no)
+ != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas%d: Failed to allocate soft state",
+ instance_no));
+
+ return (DDI_FAILURE);
+ }
+
+ instance = (struct drsas_instance *)ddi_get_soft_state
+ (drsas_state, instance_no);
+
+ if (instance == NULL) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas%d: Bad soft state", instance_no));
+
+ ddi_soft_state_free(drsas_state, instance_no);
+
+ return (DDI_FAILURE);
+ }
+
+ bzero((caddr_t)instance,
+ sizeof (struct drsas_instance));
+
+ instance->func_ptr = kmem_zalloc(
+ sizeof (struct drsas_func_ptr), KM_SLEEP);
+ ASSERT(instance->func_ptr);
+
+ /* Setup the PCI configuration space handles */
+ if (pci_config_setup(dip, &instance->pci_handle) !=
+ DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas%d: pci config setup failed ",
+ instance_no));
+
+ kmem_free(instance->func_ptr,
+ sizeof (struct drsas_func_ptr));
+ ddi_soft_state_free(drsas_state, instance_no);
+
+ return (DDI_FAILURE);
+ }
+
+ if (ddi_dev_nregs(dip, &nregs) != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas: failed to get registers."));
+
+ pci_config_teardown(&instance->pci_handle);
+ kmem_free(instance->func_ptr,
+ sizeof (struct drsas_func_ptr));
+ ddi_soft_state_free(drsas_state, instance_no);
+
+ return (DDI_FAILURE);
+ }
+
+ vendor_id = pci_config_get16(instance->pci_handle,
+ PCI_CONF_VENID);
+ device_id = pci_config_get16(instance->pci_handle,
+ PCI_CONF_DEVID);
+
+ subsysvid = pci_config_get16(instance->pci_handle,
+ PCI_CONF_SUBVENID);
+ subsysid = pci_config_get16(instance->pci_handle,
+ PCI_CONF_SUBSYSID);
+
+ pci_config_put16(instance->pci_handle, PCI_CONF_COMM,
+ (pci_config_get16(instance->pci_handle,
+ PCI_CONF_COMM) | PCI_COMM_ME));
+ irq = pci_config_get8(instance->pci_handle,
+ PCI_CONF_ILINE);
+
+ con_log(CL_DLEVEL1, (CE_CONT, "dr_sas%d: "
+ "0x%x:0x%x 0x%x:0x%x, irq:%d drv-ver:%s",
+ instance_no, vendor_id, device_id, subsysvid,
+ subsysid, irq, DRSAS_VERSION));
+
+ /* enable bus-mastering */
+ command = pci_config_get16(instance->pci_handle,
+ PCI_CONF_COMM);
+
+ if (!(command & PCI_COMM_ME)) {
+ command |= PCI_COMM_ME;
+
+ pci_config_put16(instance->pci_handle,
+ PCI_CONF_COMM, command);
+
+ con_log(CL_ANN, (CE_CONT, "dr_sas%d: "
+ "enable bus-mastering", instance_no));
+ } else {
+ con_log(CL_DLEVEL1, (CE_CONT, "dr_sas%d: "
+ "bus-mastering already set", instance_no));
+ }
+
+ /* initialize function pointers */
+ if ((device_id == PCI_DEVICE_ID_LSI_2108VDE) ||
+ (device_id == PCI_DEVICE_ID_LSI_2108V)) {
+ con_log(CL_DLEVEL1, (CE_CONT, "dr_sas%d: "
+ "2108V/DE detected", instance_no));
+ instance->func_ptr->read_fw_status_reg =
+ read_fw_status_reg_ppc;
+ instance->func_ptr->issue_cmd = issue_cmd_ppc;
+ instance->func_ptr->issue_cmd_in_sync_mode =
+ issue_cmd_in_sync_mode_ppc;
+ instance->func_ptr->issue_cmd_in_poll_mode =
+ issue_cmd_in_poll_mode_ppc;
+ instance->func_ptr->enable_intr =
+ enable_intr_ppc;
+ instance->func_ptr->disable_intr =
+ disable_intr_ppc;
+ instance->func_ptr->intr_ack = intr_ack_ppc;
+ } else {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas: Invalid device detected"));
+
+ pci_config_teardown(&instance->pci_handle);
+ kmem_free(instance->func_ptr,
+ sizeof (struct drsas_func_ptr));
+ ddi_soft_state_free(drsas_state, instance_no);
+
+ return (DDI_FAILURE);
+ }
+
+ instance->baseaddress = pci_config_get32(
+ instance->pci_handle, PCI_CONF_BASE0);
+ instance->baseaddress &= 0x0fffc;
+
+ instance->dip = dip;
+ instance->vendor_id = vendor_id;
+ instance->device_id = device_id;
+ instance->subsysvid = subsysvid;
+ instance->subsysid = subsysid;
+ instance->instance = instance_no;
+
+ /* Initialize FMA */
+ instance->fm_capabilities = ddi_prop_get_int(
+ DDI_DEV_T_ANY, instance->dip, DDI_PROP_DONTPASS,
+ "fm-capable", DDI_FM_EREPORT_CAPABLE |
+ DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE
+ | DDI_FM_ERRCB_CAPABLE);
+
+ drsas_fm_init(instance);
+
+ /* Initialize Interrupts */
+ if ((ddi_dev_regsize(instance->dip,
+ REGISTER_SET_IO_2108, &reglength) != DDI_SUCCESS) ||
+ reglength < MINIMUM_MFI_MEM_SZ) {
+ return (DDI_FAILURE);
+ }
+ if (reglength > DEFAULT_MFI_MEM_SZ) {
+ reglength = DEFAULT_MFI_MEM_SZ;
+ con_log(CL_DLEVEL1, (CE_NOTE,
+ "dr_sas: register length to map is "
+ "0x%lx bytes", reglength));
+ }
+ if (ddi_regs_map_setup(instance->dip,
+ REGISTER_SET_IO_2108, &instance->regmap, 0,
+ reglength, &endian_attr, &instance->regmap_handle)
+ != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_NOTE,
+ "dr_sas: couldn't map control registers"));
+ goto fail_attach;
+ }
+
+ /*
+ * Disable Interrupt Now.
+ * Setup Software interrupt
+ */
+ instance->func_ptr->disable_intr(instance);
+
+ msi_enable = 0;
+ if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip, 0,
+ "drsas-enable-msi", &data) == DDI_SUCCESS) {
+ if (strncmp(data, "yes", 3) == 0) {
+ msi_enable = 1;
+ con_log(CL_ANN, (CE_WARN,
+ "msi_enable = %d ENABLED",
+ msi_enable));
+ }
+ ddi_prop_free(data);
+ }
+
+ con_log(CL_DLEVEL1, (CE_WARN, "msi_enable = %d",
+ msi_enable));
+
+ /* Check for all supported interrupt types */
+ if (ddi_intr_get_supported_types(
+ dip, &intr_types) != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN,
+ "ddi_intr_get_supported_types() failed"));
+ goto fail_attach;
+ }
+
+ con_log(CL_DLEVEL1, (CE_NOTE,
+ "ddi_intr_get_supported_types() ret: 0x%x",
+ intr_types));
+
+ /* Initialize and Setup Interrupt handler */
+ if (msi_enable && (intr_types & DDI_INTR_TYPE_MSIX)) {
+ if (drsas_add_intrs(instance,
+ DDI_INTR_TYPE_MSIX) != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN,
+ "MSIX interrupt query failed"));
+ goto fail_attach;
+ }
+ instance->intr_type = DDI_INTR_TYPE_MSIX;
+ } else if (msi_enable && (intr_types &
+ DDI_INTR_TYPE_MSI)) {
+ if (drsas_add_intrs(instance,
+ DDI_INTR_TYPE_MSI) != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN,
+ "MSI interrupt query failed"));
+ goto fail_attach;
+ }
+ instance->intr_type = DDI_INTR_TYPE_MSI;
+ } else if (intr_types & DDI_INTR_TYPE_FIXED) {
+ msi_enable = 0;
+ if (drsas_add_intrs(instance,
+ DDI_INTR_TYPE_FIXED) != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN,
+ "FIXED interrupt query failed"));
+ goto fail_attach;
+ }
+ instance->intr_type = DDI_INTR_TYPE_FIXED;
+ } else {
+ con_log(CL_ANN, (CE_WARN, "Device cannot "
+ "suppport either FIXED or MSI/X "
+ "interrupts"));
+ goto fail_attach;
+ }
+
+ added_isr_f = 1;
+
+ /* setup the mfi based low level driver */
+ if (init_mfi(instance) != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN, "dr_sas: "
+ "could not initialize the low level driver"));
+
+ goto fail_attach;
+ }
+
+ /* Initialize all Mutex */
+ INIT_LIST_HEAD(&instance->completed_pool_list);
+ mutex_init(&instance->completed_pool_mtx,
+ "completed_pool_mtx", MUTEX_DRIVER,
+ DDI_INTR_PRI(instance->intr_pri));
+
+ mutex_init(&instance->int_cmd_mtx, "int_cmd_mtx",
+ MUTEX_DRIVER, DDI_INTR_PRI(instance->intr_pri));
+ cv_init(&instance->int_cmd_cv, NULL, CV_DRIVER, NULL);
+
+ mutex_init(&instance->cmd_pool_mtx, "cmd_pool_mtx",
+ MUTEX_DRIVER, DDI_INTR_PRI(instance->intr_pri));
+
+ /* Register our soft-isr for highlevel interrupts. */
+ instance->isr_level = instance->intr_pri;
+ if (instance->isr_level == HIGH_LEVEL_INTR) {
+ if (ddi_add_softintr(dip, DDI_SOFTINT_HIGH,
+ &instance->soft_intr_id, NULL, NULL,
+ drsas_softintr, (caddr_t)instance) !=
+ DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN,
+ " Software ISR did not register"));
+
+ goto fail_attach;
+ }
+
+ added_soft_isr_f = 1;
+ }
+
+ /* Allocate a transport structure */
+ tran = scsi_hba_tran_alloc(dip, SCSI_HBA_CANSLEEP);
+
+ if (tran == NULL) {
+ con_log(CL_ANN, (CE_WARN,
+ "scsi_hba_tran_alloc failed"));
+ goto fail_attach;
+ }
+
+ tran_alloc_f = 1;
+
+ instance->tran = tran;
+
+ tran->tran_hba_private = instance;
+ tran->tran_tgt_init = drsas_tran_tgt_init;
+ tran->tran_tgt_probe = scsi_hba_probe;
+ tran->tran_tgt_free = drsas_tran_tgt_free;
+ tran->tran_init_pkt = drsas_tran_init_pkt;
+ tran->tran_start = drsas_tran_start;
+ tran->tran_abort = drsas_tran_abort;
+ tran->tran_reset = drsas_tran_reset;
+ tran->tran_getcap = drsas_tran_getcap;
+ tran->tran_setcap = drsas_tran_setcap;
+ tran->tran_destroy_pkt = drsas_tran_destroy_pkt;
+ tran->tran_dmafree = drsas_tran_dmafree;
+ tran->tran_sync_pkt = drsas_tran_sync_pkt;
+ tran->tran_bus_config = drsas_tran_bus_config;
+
+ tran_dma_attr = drsas_generic_dma_attr;
+ tran_dma_attr.dma_attr_sgllen = instance->max_num_sge;
+
+ /* Attach this instance of the hba */
+ if (scsi_hba_attach_setup(dip, &tran_dma_attr, tran, 0)
+ != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN,
+ "scsi_hba_attach failed"));
+
+ goto fail_attach;
+ }
+
+ /* create devctl node for cfgadm command */
+ if (ddi_create_minor_node(dip, "devctl",
+ S_IFCHR, INST2DEVCTL(instance_no),
+ DDI_NT_SCSI_NEXUS, 0) == DDI_FAILURE) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas: failed to create devctl node."));
+
+ goto fail_attach;
+ }
+
+ create_devctl_node_f = 1;
+
+ /* create scsi node for cfgadm command */
+ if (ddi_create_minor_node(dip, "scsi", S_IFCHR,
+ INST2SCSI(instance_no),
+ DDI_NT_SCSI_ATTACHMENT_POINT, 0) ==
+ DDI_FAILURE) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas: failed to create scsi node."));
+
+ goto fail_attach;
+ }
+
+ create_scsi_node_f = 1;
+
+ (void) sprintf(instance->iocnode, "%d:lsirdctl",
+ instance_no);
+
+ /*
+ * Create a node for applications
+ * for issuing ioctl to the driver.
+ */
+ if (ddi_create_minor_node(dip, instance->iocnode,
+ S_IFCHR, INST2LSIRDCTL(instance_no),
+ DDI_PSEUDO, 0) == DDI_FAILURE) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas: failed to create ioctl node."));
+
+ goto fail_attach;
+ }
+
+ create_ioc_node_f = 1;
+
+ /* Create a taskq to handle dr events */
+ if ((instance->taskq = ddi_taskq_create(dip,
+ "drsas_dr_taskq", 1,
+ TASKQ_DEFAULTPRI, 0)) == NULL) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas: failed to create taskq "));
+ instance->taskq = NULL;
+ goto fail_attach;
+ }
+
+ /* enable interrupt */
+ instance->func_ptr->enable_intr(instance);
+
+ /* initiate AEN */
+ if (start_mfi_aen(instance)) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas: failed to initiate AEN."));
+ goto fail_initiate_aen;
+ }
+
+ con_log(CL_DLEVEL1, (CE_NOTE,
+ "AEN started for instance %d.", instance_no));
+
+ /* Finally! We are on the air. */
+ ddi_report_dev(dip);
+
+ if (drsas_check_acc_handle(instance->regmap_handle) !=
+ DDI_SUCCESS) {
+ goto fail_attach;
+ }
+ if (drsas_check_acc_handle(instance->pci_handle) !=
+ DDI_SUCCESS) {
+ goto fail_attach;
+ }
+ instance->dr_ld_list =
+ kmem_zalloc(MRDRV_MAX_LD * sizeof (struct drsas_ld),
+ KM_SLEEP);
+ break;
+ case DDI_PM_RESUME:
+ con_log(CL_ANN, (CE_NOTE,
+ "dr_sas: DDI_PM_RESUME"));
+ break;
+ case DDI_RESUME:
+ con_log(CL_ANN, (CE_NOTE,
+ "dr_sas: DDI_RESUME"));
+ break;
+ default:
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas: invalid attach cmd=%x", cmd));
+ return (DDI_FAILURE);
+ }
+
+ return (DDI_SUCCESS);
+
+fail_initiate_aen:
+fail_attach:
+ if (create_devctl_node_f) {
+ ddi_remove_minor_node(dip, "devctl");
+ }
+
+ if (create_scsi_node_f) {
+ ddi_remove_minor_node(dip, "scsi");
+ }
+
+ if (create_ioc_node_f) {
+ ddi_remove_minor_node(dip, instance->iocnode);
+ }
+
+ if (tran_alloc_f) {
+ scsi_hba_tran_free(tran);
+ }
+
+
+ if (added_soft_isr_f) {
+ ddi_remove_softintr(instance->soft_intr_id);
+ }
+
+ if (added_isr_f) {
+ drsas_rem_intrs(instance);
+ }
+
+ if (instance && instance->taskq) {
+ ddi_taskq_destroy(instance->taskq);
+ }
+
+ drsas_fm_ereport(instance, DDI_FM_DEVICE_NO_RESPONSE);
+ ddi_fm_service_impact(instance->dip, DDI_SERVICE_LOST);
+
+ drsas_fm_fini(instance);
+
+ pci_config_teardown(&instance->pci_handle);
+
+ ddi_soft_state_free(drsas_state, instance_no);
+
+ con_log(CL_ANN, (CE_NOTE,
+ "dr_sas: return failure from drsas_attach"));
+
+ return (DDI_FAILURE);
+}
+
+/*ARGSUSED*/
+static int
+drsas_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
+{
+ int rval;
+ int drsas_minor = getminor((dev_t)arg);
+
+ struct drsas_instance *instance;
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ switch (cmd) {
+ case DDI_INFO_DEVT2DEVINFO:
+ instance = (struct drsas_instance *)
+ ddi_get_soft_state(drsas_state,
+ MINOR2INST(drsas_minor));
+
+ if (instance == NULL) {
+ *resultp = NULL;
+ rval = DDI_FAILURE;
+ } else {
+ *resultp = instance->dip;
+ rval = DDI_SUCCESS;
+ }
+ break;
+ case DDI_INFO_DEVT2INSTANCE:
+ *resultp = (void *)instance;
+ rval = DDI_SUCCESS;
+ break;
+ default:
+ *resultp = NULL;
+ rval = DDI_FAILURE;
+ }
+
+ return (rval);
+}
+
+static int
+drsas_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ int instance_no;
+
+ struct drsas_instance *instance;
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ /* CONSTCOND */
+ ASSERT(NO_COMPETING_THREADS);
+
+ instance_no = ddi_get_instance(dip);
+
+ instance = (struct drsas_instance *)ddi_get_soft_state(drsas_state,
+ instance_no);
+
+ if (!instance) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas:%d could not get instance in detach",
+ instance_no));
+
+ return (DDI_FAILURE);
+ }
+
+ con_log(CL_ANN, (CE_NOTE,
+ "dr_sas%d: detaching device 0x%4x:0x%4x:0x%4x:0x%4x",
+ instance_no, instance->vendor_id, instance->device_id,
+ instance->subsysvid, instance->subsysid));
+
+ switch (cmd) {
+ case DDI_DETACH:
+ con_log(CL_ANN, (CE_NOTE,
+ "drsas_detach: DDI_DETACH"));
+
+ if (scsi_hba_detach(dip) != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas:%d failed to detach",
+ instance_no));
+
+ return (DDI_FAILURE);
+ }
+
+ scsi_hba_tran_free(instance->tran);
+
+ flush_cache(instance);
+
+ if (abort_aen_cmd(instance, instance->aen_cmd)) {
+ con_log(CL_ANN, (CE_WARN, "drsas_detach: "
+ "failed to abort prevous AEN command"));
+
+ return (DDI_FAILURE);
+ }
+
+ instance->func_ptr->disable_intr(instance);
+
+ if (instance->isr_level == HIGH_LEVEL_INTR) {
+ ddi_remove_softintr(instance->soft_intr_id);
+ }
+
+ drsas_rem_intrs(instance);
+
+ if (instance->taskq) {
+ ddi_taskq_destroy(instance->taskq);
+ }
+ kmem_free(instance->dr_ld_list, MRDRV_MAX_LD
+ * sizeof (struct drsas_ld));
+ free_space_for_mfi(instance);
+
+ drsas_fm_fini(instance);
+
+ pci_config_teardown(&instance->pci_handle);
+
+ kmem_free(instance->func_ptr,
+ sizeof (struct drsas_func_ptr));
+
+ ddi_soft_state_free(drsas_state, instance_no);
+ break;
+ case DDI_PM_SUSPEND:
+ con_log(CL_ANN, (CE_NOTE,
+ "drsas_detach: DDI_PM_SUSPEND"));
+
+ break;
+ case DDI_SUSPEND:
+ con_log(CL_ANN, (CE_NOTE,
+ "drsas_detach: DDI_SUSPEND"));
+
+ break;
+ default:
+ con_log(CL_ANN, (CE_WARN,
+ "invalid detach command:0x%x", cmd));
+ return (DDI_FAILURE);
+ }
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * ************************************************************************** *
+ * *
+ * common entry points - for character driver types *
+ * *
+ * ************************************************************************** *
+ */
+static int
+drsas_open(dev_t *dev, int openflags, int otyp, cred_t *credp)
+{
+ int rval = 0;
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ /* Check root permissions */
+ if (drv_priv(credp) != 0) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas: Non-root ioctl access denied!"));
+ return (EPERM);
+ }
+
+ /* Verify we are being opened as a character device */
+ if (otyp != OTYP_CHR) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas: ioctl node must be a char node"));
+ return (EINVAL);
+ }
+
+ if (ddi_get_soft_state(drsas_state, MINOR2INST(getminor(*dev)))
+ == NULL) {
+ return (ENXIO);
+ }
+
+ if (scsi_hba_open) {
+ rval = scsi_hba_open(dev, openflags, otyp, credp);
+ }
+
+ return (rval);
+}
+
+static int
+drsas_close(dev_t dev, int openflags, int otyp, cred_t *credp)
+{
+ int rval = 0;
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ /* no need for locks! */
+
+ if (scsi_hba_close) {
+ rval = scsi_hba_close(dev, openflags, otyp, credp);
+ }
+
+ return (rval);
+}
+
+static int
+drsas_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
+ int *rvalp)
+{
+ int rval = 0;
+
+ struct drsas_instance *instance;
+ struct drsas_ioctl *ioctl;
+ struct drsas_aen aen;
+ int i;
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ instance = ddi_get_soft_state(drsas_state, MINOR2INST(getminor(dev)));
+
+ if (instance == NULL) {
+ /* invalid minor number */
+ con_log(CL_ANN, (CE_WARN, "dr_sas: adapter not found."));
+ return (ENXIO);
+ }
+
+ ioctl = (struct drsas_ioctl *)kmem_zalloc(sizeof (struct drsas_ioctl),
+ KM_SLEEP);
+ ASSERT(ioctl);
+
+ switch ((uint_t)cmd) {
+ case DRSAS_IOCTL_FIRMWARE:
+ for (i = 0; i < sizeof (struct drsas_ioctl); i++) {
+ if (ddi_copyin((uint8_t *)arg+i,
+ (uint8_t *)ioctl+i, 1, mode)) {
+ con_log(CL_ANN, (CE_WARN, "drsas_ioctl "
+ "ERROR IOCTL copyin"));
+ kmem_free(ioctl,
+ sizeof (struct drsas_ioctl));
+ return (EFAULT);
+ }
+ }
+ if (ioctl->control_code == DRSAS_DRIVER_IOCTL_COMMON) {
+ rval = handle_drv_ioctl(instance, ioctl, mode);
+ } else {
+ rval = handle_mfi_ioctl(instance, ioctl, mode);
+ }
+ for (i = 0; i < sizeof (struct drsas_ioctl) - 1; i++) {
+ if (ddi_copyout((uint8_t *)ioctl+i,
+ (uint8_t *)arg+i, 1, mode)) {
+ con_log(CL_ANN, (CE_WARN,
+ "drsas_ioctl: ddi_copyout "
+ "failed"));
+ rval = 1;
+ break;
+ }
+ }
+
+ break;
+ case DRSAS_IOCTL_AEN:
+ for (i = 0; i < sizeof (struct drsas_aen); i++) {
+ if (ddi_copyin((uint8_t *)arg+i,
+ (uint8_t *)&aen+i, 1, mode)) {
+ con_log(CL_ANN, (CE_WARN,
+ "drsas_ioctl: "
+ "ERROR AEN copyin"));
+ kmem_free(ioctl,
+ sizeof (struct drsas_ioctl));
+ return (EFAULT);
+ }
+ }
+
+ rval = handle_mfi_aen(instance, &aen);
+ for (i = 0; i < sizeof (struct drsas_aen); i++) {
+ if (ddi_copyout((uint8_t *)&aen + i,
+ (uint8_t *)arg + i, 1, mode)) {
+ con_log(CL_ANN, (CE_WARN,
+ "drsas_ioctl: "
+ "ddi_copyout failed"));
+ rval = 1;
+ break;
+ }
+ }
+
+ break;
+ default:
+ rval = scsi_hba_ioctl(dev, cmd, arg,
+ mode, credp, rvalp);
+
+ con_log(CL_DLEVEL1, (CE_NOTE, "drsas_ioctl: "
+ "scsi_hba_ioctl called, ret = %x.", rval));
+ }
+
+ kmem_free(ioctl, sizeof (struct drsas_ioctl));
+ return (rval);
+}
+
+/*
+ * ************************************************************************** *
+ * *
+ * common entry points - for block driver types *
+ * *
+ * ************************************************************************** *
+ */
+/*ARGSUSED*/
+static int
+drsas_reset(dev_info_t *dip, ddi_reset_cmd_t cmd)
+{
+ int instance_no;
+
+ struct drsas_instance *instance;
+
+ instance_no = ddi_get_instance(dip);
+ instance = (struct drsas_instance *)ddi_get_soft_state
+ (drsas_state, instance_no);
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ if (!instance) {
+ con_log(CL_ANN, (CE_WARN, "dr_sas:%d could not get adapter "
+ "in reset", instance_no));
+ return (DDI_FAILURE);
+ }
+
+ instance->func_ptr->disable_intr(instance);
+
+ con_log(CL_ANN1, (CE_NOTE, "flushing cache for instance %d",
+ instance_no));
+
+ flush_cache(instance);
+
+ return (DDI_SUCCESS);
+}
+
+
+/*
+ * ************************************************************************** *
+ * *
+ * entry points (SCSI HBA) *
+ * *
+ * ************************************************************************** *
+ */
+/*ARGSUSED*/
+static int
+drsas_tran_tgt_init(dev_info_t *hba_dip, dev_info_t *tgt_dip,
+ scsi_hba_tran_t *tran, struct scsi_device *sd)
+{
+ struct drsas_instance *instance;
+ uint16_t tgt = sd->sd_address.a_target;
+ uint8_t lun = sd->sd_address.a_lun;
+
+ con_log(CL_ANN1, (CE_NOTE, "drsas_tgt_init target %d lun %d",
+ tgt, lun));
+
+ instance = ADDR2MR(&sd->sd_address);
+
+ if (ndi_dev_is_persistent_node(tgt_dip) == 0) {
+ (void) ndi_merge_node(tgt_dip, drsas_name_node);
+ ddi_set_name_addr(tgt_dip, NULL);
+
+ con_log(CL_ANN1, (CE_NOTE, "drsas_tgt_init in "
+ "ndi_dev_is_persistent_node DDI_FAILURE t = %d l = %d",
+ tgt, lun));
+ return (DDI_FAILURE);
+ }
+
+ con_log(CL_ANN1, (CE_NOTE, "drsas_tgt_init dev_dip %p tgt_dip %p",
+ (void *)instance->dr_ld_list[tgt].dip, (void *)tgt_dip));
+
+ if (tgt < MRDRV_MAX_LD && lun == 0) {
+ if (instance->dr_ld_list[tgt].dip == NULL &&
+ strcmp(ddi_driver_name(sd->sd_dev), "sd") == 0) {
+ instance->dr_ld_list[tgt].dip = tgt_dip;
+ instance->dr_ld_list[tgt].lun_type = DRSAS_LD_LUN;
+ }
+ }
+ return (DDI_SUCCESS);
+}
+
+/*ARGSUSED*/
+static void
+drsas_tran_tgt_free(dev_info_t *hba_dip, dev_info_t *tgt_dip,
+ scsi_hba_tran_t *hba_tran, struct scsi_device *sd)
+{
+ struct drsas_instance *instance;
+ int tgt = sd->sd_address.a_target;
+ int lun = sd->sd_address.a_lun;
+
+ instance = ADDR2MR(&sd->sd_address);
+
+ con_log(CL_ANN1, (CE_NOTE, "tgt_free t = %d l = %d", tgt, lun));
+
+ if (tgt < MRDRV_MAX_LD && lun == 0) {
+ if (instance->dr_ld_list[tgt].dip == tgt_dip) {
+ instance->dr_ld_list[tgt].dip = NULL;
+ }
+ }
+}
+
+static dev_info_t *
+drsas_find_child(struct drsas_instance *instance, uint16_t tgt, uint8_t lun)
+{
+ dev_info_t *child = NULL;
+ char addr[SCSI_MAXNAMELEN];
+ char tmp[MAXNAMELEN];
+
+ (void) sprintf(addr, "%x,%x", tgt, lun);
+ for (child = ddi_get_child(instance->dip); child;
+ child = ddi_get_next_sibling(child)) {
+
+ if (drsas_name_node(child, tmp, MAXNAMELEN) !=
+ DDI_SUCCESS) {
+ continue;
+ }
+
+ if (strcmp(addr, tmp) == 0) {
+ break;
+ }
+ }
+ con_log(CL_ANN1, (CE_NOTE, "drsas_find_child: return child = %p",
+ (void *)child));
+ return (child);
+}
+
+static int
+drsas_name_node(dev_info_t *dip, char *name, int len)
+{
+ int tgt, lun;
+
+ tgt = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
+ DDI_PROP_DONTPASS, "target", -1);
+ con_log(CL_ANN1, (CE_NOTE,
+ "drsas_name_node: dip %p tgt %d", (void *)dip, tgt));
+ if (tgt == -1) {
+ return (DDI_FAILURE);
+ }
+ lun = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
+ "lun", -1);
+ con_log(CL_ANN1,
+ (CE_NOTE, "drsas_name_node: tgt %d lun %d", tgt, lun));
+ if (lun == -1) {
+ return (DDI_FAILURE);
+ }
+ (void) snprintf(name, len, "%x,%x", tgt, lun);
+ return (DDI_SUCCESS);
+}
+
+static struct scsi_pkt *
+drsas_tran_init_pkt(struct scsi_address *ap, register struct scsi_pkt *pkt,
+ struct buf *bp, int cmdlen, int statuslen, int tgtlen,
+ int flags, int (*callback)(), caddr_t arg)
+{
+ struct scsa_cmd *acmd;
+ struct drsas_instance *instance;
+ struct scsi_pkt *new_pkt;
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ instance = ADDR2MR(ap);
+
+ /* step #1 : pkt allocation */
+ if (pkt == NULL) {
+ pkt = scsi_hba_pkt_alloc(instance->dip, ap, cmdlen, statuslen,
+ tgtlen, sizeof (struct scsa_cmd), callback, arg);
+ if (pkt == NULL) {
+ return (NULL);
+ }
+
+ acmd = PKT2CMD(pkt);
+
+ /*
+ * Initialize the new pkt - we redundantly initialize
+ * all the fields for illustrative purposes.
+ */
+ acmd->cmd_pkt = pkt;
+ acmd->cmd_flags = 0;
+ acmd->cmd_scblen = statuslen;
+ acmd->cmd_cdblen = cmdlen;
+ acmd->cmd_dmahandle = NULL;
+ acmd->cmd_ncookies = 0;
+ acmd->cmd_cookie = 0;
+ acmd->cmd_cookiecnt = 0;
+ acmd->cmd_nwin = 0;
+
+ pkt->pkt_address = *ap;
+ pkt->pkt_comp = (void (*)())NULL;
+ pkt->pkt_flags = 0;
+ pkt->pkt_time = 0;
+ pkt->pkt_resid = 0;
+ pkt->pkt_state = 0;
+ pkt->pkt_statistics = 0;
+ pkt->pkt_reason = 0;
+ new_pkt = pkt;
+ } else {
+ acmd = PKT2CMD(pkt);
+ new_pkt = NULL;
+ }
+
+ /* step #2 : dma allocation/move */
+ if (bp && bp->b_bcount != 0) {
+ if (acmd->cmd_dmahandle == NULL) {
+ if (drsas_dma_alloc(instance, pkt, bp, flags,
+ callback) == DDI_FAILURE) {
+ if (new_pkt) {
+ scsi_hba_pkt_free(ap, new_pkt);
+ }
+ return ((struct scsi_pkt *)NULL);
+ }
+ } else {
+ if (drsas_dma_move(instance, pkt, bp) == DDI_FAILURE) {
+ return ((struct scsi_pkt *)NULL);
+ }
+ }
+ }
+
+ return (pkt);
+}
+
+static int
+drsas_tran_start(struct scsi_address *ap, register struct scsi_pkt *pkt)
+{
+ uchar_t cmd_done = 0;
+
+ struct drsas_instance *instance = ADDR2MR(ap);
+ struct drsas_cmd *cmd;
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d:SCSI CDB[0]=0x%x",
+ __func__, __LINE__, pkt->pkt_cdbp[0]));
+
+ pkt->pkt_reason = CMD_CMPLT;
+ *pkt->pkt_scbp = STATUS_GOOD; /* clear arq scsi_status */
+
+ cmd = build_cmd(instance, ap, pkt, &cmd_done);
+
+ /*
+ * Check if the command is already completed by the drsas_build_cmd()
+ * routine. In which case the busy_flag would be clear and scb will be
+ * NULL and appropriate reason provided in pkt_reason field
+ */
+ if (cmd_done) {
+ pkt->pkt_reason = CMD_CMPLT;
+ pkt->pkt_scbp[0] = STATUS_GOOD;
+ pkt->pkt_state |= STATE_GOT_BUS | STATE_GOT_TARGET
+ | STATE_SENT_CMD;
+ if (((pkt->pkt_flags & FLAG_NOINTR) == 0) && pkt->pkt_comp) {
+ (*pkt->pkt_comp)(pkt);
+ }
+
+ return (TRAN_ACCEPT);
+ }
+
+ if (cmd == NULL) {
+ return (TRAN_BUSY);
+ }
+
+ if ((pkt->pkt_flags & FLAG_NOINTR) == 0) {
+ if (instance->fw_outstanding > instance->max_fw_cmds) {
+ con_log(CL_ANN, (CE_CONT, "dr_sas:Firmware busy"));
+ return_mfi_pkt(instance, cmd);
+ return (TRAN_BUSY);
+ }
+
+ /* Synchronize the Cmd frame for the controller */
+ (void) ddi_dma_sync(cmd->frame_dma_obj.dma_handle, 0, 0,
+ DDI_DMA_SYNC_FORDEV);
+
+ instance->func_ptr->issue_cmd(cmd, instance);
+
+ } else {
+ struct drsas_header *hdr = &cmd->frame->hdr;
+
+ cmd->sync_cmd = DRSAS_TRUE;
+
+ instance->func_ptr-> issue_cmd_in_poll_mode(instance, cmd);
+
+ pkt->pkt_reason = CMD_CMPLT;
+ pkt->pkt_statistics = 0;
+ pkt->pkt_state |= STATE_XFERRED_DATA | STATE_GOT_STATUS;
+
+ switch (ddi_get8(cmd->frame_dma_obj.acc_handle,
+ &hdr->cmd_status)) {
+ case MFI_STAT_OK:
+ pkt->pkt_scbp[0] = STATUS_GOOD;
+ break;
+
+ case MFI_STAT_SCSI_DONE_WITH_ERROR:
+
+ pkt->pkt_reason = CMD_CMPLT;
+ pkt->pkt_statistics = 0;
+
+ ((struct scsi_status *)pkt->pkt_scbp)->sts_chk = 1;
+ break;
+
+ case MFI_STAT_DEVICE_NOT_FOUND:
+ pkt->pkt_reason = CMD_DEV_GONE;
+ pkt->pkt_statistics = STAT_DISCON;
+ break;
+
+ default:
+ ((struct scsi_status *)pkt->pkt_scbp)->sts_busy = 1;
+ }
+
+ return_mfi_pkt(instance, cmd);
+ (void) drsas_common_check(instance, cmd);
+
+ if (pkt->pkt_comp) {
+ (*pkt->pkt_comp)(pkt);
+ }
+
+ }
+
+ return (TRAN_ACCEPT);
+}
+
+/*ARGSUSED*/
+static int
+drsas_tran_abort(struct scsi_address *ap, struct scsi_pkt *pkt)
+{
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ /* abort command not supported by H/W */
+
+ return (DDI_FAILURE);
+}
+
+/*ARGSUSED*/
+static int
+drsas_tran_reset(struct scsi_address *ap, int level)
+{
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ /* reset command not supported by H/W */
+
+ return (DDI_FAILURE);
+
+}
+
+/*ARGSUSED*/
+static int
+drsas_tran_getcap(struct scsi_address *ap, char *cap, int whom)
+{
+ int rval = 0;
+
+ struct drsas_instance *instance = ADDR2MR(ap);
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ /* we do allow inquiring about capabilities for other targets */
+ if (cap == NULL) {
+ return (-1);
+ }
+
+ switch (scsi_hba_lookup_capstr(cap)) {
+ case SCSI_CAP_DMA_MAX:
+ /* Limit to 16MB max transfer */
+ rval = drsas_max_cap_maxxfer;
+ break;
+ case SCSI_CAP_MSG_OUT:
+ rval = 1;
+ break;
+ case SCSI_CAP_DISCONNECT:
+ rval = 0;
+ break;
+ case SCSI_CAP_SYNCHRONOUS:
+ rval = 0;
+ break;
+ case SCSI_CAP_WIDE_XFER:
+ rval = 1;
+ break;
+ case SCSI_CAP_TAGGED_QING:
+ rval = 1;
+ break;
+ case SCSI_CAP_UNTAGGED_QING:
+ rval = 1;
+ break;
+ case SCSI_CAP_PARITY:
+ rval = 1;
+ break;
+ case SCSI_CAP_INITIATOR_ID:
+ rval = instance->init_id;
+ break;
+ case SCSI_CAP_ARQ:
+ rval = 1;
+ break;
+ case SCSI_CAP_LINKED_CMDS:
+ rval = 0;
+ break;
+ case SCSI_CAP_RESET_NOTIFICATION:
+ rval = 1;
+ break;
+ case SCSI_CAP_GEOMETRY:
+ rval = -1;
+
+ break;
+ default:
+ con_log(CL_DLEVEL2, (CE_NOTE, "Default cap coming 0x%x",
+ scsi_hba_lookup_capstr(cap)));
+ rval = -1;
+ break;
+ }
+
+ return (rval);
+}
+
+/*ARGSUSED*/
+static int
+drsas_tran_setcap(struct scsi_address *ap, char *cap, int value, int whom)
+{
+ int rval = 1;
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ /* We don't allow setting capabilities for other targets */
+ if (cap == NULL || whom == 0) {
+ return (-1);
+ }
+
+ switch (scsi_hba_lookup_capstr(cap)) {
+ case SCSI_CAP_DMA_MAX:
+ case SCSI_CAP_MSG_OUT:
+ case SCSI_CAP_PARITY:
+ case SCSI_CAP_LINKED_CMDS:
+ case SCSI_CAP_RESET_NOTIFICATION:
+ case SCSI_CAP_DISCONNECT:
+ case SCSI_CAP_SYNCHRONOUS:
+ case SCSI_CAP_UNTAGGED_QING:
+ case SCSI_CAP_WIDE_XFER:
+ case SCSI_CAP_INITIATOR_ID:
+ case SCSI_CAP_ARQ:
+ /*
+ * None of these are settable via
+ * the capability interface.
+ */
+ break;
+ case SCSI_CAP_TAGGED_QING:
+ rval = 1;
+ break;
+ case SCSI_CAP_SECTOR_SIZE:
+ rval = 1;
+ break;
+
+ case SCSI_CAP_TOTAL_SECTORS:
+ rval = 1;
+ break;
+ default:
+ rval = -1;
+ break;
+ }
+
+ return (rval);
+}
+
+static void
+drsas_tran_destroy_pkt(struct scsi_address *ap, struct scsi_pkt *pkt)
+{
+ struct scsa_cmd *acmd = PKT2CMD(pkt);
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ if (acmd->cmd_flags & CFLAG_DMAVALID) {
+ acmd->cmd_flags &= ~CFLAG_DMAVALID;
+
+ (void) ddi_dma_unbind_handle(acmd->cmd_dmahandle);
+
+ ddi_dma_free_handle(&acmd->cmd_dmahandle);
+
+ acmd->cmd_dmahandle = NULL;
+ }
+
+ /* free the pkt */
+ scsi_hba_pkt_free(ap, pkt);
+}
+
+/*ARGSUSED*/
+static void
+drsas_tran_dmafree(struct scsi_address *ap, struct scsi_pkt *pkt)
+{
+ register struct scsa_cmd *acmd = PKT2CMD(pkt);
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ if (acmd->cmd_flags & CFLAG_DMAVALID) {
+ acmd->cmd_flags &= ~CFLAG_DMAVALID;
+
+ (void) ddi_dma_unbind_handle(acmd->cmd_dmahandle);
+
+ ddi_dma_free_handle(&acmd->cmd_dmahandle);
+
+ acmd->cmd_dmahandle = NULL;
+ }
+}
+
+/*ARGSUSED*/
+static void
+drsas_tran_sync_pkt(struct scsi_address *ap, struct scsi_pkt *pkt)
+{
+ register struct scsa_cmd *acmd = PKT2CMD(pkt);
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ if (acmd->cmd_flags & CFLAG_DMAVALID) {
+ (void) ddi_dma_sync(acmd->cmd_dmahandle, acmd->cmd_dma_offset,
+ acmd->cmd_dma_len, (acmd->cmd_flags & CFLAG_DMASEND) ?
+ DDI_DMA_SYNC_FORDEV : DDI_DMA_SYNC_FORCPU);
+ }
+}
+
+/*
+ * drsas_isr(caddr_t)
+ *
+ * The Interrupt Service Routine
+ *
+ * Collect status for all completed commands and do callback
+ *
+ */
+static uint_t
+drsas_isr(struct drsas_instance *instance)
+{
+ int need_softintr;
+ uint32_t producer;
+ uint32_t consumer;
+ uint32_t context;
+
+ struct drsas_cmd *cmd;
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ ASSERT(instance);
+ if ((instance->intr_type == DDI_INTR_TYPE_FIXED) &&
+ !instance->func_ptr->intr_ack(instance)) {
+ return (DDI_INTR_UNCLAIMED);
+ }
+
+ (void) ddi_dma_sync(instance->mfi_internal_dma_obj.dma_handle,
+ 0, 0, DDI_DMA_SYNC_FORCPU);
+
+ if (drsas_check_dma_handle(instance->mfi_internal_dma_obj.dma_handle)
+ != DDI_SUCCESS) {
+ drsas_fm_ereport(instance, DDI_FM_DEVICE_NO_RESPONSE);
+ ddi_fm_service_impact(instance->dip, DDI_SERVICE_LOST);
+ return (DDI_INTR_UNCLAIMED);
+ }
+
+ producer = ddi_get32(instance->mfi_internal_dma_obj.acc_handle,
+ instance->producer);
+ consumer = ddi_get32(instance->mfi_internal_dma_obj.acc_handle,
+ instance->consumer);
+
+ con_log(CL_ANN1, (CE_CONT, " producer %x consumer %x ",
+ producer, consumer));
+ if (producer == consumer) {
+ con_log(CL_ANN1, (CE_WARN, "producer = consumer case"));
+ return (DDI_INTR_UNCLAIMED);
+ }
+ mutex_enter(&instance->completed_pool_mtx);
+
+ while (consumer != producer) {
+ context = ddi_get32(instance->mfi_internal_dma_obj.acc_handle,
+ &instance->reply_queue[consumer]);
+ cmd = instance->cmd_list[context];
+ mlist_add_tail(&cmd->list, &instance->completed_pool_list);
+
+ consumer++;
+ if (consumer == (instance->max_fw_cmds + 1)) {
+ consumer = 0;
+ }
+ }
+
+ mutex_exit(&instance->completed_pool_mtx);
+
+ ddi_put32(instance->mfi_internal_dma_obj.acc_handle,
+ instance->consumer, consumer);
+ (void) ddi_dma_sync(instance->mfi_internal_dma_obj.dma_handle,
+ 0, 0, DDI_DMA_SYNC_FORDEV);
+
+ if (instance->softint_running) {
+ need_softintr = 0;
+ } else {
+ need_softintr = 1;
+ }
+
+ if (instance->isr_level == HIGH_LEVEL_INTR) {
+ if (need_softintr) {
+ ddi_trigger_softintr(instance->soft_intr_id);
+ }
+ } else {
+ /*
+ * Not a high-level interrupt, therefore call the soft level
+ * interrupt explicitly
+ */
+ (void) drsas_softintr(instance);
+ }
+
+ return (DDI_INTR_CLAIMED);
+}
+
+
+/*
+ * ************************************************************************** *
+ * *
+ * libraries *
+ * *
+ * ************************************************************************** *
+ */
+/*
+ * get_mfi_pkt : Get a command from the free pool
+ * After successful allocation, the caller of this routine
+ * must clear the frame buffer (memset to zero) before
+ * using the packet further.
+ *
+ * ***** Note *****
+ * After clearing the frame buffer the context id of the
+ * frame buffer SHOULD be restored back.
+ */
+static struct drsas_cmd *
+get_mfi_pkt(struct drsas_instance *instance)
+{
+ mlist_t *head = &instance->cmd_pool_list;
+ struct drsas_cmd *cmd = NULL;
+
+ mutex_enter(&instance->cmd_pool_mtx);
+ ASSERT(mutex_owned(&instance->cmd_pool_mtx));
+
+ if (!mlist_empty(head)) {
+ cmd = mlist_entry(head->next, struct drsas_cmd, list);
+ mlist_del_init(head->next);
+ }
+ if (cmd != NULL)
+ cmd->pkt = NULL;
+ mutex_exit(&instance->cmd_pool_mtx);
+
+ return (cmd);
+}
+
+/*
+ * return_mfi_pkt : Return a cmd to free command pool
+ */
+static void
+return_mfi_pkt(struct drsas_instance *instance, struct drsas_cmd *cmd)
+{
+ mutex_enter(&instance->cmd_pool_mtx);
+ ASSERT(mutex_owned(&instance->cmd_pool_mtx));
+
+ mlist_add(&cmd->list, &instance->cmd_pool_list);
+
+ mutex_exit(&instance->cmd_pool_mtx);
+}
+
+/*
+ * destroy_mfi_frame_pool
+ */
+static void
+destroy_mfi_frame_pool(struct drsas_instance *instance)
+{
+ int i;
+ uint32_t max_cmd = instance->max_fw_cmds;
+
+ struct drsas_cmd *cmd;
+
+ /* return all frames to pool */
+ for (i = 0; i < max_cmd+1; i++) {
+
+ cmd = instance->cmd_list[i];
+
+ if (cmd->frame_dma_obj_status == DMA_OBJ_ALLOCATED)
+ (void) drsas_free_dma_obj(instance, cmd->frame_dma_obj);
+
+ cmd->frame_dma_obj_status = DMA_OBJ_FREED;
+ }
+
+}
+
+/*
+ * create_mfi_frame_pool
+ */
+static int
+create_mfi_frame_pool(struct drsas_instance *instance)
+{
+ int i = 0;
+ int cookie_cnt;
+ uint16_t max_cmd;
+ uint16_t sge_sz;
+ uint32_t sgl_sz;
+ uint32_t tot_frame_size;
+
+ struct drsas_cmd *cmd;
+
+ max_cmd = instance->max_fw_cmds;
+
+ sge_sz = sizeof (struct drsas_sge64);
+
+ /* calculated the number of 64byte frames required for SGL */
+ sgl_sz = sge_sz * instance->max_num_sge;
+ tot_frame_size = sgl_sz + MRMFI_FRAME_SIZE + SENSE_LENGTH;
+
+ con_log(CL_DLEVEL3, (CE_NOTE, "create_mfi_frame_pool: "
+ "sgl_sz %x tot_frame_size %x", sgl_sz, tot_frame_size));
+
+ while (i < max_cmd+1) {
+ cmd = instance->cmd_list[i];
+
+ cmd->frame_dma_obj.size = tot_frame_size;
+ cmd->frame_dma_obj.dma_attr = drsas_generic_dma_attr;
+ cmd->frame_dma_obj.dma_attr.dma_attr_addr_hi = 0xFFFFFFFFU;
+ cmd->frame_dma_obj.dma_attr.dma_attr_count_max = 0xFFFFFFFFU;
+ cmd->frame_dma_obj.dma_attr.dma_attr_sgllen = 1;
+ cmd->frame_dma_obj.dma_attr.dma_attr_align = 64;
+
+
+ cookie_cnt = drsas_alloc_dma_obj(instance, &cmd->frame_dma_obj,
+ (uchar_t)DDI_STRUCTURE_LE_ACC);
+
+ if (cookie_cnt == -1 || cookie_cnt > 1) {
+ con_log(CL_ANN, (CE_WARN,
+ "create_mfi_frame_pool: could not alloc."));
+ return (DDI_FAILURE);
+ }
+
+ bzero(cmd->frame_dma_obj.buffer, tot_frame_size);
+
+ cmd->frame_dma_obj_status = DMA_OBJ_ALLOCATED;
+ cmd->frame = (union drsas_frame *)cmd->frame_dma_obj.buffer;
+ cmd->frame_phys_addr =
+ cmd->frame_dma_obj.dma_cookie[0].dmac_address;
+
+ cmd->sense = (uint8_t *)(((unsigned long)
+ cmd->frame_dma_obj.buffer) +
+ tot_frame_size - SENSE_LENGTH);
+ cmd->sense_phys_addr =
+ cmd->frame_dma_obj.dma_cookie[0].dmac_address +
+ tot_frame_size - SENSE_LENGTH;
+
+ if (!cmd->frame || !cmd->sense) {
+ con_log(CL_ANN, (CE_NOTE,
+ "dr_sas: pci_pool_alloc failed"));
+
+ return (ENOMEM);
+ }
+
+ ddi_put32(cmd->frame_dma_obj.acc_handle,
+ &cmd->frame->io.context, cmd->index);
+ i++;
+
+ con_log(CL_DLEVEL3, (CE_NOTE, "[%x]-%x",
+ cmd->index, cmd->frame_phys_addr));
+ }
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * free_additional_dma_buffer
+ */
+static void
+free_additional_dma_buffer(struct drsas_instance *instance)
+{
+ if (instance->mfi_internal_dma_obj.status == DMA_OBJ_ALLOCATED) {
+ (void) drsas_free_dma_obj(instance,
+ instance->mfi_internal_dma_obj);
+ instance->mfi_internal_dma_obj.status = DMA_OBJ_FREED;
+ }
+
+ if (instance->mfi_evt_detail_obj.status == DMA_OBJ_ALLOCATED) {
+ (void) drsas_free_dma_obj(instance,
+ instance->mfi_evt_detail_obj);
+ instance->mfi_evt_detail_obj.status = DMA_OBJ_FREED;
+ }
+}
+
+/*
+ * alloc_additional_dma_buffer
+ */
+static int
+alloc_additional_dma_buffer(struct drsas_instance *instance)
+{
+ uint32_t reply_q_sz;
+ uint32_t internal_buf_size = PAGESIZE*2;
+
+ /* max cmds plus 1 + producer & consumer */
+ reply_q_sz = sizeof (uint32_t) * (instance->max_fw_cmds + 1 + 2);
+
+ instance->mfi_internal_dma_obj.size = internal_buf_size;
+ instance->mfi_internal_dma_obj.dma_attr = drsas_generic_dma_attr;
+ instance->mfi_internal_dma_obj.dma_attr.dma_attr_addr_hi = 0xFFFFFFFFU;
+ instance->mfi_internal_dma_obj.dma_attr.dma_attr_count_max =
+ 0xFFFFFFFFU;
+ instance->mfi_internal_dma_obj.dma_attr.dma_attr_sgllen = 1;
+
+ if (drsas_alloc_dma_obj(instance, &instance->mfi_internal_dma_obj,
+ (uchar_t)DDI_STRUCTURE_LE_ACC) != 1) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas: could not alloc reply queue"));
+ return (DDI_FAILURE);
+ }
+
+ bzero(instance->mfi_internal_dma_obj.buffer, internal_buf_size);
+
+ instance->mfi_internal_dma_obj.status |= DMA_OBJ_ALLOCATED;
+
+ instance->producer = (uint32_t *)((unsigned long)
+ instance->mfi_internal_dma_obj.buffer);
+ instance->consumer = (uint32_t *)((unsigned long)
+ instance->mfi_internal_dma_obj.buffer + 4);
+ instance->reply_queue = (uint32_t *)((unsigned long)
+ instance->mfi_internal_dma_obj.buffer + 8);
+ instance->internal_buf = (caddr_t)(((unsigned long)
+ instance->mfi_internal_dma_obj.buffer) + reply_q_sz + 8);
+ instance->internal_buf_dmac_add =
+ instance->mfi_internal_dma_obj.dma_cookie[0].dmac_address +
+ (reply_q_sz + 8);
+ instance->internal_buf_size = internal_buf_size -
+ (reply_q_sz + 8);
+
+ /* allocate evt_detail */
+ instance->mfi_evt_detail_obj.size = sizeof (struct drsas_evt_detail);
+ instance->mfi_evt_detail_obj.dma_attr = drsas_generic_dma_attr;
+ instance->mfi_evt_detail_obj.dma_attr.dma_attr_addr_hi = 0xFFFFFFFFU;
+ instance->mfi_evt_detail_obj.dma_attr.dma_attr_count_max = 0xFFFFFFFFU;
+ instance->mfi_evt_detail_obj.dma_attr.dma_attr_sgllen = 1;
+ instance->mfi_evt_detail_obj.dma_attr.dma_attr_align = 1;
+
+ if (drsas_alloc_dma_obj(instance, &instance->mfi_evt_detail_obj,
+ (uchar_t)DDI_STRUCTURE_LE_ACC) != 1) {
+ con_log(CL_ANN, (CE_WARN, "alloc_additional_dma_buffer: "
+ "could not allocate data transfer buffer."));
+ return (DDI_FAILURE);
+ }
+
+ bzero(instance->mfi_evt_detail_obj.buffer,
+ sizeof (struct drsas_evt_detail));
+
+ instance->mfi_evt_detail_obj.status |= DMA_OBJ_ALLOCATED;
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * free_space_for_mfi
+ */
+static void
+free_space_for_mfi(struct drsas_instance *instance)
+{
+ int i;
+ uint32_t max_cmd = instance->max_fw_cmds;
+
+ /* already freed */
+ if (instance->cmd_list == NULL) {
+ return;
+ }
+
+ free_additional_dma_buffer(instance);
+
+ /* first free the MFI frame pool */
+ destroy_mfi_frame_pool(instance);
+
+ /* free all the commands in the cmd_list */
+ for (i = 0; i < instance->max_fw_cmds+1; i++) {
+ kmem_free(instance->cmd_list[i],
+ sizeof (struct drsas_cmd));
+
+ instance->cmd_list[i] = NULL;
+ }
+
+ /* free the cmd_list buffer itself */
+ kmem_free(instance->cmd_list,
+ sizeof (struct drsas_cmd *) * (max_cmd+1));
+
+ instance->cmd_list = NULL;
+
+ INIT_LIST_HEAD(&instance->cmd_pool_list);
+}
+
+/*
+ * alloc_space_for_mfi
+ */
+static int
+alloc_space_for_mfi(struct drsas_instance *instance)
+{
+ int i;
+ uint32_t max_cmd;
+ size_t sz;
+
+ struct drsas_cmd *cmd;
+
+ max_cmd = instance->max_fw_cmds;
+
+ /* reserve 1 more slot for flush_cache */
+ sz = sizeof (struct drsas_cmd *) * (max_cmd+1);
+
+ /*
+ * instance->cmd_list is an array of struct drsas_cmd pointers.
+ * Allocate the dynamic array first and then allocate individual
+ * commands.
+ */
+ instance->cmd_list = kmem_zalloc(sz, KM_SLEEP);
+ ASSERT(instance->cmd_list);
+
+ for (i = 0; i < max_cmd+1; i++) {
+ instance->cmd_list[i] = kmem_zalloc(sizeof (struct drsas_cmd),
+ KM_SLEEP);
+ ASSERT(instance->cmd_list[i]);
+ }
+
+ INIT_LIST_HEAD(&instance->cmd_pool_list);
+
+ /* add all the commands to command pool (instance->cmd_pool) */
+ for (i = 0; i < max_cmd; i++) {
+ cmd = instance->cmd_list[i];
+ cmd->index = i;
+
+ mlist_add_tail(&cmd->list, &instance->cmd_pool_list);
+ }
+
+ /* single slot for flush_cache won't be added in command pool */
+ cmd = instance->cmd_list[max_cmd];
+ cmd->index = i;
+
+ /* create a frame pool and assign one frame to each cmd */
+ if (create_mfi_frame_pool(instance)) {
+ con_log(CL_ANN, (CE_NOTE, "error creating frame DMA pool"));
+ return (DDI_FAILURE);
+ }
+
+ /* create a frame pool and assign one frame to each cmd */
+ if (alloc_additional_dma_buffer(instance)) {
+ con_log(CL_ANN, (CE_NOTE, "error creating frame DMA pool"));
+ return (DDI_FAILURE);
+ }
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * get_ctrl_info
+ */
+static int
+get_ctrl_info(struct drsas_instance *instance,
+ struct drsas_ctrl_info *ctrl_info)
+{
+ int ret = 0;
+
+ struct drsas_cmd *cmd;
+ struct drsas_dcmd_frame *dcmd;
+ struct drsas_ctrl_info *ci;
+
+ cmd = get_mfi_pkt(instance);
+
+ if (!cmd) {
+ con_log(CL_ANN, (CE_WARN,
+ "Failed to get a cmd for ctrl info"));
+ return (DDI_FAILURE);
+ }
+ /* Clear the frame buffer and assign back the context id */
+ (void) memset((char *)&cmd->frame[0], 0, sizeof (union drsas_frame));
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &cmd->frame->hdr.context,
+ cmd->index);
+
+ dcmd = &cmd->frame->dcmd;
+
+ ci = (struct drsas_ctrl_info *)instance->internal_buf;
+
+ if (!ci) {
+ con_log(CL_ANN, (CE_WARN,
+ "Failed to alloc mem for ctrl info"));
+ return_mfi_pkt(instance, cmd);
+ return (DDI_FAILURE);
+ }
+
+ (void) memset(ci, 0, sizeof (struct drsas_ctrl_info));
+
+ /* for( i = 0; i < DCMD_MBOX_SZ; i++ ) dcmd->mbox.b[i] = 0; */
+ (void) memset(dcmd->mbox.b, 0, DCMD_MBOX_SZ);
+
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &dcmd->cmd, MFI_CMD_OP_DCMD);
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &dcmd->cmd_status,
+ MFI_CMD_STATUS_POLL_MODE);
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &dcmd->sge_count, 1);
+ ddi_put16(cmd->frame_dma_obj.acc_handle, &dcmd->flags,
+ MFI_FRAME_DIR_READ);
+ ddi_put16(cmd->frame_dma_obj.acc_handle, &dcmd->timeout, 0);
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->data_xfer_len,
+ sizeof (struct drsas_ctrl_info));
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->opcode,
+ DR_DCMD_CTRL_GET_INFO);
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->sgl.sge32[0].phys_addr,
+ instance->internal_buf_dmac_add);
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->sgl.sge32[0].length,
+ sizeof (struct drsas_ctrl_info));
+
+ cmd->frame_count = 1;
+
+ if (!instance->func_ptr->issue_cmd_in_poll_mode(instance, cmd)) {
+ ret = 0;
+ ddi_rep_get8(cmd->frame_dma_obj.acc_handle,
+ (uint8_t *)ctrl_info, (uint8_t *)ci,
+ sizeof (struct drsas_ctrl_info), DDI_DEV_AUTOINCR);
+ } else {
+ con_log(CL_ANN, (CE_WARN, "get_ctrl_info: Ctrl info failed"));
+ ret = -1;
+ }
+
+ return_mfi_pkt(instance, cmd);
+ if (drsas_common_check(instance, cmd) != DDI_SUCCESS) {
+ ret = -1;
+ }
+
+ return (ret);
+}
+
+/*
+ * abort_aen_cmd
+ */
+static int
+abort_aen_cmd(struct drsas_instance *instance,
+ struct drsas_cmd *cmd_to_abort)
+{
+ int ret = 0;
+
+ struct drsas_cmd *cmd;
+ struct drsas_abort_frame *abort_fr;
+
+ cmd = get_mfi_pkt(instance);
+
+ if (!cmd) {
+ con_log(CL_ANN, (CE_WARN,
+ "Failed to get a cmd for ctrl info"));
+ return (DDI_FAILURE);
+ }
+ /* Clear the frame buffer and assign back the context id */
+ (void) memset((char *)&cmd->frame[0], 0, sizeof (union drsas_frame));
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &cmd->frame->hdr.context,
+ cmd->index);
+
+ abort_fr = &cmd->frame->abort;
+
+ /* prepare and issue the abort frame */
+ ddi_put8(cmd->frame_dma_obj.acc_handle,
+ &abort_fr->cmd, MFI_CMD_OP_ABORT);
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &abort_fr->cmd_status,
+ MFI_CMD_STATUS_SYNC_MODE);
+ ddi_put16(cmd->frame_dma_obj.acc_handle, &abort_fr->flags, 0);
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &abort_fr->abort_context,
+ cmd_to_abort->index);
+ ddi_put32(cmd->frame_dma_obj.acc_handle,
+ &abort_fr->abort_mfi_phys_addr_lo, cmd_to_abort->frame_phys_addr);
+ ddi_put32(cmd->frame_dma_obj.acc_handle,
+ &abort_fr->abort_mfi_phys_addr_hi, 0);
+
+ instance->aen_cmd->abort_aen = 1;
+
+ cmd->sync_cmd = DRSAS_TRUE;
+ cmd->frame_count = 1;
+
+ if (instance->func_ptr->issue_cmd_in_sync_mode(instance, cmd)) {
+ con_log(CL_ANN, (CE_WARN,
+ "abort_aen_cmd: issue_cmd_in_sync_mode failed"));
+ ret = -1;
+ } else {
+ ret = 0;
+ }
+
+ instance->aen_cmd->abort_aen = 1;
+ instance->aen_cmd = 0;
+
+ return_mfi_pkt(instance, cmd);
+ (void) drsas_common_check(instance, cmd);
+
+ return (ret);
+}
+
+/*
+ * init_mfi
+ */
+static int
+init_mfi(struct drsas_instance *instance)
+{
+ struct drsas_cmd *cmd;
+ struct drsas_ctrl_info ctrl_info;
+ struct drsas_init_frame *init_frame;
+ struct drsas_init_queue_info *initq_info;
+
+ /* we expect the FW state to be READY */
+ if (mfi_state_transition_to_ready(instance)) {
+ con_log(CL_ANN, (CE_WARN, "dr_sas: F/W is not ready"));
+ goto fail_ready_state;
+ }
+
+ /* get various operational parameters from status register */
+ instance->max_num_sge =
+ (instance->func_ptr->read_fw_status_reg(instance) &
+ 0xFF0000) >> 0x10;
+ /*
+ * Reduce the max supported cmds by 1. This is to ensure that the
+ * reply_q_sz (1 more than the max cmd that driver may send)
+ * does not exceed max cmds that the FW can support
+ */
+ instance->max_fw_cmds =
+ instance->func_ptr->read_fw_status_reg(instance) & 0xFFFF;
+ instance->max_fw_cmds = instance->max_fw_cmds - 1;
+
+ instance->max_num_sge =
+ (instance->max_num_sge > DRSAS_MAX_SGE_CNT) ?
+ DRSAS_MAX_SGE_CNT : instance->max_num_sge;
+
+ /* create a pool of commands */
+ if (alloc_space_for_mfi(instance) != DDI_SUCCESS)
+ goto fail_alloc_fw_space;
+
+ /*
+ * Prepare a init frame. Note the init frame points to queue info
+ * structure. Each frame has SGL allocated after first 64 bytes. For
+ * this frame - since we don't need any SGL - we use SGL's space as
+ * queue info structure
+ */
+ cmd = get_mfi_pkt(instance);
+ /* Clear the frame buffer and assign back the context id */
+ (void) memset((char *)&cmd->frame[0], 0, sizeof (union drsas_frame));
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &cmd->frame->hdr.context,
+ cmd->index);
+
+ init_frame = (struct drsas_init_frame *)cmd->frame;
+ initq_info = (struct drsas_init_queue_info *)
+ ((unsigned long)init_frame + 64);
+
+ (void) memset(init_frame, 0, MRMFI_FRAME_SIZE);
+ (void) memset(initq_info, 0, sizeof (struct drsas_init_queue_info));
+
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &initq_info->init_flags, 0);
+
+ ddi_put32(cmd->frame_dma_obj.acc_handle,
+ &initq_info->reply_queue_entries, instance->max_fw_cmds + 1);
+
+ ddi_put32(cmd->frame_dma_obj.acc_handle,
+ &initq_info->producer_index_phys_addr_hi, 0);
+ ddi_put32(cmd->frame_dma_obj.acc_handle,
+ &initq_info->producer_index_phys_addr_lo,
+ instance->mfi_internal_dma_obj.dma_cookie[0].dmac_address);
+
+ ddi_put32(cmd->frame_dma_obj.acc_handle,
+ &initq_info->consumer_index_phys_addr_hi, 0);
+ ddi_put32(cmd->frame_dma_obj.acc_handle,
+ &initq_info->consumer_index_phys_addr_lo,
+ instance->mfi_internal_dma_obj.dma_cookie[0].dmac_address + 4);
+
+ ddi_put32(cmd->frame_dma_obj.acc_handle,
+ &initq_info->reply_queue_start_phys_addr_hi, 0);
+ ddi_put32(cmd->frame_dma_obj.acc_handle,
+ &initq_info->reply_queue_start_phys_addr_lo,
+ instance->mfi_internal_dma_obj.dma_cookie[0].dmac_address + 8);
+
+ ddi_put8(cmd->frame_dma_obj.acc_handle,
+ &init_frame->cmd, MFI_CMD_OP_INIT);
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &init_frame->cmd_status,
+ MFI_CMD_STATUS_POLL_MODE);
+ ddi_put16(cmd->frame_dma_obj.acc_handle, &init_frame->flags, 0);
+ ddi_put32(cmd->frame_dma_obj.acc_handle,
+ &init_frame->queue_info_new_phys_addr_lo,
+ cmd->frame_phys_addr + 64);
+ ddi_put32(cmd->frame_dma_obj.acc_handle,
+ &init_frame->queue_info_new_phys_addr_hi, 0);
+
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &init_frame->data_xfer_len,
+ sizeof (struct drsas_init_queue_info));
+
+ cmd->frame_count = 1;
+
+ /* issue the init frame in polled mode */
+ if (instance->func_ptr->issue_cmd_in_poll_mode(instance, cmd)) {
+ con_log(CL_ANN, (CE_WARN, "failed to init firmware"));
+ goto fail_fw_init;
+ }
+
+ return_mfi_pkt(instance, cmd);
+ if (drsas_common_check(instance, cmd) != DDI_SUCCESS) {
+ goto fail_fw_init;
+ }
+
+ /* gather misc FW related information */
+ if (!get_ctrl_info(instance, &ctrl_info)) {
+ instance->max_sectors_per_req = ctrl_info.max_request_size;
+ con_log(CL_ANN1, (CE_NOTE, "product name %s ld present %d",
+ ctrl_info.product_name, ctrl_info.ld_present_count));
+ } else {
+ instance->max_sectors_per_req = instance->max_num_sge *
+ PAGESIZE / 512;
+ }
+
+ if (drsas_check_acc_handle(instance->regmap_handle) != DDI_SUCCESS) {
+ goto fail_fw_init;
+ }
+
+ return (DDI_SUCCESS);
+
+fail_fw_init:
+fail_alloc_fw_space:
+
+ free_space_for_mfi(instance);
+
+fail_ready_state:
+ ddi_regs_map_free(&instance->regmap_handle);
+
+fail_mfi_reg_setup:
+ return (DDI_FAILURE);
+}
+
+/*
+ * mfi_state_transition_to_ready : Move the FW to READY state
+ *
+ * @reg_set : MFI register set
+ */
+static int
+mfi_state_transition_to_ready(struct drsas_instance *instance)
+{
+ int i;
+ uint8_t max_wait;
+ uint32_t fw_ctrl;
+ uint32_t fw_state;
+ uint32_t cur_state;
+
+ fw_state =
+ instance->func_ptr->read_fw_status_reg(instance) & MFI_STATE_MASK;
+ con_log(CL_ANN1, (CE_NOTE,
+ "mfi_state_transition_to_ready:FW state = 0x%x", fw_state));
+
+ while (fw_state != MFI_STATE_READY) {
+ con_log(CL_ANN, (CE_NOTE,
+ "mfi_state_transition_to_ready:FW state%x", fw_state));
+
+ switch (fw_state) {
+ case MFI_STATE_FAULT:
+ con_log(CL_ANN, (CE_NOTE,
+ "dr_sas: FW in FAULT state!!"));
+
+ return (ENODEV);
+ case MFI_STATE_WAIT_HANDSHAKE:
+ /* set the CLR bit in IMR0 */
+ con_log(CL_ANN, (CE_NOTE,
+ "dr_sas: FW waiting for HANDSHAKE"));
+ /*
+ * PCI_Hot Plug: MFI F/W requires
+ * (MFI_INIT_CLEAR_HANDSHAKE|MFI_INIT_HOTPLUG)
+ * to be set
+ */
+ /* WR_IB_MSG_0(MFI_INIT_CLEAR_HANDSHAKE, instance); */
+ WR_IB_DOORBELL(MFI_INIT_CLEAR_HANDSHAKE |
+ MFI_INIT_HOTPLUG, instance);
+
+ max_wait = 2;
+ cur_state = MFI_STATE_WAIT_HANDSHAKE;
+ break;
+ case MFI_STATE_BOOT_MESSAGE_PENDING:
+ /* set the CLR bit in IMR0 */
+ con_log(CL_ANN, (CE_NOTE,
+ "dr_sas: FW state boot message pending"));
+ /*
+ * PCI_Hot Plug: MFI F/W requires
+ * (MFI_INIT_CLEAR_HANDSHAKE|MFI_INIT_HOTPLUG)
+ * to be set
+ */
+ WR_IB_DOORBELL(MFI_INIT_HOTPLUG, instance);
+
+ max_wait = 10;
+ cur_state = MFI_STATE_BOOT_MESSAGE_PENDING;
+ break;
+ case MFI_STATE_OPERATIONAL:
+ /* bring it to READY state; assuming max wait 2 secs */
+ instance->func_ptr->disable_intr(instance);
+ con_log(CL_ANN1, (CE_NOTE,
+ "dr_sas: FW in OPERATIONAL state"));
+ /*
+ * PCI_Hot Plug: MFI F/W requires
+ * (MFI_INIT_READY | MFI_INIT_MFIMODE | MFI_INIT_ABORT)
+ * to be set
+ */
+ /* WR_IB_DOORBELL(MFI_INIT_READY, instance); */
+ WR_IB_DOORBELL(MFI_RESET_FLAGS, instance);
+
+ max_wait = 10;
+ cur_state = MFI_STATE_OPERATIONAL;
+ break;
+ case MFI_STATE_UNDEFINED:
+ /* this state should not last for more than 2 seconds */
+ con_log(CL_ANN, (CE_NOTE, "FW state undefined"));
+
+ max_wait = 2;
+ cur_state = MFI_STATE_UNDEFINED;
+ break;
+ case MFI_STATE_BB_INIT:
+ max_wait = 2;
+ cur_state = MFI_STATE_BB_INIT;
+ break;
+ case MFI_STATE_FW_INIT:
+ max_wait = 2;
+ cur_state = MFI_STATE_FW_INIT;
+ break;
+ case MFI_STATE_DEVICE_SCAN:
+ max_wait = 10;
+ cur_state = MFI_STATE_DEVICE_SCAN;
+ break;
+ default:
+ con_log(CL_ANN, (CE_NOTE,
+ "dr_sas: Unknown state 0x%x", fw_state));
+ return (ENODEV);
+ }
+
+ /* the cur_state should not last for more than max_wait secs */
+ for (i = 0; i < (max_wait * MILLISEC); i++) {
+ /* fw_state = RD_OB_MSG_0(instance) & MFI_STATE_MASK; */
+ fw_state =
+ instance->func_ptr->read_fw_status_reg(instance) &
+ MFI_STATE_MASK;
+
+ if (fw_state == cur_state) {
+ delay(1 * drv_usectohz(MILLISEC));
+ } else {
+ break;
+ }
+ }
+
+ /* return error if fw_state hasn't changed after max_wait */
+ if (fw_state == cur_state) {
+ con_log(CL_ANN, (CE_NOTE,
+ "FW state hasn't changed in %d secs", max_wait));
+ return (ENODEV);
+ }
+ };
+
+ fw_ctrl = RD_IB_DOORBELL(instance);
+
+ con_log(CL_ANN1, (CE_NOTE,
+ "mfi_state_transition_to_ready:FW ctrl = 0x%x", fw_ctrl));
+
+ /*
+ * Write 0xF to the doorbell register to do the following.
+ * - Abort all outstanding commands (bit 0).
+ * - Transition from OPERATIONAL to READY state (bit 1).
+ * - Discard (possible) low MFA posted in 64-bit mode (bit-2).
+ * - Set to release FW to continue running (i.e. BIOS handshake
+ * (bit 3).
+ */
+ WR_IB_DOORBELL(0xF, instance);
+
+ if (drsas_check_acc_handle(instance->regmap_handle) != DDI_SUCCESS) {
+ return (ENODEV);
+ }
+ return (DDI_SUCCESS);
+}
+
+/*
+ * get_seq_num
+ */
+static int
+get_seq_num(struct drsas_instance *instance,
+ struct drsas_evt_log_info *eli)
+{
+ int ret = DDI_SUCCESS;
+
+ dma_obj_t dcmd_dma_obj;
+ struct drsas_cmd *cmd;
+ struct drsas_dcmd_frame *dcmd;
+
+ cmd = get_mfi_pkt(instance);
+
+ if (!cmd) {
+ cmn_err(CE_WARN, "dr_sas: failed to get a cmd");
+ return (ENOMEM);
+ }
+ /* Clear the frame buffer and assign back the context id */
+ (void) memset((char *)&cmd->frame[0], 0, sizeof (union drsas_frame));
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &cmd->frame->hdr.context,
+ cmd->index);
+
+ dcmd = &cmd->frame->dcmd;
+
+ /* allocate the data transfer buffer */
+ dcmd_dma_obj.size = sizeof (struct drsas_evt_log_info);
+ dcmd_dma_obj.dma_attr = drsas_generic_dma_attr;
+ dcmd_dma_obj.dma_attr.dma_attr_addr_hi = 0xFFFFFFFFU;
+ dcmd_dma_obj.dma_attr.dma_attr_count_max = 0xFFFFFFFFU;
+ dcmd_dma_obj.dma_attr.dma_attr_sgllen = 1;
+ dcmd_dma_obj.dma_attr.dma_attr_align = 1;
+
+ if (drsas_alloc_dma_obj(instance, &dcmd_dma_obj,
+ (uchar_t)DDI_STRUCTURE_LE_ACC) != 1) {
+ con_log(CL_ANN, (CE_WARN,
+ "get_seq_num: could not allocate data transfer buffer."));
+ return (DDI_FAILURE);
+ }
+
+ (void) memset(dcmd_dma_obj.buffer, 0,
+ sizeof (struct drsas_evt_log_info));
+
+ (void) memset(dcmd->mbox.b, 0, DCMD_MBOX_SZ);
+
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &dcmd->cmd, MFI_CMD_OP_DCMD);
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &dcmd->cmd_status, 0);
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &dcmd->sge_count, 1);
+ ddi_put16(cmd->frame_dma_obj.acc_handle, &dcmd->flags,
+ MFI_FRAME_DIR_READ);
+ ddi_put16(cmd->frame_dma_obj.acc_handle, &dcmd->timeout, 0);
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->data_xfer_len,
+ sizeof (struct drsas_evt_log_info));
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->opcode,
+ DR_DCMD_CTRL_EVENT_GET_INFO);
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->sgl.sge32[0].length,
+ sizeof (struct drsas_evt_log_info));
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->sgl.sge32[0].phys_addr,
+ dcmd_dma_obj.dma_cookie[0].dmac_address);
+
+ cmd->sync_cmd = DRSAS_TRUE;
+ cmd->frame_count = 1;
+
+ if (instance->func_ptr->issue_cmd_in_sync_mode(instance, cmd)) {
+ cmn_err(CE_WARN, "get_seq_num: "
+ "failed to issue DRSAS_DCMD_CTRL_EVENT_GET_INFO");
+ ret = DDI_FAILURE;
+ } else {
+ /* copy the data back into callers buffer */
+ ddi_rep_get8(cmd->frame_dma_obj.acc_handle, (uint8_t *)eli,
+ (uint8_t *)dcmd_dma_obj.buffer,
+ sizeof (struct drsas_evt_log_info), DDI_DEV_AUTOINCR);
+ ret = DDI_SUCCESS;
+ }
+
+ if (drsas_free_dma_obj(instance, dcmd_dma_obj) != DDI_SUCCESS)
+ ret = DDI_FAILURE;
+
+ return_mfi_pkt(instance, cmd);
+ if (drsas_common_check(instance, cmd) != DDI_SUCCESS) {
+ ret = DDI_FAILURE;
+ }
+ return (ret);
+}
+
+/*
+ * start_mfi_aen
+ */
+static int
+start_mfi_aen(struct drsas_instance *instance)
+{
+ int ret = 0;
+
+ struct drsas_evt_log_info eli;
+ union drsas_evt_class_locale class_locale;
+
+ /* get the latest sequence number from FW */
+ (void) memset(&eli, 0, sizeof (struct drsas_evt_log_info));
+
+ if (get_seq_num(instance, &eli)) {
+ cmn_err(CE_WARN, "start_mfi_aen: failed to get seq num");
+ return (-1);
+ }
+
+ /* register AEN with FW for latest sequence number plus 1 */
+ class_locale.members.reserved = 0;
+ class_locale.members.locale = DR_EVT_LOCALE_ALL;
+ class_locale.members.class = DR_EVT_CLASS_INFO;
+ ret = register_mfi_aen(instance, eli.newest_seq_num + 1,
+ class_locale.word);
+
+ if (ret) {
+ cmn_err(CE_WARN, "start_mfi_aen: aen registration failed");
+ return (-1);
+ }
+
+ return (ret);
+}
+
+/*
+ * flush_cache
+ */
+static void
+flush_cache(struct drsas_instance *instance)
+{
+ struct drsas_cmd *cmd = NULL;
+ struct drsas_dcmd_frame *dcmd;
+ uint32_t max_cmd = instance->max_fw_cmds;
+
+ cmd = instance->cmd_list[max_cmd];
+
+ if (cmd == NULL)
+ return;
+
+ dcmd = &cmd->frame->dcmd;
+
+ (void) memset(dcmd->mbox.b, 0, DCMD_MBOX_SZ);
+
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &dcmd->cmd, MFI_CMD_OP_DCMD);
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &dcmd->cmd_status, 0x0);
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &dcmd->sge_count, 0);
+ ddi_put16(cmd->frame_dma_obj.acc_handle, &dcmd->flags,
+ MFI_FRAME_DIR_NONE);
+ ddi_put16(cmd->frame_dma_obj.acc_handle, &dcmd->timeout, 0);
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->data_xfer_len, 0);
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->opcode,
+ DR_DCMD_CTRL_CACHE_FLUSH);
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &dcmd->mbox.b[0],
+ DR_FLUSH_CTRL_CACHE | DR_FLUSH_DISK_CACHE);
+
+ cmd->frame_count = 1;
+
+ if (instance->func_ptr->issue_cmd_in_poll_mode(instance, cmd)) {
+ con_log(CL_ANN1, (CE_WARN,
+ "flush_cache: failed to issue MFI_DCMD_CTRL_CACHE_FLUSH"));
+ }
+ con_log(CL_DLEVEL1, (CE_NOTE, "done"));
+}
+
+/*
+ * service_mfi_aen- Completes an AEN command
+ * @instance: Adapter soft state
+ * @cmd: Command to be completed
+ *
+ */
+static void
+service_mfi_aen(struct drsas_instance *instance, struct drsas_cmd *cmd)
+{
+ uint32_t seq_num;
+ struct drsas_evt_detail *evt_detail =
+ (struct drsas_evt_detail *)instance->mfi_evt_detail_obj.buffer;
+ int rval = 0;
+ int tgt = 0;
+ ddi_acc_handle_t acc_handle;
+
+ acc_handle = cmd->frame_dma_obj.acc_handle;
+
+ cmd->cmd_status = ddi_get8(acc_handle, &cmd->frame->io.cmd_status);
+
+ if (cmd->cmd_status == ENODATA) {
+ cmd->cmd_status = 0;
+ }
+
+ /*
+ * log the MFI AEN event to the sysevent queue so that
+ * application will get noticed
+ */
+ if (ddi_log_sysevent(instance->dip, DDI_VENDOR_LSI, "LSIMEGA", "SAS",
+ NULL, NULL, DDI_NOSLEEP) != DDI_SUCCESS) {
+ int instance_no = ddi_get_instance(instance->dip);
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas%d: Failed to log AEN event", instance_no));
+ }
+ /*
+ * Check for any ld devices that has changed state. i.e. online
+ * or offline.
+ */
+ con_log(CL_ANN1, (CE_NOTE,
+ "AEN: code = %x class = %x locale = %x args = %x",
+ ddi_get32(acc_handle, &evt_detail->code),
+ evt_detail->cl.members.class,
+ ddi_get16(acc_handle, &evt_detail->cl.members.locale),
+ ddi_get8(acc_handle, &evt_detail->arg_type)));
+
+ switch (ddi_get32(acc_handle, &evt_detail->code)) {
+ case DR_EVT_CFG_CLEARED: {
+ for (tgt = 0; tgt < MRDRV_MAX_LD; tgt++) {
+ if (instance->dr_ld_list[tgt].dip != NULL) {
+ rval = drsas_service_evt(instance, tgt, 0,
+ DRSAS_EVT_UNCONFIG_TGT, NULL);
+ con_log(CL_ANN1, (CE_WARN,
+ "dr_sas: CFG CLEARED AEN rval = %d "
+ "tgt id = %d", rval, tgt));
+ }
+ }
+ break;
+ }
+
+ case DR_EVT_LD_DELETED: {
+ rval = drsas_service_evt(instance,
+ ddi_get16(acc_handle, &evt_detail->args.ld.target_id), 0,
+ DRSAS_EVT_UNCONFIG_TGT, NULL);
+ con_log(CL_ANN1, (CE_WARN, "dr_sas: LD DELETED AEN rval = %d "
+ "tgt id = %d index = %d", rval,
+ ddi_get16(acc_handle, &evt_detail->args.ld.target_id),
+ ddi_get8(acc_handle, &evt_detail->args.ld.ld_index)));
+ break;
+ } /* End of DR_EVT_LD_DELETED */
+
+ case DR_EVT_LD_CREATED: {
+ rval = drsas_service_evt(instance,
+ ddi_get16(acc_handle, &evt_detail->args.ld.target_id), 0,
+ DRSAS_EVT_CONFIG_TGT, NULL);
+ con_log(CL_ANN1, (CE_WARN, "dr_sas: LD CREATED AEN rval = %d "
+ "tgt id = %d index = %d", rval,
+ ddi_get16(acc_handle, &evt_detail->args.ld.target_id),
+ ddi_get8(acc_handle, &evt_detail->args.ld.ld_index)));
+ break;
+ } /* End of DR_EVT_LD_CREATED */
+ } /* End of Main Switch */
+
+ /* get copy of seq_num and class/locale for re-registration */
+ seq_num = ddi_get32(acc_handle, &evt_detail->seq_num);
+ seq_num++;
+ (void) memset(instance->mfi_evt_detail_obj.buffer, 0,
+ sizeof (struct drsas_evt_detail));
+
+ ddi_put8(acc_handle, &cmd->frame->dcmd.cmd_status, 0x0);
+ ddi_put32(acc_handle, &cmd->frame->dcmd.mbox.w[0], seq_num);
+
+ instance->aen_seq_num = seq_num;
+
+ cmd->frame_count = 1;
+
+ /* Issue the aen registration frame */
+ instance->func_ptr->issue_cmd(cmd, instance);
+}
+
+/*
+ * complete_cmd_in_sync_mode - Completes an internal command
+ * @instance: Adapter soft state
+ * @cmd: Command to be completed
+ *
+ * The issue_cmd_in_sync_mode() function waits for a command to complete
+ * after it issues a command. This function wakes up that waiting routine by
+ * calling wake_up() on the wait queue.
+ */
+static void
+complete_cmd_in_sync_mode(struct drsas_instance *instance,
+ struct drsas_cmd *cmd)
+{
+ cmd->cmd_status = ddi_get8(cmd->frame_dma_obj.acc_handle,
+ &cmd->frame->io.cmd_status);
+
+ cmd->sync_cmd = DRSAS_FALSE;
+
+ if (cmd->cmd_status == ENODATA) {
+ cmd->cmd_status = 0;
+ }
+
+ cv_broadcast(&instance->int_cmd_cv);
+}
+
+/*
+ * drsas_softintr - The Software ISR
+ * @param arg : HBA soft state
+ *
+ * called from high-level interrupt if hi-level interrupt are not there,
+ * otherwise triggered as a soft interrupt
+ */
+static uint_t
+drsas_softintr(struct drsas_instance *instance)
+{
+ struct scsi_pkt *pkt;
+ struct scsa_cmd *acmd;
+ struct drsas_cmd *cmd;
+ struct mlist_head *pos, *next;
+ mlist_t process_list;
+ struct drsas_header *hdr;
+ struct scsi_arq_status *arqstat;
+
+ con_log(CL_ANN1, (CE_CONT, "drsas_softintr called"));
+
+ ASSERT(instance);
+ mutex_enter(&instance->completed_pool_mtx);
+
+ if (mlist_empty(&instance->completed_pool_list)) {
+ mutex_exit(&instance->completed_pool_mtx);
+ return (DDI_INTR_UNCLAIMED);
+ }
+
+ instance->softint_running = 1;
+
+ INIT_LIST_HEAD(&process_list);
+ mlist_splice(&instance->completed_pool_list, &process_list);
+ INIT_LIST_HEAD(&instance->completed_pool_list);
+
+ mutex_exit(&instance->completed_pool_mtx);
+
+ /* perform all callbacks first, before releasing the SCBs */
+ mlist_for_each_safe(pos, next, &process_list) {
+ cmd = mlist_entry(pos, struct drsas_cmd, list);
+
+ /* syncronize the Cmd frame for the controller */
+ (void) ddi_dma_sync(cmd->frame_dma_obj.dma_handle,
+ 0, 0, DDI_DMA_SYNC_FORCPU);
+
+ if (drsas_check_dma_handle(cmd->frame_dma_obj.dma_handle) !=
+ DDI_SUCCESS) {
+ drsas_fm_ereport(instance, DDI_FM_DEVICE_NO_RESPONSE);
+ ddi_fm_service_impact(instance->dip, DDI_SERVICE_LOST);
+ return (DDI_INTR_UNCLAIMED);
+ }
+
+ hdr = &cmd->frame->hdr;
+
+ /* remove the internal command from the process list */
+ mlist_del_init(&cmd->list);
+
+ switch (ddi_get8(cmd->frame_dma_obj.acc_handle, &hdr->cmd)) {
+ case MFI_CMD_OP_PD_SCSI:
+ case MFI_CMD_OP_LD_SCSI:
+ case MFI_CMD_OP_LD_READ:
+ case MFI_CMD_OP_LD_WRITE:
+ /*
+ * MFI_CMD_OP_PD_SCSI and MFI_CMD_OP_LD_SCSI
+ * could have been issued either through an
+ * IO path or an IOCTL path. If it was via IOCTL,
+ * we will send it to internal completion.
+ */
+ if (cmd->sync_cmd == DRSAS_TRUE) {
+ complete_cmd_in_sync_mode(instance, cmd);
+ break;
+ }
+
+ /* regular commands */
+ acmd = cmd->cmd;
+ pkt = CMD2PKT(acmd);
+
+ if (acmd->cmd_flags & CFLAG_DMAVALID) {
+ if (acmd->cmd_flags & CFLAG_CONSISTENT) {
+ (void) ddi_dma_sync(acmd->cmd_dmahandle,
+ acmd->cmd_dma_offset,
+ acmd->cmd_dma_len,
+ DDI_DMA_SYNC_FORCPU);
+ }
+ }
+
+ pkt->pkt_reason = CMD_CMPLT;
+ pkt->pkt_statistics = 0;
+ pkt->pkt_state = STATE_GOT_BUS
+ | STATE_GOT_TARGET | STATE_SENT_CMD
+ | STATE_XFERRED_DATA | STATE_GOT_STATUS;
+
+ con_log(CL_ANN1, (CE_CONT,
+ "CDB[0] = %x completed for %s: size %lx context %x",
+ pkt->pkt_cdbp[0], ((acmd->islogical) ? "LD" : "PD"),
+ acmd->cmd_dmacount, hdr->context));
+
+ if (pkt->pkt_cdbp[0] == SCMD_INQUIRY) {
+ struct scsi_inquiry *inq;
+
+ if (acmd->cmd_dmacount != 0) {
+ bp_mapin(acmd->cmd_buf);
+ inq = (struct scsi_inquiry *)
+ acmd->cmd_buf->b_un.b_addr;
+
+ /* don't expose physical drives to OS */
+ if (acmd->islogical &&
+ (hdr->cmd_status == MFI_STAT_OK)) {
+ display_scsi_inquiry(
+ (caddr_t)inq);
+ } else if ((hdr->cmd_status ==
+ MFI_STAT_OK) && inq->inq_dtype ==
+ DTYPE_DIRECT) {
+
+ display_scsi_inquiry(
+ (caddr_t)inq);
+
+ /* for physical disk */
+ hdr->cmd_status =
+ MFI_STAT_DEVICE_NOT_FOUND;
+ }
+ }
+ }
+
+ switch (hdr->cmd_status) {
+ case MFI_STAT_OK:
+ pkt->pkt_scbp[0] = STATUS_GOOD;
+ break;
+ case MFI_STAT_LD_CC_IN_PROGRESS:
+ case MFI_STAT_LD_RECON_IN_PROGRESS:
+ pkt->pkt_scbp[0] = STATUS_GOOD;
+ break;
+ case MFI_STAT_LD_INIT_IN_PROGRESS:
+ con_log(CL_ANN,
+ (CE_WARN, "Initialization in Progress"));
+ pkt->pkt_reason = CMD_TRAN_ERR;
+
+ break;
+ case MFI_STAT_SCSI_DONE_WITH_ERROR:
+ con_log(CL_ANN1, (CE_CONT, "scsi_done error"));
+
+ pkt->pkt_reason = CMD_CMPLT;
+ ((struct scsi_status *)
+ pkt->pkt_scbp)->sts_chk = 1;
+
+ if (pkt->pkt_cdbp[0] == SCMD_TEST_UNIT_READY) {
+
+ con_log(CL_ANN,
+ (CE_WARN, "TEST_UNIT_READY fail"));
+
+ } else {
+ pkt->pkt_state |= STATE_ARQ_DONE;
+ arqstat = (void *)(pkt->pkt_scbp);
+ arqstat->sts_rqpkt_reason = CMD_CMPLT;
+ arqstat->sts_rqpkt_resid = 0;
+ arqstat->sts_rqpkt_state |=
+ STATE_GOT_BUS | STATE_GOT_TARGET
+ | STATE_SENT_CMD
+ | STATE_XFERRED_DATA;
+ *(uint8_t *)&arqstat->sts_rqpkt_status =
+ STATUS_GOOD;
+ ddi_rep_get8(
+ cmd->frame_dma_obj.acc_handle,
+ (uint8_t *)
+ &(arqstat->sts_sensedata),
+ cmd->sense,
+ acmd->cmd_scblen -
+ offsetof(struct scsi_arq_status,
+ sts_sensedata), DDI_DEV_AUTOINCR);
+ }
+ break;
+ case MFI_STAT_LD_OFFLINE:
+ case MFI_STAT_DEVICE_NOT_FOUND:
+ con_log(CL_ANN1, (CE_CONT,
+ "device not found error"));
+ pkt->pkt_reason = CMD_DEV_GONE;
+ pkt->pkt_statistics = STAT_DISCON;
+ break;
+ case MFI_STAT_LD_LBA_OUT_OF_RANGE:
+ pkt->pkt_state |= STATE_ARQ_DONE;
+ pkt->pkt_reason = CMD_CMPLT;
+ ((struct scsi_status *)
+ pkt->pkt_scbp)->sts_chk = 1;
+
+ arqstat = (void *)(pkt->pkt_scbp);
+ arqstat->sts_rqpkt_reason = CMD_CMPLT;
+ arqstat->sts_rqpkt_resid = 0;
+ arqstat->sts_rqpkt_state |= STATE_GOT_BUS
+ | STATE_GOT_TARGET | STATE_SENT_CMD
+ | STATE_XFERRED_DATA;
+ *(uint8_t *)&arqstat->sts_rqpkt_status =
+ STATUS_GOOD;
+
+ arqstat->sts_sensedata.es_valid = 1;
+ arqstat->sts_sensedata.es_key =
+ KEY_ILLEGAL_REQUEST;
+ arqstat->sts_sensedata.es_class =
+ CLASS_EXTENDED_SENSE;
+
+ /*
+ * LOGICAL BLOCK ADDRESS OUT OF RANGE:
+ * ASC: 0x21h; ASCQ: 0x00h;
+ */
+ arqstat->sts_sensedata.es_add_code = 0x21;
+ arqstat->sts_sensedata.es_qual_code = 0x00;
+
+ break;
+
+ default:
+ con_log(CL_ANN, (CE_CONT, "Unknown status!"));
+ pkt->pkt_reason = CMD_TRAN_ERR;
+
+ break;
+ }
+
+ atomic_add_16(&instance->fw_outstanding, (-1));
+
+ return_mfi_pkt(instance, cmd);
+
+ (void) drsas_common_check(instance, cmd);
+
+ if (acmd->cmd_dmahandle) {
+ if (drsas_check_dma_handle(
+ acmd->cmd_dmahandle) != DDI_SUCCESS) {
+ ddi_fm_service_impact(instance->dip,
+ DDI_SERVICE_UNAFFECTED);
+ pkt->pkt_reason = CMD_TRAN_ERR;
+ pkt->pkt_statistics = 0;
+ }
+ }
+
+ /* Call the callback routine */
+ if (((pkt->pkt_flags & FLAG_NOINTR) == 0) &&
+ pkt->pkt_comp) {
+ (*pkt->pkt_comp)(pkt);
+ }
+
+ break;
+ case MFI_CMD_OP_SMP:
+ case MFI_CMD_OP_STP:
+ complete_cmd_in_sync_mode(instance, cmd);
+ break;
+ case MFI_CMD_OP_DCMD:
+ /* see if got an event notification */
+ if (ddi_get32(cmd->frame_dma_obj.acc_handle,
+ &cmd->frame->dcmd.opcode) ==
+ DR_DCMD_CTRL_EVENT_WAIT) {
+ if ((instance->aen_cmd == cmd) &&
+ (instance->aen_cmd->abort_aen)) {
+ con_log(CL_ANN, (CE_WARN,
+ "drsas_softintr: "
+ "aborted_aen returned"));
+ } else {
+ atomic_add_16(&instance->fw_outstanding,
+ (-1));
+ service_mfi_aen(instance, cmd);
+ }
+ } else {
+ complete_cmd_in_sync_mode(instance, cmd);
+ }
+
+ break;
+ case MFI_CMD_OP_ABORT:
+ con_log(CL_ANN, (CE_WARN, "MFI_CMD_OP_ABORT complete"));
+ /*
+ * MFI_CMD_OP_ABORT successfully completed
+ * in the synchronous mode
+ */
+ complete_cmd_in_sync_mode(instance, cmd);
+ break;
+ default:
+ drsas_fm_ereport(instance, DDI_FM_DEVICE_NO_RESPONSE);
+ ddi_fm_service_impact(instance->dip, DDI_SERVICE_LOST);
+
+ if (cmd->pkt != NULL) {
+ pkt = cmd->pkt;
+ if (((pkt->pkt_flags & FLAG_NOINTR) == 0) &&
+ pkt->pkt_comp) {
+ (*pkt->pkt_comp)(pkt);
+ }
+ }
+ con_log(CL_ANN, (CE_WARN, "Cmd type unknown !"));
+ break;
+ }
+ }
+
+ instance->softint_running = 0;
+
+ return (DDI_INTR_CLAIMED);
+}
+
+/*
+ * drsas_alloc_dma_obj
+ *
+ * Allocate the memory and other resources for an dma object.
+ */
+static int
+drsas_alloc_dma_obj(struct drsas_instance *instance, dma_obj_t *obj,
+ uchar_t endian_flags)
+{
+ int i;
+ size_t alen = 0;
+ uint_t cookie_cnt;
+ struct ddi_device_acc_attr tmp_endian_attr;
+
+ tmp_endian_attr = endian_attr;
+ tmp_endian_attr.devacc_attr_endian_flags = endian_flags;
+
+ i = ddi_dma_alloc_handle(instance->dip, &obj->dma_attr,
+ DDI_DMA_SLEEP, NULL, &obj->dma_handle);
+ if (i != DDI_SUCCESS) {
+
+ switch (i) {
+ case DDI_DMA_BADATTR :
+ con_log(CL_ANN, (CE_WARN,
+ "Failed ddi_dma_alloc_handle- Bad attribute"));
+ break;
+ case DDI_DMA_NORESOURCES :
+ con_log(CL_ANN, (CE_WARN,
+ "Failed ddi_dma_alloc_handle- No Resources"));
+ break;
+ default :
+ con_log(CL_ANN, (CE_WARN,
+ "Failed ddi_dma_alloc_handle: "
+ "unknown status %d", i));
+ break;
+ }
+
+ return (-1);
+ }
+
+ if ((ddi_dma_mem_alloc(obj->dma_handle, obj->size, &tmp_endian_attr,
+ DDI_DMA_RDWR | DDI_DMA_STREAMING, DDI_DMA_SLEEP, NULL,
+ &obj->buffer, &alen, &obj->acc_handle) != DDI_SUCCESS) ||
+ alen < obj->size) {
+
+ ddi_dma_free_handle(&obj->dma_handle);
+
+ con_log(CL_ANN, (CE_WARN, "Failed : ddi_dma_mem_alloc"));
+
+ return (-1);
+ }
+
+ if (ddi_dma_addr_bind_handle(obj->dma_handle, NULL, obj->buffer,
+ obj->size, DDI_DMA_RDWR | DDI_DMA_STREAMING, DDI_DMA_SLEEP,
+ NULL, &obj->dma_cookie[0], &cookie_cnt) != DDI_SUCCESS) {
+
+ ddi_dma_mem_free(&obj->acc_handle);
+ ddi_dma_free_handle(&obj->dma_handle);
+
+ con_log(CL_ANN, (CE_WARN, "Failed : ddi_dma_addr_bind_handle"));
+
+ return (-1);
+ }
+
+ if (drsas_check_dma_handle(obj->dma_handle) != DDI_SUCCESS) {
+ ddi_fm_service_impact(instance->dip, DDI_SERVICE_LOST);
+ return (-1);
+ }
+
+ if (drsas_check_acc_handle(obj->acc_handle) != DDI_SUCCESS) {
+ ddi_fm_service_impact(instance->dip, DDI_SERVICE_LOST);
+ return (-1);
+ }
+
+ return (cookie_cnt);
+}
+
+/*
+ * drsas_free_dma_obj(struct drsas_instance *, dma_obj_t)
+ *
+ * De-allocate the memory and other resources for an dma object, which must
+ * have been alloated by a previous call to drsas_alloc_dma_obj()
+ */
+static int
+drsas_free_dma_obj(struct drsas_instance *instance, dma_obj_t obj)
+{
+
+ if (drsas_check_dma_handle(obj.dma_handle) != DDI_SUCCESS) {
+ ddi_fm_service_impact(instance->dip, DDI_SERVICE_UNAFFECTED);
+ return (DDI_FAILURE);
+ }
+
+ if (drsas_check_acc_handle(obj.acc_handle) != DDI_SUCCESS) {
+ ddi_fm_service_impact(instance->dip, DDI_SERVICE_UNAFFECTED);
+ return (DDI_FAILURE);
+ }
+
+ (void) ddi_dma_unbind_handle(obj.dma_handle);
+ ddi_dma_mem_free(&obj.acc_handle);
+ ddi_dma_free_handle(&obj.dma_handle);
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * drsas_dma_alloc(instance_t *, struct scsi_pkt *, struct buf *,
+ * int, int (*)())
+ *
+ * Allocate dma resources for a new scsi command
+ */
+static int
+drsas_dma_alloc(struct drsas_instance *instance, struct scsi_pkt *pkt,
+ struct buf *bp, int flags, int (*callback)())
+{
+ int dma_flags;
+ int (*cb)(caddr_t);
+ int i;
+
+ ddi_dma_attr_t tmp_dma_attr = drsas_generic_dma_attr;
+ struct scsa_cmd *acmd = PKT2CMD(pkt);
+
+ acmd->cmd_buf = bp;
+
+ if (bp->b_flags & B_READ) {
+ acmd->cmd_flags &= ~CFLAG_DMASEND;
+ dma_flags = DDI_DMA_READ;
+ } else {
+ acmd->cmd_flags |= CFLAG_DMASEND;
+ dma_flags = DDI_DMA_WRITE;
+ }
+
+ if (flags & PKT_CONSISTENT) {
+ acmd->cmd_flags |= CFLAG_CONSISTENT;
+ dma_flags |= DDI_DMA_CONSISTENT;
+ }
+
+ if (flags & PKT_DMA_PARTIAL) {
+ dma_flags |= DDI_DMA_PARTIAL;
+ }
+
+ dma_flags |= DDI_DMA_REDZONE;
+
+ cb = (callback == NULL_FUNC) ? DDI_DMA_DONTWAIT : DDI_DMA_SLEEP;
+
+ tmp_dma_attr.dma_attr_sgllen = instance->max_num_sge;
+ tmp_dma_attr.dma_attr_addr_hi = 0xffffffffffffffffull;
+
+ if ((i = ddi_dma_alloc_handle(instance->dip, &tmp_dma_attr,
+ cb, 0, &acmd->cmd_dmahandle)) != DDI_SUCCESS) {
+ switch (i) {
+ case DDI_DMA_BADATTR:
+ bioerror(bp, EFAULT);
+ return (DDI_FAILURE);
+
+ case DDI_DMA_NORESOURCES:
+ bioerror(bp, 0);
+ return (DDI_FAILURE);
+
+ default:
+ con_log(CL_ANN, (CE_PANIC, "ddi_dma_alloc_handle: "
+ "impossible result (0x%x)", i));
+ bioerror(bp, EFAULT);
+ return (DDI_FAILURE);
+ }
+ }
+
+ i = ddi_dma_buf_bind_handle(acmd->cmd_dmahandle, bp, dma_flags,
+ cb, 0, &acmd->cmd_dmacookies[0], &acmd->cmd_ncookies);
+
+ switch (i) {
+ case DDI_DMA_PARTIAL_MAP:
+ if ((dma_flags & DDI_DMA_PARTIAL) == 0) {
+ con_log(CL_ANN, (CE_PANIC, "ddi_dma_buf_bind_handle: "
+ "DDI_DMA_PARTIAL_MAP impossible"));
+ goto no_dma_cookies;
+ }
+
+ if (ddi_dma_numwin(acmd->cmd_dmahandle, &acmd->cmd_nwin) ==
+ DDI_FAILURE) {
+ con_log(CL_ANN, (CE_PANIC, "ddi_dma_numwin failed"));
+ goto no_dma_cookies;
+ }
+
+ if (ddi_dma_getwin(acmd->cmd_dmahandle, acmd->cmd_curwin,
+ &acmd->cmd_dma_offset, &acmd->cmd_dma_len,
+ &acmd->cmd_dmacookies[0], &acmd->cmd_ncookies) ==
+ DDI_FAILURE) {
+
+ con_log(CL_ANN, (CE_PANIC, "ddi_dma_getwin failed"));
+ goto no_dma_cookies;
+ }
+
+ goto get_dma_cookies;
+ case DDI_DMA_MAPPED:
+ acmd->cmd_nwin = 1;
+ acmd->cmd_dma_len = 0;
+ acmd->cmd_dma_offset = 0;
+
+get_dma_cookies:
+ i = 0;
+ acmd->cmd_dmacount = 0;
+ for (;;) {
+ acmd->cmd_dmacount +=
+ acmd->cmd_dmacookies[i++].dmac_size;
+
+ if (i == instance->max_num_sge ||
+ i == acmd->cmd_ncookies)
+ break;
+
+ ddi_dma_nextcookie(acmd->cmd_dmahandle,
+ &acmd->cmd_dmacookies[i]);
+ }
+
+ acmd->cmd_cookie = i;
+ acmd->cmd_cookiecnt = i;
+
+ acmd->cmd_flags |= CFLAG_DMAVALID;
+
+ if (bp->b_bcount >= acmd->cmd_dmacount) {
+ pkt->pkt_resid = bp->b_bcount - acmd->cmd_dmacount;
+ } else {
+ pkt->pkt_resid = 0;
+ }
+
+ return (DDI_SUCCESS);
+ case DDI_DMA_NORESOURCES:
+ bioerror(bp, 0);
+ break;
+ case DDI_DMA_NOMAPPING:
+ bioerror(bp, EFAULT);
+ break;
+ case DDI_DMA_TOOBIG:
+ bioerror(bp, EINVAL);
+ break;
+ case DDI_DMA_INUSE:
+ con_log(CL_ANN, (CE_PANIC, "ddi_dma_buf_bind_handle:"
+ " DDI_DMA_INUSE impossible"));
+ break;
+ default:
+ con_log(CL_ANN, (CE_PANIC, "ddi_dma_buf_bind_handle: "
+ "impossible result (0x%x)", i));
+ break;
+ }
+
+no_dma_cookies:
+ ddi_dma_free_handle(&acmd->cmd_dmahandle);
+ acmd->cmd_dmahandle = NULL;
+ acmd->cmd_flags &= ~CFLAG_DMAVALID;
+ return (DDI_FAILURE);
+}
+
+/*
+ * drsas_dma_move(struct drsas_instance *, struct scsi_pkt *, struct buf *)
+ *
+ * move dma resources to next dma window
+ *
+ */
+static int
+drsas_dma_move(struct drsas_instance *instance, struct scsi_pkt *pkt,
+ struct buf *bp)
+{
+ int i = 0;
+
+ struct scsa_cmd *acmd = PKT2CMD(pkt);
+
+ /*
+ * If there are no more cookies remaining in this window,
+ * must move to the next window first.
+ */
+ if (acmd->cmd_cookie == acmd->cmd_ncookies) {
+ if (acmd->cmd_curwin == acmd->cmd_nwin && acmd->cmd_nwin == 1) {
+ return (DDI_SUCCESS);
+ }
+
+ /* at last window, cannot move */
+ if (++acmd->cmd_curwin >= acmd->cmd_nwin) {
+ return (DDI_FAILURE);
+ }
+
+ if (ddi_dma_getwin(acmd->cmd_dmahandle, acmd->cmd_curwin,
+ &acmd->cmd_dma_offset, &acmd->cmd_dma_len,
+ &acmd->cmd_dmacookies[0], &acmd->cmd_ncookies) ==
+ DDI_FAILURE) {
+ return (DDI_FAILURE);
+ }
+
+ acmd->cmd_cookie = 0;
+ } else {
+ /* still more cookies in this window - get the next one */
+ ddi_dma_nextcookie(acmd->cmd_dmahandle,
+ &acmd->cmd_dmacookies[0]);
+ }
+
+ /* get remaining cookies in this window, up to our maximum */
+ for (;;) {
+ acmd->cmd_dmacount += acmd->cmd_dmacookies[i++].dmac_size;
+ acmd->cmd_cookie++;
+
+ if (i == instance->max_num_sge ||
+ acmd->cmd_cookie == acmd->cmd_ncookies) {
+ break;
+ }
+
+ ddi_dma_nextcookie(acmd->cmd_dmahandle,
+ &acmd->cmd_dmacookies[i]);
+ }
+
+ acmd->cmd_cookiecnt = i;
+
+ if (bp->b_bcount >= acmd->cmd_dmacount) {
+ pkt->pkt_resid = bp->b_bcount - acmd->cmd_dmacount;
+ } else {
+ pkt->pkt_resid = 0;
+ }
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * build_cmd
+ */
+static struct drsas_cmd *
+build_cmd(struct drsas_instance *instance, struct scsi_address *ap,
+ struct scsi_pkt *pkt, uchar_t *cmd_done)
+{
+ uint16_t flags = 0;
+ uint32_t i;
+ uint32_t context;
+ uint32_t sge_bytes;
+ ddi_acc_handle_t acc_handle;
+ struct drsas_cmd *cmd;
+ struct drsas_sge64 *mfi_sgl;
+ struct scsa_cmd *acmd = PKT2CMD(pkt);
+ struct drsas_pthru_frame *pthru;
+ struct drsas_io_frame *ldio;
+
+ /* find out if this is logical or physical drive command. */
+ acmd->islogical = MRDRV_IS_LOGICAL(ap);
+ acmd->device_id = MAP_DEVICE_ID(instance, ap);
+ *cmd_done = 0;
+
+ /* get the command packet */
+ if (!(cmd = get_mfi_pkt(instance))) {
+ return (NULL);
+ }
+
+ acc_handle = cmd->frame_dma_obj.acc_handle;
+
+ /* Clear the frame buffer and assign back the context id */
+ (void) memset((char *)&cmd->frame[0], 0, sizeof (union drsas_frame));
+ ddi_put32(acc_handle, &cmd->frame->hdr.context, cmd->index);
+
+ cmd->pkt = pkt;
+ cmd->cmd = acmd;
+
+ /* lets get the command directions */
+ if (acmd->cmd_flags & CFLAG_DMASEND) {
+ flags = MFI_FRAME_DIR_WRITE;
+
+ if (acmd->cmd_flags & CFLAG_CONSISTENT) {
+ (void) ddi_dma_sync(acmd->cmd_dmahandle,
+ acmd->cmd_dma_offset, acmd->cmd_dma_len,
+ DDI_DMA_SYNC_FORDEV);
+ }
+ } else if (acmd->cmd_flags & ~CFLAG_DMASEND) {
+ flags = MFI_FRAME_DIR_READ;
+
+ if (acmd->cmd_flags & CFLAG_CONSISTENT) {
+ (void) ddi_dma_sync(acmd->cmd_dmahandle,
+ acmd->cmd_dma_offset, acmd->cmd_dma_len,
+ DDI_DMA_SYNC_FORCPU);
+ }
+ } else {
+ flags = MFI_FRAME_DIR_NONE;
+ }
+
+ flags |= MFI_FRAME_SGL64;
+
+ switch (pkt->pkt_cdbp[0]) {
+
+ /*
+ * case SCMD_SYNCHRONIZE_CACHE:
+ * flush_cache(instance);
+ * return_mfi_pkt(instance, cmd);
+ * *cmd_done = 1;
+ *
+ * return (NULL);
+ */
+
+ case SCMD_READ:
+ case SCMD_WRITE:
+ case SCMD_READ_G1:
+ case SCMD_WRITE_G1:
+ if (acmd->islogical) {
+ ldio = (struct drsas_io_frame *)cmd->frame;
+
+ /*
+ * preare the Logical IO frame:
+ * 2nd bit is zero for all read cmds
+ */
+ ddi_put8(acc_handle, &ldio->cmd,
+ (pkt->pkt_cdbp[0] & 0x02) ? MFI_CMD_OP_LD_WRITE
+ : MFI_CMD_OP_LD_READ);
+ ddi_put8(acc_handle, &ldio->cmd_status, 0x0);
+ ddi_put8(acc_handle, &ldio->scsi_status, 0x0);
+ ddi_put8(acc_handle, &ldio->target_id, acmd->device_id);
+ ddi_put16(acc_handle, &ldio->timeout, 0);
+ ddi_put8(acc_handle, &ldio->reserved_0, 0);
+ ddi_put16(acc_handle, &ldio->pad_0, 0);
+ ddi_put16(acc_handle, &ldio->flags, flags);
+
+ /* Initialize sense Information */
+ bzero(cmd->sense, SENSE_LENGTH);
+ ddi_put8(acc_handle, &ldio->sense_len, SENSE_LENGTH);
+ ddi_put32(acc_handle, &ldio->sense_buf_phys_addr_hi, 0);
+ ddi_put32(acc_handle, &ldio->sense_buf_phys_addr_lo,
+ cmd->sense_phys_addr);
+ ddi_put32(acc_handle, &ldio->start_lba_hi, 0);
+ ddi_put8(acc_handle, &ldio->access_byte,
+ (acmd->cmd_cdblen != 6) ? pkt->pkt_cdbp[1] : 0);
+ ddi_put8(acc_handle, &ldio->sge_count,
+ acmd->cmd_cookiecnt);
+ mfi_sgl = (struct drsas_sge64 *)&ldio->sgl;
+
+ context = ddi_get32(acc_handle, &ldio->context);
+
+ if (acmd->cmd_cdblen == CDB_GROUP0) {
+ ddi_put32(acc_handle, &ldio->lba_count, (
+ (uint16_t)(pkt->pkt_cdbp[4])));
+
+ ddi_put32(acc_handle, &ldio->start_lba_lo, (
+ ((uint32_t)(pkt->pkt_cdbp[3])) |
+ ((uint32_t)(pkt->pkt_cdbp[2]) << 8) |
+ ((uint32_t)((pkt->pkt_cdbp[1]) & 0x1F)
+ << 16)));
+ } else if (acmd->cmd_cdblen == CDB_GROUP1) {
+ ddi_put32(acc_handle, &ldio->lba_count, (
+ ((uint16_t)(pkt->pkt_cdbp[8])) |
+ ((uint16_t)(pkt->pkt_cdbp[7]) << 8)));
+
+ ddi_put32(acc_handle, &ldio->start_lba_lo, (
+ ((uint32_t)(pkt->pkt_cdbp[5])) |
+ ((uint32_t)(pkt->pkt_cdbp[4]) << 8) |
+ ((uint32_t)(pkt->pkt_cdbp[3]) << 16) |
+ ((uint32_t)(pkt->pkt_cdbp[2]) << 24)));
+ } else if (acmd->cmd_cdblen == CDB_GROUP2) {
+ ddi_put32(acc_handle, &ldio->lba_count, (
+ ((uint16_t)(pkt->pkt_cdbp[9])) |
+ ((uint16_t)(pkt->pkt_cdbp[8]) << 8) |
+ ((uint16_t)(pkt->pkt_cdbp[7]) << 16) |
+ ((uint16_t)(pkt->pkt_cdbp[6]) << 24)));
+
+ ddi_put32(acc_handle, &ldio->start_lba_lo, (
+ ((uint32_t)(pkt->pkt_cdbp[5])) |
+ ((uint32_t)(pkt->pkt_cdbp[4]) << 8) |
+ ((uint32_t)(pkt->pkt_cdbp[3]) << 16) |
+ ((uint32_t)(pkt->pkt_cdbp[2]) << 24)));
+ } else if (acmd->cmd_cdblen == CDB_GROUP3) {
+ ddi_put32(acc_handle, &ldio->lba_count, (
+ ((uint16_t)(pkt->pkt_cdbp[13])) |
+ ((uint16_t)(pkt->pkt_cdbp[12]) << 8) |
+ ((uint16_t)(pkt->pkt_cdbp[11]) << 16) |
+ ((uint16_t)(pkt->pkt_cdbp[10]) << 24)));
+
+ ddi_put32(acc_handle, &ldio->start_lba_lo, (
+ ((uint32_t)(pkt->pkt_cdbp[9])) |
+ ((uint32_t)(pkt->pkt_cdbp[8]) << 8) |
+ ((uint32_t)(pkt->pkt_cdbp[7]) << 16) |
+ ((uint32_t)(pkt->pkt_cdbp[6]) << 24)));
+
+ ddi_put32(acc_handle, &ldio->start_lba_lo, (
+ ((uint32_t)(pkt->pkt_cdbp[5])) |
+ ((uint32_t)(pkt->pkt_cdbp[4]) << 8) |
+ ((uint32_t)(pkt->pkt_cdbp[3]) << 16) |
+ ((uint32_t)(pkt->pkt_cdbp[2]) << 24)));
+ }
+
+ break;
+ }
+ /* fall through For all non-rd/wr cmds */
+ default:
+
+ switch (pkt->pkt_cdbp[0]) {
+ case SCMD_MODE_SENSE:
+ case SCMD_MODE_SENSE_G1: {
+ union scsi_cdb *cdbp;
+ uint16_t page_code;
+
+ cdbp = (void *)pkt->pkt_cdbp;
+ page_code = (uint16_t)cdbp->cdb_un.sg.scsi[0];
+ switch (page_code) {
+ case 0x3:
+ case 0x4:
+ (void) drsas_mode_sense_build(pkt);
+ return_mfi_pkt(instance, cmd);
+ *cmd_done = 1;
+ return (NULL);
+ }
+ break;
+ }
+ default:
+ break;
+ }
+
+ pthru = (struct drsas_pthru_frame *)cmd->frame;
+
+ /* prepare the DCDB frame */
+ ddi_put8(acc_handle, &pthru->cmd, (acmd->islogical) ?
+ MFI_CMD_OP_LD_SCSI : MFI_CMD_OP_PD_SCSI);
+ ddi_put8(acc_handle, &pthru->cmd_status, 0x0);
+ ddi_put8(acc_handle, &pthru->scsi_status, 0x0);
+ ddi_put8(acc_handle, &pthru->target_id, acmd->device_id);
+ ddi_put8(acc_handle, &pthru->lun, 0);
+ ddi_put8(acc_handle, &pthru->cdb_len, acmd->cmd_cdblen);
+ ddi_put16(acc_handle, &pthru->timeout, 0);
+ ddi_put16(acc_handle, &pthru->flags, flags);
+ ddi_put32(acc_handle, &pthru->data_xfer_len,
+ acmd->cmd_dmacount);
+ ddi_put8(acc_handle, &pthru->sge_count, acmd->cmd_cookiecnt);
+ mfi_sgl = (struct drsas_sge64 *)&pthru->sgl;
+
+ bzero(cmd->sense, SENSE_LENGTH);
+ ddi_put8(acc_handle, &pthru->sense_len, SENSE_LENGTH);
+ ddi_put32(acc_handle, &pthru->sense_buf_phys_addr_hi, 0);
+ ddi_put32(acc_handle, &pthru->sense_buf_phys_addr_lo,
+ cmd->sense_phys_addr);
+
+ context = ddi_get32(acc_handle, &pthru->context);
+ ddi_rep_put8(acc_handle, (uint8_t *)pkt->pkt_cdbp,
+ (uint8_t *)pthru->cdb, acmd->cmd_cdblen, DDI_DEV_AUTOINCR);
+
+ break;
+ }
+#ifdef lint
+ context = context;
+#endif
+ /* prepare the scatter-gather list for the firmware */
+ for (i = 0; i < acmd->cmd_cookiecnt; i++, mfi_sgl++) {
+ ddi_put64(acc_handle, &mfi_sgl->phys_addr,
+ acmd->cmd_dmacookies[i].dmac_laddress);
+ ddi_put32(acc_handle, &mfi_sgl->length,
+ acmd->cmd_dmacookies[i].dmac_size);
+ }
+
+ sge_bytes = sizeof (struct drsas_sge64)*acmd->cmd_cookiecnt;
+
+ cmd->frame_count = (sge_bytes / MRMFI_FRAME_SIZE) +
+ ((sge_bytes % MRMFI_FRAME_SIZE) ? 1 : 0) + 1;
+
+ if (cmd->frame_count >= 8) {
+ cmd->frame_count = 8;
+ }
+
+ return (cmd);
+}
+
+/*
+ * issue_mfi_pthru
+ */
+static int
+issue_mfi_pthru(struct drsas_instance *instance, struct drsas_ioctl *ioctl,
+ struct drsas_cmd *cmd, int mode)
+{
+ void *ubuf;
+ uint32_t kphys_addr = 0;
+ uint32_t xferlen = 0;
+ uint_t model;
+ ddi_acc_handle_t acc_handle = cmd->frame_dma_obj.acc_handle;
+ dma_obj_t pthru_dma_obj;
+ struct drsas_pthru_frame *kpthru;
+ struct drsas_pthru_frame *pthru;
+ int i;
+ pthru = &cmd->frame->pthru;
+ kpthru = (struct drsas_pthru_frame *)&ioctl->frame[0];
+
+ model = ddi_model_convert_from(mode & FMODELS);
+ if (model == DDI_MODEL_ILP32) {
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_pthru: DDI_MODEL_LP32"));
+
+ xferlen = kpthru->sgl.sge32[0].length;
+
+ ubuf = (void *)(ulong_t)kpthru->sgl.sge32[0].phys_addr;
+ } else {
+#ifdef _ILP32
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_pthru: DDI_MODEL_LP32"));
+ xferlen = kpthru->sgl.sge32[0].length;
+ ubuf = (void *)(ulong_t)kpthru->sgl.sge32[0].phys_addr;
+#else
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_pthru: DDI_MODEL_LP64"));
+ xferlen = kpthru->sgl.sge64[0].length;
+ ubuf = (void *)(ulong_t)kpthru->sgl.sge64[0].phys_addr;
+#endif
+ }
+
+ if (xferlen) {
+ /* means IOCTL requires DMA */
+ /* allocate the data transfer buffer */
+ pthru_dma_obj.size = xferlen;
+ pthru_dma_obj.dma_attr = drsas_generic_dma_attr;
+ pthru_dma_obj.dma_attr.dma_attr_addr_hi = 0xFFFFFFFFU;
+ pthru_dma_obj.dma_attr.dma_attr_count_max = 0xFFFFFFFFU;
+ pthru_dma_obj.dma_attr.dma_attr_sgllen = 1;
+ pthru_dma_obj.dma_attr.dma_attr_align = 1;
+
+ /* allocate kernel buffer for DMA */
+ if (drsas_alloc_dma_obj(instance, &pthru_dma_obj,
+ (uchar_t)DDI_STRUCTURE_LE_ACC) != 1) {
+ con_log(CL_ANN, (CE_WARN, "issue_mfi_pthru: "
+ "could not allocate data transfer buffer."));
+ return (DDI_FAILURE);
+ }
+
+ /* If IOCTL requires DMA WRITE, do ddi_copyin IOCTL data copy */
+ if (kpthru->flags & MFI_FRAME_DIR_WRITE) {
+ for (i = 0; i < xferlen; i++) {
+ if (ddi_copyin((uint8_t *)ubuf+i,
+ (uint8_t *)pthru_dma_obj.buffer+i,
+ 1, mode)) {
+ con_log(CL_ANN, (CE_WARN,
+ "issue_mfi_pthru : "
+ "copy from user space failed"));
+ return (DDI_FAILURE);
+ }
+ }
+ }
+
+ kphys_addr = pthru_dma_obj.dma_cookie[0].dmac_address;
+ }
+
+ ddi_put8(acc_handle, &pthru->cmd, kpthru->cmd);
+ ddi_put8(acc_handle, &pthru->sense_len, kpthru->sense_len);
+ ddi_put8(acc_handle, &pthru->cmd_status, 0);
+ ddi_put8(acc_handle, &pthru->scsi_status, 0);
+ ddi_put8(acc_handle, &pthru->target_id, kpthru->target_id);
+ ddi_put8(acc_handle, &pthru->lun, kpthru->lun);
+ ddi_put8(acc_handle, &pthru->cdb_len, kpthru->cdb_len);
+ ddi_put8(acc_handle, &pthru->sge_count, kpthru->sge_count);
+ ddi_put16(acc_handle, &pthru->timeout, kpthru->timeout);
+ ddi_put32(acc_handle, &pthru->data_xfer_len, kpthru->data_xfer_len);
+
+ ddi_put32(acc_handle, &pthru->sense_buf_phys_addr_hi, 0);
+ /* pthru->sense_buf_phys_addr_lo = cmd->sense_phys_addr; */
+ ddi_put32(acc_handle, &pthru->sense_buf_phys_addr_lo, 0);
+
+ ddi_rep_put8(acc_handle, (uint8_t *)kpthru->cdb, (uint8_t *)pthru->cdb,
+ pthru->cdb_len, DDI_DEV_AUTOINCR);
+
+ ddi_put16(acc_handle, &pthru->flags, kpthru->flags & ~MFI_FRAME_SGL64);
+ ddi_put32(acc_handle, &pthru->sgl.sge32[0].length, xferlen);
+ ddi_put32(acc_handle, &pthru->sgl.sge32[0].phys_addr, kphys_addr);
+
+ cmd->sync_cmd = DRSAS_TRUE;
+ cmd->frame_count = 1;
+
+ if (instance->func_ptr->issue_cmd_in_sync_mode(instance, cmd)) {
+ con_log(CL_ANN, (CE_WARN,
+ "issue_mfi_pthru: fw_ioctl failed"));
+ } else {
+ if (xferlen && kpthru->flags & MFI_FRAME_DIR_READ) {
+ for (i = 0; i < xferlen; i++) {
+ if (ddi_copyout(
+ (uint8_t *)pthru_dma_obj.buffer+i,
+ (uint8_t *)ubuf+i, 1, mode)) {
+ con_log(CL_ANN, (CE_WARN,
+ "issue_mfi_pthru : "
+ "copy to user space failed"));
+ return (DDI_FAILURE);
+ }
+ }
+ }
+ }
+
+ kpthru->cmd_status = ddi_get8(acc_handle, &pthru->cmd_status);
+ kpthru->scsi_status = ddi_get8(acc_handle, &pthru->scsi_status);
+
+ con_log(CL_ANN, (CE_NOTE, "issue_mfi_pthru: cmd_status %x, "
+ "scsi_status %x", kpthru->cmd_status, kpthru->scsi_status));
+
+ if (xferlen) {
+ /* free kernel buffer */
+ if (drsas_free_dma_obj(instance, pthru_dma_obj) != DDI_SUCCESS)
+ return (DDI_FAILURE);
+ }
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * issue_mfi_dcmd
+ */
+static int
+issue_mfi_dcmd(struct drsas_instance *instance, struct drsas_ioctl *ioctl,
+ struct drsas_cmd *cmd, int mode)
+{
+ void *ubuf;
+ uint32_t kphys_addr = 0;
+ uint32_t xferlen = 0;
+ uint32_t model;
+ dma_obj_t dcmd_dma_obj;
+ struct drsas_dcmd_frame *kdcmd;
+ struct drsas_dcmd_frame *dcmd;
+ ddi_acc_handle_t acc_handle = cmd->frame_dma_obj.acc_handle;
+ int i;
+ dcmd = &cmd->frame->dcmd;
+ kdcmd = (struct drsas_dcmd_frame *)&ioctl->frame[0];
+
+ model = ddi_model_convert_from(mode & FMODELS);
+ if (model == DDI_MODEL_ILP32) {
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_dcmd: DDI_MODEL_ILP32"));
+
+ xferlen = kdcmd->sgl.sge32[0].length;
+
+ ubuf = (void *)(ulong_t)kdcmd->sgl.sge32[0].phys_addr;
+ } else {
+#ifdef _ILP32
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_dcmd: DDI_MODEL_ILP32"));
+ xferlen = kdcmd->sgl.sge32[0].length;
+ ubuf = (void *)(ulong_t)kdcmd->sgl.sge32[0].phys_addr;
+#else
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_dcmd: DDI_MODEL_LP64"));
+ xferlen = kdcmd->sgl.sge64[0].length;
+ ubuf = (void *)(ulong_t)kdcmd->sgl.sge64[0].phys_addr;
+#endif
+ }
+ if (xferlen) {
+ /* means IOCTL requires DMA */
+ /* allocate the data transfer buffer */
+ dcmd_dma_obj.size = xferlen;
+ dcmd_dma_obj.dma_attr = drsas_generic_dma_attr;
+ dcmd_dma_obj.dma_attr.dma_attr_addr_hi = 0xFFFFFFFFU;
+ dcmd_dma_obj.dma_attr.dma_attr_count_max = 0xFFFFFFFFU;
+ dcmd_dma_obj.dma_attr.dma_attr_sgllen = 1;
+ dcmd_dma_obj.dma_attr.dma_attr_align = 1;
+
+ /* allocate kernel buffer for DMA */
+ if (drsas_alloc_dma_obj(instance, &dcmd_dma_obj,
+ (uchar_t)DDI_STRUCTURE_LE_ACC) != 1) {
+ con_log(CL_ANN, (CE_WARN, "issue_mfi_dcmd: "
+ "could not allocate data transfer buffer."));
+ return (DDI_FAILURE);
+ }
+
+ /* If IOCTL requires DMA WRITE, do ddi_copyin IOCTL data copy */
+ if (kdcmd->flags & MFI_FRAME_DIR_WRITE) {
+ for (i = 0; i < xferlen; i++) {
+ if (ddi_copyin((uint8_t *)ubuf + i,
+ (uint8_t *)dcmd_dma_obj.buffer + i,
+ 1, mode)) {
+ con_log(CL_ANN, (CE_WARN,
+ "issue_mfi_dcmd : "
+ "copy from user space failed"));
+ return (DDI_FAILURE);
+ }
+ }
+ }
+
+ kphys_addr = dcmd_dma_obj.dma_cookie[0].dmac_address;
+ }
+
+ ddi_put8(acc_handle, &dcmd->cmd, kdcmd->cmd);
+ ddi_put8(acc_handle, &dcmd->cmd_status, 0);
+ ddi_put8(acc_handle, &dcmd->sge_count, kdcmd->sge_count);
+ ddi_put16(acc_handle, &dcmd->timeout, kdcmd->timeout);
+ ddi_put32(acc_handle, &dcmd->data_xfer_len, kdcmd->data_xfer_len);
+ ddi_put32(acc_handle, &dcmd->opcode, kdcmd->opcode);
+
+ ddi_rep_put8(acc_handle, (uint8_t *)kdcmd->mbox.b,
+ (uint8_t *)dcmd->mbox.b, DCMD_MBOX_SZ, DDI_DEV_AUTOINCR);
+
+ ddi_put16(acc_handle, &dcmd->flags, kdcmd->flags & ~MFI_FRAME_SGL64);
+ ddi_put32(acc_handle, &dcmd->sgl.sge32[0].length, xferlen);
+ ddi_put32(acc_handle, &dcmd->sgl.sge32[0].phys_addr, kphys_addr);
+
+ cmd->sync_cmd = DRSAS_TRUE;
+ cmd->frame_count = 1;
+
+ if (instance->func_ptr->issue_cmd_in_sync_mode(instance, cmd)) {
+ con_log(CL_ANN, (CE_WARN, "issue_mfi_dcmd: fw_ioctl failed"));
+ } else {
+ if (xferlen && (kdcmd->flags & MFI_FRAME_DIR_READ)) {
+ for (i = 0; i < xferlen; i++) {
+ if (ddi_copyout(
+ (uint8_t *)dcmd_dma_obj.buffer + i,
+ (uint8_t *)ubuf + i,
+ 1, mode)) {
+ con_log(CL_ANN, (CE_WARN,
+ "issue_mfi_dcmd : "
+ "copy to user space failed"));
+ return (DDI_FAILURE);
+ }
+ }
+ }
+ }
+
+ kdcmd->cmd_status = ddi_get8(acc_handle, &dcmd->cmd_status);
+
+ if (xferlen) {
+ /* free kernel buffer */
+ if (drsas_free_dma_obj(instance, dcmd_dma_obj) != DDI_SUCCESS)
+ return (DDI_FAILURE);
+ }
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * issue_mfi_smp
+ */
+static int
+issue_mfi_smp(struct drsas_instance *instance, struct drsas_ioctl *ioctl,
+ struct drsas_cmd *cmd, int mode)
+{
+ void *request_ubuf;
+ void *response_ubuf;
+ uint32_t request_xferlen = 0;
+ uint32_t response_xferlen = 0;
+ uint_t model;
+ dma_obj_t request_dma_obj;
+ dma_obj_t response_dma_obj;
+ ddi_acc_handle_t acc_handle = cmd->frame_dma_obj.acc_handle;
+ struct drsas_smp_frame *ksmp;
+ struct drsas_smp_frame *smp;
+ struct drsas_sge32 *sge32;
+#ifndef _ILP32
+ struct drsas_sge64 *sge64;
+#endif
+ int i;
+ uint64_t tmp_sas_addr;
+
+ smp = &cmd->frame->smp;
+ ksmp = (struct drsas_smp_frame *)&ioctl->frame[0];
+
+ model = ddi_model_convert_from(mode & FMODELS);
+ if (model == DDI_MODEL_ILP32) {
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_smp: DDI_MODEL_ILP32"));
+
+ sge32 = &ksmp->sgl[0].sge32[0];
+ response_xferlen = sge32[0].length;
+ request_xferlen = sge32[1].length;
+ con_log(CL_ANN, (CE_NOTE, "issue_mfi_smp: "
+ "response_xferlen = %x, request_xferlen = %x",
+ response_xferlen, request_xferlen));
+
+ response_ubuf = (void *)(ulong_t)sge32[0].phys_addr;
+ request_ubuf = (void *)(ulong_t)sge32[1].phys_addr;
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_smp: "
+ "response_ubuf = %p, request_ubuf = %p",
+ response_ubuf, request_ubuf));
+ } else {
+#ifdef _ILP32
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_smp: DDI_MODEL_ILP32"));
+
+ sge32 = &ksmp->sgl[0].sge32[0];
+ response_xferlen = sge32[0].length;
+ request_xferlen = sge32[1].length;
+ con_log(CL_ANN, (CE_NOTE, "issue_mfi_smp: "
+ "response_xferlen = %x, request_xferlen = %x",
+ response_xferlen, request_xferlen));
+
+ response_ubuf = (void *)(ulong_t)sge32[0].phys_addr;
+ request_ubuf = (void *)(ulong_t)sge32[1].phys_addr;
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_smp: "
+ "response_ubuf = %p, request_ubuf = %p",
+ response_ubuf, request_ubuf));
+#else
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_smp: DDI_MODEL_LP64"));
+
+ sge64 = &ksmp->sgl[0].sge64[0];
+ response_xferlen = sge64[0].length;
+ request_xferlen = sge64[1].length;
+
+ response_ubuf = (void *)(ulong_t)sge64[0].phys_addr;
+ request_ubuf = (void *)(ulong_t)sge64[1].phys_addr;
+#endif
+ }
+ if (request_xferlen) {
+ /* means IOCTL requires DMA */
+ /* allocate the data transfer buffer */
+ request_dma_obj.size = request_xferlen;
+ request_dma_obj.dma_attr = drsas_generic_dma_attr;
+ request_dma_obj.dma_attr.dma_attr_addr_hi = 0xFFFFFFFFU;
+ request_dma_obj.dma_attr.dma_attr_count_max = 0xFFFFFFFFU;
+ request_dma_obj.dma_attr.dma_attr_sgllen = 1;
+ request_dma_obj.dma_attr.dma_attr_align = 1;
+
+ /* allocate kernel buffer for DMA */
+ if (drsas_alloc_dma_obj(instance, &request_dma_obj,
+ (uchar_t)DDI_STRUCTURE_LE_ACC) != 1) {
+ con_log(CL_ANN, (CE_WARN, "issue_mfi_smp: "
+ "could not allocate data transfer buffer."));
+ return (DDI_FAILURE);
+ }
+
+ /* If IOCTL requires DMA WRITE, do ddi_copyin IOCTL data copy */
+ for (i = 0; i < request_xferlen; i++) {
+ if (ddi_copyin((uint8_t *)request_ubuf + i,
+ (uint8_t *)request_dma_obj.buffer + i,
+ 1, mode)) {
+ con_log(CL_ANN, (CE_WARN, "issue_mfi_smp: "
+ "copy from user space failed"));
+ return (DDI_FAILURE);
+ }
+ }
+ }
+
+ if (response_xferlen) {
+ /* means IOCTL requires DMA */
+ /* allocate the data transfer buffer */
+ response_dma_obj.size = response_xferlen;
+ response_dma_obj.dma_attr = drsas_generic_dma_attr;
+ response_dma_obj.dma_attr.dma_attr_addr_hi = 0xFFFFFFFFU;
+ response_dma_obj.dma_attr.dma_attr_count_max = 0xFFFFFFFFU;
+ response_dma_obj.dma_attr.dma_attr_sgllen = 1;
+ response_dma_obj.dma_attr.dma_attr_align = 1;
+
+ /* allocate kernel buffer for DMA */
+ if (drsas_alloc_dma_obj(instance, &response_dma_obj,
+ (uchar_t)DDI_STRUCTURE_LE_ACC) != 1) {
+ con_log(CL_ANN, (CE_WARN, "issue_mfi_smp: "
+ "could not allocate data transfer buffer."));
+ return (DDI_FAILURE);
+ }
+
+ /* If IOCTL requires DMA WRITE, do ddi_copyin IOCTL data copy */
+ for (i = 0; i < response_xferlen; i++) {
+ if (ddi_copyin((uint8_t *)response_ubuf + i,
+ (uint8_t *)response_dma_obj.buffer + i,
+ 1, mode)) {
+ con_log(CL_ANN, (CE_WARN, "issue_mfi_smp: "
+ "copy from user space failed"));
+ return (DDI_FAILURE);
+ }
+ }
+ }
+
+ ddi_put8(acc_handle, &smp->cmd, ksmp->cmd);
+ ddi_put8(acc_handle, &smp->cmd_status, 0);
+ ddi_put8(acc_handle, &smp->connection_status, 0);
+ ddi_put8(acc_handle, &smp->sge_count, ksmp->sge_count);
+ /* smp->context = ksmp->context; */
+ ddi_put16(acc_handle, &smp->timeout, ksmp->timeout);
+ ddi_put32(acc_handle, &smp->data_xfer_len, ksmp->data_xfer_len);
+
+ bcopy((void *)&ksmp->sas_addr, (void *)&tmp_sas_addr,
+ sizeof (uint64_t));
+ ddi_put64(acc_handle, &smp->sas_addr, tmp_sas_addr);
+
+ ddi_put16(acc_handle, &smp->flags, ksmp->flags & ~MFI_FRAME_SGL64);
+
+ model = ddi_model_convert_from(mode & FMODELS);
+ if (model == DDI_MODEL_ILP32) {
+ con_log(CL_ANN1, (CE_NOTE,
+ "handle_drv_ioctl: DDI_MODEL_ILP32"));
+
+ sge32 = &smp->sgl[0].sge32[0];
+ ddi_put32(acc_handle, &sge32[0].length, response_xferlen);
+ ddi_put32(acc_handle, &sge32[0].phys_addr,
+ response_dma_obj.dma_cookie[0].dmac_address);
+ ddi_put32(acc_handle, &sge32[1].length, request_xferlen);
+ ddi_put32(acc_handle, &sge32[1].phys_addr,
+ request_dma_obj.dma_cookie[0].dmac_address);
+ } else {
+#ifdef _ILP32
+ con_log(CL_ANN1, (CE_NOTE,
+ "handle_drv_ioctl: DDI_MODEL_ILP32"));
+ sge32 = &smp->sgl[0].sge32[0];
+ ddi_put32(acc_handle, &sge32[0].length, response_xferlen);
+ ddi_put32(acc_handle, &sge32[0].phys_addr,
+ response_dma_obj.dma_cookie[0].dmac_address);
+ ddi_put32(acc_handle, &sge32[1].length, request_xferlen);
+ ddi_put32(acc_handle, &sge32[1].phys_addr,
+ request_dma_obj.dma_cookie[0].dmac_address);
+#else
+ con_log(CL_ANN1, (CE_NOTE,
+ "issue_mfi_smp: DDI_MODEL_LP64"));
+ sge64 = &smp->sgl[0].sge64[0];
+ ddi_put32(acc_handle, &sge64[0].length, response_xferlen);
+ ddi_put64(acc_handle, &sge64[0].phys_addr,
+ response_dma_obj.dma_cookie[0].dmac_address);
+ ddi_put32(acc_handle, &sge64[1].length, request_xferlen);
+ ddi_put64(acc_handle, &sge64[1].phys_addr,
+ request_dma_obj.dma_cookie[0].dmac_address);
+#endif
+ }
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_smp : "
+ "smp->response_xferlen = %d, smp->request_xferlen = %d "
+ "smp->data_xfer_len = %d", ddi_get32(acc_handle, &sge32[0].length),
+ ddi_get32(acc_handle, &sge32[1].length),
+ ddi_get32(acc_handle, &smp->data_xfer_len)));
+
+ cmd->sync_cmd = DRSAS_TRUE;
+ cmd->frame_count = 1;
+
+ if (instance->func_ptr->issue_cmd_in_sync_mode(instance, cmd)) {
+ con_log(CL_ANN, (CE_WARN,
+ "issue_mfi_smp: fw_ioctl failed"));
+ } else {
+ con_log(CL_ANN1, (CE_NOTE,
+ "issue_mfi_smp: copy to user space"));
+
+ if (request_xferlen) {
+ for (i = 0; i < request_xferlen; i++) {
+ if (ddi_copyout(
+ (uint8_t *)request_dma_obj.buffer +
+ i, (uint8_t *)request_ubuf + i,
+ 1, mode)) {
+ con_log(CL_ANN, (CE_WARN,
+ "issue_mfi_smp : copy to user space"
+ " failed"));
+ return (DDI_FAILURE);
+ }
+ }
+ }
+
+ if (response_xferlen) {
+ for (i = 0; i < response_xferlen; i++) {
+ if (ddi_copyout(
+ (uint8_t *)response_dma_obj.buffer
+ + i, (uint8_t *)response_ubuf
+ + i, 1, mode)) {
+ con_log(CL_ANN, (CE_WARN,
+ "issue_mfi_smp : copy to "
+ "user space failed"));
+ return (DDI_FAILURE);
+ }
+ }
+ }
+ }
+
+ ksmp->cmd_status = ddi_get8(acc_handle, &smp->cmd_status);
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_smp: smp->cmd_status = %d",
+ ddi_get8(acc_handle, &smp->cmd_status)));
+
+
+ if (request_xferlen) {
+ /* free kernel buffer */
+ if (drsas_free_dma_obj(instance, request_dma_obj) !=
+ DDI_SUCCESS)
+ return (DDI_FAILURE);
+ }
+
+ if (response_xferlen) {
+ /* free kernel buffer */
+ if (drsas_free_dma_obj(instance, response_dma_obj) !=
+ DDI_SUCCESS)
+ return (DDI_FAILURE);
+ }
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * issue_mfi_stp
+ */
+static int
+issue_mfi_stp(struct drsas_instance *instance, struct drsas_ioctl *ioctl,
+ struct drsas_cmd *cmd, int mode)
+{
+ void *fis_ubuf;
+ void *data_ubuf;
+ uint32_t fis_xferlen = 0;
+ uint32_t data_xferlen = 0;
+ uint_t model;
+ dma_obj_t fis_dma_obj;
+ dma_obj_t data_dma_obj;
+ struct drsas_stp_frame *kstp;
+ struct drsas_stp_frame *stp;
+ ddi_acc_handle_t acc_handle = cmd->frame_dma_obj.acc_handle;
+ int i;
+
+ stp = &cmd->frame->stp;
+ kstp = (struct drsas_stp_frame *)&ioctl->frame[0];
+
+ model = ddi_model_convert_from(mode & FMODELS);
+ if (model == DDI_MODEL_ILP32) {
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_stp: DDI_MODEL_ILP32"));
+
+ fis_xferlen = kstp->sgl.sge32[0].length;
+ data_xferlen = kstp->sgl.sge32[1].length;
+
+ fis_ubuf = (void *)(ulong_t)kstp->sgl.sge32[0].phys_addr;
+ data_ubuf = (void *)(ulong_t)kstp->sgl.sge32[1].phys_addr;
+ }
+ else
+ {
+#ifdef _ILP32
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_stp: DDI_MODEL_ILP32"));
+
+ fis_xferlen = kstp->sgl.sge32[0].length;
+ data_xferlen = kstp->sgl.sge32[1].length;
+
+ fis_ubuf = (void *)(ulong_t)kstp->sgl.sge32[0].phys_addr;
+ data_ubuf = (void *)(ulong_t)kstp->sgl.sge32[1].phys_addr;
+#else
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_stp: DDI_MODEL_LP64"));
+
+ fis_xferlen = kstp->sgl.sge64[0].length;
+ data_xferlen = kstp->sgl.sge64[1].length;
+
+ fis_ubuf = (void *)(ulong_t)kstp->sgl.sge64[0].phys_addr;
+ data_ubuf = (void *)(ulong_t)kstp->sgl.sge64[1].phys_addr;
+#endif
+ }
+
+
+ if (fis_xferlen) {
+ con_log(CL_ANN, (CE_NOTE, "issue_mfi_stp: "
+ "fis_ubuf = %p fis_xferlen = %x", fis_ubuf, fis_xferlen));
+
+ /* means IOCTL requires DMA */
+ /* allocate the data transfer buffer */
+ fis_dma_obj.size = fis_xferlen;
+ fis_dma_obj.dma_attr = drsas_generic_dma_attr;
+ fis_dma_obj.dma_attr.dma_attr_addr_hi = 0xFFFFFFFFU;
+ fis_dma_obj.dma_attr.dma_attr_count_max = 0xFFFFFFFFU;
+ fis_dma_obj.dma_attr.dma_attr_sgllen = 1;
+ fis_dma_obj.dma_attr.dma_attr_align = 1;
+
+ /* allocate kernel buffer for DMA */
+ if (drsas_alloc_dma_obj(instance, &fis_dma_obj,
+ (uchar_t)DDI_STRUCTURE_LE_ACC) != 1) {
+ con_log(CL_ANN, (CE_WARN, "issue_mfi_stp : "
+ "could not allocate data transfer buffer."));
+ return (DDI_FAILURE);
+ }
+
+ /* If IOCTL requires DMA WRITE, do ddi_copyin IOCTL data copy */
+ for (i = 0; i < fis_xferlen; i++) {
+ if (ddi_copyin((uint8_t *)fis_ubuf + i,
+ (uint8_t *)fis_dma_obj.buffer + i, 1, mode)) {
+ con_log(CL_ANN, (CE_WARN, "issue_mfi_stp: "
+ "copy from user space failed"));
+ return (DDI_FAILURE);
+ }
+ }
+ }
+
+ if (data_xferlen) {
+ con_log(CL_ANN, (CE_NOTE, "issue_mfi_stp: data_ubuf = %p "
+ "data_xferlen = %x", data_ubuf, data_xferlen));
+
+ /* means IOCTL requires DMA */
+ /* allocate the data transfer buffer */
+ data_dma_obj.size = data_xferlen;
+ data_dma_obj.dma_attr = drsas_generic_dma_attr;
+ data_dma_obj.dma_attr.dma_attr_addr_hi = 0xFFFFFFFFU;
+ data_dma_obj.dma_attr.dma_attr_count_max = 0xFFFFFFFFU;
+ data_dma_obj.dma_attr.dma_attr_sgllen = 1;
+ data_dma_obj.dma_attr.dma_attr_align = 1;
+
+/* allocate kernel buffer for DMA */
+ if (drsas_alloc_dma_obj(instance, &data_dma_obj,
+ (uchar_t)DDI_STRUCTURE_LE_ACC) != 1) {
+ con_log(CL_ANN, (CE_WARN, "issue_mfi_stp: "
+ "could not allocate data transfer buffer."));
+ return (DDI_FAILURE);
+ }
+
+ /* If IOCTL requires DMA WRITE, do ddi_copyin IOCTL data copy */
+ for (i = 0; i < data_xferlen; i++) {
+ if (ddi_copyin((uint8_t *)data_ubuf + i,
+ (uint8_t *)data_dma_obj.buffer + i, 1, mode)) {
+ con_log(CL_ANN, (CE_WARN, "issue_mfi_stp: "
+ "copy from user space failed"));
+ return (DDI_FAILURE);
+ }
+ }
+ }
+
+ ddi_put8(acc_handle, &stp->cmd, kstp->cmd);
+ ddi_put8(acc_handle, &stp->cmd_status, 0);
+ ddi_put8(acc_handle, &stp->connection_status, 0);
+ ddi_put8(acc_handle, &stp->target_id, kstp->target_id);
+ ddi_put8(acc_handle, &stp->sge_count, kstp->sge_count);
+
+ ddi_put16(acc_handle, &stp->timeout, kstp->timeout);
+ ddi_put32(acc_handle, &stp->data_xfer_len, kstp->data_xfer_len);
+
+ ddi_rep_put8(acc_handle, (uint8_t *)kstp->fis, (uint8_t *)stp->fis, 10,
+ DDI_DEV_AUTOINCR);
+
+ ddi_put16(acc_handle, &stp->flags, kstp->flags & ~MFI_FRAME_SGL64);
+ ddi_put32(acc_handle, &stp->stp_flags, kstp->stp_flags);
+ ddi_put32(acc_handle, &stp->sgl.sge32[0].length, fis_xferlen);
+ ddi_put32(acc_handle, &stp->sgl.sge32[0].phys_addr,
+ fis_dma_obj.dma_cookie[0].dmac_address);
+ ddi_put32(acc_handle, &stp->sgl.sge32[1].length, data_xferlen);
+ ddi_put32(acc_handle, &stp->sgl.sge32[1].phys_addr,
+ data_dma_obj.dma_cookie[0].dmac_address);
+
+ cmd->sync_cmd = DRSAS_TRUE;
+ cmd->frame_count = 1;
+
+ if (instance->func_ptr->issue_cmd_in_sync_mode(instance, cmd)) {
+ con_log(CL_ANN, (CE_WARN, "issue_mfi_stp: fw_ioctl failed"));
+ } else {
+
+ if (fis_xferlen) {
+ for (i = 0; i < fis_xferlen; i++) {
+ if (ddi_copyout(
+ (uint8_t *)fis_dma_obj.buffer + i,
+ (uint8_t *)fis_ubuf + i, 1, mode)) {
+ con_log(CL_ANN, (CE_WARN,
+ "issue_mfi_stp : copy to "
+ "user space failed"));
+ return (DDI_FAILURE);
+ }
+ }
+ }
+ }
+ if (data_xferlen) {
+ for (i = 0; i < data_xferlen; i++) {
+ if (ddi_copyout(
+ (uint8_t *)data_dma_obj.buffer + i,
+ (uint8_t *)data_ubuf + i, 1, mode)) {
+ con_log(CL_ANN, (CE_WARN,
+ "issue_mfi_stp : copy to"
+ " user space failed"));
+ return (DDI_FAILURE);
+ }
+ }
+ }
+
+ kstp->cmd_status = ddi_get8(acc_handle, &stp->cmd_status);
+
+ if (fis_xferlen) {
+ /* free kernel buffer */
+ if (drsas_free_dma_obj(instance, fis_dma_obj) != DDI_SUCCESS)
+ return (DDI_FAILURE);
+ }
+
+ if (data_xferlen) {
+ /* free kernel buffer */
+ if (drsas_free_dma_obj(instance, data_dma_obj) != DDI_SUCCESS)
+ return (DDI_FAILURE);
+ }
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * fill_up_drv_ver
+ */
+static void
+fill_up_drv_ver(struct drsas_drv_ver *dv)
+{
+ (void) memset(dv, 0, sizeof (struct drsas_drv_ver));
+
+ (void) memcpy(dv->signature, "$LSI LOGIC$", strlen("$LSI LOGIC$"));
+ (void) memcpy(dv->os_name, "Solaris", strlen("Solaris"));
+ (void) memcpy(dv->drv_name, "dr_sas", strlen("dr_sas"));
+ (void) memcpy(dv->drv_ver, DRSAS_VERSION, strlen(DRSAS_VERSION));
+ (void) memcpy(dv->drv_rel_date, DRSAS_RELDATE,
+ strlen(DRSAS_RELDATE));
+}
+
+/*
+ * handle_drv_ioctl
+ */
+static int
+handle_drv_ioctl(struct drsas_instance *instance, struct drsas_ioctl *ioctl,
+ int mode)
+{
+ int i;
+ int rval = DDI_SUCCESS;
+ int *props = NULL;
+ void *ubuf;
+
+ uint8_t *pci_conf_buf;
+ uint32_t xferlen;
+ uint32_t num_props;
+ uint_t model;
+ struct drsas_dcmd_frame *kdcmd;
+ struct drsas_drv_ver dv;
+ struct drsas_pci_information pi;
+
+ kdcmd = (struct drsas_dcmd_frame *)&ioctl->frame[0];
+
+ model = ddi_model_convert_from(mode & FMODELS);
+ if (model == DDI_MODEL_ILP32) {
+ con_log(CL_ANN1, (CE_NOTE,
+ "handle_drv_ioctl: DDI_MODEL_ILP32"));
+
+ xferlen = kdcmd->sgl.sge32[0].length;
+
+ ubuf = (void *)(ulong_t)kdcmd->sgl.sge32[0].phys_addr;
+ } else {
+#ifdef _ILP32
+ con_log(CL_ANN1, (CE_NOTE,
+ "handle_drv_ioctl: DDI_MODEL_ILP32"));
+ xferlen = kdcmd->sgl.sge32[0].length;
+ ubuf = (void *)(ulong_t)kdcmd->sgl.sge32[0].phys_addr;
+#else
+ con_log(CL_ANN1, (CE_NOTE,
+ "handle_drv_ioctl: DDI_MODEL_LP64"));
+ xferlen = kdcmd->sgl.sge64[0].length;
+ ubuf = (void *)(ulong_t)kdcmd->sgl.sge64[0].phys_addr;
+#endif
+ }
+ con_log(CL_ANN1, (CE_NOTE, "handle_drv_ioctl: "
+ "dataBuf=%p size=%d bytes", ubuf, xferlen));
+
+ switch (kdcmd->opcode) {
+ case DRSAS_DRIVER_IOCTL_DRIVER_VERSION:
+ con_log(CL_ANN1, (CE_NOTE, "handle_drv_ioctl: "
+ "DRSAS_DRIVER_IOCTL_DRIVER_VERSION"));
+
+ fill_up_drv_ver(&dv);
+ for (i = 0; i < xferlen; i++) {
+ if (ddi_copyout((uint8_t *)&dv + i, (uint8_t *)ubuf + i,
+ 1, mode)) {
+ con_log(CL_ANN, (CE_WARN, "handle_drv_ioctl: "
+ "DRSAS_DRIVER_IOCTL_DRIVER_VERSION"
+ " : copy to user space failed"));
+ kdcmd->cmd_status = 1;
+ rval = DDI_FAILURE;
+ break;
+ }
+ }
+ if (i == xferlen)
+ kdcmd->cmd_status = 0;
+ break;
+ case DRSAS_DRIVER_IOCTL_PCI_INFORMATION:
+ con_log(CL_ANN1, (CE_NOTE, "handle_drv_ioctl: "
+ "DRSAS_DRIVER_IOCTL_PCI_INFORMAITON"));
+
+ if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, instance->dip,
+ 0, "reg", &props, &num_props)) {
+ con_log(CL_ANN, (CE_WARN, "handle_drv_ioctl: "
+ "DRSAS_DRIVER_IOCTL_PCI_INFORMATION : "
+ "ddi_prop_look_int_array failed"));
+ rval = DDI_FAILURE;
+ } else {
+
+ pi.busNumber = (props[0] >> 16) & 0xFF;
+ pi.deviceNumber = (props[0] >> 11) & 0x1f;
+ pi.functionNumber = (props[0] >> 8) & 0x7;
+ ddi_prop_free((void *)props);
+ }
+
+ pci_conf_buf = (uint8_t *)&pi.pciHeaderInfo;
+
+ for (i = 0; i < (sizeof (struct drsas_pci_information) -
+ offsetof(struct drsas_pci_information, pciHeaderInfo));
+ i++) {
+ pci_conf_buf[i] =
+ pci_config_get8(instance->pci_handle, i);
+ }
+ for (i = 0; i < xferlen; i++) {
+ if (ddi_copyout((uint8_t *)&pi + i, (uint8_t *)ubuf + i,
+ 1, mode)) {
+ con_log(CL_ANN, (CE_WARN, "handle_drv_ioctl: "
+ "DRSAS_DRIVER_IOCTL_PCI_INFORMATION"
+ " : copy to user space failed"));
+ kdcmd->cmd_status = 1;
+ rval = DDI_FAILURE;
+ break;
+ }
+ }
+
+ if (i == xferlen)
+ kdcmd->cmd_status = 0;
+
+ break;
+ default:
+ con_log(CL_ANN, (CE_WARN, "handle_drv_ioctl: "
+ "invalid driver specific IOCTL opcode = 0x%x",
+ kdcmd->opcode));
+ kdcmd->cmd_status = 1;
+ rval = DDI_FAILURE;
+ break;
+ }
+
+ return (rval);
+}
+
+/*
+ * handle_mfi_ioctl
+ */
+static int
+handle_mfi_ioctl(struct drsas_instance *instance, struct drsas_ioctl *ioctl,
+ int mode)
+{
+ int rval = DDI_SUCCESS;
+
+ struct drsas_header *hdr;
+ struct drsas_cmd *cmd;
+
+ cmd = get_mfi_pkt(instance);
+
+ if (!cmd) {
+ con_log(CL_ANN, (CE_WARN, "dr_sas: "
+ "failed to get a cmd packet"));
+ return (DDI_FAILURE);
+ }
+
+ /* Clear the frame buffer and assign back the context id */
+ (void) memset((char *)&cmd->frame[0], 0, sizeof (union drsas_frame));
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &cmd->frame->hdr.context,
+ cmd->index);
+
+ hdr = (struct drsas_header *)&ioctl->frame[0];
+
+ switch (hdr->cmd) {
+ case MFI_CMD_OP_DCMD:
+ rval = issue_mfi_dcmd(instance, ioctl, cmd, mode);
+ break;
+ case MFI_CMD_OP_SMP:
+ rval = issue_mfi_smp(instance, ioctl, cmd, mode);
+ break;
+ case MFI_CMD_OP_STP:
+ rval = issue_mfi_stp(instance, ioctl, cmd, mode);
+ break;
+ case MFI_CMD_OP_LD_SCSI:
+ case MFI_CMD_OP_PD_SCSI:
+ rval = issue_mfi_pthru(instance, ioctl, cmd, mode);
+ break;
+ default:
+ con_log(CL_ANN, (CE_WARN, "handle_mfi_ioctl: "
+ "invalid mfi ioctl hdr->cmd = %d", hdr->cmd));
+ rval = DDI_FAILURE;
+ break;
+ }
+
+
+ return_mfi_pkt(instance, cmd);
+ if (drsas_common_check(instance, cmd) != DDI_SUCCESS)
+ rval = DDI_FAILURE;
+ return (rval);
+}
+
+/*
+ * AEN
+ */
+static int
+handle_mfi_aen(struct drsas_instance *instance, struct drsas_aen *aen)
+{
+ int rval = 0;
+
+ rval = register_mfi_aen(instance, instance->aen_seq_num,
+ aen->class_locale_word);
+
+ aen->cmd_status = (uint8_t)rval;
+
+ return (rval);
+}
+
+static int
+register_mfi_aen(struct drsas_instance *instance, uint32_t seq_num,
+ uint32_t class_locale_word)
+{
+ int ret_val;
+
+ struct drsas_cmd *cmd, *aen_cmd;
+ struct drsas_dcmd_frame *dcmd;
+ union drsas_evt_class_locale curr_aen;
+ union drsas_evt_class_locale prev_aen;
+
+ /*
+ * If there an AEN pending already (aen_cmd), check if the
+ * class_locale of that pending AEN is inclusive of the new
+ * AEN request we currently have. If it is, then we don't have
+ * to do anything. In other words, whichever events the current
+ * AEN request is subscribing to, have already been subscribed
+ * to.
+ *
+ * If the old_cmd is _not_ inclusive, then we have to abort
+ * that command, form a class_locale that is superset of both
+ * old and current and re-issue to the FW
+ */
+
+ curr_aen.word = class_locale_word;
+ aen_cmd = instance->aen_cmd;
+ if (aen_cmd) {
+ prev_aen.word = ddi_get32(aen_cmd->frame_dma_obj.acc_handle,
+ &aen_cmd->frame->dcmd.mbox.w[1]);
+
+ /*
+ * A class whose enum value is smaller is inclusive of all
+ * higher values. If a PROGRESS (= -1) was previously
+ * registered, then a new registration requests for higher
+ * classes need not be sent to FW. They are automatically
+ * included.
+ *
+ * Locale numbers don't have such hierarchy. They are bitmap
+ * values
+ */
+ if ((prev_aen.members.class <= curr_aen.members.class) &&
+ !((prev_aen.members.locale & curr_aen.members.locale) ^
+ curr_aen.members.locale)) {
+ /*
+ * Previously issued event registration includes
+ * current request. Nothing to do.
+ */
+
+ return (0);
+ } else {
+ curr_aen.members.locale |= prev_aen.members.locale;
+
+ if (prev_aen.members.class < curr_aen.members.class)
+ curr_aen.members.class = prev_aen.members.class;
+
+ ret_val = abort_aen_cmd(instance, aen_cmd);
+
+ if (ret_val) {
+ con_log(CL_ANN, (CE_WARN, "register_mfi_aen: "
+ "failed to abort prevous AEN command"));
+
+ return (ret_val);
+ }
+ }
+ } else {
+ curr_aen.word = class_locale_word;
+ }
+
+ cmd = get_mfi_pkt(instance);
+
+ if (!cmd)
+ return (ENOMEM);
+ /* Clear the frame buffer and assign back the context id */
+ (void) memset((char *)&cmd->frame[0], 0, sizeof (union drsas_frame));
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &cmd->frame->hdr.context,
+ cmd->index);
+
+ dcmd = &cmd->frame->dcmd;
+
+ /* for(i = 0; i < DCMD_MBOX_SZ; i++) dcmd->mbox.b[i] = 0; */
+ (void) memset(dcmd->mbox.b, 0, DCMD_MBOX_SZ);
+
+ (void) memset(instance->mfi_evt_detail_obj.buffer, 0,
+ sizeof (struct drsas_evt_detail));
+
+ /* Prepare DCMD for aen registration */
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &dcmd->cmd, MFI_CMD_OP_DCMD);
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &dcmd->cmd_status, 0x0);
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &dcmd->sge_count, 1);
+ ddi_put16(cmd->frame_dma_obj.acc_handle, &dcmd->flags,
+ MFI_FRAME_DIR_READ);
+ ddi_put16(cmd->frame_dma_obj.acc_handle, &dcmd->timeout, 0);
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->data_xfer_len,
+ sizeof (struct drsas_evt_detail));
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->opcode,
+ DR_DCMD_CTRL_EVENT_WAIT);
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->mbox.w[0], seq_num);
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->mbox.w[1],
+ curr_aen.word);
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->sgl.sge32[0].phys_addr,
+ instance->mfi_evt_detail_obj.dma_cookie[0].dmac_address);
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->sgl.sge32[0].length,
+ sizeof (struct drsas_evt_detail));
+
+ instance->aen_seq_num = seq_num;
+
+
+ /*
+ * Store reference to the cmd used to register for AEN. When an
+ * application wants us to register for AEN, we have to abort this
+ * cmd and re-register with a new EVENT LOCALE supplied by that app
+ */
+ instance->aen_cmd = cmd;
+
+ cmd->frame_count = 1;
+
+ /* Issue the aen registration frame */
+ /* atomic_add_16 (&instance->fw_outstanding, 1); */
+ instance->func_ptr->issue_cmd(cmd, instance);
+
+ return (0);
+}
+
+static void
+display_scsi_inquiry(caddr_t scsi_inq)
+{
+#define MAX_SCSI_DEVICE_CODE 14
+ int i;
+ char inquiry_buf[256] = {0};
+ int len;
+ const char *const scsi_device_types[] = {
+ "Direct-Access ",
+ "Sequential-Access",
+ "Printer ",
+ "Processor ",
+ "WORM ",
+ "CD-ROM ",
+ "Scanner ",
+ "Optical Device ",
+ "Medium Changer ",
+ "Communications ",
+ "Unknown ",
+ "Unknown ",
+ "Unknown ",
+ "Enclosure ",
+ };
+
+ len = 0;
+
+ len += snprintf(inquiry_buf + len, 265 - len, " Vendor: ");
+ for (i = 8; i < 16; i++) {
+ len += snprintf(inquiry_buf + len, 265 - len, "%c",
+ scsi_inq[i]);
+ }
+
+ len += snprintf(inquiry_buf + len, 265 - len, " Model: ");
+
+ for (i = 16; i < 32; i++) {
+ len += snprintf(inquiry_buf + len, 265 - len, "%c",
+ scsi_inq[i]);
+ }
+
+ len += snprintf(inquiry_buf + len, 265 - len, " Rev: ");
+
+ for (i = 32; i < 36; i++) {
+ len += snprintf(inquiry_buf + len, 265 - len, "%c",
+ scsi_inq[i]);
+ }
+
+ len += snprintf(inquiry_buf + len, 265 - len, "\n");
+
+
+ i = scsi_inq[0] & 0x1f;
+
+
+ len += snprintf(inquiry_buf + len, 265 - len, " Type: %s ",
+ i < MAX_SCSI_DEVICE_CODE ? scsi_device_types[i] :
+ "Unknown ");
+
+
+ len += snprintf(inquiry_buf + len, 265 - len,
+ " ANSI SCSI revision: %02x", scsi_inq[2] & 0x07);
+
+ if ((scsi_inq[2] & 0x07) == 1 && (scsi_inq[3] & 0x0f) == 1) {
+ len += snprintf(inquiry_buf + len, 265 - len, " CCS\n");
+ } else {
+ len += snprintf(inquiry_buf + len, 265 - len, "\n");
+ }
+
+ con_log(CL_ANN1, (CE_CONT, inquiry_buf));
+}
+
+static int
+read_fw_status_reg_ppc(struct drsas_instance *instance)
+{
+ return ((int)RD_OB_SCRATCH_PAD_0(instance));
+}
+
+static void
+issue_cmd_ppc(struct drsas_cmd *cmd, struct drsas_instance *instance)
+{
+ atomic_add_16(&instance->fw_outstanding, 1);
+
+ /* Issue the command to the FW */
+ WR_IB_QPORT((cmd->frame_phys_addr) |
+ (((cmd->frame_count - 1) << 1) | 1), instance);
+}
+
+/*
+ * issue_cmd_in_sync_mode
+ */
+static int
+issue_cmd_in_sync_mode_ppc(struct drsas_instance *instance,
+ struct drsas_cmd *cmd)
+{
+ int i;
+ uint32_t msecs = MFI_POLL_TIMEOUT_SECS * (10 * MILLISEC);
+
+ con_log(CL_ANN1, (CE_NOTE, "issue_cmd_in_sync_mode_ppc: called"));
+
+ cmd->cmd_status = ENODATA;
+
+ WR_IB_QPORT((cmd->frame_phys_addr) |
+ (((cmd->frame_count - 1) << 1) | 1), instance);
+
+ mutex_enter(&instance->int_cmd_mtx);
+
+ for (i = 0; i < msecs && (cmd->cmd_status == ENODATA); i++) {
+ cv_wait(&instance->int_cmd_cv, &instance->int_cmd_mtx);
+ }
+
+ mutex_exit(&instance->int_cmd_mtx);
+
+ con_log(CL_ANN1, (CE_NOTE, "issue_cmd_in_sync_mode_ppc: done"));
+
+ if (i < (msecs -1)) {
+ return (DDI_SUCCESS);
+ } else {
+ return (DDI_FAILURE);
+ }
+}
+
+/*
+ * issue_cmd_in_poll_mode
+ */
+static int
+issue_cmd_in_poll_mode_ppc(struct drsas_instance *instance,
+ struct drsas_cmd *cmd)
+{
+ int i;
+ uint16_t flags;
+ uint32_t msecs = MFI_POLL_TIMEOUT_SECS * MILLISEC;
+ struct drsas_header *frame_hdr;
+
+ con_log(CL_ANN1, (CE_NOTE, "issue_cmd_in_poll_mode_ppc: called"));
+
+ frame_hdr = (struct drsas_header *)cmd->frame;
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &frame_hdr->cmd_status,
+ MFI_CMD_STATUS_POLL_MODE);
+ flags = ddi_get16(cmd->frame_dma_obj.acc_handle, &frame_hdr->flags);
+ flags |= MFI_FRAME_DONT_POST_IN_REPLY_QUEUE;
+
+ ddi_put16(cmd->frame_dma_obj.acc_handle, &frame_hdr->flags, flags);
+
+ /* issue the frame using inbound queue port */
+ WR_IB_QPORT((cmd->frame_phys_addr) |
+ (((cmd->frame_count - 1) << 1) | 1), instance);
+
+ /* wait for cmd_status to change from 0xFF */
+ for (i = 0; i < msecs && (
+ ddi_get8(cmd->frame_dma_obj.acc_handle, &frame_hdr->cmd_status)
+ == MFI_CMD_STATUS_POLL_MODE); i++) {
+ drv_usecwait(MILLISEC); /* wait for 1000 usecs */
+ }
+
+ if (ddi_get8(cmd->frame_dma_obj.acc_handle, &frame_hdr->cmd_status)
+ == MFI_CMD_STATUS_POLL_MODE) {
+ con_log(CL_ANN, (CE_NOTE, "issue_cmd_in_poll_mode: "
+ "cmd polling timed out"));
+ return (DDI_FAILURE);
+ }
+
+ return (DDI_SUCCESS);
+}
+
+static void
+enable_intr_ppc(struct drsas_instance *instance)
+{
+ uint32_t mask;
+
+ con_log(CL_ANN1, (CE_NOTE, "enable_intr_ppc: called"));
+
+ /* WR_OB_DOORBELL_CLEAR(0xFFFFFFFF, instance); */
+ WR_OB_DOORBELL_CLEAR(OB_DOORBELL_CLEAR_MASK, instance);
+
+ /* WR_OB_INTR_MASK(~0x80000000, instance); */
+ WR_OB_INTR_MASK(~(MFI_REPLY_2108_MESSAGE_INTR_MASK), instance);
+
+ /* dummy read to force PCI flush */
+ mask = RD_OB_INTR_MASK(instance);
+
+ con_log(CL_ANN1, (CE_NOTE, "enable_intr_ppc: "
+ "outbound_intr_mask = 0x%x", mask));
+}
+
+static void
+disable_intr_ppc(struct drsas_instance *instance)
+{
+ uint32_t mask;
+
+ con_log(CL_ANN1, (CE_NOTE, "disable_intr_ppc: called"));
+
+ con_log(CL_ANN1, (CE_NOTE, "disable_intr_ppc: before : "
+ "outbound_intr_mask = 0x%x", RD_OB_INTR_MASK(instance)));
+
+ /* WR_OB_INTR_MASK(0xFFFFFFFF, instance); */
+ WR_OB_INTR_MASK(OB_INTR_MASK, instance);
+
+ con_log(CL_ANN1, (CE_NOTE, "disable_intr_ppc: after : "
+ "outbound_intr_mask = 0x%x", RD_OB_INTR_MASK(instance)));
+
+ /* dummy read to force PCI flush */
+ mask = RD_OB_INTR_MASK(instance);
+#ifdef lint
+ mask = mask;
+#endif
+}
+
+static int
+intr_ack_ppc(struct drsas_instance *instance)
+{
+ uint32_t status;
+
+ con_log(CL_ANN1, (CE_NOTE, "intr_ack_ppc: called"));
+
+ /* check if it is our interrupt */
+ status = RD_OB_INTR_STATUS(instance);
+
+ con_log(CL_ANN1, (CE_NOTE, "intr_ack_ppc: status = 0x%x", status));
+
+ if (!(status & MFI_REPLY_2108_MESSAGE_INTR)) {
+ return (DDI_INTR_UNCLAIMED);
+ }
+
+ /* clear the interrupt by writing back the same value */
+ WR_OB_DOORBELL_CLEAR(status, instance);
+
+ /* dummy READ */
+ status = RD_OB_INTR_STATUS(instance);
+
+ con_log(CL_ANN1, (CE_NOTE, "intr_ack_ppc: interrupt cleared"));
+
+ return (DDI_INTR_CLAIMED);
+}
+
+static int
+drsas_common_check(struct drsas_instance *instance,
+ struct drsas_cmd *cmd)
+{
+ int ret = DDI_SUCCESS;
+
+ if (drsas_check_dma_handle(cmd->frame_dma_obj.dma_handle) !=
+ DDI_SUCCESS) {
+ ddi_fm_service_impact(instance->dip, DDI_SERVICE_UNAFFECTED);
+ if (cmd->pkt != NULL) {
+ cmd->pkt->pkt_reason = CMD_TRAN_ERR;
+ cmd->pkt->pkt_statistics = 0;
+ }
+ ret = DDI_FAILURE;
+ }
+ if (drsas_check_dma_handle(instance->mfi_internal_dma_obj.dma_handle)
+ != DDI_SUCCESS) {
+ ddi_fm_service_impact(instance->dip, DDI_SERVICE_UNAFFECTED);
+ if (cmd->pkt != NULL) {
+ cmd->pkt->pkt_reason = CMD_TRAN_ERR;
+ cmd->pkt->pkt_statistics = 0;
+ }
+ ret = DDI_FAILURE;
+ }
+ if (drsas_check_dma_handle(instance->mfi_evt_detail_obj.dma_handle) !=
+ DDI_SUCCESS) {
+ ddi_fm_service_impact(instance->dip, DDI_SERVICE_UNAFFECTED);
+ if (cmd->pkt != NULL) {
+ cmd->pkt->pkt_reason = CMD_TRAN_ERR;
+ cmd->pkt->pkt_statistics = 0;
+ }
+ ret = DDI_FAILURE;
+ }
+ if (drsas_check_acc_handle(instance->regmap_handle) != DDI_SUCCESS) {
+ ddi_fm_service_impact(instance->dip, DDI_SERVICE_UNAFFECTED);
+
+ ddi_fm_acc_err_clear(instance->regmap_handle, DDI_FME_VER0);
+
+ if (cmd->pkt != NULL) {
+ cmd->pkt->pkt_reason = CMD_TRAN_ERR;
+ cmd->pkt->pkt_statistics = 0;
+ }
+ ret = DDI_FAILURE;
+ }
+
+ return (ret);
+}
+
+/*ARGSUSED*/
+static int
+drsas_fm_error_cb(dev_info_t *dip, ddi_fm_error_t *err, const void *impl_data)
+{
+ /*
+ * as the driver can always deal with an error in any dma or
+ * access handle, we can just return the fme_status value.
+ */
+ pci_ereport_post(dip, err, NULL);
+ return (err->fme_status);
+}
+
+static void
+drsas_fm_init(struct drsas_instance *instance)
+{
+ /* Need to change iblock to priority for new MSI intr */
+ ddi_iblock_cookie_t fm_ibc;
+
+ /* Only register with IO Fault Services if we have some capability */
+ if (instance->fm_capabilities) {
+ /* Adjust access and dma attributes for FMA */
+ endian_attr.devacc_attr_access = DDI_FLAGERR_ACC;
+ drsas_generic_dma_attr.dma_attr_flags = DDI_DMA_FLAGERR;
+
+ /*
+ * Register capabilities with IO Fault Services.
+ * fm_capabilities will be updated to indicate
+ * capabilities actually supported (not requested.)
+ */
+
+ ddi_fm_init(instance->dip, &instance->fm_capabilities, &fm_ibc);
+
+ /*
+ * Initialize pci ereport capabilities if ereport
+ * capable (should always be.)
+ */
+
+ if (DDI_FM_EREPORT_CAP(instance->fm_capabilities) ||
+ DDI_FM_ERRCB_CAP(instance->fm_capabilities)) {
+ pci_ereport_setup(instance->dip);
+ }
+
+ /*
+ * Register error callback if error callback capable.
+ */
+ if (DDI_FM_ERRCB_CAP(instance->fm_capabilities)) {
+ ddi_fm_handler_register(instance->dip,
+ drsas_fm_error_cb, (void*) instance);
+ }
+ } else {
+ endian_attr.devacc_attr_access = DDI_DEFAULT_ACC;
+ drsas_generic_dma_attr.dma_attr_flags = 0;
+ }
+}
+
+static void
+drsas_fm_fini(struct drsas_instance *instance)
+{
+ /* Only unregister FMA capabilities if registered */
+ if (instance->fm_capabilities) {
+ /*
+ * Un-register error callback if error callback capable.
+ */
+ if (DDI_FM_ERRCB_CAP(instance->fm_capabilities)) {
+ ddi_fm_handler_unregister(instance->dip);
+ }
+
+ /*
+ * Release any resources allocated by pci_ereport_setup()
+ */
+ if (DDI_FM_EREPORT_CAP(instance->fm_capabilities) ||
+ DDI_FM_ERRCB_CAP(instance->fm_capabilities)) {
+ pci_ereport_teardown(instance->dip);
+ }
+
+ /* Unregister from IO Fault Services */
+ ddi_fm_fini(instance->dip);
+
+ /* Adjust access and dma attributes for FMA */
+ endian_attr.devacc_attr_access = DDI_DEFAULT_ACC;
+ drsas_generic_dma_attr.dma_attr_flags = 0;
+ }
+}
+
+int
+drsas_check_acc_handle(ddi_acc_handle_t handle)
+{
+ ddi_fm_error_t de;
+
+ if (handle == NULL) {
+ return (DDI_FAILURE);
+ }
+
+ ddi_fm_acc_err_get(handle, &de, DDI_FME_VERSION);
+
+ return (de.fme_status);
+}
+
+int
+drsas_check_dma_handle(ddi_dma_handle_t handle)
+{
+ ddi_fm_error_t de;
+
+ if (handle == NULL) {
+ return (DDI_FAILURE);
+ }
+
+ ddi_fm_dma_err_get(handle, &de, DDI_FME_VERSION);
+
+ return (de.fme_status);
+}
+
+void
+drsas_fm_ereport(struct drsas_instance *instance, char *detail)
+{
+ uint64_t ena;
+ char buf[FM_MAX_CLASS];
+
+ (void) snprintf(buf, FM_MAX_CLASS, "%s.%s", DDI_FM_DEVICE, detail);
+ ena = fm_ena_generate(0, FM_ENA_FMT1);
+ if (DDI_FM_EREPORT_CAP(instance->fm_capabilities)) {
+ ddi_fm_ereport_post(instance->dip, buf, ena, DDI_NOSLEEP,
+ FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERSION, NULL);
+ }
+}
+
+static int
+drsas_add_intrs(struct drsas_instance *instance, int intr_type)
+{
+
+ dev_info_t *dip = instance->dip;
+ int avail, actual, count;
+ int i, flag, ret;
+
+ con_log(CL_DLEVEL1, (CE_WARN, "drsas_add_intrs: intr_type = %x",
+ intr_type));
+
+ /* Get number of interrupts */
+ ret = ddi_intr_get_nintrs(dip, intr_type, &count);
+ if ((ret != DDI_SUCCESS) || (count == 0)) {
+ con_log(CL_ANN, (CE_WARN, "ddi_intr_get_nintrs() failed:"
+ "ret %d count %d", ret, count));
+
+ return (DDI_FAILURE);
+ }
+
+ con_log(CL_DLEVEL1, (CE_WARN, "drsas_add_intrs: count = %d ", count));
+
+ /* Get number of available interrupts */
+ ret = ddi_intr_get_navail(dip, intr_type, &avail);
+ if ((ret != DDI_SUCCESS) || (avail == 0)) {
+ con_log(CL_ANN, (CE_WARN, "ddi_intr_get_navail() failed:"
+ "ret %d avail %d", ret, avail));
+
+ return (DDI_FAILURE);
+ }
+ con_log(CL_DLEVEL1, (CE_WARN, "drsas_add_intrs: avail = %d ", avail));
+
+ /* Only one interrupt routine. So limit the count to 1 */
+ if (count > 1) {
+ count = 1;
+ }
+
+ /*
+ * Allocate an array of interrupt handlers. Currently we support
+ * only one interrupt. The framework can be extended later.
+ */
+ instance->intr_size = count * sizeof (ddi_intr_handle_t);
+ instance->intr_htable = kmem_zalloc(instance->intr_size, KM_SLEEP);
+ ASSERT(instance->intr_htable);
+
+ flag = ((intr_type == DDI_INTR_TYPE_MSI) || (intr_type ==
+ DDI_INTR_TYPE_MSIX)) ? DDI_INTR_ALLOC_STRICT:DDI_INTR_ALLOC_NORMAL;
+
+ /* Allocate interrupt */
+ ret = ddi_intr_alloc(dip, instance->intr_htable, intr_type, 0,
+ count, &actual, flag);
+
+ if ((ret != DDI_SUCCESS) || (actual == 0)) {
+ con_log(CL_ANN, (CE_WARN, "drsas_add_intrs: "
+ "avail = %d", avail));
+ kmem_free(instance->intr_htable, instance->intr_size);
+ return (DDI_FAILURE);
+ }
+ if (actual < count) {
+ con_log(CL_ANN, (CE_WARN, "drsas_add_intrs: "
+ "Requested = %d Received = %d", count, actual));
+ }
+ instance->intr_cnt = actual;
+
+ /*
+ * Get the priority of the interrupt allocated.
+ */
+ if ((ret = ddi_intr_get_pri(instance->intr_htable[0],
+ &instance->intr_pri)) != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN, "drsas_add_intrs: "
+ "get priority call failed"));
+
+ for (i = 0; i < actual; i++) {
+ (void) ddi_intr_free(instance->intr_htable[i]);
+ }
+ kmem_free(instance->intr_htable, instance->intr_size);
+ return (DDI_FAILURE);
+ }
+
+ /*
+ * Test for high level mutex. we don't support them.
+ */
+ if (instance->intr_pri >= ddi_intr_get_hilevel_pri()) {
+ con_log(CL_ANN, (CE_WARN, "drsas_add_intrs: "
+ "High level interrupts not supported."));
+
+ for (i = 0; i < actual; i++) {
+ (void) ddi_intr_free(instance->intr_htable[i]);
+ }
+ kmem_free(instance->intr_htable, instance->intr_size);
+ return (DDI_FAILURE);
+ }
+
+ con_log(CL_DLEVEL1, (CE_NOTE, "drsas_add_intrs: intr_pri = 0x%x ",
+ instance->intr_pri));
+
+ /* Call ddi_intr_add_handler() */
+ for (i = 0; i < actual; i++) {
+ ret = ddi_intr_add_handler(instance->intr_htable[i],
+ (ddi_intr_handler_t *)drsas_isr, (caddr_t)instance,
+ (caddr_t)(uintptr_t)i);
+
+ if (ret != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN, "drsas_add_intrs:"
+ "failed %d", ret));
+
+ for (i = 0; i < actual; i++) {
+ (void) ddi_intr_free(instance->intr_htable[i]);
+ }
+ kmem_free(instance->intr_htable, instance->intr_size);
+ return (DDI_FAILURE);
+ }
+
+ }
+
+ con_log(CL_DLEVEL1, (CE_WARN, " ddi_intr_add_handler done"));
+
+ if ((ret = ddi_intr_get_cap(instance->intr_htable[0],
+ &instance->intr_cap)) != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN, "ddi_intr_get_cap() failed %d",
+ ret));
+
+ /* Free already allocated intr */
+ for (i = 0; i < actual; i++) {
+ (void) ddi_intr_remove_handler(
+ instance->intr_htable[i]);
+ (void) ddi_intr_free(instance->intr_htable[i]);
+ }
+ kmem_free(instance->intr_htable, instance->intr_size);
+ return (DDI_FAILURE);
+ }
+
+ if (instance->intr_cap & DDI_INTR_FLAG_BLOCK) {
+ con_log(CL_ANN, (CE_WARN, "Calling ddi_intr_block _enable"));
+
+ (void) ddi_intr_block_enable(instance->intr_htable,
+ instance->intr_cnt);
+ } else {
+ con_log(CL_ANN, (CE_NOTE, " calling ddi_intr_enable"));
+
+ for (i = 0; i < instance->intr_cnt; i++) {
+ (void) ddi_intr_enable(instance->intr_htable[i]);
+ con_log(CL_ANN, (CE_NOTE, "ddi intr enable returns "
+ "%d", i));
+ }
+ }
+
+ return (DDI_SUCCESS);
+
+}
+
+
+static void
+drsas_rem_intrs(struct drsas_instance *instance)
+{
+ int i;
+
+ con_log(CL_ANN, (CE_NOTE, "drsas_rem_intrs called"));
+
+ /* Disable all interrupts first */
+ if (instance->intr_cap & DDI_INTR_FLAG_BLOCK) {
+ (void) ddi_intr_block_disable(instance->intr_htable,
+ instance->intr_cnt);
+ } else {
+ for (i = 0; i < instance->intr_cnt; i++) {
+ (void) ddi_intr_disable(instance->intr_htable[i]);
+ }
+ }
+
+ /* Remove all the handlers */
+
+ for (i = 0; i < instance->intr_cnt; i++) {
+ (void) ddi_intr_remove_handler(instance->intr_htable[i]);
+ (void) ddi_intr_free(instance->intr_htable[i]);
+ }
+
+ kmem_free(instance->intr_htable, instance->intr_size);
+}
+
+static int
+drsas_tran_bus_config(dev_info_t *parent, uint_t flags,
+ ddi_bus_config_op_t op, void *arg, dev_info_t **childp)
+{
+ struct drsas_instance *instance;
+ int config;
+ int rval;
+
+ char *ptr = NULL;
+ int tgt, lun;
+
+ con_log(CL_ANN1, (CE_NOTE, "Bus config called for op = %x", op));
+
+ if ((instance = ddi_get_soft_state(drsas_state,
+ ddi_get_instance(parent))) == NULL) {
+ return (NDI_FAILURE);
+ }
+
+ /* Hold nexus during bus_config */
+ ndi_devi_enter(parent, &config);
+ switch (op) {
+ case BUS_CONFIG_ONE: {
+
+ /* parse wwid/target name out of name given */
+ if ((ptr = strchr((char *)arg, '@')) == NULL) {
+ rval = NDI_FAILURE;
+ break;
+ }
+ ptr++;
+
+ if (drsas_parse_devname(arg, &tgt, &lun) != 0) {
+ rval = NDI_FAILURE;
+ break;
+ }
+
+ if (lun == 0) {
+ rval = drsas_config_ld(instance, tgt, lun, childp);
+ } else {
+ rval = NDI_FAILURE;
+ }
+
+ break;
+ }
+ case BUS_CONFIG_DRIVER:
+ case BUS_CONFIG_ALL: {
+
+ rval = drsas_config_all_devices(instance);
+
+ rval = NDI_SUCCESS;
+ break;
+ }
+ }
+
+ if (rval == NDI_SUCCESS) {
+ rval = ndi_busop_bus_config(parent, flags, op, arg, childp, 0);
+
+ }
+ ndi_devi_exit(parent, config);
+
+ con_log(CL_ANN1, (CE_NOTE, "drsas_tran_bus_config: rval = %x",
+ rval));
+ return (rval);
+}
+
+static int
+drsas_config_all_devices(struct drsas_instance *instance)
+{
+ int rval, tgt;
+
+ for (tgt = 0; tgt < MRDRV_MAX_LD; tgt++) {
+ (void) drsas_config_ld(instance, tgt, 0, NULL);
+
+ }
+
+ rval = NDI_SUCCESS;
+ return (rval);
+}
+
+static int
+drsas_parse_devname(char *devnm, int *tgt, int *lun)
+{
+ char devbuf[SCSI_MAXNAMELEN];
+ char *addr;
+ char *p, *tp, *lp;
+ long num;
+
+ /* Parse dev name and address */
+ (void) strcpy(devbuf, devnm);
+ addr = "";
+ for (p = devbuf; *p != '\0'; p++) {
+ if (*p == '@') {
+ addr = p + 1;
+ *p = '\0';
+ } else if (*p == ':') {
+ *p = '\0';
+ break;
+ }
+ }
+
+ /* Parse target and lun */
+ for (p = tp = addr, lp = NULL; *p != '\0'; p++) {
+ if (*p == ',') {
+ lp = p + 1;
+ *p = '\0';
+ break;
+ }
+ }
+ if (tgt && tp) {
+ if (ddi_strtol(tp, NULL, 0x10, &num)) {
+ return (DDI_FAILURE); /* Can declare this as constant */
+ }
+ *tgt = (int)num;
+ }
+ if (lun && lp) {
+ if (ddi_strtol(lp, NULL, 0x10, &num)) {
+ return (DDI_FAILURE);
+ }
+ *lun = (int)num;
+ }
+ return (DDI_SUCCESS); /* Success case */
+}
+
+static int
+drsas_config_ld(struct drsas_instance *instance, uint16_t tgt,
+ uint8_t lun, dev_info_t **ldip)
+{
+ struct scsi_device *sd;
+ dev_info_t *child;
+ int rval;
+
+ con_log(CL_ANN1, (CE_NOTE, "drsas_config_ld: t = %d l = %d",
+ tgt, lun));
+
+ if ((child = drsas_find_child(instance, tgt, lun)) != NULL) {
+ if (ldip) {
+ *ldip = child;
+ }
+ con_log(CL_ANN1, (CE_NOTE,
+ "drsas_config_ld: Child = %p found t = %d l = %d",
+ (void *)child, tgt, lun));
+ return (NDI_SUCCESS);
+ }
+
+ sd = kmem_zalloc(sizeof (struct scsi_device), KM_SLEEP);
+ sd->sd_address.a_hba_tran = instance->tran;
+ sd->sd_address.a_target = (uint16_t)tgt;
+ sd->sd_address.a_lun = (uint8_t)lun;
+
+ if (scsi_hba_probe(sd, NULL) == SCSIPROBE_EXISTS)
+ rval = drsas_config_scsi_device(instance, sd, ldip);
+ else
+ rval = NDI_FAILURE;
+
+ /* sd_unprobe is blank now. Free buffer manually */
+ if (sd->sd_inq) {
+ kmem_free(sd->sd_inq, SUN_INQSIZE);
+ sd->sd_inq = (struct scsi_inquiry *)NULL;
+ }
+
+ kmem_free(sd, sizeof (struct scsi_device));
+ con_log(CL_ANN1, (CE_NOTE, "drsas_config_ld: return rval = %d",
+ rval));
+ return (rval);
+}
+
+static int
+drsas_config_scsi_device(struct drsas_instance *instance,
+ struct scsi_device *sd, dev_info_t **dipp)
+{
+ char *nodename = NULL;
+ char **compatible = NULL;
+ int ncompatible = 0;
+ char *childname;
+ dev_info_t *ldip = NULL;
+ int tgt = sd->sd_address.a_target;
+ int lun = sd->sd_address.a_lun;
+ int dtype = sd->sd_inq->inq_dtype & DTYPE_MASK;
+ int rval;
+
+ con_log(CL_ANN1, (CE_WARN, "dr_sas: scsi_device t%dL%d", tgt, lun));
+ scsi_hba_nodename_compatible_get(sd->sd_inq, NULL, dtype,
+ NULL, &nodename, &compatible, &ncompatible);
+
+ if (nodename == NULL) {
+ con_log(CL_ANN1, (CE_WARN, "dr_sas: Found no compatible driver "
+ "for t%dL%d", tgt, lun));
+ rval = NDI_FAILURE;
+ goto finish;
+ }
+
+ childname = (dtype == DTYPE_DIRECT) ? "sd" : nodename;
+ con_log(CL_ANN1, (CE_WARN,
+ "dr_sas: Childname = %2s nodename = %s", childname, nodename));
+
+ /* Create a dev node */
+ rval = ndi_devi_alloc(instance->dip, childname, DEVI_SID_NODEID, &ldip);
+ con_log(CL_ANN1, (CE_WARN,
+ "dr_sas_config_scsi_device: ndi_devi_alloc rval = %x", rval));
+ if (rval == NDI_SUCCESS) {
+ if (ndi_prop_update_int(DDI_DEV_T_NONE, ldip, "target", tgt) !=
+ DDI_PROP_SUCCESS) {
+ con_log(CL_ANN1, (CE_WARN, "dr_sas: unable to create "
+ "property for t%dl%d target", tgt, lun));
+ rval = NDI_FAILURE;
+ goto finish;
+ }
+ if (ndi_prop_update_int(DDI_DEV_T_NONE, ldip, "lun", lun) !=
+ DDI_PROP_SUCCESS) {
+ con_log(CL_ANN1, (CE_WARN, "dr_sas: unable to create "
+ "property for t%dl%d lun", tgt, lun));
+ rval = NDI_FAILURE;
+ goto finish;
+ }
+
+ if (ndi_prop_update_string_array(DDI_DEV_T_NONE, ldip,
+ "compatible", compatible, ncompatible) !=
+ DDI_PROP_SUCCESS) {
+ con_log(CL_ANN1, (CE_WARN, "dr_sas: unable to create "
+ "property for t%dl%d compatible", tgt, lun));
+ rval = NDI_FAILURE;
+ goto finish;
+ }
+
+ rval = ndi_devi_online(ldip, NDI_ONLINE_ATTACH);
+ if (rval != NDI_SUCCESS) {
+ con_log(CL_ANN1, (CE_WARN, "dr_sas: unable to online "
+ "t%dl%d", tgt, lun));
+ ndi_prop_remove_all(ldip);
+ (void) ndi_devi_free(ldip);
+ } else {
+ con_log(CL_ANN1, (CE_WARN, "dr_sas: online Done :"
+ "0 t%dl%d", tgt, lun));
+ }
+
+ }
+finish:
+ if (dipp) {
+ *dipp = ldip;
+ }
+
+ con_log(CL_DLEVEL1, (CE_WARN,
+ "dr_sas: config_scsi_device rval = %d t%dL%d",
+ rval, tgt, lun));
+ scsi_hba_nodename_compatible_free(nodename, compatible);
+ return (rval);
+}
+
+/*ARGSUSED*/
+static int
+drsas_service_evt(struct drsas_instance *instance, int tgt, int lun, int event,
+ uint64_t wwn)
+{
+ struct drsas_eventinfo *mrevt = NULL;
+
+ con_log(CL_ANN1, (CE_NOTE,
+ "drsas_service_evt called for t%dl%d event = %d",
+ tgt, lun, event));
+
+ if ((instance->taskq == NULL) || (mrevt =
+ kmem_zalloc(sizeof (struct drsas_eventinfo), KM_NOSLEEP)) == NULL) {
+ return (ENOMEM);
+ }
+
+ mrevt->instance = instance;
+ mrevt->tgt = tgt;
+ mrevt->lun = lun;
+ mrevt->event = event;
+
+ if ((ddi_taskq_dispatch(instance->taskq,
+ (void (*)(void *))drsas_issue_evt_taskq, mrevt, DDI_NOSLEEP)) !=
+ DDI_SUCCESS) {
+ con_log(CL_ANN1, (CE_NOTE,
+ "dr_sas: Event task failed for t%dl%d event = %d",
+ tgt, lun, event));
+ kmem_free(mrevt, sizeof (struct drsas_eventinfo));
+ return (DDI_FAILURE);
+ }
+ return (DDI_SUCCESS);
+}
+
+static void
+drsas_issue_evt_taskq(struct drsas_eventinfo *mrevt)
+{
+ struct drsas_instance *instance = mrevt->instance;
+ dev_info_t *dip, *pdip;
+ int circ1 = 0;
+ char *devname;
+
+ con_log(CL_ANN1, (CE_NOTE, "drsas_issue_evt_taskq: called for"
+ " tgt %d lun %d event %d",
+ mrevt->tgt, mrevt->lun, mrevt->event));
+
+ if (mrevt->tgt < MRDRV_MAX_LD && mrevt->lun == 0) {
+ dip = instance->dr_ld_list[mrevt->tgt].dip;
+ } else {
+ return;
+ }
+
+ ndi_devi_enter(instance->dip, &circ1);
+ switch (mrevt->event) {
+ case DRSAS_EVT_CONFIG_TGT:
+ if (dip == NULL) {
+
+ if (mrevt->lun == 0) {
+ (void) drsas_config_ld(instance, mrevt->tgt,
+ 0, NULL);
+ }
+ con_log(CL_ANN1, (CE_NOTE,
+ "dr_sas: EVT_CONFIG_TGT called:"
+ " for tgt %d lun %d event %d",
+ mrevt->tgt, mrevt->lun, mrevt->event));
+
+ } else {
+ con_log(CL_ANN1, (CE_NOTE,
+ "dr_sas: EVT_CONFIG_TGT dip != NULL:"
+ " for tgt %d lun %d event %d",
+ mrevt->tgt, mrevt->lun, mrevt->event));
+ }
+ break;
+ case DRSAS_EVT_UNCONFIG_TGT:
+ if (dip) {
+ if (i_ddi_devi_attached(dip)) {
+
+ pdip = ddi_get_parent(dip);
+
+ devname = kmem_zalloc(MAXNAMELEN + 1, KM_SLEEP);
+ (void) ddi_deviname(dip, devname);
+
+ (void) devfs_clean(pdip, devname + 1,
+ DV_CLEAN_FORCE);
+ kmem_free(devname, MAXNAMELEN + 1);
+ }
+ (void) ndi_devi_offline(dip, NDI_DEVI_REMOVE);
+ con_log(CL_ANN1, (CE_NOTE,
+ "dr_sas: EVT_UNCONFIG_TGT called:"
+ " for tgt %d lun %d event %d",
+ mrevt->tgt, mrevt->lun, mrevt->event));
+ } else {
+ con_log(CL_ANN1, (CE_NOTE,
+ "dr_sas: EVT_UNCONFIG_TGT dip == NULL:"
+ " for tgt %d lun %d event %d",
+ mrevt->tgt, mrevt->lun, mrevt->event));
+ }
+ break;
+ }
+ kmem_free(mrevt, sizeof (struct drsas_eventinfo));
+ ndi_devi_exit(instance->dip, circ1);
+}
+
+static int
+drsas_mode_sense_build(struct scsi_pkt *pkt)
+{
+ union scsi_cdb *cdbp;
+ uint16_t page_code;
+ struct scsa_cmd *acmd;
+ struct buf *bp;
+ struct mode_header *modehdrp;
+
+ cdbp = (void *)pkt->pkt_cdbp;
+ page_code = cdbp->cdb_un.sg.scsi[0];
+ acmd = PKT2CMD(pkt);
+ bp = acmd->cmd_buf;
+ if ((!bp) && bp->b_un.b_addr && bp->b_bcount && acmd->cmd_dmacount) {
+ con_log(CL_ANN1, (CE_WARN, "Failing MODESENSE Command"));
+ /* ADD pkt statistics as Command failed. */
+ return (NULL);
+ }
+
+ bp_mapin(bp);
+ bzero(bp->b_un.b_addr, bp->b_bcount);
+
+ switch (page_code) {
+ case 0x3: {
+ struct mode_format *page3p = NULL;
+ modehdrp = (struct mode_header *)(bp->b_un.b_addr);
+ modehdrp->bdesc_length = MODE_BLK_DESC_LENGTH;
+
+ page3p = (void *)((caddr_t)modehdrp +
+ MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH);
+ page3p->mode_page.code = 0x3;
+ page3p->mode_page.length =
+ (uchar_t)(sizeof (struct mode_format));
+ page3p->data_bytes_sect = 512;
+ page3p->sect_track = 63;
+ break;
+ }
+ case 0x4: {
+ struct mode_geometry *page4p = NULL;
+ modehdrp = (struct mode_header *)(bp->b_un.b_addr);
+ modehdrp->bdesc_length = MODE_BLK_DESC_LENGTH;
+
+ page4p = (void *)((caddr_t)modehdrp +
+ MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH);
+ page4p->mode_page.code = 0x4;
+ page4p->mode_page.length =
+ (uchar_t)(sizeof (struct mode_geometry));
+ page4p->heads = 255;
+ page4p->rpm = 10000;
+ break;
+ }
+ default:
+ break;
+ }
+ return (NULL);
+}
diff --git a/usr/src/uts/common/io/dr_sas/dr_sas.conf b/usr/src/uts/common/io/dr_sas/dr_sas.conf
new file mode 100644
index 0000000000..3792f43ca4
--- /dev/null
+++ b/usr/src/uts/common/io/dr_sas/dr_sas.conf
@@ -0,0 +1,15 @@
+#
+# Copyright (c) 2008-2009, LSI Logic Corporation.
+# All rights reserved.
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# dr_sas.conf for sol 10 (and later) for all supported architectures
+#
+# global definitions
+
+# MSI specific flag. user can uncomment this line and set flag "yes" to enable MSI
+#drsas-enable-msi="yes";
diff --git a/usr/src/uts/common/io/dr_sas/dr_sas.h b/usr/src/uts/common/io/dr_sas/dr_sas.h
new file mode 100644
index 0000000000..8f78658edf
--- /dev/null
+++ b/usr/src/uts/common/io/dr_sas/dr_sas.h
@@ -0,0 +1,1766 @@
+/*
+ * dr_sas.h: header for dr_sas
+ *
+ * Solaris MegaRAID driver for SAS2.0 controllers
+ * Copyright (c) 2008-2009, LSI Logic Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the author nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _DR_SAS_H_
+#define _DR_SAS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/scsi/scsi.h>
+#include "dr_sas_list.h"
+
+/*
+ * MegaRAID SAS2.0 Driver meta data
+ */
+#define DRSAS_VERSION "LSIv2.0"
+#define DRSAS_RELDATE "Jan 9, 2009"
+
+#define DRSAS_TRUE 1
+#define DRSAS_FALSE 0
+
+/*
+ * MegaRAID SAS2.0 device id conversion definitions.
+ */
+#define INST2LSIRDCTL(x) ((x) << INST_MINOR_SHIFT)
+
+/*
+ * MegaRAID SAS2.0 supported controllers
+ */
+#define PCI_DEVICE_ID_LSI_2108VDE 0x0078
+#define PCI_DEVICE_ID_LSI_2108V 0x0079
+
+/*
+ * Register Index for 2108 Controllers.
+ */
+#define REGISTER_SET_IO_2108 (2)
+
+#define DRSAS_MAX_SGE_CNT 0x50
+
+#define DRSAS_IOCTL_DRIVER 0x12341234
+#define DRSAS_IOCTL_FIRMWARE 0x12345678
+#define DRSAS_IOCTL_AEN 0x87654321
+
+#define DRSAS_1_SECOND 1000000
+
+/* Dynamic Enumeration Flags */
+#define DRSAS_PD_LUN 1
+#define DRSAS_LD_LUN 0
+#define DRSAS_PD_TGT_MAX 255
+#define DRSAS_GET_PD_MAX(s) ((s)->dr_pd_max)
+#define WWN_STRLEN 17
+
+/*
+ * =====================================
+ * MegaRAID SAS2.0 MFI firmware definitions
+ * =====================================
+ */
+/*
+ * MFI stands for MegaRAID SAS2.0 FW Interface. This is just a moniker for
+ * protocol between the software and firmware. Commands are issued using
+ * "message frames"
+ */
+
+/*
+ * FW posts its state in upper 4 bits of outbound_msg_0 register
+ */
+#define MFI_STATE_SHIFT 28
+#define MFI_STATE_MASK ((uint32_t)0xF<<MFI_STATE_SHIFT)
+#define MFI_STATE_UNDEFINED ((uint32_t)0x0<<MFI_STATE_SHIFT)
+#define MFI_STATE_BB_INIT ((uint32_t)0x1<<MFI_STATE_SHIFT)
+#define MFI_STATE_FW_INIT ((uint32_t)0x4<<MFI_STATE_SHIFT)
+#define MFI_STATE_WAIT_HANDSHAKE ((uint32_t)0x6<<MFI_STATE_SHIFT)
+#define MFI_STATE_FW_INIT_2 ((uint32_t)0x7<<MFI_STATE_SHIFT)
+#define MFI_STATE_DEVICE_SCAN ((uint32_t)0x8<<MFI_STATE_SHIFT)
+#define MFI_STATE_BOOT_MESSAGE_PENDING ((uint32_t)0x9<<MFI_STATE_SHIFT)
+#define MFI_STATE_FLUSH_CACHE ((uint32_t)0xA<<MFI_STATE_SHIFT)
+#define MFI_STATE_READY ((uint32_t)0xB<<MFI_STATE_SHIFT)
+#define MFI_STATE_OPERATIONAL ((uint32_t)0xC<<MFI_STATE_SHIFT)
+#define MFI_STATE_FAULT ((uint32_t)0xF<<MFI_STATE_SHIFT)
+
+#define MRMFI_FRAME_SIZE 64
+
+/*
+ * During FW init, clear pending cmds & reset state using inbound_msg_0
+ *
+ * ABORT : Abort all pending cmds
+ * READY : Move from OPERATIONAL to READY state; discard queue info
+ * MFIMODE : Discard (possible) low MFA posted in 64-bit mode (??)
+ * CLR_HANDSHAKE: FW is waiting for HANDSHAKE from BIOS or Driver
+ */
+#define MFI_INIT_ABORT 0x00000001
+#define MFI_INIT_READY 0x00000002
+#define MFI_INIT_MFIMODE 0x00000004
+#define MFI_INIT_CLEAR_HANDSHAKE 0x00000008
+#define MFI_INIT_HOTPLUG 0x00000010
+#define MFI_STOP_ADP 0x00000020
+#define MFI_RESET_FLAGS MFI_INIT_READY|MFI_INIT_MFIMODE|MFI_INIT_ABORT
+
+/*
+ * MFI frame flags
+ */
+#define MFI_FRAME_POST_IN_REPLY_QUEUE 0x0000
+#define MFI_FRAME_DONT_POST_IN_REPLY_QUEUE 0x0001
+#define MFI_FRAME_SGL32 0x0000
+#define MFI_FRAME_SGL64 0x0002
+#define MFI_FRAME_SENSE32 0x0000
+#define MFI_FRAME_SENSE64 0x0004
+#define MFI_FRAME_DIR_NONE 0x0000
+#define MFI_FRAME_DIR_WRITE 0x0008
+#define MFI_FRAME_DIR_READ 0x0010
+#define MFI_FRAME_DIR_BOTH 0x0018
+
+/*
+ * Definition for cmd_status
+ */
+#define MFI_CMD_STATUS_POLL_MODE 0xFF
+#define MFI_CMD_STATUS_SYNC_MODE 0xFF
+
+/*
+ * MFI command opcodes
+ */
+#define MFI_CMD_OP_INIT 0x00
+#define MFI_CMD_OP_LD_READ 0x01
+#define MFI_CMD_OP_LD_WRITE 0x02
+#define MFI_CMD_OP_LD_SCSI 0x03
+#define MFI_CMD_OP_PD_SCSI 0x04
+#define MFI_CMD_OP_DCMD 0x05
+#define MFI_CMD_OP_ABORT 0x06
+#define MFI_CMD_OP_SMP 0x07
+#define MFI_CMD_OP_STP 0x08
+
+#define DR_DCMD_CTRL_GET_INFO 0x01010000
+
+#define DR_DCMD_CTRL_CACHE_FLUSH 0x01101000
+#define DR_FLUSH_CTRL_CACHE 0x01
+#define DR_FLUSH_DISK_CACHE 0x02
+
+#define DR_DCMD_CTRL_SHUTDOWN 0x01050000
+#define DRSAS_ENABLE_DRIVE_SPINDOWN 0x01
+
+#define DR_DCMD_CTRL_EVENT_GET_INFO 0x01040100
+#define DR_DCMD_CTRL_EVENT_GET 0x01040300
+#define DR_DCMD_CTRL_EVENT_WAIT 0x01040500
+#define DR_DCMD_LD_GET_PROPERTIES 0x03030000
+#define DR_DCMD_PD_GET_INFO 0x02020000
+
+/*
+ * Solaris Specific MAX values
+ */
+#define MAX_SGL 24
+/*
+ * MFI command completion codes
+ */
+enum MFI_STAT {
+ MFI_STAT_OK = 0x00,
+ MFI_STAT_INVALID_CMD = 0x01,
+ MFI_STAT_INVALID_DCMD = 0x02,
+ MFI_STAT_INVALID_PARAMETER = 0x03,
+ MFI_STAT_INVALID_SEQUENCE_NUMBER = 0x04,
+ MFI_STAT_ABORT_NOT_POSSIBLE = 0x05,
+ MFI_STAT_APP_HOST_CODE_NOT_FOUND = 0x06,
+ MFI_STAT_APP_IN_USE = 0x07,
+ MFI_STAT_APP_NOT_INITIALIZED = 0x08,
+ MFI_STAT_ARRAY_INDEX_INVALID = 0x09,
+ MFI_STAT_ARRAY_ROW_NOT_EMPTY = 0x0a,
+ MFI_STAT_CONFIG_RESOURCE_CONFLICT = 0x0b,
+ MFI_STAT_DEVICE_NOT_FOUND = 0x0c,
+ MFI_STAT_DRIVE_TOO_SMALL = 0x0d,
+ MFI_STAT_FLASH_ALLOC_FAIL = 0x0e,
+ MFI_STAT_FLASH_BUSY = 0x0f,
+ MFI_STAT_FLASH_ERROR = 0x10,
+ MFI_STAT_FLASH_IMAGE_BAD = 0x11,
+ MFI_STAT_FLASH_IMAGE_INCOMPLETE = 0x12,
+ MFI_STAT_FLASH_NOT_OPEN = 0x13,
+ MFI_STAT_FLASH_NOT_STARTED = 0x14,
+ MFI_STAT_FLUSH_FAILED = 0x15,
+ MFI_STAT_HOST_CODE_NOT_FOUNT = 0x16,
+ MFI_STAT_LD_CC_IN_PROGRESS = 0x17,
+ MFI_STAT_LD_INIT_IN_PROGRESS = 0x18,
+ MFI_STAT_LD_LBA_OUT_OF_RANGE = 0x19,
+ MFI_STAT_LD_MAX_CONFIGURED = 0x1a,
+ MFI_STAT_LD_NOT_OPTIMAL = 0x1b,
+ MFI_STAT_LD_RBLD_IN_PROGRESS = 0x1c,
+ MFI_STAT_LD_RECON_IN_PROGRESS = 0x1d,
+ MFI_STAT_LD_WRONG_RAID_LEVEL = 0x1e,
+ MFI_STAT_MAX_SPARES_EXCEEDED = 0x1f,
+ MFI_STAT_MEMORY_NOT_AVAILABLE = 0x20,
+ MFI_STAT_MFC_HW_ERROR = 0x21,
+ MFI_STAT_NO_HW_PRESENT = 0x22,
+ MFI_STAT_NOT_FOUND = 0x23,
+ MFI_STAT_NOT_IN_ENCL = 0x24,
+ MFI_STAT_PD_CLEAR_IN_PROGRESS = 0x25,
+ MFI_STAT_PD_TYPE_WRONG = 0x26,
+ MFI_STAT_PR_DISABLED = 0x27,
+ MFI_STAT_ROW_INDEX_INVALID = 0x28,
+ MFI_STAT_SAS_CONFIG_INVALID_ACTION = 0x29,
+ MFI_STAT_SAS_CONFIG_INVALID_DATA = 0x2a,
+ MFI_STAT_SAS_CONFIG_INVALID_PAGE = 0x2b,
+ MFI_STAT_SAS_CONFIG_INVALID_TYPE = 0x2c,
+ MFI_STAT_SCSI_DONE_WITH_ERROR = 0x2d,
+ MFI_STAT_SCSI_IO_FAILED = 0x2e,
+ MFI_STAT_SCSI_RESERVATION_CONFLICT = 0x2f,
+ MFI_STAT_SHUTDOWN_FAILED = 0x30,
+ MFI_STAT_TIME_NOT_SET = 0x31,
+ MFI_STAT_WRONG_STATE = 0x32,
+ MFI_STAT_LD_OFFLINE = 0x33,
+ /* UNUSED: 0x34 to 0xfe */
+ MFI_STAT_INVALID_STATUS = 0xFF
+};
+
+enum DR_EVT_CLASS {
+ DR_EVT_CLASS_DEBUG = -2,
+ DR_EVT_CLASS_PROGRESS = -1,
+ DR_EVT_CLASS_INFO = 0,
+ DR_EVT_CLASS_WARNING = 1,
+ DR_EVT_CLASS_CRITICAL = 2,
+ DR_EVT_CLASS_FATAL = 3,
+ DR_EVT_CLASS_DEAD = 4
+};
+
+enum DR_EVT_LOCALE {
+ DR_EVT_LOCALE_LD = 0x0001,
+ DR_EVT_LOCALE_PD = 0x0002,
+ DR_EVT_LOCALE_ENCL = 0x0004,
+ DR_EVT_LOCALE_BBU = 0x0008,
+ DR_EVT_LOCALE_SAS = 0x0010,
+ DR_EVT_LOCALE_CTRL = 0x0020,
+ DR_EVT_LOCALE_CONFIG = 0x0040,
+ DR_EVT_LOCALE_CLUSTER = 0x0080,
+ DR_EVT_LOCALE_ALL = 0xffff
+};
+
+#define DR_EVT_CFG_CLEARED 0x0004
+#define DR_EVT_LD_CREATED 0x008a
+#define DR_EVT_LD_DELETED 0x008b
+#define DR_EVT_PD_REMOVED_EXT 0x00f8
+#define DR_EVT_PD_INSERTED_EXT 0x00f7
+
+enum LD_STATE {
+ LD_OFFLINE = 0,
+ LD_PARTIALLY_DEGRADED = 1,
+ LD_DEGRADED = 2,
+ LD_OPTIMAL = 3,
+ LD_INVALID = 0xFF
+};
+
+enum DRSAS_EVT {
+ DRSAS_EVT_CONFIG_TGT = 0,
+ DRSAS_EVT_UNCONFIG_TGT = 1,
+ DRSAS_EVT_UNCONFIG_SMP = 2
+};
+
+#define DMA_OBJ_ALLOCATED 1
+#define DMA_OBJ_REALLOCATED 2
+#define DMA_OBJ_FREED 3
+
+/*
+ * dma_obj_t - Our DMA object
+ * @param buffer : kernel virtual address
+ * @param size : size of the data to be allocated
+ * @param acc_handle : access handle
+ * @param dma_handle : dma handle
+ * @param dma_cookie : scatter-gather list
+ * @param dma_attr : dma attributes for this buffer
+ * Our DMA object. The caller must initialize the size and dma attributes
+ * (dma_attr) fields before allocating the resources.
+ */
+typedef struct {
+ caddr_t buffer;
+ uint32_t size;
+ ddi_acc_handle_t acc_handle;
+ ddi_dma_handle_t dma_handle;
+ ddi_dma_cookie_t dma_cookie[DRSAS_MAX_SGE_CNT];
+ ddi_dma_attr_t dma_attr;
+ uint8_t status;
+ uint8_t reserved[3];
+} dma_obj_t;
+
+struct drsas_eventinfo {
+ struct drsas_instance *instance;
+ int tgt;
+ int lun;
+ int event;
+};
+
+struct drsas_ld {
+ dev_info_t *dip;
+ uint8_t lun_type;
+ uint8_t reserved[3];
+};
+
+struct drsas_pd {
+ dev_info_t *dip;
+ uint8_t lun_type;
+ uint8_t dev_id;
+ uint8_t flags;
+ uint8_t reserved;
+};
+
+struct drsas_pd_info {
+ uint16_t deviceId;
+ uint16_t seqNum;
+ uint8_t inquiryData[96];
+ uint8_t vpdPage83[64];
+ uint8_t notSupported;
+ uint8_t scsiDevType;
+ uint8_t a;
+ uint8_t device_speed;
+ uint32_t mediaerrcnt;
+ uint32_t other;
+ uint32_t pred;
+ uint32_t lastpred;
+ uint16_t fwState;
+ uint8_t disabled;
+ uint8_t linkspwwd;
+ uint32_t ddfType;
+ struct {
+ uint8_t count;
+ uint8_t isPathBroken;
+ uint8_t connectorIndex[2];
+ uint8_t reserved[4];
+ uint64_t sasAddr[2];
+ uint8_t reserved2[16];
+ } pathInfo;
+};
+
+typedef struct drsas_instance {
+ uint32_t *producer;
+ uint32_t *consumer;
+
+ uint32_t *reply_queue;
+ dma_obj_t mfi_internal_dma_obj;
+
+ uint8_t init_id;
+ uint8_t reserved[3];
+
+ uint16_t max_num_sge;
+ uint16_t max_fw_cmds;
+ uint32_t max_sectors_per_req;
+
+ struct drsas_cmd **cmd_list;
+
+ mlist_t cmd_pool_list;
+ kmutex_t cmd_pool_mtx;
+
+ mlist_t cmd_pend_list;
+ kmutex_t cmd_pend_mtx;
+
+ dma_obj_t mfi_evt_detail_obj;
+ struct drsas_cmd *aen_cmd;
+
+ uint32_t aen_seq_num;
+ uint32_t aen_class_locale_word;
+
+ scsi_hba_tran_t *tran;
+
+ kcondvar_t int_cmd_cv;
+ kmutex_t int_cmd_mtx;
+
+ kcondvar_t aen_cmd_cv;
+ kmutex_t aen_cmd_mtx;
+
+ kcondvar_t abort_cmd_cv;
+ kmutex_t abort_cmd_mtx;
+
+ dev_info_t *dip;
+ ddi_acc_handle_t pci_handle;
+
+ timeout_id_t timeout_id;
+ uint32_t unique_id;
+ uint16_t fw_outstanding;
+ caddr_t regmap;
+ ddi_acc_handle_t regmap_handle;
+ uint8_t isr_level;
+ ddi_iblock_cookie_t iblock_cookie;
+ ddi_iblock_cookie_t soft_iblock_cookie;
+ ddi_softintr_t soft_intr_id;
+ uint8_t softint_running;
+ kmutex_t completed_pool_mtx;
+ mlist_t completed_pool_list;
+
+ caddr_t internal_buf;
+ uint32_t internal_buf_dmac_add;
+ uint32_t internal_buf_size;
+
+ uint16_t vendor_id;
+ uint16_t device_id;
+ uint16_t subsysvid;
+ uint16_t subsysid;
+ int instance;
+ int baseaddress;
+ char iocnode[16];
+
+ int fm_capabilities;
+
+ struct drsas_func_ptr *func_ptr;
+ /* MSI interrupts specific */
+ ddi_intr_handle_t *intr_htable;
+ int intr_type;
+ int intr_cnt;
+ size_t intr_size;
+ uint_t intr_pri;
+ int intr_cap;
+
+ ddi_taskq_t *taskq;
+ struct drsas_ld *dr_ld_list;
+} drsas_t;
+
+struct drsas_func_ptr {
+ int (*read_fw_status_reg)(struct drsas_instance *);
+ void (*issue_cmd)(struct drsas_cmd *, struct drsas_instance *);
+ int (*issue_cmd_in_sync_mode)(struct drsas_instance *,
+ struct drsas_cmd *);
+ int (*issue_cmd_in_poll_mode)(struct drsas_instance *,
+ struct drsas_cmd *);
+ void (*enable_intr)(struct drsas_instance *);
+ void (*disable_intr)(struct drsas_instance *);
+ int (*intr_ack)(struct drsas_instance *);
+};
+
+/*
+ * ### Helper routines ###
+ */
+
+/*
+ * con_log() - console log routine
+ * @param level : indicates the severity of the message.
+ * @fparam mt : format string
+ *
+ * con_log displays the error messages on the console based on the current
+ * debug level. Also it attaches the appropriate kernel severity level with
+ * the message.
+ *
+ *
+ * console messages debug levels
+ */
+#define CL_NONE 0 /* No debug information */
+#define CL_ANN 1 /* print unconditionally, announcements */
+#define CL_ANN1 2 /* No o/p */
+#define CL_DLEVEL1 3 /* debug level 1, informative */
+#define CL_DLEVEL2 4 /* debug level 2, verbose */
+#define CL_DLEVEL3 5 /* debug level 3, very verbose */
+
+#ifdef __SUNPRO_C
+#define __func__ ""
+#endif
+
+#define con_log(level, fmt) { if (debug_level_g >= level) cmn_err fmt; }
+
+/*
+ * ### SCSA definitions ###
+ */
+#define PKT2TGT(pkt) ((pkt)->pkt_address.a_target)
+#define PKT2LUN(pkt) ((pkt)->pkt_address.a_lun)
+#define PKT2TRAN(pkt) ((pkt)->pkt_adress.a_hba_tran)
+#define ADDR2TRAN(ap) ((ap)->a_hba_tran)
+
+#define TRAN2MR(tran) (struct drsas_instance *)(tran)->tran_hba_private)
+#define ADDR2MR(ap) (TRAN2MR(ADDR2TRAN(ap))
+
+#define PKT2CMD(pkt) ((struct scsa_cmd *)(pkt)->pkt_ha_private)
+#define CMD2PKT(sp) ((sp)->cmd_pkt)
+#define PKT2REQ(pkt) (&(PKT2CMD(pkt)->request))
+
+#define CMD2ADDR(cmd) (&CMD2PKT(cmd)->pkt_address)
+#define CMD2TRAN(cmd) (CMD2PKT(cmd)->pkt_address.a_hba_tran)
+#define CMD2MR(cmd) (TRAN2MR(CMD2TRAN(cmd)))
+
+#define CFLAG_DMAVALID 0x0001 /* requires a dma operation */
+#define CFLAG_DMASEND 0x0002 /* Transfer from the device */
+#define CFLAG_CONSISTENT 0x0040 /* consistent data transfer */
+
+/*
+ * ### Data structures for ioctl inteface and internal commands ###
+ */
+
+/*
+ * Data direction flags
+ */
+#define UIOC_RD 0x00001
+#define UIOC_WR 0x00002
+
+#define SCP2HOST(scp) (scp)->device->host /* to host */
+#define SCP2HOSTDATA(scp) SCP2HOST(scp)->hostdata /* to soft state */
+#define SCP2CHANNEL(scp) (scp)->device->channel /* to channel */
+#define SCP2TARGET(scp) (scp)->device->id /* to target */
+#define SCP2LUN(scp) (scp)->device->lun /* to LUN */
+
+#define SCSIHOST2ADAP(host) (((caddr_t *)(host->hostdata))[0])
+#define SCP2ADAPTER(scp) \
+ (struct drsas_instance *)SCSIHOST2ADAP(SCP2HOST(scp))
+
+#define MRDRV_IS_LOGICAL_SCSA(instance, acmd) \
+ (acmd->device_id < MRDRV_MAX_LD) ? 1 : 0
+#define MRDRV_IS_LOGICAL(ap) \
+ ((ap->a_target < MRDRV_MAX_LD) && (ap->a_lun == 0)) ? 1 : 0
+#define MAP_DEVICE_ID(instance, ap) \
+ (ap->a_target)
+
+#define HIGH_LEVEL_INTR 1
+#define NORMAL_LEVEL_INTR 0
+
+/*
+ * scsa_cmd - Per-command mr private data
+ * @param cmd_dmahandle : dma handle
+ * @param cmd_dmacookies : current dma cookies
+ * @param cmd_pkt : scsi_pkt reference
+ * @param cmd_dmacount : dma count
+ * @param cmd_cookie : next cookie
+ * @param cmd_ncookies : cookies per window
+ * @param cmd_cookiecnt : cookies per sub-win
+ * @param cmd_nwin : number of dma windows
+ * @param cmd_curwin : current dma window
+ * @param cmd_dma_offset : current window offset
+ * @param cmd_dma_len : current window length
+ * @param cmd_flags : private flags
+ * @param cmd_cdblen : length of cdb
+ * @param cmd_scblen : length of scb
+ * @param cmd_buf : command buffer
+ * @param channel : channel for scsi sub-system
+ * @param target : target for scsi sub-system
+ * @param lun : LUN for scsi sub-system
+ *
+ * - Allocated at same time as scsi_pkt by scsi_hba_pkt_alloc(9E)
+ * - Pointed to by pkt_ha_private field in scsi_pkt
+ */
+struct scsa_cmd {
+ ddi_dma_handle_t cmd_dmahandle;
+ ddi_dma_cookie_t cmd_dmacookies[DRSAS_MAX_SGE_CNT];
+ struct scsi_pkt *cmd_pkt;
+ ulong_t cmd_dmacount;
+ uint_t cmd_cookie;
+ uint_t cmd_ncookies;
+ uint_t cmd_cookiecnt;
+ uint_t cmd_nwin;
+ uint_t cmd_curwin;
+ off_t cmd_dma_offset;
+ ulong_t cmd_dma_len;
+ ulong_t cmd_flags;
+ uint_t cmd_cdblen;
+ uint_t cmd_scblen;
+ struct buf *cmd_buf;
+ ushort_t device_id;
+ uchar_t islogical;
+ uchar_t lun;
+ struct drsas_device *drsas_dev;
+};
+
+
+struct drsas_cmd {
+ union drsas_frame *frame;
+ uint32_t frame_phys_addr;
+ uint8_t *sense;
+ uint32_t sense_phys_addr;
+ dma_obj_t frame_dma_obj;
+ uint8_t frame_dma_obj_status;
+
+ uint32_t index;
+ uint8_t sync_cmd;
+ uint8_t cmd_status;
+ uint16_t abort_aen;
+ mlist_t list;
+ uint32_t frame_count;
+ struct scsa_cmd *cmd;
+ struct scsi_pkt *pkt;
+};
+
+#define MAX_MGMT_ADAPTERS 1024
+#define IOC_SIGNATURE "MR-SAS"
+
+#define IOC_CMD_FIRMWARE 0x0
+#define DRSAS_DRIVER_IOCTL_COMMON 0xF0010000
+#define DRSAS_DRIVER_IOCTL_DRIVER_VERSION 0xF0010100
+#define DRSAS_DRIVER_IOCTL_PCI_INFORMATION 0xF0010200
+#define DRSAS_DRIVER_IOCTL_MRRAID_STATISTICS 0xF0010300
+
+
+#define DRSAS_MAX_SENSE_LENGTH 32
+
+struct drsas_mgmt_info {
+
+ uint16_t count;
+ struct drsas_instance *instance[MAX_MGMT_ADAPTERS];
+ uint16_t map[MAX_MGMT_ADAPTERS];
+ int max_index;
+};
+
+#pragma pack(1)
+
+/*
+ * SAS controller properties
+ */
+struct drsas_ctrl_prop {
+ uint16_t seq_num;
+ uint16_t pred_fail_poll_interval;
+ uint16_t intr_throttle_count;
+ uint16_t intr_throttle_timeouts;
+
+ uint8_t rebuild_rate;
+ uint8_t patrol_read_rate;
+ uint8_t bgi_rate;
+ uint8_t cc_rate;
+ uint8_t recon_rate;
+
+ uint8_t cache_flush_interval;
+
+ uint8_t spinup_drv_count;
+ uint8_t spinup_delay;
+
+ uint8_t cluster_enable;
+ uint8_t coercion_mode;
+ uint8_t disk_write_cache_disable;
+ uint8_t alarm_enable;
+
+ uint8_t reserved[44];
+};
+
+/*
+ * SAS controller information
+ */
+struct drsas_ctrl_info {
+ /* PCI device information */
+ struct {
+ uint16_t vendor_id;
+ uint16_t device_id;
+ uint16_t sub_vendor_id;
+ uint16_t sub_device_id;
+ uint8_t reserved[24];
+ } pci;
+
+ /* Host interface information */
+ struct {
+ uint8_t PCIX : 1;
+ uint8_t PCIE : 1;
+ uint8_t iSCSI : 1;
+ uint8_t SAS_3G : 1;
+ uint8_t reserved_0 : 4;
+ uint8_t reserved_1[6];
+ uint8_t port_count;
+ uint64_t port_addr[8];
+ } host_interface;
+
+ /* Device (backend) interface information */
+ struct {
+ uint8_t SPI : 1;
+ uint8_t SAS_3G : 1;
+ uint8_t SATA_1_5G : 1;
+ uint8_t SATA_3G : 1;
+ uint8_t reserved_0 : 4;
+ uint8_t reserved_1[6];
+ uint8_t port_count;
+ uint64_t port_addr[8];
+ } device_interface;
+
+ /* List of components residing in flash. All str are null terminated */
+ uint32_t image_check_word;
+ uint32_t image_component_count;
+
+ struct {
+ char name[8];
+ char version[32];
+ char build_date[16];
+ char built_time[16];
+ } image_component[8];
+
+ /*
+ * List of flash components that have been flashed on the card, but
+ * are not in use, pending reset of the adapter. This list will be
+ * empty if a flash operation has not occurred. All stings are null
+ * terminated
+ */
+ uint32_t pending_image_component_count;
+
+ struct {
+ char name[8];
+ char version[32];
+ char build_date[16];
+ char build_time[16];
+ } pending_image_component[8];
+
+ uint8_t max_arms;
+ uint8_t max_spans;
+ uint8_t max_arrays;
+ uint8_t max_lds;
+
+ char product_name[80];
+ char serial_no[32];
+
+ /*
+ * Other physical/controller/operation information. Indicates the
+ * presence of the hardware
+ */
+ struct {
+ uint32_t bbu : 1;
+ uint32_t alarm : 1;
+ uint32_t nvram : 1;
+ uint32_t uart : 1;
+ uint32_t reserved : 28;
+ } hw_present;
+
+ uint32_t current_fw_time;
+
+ /* Maximum data transfer sizes */
+ uint16_t max_concurrent_cmds;
+ uint16_t max_sge_count;
+ uint32_t max_request_size;
+
+ /* Logical and physical device counts */
+ uint16_t ld_present_count;
+ uint16_t ld_degraded_count;
+ uint16_t ld_offline_count;
+
+ uint16_t pd_present_count;
+ uint16_t pd_disk_present_count;
+ uint16_t pd_disk_pred_failure_count;
+ uint16_t pd_disk_failed_count;
+
+ /* Memory size information */
+ uint16_t nvram_size;
+ uint16_t memory_size;
+ uint16_t flash_size;
+
+ /* Error counters */
+ uint16_t mem_correctable_error_count;
+ uint16_t mem_uncorrectable_error_count;
+
+ /* Cluster information */
+ uint8_t cluster_permitted;
+ uint8_t cluster_active;
+ uint8_t reserved_1[2];
+
+ /* Controller capabilities structures */
+ struct {
+ uint32_t raid_level_0 : 1;
+ uint32_t raid_level_1 : 1;
+ uint32_t raid_level_5 : 1;
+ uint32_t raid_level_1E : 1;
+ uint32_t reserved : 28;
+ } raid_levels;
+
+ struct {
+ uint32_t rbld_rate : 1;
+ uint32_t cc_rate : 1;
+ uint32_t bgi_rate : 1;
+ uint32_t recon_rate : 1;
+ uint32_t patrol_rate : 1;
+ uint32_t alarm_control : 1;
+ uint32_t cluster_supported : 1;
+ uint32_t bbu : 1;
+ uint32_t spanning_allowed : 1;
+ uint32_t dedicated_hotspares : 1;
+ uint32_t revertible_hotspares : 1;
+ uint32_t foreign_config_import : 1;
+ uint32_t self_diagnostic : 1;
+ uint32_t reserved : 19;
+ } adapter_operations;
+
+ struct {
+ uint32_t read_policy : 1;
+ uint32_t write_policy : 1;
+ uint32_t io_policy : 1;
+ uint32_t access_policy : 1;
+ uint32_t reserved : 28;
+ } ld_operations;
+
+ struct {
+ uint8_t min;
+ uint8_t max;
+ uint8_t reserved[2];
+ } stripe_size_operations;
+
+ struct {
+ uint32_t force_online : 1;
+ uint32_t force_offline : 1;
+ uint32_t force_rebuild : 1;
+ uint32_t reserved : 29;
+ } pd_operations;
+
+ struct {
+ uint32_t ctrl_supports_sas : 1;
+ uint32_t ctrl_supports_sata : 1;
+ uint32_t allow_mix_in_encl : 1;
+ uint32_t allow_mix_in_ld : 1;
+ uint32_t allow_sata_in_cluster : 1;
+ uint32_t reserved : 27;
+ } pd_mix_support;
+
+ /* Include the controller properties (changeable items) */
+ uint8_t reserved_2[12];
+ struct drsas_ctrl_prop properties;
+
+ uint8_t pad[0x800 - 0x640];
+};
+
+/*
+ * ==================================
+ * MegaRAID SAS2.0 driver definitions
+ * ==================================
+ */
+#define MRDRV_MAX_NUM_CMD 1024
+
+#define MRDRV_MAX_PD_CHANNELS 2
+#define MRDRV_MAX_LD_CHANNELS 2
+#define MRDRV_MAX_CHANNELS (MRDRV_MAX_PD_CHANNELS + \
+ MRDRV_MAX_LD_CHANNELS)
+#define MRDRV_MAX_DEV_PER_CHANNEL 128
+#define MRDRV_DEFAULT_INIT_ID -1
+#define MRDRV_MAX_CMD_PER_LUN 1000
+#define MRDRV_MAX_LUN 1
+#define MRDRV_MAX_LD 64
+
+#define MRDRV_RESET_WAIT_TIME 300
+#define MRDRV_RESET_NOTICE_INTERVAL 5
+
+#define DRSAS_IOCTL_CMD 0
+
+/*
+ * FW can accept both 32 and 64 bit SGLs. We want to allocate 32/64 bit
+ * SGLs based on the size of dma_addr_t
+ */
+#define IS_DMA64 (sizeof (dma_addr_t) == 8)
+
+#define IB_MSG_0_OFF 0x10 /* XScale */
+#define OB_MSG_0_OFF 0x18 /* XScale */
+#define IB_DOORBELL_OFF 0x20 /* XScale & ROC */
+#define OB_INTR_STATUS_OFF 0x30 /* XScale & ROC */
+#define OB_INTR_MASK_OFF 0x34 /* XScale & ROC */
+#define IB_QPORT_OFF 0x40 /* XScale & ROC */
+#define OB_DOORBELL_CLEAR_OFF 0xA0 /* ROC */
+#define OB_SCRATCH_PAD_0_OFF 0xB0 /* ROC */
+#define OB_INTR_MASK 0xFFFFFFFF
+#define OB_DOORBELL_CLEAR_MASK 0xFFFFFFFF
+
+/*
+ * All MFI register set macros accept drsas_register_set*
+ */
+#define WR_IB_MSG_0(v, instance) ddi_put32((instance)->regmap_handle, \
+ (uint32_t *)((uintptr_t)(instance)->regmap + IB_MSG_0_OFF), (v))
+
+#define RD_OB_MSG_0(instance) ddi_get32((instance)->regmap_handle, \
+ (uint32_t *)((uintptr_t)(instance)->regmap + OB_MSG_0_OFF))
+
+#define WR_IB_DOORBELL(v, instance) ddi_put32((instance)->regmap_handle, \
+ (uint32_t *)((uintptr_t)(instance)->regmap + IB_DOORBELL_OFF), (v))
+
+#define RD_IB_DOORBELL(instance) ddi_get32((instance)->regmap_handle, \
+ (uint32_t *)((uintptr_t)(instance)->regmap + IB_DOORBELL_OFF))
+
+#define WR_OB_INTR_STATUS(v, instance) ddi_put32((instance)->regmap_handle, \
+ (uint32_t *)((uintptr_t)(instance)->regmap + OB_INTR_STATUS_OFF), (v))
+
+#define RD_OB_INTR_STATUS(instance) ddi_get32((instance)->regmap_handle, \
+ (uint32_t *)((uintptr_t)(instance)->regmap + OB_INTR_STATUS_OFF))
+
+#define WR_OB_INTR_MASK(v, instance) ddi_put32((instance)->regmap_handle, \
+ (uint32_t *)((uintptr_t)(instance)->regmap + OB_INTR_MASK_OFF), (v))
+
+#define RD_OB_INTR_MASK(instance) ddi_get32((instance)->regmap_handle, \
+ (uint32_t *)((uintptr_t)(instance)->regmap + OB_INTR_MASK_OFF))
+
+#define WR_IB_QPORT(v, instance) ddi_put32((instance)->regmap_handle, \
+ (uint32_t *)((uintptr_t)(instance)->regmap + IB_QPORT_OFF), (v))
+
+#define WR_OB_DOORBELL_CLEAR(v, instance) ddi_put32((instance)->regmap_handle, \
+ (uint32_t *)((uintptr_t)(instance)->regmap + OB_DOORBELL_CLEAR_OFF), \
+ (v))
+
+#define RD_OB_SCRATCH_PAD_0(instance) ddi_get32((instance)->regmap_handle, \
+ (uint32_t *)((uintptr_t)(instance)->regmap + OB_SCRATCH_PAD_0_OFF))
+
+/*
+ * When FW is in MFI_STATE_READY or MFI_STATE_OPERATIONAL, the state data
+ * of Outbound Msg Reg 0 indicates max concurrent cmds supported, max SGEs
+ * supported per cmd and if 64-bit MFAs (M64) is enabled or disabled.
+ */
+#define MFI_OB_INTR_STATUS_MASK 0x00000002
+
+/*
+ * This MFI_REPLY_2108_MESSAGE_INTR flag is used also
+ * in enable_intr_ppc also. Hence bit 2, i.e. 0x4 has
+ * been set in this flag along with bit 1.
+ */
+#define MFI_REPLY_2108_MESSAGE_INTR 0x00000001
+#define MFI_REPLY_2108_MESSAGE_INTR_MASK 0x00000005
+
+#define MFI_POLL_TIMEOUT_SECS 60
+
+#define MFI_ENABLE_INTR(instance) ddi_put32((instance)->regmap_handle, \
+ (uint32_t *)((uintptr_t)(instance)->regmap + OB_INTR_MASK_OFF), 1)
+#define MFI_DISABLE_INTR(instance) \
+{ \
+ uint32_t disable = 1; \
+ uint32_t mask = ddi_get32((instance)->regmap_handle, \
+ (uint32_t *)((uintptr_t)(instance)->regmap + OB_INTR_MASK_OFF));\
+ mask &= ~disable; \
+ ddi_put32((instance)->regmap_handle, (uint32_t *) \
+ (uintptr_t)((instance)->regmap + OB_INTR_MASK_OFF), mask); \
+}
+
+/* By default, the firmware programs for 8 Kbytes of memory */
+#define DEFAULT_MFI_MEM_SZ 8192
+#define MINIMUM_MFI_MEM_SZ 4096
+
+/* DCMD Message Frame MAILBOX0-11 */
+#define DCMD_MBOX_SZ 12
+
+
+struct drsas_register_set {
+ uint32_t reserved_0[4];
+
+ uint32_t inbound_msg_0;
+ uint32_t inbound_msg_1;
+ uint32_t outbound_msg_0;
+ uint32_t outbound_msg_1;
+
+ uint32_t inbound_doorbell;
+ uint32_t inbound_intr_status;
+ uint32_t inbound_intr_mask;
+
+ uint32_t outbound_doorbell;
+ uint32_t outbound_intr_status;
+ uint32_t outbound_intr_mask;
+
+ uint32_t reserved_1[2];
+
+ uint32_t inbound_queue_port;
+ uint32_t outbound_queue_port;
+
+ uint32_t reserved_2[22];
+
+ uint32_t outbound_doorbell_clear;
+
+ uint32_t reserved_3[3];
+
+ uint32_t outbound_scratch_pad;
+
+ uint32_t reserved_4[3];
+
+ uint32_t inbound_low_queue_port;
+
+ uint32_t inbound_high_queue_port;
+
+ uint32_t reserved_5;
+ uint32_t index_registers[820];
+};
+
+struct drsas_sge32 {
+ uint32_t phys_addr;
+ uint32_t length;
+};
+
+struct drsas_sge64 {
+ uint64_t phys_addr;
+ uint32_t length;
+};
+
+union drsas_sgl {
+ struct drsas_sge32 sge32[1];
+ struct drsas_sge64 sge64[1];
+};
+
+struct drsas_header {
+ uint8_t cmd;
+ uint8_t sense_len;
+ uint8_t cmd_status;
+ uint8_t scsi_status;
+
+ uint8_t target_id;
+ uint8_t lun;
+ uint8_t cdb_len;
+ uint8_t sge_count;
+
+ uint32_t context;
+ uint8_t req_id;
+ uint8_t msgvector;
+ uint16_t pad_0;
+
+ uint16_t flags;
+ uint16_t timeout;
+ uint32_t data_xferlen;
+};
+
+union drsas_sgl_frame {
+ struct drsas_sge32 sge32[8];
+ struct drsas_sge64 sge64[5];
+};
+
+struct drsas_init_frame {
+ uint8_t cmd;
+ uint8_t reserved_0;
+ uint8_t cmd_status;
+
+ uint8_t reserved_1;
+ uint32_t reserved_2;
+
+ uint32_t context;
+ uint8_t req_id;
+ uint8_t msgvector;
+ uint16_t pad_0;
+
+ uint16_t flags;
+ uint16_t reserved_3;
+ uint32_t data_xfer_len;
+
+ uint32_t queue_info_new_phys_addr_lo;
+ uint32_t queue_info_new_phys_addr_hi;
+ uint32_t queue_info_old_phys_addr_lo;
+ uint32_t queue_info_old_phys_addr_hi;
+
+ uint32_t reserved_4[6];
+};
+
+struct drsas_init_queue_info {
+ uint32_t init_flags;
+ uint32_t reply_queue_entries;
+
+ uint32_t reply_queue_start_phys_addr_lo;
+ uint32_t reply_queue_start_phys_addr_hi;
+ uint32_t producer_index_phys_addr_lo;
+ uint32_t producer_index_phys_addr_hi;
+ uint32_t consumer_index_phys_addr_lo;
+ uint32_t consumer_index_phys_addr_hi;
+};
+
+struct drsas_io_frame {
+ uint8_t cmd;
+ uint8_t sense_len;
+ uint8_t cmd_status;
+ uint8_t scsi_status;
+
+ uint8_t target_id;
+ uint8_t access_byte;
+ uint8_t reserved_0;
+ uint8_t sge_count;
+
+ uint32_t context;
+ uint8_t req_id;
+ uint8_t msgvector;
+ uint16_t pad_0;
+
+ uint16_t flags;
+ uint16_t timeout;
+ uint32_t lba_count;
+
+ uint32_t sense_buf_phys_addr_lo;
+ uint32_t sense_buf_phys_addr_hi;
+
+ uint32_t start_lba_lo;
+ uint32_t start_lba_hi;
+
+ union drsas_sgl sgl;
+};
+
+struct drsas_pthru_frame {
+ uint8_t cmd;
+ uint8_t sense_len;
+ uint8_t cmd_status;
+ uint8_t scsi_status;
+
+ uint8_t target_id;
+ uint8_t lun;
+ uint8_t cdb_len;
+ uint8_t sge_count;
+
+ uint32_t context;
+ uint8_t req_id;
+ uint8_t msgvector;
+ uint16_t pad_0;
+
+ uint16_t flags;
+ uint16_t timeout;
+ uint32_t data_xfer_len;
+
+ uint32_t sense_buf_phys_addr_lo;
+ uint32_t sense_buf_phys_addr_hi;
+
+ uint8_t cdb[16];
+ union drsas_sgl sgl;
+};
+
+struct drsas_dcmd_frame {
+ uint8_t cmd;
+ uint8_t reserved_0;
+ uint8_t cmd_status;
+ uint8_t reserved_1[4];
+ uint8_t sge_count;
+
+ uint32_t context;
+ uint8_t req_id;
+ uint8_t msgvector;
+ uint16_t pad_0;
+
+ uint16_t flags;
+ uint16_t timeout;
+
+ uint32_t data_xfer_len;
+ uint32_t opcode;
+
+ union {
+ uint8_t b[DCMD_MBOX_SZ];
+ uint16_t s[6];
+ uint32_t w[3];
+ } mbox;
+
+ union drsas_sgl sgl;
+};
+
+struct drsas_abort_frame {
+ uint8_t cmd;
+ uint8_t reserved_0;
+ uint8_t cmd_status;
+
+ uint8_t reserved_1;
+ uint32_t reserved_2;
+
+ uint32_t context;
+ uint8_t req_id;
+ uint8_t msgvector;
+ uint16_t pad_0;
+
+ uint16_t flags;
+ uint16_t reserved_3;
+ uint32_t reserved_4;
+
+ uint32_t abort_context;
+ uint32_t pad_1;
+
+ uint32_t abort_mfi_phys_addr_lo;
+ uint32_t abort_mfi_phys_addr_hi;
+
+ uint32_t reserved_5[6];
+};
+
+struct drsas_smp_frame {
+ uint8_t cmd;
+ uint8_t reserved_1;
+ uint8_t cmd_status;
+ uint8_t connection_status;
+
+ uint8_t reserved_2[3];
+ uint8_t sge_count;
+
+ uint32_t context;
+ uint8_t req_id;
+ uint8_t msgvector;
+ uint16_t pad_0;
+
+ uint16_t flags;
+ uint16_t timeout;
+
+ uint32_t data_xfer_len;
+
+ uint64_t sas_addr;
+
+ union drsas_sgl sgl[2];
+};
+
+struct drsas_stp_frame {
+ uint8_t cmd;
+ uint8_t reserved_1;
+ uint8_t cmd_status;
+ uint8_t connection_status;
+
+ uint8_t target_id;
+ uint8_t reserved_2[2];
+ uint8_t sge_count;
+
+ uint32_t context;
+ uint8_t req_id;
+ uint8_t msgvector;
+ uint16_t pad_0;
+
+ uint16_t flags;
+ uint16_t timeout;
+
+ uint32_t data_xfer_len;
+
+ uint16_t fis[10];
+ uint32_t stp_flags;
+ union drsas_sgl sgl;
+};
+
+union drsas_frame {
+ struct drsas_header hdr;
+ struct drsas_init_frame init;
+ struct drsas_io_frame io;
+ struct drsas_pthru_frame pthru;
+ struct drsas_dcmd_frame dcmd;
+ struct drsas_abort_frame abort;
+ struct drsas_smp_frame smp;
+ struct drsas_stp_frame stp;
+
+ uint8_t raw_bytes[64];
+};
+
+typedef struct drsas_pd_address {
+ uint16_t device_id;
+ uint16_t encl_id;
+
+ union {
+ struct {
+ uint8_t encl_index;
+ uint8_t slot_number;
+ } pd_address;
+ struct {
+ uint8_t encl_position;
+ uint8_t encl_connector_index;
+ } encl_address;
+ }address;
+
+ uint8_t scsi_dev_type;
+
+ union {
+ uint8_t port_bitmap;
+ uint8_t port_numbers;
+ } connected;
+
+ uint64_t sas_addr[2];
+} drsas_pd_address_t;
+
+union drsas_evt_class_locale {
+ struct {
+ uint16_t locale;
+ uint8_t reserved;
+ int8_t class;
+ } members;
+
+ uint32_t word;
+};
+
+struct drsas_evt_log_info {
+ uint32_t newest_seq_num;
+ uint32_t oldest_seq_num;
+ uint32_t clear_seq_num;
+ uint32_t shutdown_seq_num;
+ uint32_t boot_seq_num;
+};
+
+struct drsas_progress {
+ uint16_t progress;
+ uint16_t elapsed_seconds;
+};
+
+struct drsas_evtarg_ld {
+ uint16_t target_id;
+ uint8_t ld_index;
+ uint8_t reserved;
+};
+
+struct drsas_evtarg_pd {
+ uint16_t device_id;
+ uint8_t encl_index;
+ uint8_t slot_number;
+};
+
+struct drsas_evt_detail {
+ uint32_t seq_num;
+ uint32_t time_stamp;
+ uint32_t code;
+ union drsas_evt_class_locale cl;
+ uint8_t arg_type;
+ uint8_t reserved1[15];
+
+ union {
+ struct {
+ struct drsas_evtarg_pd pd;
+ uint8_t cdb_length;
+ uint8_t sense_length;
+ uint8_t reserved[2];
+ uint8_t cdb[16];
+ uint8_t sense[64];
+ } cdbSense;
+
+ struct drsas_evtarg_ld ld;
+
+ struct {
+ struct drsas_evtarg_ld ld;
+ uint64_t count;
+ } ld_count;
+
+ struct {
+ uint64_t lba;
+ struct drsas_evtarg_ld ld;
+ } ld_lba;
+
+ struct {
+ struct drsas_evtarg_ld ld;
+ uint32_t prevOwner;
+ uint32_t newOwner;
+ } ld_owner;
+
+ struct {
+ uint64_t ld_lba;
+ uint64_t pd_lba;
+ struct drsas_evtarg_ld ld;
+ struct drsas_evtarg_pd pd;
+ } ld_lba_pd_lba;
+
+ struct {
+ struct drsas_evtarg_ld ld;
+ struct drsas_progress prog;
+ } ld_prog;
+
+ struct {
+ struct drsas_evtarg_ld ld;
+ uint32_t prev_state;
+ uint32_t new_state;
+ } ld_state;
+
+ struct {
+ uint64_t strip;
+ struct drsas_evtarg_ld ld;
+ } ld_strip;
+
+ struct drsas_evtarg_pd pd;
+
+ struct {
+ struct drsas_evtarg_pd pd;
+ uint32_t err;
+ } pd_err;
+
+ struct {
+ uint64_t lba;
+ struct drsas_evtarg_pd pd;
+ } pd_lba;
+
+ struct {
+ uint64_t lba;
+ struct drsas_evtarg_pd pd;
+ struct drsas_evtarg_ld ld;
+ } pd_lba_ld;
+
+ struct {
+ struct drsas_evtarg_pd pd;
+ struct drsas_progress prog;
+ } pd_prog;
+
+ struct {
+ struct drsas_evtarg_pd pd;
+ uint32_t prevState;
+ uint32_t newState;
+ } pd_state;
+
+ struct {
+ uint16_t vendorId;
+ uint16_t deviceId;
+ uint16_t subVendorId;
+ uint16_t subDeviceId;
+ } pci;
+
+ uint32_t rate;
+ char str[96];
+
+ struct {
+ uint32_t rtc;
+ uint32_t elapsedSeconds;
+ } time;
+
+ struct {
+ uint32_t ecar;
+ uint32_t elog;
+ char str[64];
+ } ecc;
+
+ drsas_pd_address_t pd_addr;
+
+ uint8_t b[96];
+ uint16_t s[48];
+ uint32_t w[24];
+ uint64_t d[12];
+ } args;
+
+ char description[128];
+
+};
+
+/* only 63 are usable by the application */
+#define MAX_LOGICAL_DRIVES 64
+/* only 255 physical devices may be used */
+#define MAX_PHYSICAL_DEVICES 256
+#define MAX_PD_PER_ENCLOSURE 64
+/* maximum disks per array */
+#define MAX_ROW_SIZE 32
+/* maximum spans per logical drive */
+#define MAX_SPAN_DEPTH 8
+/* maximum number of arrays a hot spare may be dedicated to */
+#define MAX_ARRAYS_DEDICATED 16
+/* maximum number of arrays which may exist */
+#define MAX_ARRAYS 128
+/* maximum number of foreign configs that may ha managed at once */
+#define MAX_FOREIGN_CONFIGS 8
+/* maximum spares (global and dedicated combined) */
+#define MAX_SPARES_FOR_THE_CONTROLLER MAX_PHYSICAL_DEVICES
+/* maximum possible Target IDs (i.e. 0 to 63) */
+#define MAX_TARGET_ID 63
+/* maximum number of supported enclosures */
+#define MAX_ENCLOSURES 32
+/* maximum number of PHYs per controller */
+#define MAX_PHYS_PER_CONTROLLER 16
+/* maximum number of LDs per array (due to DDF limitations) */
+#define MAX_LDS_PER_ARRAY 16
+
+/*
+ * -----------------------------------------------------------------------------
+ * -----------------------------------------------------------------------------
+ *
+ * Logical Drive commands
+ *
+ * -----------------------------------------------------------------------------
+ * -----------------------------------------------------------------------------
+ */
+#define DR_DCMD_LD 0x03000000, /* Logical Device (LD) opcodes */
+
+/*
+ * Input: dcmd.opcode - DR_DCMD_LD_GET_LIST
+ * dcmd.mbox - reserved
+ * dcmd.sge IN - ptr to returned DR_LD_LIST structure
+ * Desc: Return the logical drive list structure
+ * Status: No error
+ */
+
+/*
+ * defines the logical drive reference structure
+ */
+typedef union _DR_LD_REF { /* LD reference structure */
+ struct {
+ uint8_t targetId; /* LD target id (0 to MAX_TARGET_ID) */
+ uint8_t reserved; /* reserved for in line with DR_PD_REF */
+ uint16_t seqNum; /* Sequence Number */
+ } ld_ref;
+ uint32_t ref; /* shorthand reference to full 32-bits */
+} DR_LD_REF; /* 4 bytes */
+
+/*
+ * defines the logical drive list structure
+ */
+typedef struct _DR_LD_LIST {
+ uint32_t ldCount; /* number of LDs */
+ uint32_t reserved; /* pad to 8-byte boundary */
+ struct {
+ DR_LD_REF ref; /* LD reference */
+ uint8_t state; /* current LD state (DR_LD_STATE) */
+ uint8_t reserved[3]; /* pad to 8-byte boundary */
+ uint64_t size; /* LD size */
+ } ldList[MAX_LOGICAL_DRIVES];
+} DR_LD_LIST;
+
+struct drsas_drv_ver {
+ uint8_t signature[12];
+ uint8_t os_name[16];
+ uint8_t os_ver[12];
+ uint8_t drv_name[20];
+ uint8_t drv_ver[32];
+ uint8_t drv_rel_date[20];
+};
+
+#define PCI_TYPE0_ADDRESSES 6
+#define PCI_TYPE1_ADDRESSES 2
+#define PCI_TYPE2_ADDRESSES 5
+
+struct drsas_pci_common_header {
+ uint16_t vendorID; /* (ro) */
+ uint16_t deviceID; /* (ro) */
+ uint16_t command; /* Device control */
+ uint16_t status;
+ uint8_t revisionID; /* (ro) */
+ uint8_t progIf; /* (ro) */
+ uint8_t subClass; /* (ro) */
+ uint8_t baseClass; /* (ro) */
+ uint8_t cacheLineSize; /* (ro+) */
+ uint8_t latencyTimer; /* (ro+) */
+ uint8_t headerType; /* (ro) */
+ uint8_t bist; /* Built in self test */
+
+ union {
+ struct {
+ uint32_t baseAddresses[PCI_TYPE0_ADDRESSES];
+ uint32_t cis;
+ uint16_t subVendorID;
+ uint16_t subSystemID;
+ uint32_t romBaseAddress;
+ uint8_t capabilitiesPtr;
+ uint8_t reserved1[3];
+ uint32_t reserved2;
+ uint8_t interruptLine;
+ uint8_t interruptPin; /* (ro) */
+ uint8_t minimumGrant; /* (ro) */
+ uint8_t maximumLatency; /* (ro) */
+ } type_0;
+
+ struct {
+ uint32_t baseAddresses[PCI_TYPE1_ADDRESSES];
+ uint8_t primaryBus;
+ uint8_t secondaryBus;
+ uint8_t subordinateBus;
+ uint8_t secondaryLatency;
+ uint8_t ioBase;
+ uint8_t ioLimit;
+ uint16_t secondaryStatus;
+ uint16_t memoryBase;
+ uint16_t memoryLimit;
+ uint16_t prefetchBase;
+ uint16_t prefetchLimit;
+ uint32_t prefetchBaseUpper32;
+ uint32_t prefetchLimitUpper32;
+ uint16_t ioBaseUpper16;
+ uint16_t ioLimitUpper16;
+ uint8_t capabilitiesPtr;
+ uint8_t reserved1[3];
+ uint32_t romBaseAddress;
+ uint8_t interruptLine;
+ uint8_t interruptPin;
+ uint16_t bridgeControl;
+ } type_1;
+
+ struct {
+ uint32_t socketRegistersBaseAddress;
+ uint8_t capabilitiesPtr;
+ uint8_t reserved;
+ uint16_t secondaryStatus;
+ uint8_t primaryBus;
+ uint8_t secondaryBus;
+ uint8_t subordinateBus;
+ uint8_t secondaryLatency;
+ struct {
+ uint32_t base;
+ uint32_t limit;
+ } range[PCI_TYPE2_ADDRESSES-1];
+ uint8_t interruptLine;
+ uint8_t interruptPin;
+ uint16_t bridgeControl;
+ } type_2;
+ } header;
+};
+
+struct drsas_pci_link_capability {
+ union {
+ struct {
+ uint32_t linkSpeed :4;
+ uint32_t linkWidth :6;
+ uint32_t aspmSupport :2;
+ uint32_t losExitLatency :3;
+ uint32_t l1ExitLatency :3;
+ uint32_t rsvdp :6;
+ uint32_t portNumber :8;
+ } bits;
+
+ uint32_t asUlong;
+ } cap;
+
+};
+
+struct drsas_pci_link_status_capability {
+ union {
+ struct {
+ uint16_t linkSpeed :4;
+ uint16_t negotiatedLinkWidth :6;
+ uint16_t linkTrainingError :1;
+ uint16_t linkTraning :1;
+ uint16_t slotClockConfig :1;
+ uint16_t rsvdZ :3;
+ } bits;
+
+ uint16_t asUshort;
+ } stat_cap;
+
+ uint16_t reserved;
+
+};
+
+struct drsas_pci_capabilities {
+ struct drsas_pci_link_capability linkCapability;
+ struct drsas_pci_link_status_capability linkStatusCapability;
+};
+
+struct drsas_pci_information
+{
+ uint32_t busNumber;
+ uint8_t deviceNumber;
+ uint8_t functionNumber;
+ uint8_t interruptVector;
+ uint8_t reserved;
+ struct drsas_pci_common_header pciHeaderInfo;
+ struct drsas_pci_capabilities capability;
+ uint8_t reserved2[32];
+};
+
+struct drsas_ioctl {
+ uint16_t version;
+ uint16_t controller_id;
+ uint8_t signature[8];
+ uint32_t reserved_1;
+ uint32_t control_code;
+ uint32_t reserved_2[2];
+ uint8_t frame[64];
+ union drsas_sgl_frame sgl_frame;
+ uint8_t sense_buff[DRSAS_MAX_SENSE_LENGTH];
+ uint8_t data[1];
+};
+
+struct drsas_aen {
+ uint16_t host_no;
+ uint16_t cmd_status;
+ uint32_t seq_num;
+ uint32_t class_locale_word;
+};
+#pragma pack()
+
+#ifndef DDI_VENDOR_LSI
+#define DDI_VENDOR_LSI "LSI"
+#endif /* DDI_VENDOR_LSI */
+
+static int drsas_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
+static int drsas_attach(dev_info_t *, ddi_attach_cmd_t);
+static int drsas_reset(dev_info_t *, ddi_reset_cmd_t);
+static int drsas_detach(dev_info_t *, ddi_detach_cmd_t);
+static int drsas_open(dev_t *, int, int, cred_t *);
+static int drsas_close(dev_t, int, int, cred_t *);
+static int drsas_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
+
+static int drsas_tran_tgt_init(dev_info_t *, dev_info_t *,
+ scsi_hba_tran_t *, struct scsi_device *);
+static struct scsi_pkt *drsas_tran_init_pkt(struct scsi_address *, register
+ struct scsi_pkt *, struct buf *, int, int, int, int,
+ int (*)(), caddr_t);
+static int drsas_tran_start(struct scsi_address *,
+ register struct scsi_pkt *);
+static int drsas_tran_abort(struct scsi_address *, struct scsi_pkt *);
+static int drsas_tran_reset(struct scsi_address *, int);
+static int drsas_tran_getcap(struct scsi_address *, char *, int);
+static int drsas_tran_setcap(struct scsi_address *, char *, int, int);
+static void drsas_tran_destroy_pkt(struct scsi_address *,
+ struct scsi_pkt *);
+static void drsas_tran_dmafree(struct scsi_address *, struct scsi_pkt *);
+static void drsas_tran_sync_pkt(struct scsi_address *, struct scsi_pkt *);
+static uint_t drsas_isr();
+static uint_t drsas_softintr();
+
+static int init_mfi(struct drsas_instance *);
+static int drsas_free_dma_obj(struct drsas_instance *, dma_obj_t);
+static int drsas_alloc_dma_obj(struct drsas_instance *, dma_obj_t *,
+ uchar_t);
+static struct drsas_cmd *get_mfi_pkt(struct drsas_instance *);
+static void return_mfi_pkt(struct drsas_instance *,
+ struct drsas_cmd *);
+
+static void free_space_for_mfi(struct drsas_instance *);
+static void free_additional_dma_buffer(struct drsas_instance *);
+static int alloc_additional_dma_buffer(struct drsas_instance *);
+static int read_fw_status_reg_ppc(struct drsas_instance *);
+static void issue_cmd_ppc(struct drsas_cmd *, struct drsas_instance *);
+static int issue_cmd_in_poll_mode_ppc(struct drsas_instance *,
+ struct drsas_cmd *);
+static int issue_cmd_in_sync_mode_ppc(struct drsas_instance *,
+ struct drsas_cmd *);
+static void enable_intr_ppc(struct drsas_instance *);
+static void disable_intr_ppc(struct drsas_instance *);
+static int intr_ack_ppc(struct drsas_instance *);
+static int mfi_state_transition_to_ready(struct drsas_instance *);
+static void destroy_mfi_frame_pool(struct drsas_instance *);
+static int create_mfi_frame_pool(struct drsas_instance *);
+static int drsas_dma_alloc(struct drsas_instance *, struct scsi_pkt *,
+ struct buf *, int, int (*)());
+static int drsas_dma_move(struct drsas_instance *,
+ struct scsi_pkt *, struct buf *);
+static void flush_cache(struct drsas_instance *instance);
+static void display_scsi_inquiry(caddr_t);
+static int start_mfi_aen(struct drsas_instance *instance);
+static int handle_drv_ioctl(struct drsas_instance *instance,
+ struct drsas_ioctl *ioctl, int mode);
+static int handle_mfi_ioctl(struct drsas_instance *instance,
+ struct drsas_ioctl *ioctl, int mode);
+static int handle_mfi_aen(struct drsas_instance *instance,
+ struct drsas_aen *aen);
+static void fill_up_drv_ver(struct drsas_drv_ver *dv);
+static struct drsas_cmd *build_cmd(struct drsas_instance *instance,
+ struct scsi_address *ap, struct scsi_pkt *pkt,
+ uchar_t *cmd_done);
+static int register_mfi_aen(struct drsas_instance *instance,
+ uint32_t seq_num, uint32_t class_locale_word);
+static int issue_mfi_pthru(struct drsas_instance *instance, struct
+ drsas_ioctl *ioctl, struct drsas_cmd *cmd, int mode);
+static int issue_mfi_dcmd(struct drsas_instance *instance, struct
+ drsas_ioctl *ioctl, struct drsas_cmd *cmd, int mode);
+static int issue_mfi_smp(struct drsas_instance *instance, struct
+ drsas_ioctl *ioctl, struct drsas_cmd *cmd, int mode);
+static int issue_mfi_stp(struct drsas_instance *instance, struct
+ drsas_ioctl *ioctl, struct drsas_cmd *cmd, int mode);
+static int abort_aen_cmd(struct drsas_instance *instance,
+ struct drsas_cmd *cmd_to_abort);
+
+static int drsas_common_check(struct drsas_instance *instance,
+ struct drsas_cmd *cmd);
+static void drsas_fm_init(struct drsas_instance *instance);
+static void drsas_fm_fini(struct drsas_instance *instance);
+static int drsas_fm_error_cb(dev_info_t *, ddi_fm_error_t *,
+ const void *);
+static void drsas_fm_ereport(struct drsas_instance *instance,
+ char *detail);
+static int drsas_check_dma_handle(ddi_dma_handle_t handle);
+static int drsas_check_acc_handle(ddi_acc_handle_t handle);
+
+static void drsas_rem_intrs(struct drsas_instance *instance);
+static int drsas_add_intrs(struct drsas_instance *instance, int intr_type);
+
+static void drsas_tran_tgt_free(dev_info_t *, dev_info_t *,
+ scsi_hba_tran_t *, struct scsi_device *);
+static int drsas_tran_bus_config(dev_info_t *, uint_t,
+ ddi_bus_config_op_t, void *, dev_info_t **);
+static int drsas_parse_devname(char *, int *, int *);
+static int drsas_config_all_devices(struct drsas_instance *);
+static int drsas_config_scsi_device(struct drsas_instance *,
+ struct scsi_device *, dev_info_t **);
+static int drsas_config_ld(struct drsas_instance *, uint16_t,
+ uint8_t, dev_info_t **);
+static dev_info_t *drsas_find_child(struct drsas_instance *, uint16_t,
+ uint8_t);
+static int drsas_name_node(dev_info_t *, char *, int);
+static void drsas_issue_evt_taskq(struct drsas_eventinfo *);
+static int drsas_service_evt(struct drsas_instance *, int, int, int,
+ uint64_t);
+static int drsas_mode_sense_build(struct scsi_pkt *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _DR_SAS_H_ */
diff --git a/usr/src/uts/common/io/dr_sas/dr_sas_list.h b/usr/src/uts/common/io/dr_sas/dr_sas_list.h
new file mode 100644
index 0000000000..4154a77796
--- /dev/null
+++ b/usr/src/uts/common/io/dr_sas/dr_sas_list.h
@@ -0,0 +1,212 @@
+/*
+ * dr_sas_list.h: header for dr_sas
+ *
+ * Solaris MegaRAID driver for SAS2.0 controllers
+ * Copyright (c) 2008-2009, LSI Logic Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the author nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _DR_SAS_LIST_H_
+#define _DR_SAS_LIST_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+struct mlist_head {
+ struct mlist_head *next, *prev;
+};
+
+typedef struct mlist_head mlist_t;
+
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+#define LIST_HEAD(name) \
+ struct mlist_head name = LIST_HEAD_INIT(name)
+
+#define INIT_LIST_HEAD(ptr) { \
+ (ptr)->next = (ptr); (ptr)->prev = (ptr); \
+}
+
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static void __list_add(struct mlist_head *new,
+ struct mlist_head *prev,
+ struct mlist_head *next)
+{
+ next->prev = new;
+ new->next = next;
+ new->prev = prev;
+ prev->next = new;
+}
+
+
+/*
+ * mlist_add - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+static void mlist_add(struct mlist_head *new, struct mlist_head *head)
+{
+ __list_add(new, head, head->next);
+}
+
+
+/*
+ * mlist_add_tail - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head.
+ * This is useful for implementing queues.
+ */
+static void mlist_add_tail(struct mlist_head *new, struct mlist_head *head)
+{
+ __list_add(new, head->prev, head);
+}
+
+
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static void __list_del(struct mlist_head *prev,
+ struct mlist_head *next)
+{
+ next->prev = prev;
+ prev->next = next;
+}
+
+
+/*
+ * mlist_del_init - deletes entry from list and reinitialize it.
+ * @entry: the element to delete from the list.
+ */
+static void mlist_del_init(struct mlist_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+ INIT_LIST_HEAD(entry);
+}
+
+
+/*
+ * mlist_empty - tests whether a list is empty
+ * @head: the list to test.
+ */
+static int mlist_empty(struct mlist_head *head)
+{
+ return (head->next == head);
+}
+
+
+/*
+ * mlist_splice - join two lists
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ */
+static void mlist_splice(struct mlist_head *list, struct mlist_head *head)
+{
+ struct mlist_head *first = list->next;
+
+ if (first != list) {
+ struct mlist_head *last = list->prev;
+ struct mlist_head *at = head->next;
+
+ first->prev = head;
+ head->next = first;
+
+ last->next = at;
+ at->prev = last;
+ }
+}
+
+
+/*
+ * mlist_entry - get the struct for this entry
+ * @ptr: the &struct mlist_head pointer.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_struct within the struct.
+ */
+#define mlist_entry(ptr, type, member) \
+ ((type *)((size_t)(ptr) - offsetof(type, member)))
+
+
+/*
+ * mlist_for_each - iterate over a list
+ * @pos: the &struct mlist_head to use as a loop counter.
+ * @head: the head for your list.
+ */
+#define mlist_for_each(pos, head) \
+ for (pos = (head)->next, prefetch(pos->next); pos != (head); \
+ pos = pos->next, prefetch(pos->next))
+
+
+/*
+ * mlist_for_each_safe - iterate over a list safe against removal of list entry
+ * @pos: the &struct mlist_head to use as a loop counter.
+ * @n: another &struct mlist_head to use as temporary storage
+ * @head: the head for your list.
+ */
+#define mlist_for_each_safe(pos, n, head) \
+ for (pos = (head)->next, n = pos->next; pos != (head); \
+ pos = n, n = pos->next)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _DR_SAS_LIST_H_ */
diff --git a/usr/src/uts/common/io/fibre-channel/fca/oce/oce_rx.c b/usr/src/uts/common/io/fibre-channel/fca/oce/oce_rx.c
index c8af6ae527..2930fe578c 100644
--- a/usr/src/uts/common/io/fibre-channel/fca/oce/oce_rx.c
+++ b/usr/src/uts/common/io/fibre-channel/fca/oce/oce_rx.c
@@ -532,8 +532,7 @@ oce_drain_rq_cq(void *arg)
if (dev->function_mode & FLEX10_MODE) {
if (cqe->u0.s.vlan_tag_present &&
cqe->u0.s.qnq) {
- oce_rx_insert_tag(mp,
- cqe->u0.s.vlan_tag);
+ oce_rx_insert_tag(mp, cqe->u0.s.vlan_tag);
}
} else if (cqe->u0.s.vlan_tag_present) {
oce_rx_insert_tag(mp, cqe->u0.s.vlan_tag);
diff --git a/usr/src/uts/common/io/gsqueue/gsqueue.c b/usr/src/uts/common/io/gsqueue/gsqueue.c
new file mode 100644
index 0000000000..b484b16142
--- /dev/null
+++ b/usr/src/uts/common/io/gsqueue/gsqueue.c
@@ -0,0 +1,612 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Serialization queues are a technique used in illumos to provide what's
+ * commonly known as a 'vertical' perimeter. The idea (described a bit in
+ * uts/common/inet/squeue.c) is to provide a means to make sure that message
+ * blocks (mblk_t) are processed in a specific order. Subsystems like ip and vnd
+ * consume these on different policies, ip on a conn_t basis, vnd on a per
+ * device basis, and use this to ensure that only one packet is being processed
+ * at a given time.
+ *
+ * Serialization queues were originally used by ip. As part of that
+ * implementation, many of the details of ip were baked into it. That includes
+ * things like conn_t, ip receive attributes, and the notion of sets. While an
+ * individual serialization queue, or gsqueue_t, is a useful level of
+ * abstraction, it isn't the basis on which monst consumers want to manage them.
+ * Instead, we have the notion of a set of serialization queues. These sets are
+ * DR (CPU Dynamic reconfiguration) aware, and allow consumers to have a
+ * gsqueue_t per CPU to fanout on without managing them all itself. In the
+ * original implementation, this existed, but they were heavily tied into the
+ * infrastructure of IP, and its notion of polling on the underlying MAC
+ * devices.
+ *
+ * The result of that past is a new interface to serialization queues and a
+ * similar, but slightly different, abstraction to sets of these
+ * (gsqueue_set_t). When designing this there are two different approaches that
+ * one could consider. The first is that the system has one gsqueue_set_t that
+ * the entire world shares, whether IP or some other consumer. The other is that
+ * every consumer has their own set.
+ *
+ * The trade offs between these two failure modes are the pathological failure
+ * modes. There is no guarantee that any two consumers here are equivalent. In
+ * fact, they very likely have very different latency profiles. If they are
+ * being processed in the same queue, that can lead to very odd behaviors. More
+ * generally, if we have a series of processing functions from one consumer
+ * which are generally short, and another which are generally long, that'll
+ * cause undue latency that's harder to observe. If we instead take the approach
+ * that each consumer should have its own set that it fans out over then we
+ * won't end up with the problem that a given serialization queue will have
+ * multiple latency profiles, but instead we'll see cpu contention for the bound
+ * gsqueue_t worker thread. Keep in mind though, that only the gsqueue_t worker
+ * thread is bound and it is in fact possible for it to be processed by other
+ * threads on other CPUs.
+ *
+ * We've opted to go down the second path, so each consumer has its own
+ * independent set of serialization queues that it is bound over.
+ *
+ * Structure Hierarchies
+ * ---------------------
+ *
+ * At the top level, we have a single list of gsqueue_set_t. The gsqueue_set_t
+ * encapsulates all the per-CPU gsqueue_t that exist in the form of
+ * gsqueue_cpu_t. The gsqueue_cpu_t has been designed such that it could
+ * accommodate more than one gsqueue_t, but today there is a one to one mapping.
+ *
+ * We maintain two different lists of gsqueue_cpu_t, the active and defunct
+ * sets. The active set is maintained in the array `gs_cpus`. There are NCPU
+ * entries available in `gs_cpus` with the total number of currently active cpus
+ * described in `gs_ncpus`. The ordering of `gs_cpus` is unimportant. When
+ * there is no longer a need for a given binding (see the following section for
+ * more explanation on when this is the case) then we move the entry to the
+ * `gs_defunct` list which is just a singly linked list of gsqueue_cpu_t.
+ *
+ * In addition, each gsqueue_set_t can have a series of callbacks registered
+ * with it. These are described in the following section. Graphically, a given
+ * gsqueue_set_t looks roughly like the following:
+ *
+ * +---------------+
+ * | gsqueue_set_t |
+ * +---------------+
+ * | | |
+ * | | * . . . gs_cpus
+ * | | |
+ * | | | +-------------------------------------------------+
+ * | | +----->| gsqueue_cpu_t || gsqueue_cpu_t || gsqueue_cpu_t |...
+ * | | +-------------------------------------------------+
+ * | |
+ * | * . . . gs_defunct
+ * | |
+ * | | +---------------+ +---------------+ +---------------+
+ * | +--->| gsqueue_cpu_t |-->| gsqueue_cpu_t |-->| gsqueue_cpu_t |...
+ * | +---------------+ +---------------+ +---------------+
+ * * . . . gs_cbs
+ * |
+ * | +--------------+ +--------------+ +--------------+
+ * +--->| gsqueue_cb_t |-->| gsqueue_cb_t |->| gsqueue_cb_t |...
+ * +--------------+ +--------------+ +--------------+
+ *
+ * CPU DR, gsqueue_t, and gsqueue_t
+ * --------------------------------
+ *
+ * Recall, that every serialization queue (gsqueue_t or squeue_t) has a worker
+ * thread that may end up doing work. As part of supporting fanout, we have one
+ * gsqueue_t per CPU, and its worker thread is bound to that CPU. Because of
+ * this binding, we need to deal with CPU DR changes.
+ *
+ * The gsqueue driver maintains a single CPU DR callback that is used for the
+ * entire sub-system. We break down CPU DR events into three groups. Offline
+ * events, online events, and events we can ignore. When the first group occurs,
+ * we need to go through every gsqueue_t, find the gsqueue_cpu_t that
+ * corresponds to that processor id, and unbind all of its gsqueue_t's. It's
+ * rather important that we only unbind the gsqueue_t's and not actually destroy
+ * them. When this happens, they could very easily have data queued inside of
+ * them and it's unreasonable to just throw out everything in them at this
+ * point. The data remains intact and service continues uinterrupted.
+ *
+ * When we receive an online event, we do the opposite. We try to find a
+ * gsqueue_cpu_t that previously was bound to this CPU (by leaving its gqc_cpuid
+ * field intact) in the defunct list. If we find one, we remove it from the
+ * defunct list and add it to the active list as well as binding the gsqueue_t
+ * to the CPU in question. If we don't find one, then we create a new one.
+ *
+ * To deal with these kinds of situations, we allow a consumer to register
+ * callbacks for the gsqueue_t that they are interested in. These callbacks will
+ * fire whenever we are handling a topology change. The design of the callbacks
+ * is not that the user can take any administrative action during them, but
+ * rather set something for them to do asynchronously. It is illegal to make any
+ * calls into the gsqueue system while you are in a callback.
+ *
+ * Locking
+ * -------
+ *
+ * The lock ordering here is fairly straightforward. Due to our use of CPU
+ * binding and the CPU DR callbacks, we have an additional lock to consider
+ * cpu_lock. Because of that, the following are the rules for locking:
+ *
+ *
+ * o If performing binding operations, you must grab cpu_lock. cpu_lock is
+ * also at the top of the order.
+ *
+ * o cpu_lock > gsqueue_lock > gsqueue_t`gs_lock > squeue_t`sq_lock
+ * If you need to take multiple locks, you must take the greatest
+ * (left-most) one first.
+ */
+
+#include <sys/types.h>
+#include <sys/conf.h>
+#include <sys/stat.h>
+#include <sys/kmem.h>
+#include <sys/stream.h>
+#include <sys/modctl.h>
+#include <sys/cpuvar.h>
+#include <sys/list.h>
+#include <sys/sysmacros.h>
+
+#include <sys/gsqueue.h>
+#include <sys/squeue_impl.h>
+
+typedef struct gsqueue_cb {
+ struct gsqueue_cb *gcb_next;
+ gsqueue_cb_f gcb_func;
+ void *gcb_arg;
+} gsqueue_cb_t;
+
+typedef struct gsqueue_cpu {
+ struct gsqueue_cpu *gqc_next;
+ squeue_t *gqc_head;
+ processorid_t gqc_cpuid;
+} gsqueue_cpu_t;
+
+struct gsqueue_set {
+ list_node_t gs_next;
+ uint_t gs_wwait;
+ pri_t gs_wpri;
+ kmutex_t gs_lock;
+ int gs_ncpus;
+ gsqueue_cpu_t **gs_cpus;
+ gsqueue_cpu_t *gs_defunct;
+ gsqueue_cb_t *gs_cbs;
+};
+
+static kmutex_t gsqueue_lock;
+static list_t gsqueue_list;
+static kmem_cache_t *gsqueue_cb_cache;
+static kmem_cache_t *gsqueue_cpu_cache;
+static kmem_cache_t *gsqueue_set_cache;
+
+static gsqueue_cpu_t *
+gsqueue_cpu_create(uint_t wwait, pri_t wpri, processorid_t cpuid)
+{
+ gsqueue_cpu_t *scp;
+
+ scp = kmem_cache_alloc(gsqueue_cpu_cache, KM_SLEEP);
+
+ scp->gqc_next = NULL;
+ scp->gqc_cpuid = cpuid;
+ scp->gqc_head = squeue_create(wwait, wpri, B_FALSE);
+ scp->gqc_head->sq_state = SQS_DEFAULT;
+ squeue_bind(scp->gqc_head, cpuid);
+
+ return (scp);
+}
+
+static void
+gsqueue_cpu_destroy(gsqueue_cpu_t *scp)
+{
+ squeue_destroy(scp->gqc_head);
+ kmem_cache_free(gsqueue_cpu_cache, scp);
+}
+
+gsqueue_set_t *
+gsqueue_set_create(uint_t wwait, pri_t wpri)
+{
+ int i;
+ gsqueue_set_t *gssp;
+
+ gssp = kmem_cache_alloc(gsqueue_set_cache, KM_SLEEP);
+ gssp->gs_wwait = wwait;
+ gssp->gs_wpri = wpri;
+ gssp->gs_ncpus = 0;
+
+ /*
+ * We're grabbing CPU lock. Once we let go of it we have to ensure all
+ * set up of the gsqueue_set_t is complete, as it'll be in there for the
+ * various CPU DR bits.
+ */
+ mutex_enter(&cpu_lock);
+
+ for (i = 0; i < NCPU; i++) {
+ gsqueue_cpu_t *scp;
+ cpu_t *cp = cpu_get(i);
+ if (cp != NULL && CPU_ACTIVE(cp) &&
+ cp->cpu_flags & CPU_EXISTS) {
+ scp = gsqueue_cpu_create(wwait, wpri, cp->cpu_id);
+ gssp->gs_cpus[gssp->gs_ncpus] = scp;
+ gssp->gs_ncpus++;
+ }
+ }
+
+ /* Finally we can add it to our global list and be done */
+ mutex_enter(&gsqueue_lock);
+ list_insert_tail(&gsqueue_list, gssp);
+ mutex_exit(&gsqueue_lock);
+ mutex_exit(&cpu_lock);
+
+ return (gssp);
+}
+
+void
+gsqueue_set_destroy(gsqueue_set_t *gssp)
+{
+ int i;
+ gsqueue_cpu_t *scp;
+
+ /*
+ * Go through and unbind all of the squeues while cpu_lock is held and
+ * move them to the defunct list. Once that's done, we don't need to do
+ * anything else with cpu_lock.
+ */
+ mutex_enter(&cpu_lock);
+ mutex_enter(&gsqueue_lock);
+ list_remove(&gsqueue_list, gssp);
+ mutex_exit(&gsqueue_lock);
+
+ mutex_enter(&gssp->gs_lock);
+
+ for (i = 0; i < gssp->gs_ncpus; i++) {
+ scp = gssp->gs_cpus[i];
+ squeue_unbind(scp->gqc_head);
+ scp->gqc_next = gssp->gs_defunct;
+ gssp->gs_defunct = scp;
+ gssp->gs_cpus[i] = NULL;
+ }
+ gssp->gs_ncpus = 0;
+
+ mutex_exit(&gssp->gs_lock);
+ mutex_exit(&cpu_lock);
+
+ while (gssp->gs_defunct != NULL) {
+ gsqueue_cpu_t *scp;
+
+ scp = gssp->gs_defunct;
+ gssp->gs_defunct = scp->gqc_next;
+ gsqueue_cpu_destroy(scp);
+ }
+
+ while (gssp->gs_cbs != NULL) {
+ gsqueue_cb_t *cbp;
+
+ cbp = gssp->gs_cbs;
+ gssp->gs_cbs = cbp->gcb_next;
+ kmem_cache_free(gsqueue_cb_cache, cbp);
+ }
+
+ ASSERT(gssp->gs_ncpus == 0);
+ ASSERT(gssp->gs_defunct == NULL);
+ ASSERT(gssp->gs_cbs == NULL);
+ kmem_cache_free(gsqueue_set_cache, gssp);
+}
+
+gsqueue_t *
+gsqueue_set_get(gsqueue_set_t *gssp, uint_t index)
+{
+ squeue_t *sqp;
+ gsqueue_cpu_t *scp;
+
+ mutex_enter(&gssp->gs_lock);
+ scp = gssp->gs_cpus[index % gssp->gs_ncpus];
+ sqp = scp->gqc_head;
+ mutex_exit(&gssp->gs_lock);
+ return ((gsqueue_t *)sqp);
+}
+
+uintptr_t
+gsqueue_set_cb_add(gsqueue_set_t *gssp, gsqueue_cb_f cb, void *arg)
+{
+ gsqueue_cb_t *cbp;
+
+ cbp = kmem_cache_alloc(gsqueue_cb_cache, KM_SLEEP);
+ cbp->gcb_func = cb;
+ cbp->gcb_arg = arg;
+
+ mutex_enter(&gssp->gs_lock);
+ cbp->gcb_next = gssp->gs_cbs;
+ gssp->gs_cbs = cbp;
+ mutex_exit(&gssp->gs_lock);
+ return ((uintptr_t)cbp);
+}
+
+int
+gsqueue_set_cb_remove(gsqueue_set_t *gssp, uintptr_t id)
+{
+ gsqueue_cb_t *cbp, *prev;
+ mutex_enter(&gssp->gs_lock);
+ cbp = gssp->gs_cbs;
+ prev = NULL;
+ while (cbp != NULL) {
+ if ((uintptr_t)cbp != id) {
+ prev = cbp;
+ cbp = cbp->gcb_next;
+ continue;
+ }
+
+ if (prev == NULL) {
+ gssp->gs_cbs = cbp->gcb_next;
+ } else {
+ prev->gcb_next = cbp->gcb_next;
+ }
+
+ mutex_exit(&gssp->gs_lock);
+ kmem_cache_free(gsqueue_cb_cache, cbp);
+ return (0);
+ }
+ mutex_exit(&gssp->gs_lock);
+ return (-1);
+}
+
+void
+gsqueue_enter_one(gsqueue_t *gsp, mblk_t *mp, gsqueue_proc_f func, void *arg,
+ int flags, uint8_t tag)
+{
+ squeue_t *sqp = (squeue_t *)gsp;
+
+ ASSERT(mp->b_next == NULL);
+ ASSERT(mp->b_prev == NULL);
+ mp->b_queue = (queue_t *)func;
+ mp->b_prev = arg;
+ sqp->sq_enter(sqp, mp, mp, 1, NULL, flags, tag);
+}
+
+static void
+gsqueue_notify(gsqueue_set_t *gssp, squeue_t *sqp, boolean_t online)
+{
+ gsqueue_cb_t *cbp;
+
+ ASSERT(MUTEX_HELD(&gssp->gs_lock));
+ cbp = gssp->gs_cbs;
+ while (cbp != NULL) {
+ cbp->gcb_func(gssp, (gsqueue_t *)sqp, cbp->gcb_arg, online);
+ cbp = cbp->gcb_next;
+ }
+
+}
+
+/*
+ * When we online a processor we need to go through and either bind a defunct
+ * squeue or create a new one. We'll try to reuse a gsqueue_cpu_t from the
+ * defunct list that used to be on that processor. If no such gsqueue_cpu_t
+ * exists, then we'll create a new one. We'd rather avoid taking over an
+ * existing defunct one that used to be on another CPU, as its not unreasonable
+ * to believe that its CPU will come back. More CPUs are offlined and onlined by
+ * the administrator or by creating cpu sets than actually get offlined by FMA.
+ */
+static void
+gsqueue_handle_online(processorid_t id)
+{
+ gsqueue_set_t *gssp;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+ mutex_enter(&gsqueue_lock);
+ for (gssp = list_head(&gsqueue_list); gssp != NULL;
+ gssp = list_next(&gsqueue_list, gssp)) {
+ gsqueue_cpu_t *scp;
+
+ mutex_enter(&gssp->gs_lock);
+ scp = gssp->gs_defunct;
+ while (scp != NULL) {
+ if (scp->gqc_cpuid == id)
+ break;
+ scp = scp->gqc_next;
+ }
+
+ if (scp == NULL) {
+ scp = gsqueue_cpu_create(gssp->gs_wwait,
+ gssp->gs_wpri, id);
+ } else {
+ squeue_bind(scp->gqc_head, id);
+ }
+ ASSERT(gssp->gs_ncpus < NCPU);
+ gssp->gs_cpus[gssp->gs_ncpus] = scp;
+ gssp->gs_ncpus++;
+ gsqueue_notify(gssp, scp->gqc_head, B_TRUE);
+ mutex_exit(&gssp->gs_lock);
+ }
+ mutex_exit(&gsqueue_lock);
+}
+
+static void
+gsqueue_handle_offline(processorid_t id)
+{
+ gsqueue_set_t *gssp;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+ mutex_enter(&gsqueue_lock);
+ for (gssp = list_head(&gsqueue_list); gssp != NULL;
+ gssp = list_next(&gsqueue_list, gssp)) {
+ int i;
+ gsqueue_cpu_t *scp = NULL;
+
+ mutex_enter(&gssp->gs_lock);
+ for (i = 0; i < gssp->gs_ncpus; i++) {
+ if (gssp->gs_cpus[i]->gqc_cpuid == id) {
+ scp = gssp->gs_cpus[i];
+ break;
+ }
+ }
+
+ if (scp != NULL) {
+ squeue_unbind(scp->gqc_head);
+ scp->gqc_next = gssp->gs_defunct;
+ gssp->gs_defunct = scp;
+ gssp->gs_cpus[i] = gssp->gs_cpus[gssp->gs_ncpus-1];
+ gssp->gs_ncpus--;
+ gsqueue_notify(gssp, scp->gqc_head, B_FALSE);
+ }
+ mutex_exit(&gssp->gs_lock);
+ }
+ mutex_exit(&gsqueue_lock);
+}
+
+/* ARGSUSED */
+static int
+gsqueue_cpu_setup(cpu_setup_t what, int id, void *unused)
+{
+ cpu_t *cp;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+ cp = cpu_get(id);
+ switch (what) {
+ case CPU_CONFIG:
+ case CPU_ON:
+ case CPU_INIT:
+ case CPU_CPUPART_IN:
+ if (cp != NULL && CPU_ACTIVE(cp) && cp->cpu_flags & CPU_EXISTS)
+ gsqueue_handle_online(cp->cpu_id);
+ break;
+ case CPU_UNCONFIG:
+ case CPU_OFF:
+ case CPU_CPUPART_OUT:
+ gsqueue_handle_offline(cp->cpu_id);
+ break;
+ default:
+ break;
+ }
+
+ return (0);
+}
+
+
+/* ARGSUSED */
+static int
+gsqueue_set_cache_construct(void *buf, void *arg, int kmflags)
+{
+ gsqueue_set_t *gssp = buf;
+
+ gssp->gs_cpus = kmem_alloc(sizeof (gsqueue_cpu_t *) * NCPU, kmflags);
+ if (gssp->gs_cpus == NULL)
+ return (-1);
+
+ mutex_init(&gssp->gs_lock, NULL, MUTEX_DRIVER, NULL);
+ gssp->gs_ncpus = 0;
+ gssp->gs_defunct = NULL;
+ gssp->gs_cbs = NULL;
+
+ return (0);
+}
+
+static void
+gsqueue_set_cache_destruct(void *buf, void *arg)
+{
+ gsqueue_set_t *gssp = buf;
+
+ kmem_free(gssp->gs_cpus, sizeof (gsqueue_cpu_t *) * NCPU);
+ gssp->gs_cpus = NULL;
+ mutex_destroy(&gssp->gs_lock);
+}
+
+static void
+gsqueue_ddiinit(void)
+{
+ list_create(&gsqueue_list, sizeof (gsqueue_set_t),
+ offsetof(gsqueue_set_t, gs_next));
+ mutex_init(&gsqueue_lock, NULL, MUTEX_DRIVER, NULL);
+
+ gsqueue_cb_cache = kmem_cache_create("gsqueue_cb_cache",
+ sizeof (gsqueue_cb_t),
+ 0, NULL, NULL, NULL, NULL, NULL, 0);
+ gsqueue_cpu_cache = kmem_cache_create("gsqueue_cpu_cache",
+ sizeof (gsqueue_cpu_t),
+ 0, NULL, NULL, NULL, NULL, NULL, 0);
+ gsqueue_set_cache = kmem_cache_create("squeue_set_cache",
+ sizeof (gsqueue_set_t),
+ 0, gsqueue_set_cache_construct, gsqueue_set_cache_destruct,
+ NULL, NULL, NULL, 0);
+
+
+ mutex_enter(&cpu_lock);
+ register_cpu_setup_func(gsqueue_cpu_setup, NULL);
+ mutex_exit(&cpu_lock);
+}
+
+static int
+gsqueue_ddifini(void)
+{
+ mutex_enter(&gsqueue_lock);
+ if (list_is_empty(&gsqueue_list) == 0) {
+ mutex_exit(&gsqueue_lock);
+ return (EBUSY);
+ }
+ list_destroy(&gsqueue_list);
+ mutex_exit(&gsqueue_lock);
+
+ mutex_enter(&cpu_lock);
+ register_cpu_setup_func(gsqueue_cpu_setup, NULL);
+ mutex_exit(&cpu_lock);
+
+ kmem_cache_destroy(gsqueue_set_cache);
+ kmem_cache_destroy(gsqueue_cpu_cache);
+ kmem_cache_destroy(gsqueue_cb_cache);
+
+ mutex_destroy(&gsqueue_lock);
+
+ return (0);
+}
+
+static struct modlmisc gsqueue_modmisc = {
+ &mod_miscops,
+ "gsqueue"
+};
+
+static struct modlinkage gsqueue_modlinkage = {
+ MODREV_1,
+ &gsqueue_modmisc,
+ NULL
+};
+
+int
+_init(void)
+{
+ int ret;
+
+ gsqueue_ddiinit();
+ if ((ret = mod_install(&gsqueue_modlinkage)) != 0) {
+ VERIFY(gsqueue_ddifini() == 0);
+ return (ret);
+ }
+
+ return (ret);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&gsqueue_modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ int ret;
+
+ if ((ret = gsqueue_ddifini()) != 0)
+ return (ret);
+
+ if ((ret = mod_remove(&gsqueue_modlinkage)) != 0)
+ return (ret);
+
+ return (0);
+}
diff --git a/usr/src/uts/common/io/inotify.c b/usr/src/uts/common/io/inotify.c
new file mode 100644
index 0000000000..1eebcad7e4
--- /dev/null
+++ b/usr/src/uts/common/io/inotify.c
@@ -0,0 +1,1480 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Support for the inotify facility, a Linux-borne facility for asynchronous
+ * notification of certain events on specified files or directories. Our
+ * implementation broadly leverages the file event monitoring facility, and
+ * would actually be quite straightforward were it not for a very serious
+ * blunder in the inotify interface: in addition to allowing for one to be
+ * notified on events on a particular file or directory, inotify also allows
+ * for one to be notified on certain events on files _within_ a watched
+ * directory -- even though those events have absolutely nothing to do with
+ * the directory itself. This leads to all sorts of madness because file
+ * operations are (of course) not undertaken on paths but rather on open
+ * files -- and the relationships between open files and the paths that resolve
+ * to those files are neither static nor isomorphic. We implement this
+ * concept by having _child watches_ when directories are watched with events
+ * in IN_CHILD_EVENTS. We add child watches when a watch on a directory is
+ * first added, and we modify those child watches dynamically as files are
+ * created, deleted, moved into or moved out of the specified directory. This
+ * mechanism works well, absent hard links. Hard links, unfortunately, break
+ * this rather badly, and the user is warned that watches on directories that
+ * have multiple directory entries referring to the same file may behave
+ * unexpectedly.
+ */
+
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/inotify.h>
+#include <sys/fem.h>
+#include <sys/conf.h>
+#include <sys/stat.h>
+#include <sys/vfs_opreg.h>
+#include <sys/vmem.h>
+#include <sys/avl.h>
+#include <sys/sysmacros.h>
+#include <sys/cyclic.h>
+#include <sys/filio.h>
+
+struct inotify_state;
+struct inotify_kevent;
+
+typedef struct inotify_watch inotify_watch_t;
+typedef struct inotify_state inotify_state_t;
+typedef struct inotify_kevent inotify_kevent_t;
+
+struct inotify_watch {
+ kmutex_t inw_lock; /* lock protecting ref count */
+ int inw_refcnt; /* reference count */
+ uint8_t inw_zombie:1; /* boolean: is zombie */
+ uint8_t inw_fired:1; /* boolean: fired one-shot */
+ uint8_t inw_active:1; /* boolean: watch is active */
+ uint8_t inw_orphaned:1; /* boolean: orphaned */
+ kcondvar_t inw_cv; /* condvar for zombifier */
+ uint32_t inw_mask; /* mask of watch */
+ int32_t inw_wd; /* watch descriptor */
+ vnode_t *inw_vp; /* underlying vnode */
+ inotify_watch_t *inw_parent; /* parent, if a child */
+ avl_node_t inw_byvp; /* watches by vnode */
+ avl_node_t inw_bywd; /* watches by descriptor */
+ avl_tree_t inw_children; /* children, if a parent */
+ char *inw_name; /* name, if a child */
+ list_node_t inw_orphan; /* orphan list */
+ inotify_state_t *inw_state; /* corresponding state */
+};
+
+struct inotify_kevent {
+ inotify_kevent_t *ine_next; /* next event in queue */
+ struct inotify_event ine_event; /* event (variable size) */
+};
+
+#define INOTIFY_EVENT_LENGTH(ev) \
+ (sizeof (inotify_kevent_t) + (ev)->ine_event.len)
+
+struct inotify_state {
+ kmutex_t ins_lock; /* lock protecting state */
+ avl_tree_t ins_byvp; /* watches by vnode */
+ avl_tree_t ins_bywd; /* watches by descriptor */
+ vmem_t *ins_wds; /* watch identifier arena */
+ int ins_maxwatches; /* maximum number of watches */
+ int ins_maxevents; /* maximum number of events */
+ int ins_nevents; /* current # of events */
+ int32_t ins_size; /* total size of events */
+ inotify_kevent_t *ins_head; /* head of event queue */
+ inotify_kevent_t *ins_tail; /* tail of event queue */
+ pollhead_t ins_pollhd; /* poll head */
+ kcondvar_t ins_cv; /* condvar for reading */
+ list_t ins_orphans; /* orphan list */
+ cyclic_id_t ins_cleaner; /* cyclic for cleaning */
+ inotify_watch_t *ins_zombies; /* zombie watch list */
+ cred_t *ins_cred; /* creator's credentials */
+ inotify_state_t *ins_next; /* next state on global list */
+};
+
+/*
+ * Tunables (exported read-only in lx-branded zones via /proc).
+ */
+int inotify_maxwatches = 8192; /* max watches per instance */
+int inotify_maxevents = 16384; /* max events */
+int inotify_maxinstances = 128; /* max instances per user */
+
+/*
+ * Internal global variables.
+ */
+static kmutex_t inotify_lock; /* lock protecting state */
+static dev_info_t *inotify_devi; /* device info */
+static fem_t *inotify_femp; /* FEM pointer */
+static vmem_t *inotify_minor; /* minor number arena */
+static void *inotify_softstate; /* softstate pointer */
+static inotify_state_t *inotify_state; /* global list if state */
+
+static void inotify_watch_event(inotify_watch_t *, uint64_t, char *);
+static void inotify_watch_insert(inotify_watch_t *, vnode_t *, char *);
+static void inotify_watch_delete(inotify_watch_t *, uint32_t);
+static void inotify_watch_remove(inotify_state_t *state,
+ inotify_watch_t *watch);
+
+static int
+inotify_fop_close(femarg_t *vf, int flag, int count, offset_t offset,
+ cred_t *cr, caller_context_t *ct)
+{
+ inotify_watch_t *watch = vf->fa_fnode->fn_available;
+ int rval;
+
+ if ((rval = vnext_close(vf, flag, count, offset, cr, ct)) == 0) {
+ inotify_watch_event(watch, flag & FWRITE ?
+ IN_CLOSE_WRITE : IN_CLOSE_NOWRITE, NULL);
+ }
+
+ return (rval);
+}
+
+static int
+inotify_fop_create(femarg_t *vf, char *name, vattr_t *vap, vcexcl_t excl,
+ int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct,
+ vsecattr_t *vsecp)
+{
+ inotify_watch_t *watch = vf->fa_fnode->fn_available;
+ int rval;
+
+ if ((rval = vnext_create(vf, name, vap, excl, mode,
+ vpp, cr, flag, ct, vsecp)) == 0) {
+ inotify_watch_insert(watch, *vpp, name);
+ inotify_watch_event(watch, IN_CREATE, name);
+ }
+
+ return (rval);
+}
+
+static int
+inotify_fop_link(femarg_t *vf, vnode_t *svp, char *tnm, cred_t *cr,
+ caller_context_t *ct, int flags)
+{
+ inotify_watch_t *watch = vf->fa_fnode->fn_available;
+ int rval;
+
+ if ((rval = vnext_link(vf, svp, tnm, cr, ct, flags)) == 0) {
+ inotify_watch_insert(watch, svp, tnm);
+ inotify_watch_event(watch, IN_CREATE, tnm);
+ }
+
+ return (rval);
+}
+
+static int
+inotify_fop_mkdir(femarg_t *vf, char *name, vattr_t *vap, vnode_t **vpp,
+ cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
+{
+ inotify_watch_t *watch = vf->fa_fnode->fn_available;
+ int rval;
+
+ if ((rval = vnext_mkdir(vf, name, vap, vpp, cr,
+ ct, flags, vsecp)) == 0) {
+ inotify_watch_insert(watch, *vpp, name);
+ inotify_watch_event(watch, IN_CREATE | IN_ISDIR, name);
+ }
+
+ return (rval);
+}
+
+static int
+inotify_fop_open(femarg_t *vf, int mode, cred_t *cr, caller_context_t *ct)
+{
+ inotify_watch_t *watch = vf->fa_fnode->fn_available;
+ int rval;
+
+ if ((rval = vnext_open(vf, mode, cr, ct)) == 0)
+ inotify_watch_event(watch, IN_OPEN, NULL);
+
+ return (rval);
+}
+
+static int
+inotify_fop_read(femarg_t *vf, struct uio *uiop, int ioflag, struct cred *cr,
+ caller_context_t *ct)
+{
+ inotify_watch_t *watch = vf->fa_fnode->fn_available;
+ int rval = vnext_read(vf, uiop, ioflag, cr, ct);
+ inotify_watch_event(watch, IN_ACCESS, NULL);
+
+ return (rval);
+}
+
+static int
+inotify_fop_readdir(femarg_t *vf, uio_t *uiop, cred_t *cr, int *eofp,
+ caller_context_t *ct, int flags)
+{
+ inotify_watch_t *watch = vf->fa_fnode->fn_available;
+ int rval = vnext_readdir(vf, uiop, cr, eofp, ct, flags);
+ inotify_watch_event(watch, IN_ACCESS | IN_ISDIR, NULL);
+
+ return (rval);
+}
+
+int
+inotify_fop_remove(femarg_t *vf, char *nm, cred_t *cr, caller_context_t *ct,
+ int flags)
+{
+ inotify_watch_t *watch = vf->fa_fnode->fn_available;
+ int rval;
+
+ if ((rval = vnext_remove(vf, nm, cr, ct, flags)) == 0)
+ inotify_watch_event(watch, IN_DELETE, nm);
+
+ return (rval);
+}
+
+int
+inotify_fop_rmdir(femarg_t *vf, char *nm, vnode_t *cdir, cred_t *cr,
+ caller_context_t *ct, int flags)
+{
+ inotify_watch_t *watch = vf->fa_fnode->fn_available;
+ int rval;
+
+ if ((rval = vnext_rmdir(vf, nm, cdir, cr, ct, flags)) == 0)
+ inotify_watch_event(watch, IN_DELETE | IN_ISDIR, nm);
+
+ return (rval);
+}
+
+static int
+inotify_fop_setattr(femarg_t *vf, vattr_t *vap, int flags, cred_t *cr,
+ caller_context_t *ct)
+{
+ inotify_watch_t *watch = vf->fa_fnode->fn_available;
+ int rval;
+
+ if ((rval = vnext_setattr(vf, vap, flags, cr, ct)) == 0)
+ inotify_watch_event(watch, IN_ATTRIB, NULL);
+
+ return (rval);
+}
+
+static int
+inotify_fop_write(femarg_t *vf, struct uio *uiop, int ioflag, struct cred *cr,
+ caller_context_t *ct)
+{
+ inotify_watch_t *watch = vf->fa_fnode->fn_available;
+ int rval = vnext_write(vf, uiop, ioflag, cr, ct);
+ inotify_watch_event(watch, IN_MODIFY, NULL);
+
+ return (rval);
+}
+
+static int
+inotify_fop_vnevent(femarg_t *vf, vnevent_t vnevent, vnode_t *dvp, char *name,
+ caller_context_t *ct)
+{
+ inotify_watch_t *watch = vf->fa_fnode->fn_available;
+
+ switch (vnevent) {
+ case VE_RENAME_SRC:
+ inotify_watch_event(watch, IN_MOVE_SELF, NULL);
+ inotify_watch_delete(watch, IN_MOVE_SELF);
+ break;
+ case VE_REMOVE:
+ /*
+ * Linux will apparently fire an IN_ATTRIB event when the link
+ * count changes (including when it drops to 0 on a remove).
+ * This is merely somewhat odd; what is amazing is that this
+ * IN_ATTRIB event is not visible on an inotify watch on the
+ * parent directory. (IN_ATTRIB events are normally sent to
+ * watches on the parent directory). While it's hard to
+ * believe that this constitutes desired semantics, ltp
+ * unfortunately tests this case (if implicitly); in the name
+ * of bug-for-bug compatibility, we fire IN_ATTRIB iff we are
+ * explicitly watching the file that has been removed.
+ */
+ if (watch->inw_parent == NULL)
+ inotify_watch_event(watch, IN_ATTRIB, NULL);
+
+ /*FALLTHROUGH*/
+ case VE_RENAME_DEST:
+ inotify_watch_event(watch, IN_DELETE_SELF, NULL);
+ inotify_watch_delete(watch, IN_DELETE_SELF);
+ break;
+ case VE_RMDIR:
+ /*
+ * It seems that IN_ISDIR should really be OR'd in here, but
+ * Linux doesn't seem to do that in this case; for the sake of
+ * bug-for-bug compatibility, we don't do it either.
+ */
+ inotify_watch_event(watch, IN_DELETE_SELF, NULL);
+ inotify_watch_delete(watch, IN_DELETE_SELF);
+ break;
+ case VE_CREATE:
+ inotify_watch_event(watch, IN_MODIFY | IN_ATTRIB, NULL);
+ break;
+ case VE_LINK:
+ inotify_watch_event(watch, IN_ATTRIB, NULL);
+ break;
+ case VE_RENAME_SRC_DIR:
+ inotify_watch_event(watch, IN_MOVED_FROM, name);
+ break;
+ case VE_RENAME_DEST_DIR:
+ if (name == NULL)
+ name = dvp->v_path;
+
+ inotify_watch_insert(watch, dvp, name);
+ inotify_watch_event(watch, IN_MOVED_TO, name);
+ break;
+ case VE_SUPPORT:
+ case VE_MOUNTEDOVER:
+ case VE_TRUNCATE:
+ break;
+ }
+
+ return (vnext_vnevent(vf, vnevent, dvp, name, ct));
+}
+
+const fs_operation_def_t inotify_vnodesrc_template[] = {
+ VOPNAME_CLOSE, { .femop_close = inotify_fop_close },
+ VOPNAME_CREATE, { .femop_create = inotify_fop_create },
+ VOPNAME_LINK, { .femop_link = inotify_fop_link },
+ VOPNAME_MKDIR, { .femop_mkdir = inotify_fop_mkdir },
+ VOPNAME_OPEN, { .femop_open = inotify_fop_open },
+ VOPNAME_READ, { .femop_read = inotify_fop_read },
+ VOPNAME_READDIR, { .femop_readdir = inotify_fop_readdir },
+ VOPNAME_REMOVE, { .femop_remove = inotify_fop_remove },
+ VOPNAME_RMDIR, { .femop_rmdir = inotify_fop_rmdir },
+ VOPNAME_SETATTR, { .femop_setattr = inotify_fop_setattr },
+ VOPNAME_WRITE, { .femop_write = inotify_fop_write },
+ VOPNAME_VNEVENT, { .femop_vnevent = inotify_fop_vnevent },
+ NULL, NULL
+};
+
+static int
+inotify_watch_cmpwd(inotify_watch_t *lhs, inotify_watch_t *rhs)
+{
+ if (lhs->inw_wd < rhs->inw_wd)
+ return (-1);
+
+ if (lhs->inw_wd > rhs->inw_wd)
+ return (1);
+
+ return (0);
+}
+
+static int
+inotify_watch_cmpvp(inotify_watch_t *lhs, inotify_watch_t *rhs)
+{
+ uintptr_t lvp = (uintptr_t)lhs->inw_vp, rvp = (uintptr_t)rhs->inw_vp;
+
+ if (lvp < rvp)
+ return (-1);
+
+ if (lvp > rvp)
+ return (1);
+
+ return (0);
+}
+
+static void
+inotify_watch_hold(inotify_watch_t *watch)
+{
+ mutex_enter(&watch->inw_lock);
+ VERIFY(watch->inw_refcnt > 0);
+ watch->inw_refcnt++;
+ mutex_exit(&watch->inw_lock);
+}
+
+static void
+inotify_watch_release(inotify_watch_t *watch)
+{
+ mutex_enter(&watch->inw_lock);
+ VERIFY(watch->inw_refcnt > 1);
+
+ if (--watch->inw_refcnt == 1 && watch->inw_zombie) {
+ /*
+ * We're down to our last reference; kick anyone that might be
+ * waiting.
+ */
+ cv_signal(&watch->inw_cv);
+ }
+
+ mutex_exit(&watch->inw_lock);
+}
+
+static void
+inotify_watch_event(inotify_watch_t *watch, uint64_t mask, char *name)
+{
+ inotify_kevent_t *event, *tail;
+ inotify_state_t *state = watch->inw_state;
+ uint32_t wd = watch->inw_wd, cookie = 0, len;
+ int align = sizeof (uintptr_t) - 1;
+ boolean_t removal = mask & IN_REMOVAL ? B_TRUE : B_FALSE;
+ inotify_watch_t *source = watch;
+
+ if (!(mask &= watch->inw_mask) || mask == IN_ISDIR)
+ return;
+
+ if (watch->inw_parent != NULL) {
+ /*
+ * This is an event on the child; if this isn't a valid child
+ * event, return. Otherwise, we move our watch to be our
+ * parent (which we know is around because we have a hold on
+ * it) and continue.
+ */
+ if (!(mask & IN_CHILD_EVENTS))
+ return;
+
+ name = watch->inw_name;
+ watch = watch->inw_parent;
+ }
+
+ if (!removal) {
+ mutex_enter(&state->ins_lock);
+
+ if (watch->inw_zombie ||
+ watch->inw_fired || !watch->inw_active) {
+ mutex_exit(&state->ins_lock);
+ return;
+ }
+ } else {
+ if (!watch->inw_active)
+ return;
+
+ VERIFY(MUTEX_HELD(&state->ins_lock));
+ }
+
+ /*
+ * If this is an operation on a directory and it's a child event
+ * (event if it's not on a child), we specify IN_ISDIR.
+ */
+ if (source->inw_vp->v_type == VDIR && (mask & IN_CHILD_EVENTS))
+ mask |= IN_ISDIR;
+
+ if (mask & (IN_MOVED_FROM | IN_MOVED_TO))
+ cookie = (uint32_t)curthread->t_did;
+
+ if (state->ins_nevents >= state->ins_maxevents) {
+ /*
+ * We're at our maximum number of events -- turn our event
+ * into an IN_Q_OVERFLOW event, which will be coalesced if
+ * it's already the tail event.
+ */
+ mask = IN_Q_OVERFLOW;
+ wd = (uint32_t)-1;
+ cookie = 0;
+ len = 0;
+ }
+
+ if ((tail = state->ins_tail) != NULL && tail->ine_event.wd == wd &&
+ tail->ine_event.mask == mask && tail->ine_event.cookie == cookie &&
+ ((tail->ine_event.len == 0 && len == 0) ||
+ (name != NULL && tail->ine_event.len != 0 &&
+ strcmp(tail->ine_event.name, name) == 0))) {
+ /*
+ * This is an implicitly coalesced event; we're done.
+ */
+ if (!removal)
+ mutex_exit(&state->ins_lock);
+ return;
+ }
+
+ if (name != NULL) {
+ if ((len = strlen(name) + 1) & align)
+ len += (align + 1) - (len & align);
+ } else {
+ len = 0;
+ }
+
+ event = kmem_zalloc(sizeof (inotify_kevent_t) + len, KM_SLEEP);
+ event->ine_event.wd = wd;
+ event->ine_event.mask = (uint32_t)mask;
+ event->ine_event.cookie = cookie;
+ event->ine_event.len = len;
+
+ if (name != NULL)
+ strcpy(event->ine_event.name, name);
+
+ if (tail != NULL) {
+ tail->ine_next = event;
+ } else {
+ VERIFY(state->ins_head == NULL);
+ state->ins_head = event;
+ cv_broadcast(&state->ins_cv);
+ }
+
+ state->ins_tail = event;
+ state->ins_nevents++;
+ state->ins_size += sizeof (inotify_kevent_t) + len;
+
+ if ((watch->inw_mask & IN_ONESHOT) && !watch->inw_fired) {
+ /*
+ * If this is a one-shot, we need to remove the watch. (Note
+ * that this will recurse back into inotify_watch_event() to
+ * fire the IN_IGNORED event -- but with "removal" set.)
+ */
+ watch->inw_fired = 1;
+ inotify_watch_remove(state, watch);
+ }
+
+ if (removal)
+ return;
+
+ mutex_exit(&state->ins_lock);
+ pollwakeup(&state->ins_pollhd, POLLRDNORM | POLLIN);
+}
+
+/*
+ * Destroy a watch. By the time we're in here, the watch must have exactly
+ * one reference.
+ */
+static void
+inotify_watch_destroy(inotify_watch_t *watch)
+{
+ VERIFY(MUTEX_HELD(&watch->inw_lock));
+
+ if (watch->inw_name != NULL)
+ kmem_free(watch->inw_name, strlen(watch->inw_name) + 1);
+
+ kmem_free(watch, sizeof (inotify_watch_t));
+}
+
+/*
+ * Zombify a watch. By the time we come in here, it must be true that the
+ * watch has already been fem_uninstall()'d -- the only reference should be
+ * in the state's data structure. If we can get away with freeing it, we'll
+ * do that -- but if the reference count is greater than one due to an active
+ * vnode operation, we'll put this watch on the zombie list on the state
+ * structure.
+ */
+static void
+inotify_watch_zombify(inotify_watch_t *watch)
+{
+ inotify_state_t *state = watch->inw_state;
+
+ VERIFY(MUTEX_HELD(&state->ins_lock));
+ VERIFY(!watch->inw_zombie);
+
+ watch->inw_zombie = 1;
+
+ if (watch->inw_parent != NULL) {
+ inotify_watch_release(watch->inw_parent);
+ } else {
+ avl_remove(&state->ins_byvp, watch);
+ avl_remove(&state->ins_bywd, watch);
+ vmem_free(state->ins_wds, (void *)(uintptr_t)watch->inw_wd, 1);
+ watch->inw_wd = -1;
+ }
+
+ mutex_enter(&watch->inw_lock);
+
+ if (watch->inw_refcnt == 1) {
+ /*
+ * There are no operations in flight and there is no way
+ * for anyone to discover this watch -- we can destroy it.
+ */
+ inotify_watch_destroy(watch);
+ } else {
+ /*
+ * There are operations in flight; we will need to enqueue
+ * this for later destruction.
+ */
+ watch->inw_parent = state->ins_zombies;
+ state->ins_zombies = watch;
+ mutex_exit(&watch->inw_lock);
+ }
+}
+
+static inotify_watch_t *
+inotify_watch_add(inotify_state_t *state, inotify_watch_t *parent,
+ const char *name, vnode_t *vp, uint32_t mask)
+{
+ inotify_watch_t *watch;
+ int err;
+
+ VERIFY(MUTEX_HELD(&state->ins_lock));
+
+ watch = kmem_zalloc(sizeof (inotify_watch_t), KM_SLEEP);
+
+ watch->inw_vp = vp;
+ watch->inw_mask = mask;
+ watch->inw_state = state;
+ watch->inw_refcnt = 1;
+
+ if (parent == NULL) {
+ watch->inw_wd = (int)(uintptr_t)vmem_alloc(state->ins_wds,
+ 1, VM_BESTFIT | VM_SLEEP);
+ avl_add(&state->ins_byvp, watch);
+ avl_add(&state->ins_bywd, watch);
+
+ avl_create(&watch->inw_children,
+ (int(*)(const void *, const void *))inotify_watch_cmpvp,
+ sizeof (inotify_watch_t),
+ offsetof(inotify_watch_t, inw_byvp));
+ } else {
+ VERIFY(name != NULL);
+ inotify_watch_hold(parent);
+ watch->inw_mask &= IN_CHILD_EVENTS;
+ watch->inw_parent = parent;
+ watch->inw_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
+ strcpy(watch->inw_name, name);
+
+ avl_add(&parent->inw_children, watch);
+ }
+
+ /*
+ * Add our monitor to the vnode. We must not have the watch lock held
+ * when we do this, as it will immediately hold our watch.
+ */
+ err = fem_install(vp, inotify_femp, watch, OPARGUNIQ,
+ (void (*)(void *))inotify_watch_hold,
+ (void (*)(void *))inotify_watch_release);
+
+ VERIFY(err == 0);
+
+ return (watch);
+}
+
+/*
+ * Remove a (non-child) watch. This is called from either synchronous context
+ * via inotify_rm_watch() or monitor context via either a vnevent or a
+ * one-shot.
+ */
+static void
+inotify_watch_remove(inotify_state_t *state, inotify_watch_t *watch)
+{
+ inotify_watch_t *child;
+ int err;
+
+ VERIFY(MUTEX_HELD(&state->ins_lock));
+ VERIFY(watch->inw_parent == NULL);
+
+ err = fem_uninstall(watch->inw_vp, inotify_femp, watch);
+ VERIFY(err == 0);
+
+ /*
+ * If we have children, we're going to remove them all and set them
+ * all to be zombies.
+ */
+ while ((child = avl_first(&watch->inw_children)) != NULL) {
+ VERIFY(child->inw_parent == watch);
+ avl_remove(&watch->inw_children, child);
+
+ err = fem_uninstall(child->inw_vp, inotify_femp, child);
+ VERIFY(err == 0);
+
+ /*
+ * If this child watch has been orphaned, remove it from the
+ * state's list of orphans.
+ */
+ if (child->inw_orphaned)
+ list_remove(&state->ins_orphans, child);
+
+ VN_RELE(child->inw_vp);
+
+ /*
+ * We're down (or should be down) to a single reference to
+ * this child watch; it's safe to zombify it.
+ */
+ inotify_watch_zombify(child);
+ }
+
+ inotify_watch_event(watch, IN_IGNORED | IN_REMOVAL, NULL);
+ VN_RELE(watch->inw_vp);
+
+ /*
+ * It's now safe to zombify the watch -- we know that the only reference
+ * can come from operations in flight.
+ */
+ inotify_watch_zombify(watch);
+}
+
+/*
+ * Delete a watch. Should only be called from VOP context.
+ */
+static void
+inotify_watch_delete(inotify_watch_t *watch, uint32_t event)
+{
+ inotify_state_t *state = watch->inw_state;
+ inotify_watch_t cmp = { .inw_vp = watch->inw_vp }, *parent;
+ int err;
+
+ if (event != IN_DELETE_SELF && !(watch->inw_mask & IN_CHILD_EVENTS))
+ return;
+
+ mutex_enter(&state->ins_lock);
+
+ if (watch->inw_zombie) {
+ mutex_exit(&state->ins_lock);
+ return;
+ }
+
+ if ((parent = watch->inw_parent) == NULL) {
+ if (event == IN_DELETE_SELF) {
+ /*
+ * If we're here because we're being deleted and we
+ * are not a child watch, we need to delete the entire
+ * watch, children and all.
+ */
+ inotify_watch_remove(state, watch);
+ }
+
+ mutex_exit(&state->ins_lock);
+ return;
+ } else {
+ if (event == IN_DELETE_SELF &&
+ !(parent->inw_mask & IN_EXCL_UNLINK)) {
+ /*
+ * This is a child watch for a file that is being
+ * removed and IN_EXCL_UNLINK has not been specified;
+ * indicate that it is orphaned and add it to the list
+ * of orphans. (This list will be checked by the
+ * cleaning cyclic to determine when the watch has
+ * become the only hold on the vnode, at which point
+ * the watch can be zombified.) Note that we check
+ * if the watch is orphaned before we orphan it: hard
+ * links make it possible for VE_REMOVE to be called
+ * multiple times on the same vnode. (!)
+ */
+ if (!watch->inw_orphaned) {
+ watch->inw_orphaned = 1;
+ list_insert_head(&state->ins_orphans, watch);
+ }
+
+ mutex_exit(&state->ins_lock);
+ return;
+ }
+
+ if (watch->inw_orphaned) {
+ /*
+ * If we're here, a file was orphaned and then later
+ * moved -- which almost certainly means that hard
+ * links are on the scene. We choose the orphan over
+ * the move because we don't want to spuriously
+ * drop events if we can avoid it.
+ */
+ list_remove(&state->ins_orphans, watch);
+ }
+ }
+
+ if (avl_find(&parent->inw_children, &cmp, NULL) == NULL) {
+ /*
+ * This watch has already been deleted from the parent.
+ */
+ mutex_exit(&state->ins_lock);
+ return;
+ }
+
+ avl_remove(&parent->inw_children, watch);
+ err = fem_uninstall(watch->inw_vp, inotify_femp, watch);
+ VERIFY(err == 0);
+
+ VN_RELE(watch->inw_vp);
+
+ /*
+ * It's now safe to zombify the watch -- which won't actually delete
+ * it as we know that the reference count is greater than 1.
+ */
+ inotify_watch_zombify(watch);
+ mutex_exit(&state->ins_lock);
+}
+
+/*
+ * Insert a new child watch. Should only be called from VOP context when
+ * a child is created in a watched directory.
+ */
+static void
+inotify_watch_insert(inotify_watch_t *watch, vnode_t *vp, char *name)
+{
+ inotify_state_t *state = watch->inw_state;
+ inotify_watch_t cmp = { .inw_vp = vp };
+
+ if (!(watch->inw_mask & IN_CHILD_EVENTS))
+ return;
+
+ mutex_enter(&state->ins_lock);
+
+ if (watch->inw_zombie || watch->inw_parent != NULL || vp == NULL) {
+ mutex_exit(&state->ins_lock);
+ return;
+ }
+
+ if (avl_find(&watch->inw_children, &cmp, NULL) != NULL) {
+ mutex_exit(&state->ins_lock);
+ return;
+ }
+
+ VN_HOLD(vp);
+ watch = inotify_watch_add(state, watch, name, vp, watch->inw_mask);
+ VERIFY(watch != NULL);
+
+ mutex_exit(&state->ins_lock);
+}
+
+
+static int
+inotify_add_watch(inotify_state_t *state, vnode_t *vp, uint32_t mask,
+ int32_t *wdp)
+{
+ inotify_watch_t *watch, cmp = { .inw_vp = vp };
+ uint32_t set;
+
+ set = (mask & (IN_ALL_EVENTS | IN_MODIFIERS)) | IN_UNMASKABLE;
+
+ /*
+ * Lookup our vnode to determine if we already have a watch on it.
+ */
+ mutex_enter(&state->ins_lock);
+
+ if ((watch = avl_find(&state->ins_byvp, &cmp, NULL)) == NULL) {
+ /*
+ * We don't have this watch; allocate a new one, provided that
+ * we have fewer than our limit.
+ */
+ if (avl_numnodes(&state->ins_bywd) >= state->ins_maxwatches) {
+ mutex_exit(&state->ins_lock);
+ return (ENOSPC);
+ }
+
+ VN_HOLD(vp);
+ watch = inotify_watch_add(state, NULL, NULL, vp, set);
+ *wdp = watch->inw_wd;
+ mutex_exit(&state->ins_lock);
+
+ return (0);
+ }
+
+ VERIFY(!watch->inw_zombie);
+
+ if (!(mask & IN_MASK_ADD)) {
+ /*
+ * Note that if we're resetting our event mask and we're
+ * transitioning from an event mask that includes child events
+ * to one that doesn't, there will be potentially some stale
+ * child watches. This is basically fine: they won't fire,
+ * and they will correctly be removed when the watch is
+ * removed.
+ */
+ watch->inw_mask = 0;
+ }
+
+ watch->inw_mask |= set;
+
+ *wdp = watch->inw_wd;
+
+ mutex_exit(&state->ins_lock);
+
+ return (0);
+}
+
+static int
+inotify_add_child(inotify_state_t *state, vnode_t *vp, char *name)
+{
+ inotify_watch_t *watch, cmp = { .inw_vp = vp };
+ vnode_t *cvp;
+ int err;
+
+ /*
+ * Verify that the specified child doesn't have a directory component
+ * within it.
+ */
+ if (strchr(name, '/') != NULL)
+ return (EINVAL);
+
+ /*
+ * Lookup the underlying file. Note that this will succeed even if
+ * we don't have permissions to actually read the file.
+ */
+ if ((err = lookupnameat(name,
+ UIO_SYSSPACE, NO_FOLLOW, NULL, &cvp, vp)) != 0) {
+ return (err);
+ }
+
+ /*
+ * Use our vnode to find our watch, and then add our child watch to it.
+ */
+ mutex_enter(&state->ins_lock);
+
+ if ((watch = avl_find(&state->ins_byvp, &cmp, NULL)) == NULL) {
+ /*
+ * This is unexpected -- it means that we don't have the
+ * watch that we thought we had.
+ */
+ mutex_exit(&state->ins_lock);
+ VN_RELE(cvp);
+ return (ENXIO);
+ }
+
+ /*
+ * Now lookup the child vnode in the watch; we'll only add it if it
+ * isn't already there.
+ */
+ cmp.inw_vp = cvp;
+
+ if (avl_find(&watch->inw_children, &cmp, NULL) != NULL) {
+ mutex_exit(&state->ins_lock);
+ VN_RELE(cvp);
+ return (0);
+ }
+
+ watch = inotify_watch_add(state, watch, name, cvp, watch->inw_mask);
+ VERIFY(watch != NULL);
+ mutex_exit(&state->ins_lock);
+
+ return (0);
+}
+
+static int
+inotify_rm_watch(inotify_state_t *state, int32_t wd)
+{
+ inotify_watch_t *watch, cmp = { .inw_wd = wd };
+
+ mutex_enter(&state->ins_lock);
+
+ if ((watch = avl_find(&state->ins_bywd, &cmp, NULL)) == NULL) {
+ mutex_exit(&state->ins_lock);
+ return (EINVAL);
+ }
+
+ inotify_watch_remove(state, watch);
+ mutex_exit(&state->ins_lock);
+
+ return (0);
+}
+
+static int
+inotify_activate(inotify_state_t *state, int32_t wd)
+{
+ inotify_watch_t *watch, cmp = { .inw_wd = wd };
+
+ mutex_enter(&state->ins_lock);
+
+ if ((watch = avl_find(&state->ins_bywd, &cmp, NULL)) == NULL) {
+ mutex_exit(&state->ins_lock);
+ return (EINVAL);
+ }
+
+ watch->inw_active = 1;
+
+ mutex_exit(&state->ins_lock);
+
+ return (0);
+}
+
+/*
+ * Called periodically as a cyclic to process the orphans and zombies.
+ */
+static void
+inotify_clean(void *arg)
+{
+ inotify_state_t *state = arg;
+ inotify_watch_t *watch, *parent, *next, **prev;
+ int err;
+
+ mutex_enter(&state->ins_lock);
+
+ for (watch = list_head(&state->ins_orphans);
+ watch != NULL; watch = next) {
+ next = list_next(&state->ins_orphans, watch);
+
+ VERIFY(!watch->inw_zombie);
+ VERIFY((parent = watch->inw_parent) != NULL);
+
+ if (watch->inw_vp->v_count > 1)
+ continue;
+
+ avl_remove(&parent->inw_children, watch);
+ err = fem_uninstall(watch->inw_vp, inotify_femp, watch);
+ VERIFY(err == 0);
+
+ list_remove(&state->ins_orphans, watch);
+
+ VN_RELE(watch->inw_vp);
+ inotify_watch_zombify(watch);
+ }
+
+ prev = &state->ins_zombies;
+
+ while ((watch = *prev) != NULL) {
+ mutex_enter(&watch->inw_lock);
+
+ if (watch->inw_refcnt == 1) {
+ *prev = watch->inw_parent;
+ inotify_watch_destroy(watch);
+ continue;
+ }
+
+ prev = &watch->inw_parent;
+ mutex_exit(&watch->inw_lock);
+ }
+
+ mutex_exit(&state->ins_lock);
+}
+
+/*ARGSUSED*/
+static int
+inotify_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
+{
+ inotify_state_t *state;
+ major_t major = getemajor(*devp);
+ minor_t minor = getminor(*devp);
+ int instances = 0;
+ cyc_handler_t hdlr;
+ cyc_time_t when;
+ char c[64];
+
+ if (minor != INOTIFYMNRN_INOTIFY)
+ return (ENXIO);
+
+ mutex_enter(&inotify_lock);
+
+ for (state = inotify_state; state != NULL; state = state->ins_next) {
+ if (state->ins_cred == cred_p)
+ instances++;
+ }
+
+ if (instances >= inotify_maxinstances) {
+ mutex_exit(&inotify_lock);
+ return (EMFILE);
+ }
+
+ minor = (minor_t)(uintptr_t)vmem_alloc(inotify_minor, 1,
+ VM_BESTFIT | VM_SLEEP);
+
+ if (ddi_soft_state_zalloc(inotify_softstate, minor) != DDI_SUCCESS) {
+ vmem_free(inotify_minor, (void *)(uintptr_t)minor, 1);
+ mutex_exit(&inotify_lock);
+ return (NULL);
+ }
+
+ state = ddi_get_soft_state(inotify_softstate, minor);
+ *devp = makedevice(major, minor);
+
+ crhold(cred_p);
+ state->ins_cred = cred_p;
+ state->ins_next = inotify_state;
+ inotify_state = state;
+
+ (void) snprintf(c, sizeof (c), "inotify_watchid_%d", minor);
+ state->ins_wds = vmem_create(c, (void *)1, UINT32_MAX, 1,
+ NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
+
+ avl_create(&state->ins_bywd,
+ (int(*)(const void *, const void *))inotify_watch_cmpwd,
+ sizeof (inotify_watch_t),
+ offsetof(inotify_watch_t, inw_bywd));
+
+ avl_create(&state->ins_byvp,
+ (int(*)(const void *, const void *))inotify_watch_cmpvp,
+ sizeof (inotify_watch_t),
+ offsetof(inotify_watch_t, inw_byvp));
+
+ list_create(&state->ins_orphans, sizeof (inotify_watch_t),
+ offsetof(inotify_watch_t, inw_orphan));
+
+ state->ins_maxwatches = inotify_maxwatches;
+ state->ins_maxevents = inotify_maxevents;
+
+ mutex_exit(&inotify_lock);
+
+ mutex_enter(&cpu_lock);
+
+ hdlr.cyh_func = inotify_clean;
+ hdlr.cyh_level = CY_LOW_LEVEL;
+ hdlr.cyh_arg = state;
+
+ when.cyt_when = 0;
+ when.cyt_interval = NANOSEC;
+
+ state->ins_cleaner = cyclic_add(&hdlr, &when);
+ mutex_exit(&cpu_lock);
+
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+inotify_read(dev_t dev, uio_t *uio, cred_t *cr)
+{
+ inotify_state_t *state;
+ inotify_kevent_t *event;
+ minor_t minor = getminor(dev);
+ int err = 0, nevents = 0;
+ size_t len;
+
+ state = ddi_get_soft_state(inotify_softstate, minor);
+
+ mutex_enter(&state->ins_lock);
+
+ while (state->ins_head == NULL) {
+ if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) {
+ mutex_exit(&state->ins_lock);
+ return (EAGAIN);
+ }
+
+ if (!cv_wait_sig_swap(&state->ins_cv, &state->ins_lock)) {
+ mutex_exit(&state->ins_lock);
+ return (EINTR);
+ }
+ }
+
+ /*
+ * We have events and we have our lock; return as many as we can.
+ */
+ while ((event = state->ins_head) != NULL) {
+ len = sizeof (event->ine_event) + event->ine_event.len;
+
+ if (uio->uio_resid < len) {
+ if (nevents == 0)
+ err = EINVAL;
+ break;
+ }
+
+ nevents++;
+
+ if ((err = uiomove(&event->ine_event, len, UIO_READ, uio)) != 0)
+ break;
+
+ VERIFY(state->ins_nevents > 0);
+ state->ins_nevents--;
+
+ VERIFY(state->ins_size > 0);
+ state->ins_size -= INOTIFY_EVENT_LENGTH(event);
+
+ if ((state->ins_head = event->ine_next) == NULL) {
+ VERIFY(event == state->ins_tail);
+ VERIFY(state->ins_nevents == 0);
+ state->ins_tail = NULL;
+ }
+
+ kmem_free(event, INOTIFY_EVENT_LENGTH(event));
+ }
+
+ mutex_exit(&state->ins_lock);
+
+ return (err);
+}
+
+/*ARGSUSED*/
+static int
+inotify_poll(dev_t dev, short events, int anyyet, short *reventsp,
+ struct pollhead **phpp)
+{
+ inotify_state_t *state;
+ minor_t minor = getminor(dev);
+
+ state = ddi_get_soft_state(inotify_softstate, minor);
+
+ mutex_enter(&state->ins_lock);
+
+ if (state->ins_head != NULL) {
+ *reventsp = POLLRDNORM | POLLIN;
+ } else {
+ *reventsp = 0;
+
+ if (!anyyet)
+ *phpp = &state->ins_pollhd;
+ }
+
+ mutex_exit(&state->ins_lock);
+
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+inotify_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
+{
+ inotify_state_t *state;
+ minor_t minor = getminor(dev);
+ file_t *fp;
+ int rval;
+
+ state = ddi_get_soft_state(inotify_softstate, minor);
+
+ switch (cmd) {
+ case INOTIFYIOC_ADD_WATCH: {
+ inotify_addwatch_t addwatch;
+ file_t *fp;
+
+ if (copyin((void *)arg, &addwatch, sizeof (addwatch)) != 0)
+ return (EFAULT);
+
+ if ((fp = getf(addwatch.inaw_fd)) == NULL)
+ return (EBADF);
+
+ rval = inotify_add_watch(state, fp->f_vnode,
+ addwatch.inaw_mask, rv);
+
+ releasef(addwatch.inaw_fd);
+ return (rval);
+ }
+
+ case INOTIFYIOC_ADD_CHILD: {
+ inotify_addchild_t addchild;
+ char name[MAXPATHLEN];
+
+ if (copyin((void *)arg, &addchild, sizeof (addchild)) != 0)
+ return (EFAULT);
+
+ if (copyinstr(addchild.inac_name, name, MAXPATHLEN, NULL) != 0)
+ return (EFAULT);
+
+ if ((fp = getf(addchild.inac_fd)) == NULL)
+ return (EBADF);
+
+ rval = inotify_add_child(state, fp->f_vnode, name);
+
+ releasef(addchild.inac_fd);
+ return (rval);
+ }
+
+ case INOTIFYIOC_RM_WATCH:
+ return (inotify_rm_watch(state, arg));
+
+ case INOTIFYIOC_ACTIVATE:
+ return (inotify_activate(state, arg));
+
+ case FIONREAD:
+ mutex_enter(&state->ins_lock);
+ *rv = state->ins_size;
+ mutex_exit(&state->ins_lock);
+
+ return (0);
+
+ default:
+ break;
+ }
+
+ return (ENOTTY);
+}
+
+/*ARGSUSED*/
+static int
+inotify_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
+{
+ inotify_state_t *state, **sp;
+ inotify_watch_t *watch, *zombies;
+ inotify_kevent_t *event;
+ minor_t minor = getminor(dev);
+
+ state = ddi_get_soft_state(inotify_softstate, minor);
+
+ mutex_enter(&state->ins_lock);
+
+ /*
+ * First, destroy all of our watches.
+ */
+ while ((watch = avl_first(&state->ins_bywd)) != NULL)
+ inotify_watch_remove(state, watch);
+
+ /*
+ * And now destroy our event queue.
+ */
+ while ((event = state->ins_head) != NULL) {
+ state->ins_head = event->ine_next;
+ kmem_free(event, INOTIFY_EVENT_LENGTH(event));
+ }
+
+ zombies = state->ins_zombies;
+ state->ins_zombies = NULL;
+ mutex_exit(&state->ins_lock);
+
+ /*
+ * Now that our state lock is dropped, we can synchronously wait on
+ * any zombies.
+ */
+ while ((watch = zombies) != NULL) {
+ zombies = zombies->inw_parent;
+
+ mutex_enter(&watch->inw_lock);
+
+ while (watch->inw_refcnt > 1)
+ cv_wait(&watch->inw_cv, &watch->inw_lock);
+
+ inotify_watch_destroy(watch);
+ }
+
+ mutex_enter(&cpu_lock);
+ cyclic_remove(state->ins_cleaner);
+ mutex_exit(&cpu_lock);
+
+ mutex_enter(&inotify_lock);
+
+ /*
+ * Remove our state from our global list, and release our hold on
+ * the cred.
+ */
+ for (sp = &inotify_state; *sp != state; sp = &((*sp)->ins_next))
+ VERIFY(*sp != NULL);
+
+ *sp = (*sp)->ins_next;
+ crfree(state->ins_cred);
+
+ ddi_soft_state_free(inotify_softstate, minor);
+ vmem_free(inotify_minor, (void *)(uintptr_t)minor, 1);
+
+ mutex_exit(&inotify_lock);
+
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+inotify_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
+{
+ mutex_enter(&inotify_lock);
+
+ if (ddi_soft_state_init(&inotify_softstate,
+ sizeof (inotify_state_t), 0) != 0) {
+ cmn_err(CE_NOTE, "/dev/inotify failed to create soft state");
+ mutex_exit(&inotify_lock);
+ return (DDI_FAILURE);
+ }
+
+ if (ddi_create_minor_node(devi, "inotify", S_IFCHR,
+ INOTIFYMNRN_INOTIFY, DDI_PSEUDO, NULL) == DDI_FAILURE) {
+ cmn_err(CE_NOTE, "/dev/inotify couldn't create minor node");
+ ddi_soft_state_fini(&inotify_softstate);
+ mutex_exit(&inotify_lock);
+ return (DDI_FAILURE);
+ }
+
+ if (fem_create("inotify_fem",
+ inotify_vnodesrc_template, &inotify_femp) != 0) {
+ cmn_err(CE_NOTE, "/dev/inotify couldn't create FEM state");
+ ddi_remove_minor_node(devi, NULL);
+ ddi_soft_state_fini(&inotify_softstate);
+ mutex_exit(&inotify_lock);
+ return (DDI_FAILURE);
+ }
+
+ ddi_report_dev(devi);
+ inotify_devi = devi;
+
+ inotify_minor = vmem_create("inotify_minor", (void *)INOTIFYMNRN_CLONE,
+ UINT32_MAX - INOTIFYMNRN_CLONE, 1, NULL, NULL, NULL, 0,
+ VM_SLEEP | VMC_IDENTIFIER);
+
+ mutex_exit(&inotify_lock);
+
+ return (DDI_SUCCESS);
+}
+
+/*ARGSUSED*/
+static int
+inotify_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ switch (cmd) {
+ case DDI_DETACH:
+ break;
+
+ case DDI_SUSPEND:
+ return (DDI_SUCCESS);
+
+ default:
+ return (DDI_FAILURE);
+ }
+
+ mutex_enter(&inotify_lock);
+ fem_free(inotify_femp);
+ vmem_destroy(inotify_minor);
+
+ ddi_remove_minor_node(inotify_devi, NULL);
+ inotify_devi = NULL;
+
+ ddi_soft_state_fini(&inotify_softstate);
+ mutex_exit(&inotify_lock);
+
+ return (DDI_SUCCESS);
+}
+
+/*ARGSUSED*/
+static int
+inotify_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
+{
+ int error;
+
+ switch (infocmd) {
+ case DDI_INFO_DEVT2DEVINFO:
+ *result = (void *)inotify_devi;
+ error = DDI_SUCCESS;
+ break;
+ case DDI_INFO_DEVT2INSTANCE:
+ *result = (void *)0;
+ error = DDI_SUCCESS;
+ break;
+ default:
+ error = DDI_FAILURE;
+ }
+ return (error);
+}
+
+static struct cb_ops inotify_cb_ops = {
+ inotify_open, /* open */
+ inotify_close, /* close */
+ nulldev, /* strategy */
+ nulldev, /* print */
+ nodev, /* dump */
+ inotify_read, /* read */
+ nodev, /* write */
+ inotify_ioctl, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ nodev, /* segmap */
+ inotify_poll, /* poll */
+ ddi_prop_op, /* cb_prop_op */
+ 0, /* streamtab */
+ D_NEW | D_MP /* Driver compatibility flag */
+};
+
+static struct dev_ops inotify_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* refcnt */
+ inotify_info, /* get_dev_info */
+ nulldev, /* identify */
+ nulldev, /* probe */
+ inotify_attach, /* attach */
+ inotify_detach, /* detach */
+ nodev, /* reset */
+ &inotify_cb_ops, /* driver operations */
+ NULL, /* bus operations */
+ nodev, /* dev power */
+ ddi_quiesce_not_needed, /* quiesce */
+};
+
+static struct modldrv modldrv = {
+ &mod_driverops, /* module type (this is a pseudo driver) */
+ "inotify support", /* name of module */
+ &inotify_ops, /* driver ops */
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ (void *)&modldrv,
+ NULL
+};
+
+int
+_init(void)
+{
+ return (mod_install(&modlinkage));
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ return (mod_remove(&modlinkage));
+}
diff --git a/usr/src/uts/common/io/inotify.conf b/usr/src/uts/common/io/inotify.conf
new file mode 100644
index 0000000000..ce9da6180f
--- /dev/null
+++ b/usr/src/uts/common/io/inotify.conf
@@ -0,0 +1,16 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+name="inotify" parent="pseudo" instance=0;
diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_main.c b/usr/src/uts/common/io/ixgbe/ixgbe_main.c
index 848e3470c7..117b7da16a 100644
--- a/usr/src/uts/common/io/ixgbe/ixgbe_main.c
+++ b/usr/src/uts/common/io/ixgbe/ixgbe_main.c
@@ -1792,6 +1792,7 @@ ixgbe_cbfunc(dev_info_t *dip, ddi_cb_action_t cbaction, void *cbarg,
void *arg1, void *arg2)
{
ixgbe_t *ixgbe = (ixgbe_t *)arg1;
+ int prev = ixgbe->intr_cnt;
switch (cbaction) {
/* IRM callback */
@@ -1805,7 +1806,8 @@ ixgbe_cbfunc(dev_info_t *dip, ddi_cb_action_t cbaction, void *cbarg,
if (ixgbe_intr_adjust(ixgbe, cbaction, count) !=
DDI_SUCCESS) {
ixgbe_error(ixgbe,
- "IRM CB: Failed to adjust interrupts");
+ "IRM CB: Failed to adjust interrupts [%d %d %d]",
+ cbaction, count, prev);
goto cb_fail;
}
break;
diff --git a/usr/src/uts/common/io/ksocket/ksocket.c b/usr/src/uts/common/io/ksocket/ksocket.c
index 49ca6f0475..8944fcbff3 100644
--- a/usr/src/uts/common/io/ksocket/ksocket.c
+++ b/usr/src/uts/common/io/ksocket/ksocket.c
@@ -22,6 +22,7 @@
/*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#include <sys/file.h>
@@ -820,7 +821,7 @@ ksocket_spoll(ksocket_t ks, int timo, short events, short *revents,
if (error != 0 || *revents != 0)
break;
- if (pcp->pc_flag & T_POLLWAKE)
+ if (pcp->pc_flag & PC_POLLWAKE)
continue;
if (timo == -1) {
diff --git a/usr/src/uts/common/io/mac/mac.c b/usr/src/uts/common/io/mac/mac.c
index ed809e5f45..42b83c7daf 100644
--- a/usr/src/uts/common/io/mac/mac.c
+++ b/usr/src/uts/common/io/mac/mac.c
@@ -3094,6 +3094,9 @@ mac_prop_check_size(mac_prop_id_t id, uint_t valsize, boolean_t is_range)
case MAC_PROP_WL_MLME:
minsize = sizeof (wl_mlme_t);
break;
+ case MAC_PROP_VN_PROMISC_FILTERED:
+ minsize = sizeof (boolean_t);
+ break;
}
return (valsize >= minsize);
diff --git a/usr/src/uts/common/io/mac/mac_client.c b/usr/src/uts/common/io/mac/mac_client.c
index 078d0e816d..88620518f1 100644
--- a/usr/src/uts/common/io/mac/mac_client.c
+++ b/usr/src/uts/common/io/mac/mac_client.c
@@ -1344,6 +1344,7 @@ mac_client_open(mac_handle_t mh, mac_client_handle_t *mchp, char *name,
mcip->mci_p_unicast_list = NULL;
mcip->mci_direct_rx_fn = NULL;
mcip->mci_direct_rx_arg = NULL;
+ mcip->mci_vidcache = MCIP_VIDCACHE_INVALID;
mcip->mci_unicast_list = NULL;
@@ -3255,7 +3256,8 @@ mac_promisc_add(mac_client_handle_t mch, mac_client_promisc_type_t type,
}
if ((mcip->mci_state_flags & MCIS_IS_VNIC) &&
- type == MAC_CLIENT_PROMISC_ALL) {
+ type == MAC_CLIENT_PROMISC_ALL &&
+ (mcip->mci_protect_flags & MPT_FLAG_PROMISC_FILTERED)) {
/*
* The function is being invoked by the upper MAC client
* of a VNIC. The VNIC should only see the traffic
@@ -4134,16 +4136,15 @@ mac_info_get(const char *name, mac_info_t *minfop)
/*
* To get the capabilities that MAC layer cares about, such as rings, factory
* mac address, vnic or not, it should directly invoke this function. If the
- * link is part of a bridge, then the only "capability" it has is the inability
- * to do zero copy.
+ * link is part of a bridge, then the link is unable to do zero copy.
*/
boolean_t
i_mac_capab_get(mac_handle_t mh, mac_capab_t cap, void *cap_data)
{
mac_impl_t *mip = (mac_impl_t *)mh;
- if (mip->mi_bridge_link != NULL)
- return (cap == MAC_CAPAB_NO_ZCOPY);
+ if (mip->mi_bridge_link != NULL && cap == MAC_CAPAB_NO_ZCOPY)
+ return (B_TRUE);
else if (mip->mi_callbacks->mc_callbacks & MC_GETCAPAB)
return (mip->mi_getcapab(mip->mi_driver, cap, cap_data));
else
@@ -4800,6 +4801,8 @@ mac_client_add_to_flow_list(mac_client_impl_t *mcip, flow_entry_t *flent)
*/
rw_enter(&mcip->mci_rw_lock, RW_WRITER);
+ mcip->mci_vidcache = MCIP_VIDCACHE_INVALID;
+
/* Add it to the head */
flent->fe_client_next = mcip->mci_flent_list;
mcip->mci_flent_list = flent;
@@ -4830,6 +4833,8 @@ mac_client_remove_flow_from_list(mac_client_impl_t *mcip, flow_entry_t *flent)
* using mci_rw_lock
*/
rw_enter(&mcip->mci_rw_lock, RW_WRITER);
+ mcip->mci_vidcache = MCIP_VIDCACHE_INVALID;
+
while ((fe != NULL) && (fe != flent)) {
prev_fe = fe;
fe = fe->fe_client_next;
@@ -4858,6 +4863,14 @@ mac_client_check_flow_vid(mac_client_impl_t *mcip, uint16_t vid)
{
flow_entry_t *flent;
uint16_t mci_vid;
+ uint32_t cache = mcip->mci_vidcache;
+
+ /*
+ * In hopes of not having to touch the mci_rw_lock, check to see if
+ * this vid matches our cached result.
+ */
+ if (MCIP_VIDCACHE_ISVALID(cache) && MCIP_VIDCACHE_VID(cache) == vid)
+ return (MCIP_VIDCACHE_BOOL(cache) ? B_TRUE : B_FALSE);
/* The mci_flent_list is protected by mci_rw_lock */
rw_enter(&mcip->mci_rw_lock, RW_WRITER);
@@ -4865,10 +4878,13 @@ mac_client_check_flow_vid(mac_client_impl_t *mcip, uint16_t vid)
flent = flent->fe_client_next) {
mci_vid = i_mac_flow_vid(flent);
if (vid == mci_vid) {
+ mcip->mci_vidcache = MCIP_VIDCACHE_CACHE(vid, B_TRUE);
rw_exit(&mcip->mci_rw_lock);
return (B_TRUE);
}
}
+
+ mcip->mci_vidcache = MCIP_VIDCACHE_CACHE(vid, B_FALSE);
rw_exit(&mcip->mci_rw_lock);
return (B_FALSE);
}
@@ -5521,3 +5537,23 @@ mac_client_set_rings(mac_client_handle_t mch, int rxrings, int txrings)
mrp->mrp_ntxrings = txrings;
}
}
+
+boolean_t
+mac_get_promisc_filtered(mac_client_handle_t mch)
+{
+ mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
+
+ return (mcip->mci_protect_flags & MPT_FLAG_PROMISC_FILTERED);
+}
+
+void
+mac_set_promisc_filtered(mac_client_handle_t mch, boolean_t enable)
+{
+ mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
+
+ ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
+ if (enable)
+ mcip->mci_protect_flags |= MPT_FLAG_PROMISC_FILTERED;
+ else
+ mcip->mci_protect_flags &= ~MPT_FLAG_PROMISC_FILTERED;
+}
diff --git a/usr/src/uts/common/io/mac/mac_protect.c b/usr/src/uts/common/io/mac/mac_protect.c
index cd7fcb9a5d..e341f3d562 100644
--- a/usr/src/uts/common/io/mac/mac_protect.c
+++ b/usr/src/uts/common/io/mac/mac_protect.c
@@ -2297,6 +2297,9 @@ mac_protect_init(mac_client_impl_t *mcip)
sizeof (dhcpv6_cid_t), offsetof(dhcpv6_cid_t, dc_node));
avl_create(&mcip->mci_v6_dyn_ip, compare_dhcpv6_ip,
sizeof (dhcpv6_addr_t), offsetof(dhcpv6_addr_t, da_node));
+
+ if (mcip->mci_state_flags & MCIS_IS_VNIC)
+ mcip->mci_protect_flags |= MPT_FLAG_PROMISC_FILTERED;
}
void
diff --git a/usr/src/uts/common/io/mac/mac_stat.c b/usr/src/uts/common/io/mac/mac_stat.c
index 31972f94d8..c1a5c9c069 100644
--- a/usr/src/uts/common/io/mac/mac_stat.c
+++ b/usr/src/uts/common/io/mac/mac_stat.c
@@ -21,6 +21,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2013 Joyent, Inc. All rights reserved.
*/
/*
@@ -390,8 +391,8 @@ i_mac_stat_create(void *handle, const char *modname, const char *statname,
kstat_t *ksp;
kstat_named_t *knp;
- ksp = kstat_create(modname, 0, statname, "net",
- KSTAT_TYPE_NAMED, count, 0);
+ ksp = kstat_create_zone(modname, 0, statname, "net",
+ KSTAT_TYPE_NAMED, count, 0, getzoneid());
if (ksp == NULL)
return (NULL);
@@ -948,9 +949,9 @@ mac_driver_stat_create(mac_impl_t *mip)
major_t major = getmajor(mip->mi_phy_dev);
count = MAC_MOD_NKSTAT + MAC_NKSTAT + mip->mi_type->mt_statcount;
- ksp = kstat_create((const char *)ddi_major_to_name(major),
+ ksp = kstat_create_zone((const char *)ddi_major_to_name(major),
getminor(mip->mi_phy_dev) - 1, MAC_KSTAT_NAME,
- MAC_KSTAT_CLASS, KSTAT_TYPE_NAMED, count, 0);
+ MAC_KSTAT_CLASS, KSTAT_TYPE_NAMED, count, 0, getzoneid());
if (ksp == NULL)
return;
diff --git a/usr/src/uts/common/io/mr_sas/mr_sas.conf b/usr/src/uts/common/io/mr_sas/mr_sas.conf
index cfda434e23..6c585c6a42 100644
--- a/usr/src/uts/common/io/mr_sas/mr_sas.conf
+++ b/usr/src/uts/common/io/mr_sas/mr_sas.conf
@@ -13,3 +13,11 @@
# Fast-Path specific flag. Default is "yes".
# mrsas-enable-fp="yes";
+flow_control="dmult" queue="qsort" tape="sctp";
+
+# MSI specific flag. To enable MSI modify the flag value to "yes"
+mrsas-enable-msi="yes";
+
+# Fast-Path specific flag. To enable Fast-Path modify the flag value to "yes"
+mrsas-enable-fp="yes";
+
diff --git a/usr/src/uts/common/io/nfp/THIRDPARTYLICENSE b/usr/src/uts/common/io/nfp/THIRDPARTYLICENSE
new file mode 100644
index 0000000000..187088ff34
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/THIRDPARTYLICENSE
@@ -0,0 +1,19 @@
+Copyright (c) 2014, Thales UK Limited
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/usr/src/uts/common/io/nfp/THIRDPARTYLICENSE.descrip b/usr/src/uts/common/io/nfp/THIRDPARTYLICENSE.descrip
new file mode 100644
index 0000000000..cde8b65b37
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/THIRDPARTYLICENSE.descrip
@@ -0,0 +1 @@
+NFAST CRYPTO ACCELERATOR DRIVER
diff --git a/usr/src/uts/common/io/nfp/autoversion.h b/usr/src/uts/common/io/nfp/autoversion.h
new file mode 100644
index 0000000000..b9021942b2
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/autoversion.h
@@ -0,0 +1,21 @@
+/*
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+*/
+
+/* AUTOGENERATED - DO NOT EDIT */
+#ifndef AUTOVERSION_H
+#define AUTOVERSION_H
+
+#define VERSION_RELEASEMAJOR 2
+#define VERSION_RELEASEMINOR 26
+#define VERSION_RELEASEPATCH 40
+#define VERSION_NO "2.26.40cam999"
+#define VERSION_COMPNAME "nfdrv"
+
+#endif
diff --git a/usr/src/uts/common/io/nfp/drvlist.c b/usr/src/uts/common/io/nfp/drvlist.c
new file mode 100644
index 0000000000..a04b1fd5b0
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/drvlist.c
@@ -0,0 +1,19 @@
+/*
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+*/
+
+#include "nfp_common.h"
+#include "nfp_cmd.h"
+
+const nfpcmd_dev *nfp_drvlist[] = {
+ &i21285_cmddev,
+ &i21555_cmddev,
+ NULL
+};
+
diff --git a/usr/src/uts/common/io/nfp/hostif.c b/usr/src/uts/common/io/nfp/hostif.c
new file mode 100644
index 0000000000..684be703ea
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/hostif.c
@@ -0,0 +1,1192 @@
+/*
+
+hostif.c: nFast PCI driver for Solaris 2.5, 2.6, 2.7 and 2.8
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+history
+
+06/05/1998 jsh Original solaris 2.6
+21/05/1999 jsh added support for solaris 2.5
+10/06/1999 jsh added support for solaris 2.7 (32 and 64 bit)
+??/??/2001 jsh added support for solaris 2.8 (32 and 64 bit)
+16/10/2001 jsh moved from nfast to new structure in nfdrv
+12/02/2002 jsh added high level interrupt support
+
+*/
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/errno.h>
+#include <sys/file.h>
+#include <sys/conf.h>
+#include <sys/uio.h>
+#include <sys/map.h>
+#include <sys/debug.h>
+#include <sys/modctl.h>
+#include <sys/kmem.h>
+#include <sys/cmn_err.h>
+#include <sys/open.h>
+#include <sys/stat.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/pci.h>
+
+#include "nfp_common.h"
+#include "nfp_hostif.h"
+#include "nfp_osif.h"
+#include "nfp_cmd.h"
+
+#include "nfp.h"
+
+/* mapped memory attributes, no-swap endianess (done in higher level) */
+static struct ddi_device_acc_attr nosw_attr = {
+ DDI_DEVICE_ATTR_V0,
+ DDI_NEVERSWAP_ACC,
+ DDI_STRICTORDER_ACC
+};
+
+/* dma attributes */
+static ddi_dma_attr_t dma_attrs = {
+ DMA_ATTR_V0, /* version number */
+ (uint64_t)0x0, /* low address */
+ (uint64_t)0xffffffff, /* high address */
+ (uint64_t)0xffffff, /* DMA counter max */
+ (uint64_t)0x1, /* alignment */
+ 0x0c, /* burst sizes */
+ 0x1, /* minimum transfer size */
+ (uint64_t)0x3ffffff, /* maximum transfer size */
+ (uint64_t)0x7fff, /* maximum segment size */
+ 1, /* no scatter/gather lists */
+ 1, /* granularity */
+ 0 /* DMA flags */
+};
+
+/*
+ * Debug message control
+ * Debug Levels:
+ * 0 = no messages
+ * 1 = Errors
+ * 2 = Subroutine calls & control flow
+ * 3 = I/O Data (verbose!)
+ * Can be set with adb or in the /etc/system file with
+ * "set nfp:nfp_debug=<value>"
+ */
+
+int nfp_debug= 1;
+
+static void *state_head; /* opaque handle top of state structs */
+
+static int nfp_open(dev_t *dev, int openflags, int otyp, cred_t *credp);
+static int nfp_close(dev_t dev, int openflags, int otyp, cred_t *credp);
+static int nfp_release_dev( dev_info_t *dip );
+
+static int nfp_read(dev_t dev, struct uio *uiop, cred_t *credp);
+static int nfp_write(dev_t dev, struct uio *uiop, cred_t *credp);
+static int nfp_strategy(struct buf *bp);
+
+static int nfp_ioctl(dev_t dev, int cmd, ioctlptr_t arg, int mode, cred_t *credp, int *rvalp);
+static int nfp_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
+ struct pollhead **phpp);
+
+static void nfp_wrtimeout (void *pdev);
+static void nfp_rdtimeout (void *pdev);
+
+static int nfp_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result);
+static int nfp_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
+static int nfp_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
+
+static void nfp_read_complete_final(nfp_dev *pdev, int ok);
+static void nfp_write_complete_final(nfp_dev *pdev, int ok);
+
+/* nfp file ops --------------------------------------------------- */
+
+static struct cb_ops nfp_cb_ops = {
+ nfp_open,
+ nfp_close,
+ nodev, /* no nfp_strategy */
+ nodev, /* no print routine */
+ nodev, /* no dump routine */
+ nfp_read,
+ nfp_write,
+ nfp_ioctl,
+ nodev, /* no devmap routine */
+ nodev, /* no mmap routine */
+ nodev, /* no segmap routine */
+ nfp_chpoll,
+ ddi_prop_op,
+ 0, /* not a STREAMS driver, no cb_str routine */
+ D_NEW | D_MP | EXTRA_CB_FLAGS, /* must be safe for multi-thread/multi-processor */
+ CB_REV,
+ nodev, /* aread */
+ nodev /* awrite */
+};
+
+static struct dev_ops nfp_ops = {
+ DEVO_REV, /* DEVO_REV indicated by manual */
+ 0, /* device reference count */
+ nfp_getinfo,
+ nulldev, /* identify */
+ nulldev, /* probe */
+ nfp_attach,
+ nfp_detach,
+ nodev, /* device reset routine */
+ &nfp_cb_ops,
+ (struct bus_ops *)0, /* bus operations */
+};
+
+extern struct mod_ops mod_driverops;
+static struct modldrv modldrv = {
+ &mod_driverops,
+ NFP_DRVNAME,
+ &nfp_ops,
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, /* MODREV_1 indicated by manual */
+ (void *)&modldrv,
+ NULL, /* termination of list of linkage structures */
+};
+
+/* interface resource allocation */
+
+int nfp_alloc_pci_push( nfp_dev *pdev ) {
+ /* allocate resources needed for PCI Push,
+ * if not already allocated.
+ * return True if successful
+ */
+ nfp_err ret;
+ uint_t cookie_count;
+ size_t real_length;
+
+ if(!pdev->read_buf) {
+ /* allocate read buffer */
+ pdev->read_buf = kmem_zalloc( NFP_READBUF_SIZE, KM_NOSLEEP );
+ }
+ if(!pdev->read_buf) {
+ nfp_log( NFP_DBG1, "nfp_attach: kmem_zalloc read buffer failed");
+ pdev->read_buf = NULL;
+ return 0;
+ }
+
+ if(!pdev->rd_dma_ok) {
+ /* allocate dma handle for read buffer */
+ ret = ddi_dma_alloc_handle( pdev->dip,
+ &dma_attrs,
+ DDI_DMA_DONTWAIT,
+ NULL,
+ &pdev->read_dma_handle );
+ if( ret != DDI_SUCCESS ) {
+ nfp_log( NFP_DBG1,
+ "nfp_alloc_pci_push: ddi_dma_alloc_handle failed (%d)",
+ ret );
+ return 0;
+ }
+
+ /* Allocate the memory for dma transfers */
+ ret = ddi_dma_mem_alloc(pdev->read_dma_handle, NFP_READBUF_SIZE, &nosw_attr,
+ DDI_DMA_CONSISTENT, DDI_DMA_DONTWAIT, NULL,
+ (caddr_t*)&pdev->read_buf, &real_length, &pdev->acchandle);
+ if (ret != DDI_SUCCESS) {
+ nfp_log( NFP_DBG1, "nfp_alloc_pci_push: ddi_dma_mem_alloc failed (%d)", ret);
+ ddi_dma_free_handle( &pdev->read_dma_handle );
+ return 0;
+ }
+
+ ret = ddi_dma_addr_bind_handle( pdev->read_dma_handle,
+ NULL, /* kernel address space */
+ (caddr_t)pdev->read_buf, real_length,
+ DDI_DMA_READ | DDI_DMA_CONSISTENT, /* dma flags */
+ DDI_DMA_DONTWAIT, NULL,
+ &pdev->read_dma_cookie, &cookie_count );
+ if( ret != DDI_DMA_MAPPED ) {
+ nfp_log( NFP_DBG1,
+ "nfp_alloc_pci_push: ddi_dma_addr_bind_handle failed (%d)",
+ ret);
+ ddi_dma_mem_free(&pdev->acchandle);
+ ddi_dma_free_handle( &pdev->read_dma_handle );
+ return 0;
+ }
+ if( cookie_count > 1 ) {
+ nfp_log( NFP_DBG1,
+ "nfp_alloc_pci_push: error:"
+ " ddi_dma_addr_bind_handle wants %d transfers",
+ cookie_count);
+ ddi_dma_mem_free(&pdev->acchandle);
+ (void) ddi_dma_unbind_handle( pdev->read_dma_handle );
+ ddi_dma_free_handle( &pdev->read_dma_handle );
+ return 0;
+ }
+ pdev->rd_dma_ok = 1;
+ }
+ return pdev->rd_dma_ok;
+}
+
+void nfp_free_pci_push( nfp_dev *pdev ) {
+ /* free resources allocated to PCI Push */
+ if( pdev->rd_dma_ok ) {
+ (void) ddi_dma_sync(pdev->read_dma_handle,0,0,DDI_DMA_SYNC_FORKERNEL);
+ ddi_dma_mem_free(&pdev->acchandle);
+ (void) ddi_dma_unbind_handle( pdev->read_dma_handle );
+ ddi_dma_free_handle( &pdev->read_dma_handle );
+ pdev->rd_dma_ok = 0;
+ }
+ if( pdev->read_buf ) {
+ kmem_free( pdev->read_buf, NFP_READBUF_SIZE );
+ pdev->read_buf = NULL;
+ }
+}
+
+/* include definition of nfp_set_ifvers() */
+#define nfp_ifvers NFDEV_IF_PCI_PUSH
+#include "nfp_ifvers.c"
+#undef nfp_ifvers
+
+/*--------------------*/
+/* nfp_isr */
+/*--------------------*/
+
+static u_int nfp_isr( char *pdev_in ) {
+ /* LINTED: alignment */
+ nfp_dev *pdev= (nfp_dev *)pdev_in;
+ nfp_err ne;
+ int handled;
+
+ nfp_log( NFP_DBG3, "nfp_isr: entered");
+
+ if( !pdev ) {
+ nfp_log( NFP_DBG1, "nfp_isr: cannot find dev");
+ return DDI_INTR_UNCLAIMED;
+ }
+
+ /* The isr needs to be mutex'ed - an SMP can call us while we're still
+ * running!
+ */
+ mutex_enter(&pdev->low_mutex);
+ ne= pdev->cmddev->isr( pdev->common.cmdctx, &handled );
+ mutex_exit(&pdev->low_mutex);
+
+ if( !ne && handled )
+ return DDI_INTR_CLAIMED;
+ if (ne)
+ nfp_log( NFP_DBG1, "nfp_isr: failed");
+ else
+ nfp_log( NFP_DBG3, "nfp_isr: unclaimed");
+ return DDI_INTR_UNCLAIMED;
+}
+
+static u_int nfp_soft_isr( char *pdev_in ) {
+ /* LINTED: alignment */
+ nfp_dev *pdev= (nfp_dev *)pdev_in;
+ int rd, wr;
+
+ nfp_log( NFP_DBG3, "nfp_soft_isr: entered");
+
+ if( !pdev ) {
+ nfp_log( NFP_DBG1, "nfp_soft_isr: cannot find dev");
+ return DDI_INTR_UNCLAIMED;
+ }
+ rd= wr= 0;
+
+ mutex_enter(&pdev->high_mutex);
+ if(pdev->high_read) {
+ pdev->high_read= 0;
+ mutex_exit(&pdev->high_mutex);
+ rd= 1;
+ }
+ if(pdev->high_write) {
+ pdev->high_write= 0;
+ wr= 1;
+ }
+ mutex_exit(&pdev->high_mutex);
+
+ if(rd) {
+ nfp_log( NFP_DBG3, "nfp_soft_isr: read done");
+ nfp_read_complete_final(pdev, pdev->rd_ok);
+ }
+ if(wr) {
+ nfp_log( NFP_DBG3, "nfp_soft_isr: write done");
+ nfp_write_complete_final(pdev, pdev->wr_ok);
+ }
+ if( rd || wr )
+ return DDI_INTR_CLAIMED;
+
+ nfp_log( NFP_DBG2, "nfp_isr: unclaimed");
+ return DDI_INTR_UNCLAIMED;
+}
+
+
+/*-------------------------*/
+/* nfp_read */
+/*-------------------------*/
+
+void nfp_read_complete(nfp_dev *pdev, int ok) {
+ nfp_log( NFP_DBG2,"nfp_read_complete: entering");
+
+ if(pdev->high_intr) {
+ nfp_log(NFP_DBG2, "nfp_read_complete: high_intr");
+ mutex_enter(&pdev->high_mutex);
+ nfp_log(NFP_DBG3, "nfp_read_complete: high_mutex entered");
+ if(pdev->high_read)
+ nfp_log(NFP_DBG1, "nfp_read_complete: high_read allread set!");
+ pdev->high_read= 1;
+ pdev->rd_ok= ok;
+ nfp_log(NFP_DBG3, "nfp_read_complete: exiting high_mutex");
+ mutex_exit(&pdev->high_mutex);
+ ddi_trigger_softintr(pdev->soft_int_id);
+ } else
+ nfp_read_complete_final( pdev, ok );
+ nfp_log( NFP_DBG2,"nfp_read_complete: exiting");
+}
+
+static void nfp_read_complete_final(nfp_dev *pdev, int ok) {
+ nfp_log( NFP_DBG2,"nfp_read_complete_final: entering");
+ if(pdev->rdtimeout)
+ (void) untimeout(pdev->rdtimeout);
+ if(!pdev->rd_outstanding) {
+ nfp_log( NFP_DBG1,"nfp_read_complete_final: !pdev->rd_outstanding");
+ }
+ nfp_log( NFP_DBG2,"nfp_read_complete_final: pdev->rd_outstanding=0, ok %d", ok);
+ mutex_enter(&pdev->isr_mutex);
+ pdev->rd_outstanding= 0;
+ pdev->rd_ready= 1;
+ pdev->rd_ok= ok;
+ cv_broadcast(&pdev->rd_cv);
+ mutex_exit(&pdev->isr_mutex);
+ pollwakeup (&pdev->pollhead, POLLRDNORM);
+ nfp_log( NFP_DBG2,"nfp_read_complete_final: exiting");
+}
+
+static void nfp_rdtimeout( void *pdev_in )
+{
+ nfp_dev *pdev= (nfp_dev *)pdev_in;
+
+ nfp_log( NFP_DBG1, "nfp_rdtimeout: read timed out");
+
+ if (!pdev) {
+ nfp_log( NFP_DBG1, "nfp_rdtimeout: NULL pdev." );
+ return;
+ }
+ pdev->rdtimeout= 0;
+ nfp_read_complete_final(pdev, 0);
+}
+
+/* ARGSUSED */
+static int nfp_read(dev_t dev, struct uio *uiop, cred_t *credp) {
+ int ret;
+ nfp_log( NFP_DBG2, "nfp_read: entered" );
+ if (ddi_get_soft_state(state_head, getminor(dev)) != NULL) {
+ nfp_log( NFP_DBG1, "nfp_read: unable to get nfp_dev");
+ return (ENODEV);
+ }
+ nfp_log( NFP_DBG2, "nfp_read: about to physio." );
+ ret = physio(nfp_strategy, (struct buf *)0, dev, B_READ, minphys, uiop );
+ if(ret)
+ nfp_log( NFP_DBG1, "nfp_read: physio returned %x.", ret );
+ return ret;
+}
+
+/*-------------------------*/
+/* nfp_write */
+/*-------------------------*/
+
+void nfp_write_complete( nfp_dev *pdev, int ok) {
+ nfp_log( NFP_DBG2,"nfp_write_complete: entering");
+
+ if(pdev->high_intr) {
+ mutex_enter(&pdev->high_mutex);
+ if(pdev->high_write)
+ nfp_log(NFP_DBG1, "nfp_write_complete: high_write allread set!");
+ pdev->high_write= 1;
+ pdev->wr_ok= ok;
+ mutex_exit(&pdev->high_mutex);
+ ddi_trigger_softintr(pdev->soft_int_id);
+ } else
+ nfp_write_complete_final( pdev, ok );
+ nfp_log( NFP_DBG2,"nfp_write_complete: exiting");
+}
+
+static void nfp_write_complete_final( nfp_dev *pdev, int ok) {
+ struct buf *local_wr_bp;
+ nfp_log( NFP_DBG2,"nfp_write_complete_final: entering");
+ if(pdev->wrtimeout)
+ (void) untimeout(pdev->wrtimeout);
+
+ if (!pdev->wr_bp) {
+ nfp_log( NFP_DBG2, "nfp_write_complete_final: write: wr_bp == NULL." );
+ return;
+ }
+
+ bp_mapout(pdev->wr_bp);
+ pdev->wr_bp->b_resid = ok ? 0 : pdev->wr_bp->b_bcount;
+ /* Make sure we set wr_ready before calling biodone to avoid a race */
+ pdev->wr_ready = 1;
+ bioerror(pdev->wr_bp, ok ? 0 : ENXIO);
+ local_wr_bp = pdev->wr_bp;
+ pdev->wr_bp = 0;
+ biodone(local_wr_bp);
+ nfp_log( NFP_DBG2, "nfp_write_complete_final: isr_mutex extited");
+ pollwakeup (&pdev->pollhead, POLLWRNORM);
+
+ nfp_log( NFP_DBG2, "nfp_write_complete_final: leaving");
+}
+
+static void nfp_wrtimeout( void *pdev_in )
+{
+ nfp_dev *pdev= (nfp_dev *)pdev_in;
+
+ nfp_log( NFP_DBG1, "nfp_wrtimeout: write timed out");
+
+ if (!pdev) {
+ nfp_log( NFP_DBG1, "nfp_wrtimeout: NULL pdev." );
+ return;
+ }
+ pdev->wrtimeout= 0;
+ nfp_write_complete_final(pdev, 0);
+}
+
+/* ARGSUSED */
+static int nfp_write(dev_t dev, struct uio *uiop, cred_t *credp) {
+ int ret;
+ nfp_log( NFP_DBG2, "nfp_write: entered." );
+ if (ddi_get_soft_state(state_head, getminor(dev)) == NULL) {
+ nfp_log( NFP_DBG1, "nfp_chread: unable to get nfp_dev.");
+ return (ENODEV);
+ }
+ nfp_log( NFP_DBG2, "nfp_write: about to physio." );
+ ret = physio(nfp_strategy, (struct buf *)0, dev, B_WRITE, minphys, uiop );
+ if(ret)
+ nfp_log( NFP_DBG1, "nfp_write: physio returned %x.", ret );
+ return ret;
+}
+
+/*-------------------------*/
+/* nfp_strategy */
+/*-------------------------*/
+
+#define NFP_STRAT_ERR(thebp,err,txt) \
+ nfp_log( NFP_DBG1, "nfp_strategy: " txt ".\n"); \
+ (thebp)->b_resid = (thebp)->b_bcount; \
+ bioerror ((thebp), err); \
+ biodone ((thebp));
+
+static int nfp_strategy(struct buf *bp) {
+ register struct nfp_dev *pdev;
+ nfp_err ne;
+
+ nfp_log( NFP_DBG2, "nfp_strategy: entered." );
+ if (!(pdev = ddi_get_soft_state(state_head, getminor(bp->b_edev)))) {
+ NFP_STRAT_ERR (bp, ENXIO, "unable to get nfp_dev");
+ return (0);
+ }
+
+ if (bp->b_flags & B_READ) {
+ int count;
+ /* read */
+ if (!pdev->rd_ready) {
+ NFP_STRAT_ERR (bp,ENXIO,"read called when not ready");
+ return (0);
+ }
+ pdev->rd_ready=0;
+ pdev->rd_pending = 0;
+ if( !pdev->rd_ok) {
+ NFP_STRAT_ERR (bp,ENXIO,"read failed");
+ return (0);
+ }
+ /* copy data from module */
+ if(pdev->ifvers >= NFDEV_IF_PCI_PUSH) {
+ nfp_log( NFP_DBG3, "nfp_strategy: copying kernel read buffer");
+ if( ddi_dma_sync(pdev->read_dma_handle,0,0,DDI_DMA_SYNC_FORKERNEL) != DDI_SUCCESS )
+ {
+ NFP_STRAT_ERR(bp,ENXIO,"ddi_dma_sync(read_dma_handle) failed");
+ return (0);
+ }
+ /* LINTED: alignment */
+ count= *(unsigned int *)(pdev->read_buf+4);
+ count= FROM_LE32_MEM(&count);
+ nfp_log( NFP_DBG3, "nfp_strategy: read count %d", count);
+ if(count<0 || count>bp->b_bcount) {
+ NFP_STRAT_ERR(bp,ENXIO,"bad read byte count from device");
+ nfp_log( NFP_DBG1, "nfp_strategy: bad read byte count (%d) from device", count);
+ return (0);
+ }
+ bp_mapin (bp);
+ bcopy( pdev->read_buf + 8, bp->b_un.b_addr, count );
+ bp_mapout (bp);
+ } else {
+ bp_mapin (bp);
+ ne= pdev->cmddev->read_block( bp->b_un.b_addr, bp->b_bcount, pdev->common.cmdctx, &count );
+ bp_mapout (bp);
+ if( ne != NFP_SUCCESS) {
+ NFP_STRAT_ERR (bp,nfp_oserr(ne),"read_block failed");
+ return (0);
+ }
+ }
+ bioerror(bp, 0);
+ bp->b_resid = 0;
+ biodone (bp);
+ } else {
+ /* write */
+ if (!pdev->wr_ready) {
+ NFP_STRAT_ERR (bp,ENXIO,"write called when not ready");
+ return (0);
+ }
+ if (pdev->wr_bp) {
+ NFP_STRAT_ERR (bp,ENXIO,"wr_bp != NULL");
+ return (0);
+ }
+ pdev->wrtimeout= timeout(nfp_wrtimeout, (caddr_t)pdev, NFP_TIMEOUT_SEC * drv_usectohz(1000000));
+ pdev->wr_bp = bp;
+ pdev->wr_ready = 0;
+ bp_mapin (bp);
+ ne= pdev->cmddev->write_block( bp->b_un.b_addr, bp->b_bcount, pdev->common.cmdctx);
+ if( ne != NFP_SUCCESS ) {
+ bp_mapout (bp);
+ (void) untimeout(pdev->wrtimeout);
+ pdev->wr_bp = 0;
+ pdev->wr_ready = 1;
+ NFP_STRAT_ERR (bp,nfp_oserr(ne),"write failed");
+ return (0);
+ }
+ }
+ nfp_log( NFP_DBG2, "nfp_strategy: leaving");
+
+ return (0);
+}
+
+
+/*--------------------*/
+/* poll / select */
+/*--------------------*/
+
+static int nfp_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
+ struct pollhead **phpp) {
+ nfp_dev *pdev;
+ short revents;
+
+ if (!(pdev = ddi_get_soft_state(state_head, getminor(dev)))) {
+ nfp_log( NFP_DBG1, "nfp_chpoll: unable to get nfp_dev");
+ *reventsp=0;
+ return (0);
+ }
+ nfp_log( NFP_DBG2, "nfp_chpoll: entered %x", events);
+
+ revents=0;
+ if (events&POLLWRNORM) {
+ if (pdev->wr_ready) {
+ nfp_log( NFP_DBG2, "nfp_chpoll: write ready");
+ revents|=POLLWRNORM;
+ }
+ }
+
+ if (events&POLLRDNORM) {
+ if (pdev->rd_ready) {
+ nfp_log( NFP_DBG2, "nfp_chpoll: read ready");
+ revents|=POLLRDNORM;
+ }
+ }
+
+ if (!revents && !anyyet) {
+ *phpp=&pdev->pollhead;
+ }
+ *reventsp=revents;
+
+ nfp_log( NFP_DBG2, "nfp_chpoll: leaving");
+ return (0);
+}
+
+
+/*--------------------*/
+/* ioctl */
+/*--------------------*/
+
+/* ARGSUSED */
+static int nfp_ioctl(dev_t dev, int cmd, ioctlptr_t arg, int mode, cred_t *credp, int *rvalp) {
+ register struct nfp_dev *pdev;
+
+ nfp_log( NFP_DBG2, "nfp_ioctl: entered." );
+
+ if (!(pdev = ddi_get_soft_state(state_head, getminor(dev)))) {
+ nfp_log( NFP_DBG1, "nfp_ioctl: unable to get nfp dev.");
+ return (ENXIO);
+ }
+
+ switch (cmd) {
+ case NFDEV_IOCTL_ENQUIRY:
+ {
+ long *outp;
+ int outlen;
+ nfdev_enquiry_str enq_data;
+
+ enq_data.busno = (unsigned int)-1;
+ enq_data.slotno = (unsigned char)-1;
+
+ /* get our bus and slot num */
+ if (ddi_getlongprop (DDI_DEV_T_NONE,
+ pdev->dip, 0, "reg",
+ (caddr_t)&outp, &outlen) != DDI_PROP_NOT_FOUND) {
+ nfp_log( NFP_DBG2, "ddi_getlongprop('reg') ok." );
+ if( outlen > 0 ) {
+ enq_data.busno = ((*outp)>>16) & 0xff;
+ enq_data.slotno = ((*outp)>>11) & 0x1f;
+ nfp_log( NFP_DBG2, "busno %d, slotno %d.",
+ enq_data.busno, enq_data.slotno );
+ }
+ } else
+ nfp_log( NFP_DBG1, "ddi_getlongprop('reg') failed." );
+
+ if( ddi_copyout( (char *)&enq_data, (void *)arg, sizeof(enq_data), mode ) != 0 ) {
+ nfp_log( NFP_DBG1, "ddi_copyout() failed." );
+ return EFAULT;
+ }
+ }
+ break;
+
+ case NFDEV_IOCTL_ENSUREREADING:
+ {
+ unsigned int addr, len;
+ nfp_err ret;
+ if( ddi_copyin( (void *)arg, (char *)&len, sizeof(unsigned int), mode ) != 0 ) {
+ nfp_log( NFP_DBG1, "ddi_copyin() failed." );
+ return (EFAULT);
+ }
+ /* signal a read to the module */
+ nfp_log( NFP_DBG2, "nfp_ioctl: signalling read request to module, len = %x.", len );
+ if (len>8192) {
+ nfp_log( NFP_DBG1, "nfp_ioctl: len >8192 = %x.", len );
+ return EINVAL;
+ }
+ if (pdev->rd_outstanding==1) {
+ nfp_log( NFP_DBG1, "nfp_ioctl: not about to call read with read outstanding.");
+ return EIO;
+ }
+
+ addr= 0;
+ if(pdev->ifvers >= NFDEV_IF_PCI_PUSH) {
+ if( len > NFP_READBUF_SIZE ) {
+ nfp_log( NFP_DBG1, "nfp_ioctl: len > NFP_READBUF_SIZE = %x.", len );
+ return EINVAL;
+ }
+ addr= pdev->read_dma_cookie.dmac_address;
+ }
+
+ pdev->rd_outstanding = 1;
+ nfp_log( NFP_DBG2,"nfp_ioctl: pdev->rd_outstanding=1");
+
+ /* setup timeout timer */
+ pdev->rdtimeout= timeout(nfp_rdtimeout, (caddr_t)pdev, NFP_TIMEOUT_SEC * drv_usectohz(1000000));
+
+ nfp_log( NFP_DBG2, "nfp_ioctl: read request");
+ ret = pdev->cmddev->ensure_reading(addr, len, pdev->common.cmdctx);
+ if ( ret != NFP_SUCCESS ) {
+ (void) untimeout(pdev->rdtimeout);
+ pdev->rdtimeout = 0;
+ pdev->rd_outstanding = 0;
+ nfp_log( NFP_DBG1, "nfp_ioctl : cmddev->ensure_reading failed ");
+ return nfp_oserr( ret );
+ }
+ }
+ break;
+
+ case NFDEV_IOCTL_PCI_IFVERS:
+ {
+ int vers;
+
+ nfp_log( NFP_DBG2, "nfp_ioctl: NFDEV_IOCTL_PCI_IFVERS");
+
+ if( ddi_copyin( (void *)arg, (char *)&vers, sizeof(vers), mode ) != 0 ) {
+ nfp_log( NFP_DBG1, "ddi_copyin() failed." );
+ return (EFAULT);
+ }
+
+ if( pdev->rd_outstanding ) {
+ nfp_log( NFP_DBG1, "nfp_ioctl: can't set ifvers %d as read outstanding", vers);
+ return EIO;
+ }
+
+ nfp_set_ifvers(pdev, vers);
+ if( pdev->ifvers != vers ) {
+ nfp_log( NFP_DBG1, "nfp_ioctl: can't set ifvers %d", vers);
+ return EIO;
+ }
+ }
+ break;
+
+ case NFDEV_IOCTL_STATS:
+ {
+ if( ddi_copyout( (char *)&(pdev->common.stats),
+ (void *)arg,
+ sizeof(nfdev_stats_str),
+ mode ) != 0 ) {
+ nfp_log( NFP_DBG1, "ddi_copyout() failed." );
+ return EFAULT;
+ }
+ }
+ break;
+
+ default:
+ nfp_log( NFP_DBG1, "nfp_ioctl: unknown ioctl." );
+ return EINVAL;
+ }
+
+ return 0;
+}
+
+/*-------------------------*/
+/* nfp_open */
+/*-------------------------*/
+
+/* ARGSUSED */
+int nfp_open(dev_t *dev, int openflags, int otyp, cred_t *credp)
+{
+ nfp_err ret;
+ register struct nfp_dev *pdev;
+
+ nfp_log( NFP_DBG2, "entered nfp_open." );
+
+ pdev = (nfp_dev *)ddi_get_soft_state(state_head, getminor(*dev));
+
+ if( !pdev ) {
+ nfp_log( NFP_DBG1, "nfp_open: unable to get nfp dev.");
+ return (ENODEV);
+ }
+
+ if( otyp != OTYP_CHR ) {
+ nfp_log( NFP_DBG1, "nfp_open: not opened as character device");
+ return (EINVAL);
+ }
+
+ mutex_enter(&pdev->busy_mutex);
+
+ if (pdev->busy) {
+ mutex_exit(&pdev->busy_mutex);
+ nfp_log( NFP_DBG1, "nfp_open: device busy");
+ return EBUSY;
+ }
+ pdev->busy= 1;
+ mutex_exit(&pdev->busy_mutex);
+
+ /* use oldest possible interface until told otherwise */
+ pdev->ifvers= NFDEV_IF_STANDARD;
+ nfp_log( NFP_DBG3, "nfp_open: setting ifvers %d", pdev->ifvers);
+ pdev->rd_ready= 0; /* drop any old data */
+
+ ret = pdev->cmddev->open(pdev->common.cmdctx);
+ if( ret != NFP_SUCCESS ) {
+ nfp_log( NFP_DBG1, "nfp_open : cmddev->open failed ");
+ return nfp_oserr( ret );
+ }
+
+ nfp_log( NFP_DBG2, "nfp_open: done");
+
+ return 0;
+}
+
+/*--------------------*/
+/* nfp_close */
+/*--------------------*/
+
+/* ARGSUSED */
+static int nfp_close(dev_t dev, int openflags, int otyp, cred_t *credp) {
+ nfp_dev *pdev;
+ nfp_err ret;
+
+ nfp_log( NFP_DBG2, "nfp_close: entered");
+
+ pdev = (struct nfp_dev *)ddi_get_soft_state(state_head, getminor(dev));
+ if( !pdev ) {
+ nfp_log( NFP_DBG1, "nfp_close: cannot find dev.");
+ return ENODEV;
+ }
+
+ mutex_enter(&pdev->isr_mutex);
+ if(pdev->rd_outstanding) {
+ int lbolt, err;
+ nfp_get_lbolt(&lbolt, err);
+ if(!err)
+ (void) cv_timedwait(&pdev->rd_cv, &pdev->isr_mutex, lbolt + (NFP_TIMEOUT_SEC * drv_usectohz(1000000)) );
+ }
+ mutex_exit(&pdev->isr_mutex);
+ ret = pdev->cmddev->close(pdev->common.cmdctx);
+ if (ret != NFP_SUCCESS ) {
+ nfp_log( NFP_DBG1, " nfp_close : cmddev->close failed");
+ return nfp_oserr( ret );
+ }
+
+ mutex_enter(&pdev->busy_mutex);
+ pdev->busy= 0;
+ mutex_exit(&pdev->busy_mutex);
+
+ return 0;
+}
+
+/****************************************************************************
+
+ nfp driver config
+
+ ****************************************************************************/
+
+/*-------------------------*/
+/* nfp_getinfo */
+/*-------------------------*/
+
+/* ARGSUSED */
+static int nfp_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) {
+ int error;
+ nfp_dev *pdev;
+
+ nfp_log( NFP_DBG2, "nfp_getinfo: entered" );
+
+ pdev = (struct nfp_dev *)ddi_get_soft_state(state_head, getminor((dev_t)arg));
+ if( !pdev ) {
+ nfp_log( NFP_DBG1, "nfp_close: cannot find dev.");
+ return ENODEV;
+ }
+
+ switch (infocmd) {
+ case DDI_INFO_DEVT2DEVINFO:
+ if (pdev == NULL) {
+ *result = NULL;
+ error = DDI_FAILURE;
+ } else {
+ /*
+ * don't need to use a MUTEX even though we are
+ * accessing our instance structure; dev->dip
+ * never changes.
+ */
+ *result = pdev->dip;
+ error = DDI_SUCCESS;
+ }
+ break;
+ case DDI_INFO_DEVT2INSTANCE:
+ *result = (void *)(uintptr_t)getminor((dev_t)arg);
+ error = DDI_SUCCESS;
+ break;
+ default:
+ *result = NULL;
+ error = DDI_FAILURE;
+ }
+
+ nfp_log( NFP_DBG2, "nfp_getinfo: leaving." );
+ return (error);
+}
+
+/*-------------------------*/
+/* nfp_release */
+/*-------------------------*/
+
+static int nfp_release_dev( dev_info_t *dip ) {
+ nfp_dev *pdev;
+ int instance, i;
+ nfp_err ret;
+
+ nfp_log( NFP_DBG2, "nfp_release_dev: entering" );
+
+ instance = ddi_get_instance(dip);
+ pdev = (struct nfp_dev *)ddi_get_soft_state(state_head, instance);
+ if (pdev) {
+ nfp_log( NFP_DBG3, "nfp_release_dev: removing device" );
+
+ nfp_free_pci_push(pdev);
+
+ if( pdev->cmddev ) {
+ nfp_log( NFP_DBG3, "nfp_release_dev: destroying cmd dev" );
+ ret = pdev->cmddev->destroy(pdev->common.cmdctx);
+ if (ret != NFP_SUCCESS) {
+ nfp_log( NFP_DBG1, " nfp_release_dev : cmddev->destroy failed ");
+ return nfp_oserr( ret );
+ }
+ }
+
+ if(pdev->high_iblock_cookie) {
+ nfp_log( NFP_DBG3, "nfp_release_dev: removing high and soft irq" );
+ ddi_remove_softintr(pdev->soft_int_id);
+ ddi_remove_intr(pdev->dip, 0, pdev->high_iblock_cookie);
+ mutex_destroy( &pdev->busy_mutex );
+ cv_destroy( &pdev->rd_cv );
+ mutex_destroy( &pdev->isr_mutex );
+ mutex_destroy( &pdev->high_mutex );
+ } else if(pdev->iblock_cookie) {
+ nfp_log( NFP_DBG3, "nfp_release_dev: removing irq" );
+ ddi_remove_intr(pdev->dip, 0, pdev->iblock_cookie);
+ mutex_destroy( &pdev->busy_mutex );
+ cv_destroy( &pdev->rd_cv );
+ mutex_destroy( &pdev->isr_mutex );
+ }
+ if(pdev->low_iblock_cookie) {
+ ddi_remove_intr(pdev->dip, 0, pdev->low_iblock_cookie);
+ mutex_destroy( &pdev->low_mutex);
+ }
+
+ for(i=0;i<6;i++) {
+ if( pdev->common.extra[i] ) {
+ nfp_log( NFP_DBG3, "nfp_release_dev: unmapping BAR %d", i );
+ ddi_regs_map_free ((ddi_acc_handle_t *)&pdev->common.extra[i]);
+ }
+ }
+
+ ddi_remove_minor_node(dip, NULL);
+
+ if (pdev->conf_handle)
+ pci_config_teardown( &pdev->conf_handle );
+
+ ddi_soft_state_free(state_head, instance);
+ }
+ nfp_log( NFP_DBG2, "nfp_release: finished" );
+
+ return DDI_SUCCESS;
+}
+
+
+/*-------------------------*/
+/* nfp_attach */
+/*-------------------------*/
+
+static int nfp_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) {
+ int instance;
+ nfp_dev *pdev = NULL;
+ int intres;
+ uint16_t device, vendor, sub_device, sub_vendor;
+ long *outp;
+ nfpcmd_dev const *cmddev;
+ int index, i;
+ nfp_err ret;
+
+ nfp_log( NFP_DBG2, "nfp_attach: entered." );
+
+ if (cmd != DDI_ATTACH) {
+ nfp_log( NFP_DBG1, "nfp_attach: bad command." );
+ goto bailout;
+ }
+
+ instance = ddi_get_instance(dip);
+
+ if (ddi_soft_state_zalloc(state_head, instance) != 0) {
+ nfp_log( NFP_DBG1, "nfp_attach: ddi_soft_state_zalloc() failed." );
+ goto bailout;
+ }
+
+ pdev = (struct nfp_dev *)ddi_get_soft_state(state_head, instance);
+ if( !pdev ) {
+ nfp_log( NFP_DBG1, "nfp_attach: cannot find dev.");
+ return ENODEV;
+ }
+ pdev->dip = dip;
+
+ /* map in pci config registers */
+ if (pci_config_setup(dip, &pdev->conf_handle)) {
+ nfp_log( NFP_DBG1, "nfp_attach: pci_config_setup() failed." );
+ goto bailout;
+ }
+
+ /* find out what we have got */
+ vendor= PCI_CONFIG_GET16( pdev->conf_handle, PCI_CONF_VENID );
+ device = PCI_CONFIG_GET16( pdev->conf_handle, PCI_CONF_DEVID );
+ sub_vendor = PCI_CONFIG_GET16( pdev->conf_handle, PCI_CONF_SUBVENID );
+ sub_device = PCI_CONFIG_GET16( pdev->conf_handle, PCI_CONF_SUBSYSID );
+
+ index= 0;
+ while( (cmddev = nfp_drvlist[index++]) != NULL ) {
+ if( cmddev->vendorid == vendor &&
+ cmddev->deviceid == device &&
+ cmddev->sub_vendorid == sub_vendor &&
+ cmddev->sub_deviceid == sub_device )
+ break;
+ }
+ if( !cmddev ) {
+ nfp_log( NFP_DBG1, "nfp_attach: unknonw device." );
+ goto bailout;
+ }
+
+ /* map BARs */
+ for( i=0; i<6; i++ ) {
+ if( cmddev->bar_sizes[i] ) {
+ off_t size;
+ if( ddi_dev_regsize(dip, i+1, &size) != DDI_SUCCESS) {
+ nfp_log( NFP_DBG1, "nfp_attach: ddi_dev_regsize() failed for BAR %d", i );
+ goto bailout;
+ }
+ if( size < (cmddev->bar_sizes[i] & ~NFP_MEMBAR_MASK) ) {
+ nfp_log( NFP_DBG1, "nfp_attach: BAR %d too small %x (%x)", i, size, (cmddev->bar_sizes[i] & ~0xF) );
+ goto bailout;
+ }
+ if (ddi_regs_map_setup(dip, i+1, (caddr_t *)&pdev->common.bar[i],
+ 0, cmddev->bar_sizes[i] & ~NFP_MEMBAR_MASK, &nosw_attr, (ddi_acc_handle_t *)&pdev->common.extra[i] )) {
+ nfp_log( NFP_DBG1, "nfp_attach: ddi_regs_map_setup() failed for BAR %d", i );
+ goto bailout;
+ }
+ nfp_log( NFP_DBG3, "nfp_attach: BAR[%d] mapped to %x (%x)", i, pdev->common.bar[i], size );
+ }
+ }
+
+ pdev->read_buf = NULL;
+ pdev->rd_dma_ok = 0;
+
+ /* attach to minor node */
+ if (ddi_create_minor_node(dip, "nfp", S_IFCHR, instance, (char *)cmddev->name, 0) == DDI_FAILURE) {
+ ddi_remove_minor_node(dip, NULL);
+ nfp_log( NFP_DBG1, "nfp_attach: ddi_create_minor_node() failed." );
+ goto bailout;
+ }
+
+ pdev->wr_ready = 1;
+ pdev->rd_ready = 0;
+ pdev->rd_pending = 0;
+ pdev->rd_outstanding = 0;
+ pdev->busy=0;
+ pdev->cmddev= cmddev;
+
+ ret = pdev->cmddev->create(&pdev->common);
+ if( ret != NFP_SUCCESS) {
+ nfp_log( NFP_DBG1, "nfp_attach: failed to create command device");
+ goto bailout;
+ }
+ pdev->common.dev= pdev;
+
+ if (ddi_intr_hilevel(dip, 0) != 0){
+ nfp_log( NFP_DBG2, "nfp_attach: high-level interrupt");
+ if( ddi_get_iblock_cookie(dip, 0, &pdev->high_iblock_cookie) ) {
+ nfp_log( NFP_DBG1, "nfp_attach: ddi_get_iblock_cookie(high) failed." );
+ goto bailout;
+ }
+ if( ddi_get_iblock_cookie(dip, 0, &pdev->low_iblock_cookie) ) {
+ nfp_log( NFP_DBG1, "nfp_attach: ddi_get_iblock_cookie(low) failed." );
+ goto bailout;
+ }
+ mutex_init(&pdev->high_mutex, NULL, MUTEX_DRIVER,
+ (void *)pdev->high_iblock_cookie);
+ mutex_init(&pdev->low_mutex, NULL, MUTEX_DRIVER,
+ (void *)pdev->low_iblock_cookie);
+ if (ddi_add_intr(dip, 0, NULL,
+ NULL, nfp_isr,
+ (caddr_t)pdev) != DDI_SUCCESS) {
+ nfp_log( NFP_DBG1, "nfp_attach: ddi_add_intr(high) failed." );
+ goto bailout;
+ }
+ if( ddi_get_soft_iblock_cookie(dip, DDI_SOFTINT_HIGH,
+ &pdev->iblock_cookie) ) {
+ nfp_log( NFP_DBG1, "nfp_attach: ddi_get_iblock_cookie(soft) failed." );
+ goto bailout;
+ }
+ mutex_init(&pdev->isr_mutex, NULL, MUTEX_DRIVER,
+ (void *)pdev->iblock_cookie);
+ if (ddi_add_softintr(dip, DDI_SOFTINT_HIGH, &pdev->soft_int_id,
+ &pdev->iblock_cookie, NULL,
+ nfp_soft_isr, (caddr_t)pdev) != DDI_SUCCESS)
+ goto bailout;
+ pdev->high_intr= 1;
+ } else {
+ nfp_log( NFP_DBG2, "nfp_attach: low-level interrupt");
+
+ if (ddi_get_iblock_cookie (dip, 0, &pdev->iblock_cookie)) {
+ nfp_log( NFP_DBG1, "nfp_attach: ddi_get_iblock_cookie() failed." );
+ goto bailout;
+ }
+
+ mutex_init(&pdev->isr_mutex, "nfp isr mutex", MUTEX_DRIVER, (void *)pdev->iblock_cookie);
+
+ if (ddi_add_intr(dip, 0, NULL,
+ (ddi_idevice_cookie_t *)NULL, nfp_isr,
+ (caddr_t)pdev) != DDI_SUCCESS) {
+ nfp_log( NFP_DBG1, "nfp_attach: ddi_add_intr() failed." );
+ goto bailout;
+ }
+ }
+ mutex_init(&pdev->busy_mutex, "nfp busy mutex", MUTEX_DRIVER, NULL );
+ cv_init(&pdev->rd_cv, "nfp read condvar", CV_DRIVER, NULL );
+
+ /* get our bus and slot num */
+ if (ddi_getlongprop (DDI_DEV_T_NONE,
+ pdev->dip, 0, "reg",
+ (caddr_t)&outp, &intres) != DDI_PROP_NOT_FOUND) {
+ nfp_log( NFP_DBG2, "nfp_attach: ddi_getlongprop('reg') ok." );
+ if( intres > 0 ) {
+ nfp_log( NFP_DBG1, "nfp_attach: found PCI nfast bus %x slot %x.",
+ ((*outp)>>16) & 0xff, ((*outp)>>11) & 0x1f );
+ }
+ }
+
+ nfp_log( NFP_DBG2, "nfp_attach: attach succeeded." );
+ return DDI_SUCCESS;
+
+bailout:
+ (void) nfp_release_dev( dip );
+
+ return DDI_FAILURE;
+}
+
+/*-------------------------*/
+/* nfp_detach */
+/*-------------------------*/
+
+/*
+ * When our driver is unloaded, nfp_detach cleans up and frees the resources
+ * we allocated in nfp_attach.
+ */
+static int nfp_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) {
+ if (cmd != DDI_DETACH)
+ return (DDI_FAILURE);
+
+ (void) nfp_release_dev(dip);
+
+ return (DDI_SUCCESS);
+}
+
+/*-------------------------*/
+/* _init */
+/*-------------------------*/
+
+int _init(void) {
+ register int error;
+
+ nfp_log( NFP_DBG2, "_init: entered" );
+
+ if ((error = ddi_soft_state_init(&state_head, sizeof (struct nfp_dev), 1)) != 0) {
+ nfp_log( NFP_DBG1, "_init: soft_state_init() failed" );
+ return (error);
+ }
+
+ if ((error = mod_install(&modlinkage)) != 0) {
+ nfp_log( NFP_DBG1, "_init: mod_install() failed" );
+ ddi_soft_state_fini(&state_head);
+ }
+
+ nfp_log( NFP_DBG2, "_init: leaving" );
+ return (error);
+}
+
+/*-------------------------*/
+/* _info */
+/*-------------------------*/
+
+int _info(struct modinfo *modinfop) {
+ nfp_log( NFP_DBG2, "_info: entered" );
+
+ return (mod_info(&modlinkage, modinfop));
+}
+
+/*-------------------------*/
+/* _fini */
+/*-------------------------*/
+
+int _fini(void) {
+ int status;
+
+ nfp_log( NFP_DBG2, "_fini: entered" );
+
+ if ((status = mod_remove(&modlinkage)) != 0) {
+ nfp_log( NFP_DBG2, "_fini: mod_remove() failed." );
+ return (status);
+ }
+
+ ddi_soft_state_fini(&state_head);
+
+ nfp_log( NFP_DBG2, "_fini: leaving" );
+
+ return (status);
+}
+
diff --git a/usr/src/uts/common/io/nfp/i21285.c b/usr/src/uts/common/io/nfp/i21285.c
new file mode 100644
index 0000000000..f51a09188d
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/i21285.c
@@ -0,0 +1,310 @@
+/*
+
+i21285.c: nCipher PCI HSM intel/digital 21285 command driver
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+
+history
+
+09/10/2001 jsh Original
+
+*/
+
+#include "nfp_common.h"
+#include "nfp_error.h"
+#include "nfp_hostif.h"
+#include "nfp_osif.h"
+#include "i21285.h"
+#include "nfp_cmd.h"
+#include "nfpci.h"
+
+/* create ------------------------------------------------------- */
+
+static nfp_err i21285_create( nfp_cdev *pdev ) {
+ unsigned int tmp32;
+
+ nfp_log( NFP_DBG2, "i21285_create: entered");
+ pdev->cmdctx= pdev; /* set our context to just be a pointer to our nfp_cdev */
+
+ nfp_log( NFP_DBG2, "i21285_create: enable doorbell");
+ if(!pdev->bar[ IOBAR ]) {
+ nfp_log( NFP_DBG1, "i21285_create: null BAR[%d]", IOBAR );
+ return NFP_ENOMEM;
+ }
+ TO_LE32_IO( &tmp32, DOORBELL_ENABLE | POSTLIST_ENABLE);
+ nfp_outl( pdev, IOBAR, I21285_OFFSET_INTERRUPT_MASK, tmp32 );
+
+ return NFP_SUCCESS;
+}
+
+/* stop ------------------------------------------------------- */
+
+static nfp_err i21285_destroy( void * ctx ) {
+ nfp_cdev *pdev;
+ unsigned int tmp32;
+
+ nfp_log( NFP_DBG2, "i21285_destroy: entered");
+
+ pdev= (nfp_cdev *)ctx;
+ if(!pdev) {
+ nfp_log( NFP_DBG1, "i21285_destroy: NULL pdev");
+ return NFP_ENODEV;
+ }
+ if(!pdev->bar[ IOBAR ]) {
+ nfp_log( NFP_DBG1, "i21285_destroy: null BAR[%d]", IOBAR );
+ return NFP_ENOMEM;
+ }
+ TO_LE32_IO( &tmp32, DOORBELL_DISABLE | POSTLIST_DISABLE );
+ nfp_outl( pdev, IOBAR, I21285_OFFSET_INTERRUPT_MASK, tmp32 );
+
+ return NFP_SUCCESS;
+}
+
+/* open ------------------------------------------------------- */
+
+/* ARGSUSED */
+static nfp_err i21285_open( void * ctx ) {
+ nfp_log( NFP_DBG2, "i21285_open: entered");
+
+ return NFP_SUCCESS;
+}
+
+/* close ------------------------------------------------------- */
+
+/* ARGSUSED */
+static nfp_err i21285_close( void * ctx ) {
+ nfp_log( NFP_DBG2, "i21285_close: entered");
+
+ return NFP_SUCCESS;
+}
+
+/* isr ------------------------------------------------------- */
+
+static nfp_err i21285_isr( void *ctx, int *handled ) {
+ nfp_cdev *pdev;
+ unsigned int doorbell;
+ unsigned int tmp32;
+
+ nfp_log( NFP_DBG3, "i21285_isr: entered");
+
+ *handled= 0;
+ pdev= (nfp_cdev *)ctx;
+ if(!pdev) {
+ nfp_log( NFP_DBG1, "i21285_isr: NULL pdev");
+ return NFP_ENODEV;
+ }
+
+ doorbell= nfp_inl( pdev, IOBAR, I21285_OFFSET_DOORBELL);
+ doorbell= FROM_LE32_IO(&doorbell) & 0xffff;
+ while( doorbell && doorbell != 0xffff) {
+ *handled= 1;
+ /* service interrupts */
+ if( doorbell & (NFAST_INT_DEVICE_WRITE_OK | NFAST_INT_DEVICE_WRITE_FAILED)) {
+ TO_LE32_IO( &tmp32, NFAST_INT_DEVICE_WRITE_OK | NFAST_INT_DEVICE_WRITE_FAILED);
+ nfp_outl( pdev, IOBAR, I21285_OFFSET_DOORBELL, tmp32 );
+
+ nfp_log(NFP_DBG2, "i21285_isr: write done interrupt, ok = %d.", doorbell & NFAST_INT_DEVICE_WRITE_OK ? 1 : 0 );
+
+ nfp_write_complete(pdev->dev, doorbell & NFAST_INT_DEVICE_WRITE_OK ? 1 : 0 );
+ }
+
+ if( doorbell & (NFAST_INT_DEVICE_READ_OK | NFAST_INT_DEVICE_READ_FAILED)) {
+ TO_LE32_IO( &tmp32, NFAST_INT_DEVICE_READ_OK | NFAST_INT_DEVICE_READ_FAILED );
+ nfp_outl( pdev, IOBAR, I21285_OFFSET_DOORBELL, tmp32 );
+
+ nfp_log(NFP_DBG2, "i21285_isr: read ack interrupt, ok = %d.", doorbell & NFAST_INT_DEVICE_READ_OK ? 1 : 0 );
+ nfp_read_complete( pdev->dev, doorbell & NFAST_INT_DEVICE_READ_OK ? 1 : 0);
+ }
+
+ if( doorbell & ~(NFAST_INT_DEVICE_READ_OK | NFAST_INT_DEVICE_READ_FAILED |
+ NFAST_INT_DEVICE_WRITE_OK | NFAST_INT_DEVICE_WRITE_FAILED)) {
+ nfp_log( NFP_DBG1, "i21285_isr: unexpected interrupt %x", doorbell );
+ TO_LE32_IO( &tmp32, 0xffff & doorbell );
+ nfp_outl( pdev, IOBAR, I21285_OFFSET_DOORBELL, tmp32 );
+ }
+ doorbell= nfp_inl( pdev, IOBAR, I21285_OFFSET_DOORBELL);
+ doorbell= FROM_LE32_IO(&doorbell) & 0xffff;
+ }
+ return 0;
+}
+
+/* write ------------------------------------------------------- */
+
+static nfp_err i21285_write( const char *block, int len, void *ctx ) {
+ nfp_cdev *cdev;
+ unsigned int hdr[2];
+ nfp_err ne;
+ unsigned int tmp32;
+
+ nfp_log( NFP_DBG2, "i21285_write: entered");
+
+ cdev= (nfp_cdev *)ctx;
+ if(!cdev) {
+ nfp_log( NFP_DBG1, "i21285_write: NULL pdev");
+ return NFP_ENODEV;
+ }
+
+ nfp_log(NFP_DBG2, "i21285_write: pdev->bar[ MEMBAR ]= %x\n", cdev->bar[ MEMBAR ]);
+ nfp_log(NFP_DBG2, "i21285_write: pdev->bar[ IOBAR ]= %x\n", cdev->bar[ IOBAR ]);
+ if(!cdev->bar[ MEMBAR ]) {
+ nfp_log( NFP_DBG1, "i21285_write: null BAR[%d]", MEMBAR );
+ return NFP_ENOMEM;
+ }
+ ne= nfp_copy_from_user_to_dev( cdev, MEMBAR, NFPCI_JOBS_WR_DATA, block, len);
+ if (ne) {
+ nfp_log( NFP_DBG1, "i21285_write: nfp_copy_from_user_to_dev failed");
+ return ne;
+ }
+ TO_LE32_MEM(&hdr[0], NFPCI_JOB_CONTROL);
+ TO_LE32_MEM(&hdr[1], len);
+
+ ne= nfp_copy_to_dev( cdev, MEMBAR, NFPCI_JOBS_WR_CONTROL, (const char *)hdr, 8);
+ if (ne) {
+ nfp_log( NFP_DBG1, "i21285_write: nfp_copy_to_dev failed");
+ return ne;
+ }
+
+ ne= nfp_copy_from_dev( cdev, MEMBAR, NFPCI_JOBS_WR_LENGTH, (char *)hdr, 4);
+ if (ne) {
+ nfp_log( NFP_DBG1, "i21285_write: nfp_copy_from_dev failed");
+ return ne;
+ }
+
+ TO_LE32_MEM( &tmp32, len );
+ if ( hdr[0] != tmp32 ) {
+ nfp_log( NFP_DBG1, "i21285_write: length not written");
+ return NFP_EIO;
+ }
+
+ TO_LE32_IO( &tmp32, NFAST_INT_HOST_WRITE_REQUEST);
+
+ nfp_outl( cdev, IOBAR, I21285_OFFSET_DOORBELL, tmp32 );
+
+ nfp_log( NFP_DBG2, "i21285_write: done");
+ return NFP_SUCCESS;
+}
+
+/* read ------------------------------------------------------- */
+
+static nfp_err i21285_read( char *block, int len, void *ctx, int *rcount) {
+ nfp_cdev *cdev;
+ nfp_err ne;
+ int count;
+
+ nfp_log( NFP_DBG2, "i21285_read: entered, len %d", len);
+ *rcount= 0;
+
+ cdev= (nfp_cdev *)ctx;
+ if(!cdev) {
+ nfp_log( NFP_DBG1, "i21285_read: NULL pdev");
+ return NFP_ENODEV;
+ }
+
+ if(!cdev->bar[ MEMBAR ]) {
+ nfp_log( NFP_DBG1, "i21285_read: null BAR[%d]", MEMBAR );
+ return NFP_ENOMEM;
+ }
+ ne= nfp_copy_from_dev( cdev, MEMBAR, NFPCI_JOBS_RD_LENGTH, (char *)&count, 4);
+ if(ne) {
+ nfp_log( NFP_DBG1, "i21285_read: nfp_copy_from_dev failed.");
+ return ne;
+ }
+ count= FROM_LE32_MEM(&count);
+ if(count<0 || count>len) {
+ nfp_log( NFP_DBG1, "i21285_read: bad byte count (%d) from device", count);
+ return NFP_EIO;
+ }
+ ne= nfp_copy_to_user_from_dev( cdev, MEMBAR, NFPCI_JOBS_RD_DATA, block, count);
+ if( ne ) {
+ nfp_log( NFP_DBG1, "i21285_read: nfp_copy_to_user_from_dev failed.");
+ return ne;
+ }
+ nfp_log( NFP_DBG2, "i21285_read: done");
+ *rcount= count;
+ return NFP_SUCCESS;
+}
+
+/* chupdate ------------------------------------------------------- */
+
+/* ARGSUSED */
+static nfp_err i21285_chupdate( char *data, int len, void *ctx ) {
+ nfp_log( NFP_DBG1, "i21285_chupdate: NYI");
+ return NFP_SUCCESS;
+}
+
+/* ensure reading -------------------------------------------------- */
+
+static nfp_err i21285_ensure_reading( unsigned int addr, int len, void *ctx ) {
+ nfp_cdev *cdev;
+ unsigned int hdr[2];
+ unsigned int tmp32;
+ nfp_err ne;
+
+ nfp_log( NFP_DBG2, "i21285_ensure_reading: entered");
+
+ if(addr) {
+ nfp_log( NFP_DBG2, "i21285_ensure_reading: bad addr");
+ return -NFP_EINVAL;
+ }
+
+ cdev= (nfp_cdev *)ctx;
+ if(!cdev) {
+ nfp_log( NFP_DBG1, "i21285_ensure_reading: NULL pdev");
+ return NFP_ENODEV;
+ }
+
+ if(!cdev->bar[ MEMBAR ]) {
+ nfp_log( NFP_DBG1, "i21285_ensure_reading: null BAR[%d]", MEMBAR );
+ return NFP_ENXIO;
+ }
+ nfp_log( NFP_DBG3, "i21285_ensure_reading: pdev->bar[ MEMBAR ]= %x", cdev->bar[ MEMBAR ]);
+ nfp_log( NFP_DBG3, "i21285_ensure_reading: pdev->bar[ IOBAR ]= %x", cdev->bar[ IOBAR ]);
+ TO_LE32_MEM( &hdr[0], NFPCI_JOB_CONTROL);
+ TO_LE32_MEM( &hdr[1], len);
+ ne= nfp_copy_to_dev( cdev, MEMBAR, NFPCI_JOBS_RD_CONTROL, (const char *)hdr, 8);
+ if (ne) {
+ nfp_log( NFP_DBG1, "i21285_ensure_reading: nfp_copy_to_dev failed");
+ return ne;
+ }
+ ne= nfp_copy_from_dev( cdev, MEMBAR, NFPCI_JOBS_RD_LENGTH, (char *)hdr, 4);
+ if (ne) {
+ nfp_log( NFP_DBG1, "i21285_ensure_reading: nfp_copy_from_dev failed");
+ return ne;
+ }
+ TO_LE32_MEM( &tmp32, len );
+ if ( hdr[0] != tmp32 ) {
+ nfp_log( NFP_DBG1, "i21285_ensure_reading: len not written");
+ return NFP_EIO;
+ };
+ TO_LE32_IO( &tmp32, NFAST_INT_HOST_READ_REQUEST );
+ nfp_outl( cdev, IOBAR, I21285_OFFSET_DOORBELL, tmp32 );
+
+ return NFP_SUCCESS;
+}
+
+/* command device structure ------------------------------------- */
+
+
+const nfpcmd_dev i21285_cmddev = {
+ "nCipher Gen 1 PCI",
+ PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_DEC_21285,
+ PCI_VENDOR_ID_NCIPHER, PCI_DEVICE_ID_NFAST_GEN1,
+ { 0, IOSIZE | PCI_BASE_ADDRESS_SPACE_IO, NFPCI_RAM_MINSIZE, 0, 0, 0 },
+ NFP_CMD_FLG_NEED_IOBUF,
+ i21285_create,
+ i21285_destroy,
+ i21285_open,
+ i21285_close,
+ i21285_isr,
+ i21285_write,
+ i21285_read,
+ i21285_chupdate,
+ i21285_ensure_reading,
+ 0, /* no debug */
+};
+
diff --git a/usr/src/uts/common/io/nfp/i21285.h b/usr/src/uts/common/io/nfp/i21285.h
new file mode 100644
index 0000000000..4ea1d853ec
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/i21285.h
@@ -0,0 +1,43 @@
+/*
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+*/
+
+#ifndef NFP_I21285_H
+#define NFP_I21285_H
+
+#ifndef PCI_VENDOR_ID_DEC
+#define PCI_VENDOR_ID_DEC 0x1011
+#endif
+#ifndef PCI_DEVICE_ID_DEC_21285
+#define PCI_DEVICE_ID_DEC_21285 0x1065
+#endif
+#ifndef PCI_VENDOR_ID_NCIPHER
+#define PCI_VENDOR_ID_NCIPHER 0x0100
+#endif
+
+#ifndef PCI_DEVICE_ID_NFAST_GEN1
+#define PCI_DEVICE_ID_NFAST_GEN1 0x0100
+#endif
+
+#define I21285_OFFSET_DOORBELL 0x60
+#define I21285_OFFSET_INTERRUPT_MASK 0x34
+
+#define DOORBELL_ENABLE 0x0
+#define DOORBELL_DISABLE 0x4
+
+#define POSTLIST_ENABLE 0x0
+#define POSTLIST_DISABLE 0x8
+
+#define IOBAR 1
+#define MEMBAR 2
+
+#define IOSIZE 0x80
+#define MEMSIZE 0x100000
+
+#endif
diff --git a/usr/src/uts/common/io/nfp/i21555.c b/usr/src/uts/common/io/nfp/i21555.c
new file mode 100644
index 0000000000..82024dc800
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/i21555.c
@@ -0,0 +1,423 @@
+/*
+
+i21555.c: nCipher PCI HSM intel 21555 command driver
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+history
+
+09/10/2001 jsh Original
+
+*/
+
+#include "nfp_common.h"
+#include "nfp_error.h"
+#include "nfp_hostif.h"
+#include "nfp_osif.h"
+#include "i21555.h"
+#include "nfp_cmd.h"
+#include "nfpci.h"
+
+/* started ------------------------------------------------------
+ *
+ * Check that device is ready to talk, by checking that
+ * the i21555 has master enabled on its secondary interface
+ */
+
+static nfp_err i21555_started( nfp_cdev *pdev ) {
+ unsigned int tmp32;
+#ifdef CONFIGSPACE_DEBUG
+ unsigned int reg32[64];
+ int i;
+#endif
+ nfp_err ne;
+
+ nfp_log( NFP_DBG2, "i21555_started: entered");
+
+#ifdef CONFIGSPACE_DEBUG
+ /* Suck up all the registers */
+ for (i=0; i < 64; i++) {
+ ne = nfp_config_inl( pdev, i*4, &reg32[i] );
+ }
+
+ for (i=0; i < 16; i++) {
+ int j = i * 4;
+ nfp_log( NFP_DBG3, "i21555 config reg %2x: %08x %08x %08x %08x", j*4,
+ reg32[j], reg32[j+1], reg32[j+2], reg32[j+3]);
+ }
+#endif
+
+ ne = nfp_config_inl( pdev, I21555_CFG_SEC_CMD_STATUS, &tmp32 );
+ if (ne) {
+ /* succeed if PCI config reads are not implemented */
+ if (ne == NFP_EUNKNOWN)
+ return NFP_SUCCESS;
+ nfp_log( NFP_DBG1, "i21555_started: nfp_config_inl failed");
+ return ne;
+ }
+
+ tmp32= FROM_LE32_IO(&tmp32) & 0xffff;
+
+ if ( tmp32 & CFG_CMD_MASTER ) {
+ nfp_log( NFP_DBG3, "i21555_started: Yes %x", tmp32);
+ return NFP_SUCCESS;
+ } else {
+ nfp_log( NFP_DBG1, "i21555_started: device not started yet %x", tmp32);
+ return NFP_ESTARTING;
+ }
+}
+
+/* create ------------------------------------------------------- */
+
+static nfp_err i21555_create( nfp_cdev *pdev ) {
+ unsigned int tmp32;
+
+ nfp_log( NFP_DBG2, "i21555_create: entered");
+ pdev->cmdctx= pdev; /* set our context to just be a pointer to our nfp_cdev */
+
+ if(!pdev->bar[ IOBAR ]) {
+ nfp_log( NFP_DBG1, "i21555_create: null BAR[%d]", IOBAR );
+ return NFP_ENOMEM;
+ }
+ nfp_log( NFP_DBG2, "i21555_create: enable doorbell");
+ TO_LE32_IO( &tmp32, I21555_DOORBELL_PRI_ENABLE );
+ nfp_outl( pdev, IOBAR, I21555_OFFSET_DOORBELL_PRI_SET_MASK, tmp32 );
+ nfp_outl( pdev, IOBAR, I21555_OFFSET_DOORBELL_PRI_CLEAR_MASK, tmp32 );
+ return NFP_SUCCESS;
+}
+
+/* stop ------------------------------------------------------- */
+
+static nfp_err i21555_destroy( void * ctx ) {
+ nfp_cdev *pdev;
+ unsigned int tmp32;
+
+ nfp_log( NFP_DBG2, "i21555_destroy: entered");
+
+ pdev= (nfp_cdev *)ctx;
+ if(!pdev) {
+ nfp_log( NFP_DBG1, "i21555_destroy: NULL pdev");
+ return NFP_ENODEV;
+ }
+ if(!pdev->bar[ IOBAR ]) {
+ nfp_log( NFP_DBG1, "i21555_destroy: null BAR[%d]", IOBAR );
+ return NFP_ENOMEM;
+ }
+ TO_LE32_IO( &tmp32, I21555_DOORBELL_PRI_DISABLE );
+ nfp_outl( pdev, IOBAR, I21555_OFFSET_DOORBELL_PRI_SET_MASK, tmp32 );
+ nfp_outl( pdev, IOBAR, I21555_OFFSET_DOORBELL_PRI_CLEAR_MASK, tmp32 );
+
+ return NFP_SUCCESS;
+}
+
+/* open ------------------------------------------------------- */
+
+/* ARGSUSED */
+static nfp_err i21555_open( void * ctx ) {
+
+ nfp_log( NFP_DBG2, "i21555_open: entered");
+
+ return NFP_SUCCESS;
+}
+
+/* close ------------------------------------------------------- */
+
+/* ARGSUSED */
+static nfp_err i21555_close( void * ctx ) {
+ nfp_log( NFP_DBG2, "i21555_close: entered");
+
+ return NFP_SUCCESS;
+}
+
+/* isr ------------------------------------------------------- */
+
+static nfp_err i21555_isr( void *ctx, int *handled ) {
+ nfp_cdev *pdev;
+ nfp_err ne;
+ unsigned short doorbell;
+ unsigned short tmp16;
+
+ nfp_log( NFP_DBG3, "i21555_isr: entered");
+
+ *handled= 0;
+ pdev= (nfp_cdev *)ctx;
+ if(!pdev) {
+ nfp_log( NFP_DBG1, "i21555_isr: NULL pdev");
+ return NFP_ENODEV;
+ }
+
+ pdev->stats.isr++;
+
+ if(!pdev->bar[ IOBAR ]) {
+ nfp_log( NFP_DBG1, "i21555_isr: null BAR[%d]", IOBAR );
+ return NFP_ENOMEM;
+ }
+
+ /* This interrupt may not be from our module, so check that it actually is
+ * us before handling it.
+ */
+ ne = i21555_started( pdev );
+ if (ne) {
+ if (ne != NFP_ESTARTING) {
+ nfp_log( NFP_DBG1, "i21555_isr: i21555_started failed");
+ }
+ return ne;
+ }
+
+ doorbell= nfp_inw( pdev, IOBAR, I21555_OFFSET_DOORBELL_PRI_SET);
+ doorbell= FROM_LE16_IO(&doorbell);
+ while( doorbell && doorbell != 0xffff) {
+ *handled= 1;
+ /* service interrupts */
+ if( doorbell & (NFAST_INT_DEVICE_WRITE_OK | NFAST_INT_DEVICE_WRITE_FAILED)) {
+ pdev->stats.isr_write++;
+ TO_LE16_IO(&tmp16,NFAST_INT_DEVICE_WRITE_OK | NFAST_INT_DEVICE_WRITE_FAILED);
+ nfp_outw( pdev, IOBAR, I21555_OFFSET_DOORBELL_PRI_CLEAR, tmp16 );
+
+ nfp_log( NFP_DBG2, "i21555_isr: write done interrupt, ok = %d.", doorbell & NFAST_INT_DEVICE_WRITE_OK ? 1 : 0 );
+
+ nfp_write_complete(pdev->dev, doorbell & NFAST_INT_DEVICE_WRITE_OK ? 1 : 0 );
+ }
+
+ if( doorbell & (NFAST_INT_DEVICE_READ_OK | NFAST_INT_DEVICE_READ_FAILED)) {
+ pdev->stats.isr_read++;
+ TO_LE16_IO(&tmp16,NFAST_INT_DEVICE_READ_OK | NFAST_INT_DEVICE_READ_FAILED);
+ nfp_outw( pdev, IOBAR, I21555_OFFSET_DOORBELL_PRI_CLEAR, tmp16 );
+
+ nfp_log( NFP_DBG2, "i21555_isr: read ack interrupt, ok = %d.", doorbell & NFAST_INT_DEVICE_READ_OK ? 1 : 0 );
+ nfp_read_complete( pdev->dev, doorbell & NFAST_INT_DEVICE_READ_OK ? 1 : 0);
+ }
+
+ if( doorbell & ~(NFAST_INT_DEVICE_READ_OK | NFAST_INT_DEVICE_READ_FAILED |
+ NFAST_INT_DEVICE_WRITE_OK | NFAST_INT_DEVICE_WRITE_FAILED)) {
+ TO_LE16_IO(&tmp16,doorbell);
+ nfp_outw( pdev, IOBAR, I21555_OFFSET_DOORBELL_PRI_CLEAR, tmp16 );
+ nfp_log( NFP_DBG1, "i21555_isr: unexpected interrupt %x", doorbell );
+ }
+ doorbell= nfp_inw( pdev, IOBAR, I21555_OFFSET_DOORBELL_PRI_SET);
+ doorbell= FROM_LE16_IO(&doorbell);
+ }
+ nfp_log( NFP_DBG3, "i21555_isr: exiting");
+ return 0;
+}
+
+/* write ------------------------------------------------------- */
+
+static nfp_err i21555_write( const char *block, int len, void *ctx) {
+ nfp_cdev *cdev;
+ unsigned int hdr[2];
+ nfp_err ne;
+ unsigned short tmp16;
+ unsigned int tmp32;
+
+ nfp_log( NFP_DBG2, "i21555_write: entered");
+
+ cdev= (nfp_cdev *)ctx;
+ if(!cdev) {
+ nfp_log( NFP_DBG1, "i21555_write: NULL cdev");
+ return NFP_ENODEV;
+ }
+
+ cdev->stats.write_fail++;
+
+ if(!cdev->bar[ IOBAR ]) {
+ nfp_log( NFP_DBG1, "i21555_write: null BAR[%d]", IOBAR );
+ return NFP_ENOMEM;
+ }
+
+ ne = i21555_started( cdev );
+ if (ne) {
+ if (ne != NFP_ESTARTING) {
+ nfp_log( NFP_DBG1, "i21555_write: i21555_started failed");
+ }
+ return ne;
+ }
+
+ nfp_log( NFP_DBG3, "i21555_write: cdev->bar[ MEMBAR ]= %x", cdev->bar[ MEMBAR ]);
+ nfp_log( NFP_DBG3, "i21555_write: cdev->bar[ IOBAR ]= %x", cdev->bar[ IOBAR ]);
+ nfp_log( NFP_DBG3, "i21555_write: block len %d", len );
+ ne= nfp_copy_from_user_to_dev( cdev, MEMBAR, NFPCI_JOBS_WR_DATA, block, len);
+ if (ne) {
+ nfp_log( NFP_DBG1, "i21555_write: nfp_copy_from_user_to_dev failed");
+ return ne;
+ }
+ TO_LE32_MEM(&hdr[0], NFPCI_JOB_CONTROL);
+ TO_LE32_MEM(&hdr[1], len);
+ ne= nfp_copy_to_dev( cdev, MEMBAR, NFPCI_JOBS_WR_CONTROL, (const char *)hdr, 8);
+ if (ne) {
+ nfp_log( NFP_DBG1, "i21555_write: nfp_copy_to_dev failed");
+ return ne;
+ }
+
+ ne= nfp_copy_from_dev( cdev, MEMBAR, NFPCI_JOBS_WR_LENGTH, (char *)hdr, 4);
+ if (ne) {
+ nfp_log( NFP_DBG1, "i21555_write: nfp_copy_from_dev failed");
+ return ne;
+ }
+
+ TO_LE32_MEM(&tmp32, len);
+ if ( hdr[0] != tmp32 ) {
+ nfp_log( NFP_DBG1, "i21555_write: length not written");
+ return NFP_EIO;
+ }
+ TO_LE16_IO(&tmp16, NFAST_INT_HOST_WRITE_REQUEST >> 16);
+ nfp_outw( cdev, IOBAR, I21555_OFFSET_DOORBELL_SEC_SET, tmp16);
+
+ cdev->stats.write_fail--;
+ cdev->stats.write_block++;
+ cdev->stats.write_byte += len;
+
+ nfp_log( NFP_DBG2, "i21555_write: done");
+ return NFP_SUCCESS;
+}
+
+/* read ------------------------------------------------------- */
+
+static nfp_err i21555_read( char *block, int len, void *ctx, int *rcount) {
+ nfp_cdev *cdev;
+ nfp_err ne;
+ int count;
+
+ nfp_log( NFP_DBG2, "i21555_read: entered");
+ *rcount= 0;
+
+ cdev= (nfp_cdev *)ctx;
+ if(!cdev) {
+ nfp_log( NFP_DBG1, "i21555_read: NULL pdev");
+ return NFP_ENODEV;
+ }
+
+ cdev->stats.read_fail++;
+
+ if(!cdev->bar[ IOBAR ]) {
+ nfp_log( NFP_DBG1, "i21555_read: null BAR[%d]", IOBAR );
+ return NFP_ENOMEM;
+ }
+
+ ne= nfp_copy_from_dev( cdev, MEMBAR, NFPCI_JOBS_RD_LENGTH, (char *)&count, 4);
+ if (ne) {
+ nfp_log( NFP_DBG1, "i21555_read: nfp_copy_from_dev failed.");
+ return ne;
+ }
+ count= FROM_LE32_MEM(&count);
+ if(count<0 || count>len) {
+ nfp_log( NFP_DBG1, "i21555_read: bad byte count (%d) from device", count);
+ return NFP_EIO;
+ }
+ ne= nfp_copy_to_user_from_dev( cdev, MEMBAR, NFPCI_JOBS_RD_DATA, block, count);
+ if (ne) {
+ nfp_log( NFP_DBG1, "i21555_read: nfp_copy_to_user failed.");
+ return ne;
+ }
+ nfp_log( NFP_DBG2, "i21555_read: done");
+ *rcount= count;
+ cdev->stats.read_fail--;
+ cdev->stats.read_block++;
+ cdev->stats.read_byte += len;
+ return NFP_SUCCESS;
+}
+
+/* chupdate ------------------------------------------------------- */
+
+/* ARGSUSED */
+static nfp_err i21555_chupdate( char *data, int len, void *ctx ) {
+ nfp_log( NFP_DBG1, "i21555_chupdate: NYI");
+ return NFP_SUCCESS;
+}
+
+/* ensure reading -------------------------------------------------- */
+
+static nfp_err i21555_ensure_reading( unsigned int addr, int len, void *ctx ) {
+ nfp_cdev *cdev;
+ unsigned int hdr[3];
+ unsigned short tmp16;
+ unsigned int tmp32;
+ nfp_err ne;
+ int hdr_len;
+
+ nfp_log( NFP_DBG2, "i21555_ensure_reading: entered");
+
+ cdev= (nfp_cdev *)ctx;
+ if(!cdev) {
+ nfp_log( NFP_DBG1, "i21555_ensure_reading: NULL pdev");
+ return NFP_ENODEV;
+ }
+
+ cdev->stats.ensure_fail++;
+
+ if(!cdev->bar[ IOBAR ]) {
+ nfp_log( NFP_DBG1, "i21555_ensure_reading: null BAR[%d]", IOBAR );
+ return NFP_ENOMEM;
+ }
+
+ ne = i21555_started( cdev );
+ if (ne) {
+ if (ne != NFP_ESTARTING) {
+ nfp_log( NFP_DBG1, "i21555_ensure_reading: i21555_started failed");
+ }
+ return ne;
+ }
+
+ nfp_log( NFP_DBG3, "i21555_ensure_reading: pdev->bar[ MEMBAR ]= %x", cdev->bar[ MEMBAR ]);
+ nfp_log( NFP_DBG3, "i21555_ensure_reading: pdev->bar[ IOBAR ]= %x", cdev->bar[ IOBAR ]);
+ if(addr) {
+ nfp_log( NFP_DBG3, "i21555_ensure_reading: new format, addr %x", addr);
+ TO_LE32_MEM(&hdr[0], NFPCI_JOB_CONTROL_PCI_PUSH);
+ TO_LE32_MEM(&hdr[1], len);
+ TO_LE32_MEM(&hdr[2], addr);
+ hdr_len= 12;
+ } else {
+ TO_LE32_MEM(&hdr[0], NFPCI_JOB_CONTROL);
+ TO_LE32_MEM(&hdr[1], len);
+ hdr_len= 8;
+ }
+ ne= nfp_copy_to_dev( cdev, MEMBAR, NFPCI_JOBS_RD_CONTROL, (const char *)hdr, hdr_len);
+ if (ne) {
+ nfp_log( NFP_DBG1, "i21555_ensure_reading: nfp_copy_to_dev failed");
+ return ne;
+ }
+
+ ne= nfp_copy_from_dev( cdev, MEMBAR, NFPCI_JOBS_RD_LENGTH, (char *)hdr, 4);
+ if (ne) {
+ nfp_log( NFP_DBG1, "i21555_ensure_reading: nfp_copy_from_dev failed");
+ return ne;
+ }
+
+ TO_LE32_MEM(&tmp32, len);
+
+ if ( hdr[0] != tmp32 ) {
+ nfp_log( NFP_DBG1, "i21555_ensure_reading: len not written");
+ return NFP_EIO;
+ }
+ TO_LE16_IO( &tmp16, NFAST_INT_HOST_READ_REQUEST >> 16);
+ nfp_outw( cdev, IOBAR, I21555_OFFSET_DOORBELL_SEC_SET, tmp16);
+
+ cdev->stats.ensure_fail--;
+ cdev->stats.ensure++;
+
+ return NFP_SUCCESS;
+}
+
+/* command device structure ------------------------------------- */
+
+const nfpcmd_dev i21555_cmddev = {
+ "nCipher Gen 2 PCI",
+ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_21555,
+ PCI_VENDOR_ID_NCIPHER, PCI_SUBSYSTEM_ID_NFAST_REV1,
+ { 0, IOSIZE | PCI_BASE_ADDRESS_SPACE_IO, NFPCI_RAM_MINSIZE_JOBS, 0, 0, 0 },
+ NFP_CMD_FLG_NEED_IOBUF,
+ i21555_create,
+ i21555_destroy,
+ i21555_open,
+ i21555_close,
+ i21555_isr,
+ i21555_write,
+ i21555_read,
+ i21555_chupdate,
+ i21555_ensure_reading,
+ i21555_debug,
+};
diff --git a/usr/src/uts/common/io/nfp/i21555.h b/usr/src/uts/common/io/nfp/i21555.h
new file mode 100644
index 0000000000..d8f3965938
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/i21555.h
@@ -0,0 +1,51 @@
+/*
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+*/
+
+#ifndef I21555_H
+#define I21555_H
+
+#ifndef PCI_VENDOR_ID_INTEL
+#define PCI_VENDOR_ID_INTEL 0x8086
+#endif
+
+#ifndef PCI_DEVICE_ID_INTEL_21555
+#define PCI_DEVICE_ID_INTEL_21555 0xb555
+#endif
+
+#ifndef PCI_VENDOR_ID_NCIPHER
+#define PCI_VENDOR_ID_NCIPHER 0x0100
+#endif
+
+#ifndef PCI_SUBSYSTEM_ID_NFAST_REV1
+#define PCI_SUBSYSTEM_ID_NFAST_REV1 0x0100
+#endif
+
+#define I21555_OFFSET_DOORBELL_PRI_SET 0x9C
+#define I21555_OFFSET_DOORBELL_SEC_SET 0x9E
+#define I21555_OFFSET_DOORBELL_PRI_CLEAR 0x98
+
+#define I21555_OFFSET_DOORBELL_PRI_SET_MASK 0xA4
+#define I21555_OFFSET_DOORBELL_PRI_CLEAR_MASK 0xA0
+
+#define I21555_DOORBELL_PRI_ENABLE 0x0000
+#define I21555_DOORBELL_PRI_DISABLE 0xFFFF
+
+#define I21555_CFG_SEC_CMD_STATUS 0x44
+
+#define CFG_CMD_MASTER 0x0004
+
+#define IOBAR 1
+#define MEMBAR 2
+
+#define IOSIZE 0x100
+
+extern nfp_err i21555_debug( int cmd, void *ctx );
+
+#endif
diff --git a/usr/src/uts/common/io/nfp/i21555d.c b/usr/src/uts/common/io/nfp/i21555d.c
new file mode 100644
index 0000000000..183ace8275
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/i21555d.c
@@ -0,0 +1,28 @@
+/*
+
+i21555d.c: nCipher PCI HSM intel 21555 debug ioctl
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+
+history
+
+15/05/2002 jsh Original, does nothing
+
+*/
+
+#include "nfp_common.h"
+#include "nfp_error.h"
+#include "nfp_osif.h"
+#include "i21555.h"
+
+/* ARGSUSED */
+nfp_err i21555_debug( int cmd, void *ctx) {
+ nfp_log( NFP_DBG1, "i21555_debug: entered");
+
+ return NFP_EUNKNOWN;
+}
diff --git a/usr/src/uts/common/io/nfp/nfdev-common.h b/usr/src/uts/common/io/nfp/nfdev-common.h
new file mode 100644
index 0000000000..8a97bf2c63
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/nfdev-common.h
@@ -0,0 +1,141 @@
+/*
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+*/
+/** \file nfdev-common.h
+ *
+ * \brief nFast device driver (not generic SCSI) ioctl struct definition file
+ * include NFDEV-$(system) for ioctl number definitions
+ *
+ * 1998.07.13 jsh Started
+ *
+ *
+ */
+
+#ifndef NFDEV_COMMON_H
+#define NFDEV_COMMON_H
+
+/**
+ * Result of the ENQUIRY ioctl.
+ */
+typedef struct nfdev_enquiry_str {
+ unsigned int busno; /**< Which bus is the PCI device on. */
+ unsigned char slotno; /**< Which slot is the PCI device in. */
+ unsigned char reserved[3]; /**< for consistant struct alignment */
+} nfdev_enquiry_str;
+
+/**
+ * Result of the STATS ioctl.
+ */
+typedef struct nfdev_stats_str {
+ unsigned long isr; /**< Count interrupts. */
+ unsigned long isr_read; /**< Count read interrupts. */
+ unsigned long isr_write; /**< Count write interrupts. */
+ unsigned long write_fail; /**< Count write failures. */
+ unsigned long write_block; /**< Count blocks written. */
+ unsigned long write_byte; /**< Count bytes written. */
+ unsigned long read_fail; /**< Count read failures. */
+ unsigned long read_block; /**< Count blocks read. */
+ unsigned long read_byte; /**< Count bytes read. */
+ unsigned long ensure_fail; /**< Count read request failures. */
+ unsigned long ensure; /**< Count read requests. */
+} nfdev_stats_str;
+
+/**
+ * Input to the CONTROL ioctl.
+ */
+typedef struct nfdev_control_str {
+ unsigned control; /**< Control flags. */
+} nfdev_control_str;
+
+/** Control bit indicating host supports MOI control */
+#define NFDEV_CONTROL_HOST_MOI 0x0001
+
+/** Index of control bits indicating desired mode
+ *
+ * Desired mode follows the M_ModuleMode enumeration.
+ */
+#define NFDEV_CONTROL_MODE_SHIFT 1
+
+/** Detect a backwards-compatible control value
+ *
+ * Returns true if the request control value "makes no difference", i.e.
+ * and the failure of an attempt to set it is therefore uninteresting.
+ */
+#define NFDEV_CONTROL_HARMLESS(c) ((c) <= 1)
+
+/**
+ * Result of the STATUS ioctl.
+ */
+typedef struct nfdev_status_str {
+ unsigned status; /**< Status flags. */
+ char error[8]; /**< Error string. */
+} nfdev_status_str;
+
+/** Monitor firmware supports MOI control and error reporting */
+#define NFDEV_STATUS_MONITOR_MOI 0x0001
+
+/** Application firmware supports MOI control and error reporting */
+#define NFDEV_STATUS_APPLICATION_MOI 0x0002
+
+/** Application firmware running and supports error reporting */
+#define NFDEV_STATUS_APPLICATION_RUNNING 0x0004
+
+/** HSM failed
+ *
+ * Consult error[] for additional information.
+ */
+#define NFDEV_STATUS_FAILED 0x0008
+
+/** Standard PCI interface. */
+#define NFDEV_IF_STANDARD 0x01
+
+/** PCI interface with results pushed from device
+ * via DMA.
+ */
+#define NFDEV_IF_PCI_PUSH 0x02
+
+/* platform independant base ioctl numbers */
+
+/** Enquiry ioctl.
+ * \return nfdev_enquiry_str describing the attached device. */
+#define NFDEV_IOCTL_NUM_ENQUIRY 0x01
+/** Channel Update ioctl.
+ * \deprecated */
+#define NFDEV_IOCTL_NUM_CHUPDATE 0x02
+/** Ensure Reading ioctl.
+ * Signal a read request to the device.
+ * \param (unsigned int) Length of data to be read.
+ */
+#define NFDEV_IOCTL_NUM_ENSUREREADING 0x03
+/** Device Count ioctl.
+ * Not implemented for on all platforms.
+ * \return (int) the number of attached devices. */
+#define NFDEV_IOCTL_NUM_DEVCOUNT 0x04
+/** Internal Debug ioctl.
+ * Not implemented in release drivers. */
+#define NFDEV_IOCTL_NUM_DEBUG 0x05
+/** PCI Interface Version ioctl.
+ * \param (int) Maximum PCI interface version
+ * supported by the user of the device. */
+#define NFDEV_IOCTL_NUM_PCI_IFVERS 0x06
+/** Statistics ioctl.
+ * \return nfdev_enquiry_str describing the attached device. */
+#define NFDEV_IOCTL_NUM_STATS 0x07
+
+/** Module control ioctl
+ * \param (nfdev_control_str) Value to write to HSM control register
+ */
+#define NFDEV_IOCTL_NUM_CONTROL 0x08
+
+/** Module state ioctl
+ * \return (nfdev_status_str) Values read from HSM status/error registers
+ */
+#define NFDEV_IOCTL_NUM_STATUS 0x09
+
+#endif
diff --git a/usr/src/uts/common/io/nfp/nfdev-solaris.h b/usr/src/uts/common/io/nfp/nfdev-solaris.h
new file mode 100644
index 0000000000..923b902e46
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/nfdev-solaris.h
@@ -0,0 +1,37 @@
+/*
+
+nfdev-solaris.h: nFast solaris specific device ioctl interface.
+
+(C) Copyright nCipher Corporation Ltd 1998-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+history
+
+14/07/1998 jsh Original
+
+*/
+
+#ifndef NFDEV_SOLARIS_H
+#define NFDEV_SOLARIS_H
+
+#include "nfdev-common.h"
+
+#define NFDEV_IOCTL_TYPE ('n'<<8)
+
+#define NFDEV_IOCTL_ENQUIRY ( NFDEV_IOCTL_TYPE | \
+ NFDEV_IOCTL_NUM_ENQUIRY )
+#define NFDEV_IOCTL_ENSUREREADING ( NFDEV_IOCTL_TYPE | \
+ NFDEV_IOCTL_NUM_ENSUREREADING )
+#define NFDEV_IOCTL_DEVCOUNT ( NFDEV_IOCTL_TYPE | \
+ NFDEV_IOCTL_NUM_DEVCOUNT )
+#define NFDEV_IOCTL_DEBUG ( NFDEV_IOCTL_TYPE | \
+ NFDEV_IOCTL_NUM_DEBUG )
+#define NFDEV_IOCTL_PCI_IFVERS ( NFDEV_IOCTL_TYPE | \
+ NFDEV_IOCTL_NUM_PCI_IFVERS )
+#define NFDEV_IOCTL_STATS ( NFDEV_IOCTL_TYPE | \
+ NFDEV_IOCTL_NUM_STATS )
+
+#endif /* NFDEV_SOLARIS_H */
diff --git a/usr/src/uts/common/io/nfp/nfp.h b/usr/src/uts/common/io/nfp/nfp.h
new file mode 100644
index 0000000000..9704f04fbc
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/nfp.h
@@ -0,0 +1,113 @@
+/*
+
+nfp.h: nFast PCI driver for Solaris 2.5, 2.6 and 2.7
+
+(C) Copyright nCipher Corporation Ltd 2001-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+history
+
+06/05/1998 jsh Original solaris 2.6
+21/05/1999 jsh added support for solaris 2.5
+10/06/1999 jsh added support for solaris 2.7 (32 and 64 bit)
+16/10/2001 jsh moved from nfast to new structure in nfdrv
+
+*/
+
+#ifndef NFP_H
+#define NFP_H
+
+#ifndef _KERNEL
+#error Hello? this is a driver, please compile with -D_KERNEL
+#endif
+
+#if ( CH_KERNELVER < 260 )
+typedef int ioctlptr_t;
+typedef unsigned short uint16_t;
+#define DDI_GET32 ddi_getl
+#define DDI_PUT32 ddi_putl
+#define DDI_GET16 ddi_getw
+#define DDI_PUT16 ddi_putw
+#define DDI_REP_GET8 ddi_rep_getb
+#define DDI_REP_PUT8 ddi_rep_putb
+#define DDI_REP_GET32 ddi_rep_getl
+#define DDI_REP_PUT32 ddi_rep_putl
+#define PCI_CONFIG_GET16 pci_config_getw
+#else /* ( CH_KERNELVER >= 260 ) */
+typedef intptr_t ioctlptr_t;
+#define DDI_GET32 ddi_get32
+#define DDI_PUT32 ddi_put32
+#define DDI_GET16 ddi_get16
+#define DDI_PUT16 ddi_put16
+#define DDI_REP_GET8 ddi_rep_get8
+#define DDI_REP_PUT8 ddi_rep_put8
+#define DDI_REP_GET32 ddi_rep_get32
+#define DDI_REP_PUT32 ddi_rep_put32
+#define PCI_CONFIG_GET16 pci_config_get16
+#endif
+
+#if ( CH_KERNELVER < 270 )
+typedef int nfp_timeout_t;
+#define EXTRA_CB_FLAGS 0
+#define VSXPRINTF(s, n, format, ap) vsprintf (s, format, ap)
+#else /* ( CH_KERNELVER >= 270 ) */
+typedef timeout_id_t nfp_timeout_t;
+#define EXTRA_CB_FLAGS D_64BIT
+#define VSXPRINTF(s, n, format, ap) vsnprintf(s, n, format, ap)
+#endif
+
+typedef struct nfp_dev {
+ int rd_ok;
+ int wr_ok;
+
+ int ifvers;
+
+ /* for PCI push read interface */
+ unsigned char *read_buf;
+ ddi_dma_handle_t read_dma_handle;
+ ddi_dma_cookie_t read_dma_cookie;
+
+ ddi_acc_handle_t acchandle;
+
+ int rd_dma_ok;
+
+ nfp_timeout_t wrtimeout;
+ nfp_timeout_t rdtimeout;
+
+ struct buf *wr_bp;
+ int wr_ready;
+ int rd_ready;
+ int rd_pending;
+ int rd_outstanding;
+ kcondvar_t rd_cv;
+
+ struct pollhead pollhead;
+ dev_info_t *dip;
+
+ ddi_iblock_cookie_t high_iblock_cookie; /* for mutex */
+ ddi_iblock_cookie_t low_iblock_cookie; /* for mutex */
+ kmutex_t high_mutex;
+ kmutex_t low_mutex;
+ int high_intr;
+ ddi_softintr_t soft_int_id;
+ int high_read;
+ int high_write;
+
+ ddi_iblock_cookie_t iblock_cookie; /* for mutex */
+ kmutex_t isr_mutex;
+
+ kmutex_t busy_mutex;
+ int busy;
+
+ ddi_acc_handle_t conf_handle;
+
+ nfp_cdev common;
+ const nfpcmd_dev *cmddev;
+} nfp_dev;
+
+extern struct nfp_dev *nfp_dev_list[];
+
+#endif /* NFP_H */
diff --git a/usr/src/uts/common/io/nfp/nfp_cmd.h b/usr/src/uts/common/io/nfp/nfp_cmd.h
new file mode 100644
index 0000000000..db8af0b2f9
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/nfp_cmd.h
@@ -0,0 +1,68 @@
+/*
+
+nfp_cmd.h: nCipher PCI HSM command driver decalrations
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+history
+
+10/10/2001 jsh Original
+
+*/
+
+#ifndef NFPCMD_H
+#define NFPCMD_H
+
+#include "nfp_hostif.h"
+#include "nfp_error.h"
+
+/* read and write called with userspace buffer */
+
+typedef struct nfpcmd_dev {
+ const char *name;
+ unsigned short vendorid, deviceid,
+ sub_vendorid, sub_deviceid;
+ unsigned int bar_sizes[6]; /* includes IO bit */
+ unsigned int flags;
+ nfp_err (*create)(struct nfp_cdev *pdev);
+ nfp_err (*destroy)(void * ctx);
+ nfp_err (*open)(void * ctx);
+ nfp_err (*close)(void * ctx);
+ nfp_err (*isr)(void *ctx, int *handled);
+ nfp_err (*write_block)( const char *ublock, int len, void *ctx );
+ nfp_err (*read_block)( char *ublock, int len, void *ctx, int *rcount);
+ nfp_err (*channel_update)( char *data, int len, void *ctx);
+ nfp_err (*ensure_reading)( unsigned int addr, int len, void *ctx );
+ nfp_err (*debug)( int cmd, void *ctx);
+} nfpcmd_dev;
+
+#define NFP_CMD_FLG_NEED_IOBUF 0x1
+
+/* list of all supported drivers ---------------------------------------- */
+
+extern const nfpcmd_dev *nfp_drvlist[];
+
+extern const nfpcmd_dev i21285_cmddev;
+extern const nfpcmd_dev i21555_cmddev;
+extern const nfpcmd_dev bcm5820_cmddev;
+
+#ifndef PCI_BASE_ADDRESS_SPACE_IO
+#define PCI_BASE_ADDRESS_SPACE_IO 0x1
+#endif
+
+#define NFP_MAXDEV 16
+
+
+#define NFP_MEMBAR_MASK ~0xf
+#define NFP_IOBAR_MASK ~0x3
+/*
+ This masks off the bottom bits of the PCI_CSR_BAR which signify that the
+ BAR is an IO BAR rather than a MEM BAR
+*/
+
+#endif
+
diff --git a/usr/src/uts/common/io/nfp/nfp_common.h b/usr/src/uts/common/io/nfp/nfp_common.h
new file mode 100644
index 0000000000..d1d2100fea
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/nfp_common.h
@@ -0,0 +1,68 @@
+/*
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+*/
+
+#ifndef NFP_COMMON_H
+#define NFP_COMMON_H
+
+#include <sys/types.h>
+#include <sys/conf.h>
+
+typedef uint32_t UINT32;
+typedef uint8_t BYTE;
+
+#define DEFINE_NFPCI_PACKED_STRUCTS
+#include "nfpci.h"
+#include "nfdev-solaris.h"
+
+typedef int oserr_t;
+
+#if CH_BIGENDIAN
+
+/* Big Endian Sparc */
+
+#define SWP32(x) \
+( (((unsigned int)(x)>>24)&0xff) | (((unsigned int)(x)>>8)&0xff00) | (((unsigned int)(x)<<8)&0xff0000) | (((unsigned int)(x)<<24)&0xff000000) )
+
+#define SWP16(x) ( (((x)>>8)&0xff) | (((x)<<8)&0xff00) )
+
+#define FROM_LE32_IO(x) SWP32(*x)
+#define TO_LE32_IO(x,y) *x=SWP32(y)
+
+#define FROM_LE32_MEM(x) SWP32(*x)
+#define TO_LE32_MEM(x,y) *x=SWP32(y)
+
+#define FROM_LE16_IO(x) SWP16(*x)
+#define TO_LE16_IO(x,y) *x=SWP16(y)
+
+#else
+
+/* Little Endian x86 */
+
+#define FROM_LE32_IO(x) (*x)
+#define TO_LE32_IO(x,y) (*x=y)
+
+#define FROM_LE32_MEM(x) (*x)
+#define TO_LE32_MEM(x,y) (*x=y)
+
+#define FROM_LE16_IO(x) (*x)
+#define TO_LE16_IO(x,y) (*x=y)
+
+#endif /* !CH_BIGENDIAN */
+
+#include <sys/types.h>
+
+#if CH_KERNELVER == 260
+#define nfp_get_lbolt( lbolt, err ) err= drv_getparm( LBOLT, lbolt )
+#else
+#define nfp_get_lbolt( lbolt, err ) { *lbolt= ddi_get_lbolt(); err= 0; }
+#endif
+
+#endif
+
diff --git a/usr/src/uts/common/io/nfp/nfp_error.h b/usr/src/uts/common/io/nfp/nfp_error.h
new file mode 100644
index 0000000000..d64cb78fd4
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/nfp_error.h
@@ -0,0 +1,48 @@
+/*
+
+nfp_error.h: nCipher PCI HSM error handling
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+history
+
+05/12/2001 jsh Original
+
+*/
+
+#ifndef NFP_ERROR_H
+#define NFP_ERROR_H
+
+#include "nfp_common.h"
+
+#define NFP_SUCCESS 0x0
+#define NFP_EFAULT 0x1
+#define NFP_ENOMEM 0x2
+#define NFP_EINVAL 0x3
+#define NFP_EIO 0x4
+#define NFP_ENXIO 0x5
+#define NFP_ENODEV 0x6
+#define NFP_EINTR 0x7
+#define NFP_ESTARTING 0x8
+#define NFP_EAGAIN 0x9
+#define NFP_EUNKNOWN 0x100
+
+typedef int nfp_err;
+
+extern oserr_t nfp_oserr( nfp_err nerr );
+extern nfp_err nfp_error( oserr_t oerr );
+
+#define nfr( x) \
+ return nfp_error((x))
+
+#define nfer(x, fn, msg) \
+ { oserr_t err=(x); if(err) { nfp_log( NFP_DBG1, #fn ": " msg); return nfp_error(err); } }
+
+#define er(x, fn, msg ) \
+{ nfp_err err=(x); if(err) { nfp_log( NFP_DBG1, #fn ": " msg); return err; } }
+
+#endif
diff --git a/usr/src/uts/common/io/nfp/nfp_hostif.h b/usr/src/uts/common/io/nfp/nfp_hostif.h
new file mode 100644
index 0000000000..3e7d8187e5
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/nfp_hostif.h
@@ -0,0 +1,54 @@
+/*
+
+nfp_hostif.h: nCipher PCI HSM host interface declarations
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+history
+
+10/10/2001 jsh Original
+
+*/
+
+#ifndef NFP_HOSTIF_H
+#define NFP_HOSTIF_H
+
+#include "nfdev-common.h"
+
+struct nfp_dev;
+
+/* common device structure */
+
+typedef struct nfp_cdev {
+ unsigned char *bar[6];
+ void *extra[6];
+
+ int busno;
+ int slotno;
+
+ void *cmdctx;
+
+ char *iobuf;
+
+ struct nfp_dev* dev;
+
+ struct nfdev_stats_str stats;
+
+} nfp_cdev;
+
+/* callbacks from command drivers -------------------------------------- */
+
+void nfp_read_complete( struct nfp_dev *pdev, int ok);
+void nfp_write_complete( struct nfp_dev *pdev, int ok);
+
+#define NFP_READ_MAX (8 * 1024)
+#define NFP_READBUF_SIZE (NFP_READ_MAX + 8)
+#define NFP_TIMEOUT_SEC 10
+
+#define NFP_DRVNAME "nCipher nFast PCI driver"
+
+#endif
diff --git a/usr/src/uts/common/io/nfp/nfp_ifvers.c b/usr/src/uts/common/io/nfp/nfp_ifvers.c
new file mode 100644
index 0000000000..807b4f24c5
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/nfp_ifvers.c
@@ -0,0 +1,51 @@
+/*
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+*/
+
+/*
+ * nfp_ifervs.c - common pci interface versioning
+ *
+ * uses:
+ *
+ * int pdev->ifvers
+ * device interface version
+ *
+ * int nfp_ifvers
+ * interface version limit
+ *
+ * int nfp_alloc_pci_push( nfp_dev *pdev )
+ * allocates resources needed for PCI Push,
+ * if not already allocated, and return True if successful
+ *
+ * void nfp_free_pci_push( nfp_dev *pdev ) {
+ * frees any resources allocated to PCI Push
+ */
+
+void nfp_set_ifvers( nfp_dev *pdev, int vers ) {
+ if( nfp_ifvers != 0 && vers > nfp_ifvers ) {
+ nfp_log( NFP_DBG2,
+ "nfp_set_ifvers: can't set ifvers %d"
+ " as nfp_ifvers wants max ifvers %d",
+ vers, nfp_ifvers);
+ return;
+ }
+ if( vers >= NFDEV_IF_PCI_PUSH ) {
+ if(!nfp_alloc_pci_push(pdev)) {
+ nfp_log( NFP_DBG1,
+ "nfp_set_ifvers: can't set ifvers %d"
+ " as resources not available",
+ vers);
+ return;
+ }
+ } else {
+ nfp_free_pci_push(pdev);
+ }
+ pdev->ifvers= vers;
+ nfp_log( NFP_DBG3, "nfp_set_ifvers: setting ifvers %d", vers);
+}
diff --git a/usr/src/uts/common/io/nfp/nfp_osif.h b/usr/src/uts/common/io/nfp/nfp_osif.h
new file mode 100644
index 0000000000..17ffe469ce
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/nfp_osif.h
@@ -0,0 +1,105 @@
+/*
+
+nfp_osif.h: nCipher PCI HSM OS interface declarations
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+history
+
+10/10/2001 jsh Original
+
+*/
+
+#ifndef NFP_OSIF_H
+#define NFP_OSIF_H
+
+#include "nfp_hostif.h"
+#include "nfp_error.h"
+
+/* general typedefs ----------------------------------------------- */
+
+typedef volatile unsigned int reg32;
+typedef volatile unsigned short reg16;
+typedef volatile unsigned char reg8;
+
+/* sempaphores, mutexs and events --------------------------------- */
+
+#if 0
+extern nfp_err nfp_sema_init( nfp_sema *sema, int initial);
+extern void nfp_sema_destroy( nfp_sema *sema );
+extern void nfp_sema_post( nfp_sema *sema );
+extern void nfp_sema_wait( nfp_sema *sema );
+extern int nfp_sema_wait_sig( nfp_sema *sema );
+
+extern nfp_err nfp_mutex_init( nfp_mutex *mutex );
+extern void nfp_mutex_destroy( nfp_mutex *mutex );
+extern void nfp_mutex_enter( nfp_mutex *mutex );
+extern void nfp_mutex_exit( nfp_mutex *mutex );
+
+extern nfp_err nfp_event_init( nfp_event *event );
+extern void nfp_event_destroy( nfp_event *event );
+extern void nfp_event_set( nfp_event *event );
+extern void nfp_event_clear( nfp_event *event );
+extern void nfp_event_wait( nfp_event *event );
+extern void nfp_event_wait_sig( nfp_event *event );
+
+#endif
+
+/* timeouts ------------------------------------------------------ */
+
+extern void nfp_sleep( int ms );
+
+/* memory handling ----------------------------------------------- */
+
+#define KMALLOC_DMA 0
+#define KMALLOC_CACHED 1
+
+extern void *nfp_kmalloc( int size, int flags );
+extern void *nfp_krealloc( void *ptr, int size, int flags );
+extern void nfp_kfree( void * );
+
+/* config space access ------------------------------------------------ */
+
+/* return Little Endian 32 bit config register */
+extern nfp_err nfp_config_inl( nfp_cdev *pdev, int offset, unsigned int *res );
+
+/* io space access ------------------------------------------------ */
+
+extern unsigned int nfp_inl( nfp_cdev *pdev, int bar, int offset );
+extern unsigned short nfp_inw( nfp_cdev *pdev, int bar, int offset );
+extern void nfp_outl( nfp_cdev *pdev, int bar, int offset, unsigned int data );
+extern void nfp_outw( nfp_cdev *pdev, int bar, int offset, unsigned short data );
+
+/* user and device memory space access ---------------------------- */
+
+/* NB these 2 functions are not guarenteed to be re-entrant for a given device */
+extern nfp_err nfp_copy_from_user_to_dev( nfp_cdev *cdev, int bar, int offset, const char *ubuf, int len);
+extern nfp_err nfp_copy_to_user_from_dev( nfp_cdev *cdev, int bar, int offset, char *ubuf, int len);
+
+extern nfp_err nfp_copy_from_user( char *kbuf, const char *ubuf, int len );
+extern nfp_err nfp_copy_to_user( char *ubuf, const char *kbuf, int len );
+
+extern nfp_err nfp_copy_from_dev( nfp_cdev *cdev, int bar, int offset, char *kbuf, int len );
+extern nfp_err nfp_copy_to_dev( nfp_cdev *cdev, int bar, int offset, const char *kbuf, int len);
+
+/* debug ------------------------------------------------------------ */
+
+#define NFP_DBG1 1
+#define NFP_DBGE NFP_DBG1
+#define NFP_DBG2 2
+#define NFP_DBG3 3
+#define NFP_DBG4 4
+
+#ifdef STRANGE_VARARGS
+extern void nfp_log();
+#else
+extern void nfp_log( int severity, const char *format, ...);
+#endif
+
+extern int nfp_debug;
+
+#endif
diff --git a/usr/src/uts/common/io/nfp/nfpci.h b/usr/src/uts/common/io/nfp/nfpci.h
new file mode 100644
index 0000000000..793f5995e6
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/nfpci.h
@@ -0,0 +1,171 @@
+/*
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+*/
+
+/*
+*
+* NFPCI.H - nFast PCI interface definition file
+*
+*
+*
+* 1998.06.09 IH Started
+*
+* The interface presented by nFast PCI devices consists of:
+*
+* A region of shared RAM used for data transfer & control information
+* A doorbell interrupt register, so both sides can give each other interrupts
+* A number of DMA channels for transferring data
+*/
+
+#ifndef NFPCI_H
+#define NFPCI_H
+
+/* Sizes of some regions */
+#define NFPCI_RAM_MINSIZE 0x00100000
+/* This is the minimum size of shared RAM. In future it may be possible to
+ negotiate larger sizes of shared RAM or auto-detect how big it is */
+#define NFPCI_RAM_MINSIZE_JOBS 0x00020000 /* standard jobs only */
+#define NFPCI_RAM_MINSIZE_KERN 0x00040000 /* standard and kernel jobs */
+
+/* Offsets within shared memory space.
+ The following main regions are:
+ jobs input area
+ jobs output area
+ kernel jobs input area
+ kernel output area
+*/
+
+#define NFPCI_OFFSET_JOBS 0x00000000
+#define NFPCI_OFFSET_JOBS_WR 0x00000000
+#define NFPCI_OFFSET_JOBS_RD 0x00010000
+#define NFPCI_OFFSET_KERN 0x00020000
+#define NFPCI_OFFSET_KERN_WR 0x00020000
+#define NFPCI_OFFSET_KERN_RD 0x00030000
+
+/* Interrupts, defined by bit position in doorbell register */
+
+/* Interrupts from device to host */
+#define NFAST_INT_DEVICE_WRITE_OK 0x00000001
+#define NFAST_INT_DEVICE_WRITE_FAILED 0x00000002
+#define NFAST_INT_DEVICE_READ_OK 0x00000004
+#define NFAST_INT_DEVICE_READ_FAILED 0x00000008
+#define NFAST_INT_DEVICE_KERN_WRITE_OK 0x00000010
+#define NFAST_INT_DEVICE_KERN_WRITE_FAILED 0x00000020
+#define NFAST_INT_DEVICE_KERN_READ_OK 0x00000040
+#define NFAST_INT_DEVICE_KERN_READ_FAILED 0x00000080
+
+/* Interrupts from host to device */
+#define NFAST_INT_HOST_WRITE_REQUEST 0x00010000
+#define NFAST_INT_HOST_READ_REQUEST 0x00020000
+#define NFAST_INT_HOST_DEBUG 0x00040000
+#define NFAST_INT_HOST_KERN_WRITE_REQUEST 0x00080000
+#define NFAST_INT_HOST_KERN_READ_REQUEST 0x00100000
+
+/* Ordinary job submission ------------------------ */
+
+/* The NFPCI_OFFSET_JOBS_WR and NFPCI_OFFSET_JOBS_RD regions are defined
+ by the following (byte) address offsets... */
+
+#define NFPCI_OFFSET_CONTROL 0x0
+#define NFPCI_OFFSET_LENGTH 0x4
+#define NFPCI_OFFSET_DATA 0x8
+#define NFPCI_OFFSET_PUSH_ADDR 0x8
+
+#define NFPCI_JOBS_WR_CONTROL (NFPCI_OFFSET_JOBS_WR + NFPCI_OFFSET_CONTROL)
+#define NFPCI_JOBS_WR_LENGTH (NFPCI_OFFSET_JOBS_WR + NFPCI_OFFSET_LENGTH)
+#define NFPCI_JOBS_WR_DATA (NFPCI_OFFSET_JOBS_WR + NFPCI_OFFSET_DATA)
+#define NFPCI_MAX_JOBS_WR_LEN (0x0000FFF8)
+
+#define NFPCI_JOBS_RD_CONTROL (NFPCI_OFFSET_JOBS_RD + NFPCI_OFFSET_CONTROL)
+#define NFPCI_JOBS_RD_LENGTH (NFPCI_OFFSET_JOBS_RD + NFPCI_OFFSET_LENGTH)
+#define NFPCI_JOBS_RD_DATA (NFPCI_OFFSET_JOBS_RD + NFPCI_OFFSET_DATA)
+/* address in PCI space of host buffer for NFPCI_JOB_CONTROL_PCI_PUSH */
+#define NFPCI_JOBS_RD_PUSH_ADDR (NFPCI_OFFSET_JOBS_RD + NFPCI_OFFSET_PUSH_ADDR)
+#define NFPCI_MAX_JOBS_RD_LEN (0x000FFF8)
+
+/* Kernel inferface job submission ---------------- */
+
+#define NFPCI_KERN_WR_CONTROL (NFPCI_OFFSET_KERN_WR + NFPCI_OFFSET_CONTROL)
+#define NFPCI_KERN_WR_LENGTH (NFPCI_OFFSET_KERN_WR + NFPCI_OFFSET_LENGTH)
+#define NFPCI_KERN_WR_DATA (NFPCI_OFFSET_KERN_WR + NFPCI_OFFSET_DATA)
+#define NFPCI_MAX_KERN_WR_LEN (0x0000FFF8)
+
+#define NFPCI_KERN_RD_CONTROL (NFPCI_OFFSET_KERN_RD + NFPCI_OFFSET_CONTROL)
+#define NFPCI_KERN_RD_LENGTH (NFPCI_OFFSET_KERN_RD + NFPCI_OFFSET_LENGTH)
+#define NFPCI_KERN_RD_DATA (NFPCI_OFFSET_KERN_RD + NFPCI_OFFSET_DATA)
+/* address in PCI space of host buffer for NFPCI_JOB_CONTROL_PCI_PUSH */
+#define NFPCI_KERN_RD_ADDR (NFPCI_OFFSET_KERN_RD + NFPCI_OFFSET_PUSH_ADDR)
+#define NFPCI_MAX_KERN_RD_LEN (0x000FFF8)
+
+#ifdef DEFINE_NFPCI_PACKED_STRUCTS
+typedef struct
+{
+ UINT32 controlword;
+ UINT32 length; /* length of data to follow */
+ union {
+ BYTE data[1];
+ UINT32 addr;
+ } uu;
+}
+ NFPCI_JOBS_BLOCK;
+#endif
+
+
+#define NFPCI_JOB_CONTROL 0x00000001
+#define NFPCI_JOB_CONTROL_PCI_PUSH 0x00000002
+/*
+ The 'Control' word is analogous to the SCSI read/write address;
+ 1 = standard push/pull IO
+ 2 = push/push IO
+
+ To submit a block of job data, the host:
+ - sets the (32-bit, little-endian) word at NFPCI_JOBS_WR_CONTROL to NFPCI_JOB_CONTROL
+ - sets the word at NFPCI_JOBS_WR_LENGTH to the length of the data
+ - copies the data to NFPCI_JOBS_WR_DATA
+ - sets interrupt NFAST_INT_HOST_WRITE_REQUEST in the doorbell register
+ - awaits the NFAST_INT_DEVICE_WRITE_OK (or _FAILED) interrupts back
+
+ To read a block of jobs back, the host:
+ - sets the word at NFPCI_JOBS_RD_CONTROL to NFPCI_JOB_CONTROL
+ - sets the word at NFPCI_JOBS_RD_LENGTH to the max length for returned data
+ - sets interrupt NFAST_INT_HOST_READ_REQUEST
+ - awaits the NFAST_INT_DEVICE_READ_OK (or _FAILED) interrupt
+ - reads the data from NFPCI_JOBS_RD_DATA; the module will set the word at
+ NFPCI_JOBS_RD_LENGTH to its actual length.
+
+ Optionally the host can request the PCI read data to be pushed to host PCI mapped ram:
+ - allocates a contiguous PCI addressable buffer for a NFPCI_JOBS_BLOCK of max
+ size NFPCI_MAX_JOBS_RD_LEN (or NFPCI_MAX_KERN_RD_LEN) + 8
+ - sets the word at NFPCI_JOBS_RD_CONTROL to NFPCI_JOB_CONTROL_PCI_PUSH
+ - sets the word at NFPCI_JOBS_RD_LENGTH to the max length for returned data
+ - sets the word at NFPCI_JOBS_RD_PUSH_ADDR to be the host PCI address of
+ the buffer
+ - sets interrupt NFAST_INT_HOST_READ_REQUEST
+ - awaits the NFAST_INT_DEVICE_READ_OK (or _FAILED) interrupt
+ - reads the data from the buffer at NFPCI_OFFSET_DATA in the buffer. The
+ module will set NFPCI_OFFSET_LENGTH to the actual length.
+*/
+
+#define NFPCI_SCRATCH_CONTROL 0
+
+#define NFPCI_SCRATCH_CONTROL_HOST_MOI (1<<0)
+#define NFPCI_SCRATCH_CONTROL_MODE_SHIFT 1
+#define NFPCI_SCRATCH_CONTROL_MODE_MASK (3<<NFPCI_SCRATCH_CONTROL_MODE_SHIFT)
+
+#define NFPCI_SCRATCH_STATUS 1
+
+#define NFPCI_SCRATCH_STATUS_MONITOR_MOI (1<<0)
+#define NFPCI_SCRATCH_STATUS_APPLICATION_MOI (1<<1)
+#define NFPCI_SCRATCH_STATUS_APPLICATION_RUNNING (1<<2)
+#define NFPCI_SCRATCH_STATUS_ERROR (1<<3)
+
+#define NFPCI_SCRATCH_ERROR_LO 2
+#define NFPCI_SCRATCH_ERROR_HI 3
+
+#endif
diff --git a/usr/src/uts/common/io/nfp/osif.c b/usr/src/uts/common/io/nfp/osif.c
new file mode 100644
index 0000000000..fba62f9a37
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/osif.c
@@ -0,0 +1,184 @@
+/*
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+*/
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/errno.h>
+#include <sys/file.h>
+#include <sys/conf.h>
+#include <sys/uio.h>
+#include <sys/map.h>
+#include <sys/debug.h>
+#include <sys/modctl.h>
+#include <sys/kmem.h>
+#include <sys/cmn_err.h>
+#include <sys/open.h>
+#include <sys/stat.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/pci.h>
+
+#include "nfp_common.h"
+#include "nfp_hostif.h"
+#include "nfp_error.h"
+#include "nfp_osif.h"
+#include "nfp_cmd.h"
+#include "nfp.h"
+#include "autoversion.h"
+
+/* config space access ---------------------------------- */
+
+nfp_err nfp_config_inl( nfp_cdev *pdev, int offset, unsigned int *res ) {
+ unsigned int tmp32;
+ if ( !pdev || !pdev->dev || !pdev->dev->conf_handle )
+ return NFP_ENODEV;
+
+/* pci_config_get32() does byte swapping, so put back to LE */
+ tmp32 = pci_config_get32( pdev->dev->conf_handle, offset );
+ TO_LE32_IO(res, tmp32);
+
+ return NFP_SUCCESS;
+}
+
+/* user space memory access ---------------------------------- */
+
+nfp_err nfp_copy_from_user( char *kbuf, const char *ubuf, int len) {
+ bcopy(ubuf, kbuf, len);
+ return 0;
+}
+
+nfp_err nfp_copy_to_user( char *ubuf, const char *kbuf, int len) {
+ bcopy(kbuf, ubuf, len);
+ return 0;
+}
+
+nfp_err nfp_copy_from_user_to_dev( nfp_cdev *cdev, int bar, int offset, const char *ubuf, int len) {
+ /* dirty hack on Solaris, as we are called from strategy we are, in fact, copying from kernel mem */
+ return nfp_copy_to_dev( cdev, bar, offset, ubuf, len );
+}
+
+nfp_err nfp_copy_to_user_from_dev( nfp_cdev *cdev, int bar, int offset, char *ubuf, int len) {
+ /* dirty hack on Solaris, as we are called from strategy we are, in fact, copying to kernel mem */
+ return nfp_copy_from_dev( cdev, bar, offset, ubuf, len );
+}
+
+nfp_err nfp_copy_from_dev( nfp_cdev *cdev, int bar, int offset, char *kbuf, int len) {
+ if( len & 0x3 || offset & 0x3 )
+ DDI_REP_GET8( cdev->extra[bar], (unsigned char *)kbuf, cdev->bar[bar] + offset, len, DDI_DEV_AUTOINCR);
+ else
+ /* LINTED: alignment */
+ DDI_REP_GET32( cdev->extra[bar], (unsigned int *)kbuf, (unsigned int *)(cdev->bar[bar] + offset), len / 4, DDI_DEV_AUTOINCR);
+ return NFP_SUCCESS;
+}
+
+nfp_err nfp_copy_to_dev( nfp_cdev *cdev, int bar, int offset, const char *kbuf, int len) {
+ if( len & 0x3 || offset & 0x3 )
+ DDI_REP_PUT8( cdev->extra[bar], (unsigned char *)kbuf, cdev->bar[bar] + offset, len, DDI_DEV_AUTOINCR );
+ else
+ /* LINTED: alignment */
+ DDI_REP_PUT32( cdev->extra[bar], (unsigned int *)kbuf, (unsigned int *)(cdev->bar[bar] + offset), len / 4, DDI_DEV_AUTOINCR );
+ return NFP_SUCCESS;
+}
+
+/* pci io space access --------------------------------------- */
+
+unsigned int nfp_inl( nfp_cdev *pdev, int bar, int offset ) {
+ nfp_log( NFP_DBG3, "nfp_inl: addr %x", (uintptr_t) pdev->bar[bar] + offset);
+ /* LINTED: alignment */
+ return DDI_GET32( pdev->extra[bar], (uint32_t *)(pdev->bar[bar] + offset) );
+}
+
+unsigned short nfp_inw( nfp_cdev *pdev, int bar, int offset ) {
+ nfp_log( NFP_DBG3, "nfp_inw: addr %x", (uintptr_t) pdev->bar[bar] + offset);
+ /* LINTED: alignment */
+ return DDI_GET16( pdev->extra[bar], (unsigned short *)(pdev->bar[ bar ] + offset) );
+}
+
+void nfp_outl( nfp_cdev *pdev, int bar, int offset, unsigned int data ) {
+ nfp_log( NFP_DBG3, "nfp_outl: addr %x, data %x", (uintptr_t) pdev->bar[bar] + offset, data);
+ /* LINTED: alignment */
+ DDI_PUT32( pdev->extra[bar], (uint32_t *)(pdev->bar[ bar ] + offset), data );
+}
+
+void nfp_outw( nfp_cdev *pdev, int bar, int offset, unsigned short data ) {
+ nfp_log( NFP_DBG3, "nfp_outl: addr %x, data %x", (uintptr_t) pdev->bar[bar] + offset, data);
+ /* LINTED: alignment */
+ DDI_PUT16( pdev->extra[bar], (unsigned short *)(pdev->bar[ bar ] + offset), data );
+}
+
+/* logging ---------------------------------------------------- */
+
+void nfp_log( int level, const char *fmt, ...)
+{
+ auto char buf[256];
+ va_list ap;
+
+ switch (level) {
+ case NFP_DBG4: if (nfp_debug < 4) break;
+ /*FALLTHROUGH*/
+ case NFP_DBG3: if (nfp_debug < 3) break;
+ /*FALLTHROUGH*/
+ case NFP_DBG2: if (nfp_debug < 2) break;
+ /*FALLTHROUGH*/
+ case NFP_DBG1: if (nfp_debug < 1) break;
+ /*FALLTHROUGH*/
+ default:
+ va_start(ap, fmt);
+ (void) vsnprintf(buf, 256, fmt, ap);
+ va_end(ap);
+ cmn_err(CE_CONT, "!" VERSION_COMPNAME " " VERSION_NO ": %s\n", buf);
+ break;
+ }
+}
+
+struct errstr {
+ int oserr;
+ nfp_err nferr;
+};
+
+
+static struct errstr errtab[] = {
+ { EFAULT, NFP_EFAULT },
+ { ENOMEM, NFP_ENOMEM },
+ { EINVAL, NFP_EINVAL },
+ { EIO, NFP_EIO },
+ { ENXIO, NFP_ENXIO },
+ { ENODEV, NFP_ENODEV },
+ { EINVAL, NFP_EUNKNOWN },
+ { 0, 0 }
+};
+
+nfp_err nfp_error( int oserr )
+{
+ struct errstr *perr;
+ if(!oserr)
+ return 0;
+ perr= errtab;
+ while(perr->nferr) {
+ if(perr->oserr == oserr)
+ return perr->nferr;
+ perr++;
+ }
+ return NFP_EUNKNOWN;
+}
+
+int nfp_oserr( nfp_err nferr )
+{
+ struct errstr *perr;
+ if(nferr == NFP_SUCCESS)
+ return 0;
+ perr= errtab;
+ while(perr->nferr) {
+ if(perr->nferr == nferr)
+ return perr->oserr;
+ perr++;
+ }
+ return EIO;
+}
diff --git a/usr/src/uts/common/io/ptm.c b/usr/src/uts/common/io/ptm.c
index 400e9ffd10..07ffddc123 100644
--- a/usr/src/uts/common/io/ptm.c
+++ b/usr/src/uts/common/io/ptm.c
@@ -447,6 +447,18 @@ ptmclose(queue_t *rqp, int flag, cred_t *credp)
return (0);
}
+static boolean_t
+ptmptsopencb(ptmptsopencb_arg_t arg)
+{
+ struct pt_ttys *ptmp = (struct pt_ttys *)arg;
+ boolean_t rval;
+
+ PT_ENTER_READ(ptmp);
+ rval = (ptmp->pt_nullmsg != NULL);
+ PT_EXIT_READ(ptmp);
+ return (rval);
+}
+
/*
* The wput procedure will only handle ioctl and flush messages.
*/
@@ -574,6 +586,41 @@ ptmwput(queue_t *qp, mblk_t *mp)
miocack(qp, mp, 0, 0);
break;
}
+ case PTMPTSOPENCB:
+ {
+ mblk_t *dp; /* ioctl reply data */
+ ptmptsopencb_t *ppocb;
+
+ /* only allow the kernel to invoke this ioctl */
+ if (iocp->ioc_cr != kcred) {
+ miocnak(qp, mp, 0, EINVAL);
+ break;
+ }
+
+ /* we don't support transparent ioctls */
+ ASSERT(iocp->ioc_count != TRANSPARENT);
+ if (iocp->ioc_count == TRANSPARENT) {
+ miocnak(qp, mp, 0, EINVAL);
+ break;
+ }
+
+ /* allocate a response message */
+ dp = allocb(sizeof (ptmptsopencb_t), BPRI_MED);
+ if (dp == NULL) {
+ miocnak(qp, mp, 0, EAGAIN);
+ break;
+ }
+
+ /* initialize the ioctl results */
+ ppocb = (ptmptsopencb_t *)dp->b_rptr;
+ ppocb->ppocb_func = ptmptsopencb;
+ ppocb->ppocb_arg = (ptmptsopencb_arg_t)ptmp;
+
+ /* send the reply data */
+ mioc2ack(mp, dp, sizeof (ptmptsopencb_t), 0);
+ qreply(qp, mp);
+ break;
+ }
}
break;
diff --git a/usr/src/uts/common/io/scsi/targets/sd.c b/usr/src/uts/common/io/scsi/targets/sd.c
index ae1e7e0fc3..dc5dc22e37 100644
--- a/usr/src/uts/common/io/scsi/targets/sd.c
+++ b/usr/src/uts/common/io/scsi/targets/sd.c
@@ -3503,9 +3503,13 @@ sd_set_mmc_caps(sd_ssc_t *ssc)
* according to the successful response to the page
* 0x2A mode sense request.
*/
- scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
- "sd_set_mmc_caps: Mode Sense returned "
- "invalid block descriptor length\n");
+ /*
+ * The following warning occurs due to the KVM CD-ROM
+ * mishandling the multi-media commands. Ignore it.
+ * scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
+ * "sd_set_mmc_caps: Mode Sense returned "
+ * "invalid block descriptor length\n");
+ */
kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
return;
}
@@ -4450,18 +4454,77 @@ sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen)
{
struct scsi_inquiry *sd_inq;
int rval = SD_SUCCESS;
+ char *p;
+ int chk_vidlen = 0, chk_pidlen = 0;
+ int has_tail = 0;
+ static const int VSZ = sizeof (sd_inq->inq_vid);
+ static const int PSZ = sizeof (sd_inq->inq_pid);
ASSERT(un != NULL);
sd_inq = un->un_sd->sd_inq;
ASSERT(id != NULL);
/*
- * We use the inq_vid as a pointer to a buffer containing the
- * vid and pid and use the entire vid/pid length of the table
- * entry for the comparison. This works because the inq_pid
- * data member follows inq_vid in the scsi_inquiry structure.
+ * We would like to use the inq_vid as a pointer to a buffer
+ * containing the vid and pid and use the entire vid/pid length of
+ * the table entry for the comparison. However, this does not work
+ * because, while the inq_pid data member follows inq_vid in the
+ * scsi_inquiry structure, we do not control the contents of this
+ * buffer, and some broken devices violate SPC 4.3.1 and return
+ * fields with null bytes in them.
+ */
+ chk_vidlen = MIN(VSZ, idlen);
+ p = id + chk_vidlen - 1;
+ while (*p == ' ' && chk_vidlen > 0) {
+ --p;
+ --chk_vidlen;
+ }
+
+ /*
+ * If it's all spaces, check the whole thing.
*/
- if (strncasecmp(sd_inq->inq_vid, id, idlen) != 0) {
+ if (chk_vidlen == 0)
+ chk_vidlen = MIN(VSZ, idlen);
+
+ if (idlen > VSZ) {
+ chk_pidlen = idlen - VSZ;
+ p = id + idlen - 1;
+ while (*p == ' ' && chk_pidlen > 0) {
+ --p;
+ --chk_pidlen;
+ }
+ if (chk_pidlen == 0)
+ chk_pidlen = MIN(PSZ, idlen - VSZ);
+ }
+
+ /*
+ * There's one more thing we need to do here. If the user specified
+ * an ID with trailing spaces, we need to make sure the inquiry
+ * vid/pid has only spaces or NULs after the check length; otherwise, it
+ * can't match.
+ */
+ if (idlen > chk_vidlen && chk_vidlen < VSZ) {
+ for (p = sd_inq->inq_vid + chk_vidlen;
+ p < sd_inq->inq_vid + VSZ; ++p) {
+ if (*p != ' ' && *p != '\0') {
+ ++has_tail;
+ break;
+ }
+ }
+ }
+ if (idlen > chk_pidlen + VSZ && chk_pidlen < PSZ) {
+ for (p = sd_inq->inq_pid + chk_pidlen;
+ p < sd_inq->inq_pid + PSZ; ++p) {
+ if (*p != ' ' && *p != '\0') {
+ ++has_tail;
+ break;
+ }
+ }
+ }
+
+ if (has_tail || strncasecmp(sd_inq->inq_vid, id, chk_vidlen) != 0 ||
+ (idlen > VSZ &&
+ strncasecmp(sd_inq->inq_pid, id + VSZ, chk_pidlen) != 0)) {
/*
* The user id string is compared to the inquiry vid/pid
* using a case insensitive comparison and ignoring
diff --git a/usr/src/uts/common/io/udmf/dm9601reg.h b/usr/src/uts/common/io/udmf/dm9601reg.h
new file mode 100644
index 0000000000..a36f2b0fc8
--- /dev/null
+++ b/usr/src/uts/common/io/udmf/dm9601reg.h
@@ -0,0 +1,348 @@
+/*
+ * %W% %E%
+ * Macro definitions for Davicom DM9601 USB to fast ethernet controler
+ * based on Davicom DM9601E data sheet
+ * This file is public domain. Coded by M.Murayama (KHF04453@nifty.com)
+ */
+
+#ifndef __DM9601_H__
+#define __DM9601_H__
+
+/*
+ * offset of registers
+ */
+#define NCR 0x00U /* network control register */
+#define NSR 0x01U /* network status register */
+#define TCR 0x02U /* tx control register */
+#define TSR1 0x03U /* tx status register 1 */
+#define TSR2 0x04U /* tx status register 2 */
+#define RCR 0x05U /* rx control register */
+#define RSR 0x06U /* rx status register */
+#define ROCR 0x07U /* rx overflow counter register */
+#define BPTR 0x08U /* back pressure threshold regster */
+#define FCTR 0x09U /* flow control threshold regster */
+#define FCR 0x0aU /* flow control threshold regster */
+#define EPCR 0x0bU /* eeprom & phy control register */
+#define EPAR 0x0cU /* eeprom & phy address register */
+#define EPDR 0x0dU /* eeprom & phy data register (2byte) */
+#define WCR 0x0fU /* wake up control register */
+#define PAR 0x10U /* physical address register (6byte) */
+#define MAR 0x16U /* multicast address register (8byte) */
+#define GPCR 0x1eU /* general purpose control register */
+#define GPR 0x1fU /* general purpose register */
+#define VID 0x28U /* vendor ID (2byte) */
+#define PID 0x2aU /* product ID (2byte) */
+#define CHIPR 0x2cU /* chip revision */
+#define USBDA 0xf0U /* usb device address register */
+#define RXC 0xf1U /* received packet counter register */
+#define TUSC 0xf2U /* tx packet counter/usb status register */
+#define USBC 0xf4U /* usb control register */
+
+/*
+ * register definitions
+ */
+/* network control register */
+#define NCR_EXT_PHY 0x80U /* 1: select external phy */
+#define NCR_WAKEEN 0x40U /* 1: wake up event enable */
+#define NCR_FCOL 0x10U /* force collision mode for test */
+#define NCR_FDX 0x08U /* 1: full duplex mode (for external phy) */
+#define NCR_LBK 0x06U
+#define NCR_LBK_SHIFT 1
+#define NCR_LBK_NORMAL (0U << NCR_LBK_SHIFT)
+#define NCR_LBK_MAC (1U << NCR_LBK_SHIFT)
+#define NCR_LBK_PHY_D (2U << NCR_LBK_SHIFT)
+#define NCR_LBK_PHY_A (3U << NCR_LBK_SHIFT)
+#define NCR_RST 0x01U /* 1: reset, auto clear */
+
+#define NCR_BITS \
+ "\020" \
+ "\010EXT_PHY" \
+ "\007WAKEEN" \
+ "\005FCOL" \
+ "\004FDX" \
+ "\001RST"
+
+/* network status register */
+#define NSR_SPEED 0x80U /* 1:10M 0:100M */
+#define NSR_LINKST 0x40U /* 1:ok 0:fail */
+#define NSR_WAKEST 0x20U /* 1:enabled */
+#define NSR_TXFULL 0x10U /* 1:tx fifo full */
+#define NSR_TX2END 0x08U /* tx packet2 complete status */
+#define NSR_TX1END 0x04U /* tx packet1 complete status */
+#define NSR_RXOV 0x02U /* rx fifo overflow */
+#define NSR_RXRDY 0x01U /* rx packet ready */
+
+#define NSR_BITS \
+ "\020" \
+ "\010SPEED_10" \
+ "\007LINKST_UP" \
+ "\006WAKEST" \
+ "\005TXFULL" \
+ "\004TX2END" \
+ "\003TX1END" \
+ "\002RXOV" \
+ "\001RXRDY"
+
+/* tx control register */
+#define TCR_TJDIS 0x40U /* tx jitter control */
+#define TCR_EXCEDM 0x20U /* excessive collision mode */
+#define TCR_PAD_DIS2 0x10U /* PAD appends disable for pkt2 */
+#define TCR_CRC_DIS2 0x08U /* CRC appends disable for pkt2 */
+#define TCR_PAD_DIS1 0x04U /* PAD appends disable for pkt1 */
+#define TCR_CRC_DIS1 0x02U /* CRC appends disable for pkt1 */
+
+#define TCR_BITS \
+ "\020" \
+ "\007TJDIS" \
+ "\006EXCEDM" \
+ "\005PAD_DIS2" \
+ "\004CRC_DIS2" \
+ "\003PAD_DIS1" \
+ "\002CRC_DIS1"
+
+/* tx status register (ro) */
+#define TSR_TJTO 0x80U /* tx jabber time out */
+#define TSR_LC 0x40U /* loss of carrier */
+#define TSR_NC 0x20U /* no carrier */
+#define TSR_LATEC 0x10U /* late collision */
+#define TSR_COL 0x08U /* late collision */
+#define TSR_EL 0x04U /* excessive collision */
+
+#define TSR_BITS \
+ "\020" \
+ "\010TJTO" \
+ "\007LC" \
+ "\006NC" \
+ "\005LATEC" \
+ "\004COL" \
+ "\003EL"
+
+/* rx control register */
+#define RCR_WTDIS 0x40U /* watch dog timer disable */
+#define RCR_DIS_LONG 0x20U /* discard longer packets than 1522 */
+#define RCR_DIS_CRC 0x10U /* discard crc error packets */
+#define RCR_ALL 0x08U /* pass all multicast */
+#define RCR_RUNT 0x04U /* pass runt packets */
+#define RCR_PRMSC 0x02U /* promiscuous mode */
+#define RCR_RXEN 0x01U /* rx enable */
+
+#define RCR_BITS \
+ "\020" \
+ "\007WTDIS" \
+ "\006DIS_LONG" \
+ "\005DIS_CRC" \
+ "\004ALL" \
+ "\003RUNT" \
+ "\002PRMSC" \
+ "\001RXEN"
+
+/* rx status register */
+#define RSR_RF 0x80U /* runt frame */
+#define RSR_MF 0x40U /* multicast frame */
+#define RSR_LCS 0x20U /* late collision seen */
+#define RSR_RWTO 0x10U /* receive watchdog timeout */
+#define RSR_PLE 0x08U /* physical layer error */
+#define RSR_AE 0x04U /* alignment error */
+#define RSR_CE 0x02U /* crc error */
+#define RSR_FOE 0x01U /* fifo overflow error */
+
+#define RSR_BITS \
+ "\020" \
+ "\010RF" \
+ "\007MF" \
+ "\006LCS" \
+ "\005RWTO" \
+ "\004PLE" \
+ "\003AE" \
+ "\002CE" \
+ "\001FOE"
+
+/* receive overflow counter register */
+#define ROCR_RXFU 0x80U /* receive overflow counter overflow */
+#define ROCR_ROC 0x7fU /* receive overflow counter */
+
+#define ROCR_BITS \
+ "\020" \
+ "\010RXFU"
+
+/* back pressure threshold register */
+#define BPTR_BPHW 0xf0U /* high water overflow threshold */
+#define BPTR_BPHW_SHIFT 4
+#define BPTR_BPHW_UNIT 1024U
+#define BPTR_BPHW_DEFAULT (3 << BPTR_BPHW_SHIFT) /* 3k */
+#define BPTR_JPT 0x0fU /* jam pattern time */
+#define BPTR_JPT_SHIFT 0
+#define BPTR_JPT_5us (0U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_10us (1U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_15us (2U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_25us (3U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_50us (4U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_100us (5U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_150us (6U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_200us (7U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_250us (8U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_300us (9U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_350us (10U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_400us (11U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_450us (12U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_500us (13U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_550us (14U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_600us (15U << BPTR_JPT_SHIFT)
+
+/* flow control threshold register */
+#define FCTR_HWOT 0xf0U /* rx fifo high water overflow threshold */
+#define FCTR_HWOT_SHIFT 4
+#define FCTR_HWOT_UNIT 1024U
+#define FCTR_LWOT 0x0fU /* rx fifo low water overflow threshold */
+#define FCTR_LWOT_SHIFT 0
+#define FCTR_LWOT_UNIT 1024U
+
+/* rx/tx flow control register */
+#define FCR_TXPO 0x80U /* tx pause packet */
+#define FCR_TXPF 0x40U /* tx pause packet */
+#define FCR_TXPEN 0x20U /* tx pause packet */
+#define FCR_BKPA 0x10U /* back pressure mode */
+#define FCR_BKPM 0x08U /* back pressure mode */
+#define FCR_BKPS 0x04U /* rx pause packet current status (r/c) */
+#define FCR_RXPCS 0x02U /* rx pause packet current status (ro) */
+#define FCR_FLCE 0x01U /* flow control enbale */
+
+#define FCR_BITS \
+ "\020" \
+ "\000TXPO" \
+ "\000TXPF" \
+ "\000TXPEN" \
+ "\000BKPA" \
+ "\000BKPM" \
+ "\000BKPS" \
+ "\000RXPCS" \
+ "\000FLCE"
+
+/* EEPROM & PHY control register (0x0b) */
+#define EPCR_REEP 0x20U /* reload eeprom */
+#define EPCR_WEP 0x10U /* write eeprom enable */
+#define EPCR_EPOS 0x08U /* select device, 0:eeprom, 1:phy */
+#define EPCR_ERPRR 0x04U /* read command */
+#define EPCR_ERPRW 0x02U /* write command */
+#define EPCR_ERRE 0x01U /* eeprom/phy access in progress (ro) */
+
+#define EPCR_BITS \
+ "\020" \
+ "\005REEP" \
+ "\004WEP" \
+ "\003EPOS" \
+ "\002ERPRR" \
+ "\001ERPRW" \
+ "\000ERRE"
+
+/* EEPROM & PHY access register (0x0c) */
+#define EPAR_PHYADR 0xc0U /* phy address, internal phy(1) or external */
+#define EPAR_PHYADR_SHIFT 6
+#define EPAR_EROA 0x3fU /* eeprom word addr or phy register addr */
+#define EPAR_EROA_SHIFT 0
+
+/* EEPROM & PHY data register (0x0d(low)-0x0e(hi)) */
+
+/* wake up control register (0x0f) */
+#define WCR_LINKEN 0x20U /* enable link status event */
+#define WCR_SAMPLEEN 0x10U /* enable sample frame event */
+#define WCR_MAGICEN 0x08U /* enable magic pkt event */
+#define WCR_LINKST 0x04U /* link status change occur ro */
+#define WCR_SAMPLEST 0x02U /* sample frame rx occur ro */
+#define WCR_MAGICST 0x01U /* magic pkt rx occur ro */
+
+#define WCR_BITS \
+ "\020" \
+ "\000LINKEN" \
+ "\000SAMPLEEN" \
+ "\000MAGICEN" \
+ "\000LINKST" \
+ "\000SAMPLEST" \
+ "\000MAGICST"
+
+/* physical address register (0x10-0x15) */
+/* multicast address register (0x16-0x1c) */
+/* general purpose control register (0x1e) */
+#define GPCR_GEPCTRL 0x7f
+#define GPCR_OUT(n) (1U << (n))
+
+#define GPCR_BITS \
+ "\020" \
+ "\006OUT5" \
+ "\005OUT4" \
+ "\004OUT3" \
+ "\003OUT2" \
+ "\002OUT1" \
+ "\001OUT0"
+
+/* general purpose register (0x1f) */
+#define GPR_GEPIO5 0x20U
+#define GPR_GEPIO4 0x10U
+#define GPR_GEPIO3 0x08U
+#define GPR_GEPIO2 0x04U
+#define GPR_GEPIO1 0x02U
+#define GPR_GEPIO0 0x01U
+
+#define GPR_BITS \
+ "\020" \
+ "\006GEPIO5" \
+ "\005GEPIO4" \
+ "\004GEPIO3" \
+ "\003GEPIO2" \
+ "\002GEPIO1" \
+ "\001GEPIO0"
+
+/* vendor id register (0x28-0x29) */
+/* product id register (0x2a-0x2b) */
+/* chip revision register (0x2c) */
+
+/* usb device address register (0xf0) */
+#define USBDA_USBFA 0x3fU /* usb device address */
+#define USBDA_USBFA_SHIFT 0
+
+/* receive packet counter register (0xf1) */
+
+/* transmitpacket counter/usb status register (0xf2) */
+#define TUSR_RXFAULT 0x80U /* indicate rx has unexpected condition */
+#define TUSR_SUSFLAG 0x40U /* indicate device has suspended condition */
+#define TUSR_EP1RDY 0x20U /* ready for read from ep1 pipe */
+#define TUSR_SRAM 0x18U /* sram size 0:32K, 1:48K, 2:16K, 3:64K */
+#define TUSR_SRAM_SHIFT 3
+#define TUSR_SRAM_32K (0U << TUSR_SRAM_SHIFT)
+#define TUSR_SRAM_48K (1U << TUSR_SRAM_SHIFT)
+#define TUSR_SRAM_16K (2U << TUSR_SRAM_SHIFT)
+#define TUSR_SRAM_64K (3U << TUSR_SRAM_SHIFT)
+#define TUSR_TXC2 0x04U /* two or more packets in tx buffer */
+#define TUSR_TXC1 0x02U /* one packet in tx buffer */
+#define TUSR_TXC0 0x01U /* no packet in tx buffer */
+
+#define TUSR_BITS \
+ "\020" \
+ "\010RXFAULT" \
+ "\007SUSFLAG" \
+ "\006EP1RDY" \
+ "\003TXC2" \
+ "\002TXC1" \
+ "\001TXC0"
+
+/* usb control register (0xf4) */
+#define USBC_EP3ACK 0x20U /* ep3 will alway return 8byte data if NAK=0*/
+#define USBC_EP3NACK 0x10U /* ep3 will alway return NAK */
+#define USBC_MEMTST 0x01U
+
+/* bulk message format */
+#define TX_HEADER_SIZE 2
+#define RX_HEADER_SIZE 3
+
+/* interrupt msg format */
+struct intr_msg {
+ uint8_t im_nsr;
+ uint8_t im_tsr1;
+ uint8_t im_tsr2;
+ uint8_t im_rsr;
+ uint8_t im_rocr;
+ uint8_t im_rxc;
+ uint8_t im_txc;
+ uint8_t im_gpr;
+};
+#endif /* __DM9601_H__ */
diff --git a/usr/src/uts/common/io/udmf/udmf_usbgem.c b/usr/src/uts/common/io/udmf/udmf_usbgem.c
new file mode 100644
index 0000000000..0637de054b
--- /dev/null
+++ b/usr/src/uts/common/io/udmf/udmf_usbgem.c
@@ -0,0 +1,1036 @@
+/*
+ * udmfE_usbgem.c : Davicom DM9601E USB to Fast Ethernet Driver for Solaris
+ *
+ * Copyright (c) 2009-2012 Masayuki Murayama. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the author nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+#pragma ident "%W% %E%"
+
+/*
+ * Changelog:
+ */
+
+/*
+ * TODO
+ */
+/* ======================================================= */
+
+/*
+ * Solaris system header files and macros
+ */
+
+/* minimum kernel headers for drivers */
+#include <sys/types.h>
+#include <sys/conf.h>
+#include <sys/debug.h>
+#include <sys/kmem.h>
+#include <sys/modctl.h>
+#include <sys/errno.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/byteorder.h>
+
+/* ethernet stuff */
+#include <sys/ethernet.h>
+
+/* interface card depend stuff */
+#include <sys/stropts.h>
+#include <sys/stream.h>
+#include <sys/strlog.h>
+#include <sys/usb/usba.h>
+#include "usbgem.h"
+
+/* hardware stuff */
+#include "usbgem_mii.h"
+#include "dm9601reg.h"
+
+char ident[] = "dm9601 usbnic driver v" VERSION;
+
+/*
+ * Useful macros
+ */
+#define CHECK_AND_JUMP(err, label) if (err != USB_SUCCESS) goto label
+#define LE16P(p) ((((uint8_t *)(p))[1] << 8) | ((uint8_t *)(p))[0])
+
+/*
+ * Debugging
+ */
+#ifdef DEBUG_LEVEL
+static int udmf_debug = DEBUG_LEVEL;
+#define DPRINTF(n, args) if (udmf_debug > (n)) cmn_err args
+#else
+#define DPRINTF(n, args)
+#endif
+
+/*
+ * Our configration for dm9601
+ */
+/* timeouts */
+#define ONESEC (drv_usectohz(1*1000000))
+
+/*
+ * Local device definitions
+ */
+struct udmf_dev {
+ /*
+ * Misc HW information
+ */
+ uint8_t rcr;
+ uint8_t last_nsr;
+ uint8_t mac_addr[ETHERADDRL];
+};
+
+/*
+ * private functions
+ */
+
+/* mii operations */
+static uint16_t udmf_mii_read(struct usbgem_dev *, uint_t, int *errp);
+static void udmf_mii_write(struct usbgem_dev *, uint_t, uint16_t, int *errp);
+
+/* nic operations */
+static int udmf_reset_chip(struct usbgem_dev *);
+static int udmf_init_chip(struct usbgem_dev *);
+static int udmf_start_chip(struct usbgem_dev *);
+static int udmf_stop_chip(struct usbgem_dev *);
+static int udmf_set_media(struct usbgem_dev *);
+static int udmf_set_rx_filter(struct usbgem_dev *);
+static int udmf_get_stats(struct usbgem_dev *);
+static void udmf_interrupt(struct usbgem_dev *, mblk_t *);
+
+/* packet operations */
+static mblk_t *udmf_tx_make_packet(struct usbgem_dev *, mblk_t *);
+static mblk_t *udmf_rx_make_packet(struct usbgem_dev *, mblk_t *);
+
+/* =============================================================== */
+/*
+ * I/O functions
+ */
+/* =============================================================== */
+#define OUT(dp, ix, len, buf, errp, label) \
+ if ((*(errp) = usbgem_ctrl_out((dp), \
+ /* bmRequestType */ USB_DEV_REQ_HOST_TO_DEV \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ 1, \
+ /* wValue */ 0, \
+ /* wIndex */ (ix), \
+ /* wLength */ (len), \
+ /* value */ (buf), \
+ /* size */ (len))) != USB_SUCCESS) goto label
+
+#define OUTB(dp, ix, val, errp, label) \
+ if ((*(errp) = usbgem_ctrl_out((dp), \
+ /* bmRequestType */ USB_DEV_REQ_HOST_TO_DEV \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ 3, \
+ /* wValue */ (val), \
+ /* wIndex */ (ix), \
+ /* wLength */ 0, \
+ /* value */ NULL, \
+ /* size */ 0)) != USB_SUCCESS) goto label
+
+#define IN(dp, ix, len, buf, errp, label) \
+ if ((*(errp) = usbgem_ctrl_in((dp), \
+ /* bmRequestType */ USB_DEV_REQ_DEV_TO_HOST \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ 0, \
+ /* wValue */ 0, \
+ /* wIndex */ (ix), \
+ /* wLength */ (len), \
+ /* valuep */ (buf), \
+ /* size */ (len))) != USB_SUCCESS) goto label
+
+/* =============================================================== */
+/*
+ * Hardware manupilation
+ */
+/* =============================================================== */
+static void
+udmf_enable_phy(struct usbgem_dev *dp)
+{
+ int err = USB_SUCCESS;
+
+ /* de-assert reset signal to phy */
+ OUTB(dp, GPCR, GPCR_OUT(0), &err, usberr);
+ OUTB(dp, GPR, 0, &err, usberr);
+usberr:
+ ;
+}
+
+static int
+udmf_reset_chip(struct usbgem_dev *dp)
+{
+ int err = USB_SUCCESS;
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ OUTB(dp, NCR, NCR_LBK_NORMAL | NCR_RST, &err, usberr);
+ drv_usecwait(100);
+usberr:
+ return (err);
+}
+
+/*
+ * Setup dm9601
+ */
+static int
+udmf_init_chip(struct usbgem_dev *dp)
+{
+ int i;
+ uint32_t val;
+ int err = USB_SUCCESS;
+ uint16_t reg;
+ uint8_t buf[2];
+ struct udmf_dev *lp = dp->private;
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ OUTB(dp, NCR, NCR_LBK_NORMAL, &err, usberr);
+
+ /* tx control regiser: enable padding and crc generation */
+ OUTB(dp, TCR, 0, &err, usberr);
+
+ /* rx control register: will be set later by udmf_set_rx_filer() */
+ lp->rcr = RCR_RUNT;
+
+ /* back pressure threshold: */
+ OUTB(dp, BPTR, (2 << BPTR_BPHW_SHIFT) | BPTR_JPT_200us,
+ &err, usberr);
+
+ /* flow control threshold: same as default */
+ OUTB(dp, FCTR, (3 << FCTR_HWOT_SHIFT) | (8 << FCTR_LWOT_SHIFT),
+ &err, usberr);
+
+ /* usb control register */
+ OUTB(dp, USBC, USBC_EP3ACK | 0x06, &err, usberr);
+
+ /* flow control: will be set later by udmf_set_media() */
+
+ /* wake up control register: */
+ OUTB(dp, WCR, 0, &err, usberr);
+
+usberr:
+ DPRINTF(2, (CE_CONT, "!%s: %s: end err:%d(%s)",
+ dp->name, __func__,
+ err, err == USB_SUCCESS ? "success" : "error"));
+ return (err);
+}
+
+static int
+udmf_start_chip(struct usbgem_dev *dp)
+{
+ int err = USB_SUCCESS;
+ struct udmf_dev *lp = dp->private;
+
+ /* enable Rx */
+ lp->rcr |= RCR_RXEN;
+ OUTB(dp, RCR, lp->rcr, &err, usberr);
+
+usberr:
+ DPRINTF(2, (CE_CONT, "!%s: %s: end err:%d(%s)",
+ dp->name, __func__,
+ err, err == USB_SUCCESS ? "success" : "error"));
+ return (err);
+}
+
+static int
+udmf_stop_chip(struct usbgem_dev *dp)
+{
+ int err = USB_SUCCESS;
+ struct udmf_dev *lp = dp->private;
+
+ /* disable rx */
+ lp->rcr &= ~RCR_RXEN;
+ OUTB(dp, RCR, lp->rcr, &err, usberr);
+
+usberr:
+ DPRINTF(2, (CE_CONT, "!%s: %s: end err:%d(%s)",
+ dp->name, __func__,
+ err, err == USB_SUCCESS ? "success" : "error"));
+ return (err);
+}
+
+static int
+udmf_get_stats(struct usbgem_dev *dp)
+{
+ /* EMPTY */
+ return (USB_SUCCESS);
+}
+
+static uint_t
+udmf_mcast_hash(struct usbgem_dev *dp, const uint8_t *addr)
+{
+ return (usbgem_ether_crc_le(addr) & 0x3f);
+}
+
+static int
+udmf_set_rx_filter(struct usbgem_dev *dp)
+{
+ int i;
+ uint8_t rcr;
+ uint8_t mode;
+ uint8_t mhash[8];
+ uint8_t *mac;
+ uint_t h;
+ int err = USB_SUCCESS;
+ struct udmf_dev *lp = dp->private;
+ static uint8_t invalid_mac[ETHERADDRL] = {0, 0, 0, 0, 0, 0};
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: called, rxmode:%x",
+ dp->name, __func__, dp->rxmode));
+
+ if (lp->rcr & RCR_RXEN) {
+ /* set promiscuous mode before changing rx filter mode */
+ OUTB(dp, RCR, lp->rcr | RCR_PRMSC, &err, usberr);
+ }
+
+ lp->rcr &= ~(RCR_ALL | RCR_PRMSC);
+ mode = 0;
+ bzero(mhash, sizeof (mhash));
+ mac = dp->cur_addr.ether_addr_octet;
+
+ if ((dp->rxmode & RXMODE_ENABLE) == 0) {
+ mac = invalid_mac;
+ } else if (dp->rxmode & RXMODE_PROMISC) {
+ /* promiscious mode implies all multicast and all physical */
+ mode |= RCR_PRMSC;
+ } else if ((dp->rxmode & RXMODE_ALLMULTI) || dp->mc_count > 32) {
+ /* accept all multicast packets */
+ mode |= RCR_ALL;
+ } else if (dp->mc_count > 0) {
+ /*
+ * make hash table to select interresting
+ * multicast address only.
+ */
+ for (i = 0; i < dp->mc_count; i++) {
+ /* hash table is 64 = 2^6 bit width */
+ h = dp->mc_list[i].hash;
+ mhash[h / 8] |= 1 << (h % 8);
+ }
+ }
+
+ /* set node address */
+ if (bcmp(mac, lp->mac_addr, ETHERADDRL) != 0) {
+ OUT(dp, PAR, ETHERADDRL, dp->cur_addr.ether_addr_octet,
+ &err, usberr);
+ bcopy(mac, lp->mac_addr, ETHERADDRL);
+ }
+
+ /* set multicast hash table */
+ OUT(dp, MAR, sizeof (mhash), &mhash[0], &err, usberr);
+
+ /* update rcr */
+ lp->rcr |= mode;
+ OUTB(dp, RCR, lp->rcr, &err, usberr);
+
+#if DEBUG_LEVEL > 1
+ /* verify rcr */
+ IN(dp, RCR, 1, &rcr, &err, usberr);
+ cmn_err(CE_CONT, "!%s: %s: rcr:%b returned",
+ dp->name, __func__, rcr, RCR_BITS);
+#endif
+usberr:
+ DPRINTF(2, (CE_CONT, "!%s: %s: end err:%d(%s)",
+ dp->name, __func__,
+ err, err == USB_SUCCESS ? "success" : "error"));
+ return (err);
+}
+
+static int
+udmf_set_media(struct usbgem_dev *dp)
+{
+ int err = USB_SUCCESS;
+ uint8_t fcr;
+ struct udmf_dev *lp = dp->private;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /* setup flow control */
+ fcr = 0;
+ if (dp->full_duplex) {
+ /* select flow control */
+ switch (dp->flow_control) {
+ case FLOW_CONTROL_RX_PAUSE:
+ fcr |= FCR_FLCE;
+ break;
+
+ case FLOW_CONTROL_TX_PAUSE:
+ fcr |= FCR_TXPEN;
+ break;
+
+ case FLOW_CONTROL_SYMMETRIC:
+ fcr |= FCR_FLCE | FCR_TXPEN;
+ break;
+ }
+ }
+
+ /* update flow control register */
+ OUTB(dp, FCR, fcr, &err, usberr);
+
+usberr:
+ DPRINTF(2, (CE_CONT, "!%s: %s: end err:%d(%s)",
+ dp->name, __func__,
+ err, err == USB_SUCCESS ? "success" : "error"));
+ return (err);
+}
+
+/*
+ * send/receive packet check
+ */
+static mblk_t *
+udmf_tx_make_packet(struct usbgem_dev *dp, mblk_t *mp)
+{
+ int n;
+ size_t pkt_size;
+ mblk_t *new;
+ mblk_t *tp;
+ uint8_t *bp;
+ uint8_t *last_pos;
+ uint_t align_mask;
+
+ pkt_size = msgdsize(mp);
+ align_mask = 63;
+
+ /*
+ * re-allocate the mp
+ */
+
+ /* minimum ethernet packet size of ETHERMIN */
+ pkt_size = max(pkt_size, ETHERMIN);
+
+#if 0 /* CONFIG_ADD_TX_DELIMITOR_ALWAYS */
+ pkt_size += TX_HEADER_SIZE;
+#endif
+ if (((pkt_size + TX_HEADER_SIZE) & align_mask) == 0) {
+ /* padding is required in usb communication */
+ pkt_size += TX_HEADER_SIZE;
+ }
+
+ if ((new = allocb(TX_HEADER_SIZE + pkt_size, 0)) == NULL) {
+ return (NULL);
+ }
+ new->b_wptr = new->b_rptr + TX_HEADER_SIZE + pkt_size;
+
+ /* add a header */
+ bp = new->b_rptr;
+ bp[0] = (uint8_t)pkt_size;
+ bp[1] = (uint8_t)(pkt_size >> 8);
+ bp += TX_HEADER_SIZE;
+
+ /* copy contents of the buffer */
+ for (tp = mp; tp; tp = tp->b_cont) {
+ n = tp->b_wptr - tp->b_rptr;
+ bcopy(tp->b_rptr, bp, n);
+ bp += n;
+ }
+
+ /* clear the rest including the next zero length header */
+ last_pos = new->b_wptr;
+ while (bp < last_pos) {
+ *bp++ = 0;
+ }
+
+ return (new);
+}
+
+static void
+udmf_dump_packet(struct usbgem_dev *dp, uint8_t *bp, int n)
+{
+ int i;
+
+ for (i = 0; i < n; i += 8, bp += 8) {
+ cmn_err(CE_CONT, "%02x %02x %02x %02x %02x %02x %02x %02x",
+ bp[0], bp[1], bp[2], bp[3], bp[4], bp[5], bp[6], bp[7]);
+ }
+}
+
+static mblk_t *
+udmf_rx_make_packet(struct usbgem_dev *dp, mblk_t *mp)
+{
+ int len;
+ uint8_t rx_stat;
+
+ len = mp->b_wptr - mp->b_rptr;
+
+ if (len <= RX_HEADER_SIZE) {
+ /*
+ * the usb bulk-in frame doesn't include a valid
+ * ethernet packet.
+ */
+ return (NULL);
+ }
+
+ /* remove rx header */
+ rx_stat = mp->b_rptr[0];
+ if (rx_stat & (RSR_RF | RSR_LCS | RSR_RWTO |
+ RSR_PLE | RSR_AE | RSR_CE | RSR_FOE)) {
+ if (rx_stat & RSR_RF) {
+ dp->stats.runt++;
+ }
+ if (rx_stat & RSR_LCS) {
+ /* late collision */
+ dp->stats.rcv_internal_err++;
+ }
+ if (rx_stat & RSR_RWTO) {
+ /* rx timeout */
+ dp->stats.rcv_internal_err++;
+ }
+ if (rx_stat & RSR_PLE) {
+ /* physical layer error */
+ dp->stats.rcv_internal_err++;
+ }
+ if (rx_stat & RSR_AE) {
+ /* alignment error */
+ dp->stats.frame++;
+ }
+ if (rx_stat & RSR_CE) {
+ /* crc error */
+ dp->stats.crc++;
+ }
+ if (rx_stat & RSR_FOE) {
+ /* fifo overflow error */
+ dp->stats.overflow++;
+ }
+ dp->stats.errrcv++;
+ }
+ len = LE16P(&mp->b_rptr[1]);
+ if (len >= ETHERFCSL) {
+ len -= ETHERFCSL;
+ }
+ mp->b_rptr += RX_HEADER_SIZE;
+ mp->b_wptr = mp->b_rptr + len;
+
+ return (mp);
+}
+
+/*
+ * MII Interfaces
+ */
+static uint16_t
+udmf_ep_read(struct usbgem_dev *dp, uint_t which, uint_t addr, int *errp)
+{
+ int i;
+ uint8_t epcr;
+ uint16_t val;
+
+ DPRINTF(4, (CE_CONT, "!%s: %s: called, ix:%d",
+ dp->name, __func__, addr));
+
+ OUTB(dp, EPAR, addr, errp, usberr);
+ OUTB(dp, EPCR, which | EPCR_ERPRR, errp, usberr);
+
+ for (i = 0; i < 100; i++) {
+ IN(dp, EPCR, sizeof (epcr), &epcr, errp, usberr);
+ if ((epcr & EPCR_ERRE) == 0) {
+ /* done */
+ IN(dp, EPDR, sizeof (val), &val, errp, usberr);
+ val = LE_16(val);
+ goto done;
+ }
+ drv_usecwait(10);
+ }
+ /* timeout */
+ cmn_err(CE_WARN, "!%s: %s: timeout", dp->name, __func__);
+ val = 0;
+done:
+ OUTB(dp, EPCR, 0, errp, usberr);
+ return (val);
+
+usberr:
+ DPRINTF(2, (CE_CONT, "!%s: %s: end err:%d(%s)",
+ dp->name, __func__,
+ *errp, *errp == USB_SUCCESS ? "success" : "error"));
+ return (0);
+}
+
+static void
+udmf_ep_write(struct usbgem_dev *dp, uint_t which, uint_t addr,
+ uint16_t val, int *errp)
+{
+ int i;
+ uint8_t epcr;
+
+ DPRINTF(5, (CE_CONT, "!%s: %s called", dp->name, __func__));
+
+ val = LE_16(val);
+ OUT(dp, EPDR, sizeof (val), &val, errp, usberr);
+
+ OUTB(dp, EPAR, addr, errp, usberr);
+
+ OUTB(dp, EPCR, which | EPCR_WEP | EPCR_ERPRW, errp, usberr);
+
+ for (i = 0; i < 100; i++) {
+ IN(dp, EPCR, 1, &epcr, errp, usberr);
+ if ((epcr & EPCR_ERRE) == 0) {
+ /* done */
+ goto done;
+ }
+ drv_usecwait(10);
+ }
+ /* timeout */
+ cmn_err(CE_WARN, "!%s: %s: timeout", dp->name, __func__);
+done:
+ OUTB(dp, EPCR, 0, errp, usberr);
+ return;
+
+usberr:
+ DPRINTF(2, (CE_CONT, "!%s: %s: end err:%d(%s)",
+ dp->name, __func__,
+ *errp, *errp == USB_SUCCESS ? "success" : "error"));
+}
+
+static uint16_t
+udmf_mii_read(struct usbgem_dev *dp, uint_t index, int *errp)
+{
+ uint16_t val;
+
+ val = udmf_ep_read(dp, EPCR_EPOS,
+ (dp->mii_phy_addr << EPAR_PHYADR_SHIFT) | index, errp);
+
+ return (val);
+}
+
+static void
+udmf_mii_write(struct usbgem_dev *dp, uint_t index, uint16_t val, int *errp)
+{
+ udmf_ep_write(dp, EPCR_EPOS,
+ (dp->mii_phy_addr << EPAR_PHYADR_SHIFT) | index, val, errp);
+}
+
+static void
+udmf_interrupt(struct usbgem_dev *dp, mblk_t *mp)
+{
+ struct intr_msg *imp;
+ struct udmf_dev *lp = dp->private;
+
+ imp = (struct intr_msg *)&mp->b_rptr[0];
+
+ DPRINTF(4, (CE_CONT,
+ "!%s: %s: size:%d, nsr:%b tsr1:%b tsr2:%b"
+ " rsr:%b rocr:%b rxc:%02x txc:%b gpr:%b",
+ dp->name, __func__, mp->b_wptr - mp->b_rptr,
+ imp->im_nsr, NSR_BITS,
+ imp->im_tsr1, TSR_BITS,
+ imp->im_tsr2, TSR_BITS,
+ imp->im_rsr, RSR_BITS,
+ imp->im_rocr, ROCR_BITS,
+ imp->im_rxc,
+ imp->im_txc, TUSR_BITS,
+ imp->im_gpr, GPR_BITS));
+
+ if ((lp->last_nsr ^ imp->im_nsr) & NSR_LINKST) {
+ usbgem_mii_update_link(dp);
+ }
+
+ lp->last_nsr = imp->im_nsr;
+}
+
+/* ======================================================== */
+/*
+ * OS depend (device driver DKI) routine
+ */
+/* ======================================================== */
+static uint16_t
+udmf_eeprom_read(struct usbgem_dev *dp, uint_t index, int *errp)
+{
+ uint16_t val;
+
+ val = udmf_ep_read(dp, 0, index, errp);
+
+ return (val);
+}
+
+#ifdef DEBUG_LEVEL
+static void
+udmf_eeprom_dump(struct usbgem_dev *dp, int size)
+{
+ int i;
+ int err;
+ uint16_t w0, w1, w2, w3;
+
+ cmn_err(CE_CONT, "!%s: eeprom dump:", dp->name);
+
+ err = USB_SUCCESS;
+
+ for (i = 0; i < size; i += 4) {
+ w0 = udmf_eeprom_read(dp, i + 0, &err);
+ w1 = udmf_eeprom_read(dp, i + 1, &err);
+ w2 = udmf_eeprom_read(dp, i + 2, &err);
+ w3 = udmf_eeprom_read(dp, i + 3, &err);
+ cmn_err(CE_CONT, "!0x%02x: 0x%04x 0x%04x 0x%04x 0x%04x",
+ i, w0, w1, w2, w3);
+ }
+usberr:
+ ;
+}
+#endif
+
+static int
+udmf_attach_chip(struct usbgem_dev *dp)
+{
+ int i;
+ uint_t val;
+ uint8_t *m;
+ int err;
+ struct udmf_dev *lp = dp->private;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s enter", dp->name, __func__));
+
+ /*
+ * get mac address from EEPROM
+ */
+ m = dp->dev_addr.ether_addr_octet;
+ for (i = 0; i < ETHERADDRL; i += 2) {
+ val = udmf_eeprom_read(dp, i/2, &err);
+ m[i + 0] = (uint8_t)val;
+ m[i + 1] = (uint8_t)(val >> 8);
+ }
+
+ /* invalidate a private cache for mac addr */
+ bzero(lp->mac_addr, sizeof (lp->mac_addr));
+#ifdef CONFIG_VLAN
+ dp->misc_flag = USBGEM_VLAN;
+#endif
+#if DEBUG_LEVEL > 0
+ udmf_eeprom_dump(dp, /* 0x3f + 1 */ 128);
+#endif
+{
+ static uint8_t bcst[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+ DPRINTF(0, (CE_CONT, "!%s: %s: hash of bcast:%x",
+ dp->name, __func__, usbgem_ether_crc_be(bcst)));
+}
+ return (USB_SUCCESS);
+
+usberr:
+ cmn_err(CE_WARN, "%s: %s: usb error detected (%d)",
+ dp->name, __func__, err);
+ return (USB_FAILURE);
+}
+
+static int
+udmf_mii_probe(struct usbgem_dev *dp)
+{
+ DPRINTF(2, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ udmf_enable_phy(dp);
+ return (usbgem_mii_probe_default(dp));
+}
+
+static int
+udmf_mii_init(struct usbgem_dev *dp)
+{
+ DPRINTF(2, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+ udmf_enable_phy(dp);
+ return (USB_SUCCESS);
+}
+
+static int
+udmfattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int i;
+ ddi_iblock_cookie_t c;
+ int ret;
+ int revid;
+ int unit;
+ int len;
+ const char *drv_name;
+ struct usbgem_dev *dp;
+ void *base;
+ struct usbgem_conf *ugcp;
+ struct udmf_dev *lp;
+
+ unit = ddi_get_instance(dip);
+ drv_name = ddi_driver_name(dip);
+
+ DPRINTF(3, (CE_CONT, "!%s%d: %s: called, cmd:%d",
+ drv_name, unit, __func__, cmd));
+
+ if (cmd == DDI_ATTACH) {
+ /*
+ * construct usbgem configration
+ */
+ ugcp = kmem_zalloc(sizeof (*ugcp), KM_SLEEP);
+
+ /* name */
+ /*
+ * softmac requires that ppa is the instance number
+ * of the device, otherwise it hangs in seaching the device.
+ */
+ sprintf(ugcp->usbgc_name, "%s%d", drv_name, unit);
+ ugcp->usbgc_ppa = unit;
+
+ ugcp->usbgc_ifnum = 0;
+ ugcp->usbgc_alt = 0;
+
+ ugcp->usbgc_tx_list_max = 64;
+
+ ugcp->usbgc_rx_header_len = RX_HEADER_SIZE;
+ ugcp->usbgc_rx_list_max = 64;
+
+ /* time out parameters */
+ ugcp->usbgc_tx_timeout = USBGEM_TX_TIMEOUT;
+ ugcp->usbgc_tx_timeout_interval = USBGEM_TX_TIMEOUT_INTERVAL;
+#if 1
+ /* flow control */
+ ugcp->usbgc_flow_control = FLOW_CONTROL_RX_PAUSE;
+#else
+ /*
+ * XXX - flow control caused link down frequently under
+ * heavy traffic
+ */
+ ugcp->usbgc_flow_control = FLOW_CONTROL_NONE;
+#endif
+ /* MII timeout parameters */
+ ugcp->usbgc_mii_link_watch_interval =
+ USBGEM_LINK_WATCH_INTERVAL;
+ ugcp->usbgc_mii_an_watch_interval =
+ USBGEM_LINK_WATCH_INTERVAL/5;
+ ugcp->usbgc_mii_reset_timeout = MII_RESET_TIMEOUT; /* 1 sec */
+ ugcp->usbgc_mii_an_timeout = MII_AN_TIMEOUT; /* 5 sec */
+ ugcp->usbgc_mii_an_wait = (25*ONESEC)/10;
+ ugcp->usbgc_mii_linkdown_timeout = MII_LINKDOWN_TIMEOUT;
+
+ ugcp->usbgc_mii_an_delay = ONESEC/10;
+ ugcp->usbgc_mii_linkdown_action = MII_ACTION_RSA;
+ ugcp->usbgc_mii_linkdown_timeout_action = MII_ACTION_RESET;
+ ugcp->usbgc_mii_dont_reset = B_FALSE;
+ ugcp->usbgc_mii_hw_link_detection = B_TRUE;
+
+ /* I/O methods */
+
+ /* mac operation */
+ ugcp->usbgc_attach_chip = &udmf_attach_chip;
+ ugcp->usbgc_reset_chip = &udmf_reset_chip;
+ ugcp->usbgc_init_chip = &udmf_init_chip;
+ ugcp->usbgc_start_chip = &udmf_start_chip;
+ ugcp->usbgc_stop_chip = &udmf_stop_chip;
+ ugcp->usbgc_multicast_hash = &udmf_mcast_hash;
+
+ ugcp->usbgc_set_rx_filter = &udmf_set_rx_filter;
+ ugcp->usbgc_set_media = &udmf_set_media;
+ ugcp->usbgc_get_stats = &udmf_get_stats;
+ ugcp->usbgc_interrupt = &udmf_interrupt;
+
+ /* packet operation */
+ ugcp->usbgc_tx_make_packet = &udmf_tx_make_packet;
+ ugcp->usbgc_rx_make_packet = &udmf_rx_make_packet;
+
+ /* mii operations */
+ ugcp->usbgc_mii_probe = &udmf_mii_probe;
+ ugcp->usbgc_mii_init = &udmf_mii_init;
+ ugcp->usbgc_mii_config = &usbgem_mii_config_default;
+ ugcp->usbgc_mii_read = &udmf_mii_read;
+ ugcp->usbgc_mii_write = &udmf_mii_write;
+ ugcp->usbgc_mii_addr_min = 1;
+
+ /* mtu */
+ ugcp->usbgc_min_mtu = ETHERMTU;
+ ugcp->usbgc_max_mtu = ETHERMTU;
+ ugcp->usbgc_default_mtu = ETHERMTU;
+
+ lp = kmem_zalloc(sizeof (struct udmf_dev), KM_SLEEP);
+ lp->last_nsr;
+
+ ddi_set_driver_private(dip, NULL);
+
+ dp = usbgem_do_attach(dip, ugcp, lp, sizeof (struct udmf_dev));
+
+ kmem_free(ugcp, sizeof (*ugcp));
+
+ if (dp != NULL) {
+ return (DDI_SUCCESS);
+ }
+
+err_free_mem:
+ kmem_free(lp, sizeof (struct udmf_dev));
+err_close_pipe:
+err:
+ return (DDI_FAILURE);
+ }
+
+ if (cmd == DDI_RESUME) {
+ return (usbgem_resume(dip));
+ }
+
+ return (DDI_FAILURE);
+}
+
+static int
+udmfdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ int ret;
+
+ if (cmd == DDI_DETACH) {
+ ret = usbgem_do_detach(dip);
+ if (ret != DDI_SUCCESS) {
+ return (DDI_FAILURE);
+ }
+ return (DDI_SUCCESS);
+ }
+ if (cmd == DDI_SUSPEND) {
+ return (usbgem_suspend(dip));
+ }
+ return (DDI_FAILURE);
+}
+
+/* ======================================================== */
+/*
+ * OS depend (loadable streams driver) routine
+ */
+/* ======================================================== */
+#ifdef USBGEM_CONFIG_GLDv3
+USBGEM_STREAM_OPS(udmf_ops, udmfattach, udmfdetach);
+#else
+static struct module_info udmfminfo = {
+ 0, /* mi_idnum */
+ "udmf", /* mi_idname */
+ 0, /* mi_minpsz */
+ ETHERMTU, /* mi_maxpsz */
+ ETHERMTU*128, /* mi_hiwat */
+ 1, /* mi_lowat */
+};
+
+static struct qinit udmfrinit = {
+ (int (*)()) NULL, /* qi_putp */
+ usbgem_rsrv, /* qi_srvp */
+ usbgem_open, /* qi_qopen */
+ usbgem_close, /* qi_qclose */
+ (int (*)()) NULL, /* qi_qadmin */
+ &udmfminfo, /* qi_minfo */
+ NULL /* qi_mstat */
+};
+
+static struct qinit udmfwinit = {
+ usbgem_wput, /* qi_putp */
+ usbgem_wsrv, /* qi_srvp */
+ (int (*)()) NULL, /* qi_qopen */
+ (int (*)()) NULL, /* qi_qclose */
+ (int (*)()) NULL, /* qi_qadmin */
+ &udmfminfo, /* qi_minfo */
+ NULL /* qi_mstat */
+};
+
+static struct streamtab udmf_info = {
+ &udmfrinit, /* st_rdinit */
+ &udmfwinit, /* st_wrinit */
+ NULL, /* st_muxrinit */
+ NULL /* st_muxwrinit */
+};
+
+static struct cb_ops cb_udmf_ops = {
+ nulldev, /* cb_open */
+ nulldev, /* cb_close */
+ nodev, /* cb_strategy */
+ nodev, /* cb_print */
+ nodev, /* cb_dump */
+ nodev, /* cb_read */
+ nodev, /* cb_write */
+ nodev, /* cb_ioctl */
+ nodev, /* cb_devmap */
+ nodev, /* cb_mmap */
+ nodev, /* cb_segmap */
+ nochpoll, /* cb_chpoll */
+ ddi_prop_op, /* cb_prop_op */
+ &udmf_info, /* cb_stream */
+ D_NEW|D_MP /* cb_flag */
+};
+
+static struct dev_ops udmf_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* devo_refcnt */
+ usbgem_getinfo, /* devo_getinfo */
+ nulldev, /* devo_identify */
+ nulldev, /* devo_probe */
+ udmfattach, /* devo_attach */
+ udmfdetach, /* devo_detach */
+ nodev, /* devo_reset */
+ &cb_udmf_ops, /* devo_cb_ops */
+ NULL, /* devo_bus_ops */
+ usbgem_power, /* devo_power */
+#if DEVO_REV >= 4
+ usbgem_quiesce, /* devo_quiesce */
+#endif
+};
+#endif
+
+static struct modldrv modldrv = {
+ &mod_driverops, /* Type of module. This one is a driver */
+ ident,
+ &udmf_ops, /* driver ops */
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, &modldrv, NULL
+};
+
+/* ======================================================== */
+/*
+ * _init : done
+ */
+/* ======================================================== */
+int
+_init(void)
+{
+ int status;
+
+ DPRINTF(2, (CE_CONT, "!udmf: _init: called"));
+
+ status = usbgem_mod_init(&udmf_ops, "udmf");
+ if (status != DDI_SUCCESS) {
+ return (status);
+ }
+ status = mod_install(&modlinkage);
+ if (status != DDI_SUCCESS) {
+ usbgem_mod_fini(&udmf_ops);
+ }
+ return (status);
+}
+
+/*
+ * _fini : done
+ */
+int
+_fini(void)
+{
+ int status;
+
+ DPRINTF(2, (CE_CONT, "!udmf: _fini: called"));
+ status = mod_remove(&modlinkage);
+ if (status == DDI_SUCCESS) {
+ usbgem_mod_fini(&udmf_ops);
+ }
+ return (status);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
diff --git a/usr/src/uts/common/io/upf/adm8511reg.h b/usr/src/uts/common/io/upf/adm8511reg.h
new file mode 100644
index 0000000000..68a2207bb5
--- /dev/null
+++ b/usr/src/uts/common/io/upf/adm8511reg.h
@@ -0,0 +1,205 @@
+/*
+ * @(#)adm8511reg.h 1.1 09/06/20
+ * Register dehinitsions of ADMtek ADM8511 Fast Ethernet to USB controller.
+ * Codeded by Masayuki Murayama(KHF04453@nifty.ne.jp)
+ * This file is public domain.
+ */
+
+#define EC0 0x00 /* B */
+#define EC1 0x01 /* B */
+#define EC2 0x02 /* B */
+#define MA 0x08 /* 8byte array */
+#define EID 0x10 /* B */
+#define PAUSETIMER 0x18 /* B pause timer */
+#define RPNBFC 0x1a /* B */
+#define ORFBFC 0x1b /* B */
+#define EP1C 0x1c /* B */
+#define RXFC 0x1d /* B */
+#define BIST 0x1e /* B */
+#define EEOFFSET 0x20 /* B */
+#define EEDATA 0x21 /* W */
+#define EECTRL 0x23 /* B */
+#define PHYA 0x25 /* B */
+#define PHYD 0x26 /* W */
+#define PHYAC 0x28 /* B */
+#define USBSTAT 0x2a /* B */
+#define ETHTXSTAT 0x2b /* W */
+#define ETHRXSTAT 0x2d /* B */
+#define LOSTCNT 0x2e /* W */
+#define WF0MASK 0x30 /* 16byte array */
+#define WF0OFFSET 0x40 /* W */
+#define WF0CRC 0x41 /* W */
+#define WF1MASK 0x48 /* 16byte array */
+#define WF1OFFSET 0x58 /* W */
+#define WF1CRC 0x59 /* W */
+#define WF2MASK 0x60 /* 16byte array */
+#define WF2OFFSET 0x70 /* W */
+#define WF2CRC 0x71 /* W */
+#define WCTRL 0x78 /* B */
+#define WSTAT 0x7a /* B */
+#define IPHYC 0x7b /* B */
+#define GPIO54 0x7c /* B */
+#define GPIO10 0x7e /* B */
+#define GPIO32 0x7f /* B */
+#define TEST 0x80 /* B */
+#define TM 0x81 /* B */
+#define RPN 0x82 /* B */
+
+/* Ethernet control register 0: offset 0 */
+#define EC0_TXE 0x80U
+#define EC0_RXE 0x40U
+#define EC0_RXFCE 0x20U
+#define EC0_WOE 0x10U
+#define EC0_RXSA 0x08U
+#define EC0_SBO 0x04U
+#define EC0_RXMA 0x02U
+#define EC0_RXCS 0x01U
+
+#define EC0_BITS \
+ "\020" \
+ "\010TXE" \
+ "\007RXE" \
+ "\006RXFCE" \
+ "\005WOE" \
+ "\004RXSA" \
+ "\003SBO" \
+ "\002RXMA" \
+ "\001RXCS"
+
+/* Ethernet control register 1: offset 1 */
+#define EC1_FD 0x20U
+#define EC1_100M 0x10U /* 0:10Mbps 1:100Mbps */
+#define EC1_RM 0x08U /* reset mac */
+
+#define EC1_BITS \
+ "\020" \
+ "\006FD" \
+ "\005100M" \
+ "\004RM"
+
+/* Ethernet control register 2: offset 2 */
+#define EC2_MEPL 0x80U /* 8515: MTU 0:1528, 1:1638 */
+#define EC2_RPNC 0x40U
+#define EC2_LEEPRS 0x20U
+#define EC2_EEPRW 0x10U
+#define EC2_LB 0x08U
+#define EC2_PROM 0x04U
+#define EC2_RXBP 0x02U
+#define EC2_EP3RC 0x01U
+
+#define EC2_BITS \
+ "\020" \
+ "\010MEPS" \
+ "\007RPNC" \
+ "\006LEEPRS" \
+ "\005EEPRW" \
+ "\004LB" \
+ "\003PROM" \
+ "\002RXBP" \
+ "\001EP3RC"
+
+/* Recieve Packet number based Flow Control register: offset 0x1a */
+#define RPNBFC_PN 0x7eU /* */
+#define RPNBFC_PN_SHIFT 1
+#define RPNBFC_FCP 0x01U /* enable rx flow control */
+
+/* Occupied Recieve FIFO based Flow Control register: offset 0x1b */
+#define ORFBFC_RXS 0x7eU /* */
+#define ORFBFC_RXS_SHIFT 1
+#define ORFBFC_RXS_UNIT 1024U
+#define ORFBFC_FCRXS 0x01U /* enable rx flow control */
+
+/* EP1 control register: offset 0x1c */
+#define EP1C_EP1S0E 0x80U /* send 0 enable */
+#define EP1C_ITMA 0x60U /* internal test mode A */
+#define EP1C_ITMB 0x1fU /* internal test mode B */
+
+#define EP1C_BITS \
+ "\020" \
+ "\010EP1S0E"
+
+/* Rx FIFO Control register: offset 0x1d */
+#define RXFC_EXT_SRAM 0x02 /* enable external 32k sram */
+#define RXFC_RX32PKT 0x01 /* max 32 packet */
+
+/* EEPROM offset register: offset 0x20 */
+#define EEOFFSET_MASK 0x3f /* eeprom offset address in word */
+
+/* EEPROM access control register: offset 0x23 */
+#define EECTRL_DONE 0x04
+#define EECTRL_RD 0x02
+#define EECTRL_WR 0x01
+
+#define EECTRL_BITS \
+ "\020" \
+ "\003DONE" \
+ "\002RD" \
+ "\001WR"
+
+/* PHY control register: offset 28 */
+#define PHYAC_DO 0x80U /* Done */
+#define PHYAC_RDPHY 0x40U /* read phy */
+#define PHYAC_WRPHY 0x20U /* write phy */
+#define PHYAC_PHYRA 0x1fU /* PHY register address */
+
+#define PHYCTRL_BITS \
+ "\020" \
+ "\010DO" \
+ "\007RDPHY" \
+ "\006WRPHY"
+
+/* Internal PHY control register: offset 7b */
+#define IPHYC_EPHY 0x02
+#define IPHYC_PHYR 0x01
+
+#define IPHYC_BITS \
+ "\020" \
+ "\002EPHY" \
+ "\001PHYR"
+
+/* GPIO45 register: offset 7c */
+#define GPIO54_5OE 0x20
+#define GPIO54_5O 0x10
+#define GPIO54_5I 0x08
+#define GPIO54_4OE 0x04
+#define GPIO54_4O 0x02
+#define GPIO54_4I 0x01
+
+/* GPIO01 register: offset 7e */
+#define GPIO10_1OE 0x20
+#define GPIO10_1O 0x10
+#define GPIO10_1I 0x08
+#define GPIO10_0OE 0x04
+#define GPIO10_0O 0x02
+#define GPIO10_0I 0x01
+
+/* GPIO23 register: offset 7f */
+#define GPIO32_3OE 0x20
+#define GPIO32_3O 0x10
+#define GPIO32_3I 0x08
+#define GPIO32_2OE 0x04
+#define GPIO32_2O 0x02
+#define GPIO32_2I 0x01
+
+/* rx status at the end of received packets */
+/* byte 0 and 1 is packet length in little endian */
+/* byte 2 is receive status */
+#define RSR_DRIBBLE 0x10
+#define RSR_CRC 0x08
+#define RSR_RUNT 0x04
+#define RSR_LONG 0x02
+#define RSR_MULTI 0x01
+
+#define RSR_ERRORS \
+ (RSR_DRIBBLE | RSR_CRC | RSR_RUNT | RSR_LONG | RSR_MULTI)
+
+#define RSR_BITS \
+ "\020" \
+ "\005DRIBBLE" \
+ "\004CRC" \
+ "\003RUNT" \
+ "\002LONG" \
+ "\001MULTI"
+/* byte 3 is reserved */
+
+/* TEST register: offset 80 */
diff --git a/usr/src/uts/common/io/upf/upf_usbgem.c b/usr/src/uts/common/io/upf/upf_usbgem.c
new file mode 100644
index 0000000000..5614803158
--- /dev/null
+++ b/usr/src/uts/common/io/upf/upf_usbgem.c
@@ -0,0 +1,1213 @@
+/*
+ * upf_usbgem.c : ADMtek an986/adm8511/adm8513/adm8515 USB to
+ * Fast Ethernet Driver for Solaris
+ */
+
+/*
+ * Copyright (c) 2004-2011 Masayuki Murayama. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the author nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+#pragma ident "%W% %E%"
+
+/*
+ * Changelog:
+ */
+
+/*
+ * TODO
+ */
+/* ======================================================= */
+
+/*
+ * Solaris system header files and macros
+ */
+#include <sys/types.h>
+#include <sys/conf.h>
+#include <sys/debug.h>
+#include <sys/kmem.h>
+#include <sys/modctl.h>
+#include <sys/errno.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/byteorder.h>
+
+/* ethernet stuff */
+#include <sys/ethernet.h>
+
+/* interface card depend stuff */
+#include <sys/stropts.h>
+#include <sys/stream.h>
+#include <sys/strlog.h>
+#include <sys/usb/usba.h>
+#include "usbgem.h"
+
+/* hardware stuff */
+#include "usbgem_mii.h"
+#include "adm8511reg.h"
+
+char ident[] = "pegasus usbnic driver v" VERSION;
+
+/*
+ * Useful macros
+ */
+#define CHECK_AND_JUMP(val, label) \
+ if ((val) != USB_SUCCESS) { goto label; }
+
+/*
+ * Debugging
+ */
+#ifdef DEBUG_LEVEL
+static int upf_debug = DEBUG_LEVEL;
+#define DPRINTF(n, args) if (upf_debug > (n)) cmn_err args
+#else
+#define DPRINTF(n, args)
+#endif
+
+/*
+ * Our configration for ADMtek Pegasus/PegasusII
+ */
+/* timeouts */
+#define ONESEC (drv_usectohz(1*1000000))
+
+/*
+ * Local device definitions
+ */
+struct upf_dev {
+ /*
+ * Misc HW information
+ */
+ uint8_t ec[3];
+ uint8_t mac_addr[ETHERADDRL];
+ int chip_type;
+#define CHIP_AN986 1 /* avoid 0 */
+#define CHIP_ADM8511 2 /* including adm8515 */
+#define CHIP_ADM8513 3
+ boolean_t phy_init_done;
+ uint8_t last_link_state;
+
+ uint16_t vid; /* vendor id */
+ uint16_t pid; /* product id */
+};
+
+/*
+ * private functions
+ */
+
+/* mii operations */
+static uint16_t upf_mii_read(struct usbgem_dev *, uint_t, int *errp);
+static void upf_mii_write(struct usbgem_dev *, uint_t, uint16_t, int *errp);
+
+/* nic operations */
+static int upf_attach_chip(struct usbgem_dev *);
+static int upf_reset_chip(struct usbgem_dev *);
+static int upf_init_chip(struct usbgem_dev *);
+static int upf_start_chip(struct usbgem_dev *);
+static int upf_stop_chip(struct usbgem_dev *);
+static int upf_set_media(struct usbgem_dev *);
+static int upf_set_rx_filter(struct usbgem_dev *);
+static int upf_get_stats(struct usbgem_dev *);
+
+/* packet operations */
+static mblk_t *upf_tx_make_packet(struct usbgem_dev *, mblk_t *);
+static mblk_t *upf_rx_make_packet(struct usbgem_dev *, mblk_t *);
+
+/* interrupt handler */
+static void upf_interrupt(struct usbgem_dev *, mblk_t *);
+
+/* =============================================================== */
+/*
+ * I/O functions
+ */
+/* =============================================================== */
+#define UPF_REQ_GET_REGISTER 0xf0
+#define UPF_REQ_SET_REGISTER 0xf1
+#define OUTB(dp, p, v, errp, label) \
+ if ((*(errp) = usbgem_ctrl_out((dp), \
+ /* bmRequestType */ USB_DEV_REQ_HOST_TO_DEV \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ UPF_REQ_SET_REGISTER, \
+ /* wValue */ (v), \
+ /* wIndex */ (p), \
+ /* wLength */ 1, \
+ /* buf */ NULL, \
+ /* size */ 0)) != USB_SUCCESS) goto label;
+
+#define OUTW(dp, p, v, errp, label) \
+ if ((*(errp) = usbgem_ctrl_out_val((dp), \
+ /* bmRequestType */ USB_DEV_REQ_HOST_TO_DEV \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ UPF_REQ_SET_REGISTER, \
+ /* wValue */ 0, \
+ /* wIndex */ (p), \
+ /* wLength */ 2, \
+ /* value */ (v))) != USB_SUCCESS) goto label
+
+#define OUTS(dp, p, buf, len, errp, label) \
+ if ((*(errp) = usbgem_ctrl_out((dp), \
+ /* bmRequestType */ USB_DEV_REQ_HOST_TO_DEV \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ UPF_REQ_SET_REGISTER, \
+ /* wValue */ 0, \
+ /* wIndex */ (p), \
+ /* wLength */ (len), \
+ /* buf */ (buf), \
+ /* size */ (len))) != USB_SUCCESS) goto label
+
+#define INB(dp, p, vp, errp, label) \
+ if ((*(errp) = usbgem_ctrl_in_val((dp), \
+ /* bmRequestType */ USB_DEV_REQ_DEV_TO_HOST \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ UPF_REQ_GET_REGISTER, \
+ /* wValue */ 0, \
+ /* wIndex */ (p), \
+ /* wLength */ 1, \
+ /* valuep */ (vp))) != USB_SUCCESS) goto label
+
+#define INW(dp, p, vp, errp, label) \
+ if ((*(errp) = usbgem_ctrl_in_val((dp), \
+ /* bmRequestType */ USB_DEV_REQ_DEV_TO_HOST \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ UPF_REQ_GET_REGISTER, \
+ /* wValue */ 0, \
+ /* wIndex */ (p), \
+ /* wLength */ 2, \
+ /* valuep */ (vp))) != USB_SUCCESS) goto label
+
+#define INS(dp, p, buf, len, errp, label) \
+ if ((*(errp) = usbgem_ctrl_in((dp), \
+ /* bmRequestType */ USB_DEV_REQ_DEV_TO_HOST \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ UPF_REQ_GET_REGISTER, \
+ /* wValue */ 0, \
+ /* wIndex */ (p), \
+ /* wLength */ (len), \
+ /* buf */ (buf), \
+ /* size */ (len))) != USB_SUCCESS) goto label
+
+/* =============================================================== */
+/*
+ * Hardware manupilation
+ */
+/* =============================================================== */
+static int
+upf_reset_chip(struct usbgem_dev *dp)
+{
+ int i;
+ uint8_t val;
+ int err;
+ struct upf_dev *lp = dp->private;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+ bzero(lp->mac_addr, sizeof (lp->mac_addr));
+
+ lp->ec[1] = 0;
+ OUTB(dp, EC1, EC1_RM, &err, usberr);
+
+ for (i = 0; i < 1000; i++) {
+ INB(dp, EC1, &val, &err, usberr);
+ if ((val & EC1_RM) == 0) {
+ lp->ec[1] = val;
+ return (USB_SUCCESS);
+ }
+ drv_usecwait(10);
+ }
+
+ /* time out */
+ cmn_err(CE_WARN, "!%s: failed to reset: timeout", dp->name);
+ return (USB_FAILURE);
+
+usberr:
+ cmn_err(CE_NOTE, "!%s: %s: usberr detected", dp->name, __func__);
+ return (USB_FAILURE);
+}
+
+/*
+ * Setup an986/adm8511/adm8513/adm8515
+ */
+static int
+upf_init_chip(struct usbgem_dev *dp)
+{
+ uint64_t zero64 = 0;
+ int err = USB_SUCCESS;
+ struct upf_dev *lp = dp->private;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /* ethernet control register 0 */
+ lp->ec[0] |= EC0_RXSA | EC0_RXCS;
+ OUTB(dp, EC0, lp->ec[0], &err, usberr);
+
+ /* ethernet control reg1: will be set later in set_rx_filter() */
+
+ /* ethernet control register 2: will be set later in set_rx_filter() */
+ INB(dp, EC2, &lp->ec[2], &err, usberr);
+ lp->ec[2] |= EC2_RXBP | EC2_EP3RC;
+#ifdef CONFIG_VLAN
+ if (dp->misc_flag & USBGEM_VLAN) {
+ lp->ec[2] |= EC2_MEPL;
+ }
+#endif
+ OUTB(dp, EC2, lp->ec[2], &err, usberr);
+
+ /* Multicast address hash: clear */
+ OUTS(dp, MA, &zero64, 8, &err, usberr);
+
+ /* Ethernet ID : will be set later in upf_set_rx_filter() */
+
+ /* PAUSE timer */
+ OUTB(dp, PAUSETIMER, 0x1f, &err, usberr);
+
+ /* receive packet number based pause control:set in upf_set_media() */
+
+ /* occupied receive FIFO based pause control:set in upf_set_media() */
+
+ /* EP1 control: default */
+
+ /* Rx FIFO control */
+ if (lp->chip_type != CHIP_AN986) {
+ /* use 24K internal sram, 16pkts in fifo */
+ OUTB(dp, RXFC, 0, &err, usberr);
+ }
+
+ /* BIST contror: do nothing */
+ err = upf_set_media(dp);
+ CHECK_AND_JUMP(err, usberr);
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: end (success)", dp->name, __func__));
+ return (USB_SUCCESS);
+
+usberr:
+ cmn_err(CE_NOTE, "!%s: %s: usberr(%d) detected",
+ dp->name, __func__, err);
+ return (err);
+}
+
+static int
+upf_start_chip(struct usbgem_dev *dp)
+{
+ int err = USB_SUCCESS;
+ struct upf_dev *lp = dp->private;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /* enable RX and TX */
+ lp->ec[0] |= EC0_TXE | EC0_RXE;
+ OUTB(dp, EC0, lp->ec[0], &err, usberr);
+ return (USB_SUCCESS);
+
+usberr:
+ cmn_err(CE_WARN, "!%s: %s: usberr(%d) detected",
+ dp->name, __func__, err);
+ return (err);
+}
+
+static int
+upf_stop_chip(struct usbgem_dev *dp)
+{
+ int err;
+ struct upf_dev *lp = dp->private;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /* disable RX and TX */
+ lp->ec[0] &= ~(EC0_TXE | EC0_RXE);
+ OUTB(dp, EC0, lp->ec[0], &err, usberr);
+
+ return (USB_SUCCESS);
+
+usberr:
+ cmn_err(CE_WARN, "!%s: %s: usberr(%d) detected",
+ dp->name, __func__, err);
+ return (err);
+}
+
+static int
+upf_get_stats(struct usbgem_dev *dp)
+{
+ /* do nothing */
+ return (USB_SUCCESS);
+}
+
+static uint_t
+upf_mcast_hash(struct usbgem_dev *dp, const uint8_t *addr)
+{
+ /* hash table is 64 = 2^6 bit width */
+ return (usbgem_ether_crc_le(addr) & 0x3f);
+}
+
+static int
+upf_set_rx_filter(struct usbgem_dev *dp)
+{
+ int i;
+ int err;
+#ifdef DEBUG_LEVEL
+ uint8_t reg0;
+ uint8_t reg1;
+ uint8_t reg2;
+#endif
+ struct upf_dev *lp = dp->private;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called, rxmode:%b",
+ dp->name, __func__, dp->rxmode, RXMODE_BITS));
+
+ /* reset rx mode */
+ lp->ec[0] &= ~EC0_RXMA;
+ lp->ec[2] &= ~EC2_PROM;
+
+ if (dp->rxmode & RXMODE_PROMISC) {
+ /* promiscious mode implies all multicast and all physical */
+ lp->ec[0] |= EC0_RXMA;
+ lp->ec[2] |= EC2_PROM;
+ } else if ((dp->rxmode & RXMODE_ALLMULTI) || dp->mc_count > 0) {
+ /* XXX - multicast hash table didin't work */
+ /* accept all multicast packets */
+ lp->ec[0] |= EC0_RXMA;
+ }
+
+ if (bcmp(dp->cur_addr.ether_addr_octet,
+ lp->mac_addr, ETHERADDRL) != 0) {
+
+ /* need to update mac address */
+ bcopy(dp->cur_addr.ether_addr_octet,
+ lp->mac_addr, ETHERADDRL);
+ OUTS(dp, EID,
+ lp->mac_addr, ETHERADDRL, &err, usberr);
+ }
+
+ /* update rx mode */
+ OUTS(dp, EC0, lp->ec, 3, &err, usberr);
+
+#if DEBUG_LEVEL > 0
+ INB(dp, EC0, &reg0, &err, usberr);
+ INB(dp, EC1, &reg1, &err, usberr);
+ INB(dp, EC2, &reg2, &err, usberr);
+
+ cmn_err(CE_CONT, "!%s: %s: returned, ec:%b %b %b",
+ dp->name, __func__,
+ reg0, EC0_BITS, reg1, EC1_BITS, reg2, EC2_BITS);
+#endif
+ return (USB_SUCCESS);
+
+usberr:
+ cmn_err(CE_NOTE, "!%s: %s: usberr detected", dp->name, __func__);
+ return (err);
+}
+
+static int
+upf_set_media(struct usbgem_dev *dp)
+{
+ int err;
+ struct upf_dev *lp = dp->private;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ lp->ec[1] &= ~(EC1_FD | EC1_100M);
+
+ /* select duplex */
+ if (dp->full_duplex) {
+ lp->ec[1] |= EC1_FD;
+ }
+
+ /* select speed */
+ if (dp->speed == USBGEM_SPD_100) {
+ lp->ec[1] |= EC1_100M;
+ }
+
+ /* rx flow control */
+ switch (dp->flow_control) {
+ case FLOW_CONTROL_SYMMETRIC:
+ case FLOW_CONTROL_RX_PAUSE:
+ lp->ec[0] |= EC0_RXFCE;
+ break;
+
+ default:
+ lp->ec[0] &= ~EC0_RXFCE;
+ break;
+ }
+
+ /* tx flow control */
+ switch (dp->flow_control) {
+ case FLOW_CONTROL_SYMMETRIC:
+ case FLOW_CONTROL_TX_PAUSE:
+ if (lp->chip_type != CHIP_AN986) {
+ /* pegasus II has internal 24k fifo */
+ OUTB(dp, ORFBFC,
+ (12 << ORFBFC_RXS_SHIFT) | ORFBFC_FCRXS,
+ &err, usberr);
+
+ /* 16 packts can be stored in rx fifo */
+ OUTB(dp, RPNBFC_PN,
+ (8 << RPNBFC_PN_SHIFT) | RPNBFC_FCP,
+ &err, usberr);
+ } else {
+ /* an986 has external 32k fifo */
+ OUTB(dp, ORFBFC,
+ (16 << ORFBFC_RXS_SHIFT) | ORFBFC_FCRXS,
+ &err, usberr);
+
+ /* AN986 fails to link up when RPNBFC is enabled */
+ OUTB(dp, RPNBFC, 0, &err, usberr);
+ }
+ break;
+
+ default:
+ OUTB(dp, ORFBFC, 0, &err, usberr);
+ OUTB(dp, RPNBFC, 0, &err, usberr);
+ break;
+ }
+
+ /* update ether control registers */
+ OUTS(dp, EC0, lp->ec, 2, &err, usberr);
+ DPRINTF(0, (CE_CONT, "!%s: %s: returned, ec0:%b, ec1:%b",
+ dp->name, __func__, lp->ec[0], EC0_BITS, lp->ec[1], EC1_BITS));
+
+ return (USB_SUCCESS);
+
+usberr:
+ cmn_err(CE_WARN, "%s: %s: failed to write ec1", dp->name, __func__);
+ return (err);
+}
+
+/*
+ * send/receive packet check
+ */
+static mblk_t *
+upf_tx_make_packet(struct usbgem_dev *dp, mblk_t *mp)
+{
+ size_t len;
+ mblk_t *new;
+ mblk_t *tp;
+ uint8_t *bp;
+ uint8_t *last_pos;
+ int msglen;
+
+ DPRINTF(3, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ len = msgdsize(mp);
+ if (len < ETHERMIN) {
+ len = ETHERMIN;
+ }
+
+ /* allocate msg block */
+ msglen = len + sizeof (uint16_t);
+
+ /* avoid usb controller bug */
+ if ((msglen & 0x3f) == 0) {
+ /* add a header for additional 0-length usb message */
+ msglen += sizeof (uint16_t);
+ }
+
+ if ((new = allocb(msglen, 0)) == NULL) {
+ return (NULL);
+ }
+
+ /* copy contents of the buffer */
+ new->b_wptr = new->b_rptr + msglen;
+ bp = new->b_rptr;
+
+ /* the nic requires a two byte header of the packet size */
+ bp[0] = (uint8_t)len;
+ bp[1] = (uint8_t)(len >> 8);
+ bp += sizeof (uint16_t);
+
+ /* copy the payload */
+ for (tp = mp; tp; tp = tp->b_cont) {
+ len = tp->b_wptr - tp->b_rptr;
+ if (len > 0) {
+ bcopy(tp->b_rptr, bp, len);
+ bp += len;
+ }
+ }
+
+ /* clear ethernet pads and additional usb header if we have */
+ last_pos = new->b_wptr;
+ while (bp < last_pos) {
+ *bp++ = 0;
+ }
+
+ return (new);
+}
+
+static void
+upf_dump_packet(struct usbgem_dev *dp, uint8_t *bp, int n)
+{
+ int i;
+
+ for (i = 0; i < n; i += 8, bp += 8) {
+ cmn_err(CE_CONT, "%02x %02x %02x %02x %02x %02x %02x %02x",
+ bp[0], bp[1], bp[2], bp[3], bp[4], bp[5], bp[6], bp[7]);
+ }
+}
+
+static mblk_t *
+upf_rx_make_packet(struct usbgem_dev *dp, mblk_t *mp)
+{
+ uint8_t *p;
+ uint16_t rxhd;
+ uint_t len;
+ uint8_t rsr;
+ struct upf_dev *lp = dp->private;
+
+ ASSERT(mp != NULL);
+
+#ifdef DEBUG_LEVEL
+ len = msgdsize(mp);
+ DPRINTF(2, (CE_CONT, "!%s: time:%d %s: cont:%p",
+ dp->name, ddi_get_lbolt(), __func__, len, mp->b_cont));
+
+ if (upf_debug > 3) {
+ upf_dump_packet(dp, mp->b_rptr, max(6, len));
+ }
+#endif
+ /* get the length of Rx packet */
+ p = mp->b_wptr - 4;
+ rsr = p[3];
+ if (lp->chip_type == CHIP_ADM8513) {
+ /* As Rx packets from ADM8513 have two byte header, remove it */
+ p = mp->b_rptr;
+ len = ((p[1] << 8) | p[0]) & 0x0fff;
+ mp->b_rptr += 2;
+ } else {
+ len = (((p[1] << 8) | p[0]) & 0x0fff) - ETHERFCSL - 4;
+ }
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: rsr:%b len:%d",
+ dp->name, __func__, rsr, RSR_BITS, len));
+
+ /* check if error happen */
+ if (rsr & RSR_ERRORS) {
+ DPRINTF(0, (CE_CONT, "!%s: rsr:%b", dp->name, rsr, RSR_BITS));
+ if (rsr & (RSR_CRC | RSR_DRIBBLE)) {
+ dp->stats.frame++;
+ }
+ if (rsr & RSR_LONG) {
+ dp->stats.frame_too_long++;
+ }
+ if (rsr & RSR_RUNT) {
+ dp->stats.runt++;
+ }
+
+ dp->stats.errrcv++;
+ return (NULL);
+ }
+#ifndef CONFIG_VLAN
+ /* check packet size */
+ if (len > ETHERMAX) {
+ /* too long */
+ dp->stats.frame_too_long++;
+ dp->stats.errrcv++;
+ return (NULL);
+ } else if (len < ETHERMIN) {
+ dp->stats.runt++;
+ dp->stats.errrcv++;
+ return (NULL);
+ }
+#endif
+ /* remove tailing crc and rx status fields */
+ mp->b_wptr = mp->b_rptr + len;
+ ASSERT(mp->b_next == NULL);
+ return (mp);
+}
+
+/*
+ * Device depend interrupt handler
+ */
+static void
+upf_interrupt(struct usbgem_dev *dp, mblk_t *mp)
+{
+ uint8_t *bp;
+ struct upf_dev *lp = dp->private;
+
+ bp = mp->b_rptr;
+
+ DPRINTF(2, (CE_CONT,
+ "!%s: %s: size:%d, %02x %02x %02x %02x %02x %02x %02x %02x",
+ dp->name, __func__, mp->b_wptr - mp->b_rptr,
+ bp[0], bp[1], bp[2], bp[3], bp[4], bp[5], bp[6], bp[7]));
+
+ if ((lp->last_link_state ^ bp[5]) & 1) {
+ DPRINTF(1, (CE_CONT, "!%s:%s link status changed:",
+ dp->name, __func__));
+ usbgem_mii_update_link(dp);
+ }
+
+ lp->last_link_state = bp[5] & 1;
+}
+
+/*
+ * MII Interfaces
+ */
+static uint16_t
+upf_mii_read(struct usbgem_dev *dp, uint_t index, int *errp)
+{
+ uint8_t phyctrl;
+ uint16_t val;
+ int i;
+
+ DPRINTF(4, (CE_CONT, "!%s: %s: called, ix:%d",
+ dp->name, __func__, index));
+ ASSERT(index >= 0 && index < 32);
+
+ *errp = USB_SUCCESS;
+
+ /* set PHYADDR */
+ OUTB(dp, PHYA, dp->mii_phy_addr, errp, usberr);
+
+ /* Initiate MII read transaction */
+ OUTB(dp, PHYAC, index | PHYAC_RDPHY, errp, usberr);
+
+ for (i = 0; i < 100; i++) {
+ INB(dp, PHYAC, &phyctrl, errp, usberr);
+ if (phyctrl & PHYAC_DO) {
+ /* done */
+ INW(dp, PHYD, &val, errp, usberr);
+ DPRINTF(4, (CE_CONT, "!%s: %s: return %04x",
+ dp->name, __func__, val));
+ return (val);
+ }
+ drv_usecwait(10);
+ }
+ /* timeout */
+ cmn_err(CE_WARN, "!%s: %s: timeout detected", dp->name, __func__);
+ *errp = USB_FAILURE;
+ return (0);
+
+usberr:
+ cmn_err(CE_CONT,
+ "!%s: %s: usberr(%d) detected", dp->name, __func__, *errp);
+ return (0);
+}
+
+static void
+upf_mii_write(struct usbgem_dev *dp, uint_t index, uint16_t val, int *errp)
+{
+ int i;
+ uint8_t phyctrl;
+
+ DPRINTF(4, (CE_CONT, "!%s: %s called index:%d val:0x%04x",
+ dp->name, __func__, index, val));
+ ASSERT(index >= 0 && index < 32);
+
+ *errp = USB_SUCCESS;
+
+ OUTW(dp, PHYD, val, errp, usberr);
+ OUTB(dp, PHYA, dp->mii_phy_addr, errp, usberr);
+ OUTB(dp, PHYAC, index | PHYAC_WRPHY, errp, usberr);
+
+ for (i = 0; i < 100; i++) {
+ INB(dp, PHYAC, &phyctrl, errp, usberr);
+ if (phyctrl & PHYAC_DO) {
+ /* done */
+ return;
+ }
+ drv_usecwait(10);
+ }
+
+ /* time out */
+ cmn_err(CE_WARN, "!%s: %s: timeout detected", dp->name, __func__);
+ *errp = USB_FAILURE;
+ return;
+
+usberr:
+ cmn_err(CE_CONT,
+ "!%s: %s: usberr(%d) detected", dp->name, __func__, *errp);
+}
+
+
+static int
+upf_enable_phy(struct usbgem_dev *dp)
+{
+ uint8_t val;
+ int err;
+ struct upf_dev *lp = dp->private;
+
+ /*
+ * first, try to enable internal phy
+ */
+ INB(dp, IPHYC, &val, &err, usberr);
+ val = (val | IPHYC_EPHY) & ~IPHYC_PHYR;
+ OUTB(dp, IPHYC, val, &err, usberr);
+
+ INB(dp, IPHYC, &val, &err, usberr);
+ DPRINTF(0, (CE_CONT, "!%s: %s: IPHYC: %b",
+ dp->name, __func__, val, IPHYC_BITS));
+ if (val) {
+ /* reset internal phy */
+ OUTB(dp, IPHYC, val | IPHYC_PHYR, &err, usberr);
+ OUTB(dp, IPHYC, val, &err, usberr);
+ delay(drv_usectohz(10000));
+
+ /* identify the chip generation */
+ OUTB(dp, 0x83, 0xa5, &err, usberr);
+ INB(dp, 0x83, &val, &err, usberr);
+ if (val == 0xa5) {
+ lp->chip_type = CHIP_ADM8513;
+ } else {
+ /* adm8511 or adm8515 */
+ lp->chip_type = CHIP_ADM8511;
+ }
+ dp->ugc.usbgc_mii_hw_link_detection = B_TRUE;
+ } else {
+ /*
+ * It should be AN986 which doesn't have an internal PHY.
+ * We need to setup gpio ports in AN986, which are
+ * connected to external PHY control pins.
+ */
+ lp->chip_type = CHIP_AN986;
+
+ /* reset external phy */
+ /* output port#0 L, port#1 L */
+ OUTB(dp, GPIO10, GPIO10_0O | GPIO10_0OE, &err, usberr);
+
+ /* output port#0 H, port#1 L */
+ OUTB(dp, GPIO10,
+ GPIO10_0O | GPIO10_0OE | GPIO10_1OE, &err, usberr);
+
+ /* hw link detection doesn't work correctly */
+ dp->ugc.usbgc_mii_hw_link_detection = B_FALSE;
+ }
+
+ return (USB_SUCCESS);
+
+usberr:
+ cmn_err(CE_NOTE, "!%s: %s: usberr detected", dp->name, __func__);
+ return (USB_FAILURE);
+}
+
+static int
+upf_mii_probe(struct usbgem_dev *dp)
+{
+ int err;
+ uint16_t val;
+ struct upf_dev *lp = dp->private;
+
+ if (!lp->phy_init_done) {
+ upf_enable_phy(dp);
+ lp->phy_init_done = B_TRUE;
+ }
+
+ return (usbgem_mii_probe_default(dp));
+}
+
+static int
+upf_mii_init(struct usbgem_dev *dp)
+{
+ uint16_t val;
+ int err = USB_SUCCESS;
+ struct upf_dev *lp = dp->private;
+
+ if (!lp->phy_init_done) {
+ upf_enable_phy(dp);
+ }
+ lp->phy_init_done = B_FALSE;
+
+ if (lp->chip_type == CHIP_AN986 &&
+ (lp->vid == 0x0db7 /* elecom */ ||
+ lp->vid == 0x066b /* linksys */ ||
+ lp->vid == 0x077b /* linksys */ ||
+ lp->vid == 0x2001 /* dlink */)) {
+ /* special treatment for Linksys products */
+ val = upf_mii_read(dp, 0x1b, &err) | 0x4;
+ upf_mii_write(dp, 0x1b, val, &err);
+ }
+ return (err);
+}
+
+/* ======================================================== */
+/*
+ * OS depend (device driver DKI) routine
+ */
+/* ======================================================== */
+static uint16_t
+upf_read_eeprom(struct usbgem_dev *dp, int index, int *errp)
+{
+ int i;
+ uint8_t eectrl;
+ uint16_t data;
+
+ *errp = USB_SUCCESS;
+
+ OUTB(dp, EECTRL, 0, errp, usberr);
+
+ OUTB(dp, EEOFFSET, index, errp, usberr);
+ OUTB(dp, EECTRL, EECTRL_RD, errp, usberr);
+
+ for (i = 0; i < 100; i++) {
+ INB(dp, EECTRL, &eectrl, errp, usberr);
+ if (eectrl & EECTRL_DONE) {
+ INW(dp, EEDATA, &data, errp, usberr);
+ return (data);
+ }
+ drv_usecwait(10);
+ }
+
+ /* time out */
+ *errp = USB_FAILURE;
+ return (0);
+
+usberr:
+ cmn_err(CE_CONT,
+ "!%s: %s: usberr(%d) detected", dp->name, __func__, *errp);
+ return (0);
+}
+
+static void
+upf_eeprom_dump(struct usbgem_dev *dp, int size)
+{
+ int i;
+ int err;
+
+ cmn_err(CE_CONT, "!%s: %s dump:", dp->name, __func__);
+
+ for (i = 0; i < size; i += 4) {
+ cmn_err(CE_CONT, "!0x%02x: 0x%04x 0x%04x 0x%04x 0x%04x",
+ i*2,
+ upf_read_eeprom(dp, i + 0, &err),
+ upf_read_eeprom(dp, i + 1, &err),
+ upf_read_eeprom(dp, i + 2, &err),
+ upf_read_eeprom(dp, i + 3, &err));
+ }
+}
+
+static int
+upf_attach_chip(struct usbgem_dev *dp)
+{
+ int i;
+ int err;
+ uint16_t val;
+ uint8_t *mac;
+ struct upf_dev *lp = dp->private;
+
+ /*
+ * Read mac address from EEPROM
+ */
+ mac = dp->dev_addr.ether_addr_octet;
+ for (i = 0; i < 3; i++) {
+ val = upf_read_eeprom(dp, i, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ mac[i*2+0] = (uint8_t)val;
+ mac[i*2+1] = (uint8_t)(val >> 8);
+ }
+
+ DPRINTF(0, (CE_CONT,
+ "%s: %s: mac: %02x:%02x:%02x:%02x:%02x:%02x",
+ dp->name, __func__,
+ mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]));
+
+ dp->misc_flag = 0;
+#ifdef CONFIG_VLAN
+ dp->misc_flag |= USBGEM_VLAN;
+#endif
+#if DEBUG_LEVEL > 3
+ upf_eeprom_dump(dp, 0x80);
+#endif
+ return (USB_SUCCESS);
+
+usberr:
+ cmn_err(CE_WARN, "!%s: %s: usb error detected", dp->name, __func__);
+ return (USB_FAILURE);
+}
+
+static int
+upfattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int i;
+ ddi_iblock_cookie_t c;
+ int ret;
+ int unit;
+ uint32_t tcr;
+ int len;
+ const char *drv_name;
+ struct usbgem_dev *dp;
+ void *base;
+ struct usbgem_conf *ugcp;
+ struct upf_dev *lp;
+
+ unit = ddi_get_instance(dip);
+ drv_name = ddi_driver_name(dip);
+
+ DPRINTF(3, (CE_CONT, "!%s%d: %s: called, cmd:%d",
+ drv_name, unit, __func__, cmd));
+
+ if (cmd == DDI_ATTACH) {
+ /*
+ * construct usbgem configration
+ */
+ ugcp = kmem_zalloc(sizeof (*ugcp), KM_SLEEP);
+
+ /* name */
+ sprintf(ugcp->usbgc_name, "%s%d", drv_name, unit);
+ ugcp->usbgc_ppa = unit;
+
+ ugcp->usbgc_ifnum = 0;
+ ugcp->usbgc_alt = 0;
+
+ ugcp->usbgc_tx_list_max = 16;
+
+ ugcp->usbgc_rx_header_len = 4;
+ ugcp->usbgc_rx_list_max = 64;
+
+ /* time out parameters */
+ ugcp->usbgc_tx_timeout = USBGEM_TX_TIMEOUT;
+ ugcp->usbgc_tx_timeout_interval = USBGEM_TX_TIMEOUT_INTERVAL;
+
+ /* flow control */
+ ugcp->usbgc_flow_control = FLOW_CONTROL_NONE;
+ ugcp->usbgc_flow_control = FLOW_CONTROL_RX_PAUSE;
+
+ /* MII timeout parameters */
+ ugcp->usbgc_mii_link_watch_interval = ONESEC;
+ ugcp->usbgc_mii_an_watch_interval = ONESEC/5;
+ ugcp->usbgc_mii_reset_timeout = MII_RESET_TIMEOUT; /* 1 sec */
+ ugcp->usbgc_mii_an_timeout = MII_AN_TIMEOUT; /* 5 sec */
+ ugcp->usbgc_mii_an_wait = MII_AN_TIMEOUT/2;
+ ugcp->usbgc_mii_linkdown_timeout = MII_LINKDOWN_TIMEOUT;
+ ugcp->usbgc_mii_an_delay = ONESEC/10;
+
+ ugcp->usbgc_mii_linkdown_action = MII_ACTION_RESET;
+ ugcp->usbgc_mii_linkdown_timeout_action = MII_ACTION_RESET;
+ ugcp->usbgc_mii_dont_reset = B_FALSE;
+
+ /* I/O methods */
+
+ /* mac operation */
+ ugcp->usbgc_attach_chip = &upf_attach_chip;
+ ugcp->usbgc_reset_chip = &upf_reset_chip;
+ ugcp->usbgc_init_chip = &upf_init_chip;
+ ugcp->usbgc_start_chip = &upf_start_chip;
+ ugcp->usbgc_stop_chip = &upf_stop_chip;
+ ugcp->usbgc_multicast_hash = &upf_mcast_hash;
+
+ ugcp->usbgc_set_rx_filter = &upf_set_rx_filter;
+ ugcp->usbgc_set_media = &upf_set_media;
+ ugcp->usbgc_get_stats = &upf_get_stats;
+ ugcp->usbgc_interrupt = &upf_interrupt;
+
+ /* packet operation */
+ ugcp->usbgc_tx_make_packet = &upf_tx_make_packet;
+ ugcp->usbgc_rx_make_packet = &upf_rx_make_packet;
+
+ /* mii operations */
+ ugcp->usbgc_mii_probe = &upf_mii_probe;
+ ugcp->usbgc_mii_init = &upf_mii_init;
+ ugcp->usbgc_mii_config = &usbgem_mii_config_default;
+ ugcp->usbgc_mii_read = &upf_mii_read;
+ ugcp->usbgc_mii_write = &upf_mii_write;
+
+ /* mtu */
+ ugcp->usbgc_min_mtu = ETHERMTU;
+ ugcp->usbgc_max_mtu = ETHERMTU;
+ ugcp->usbgc_default_mtu = ETHERMTU;
+
+ lp = kmem_zalloc(sizeof (struct upf_dev), KM_SLEEP);
+
+ lp->vid = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
+ DDI_PROP_DONTPASS, "usb-vendor-id", -1);
+ lp->pid = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
+ DDI_PROP_DONTPASS, "usb-product-id", -1);
+
+ dp = usbgem_do_attach(dip, ugcp, lp, sizeof (struct upf_dev));
+
+ kmem_free(ugcp, sizeof (*ugcp));
+
+ if (dp != NULL) {
+ return (DDI_SUCCESS);
+ }
+
+err_free_mem:
+ kmem_free(lp, sizeof (struct upf_dev));
+err_close_pipe:
+err:
+ return (DDI_FAILURE);
+ }
+ if (cmd == DDI_RESUME) {
+ dp = USBGEM_GET_DEV(dip);
+ lp = dp->private;
+ lp->phy_init_done = B_FALSE;
+
+ return (usbgem_resume(dip));
+ }
+ return (DDI_FAILURE);
+}
+
+static int
+upfdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ int ret;
+
+ if (cmd == DDI_DETACH) {
+ ret = usbgem_do_detach(dip);
+ if (ret != DDI_SUCCESS) {
+ return (DDI_FAILURE);
+ }
+ return (DDI_SUCCESS);
+ }
+ if (cmd == DDI_SUSPEND) {
+ return (usbgem_suspend(dip));
+ }
+ return (DDI_FAILURE);
+}
+
+/* ======================================================== */
+/*
+ * OS depend (loadable streams driver) routine
+ */
+/* ======================================================== */
+#ifdef USBGEM_CONFIG_GLDv3
+USBGEM_STREAM_OPS(upf_ops, upfattach, upfdetach);
+#else
+static struct module_info upfminfo = {
+ 0, /* mi_idnum */
+ "upf", /* mi_idname */
+ 0, /* mi_minpsz */
+ ETHERMTU, /* mi_maxpsz */
+ 32*1024, /* mi_hiwat */
+ 1, /* mi_lowat */
+};
+
+static struct qinit upfrinit = {
+ (int (*)()) NULL, /* qi_putp */
+ usbgem_rsrv, /* qi_srvp */
+ usbgem_open, /* qi_qopen */
+ usbgem_close, /* qi_qclose */
+ (int (*)()) NULL, /* qi_qadmin */
+ &upfminfo, /* qi_minfo */
+ NULL /* qi_mstat */
+};
+
+static struct qinit upfwinit = {
+ usbgem_wput, /* qi_putp */
+ usbgem_wsrv, /* qi_srvp */
+ (int (*)()) NULL, /* qi_qopen */
+ (int (*)()) NULL, /* qi_qclose */
+ (int (*)()) NULL, /* qi_qadmin */
+ &upfminfo, /* qi_minfo */
+ NULL /* qi_mstat */
+};
+
+static struct streamtab upf_info = {
+ &upfrinit, /* st_rdinit */
+ &upfwinit, /* st_wrinit */
+ NULL, /* st_muxrinit */
+ NULL /* st_muxwrinit */
+};
+
+static struct cb_ops cb_upf_ops = {
+ nulldev, /* cb_open */
+ nulldev, /* cb_close */
+ nodev, /* cb_strategy */
+ nodev, /* cb_print */
+ nodev, /* cb_dump */
+ nodev, /* cb_read */
+ nodev, /* cb_write */
+ nodev, /* cb_ioctl */
+ nodev, /* cb_devmap */
+ nodev, /* cb_mmap */
+ nodev, /* cb_segmap */
+ nochpoll, /* cb_chpoll */
+ ddi_prop_op, /* cb_prop_op */
+ &upf_info, /* cb_stream */
+ D_MP /* cb_flag */
+};
+
+static struct dev_ops upf_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* devo_refcnt */
+ usbgem_getinfo, /* devo_getinfo */
+ nulldev, /* devo_identify */
+ nulldev, /* devo_probe */
+ upfattach, /* devo_attach */
+ upfdetach, /* devo_detach */
+ nodev, /* devo_reset */
+ &cb_upf_ops, /* devo_cb_ops */
+ NULL, /* devo_bus_ops */
+ usbgem_power, /* devo_power */
+#if DEVO_REV >= 4
+ usbgem_quiesce, /* devo_quiesce */
+#endif
+
+};
+#endif
+static struct modldrv modldrv = {
+ &mod_driverops, /* Type of module. This one is a driver */
+ ident,
+ &upf_ops, /* driver ops */
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, &modldrv, NULL
+};
+
+/* ======================================================== */
+/*
+ * _init : done
+ */
+/* ======================================================== */
+int
+_init(void)
+{
+ int status;
+
+ DPRINTF(2, (CE_CONT, "!upf: _init: called"));
+
+ status = usbgem_mod_init(&upf_ops, "upf");
+ if (status != DDI_SUCCESS) {
+ return (status);
+ }
+ status = mod_install(&modlinkage);
+ if (status != DDI_SUCCESS) {
+ usbgem_mod_fini(&upf_ops);
+ }
+ return (status);
+}
+
+/*
+ * _fini : done
+ */
+int
+_fini(void)
+{
+ int status;
+
+ DPRINTF(2, (CE_CONT, "!upf: _fini: called"));
+ status = mod_remove(&modlinkage);
+ if (status == DDI_SUCCESS) {
+ usbgem_mod_fini(&upf_ops);
+ }
+ return (status);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
diff --git a/usr/src/uts/common/io/urf/rtl8150reg.h b/usr/src/uts/common/io/urf/rtl8150reg.h
new file mode 100644
index 0000000000..7cba53356e
--- /dev/null
+++ b/usr/src/uts/common/io/urf/rtl8150reg.h
@@ -0,0 +1,218 @@
+/*
+ * @(#)rtl8150reg.h 1.1 04/09/16
+ * Macro definitions for Realtek 8150 USB to fast ethernet controller
+ * based on Realtek RTL8150 data sheet
+ * This file is public domain. Coded by M.Murayama (KHF04453@nifty.com)
+ */
+
+/*
+ * Register offset
+ */
+#define IDR 0x0120 /* Base of ID registers */
+#define MAR 0x0126 /* Base of multicast registers */
+#define CR 0x012e /* Command register */
+#define TCR 0x012f /* Transmit Configuration register */
+#define RCR 0x0130 /* Receive Configuration register */
+#define TSR 0x0132 /* Transmit Status register */
+#define RSR 0x0133 /* Receive Status register */
+#define CON0 0x0135 /* Configuration register 0 */
+#define CON1 0x0136 /* Configuration register 1 */
+#define MSR 0x0137 /* Media Status register */
+#define PHYADD 0x0138 /* PHY address register */
+#define PHYDAT 0x0139 /* PHY data register */
+#define PHYCNT 0x013b /* PHY control register */
+#define GPPC 0x013d /* General purpose pin control */
+#define WAKECNT 0x013e /* Wake up event control */
+#define BMCR 0x0140 /* Basic Mode Control register */
+#define BMSR 0x0142 /* Basic Mode Status register */
+#define ANAR 0x0144 /* Auto Negotiation Advertisement register */
+#define ANLP 0x0146 /* Auto Negotiation Link Partner register */
+#define ANER 0x0148 /* Auto Negotiation Expansion register */
+#define NWAYT 0x014a /* Nway test register */
+#define CSCR 0x014c /* CS configuration register */
+#define CRC0 0x014e /* Power management register for wakeup frame0 */
+#define CRC1 0x0150 /* Power management register for wakeup frame1 */
+#define CRC2 0x0152 /* Power management register for wakeup frame2 */
+#define CRC3 0x0154 /* Power management register for wakeup frame3 */
+#define CRC4 0x0156 /* Power management register for wakeup frame4 */
+#define BYTEMASK0 0x0158 /* Power management wakeup frame0 bytemask */
+#define BYTEMASK1 0x0160 /* Power management wakeup frame1 bytemask */
+#define BYTEMASK2 0x0168 /* Power management wakeup frame2 bytemask */
+#define BYTEMASK3 0x0170 /* Power management wakeup frame3 bytemask */
+#define BYTEMASK4 0x0178 /* Power management wakeup frame4 bytemask */
+#define PHY1 0x0180 /* PHY parameter 1 */
+#define PHY2 0x0184 /* PHY parameter 2 */
+#define TW1 0x0186 /* Twister parameter 1 */
+
+/*
+ * Bit field definitions
+ */
+/* CR : Command register (uint8_t) */
+#define CR_WEPROM 0x20 /* EEPROM write enable */
+#define CR_SOFT_RST 0x10 /* Reset */
+#define CR_RE 0x08 /* Ethernet receive enable */
+#define CR_TE 0x04 /* Ethernet transmit enable */
+#define CR_EP3CLREN 0x02 /* clear performance counter after EP3 */
+#define CR_AUTOLOAD 0x01 /* autoload contents of 93c46 */
+
+#define CR_BITS "\020\006WEPROM\005SOFT_RST\004RE\003TE\002EP3CLREN\001AUTOLOAD"
+
+/* TCR: Transmit Configuration register */
+#define TCR_TXRR 0xc0 /* Tx retry count */
+#define TCR_TXRR_SHIFT 6
+#define TCR_IFG 0x18 /* Interframe Gap */
+#define TCR_IFG_SHIFT 3
+#define TCR_IFG_802_3 (3 << TCR_IFG_SHIFT) /* 802.3 standard */
+#define TCR_NOCRC 0x01 /* Inhibit Appending CRC */
+
+#define TCR_BITS "\020\001NOCRC"
+
+/* Receive Configuration register */
+#define RCR_TAIL 0x0080 /* Rx header forward to host in CRC field */
+#define RCR_AER 0x0040 /* Accept Error packet */
+#define RCR_AR 0x0020 /* Accept runt */
+#define RCR_AM 0x0010 /* Accept multicast */
+#define RCR_AB 0x0008 /* Accept broadcast */
+#define RCR_AD 0x0004 /* Accept physical match */
+#define RCR_AAM 0x0002 /* Accept all Multicast */
+#define RCR_AAP 0x0001 /* Accept all physical */
+
+#define RCR_ACCEPT_MODE \
+ (RCR_AER | RCR_AR | RCR_AM | RCR_AB | RCR_AD | RCR_AAM | RCR_AAP)
+
+#define RCR_BITS \
+ "\020\010TAIL\007AER\006AR\005AM\004AB\003AD\002AAM\001AAP"
+
+/* Transmit Status register */
+
+#define TSR_ECOL 0x20 /* excessive collision indication */
+#define TSR_LCOL 0x10 /* late collision indication */
+#define TSR_LOSS_CRS 0x08 /* lost of carrier indication */
+#define TSR_JBR 0x04 /* jabber time out indication */
+#define TSR_BUF_EMPTY 0x02 /* Tx buffer is empty */
+#define TSR_BUF_FULL 0x01 /* Tx buffer is full */
+
+#define TSR_BITS \
+ "\020" \
+ "\006ECOL" \
+ "\005LCOL" \
+ "\004LOSS_CRS" \
+ "\003JBR" \
+ "\002BUF_EMPTY" \
+ "\001BUF_FULL"
+
+/* Receive status register in Rx packet field */
+#define RSR_WEVENT 0x80 /* Wakeup event indication */
+#define RSR_RX_BUF_FULL 0x40 /* Receive buffer full indication */
+#define RSR_LKCHG 0x20 /* Link change indication */
+#define RSR_RUNT 0x10 /* short packet indication */
+#define RSR_LONG 0x08 /* Long packet indication*/
+#define RSR_CRC 0x04 /* CRC error indication*/
+#define RSR_FAE 0x02 /* Frame alignment error */
+#define RSR_ROK 0x01 /* Receive OK indication */
+
+#define RSR_ERRS (RSR_RUNT | RSR_LONG | RSR_CRC | RSR_FAE)
+#define RSR_BITS \
+ "\020" \
+ "\010WEVENT" \
+ "\007RX_BUF_FULL" \
+ "\006LKCHG" \
+ "\005RUNT" \
+ "\004LONG" \
+ "\003CRC" \
+ "\002FAE" \
+ "\001ROK"
+
+/* Config 0 */
+
+#define CON0_SUSLED 0x80
+#define CON0_PARM_EN 0x40 /* parameter enable */
+#define CON0_LDPS 0x08
+#define CON0_MSEL 0x04 /* media select 1:MII, 0:auto */
+#define CON0_LEDS 0x03 /* LED pattern */
+
+/* Config 1 */
+#define CON0_BWF 0x40 /* Broadcast wakeup function 1:on 0:off */
+#define CON0_MWF 0x20 /* Multicast wakeup function 1:on 0:off */
+#define CON0_UWF 0x10 /* Unicast wakeup function 1:on 0:off */
+#define CON0_LONGWF1 0x02 /* */
+#define CON0_LONGWF0 0x01 /* */
+
+
+/* MSR : Media Status register */
+#define MSR_TXFCE 0x80 /* Tx Flow control enable */
+#define MSR_RXFCE 0x40 /* Rx Flow control enable */
+#define MSR_DUPLEX 0x10 /* full duplex */
+#define MSR_SPEED_100 0x08 /* 100Mbps mode */
+#define MSR_LINK 0x04 /* link status */
+#define MSR_TXPF 0x02 /* 8150 sends pause packet */
+#define MSR_RXPF 0x01 /* 8150 is in backoff state*/
+
+#define MSR_BITS \
+ "\020" \
+ "\010TXFCE" \
+ "\007RXFCE" \
+ "\005DUPLEX" \
+ "\004SPEED_100" \
+ "\003LINK" \
+ "\002TXPF" \
+ "\001RXPF"
+
+/* MII PHY Address */
+#define PHYADD_MASK 0x1f
+
+/* MII PHY Data */
+#define PHYCNT_OWN 0x40 /* 8150 owns:1 not owns:0 */
+#define PHYCNT_RWCR 0x20 /* write:1 read:0 */
+#define PHYCNT_PHYOFF 0x1f
+
+/* BMCR (almost same with MII_CONTROL register) */
+#define BMCR_RESET 0x8000 /* PHY reset */
+#define BMCR_Spd_Set 0x2000 /* 100Mbps */
+#define BMCR_ANE 0x1000 /* auto negotiation enable */
+#define BMCR_RSA 0x0200 /* restart auto negotiation */
+#define BMCR_duplex 0x0100 /* 100Mbps */
+
+/* Basic mode status register */
+/* Auto-negotiation Advertisement register */
+/* Auto-negotiation Link Partner Ability register */
+/* Auto-negotiation Expansion register */
+
+/* Nway test register */
+#define NWAYT_NWLPBK 0x0080
+#define NWAYT_ENNWLE 0x0008
+#define NWAYT_FLAGABD 0x0004
+#define NWAYT_FLAGPDF 0x0002
+#define NWAYT_FLAGLSC 0x0001
+
+/* CS configuration register */
+#define CS_TESTFUN 0x8000 /* */
+#define CS_LD 0x0200 /* */
+#define CS_HEARTBEAT 0x0100 /* */
+#define CS_JBEN 0x0080 /* */
+#define CS_F_LINK100 0x0040 /* */
+#define CS_F_CONNECT 0x0020 /* */
+#define CS_CON_STATUS 0x0008 /* */
+#define CS_CON_STATUS_EN 0x0004 /* */
+#define CS_PASS_SCR 0x0001 /* bypass scramble function */
+
+/*
+ * header format of rx packet
+ */
+#define RXHD_MULT 0x8000 /* multicast packet */
+#define RXHD_PHYS 0x4000 /* physical match packet */
+#define RXHD_RUNT 0x2000 /* too short */
+#define RXHD_VALID 0x1000 /* packet is ok */
+#define RXHD_BYTECNT 0x0fff /* rx byte count */
+
+#define RXHD_BITS \
+ "\020" \
+ "\020MULT" \
+ "\017PHYS" \
+ "\016RUNT" \
+ "\015VALID"
+/*
+ * Offset to EPROM contents
+ */
+#define URF_EEPROM_BASE 0x1200
+#define EPROM_EthernetID 0x0002
diff --git a/usr/src/uts/common/io/urf/urf_usbgem.c b/usr/src/uts/common/io/urf/urf_usbgem.c
new file mode 100644
index 0000000000..f61c8e3502
--- /dev/null
+++ b/usr/src/uts/common/io/urf/urf_usbgem.c
@@ -0,0 +1,1039 @@
+/*
+ * urf_usbgem.c : Realtek RTL8150 USB to Fast Ethernet Driver for Solaris
+ *
+ * Copyright (c) 2003-2012 Masayuki Murayama. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the author nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+#pragma ident "%W% %E%"
+
+/*
+ * Changelog:
+ */
+
+/*
+ * TODO
+ */
+/* ======================================================= */
+
+/*
+ * Solaris system header files and macros
+ */
+
+/* minimum kernel headers for drivers */
+#include <sys/types.h>
+#include <sys/conf.h>
+#include <sys/debug.h>
+#include <sys/kmem.h>
+#include <sys/modctl.h>
+#include <sys/errno.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/byteorder.h>
+
+/* ethernet stuff */
+#include <sys/ethernet.h>
+
+/* interface card depend stuff */
+#include <sys/stropts.h>
+#include <sys/stream.h>
+#include <sys/strlog.h>
+#include <sys/usb/usba.h>
+#include "usbgem.h"
+#include "usbgem_mii.h"
+#include "rtl8150reg.h"
+
+char ident[] = "rtl8150 usbnic driver v" VERSION;
+
+/*
+ * Useful macros
+ */
+#define ROUNDUP2(x, y) (((x)+(y)-1) & ~((y)-1))
+#define CHECK_AND_JUMP(err, label) if (err != USB_SUCCESS) goto label
+
+/*
+ * Debugging
+ */
+#ifdef DEBUG_LEVEL
+static int urf_debug = DEBUG_LEVEL;
+#define DPRINTF(n, args) if (urf_debug > (n)) cmn_err args
+#else
+#define DPRINTF(n, args)
+#endif
+
+/*
+ * Our configration for rtl8150
+ */
+/* timeouts */
+#define ONESEC (drv_usectohz(1*1000000))
+
+/*
+ * Local device definitions
+ */
+struct chip_info {
+ int flags;
+ char *name;
+ int type;
+};
+
+#define CHIPTABLESIZE (sizeof (chiptbl_8150) / sizeof (struct chip_info))
+
+struct urf_dev {
+ /*
+ * Misc HW information
+ */
+ struct chip_info *chip;
+ uint8_t cr;
+ uint8_t tsr;
+ uint16_t rcr;
+ uint8_t txok_cnt;
+};
+
+/*
+ * private functions
+ */
+
+/* mii operations */
+static uint16_t urf_mii_read(struct usbgem_dev *, uint_t, int *errp);
+static void urf_mii_write(struct usbgem_dev *, uint_t, uint16_t, int *errp);
+
+/* nic operations */
+static int urf_attach_chip(struct usbgem_dev *);
+static int urf_reset_chip(struct usbgem_dev *);
+static int urf_init_chip(struct usbgem_dev *);
+static int urf_start_chip(struct usbgem_dev *);
+static int urf_stop_chip(struct usbgem_dev *);
+static int urf_set_media(struct usbgem_dev *);
+static int urf_set_rx_filter(struct usbgem_dev *);
+static int urf_get_stats(struct usbgem_dev *);
+
+/* packet operations */
+static mblk_t *urf_tx_make_packet(struct usbgem_dev *, mblk_t *);
+static mblk_t *urf_rx_make_packet(struct usbgem_dev *, mblk_t *);
+
+/* =============================================================== */
+/*
+ * I/O functions
+ */
+/* =============================================================== */
+#define OUTB(dp, p, v, errp, label) \
+ if ((*(errp) = usbgem_ctrl_out_val((dp), \
+ /* bmRequestType */ USB_DEV_REQ_HOST_TO_DEV \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ USB_REQ_SET_ADDRESS, \
+ /* wValue */ (p), \
+ /* wIndex */ 0, \
+ /* wLength */ 1, \
+ /* value */ (v))) != USB_SUCCESS) goto label
+
+#define OUTW(dp, p, v, errp, label) \
+ if ((*(errp) = usbgem_ctrl_out_val((dp), \
+ /* bmRequestType */ USB_DEV_REQ_HOST_TO_DEV \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ USB_REQ_SET_ADDRESS, \
+ /* wValue */ (p), \
+ /* wIndex */ 0, \
+ /* wLength */ 2, \
+ /* value */ (v))) != USB_SUCCESS) goto label
+
+#define OUTS(dp, p, buf, len, errp, label) \
+ if ((*(errp) = usbgem_ctrl_out((dp), \
+ /* bmRequestType */ USB_DEV_REQ_HOST_TO_DEV \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ USB_REQ_SET_ADDRESS, \
+ /* wValue */ (p), \
+ /* wIndex */ 0, \
+ /* wLength */ (len), \
+ /* value */ (buf), \
+ /* size */ (len))) != USB_SUCCESS) goto label
+
+#define IN(dp, p, vp, errp, label) \
+ if ((*(errp) = usbgem_ctrl_in_val((dp), \
+ /* bmRequestType */ USB_DEV_REQ_DEV_TO_HOST \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ USB_REQ_SET_ADDRESS, \
+ /* wValue */ (p), \
+ /* wIndex */ 0, \
+ /* wLength */ sizeof ((*vp)), \
+ /* valuep */ (vp))) != USB_SUCCESS) goto label
+
+#define INS(dp, p, buf, len, errp, label) \
+ if ((*(errp) = usbgem_ctrl_in((dp), \
+ /* bmRequestType */ USB_DEV_REQ_DEV_TO_HOST \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ USB_REQ_SET_ADDRESS, \
+ /* wValue */ (p), \
+ /* wIndex */ 0, \
+ /* wLength */ (len), \
+ /* valuep */ (buf), \
+ /* size */ (len))) != USB_SUCCESS) goto label
+
+/* =============================================================== */
+/*
+ * variables
+ */
+/* =============================================================== */
+static int urf_ppa = 0;
+
+/* =============================================================== */
+/*
+ * Hardware manupilation
+ */
+/* =============================================================== */
+static int
+urf_reset_chip(struct usbgem_dev *dp)
+{
+ int i;
+ int err;
+ uint8_t reg;
+ struct urf_dev *lp = dp->private;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ lp->cr = 0;
+ OUTB(dp, CR, lp->cr | CR_SOFT_RST, &err, usberr);
+
+ for (i = 0; i < 100; i++) {
+ IN(dp, CR, &reg, &err, usberr);
+ if ((reg & CR_SOFT_RST) == 0) {
+ return (USB_SUCCESS);
+ }
+ }
+ /* time out */
+ cmn_err(CE_WARN, "%s: failed to reset: timeout", dp->name);
+ return (USB_FAILURE);
+
+usberr:
+ cmn_err(CE_NOTE, "!%s: %s: usberr detected", dp->name, __func__);
+ return (USB_FAILURE);
+}
+
+/*
+ * Setup rtl8150
+ */
+static int
+urf_init_chip(struct usbgem_dev *dp)
+{
+ int i;
+ uint32_t val;
+ int err;
+ struct urf_dev *lp = dp->private;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /* ID registers: set later by urf_set_rx_filter */
+
+ /* Multicast registers: set later by urf_set_rx_filter */
+
+ /* Command register : Enable Tx and Rx before writing TCR and RCR */
+ lp->cr |= CR_RE | CR_TE;
+ OUTB(dp, CR, lp->cr, &err, usberr);
+
+ /* Transmit configration register : */
+ OUTB(dp, TCR, TCR_IFG_802_3, &err, usberr);
+
+ /* Receive configuration register : disable rx filter */
+ lp->rcr = RCR_TAIL | RCR_AER | RCR_AR;
+ OUTW(dp, RCR, lp->rcr, &err, usberr);
+#ifdef notdef
+ /* Media status register */
+ err = urf_set_media(dp);
+ CHECK_AND_JUMP(err, usberr);
+#endif
+ /* Configuration register 0: no need to change */
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: end (success)", dp->name, __func__));
+ return (USB_SUCCESS);
+
+usberr:
+ cmn_err(CE_NOTE, "!%s: %s: usberr detected", dp->name, __func__);
+ return (USB_FAILURE);
+}
+
+static int
+urf_start_chip(struct usbgem_dev *dp)
+{
+ struct urf_dev *lp = dp->private;
+
+ /* do nothing */
+ return (USB_SUCCESS);
+}
+
+static int
+urf_stop_chip(struct usbgem_dev *dp)
+{
+ return (urf_reset_chip(dp));
+}
+
+static int
+urf_get_stats(struct usbgem_dev *dp)
+{
+ /* do nothing */
+ return (USB_SUCCESS);
+}
+
+static uint_t
+urf_mcast_hash(struct usbgem_dev *dp, const uint8_t *addr)
+{
+ return (usbgem_ether_crc_be(addr));
+}
+
+static int
+urf_set_rx_filter(struct usbgem_dev *dp)
+{
+ int i;
+ uint16_t mode;
+ uint8_t mhash[8];
+ int err;
+ int16_t rcr;
+ struct urf_dev *lp = dp->private;
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: called, rxmode:%x",
+ dp->name, __func__, dp->rxmode));
+
+ if (lp->rcr & (RCR_AB | RCR_AD | RCR_AAM | RCR_AAP | RCR_AM)) {
+#ifdef notdef
+ /* disable rx filter before changing it. */
+ lp->rcr &= ~(RCR_AB | RCR_AD | RCR_AAM | RCR_AAP | RCR_AM);
+ OUTW(dp, RCR, lp->rcr, &err, usberr);
+#else
+ /* receive all packets while we change rx filter*/
+ OUTW(dp, RCR, lp->rcr | RCR_AAM | RCR_AAP, &err, usberr);
+#endif
+ }
+
+ mode = RCR_AB /* accept broadcast */
+ | RCR_AD; /* accept physical match */
+ bzero(mhash, sizeof (mhash));
+
+ if (dp->rxmode & RXMODE_PROMISC) {
+ /* promiscious mode implies all multicast and all physical */
+ mode |= RCR_AAM | RCR_AAP;
+ } else if ((dp->rxmode & RXMODE_ALLMULTI) || dp->mc_count > 64/2) {
+ /* accept all multicast packets */
+ mode |= RCR_AAM;
+ } else if (dp->mc_count > 0) {
+ /*
+ * make hash table to select interresting
+ * multicast address only.
+ */
+ mode |= RCR_AM;
+ for (i = 0; i < dp->mc_count; i++) {
+ uint_t h;
+ /* hash table is 64 = 2^6 bit width */
+ h = dp->mc_list[i].hash >> (32 - 6);
+ mhash[h / 8] |= 1 << (h % 8);
+ }
+ }
+ lp->rcr |= mode;
+
+ /* set mac address */
+ OUTS(dp, IDR, dp->cur_addr.ether_addr_octet, ETHERADDRL, &err, usberr);
+
+ /* set multicast hash table */
+ if (mode & RCR_AM) {
+ /* need to set up multicast hash table */
+ OUTS(dp, MAR, mhash, sizeof (mhash), &err, usberr);
+ }
+
+ OUTW(dp, RCR, lp->rcr, &err, usberr);
+
+#if DEBUG_LEVEL > 2
+ IN(dp, RCR, &rcr, &err, usberr);
+ cmn_err(CE_CONT, "!%s: %s: rcr:%b returned",
+ dp->name, __func__, rcr, RCR_BITS);
+#endif
+ return (USB_SUCCESS);
+
+usberr:
+ cmn_err(CE_NOTE, "!%s: %s: usberr detected", dp->name, __func__);
+ return (USB_FAILURE);
+}
+
+static int
+urf_set_media(struct usbgem_dev *dp)
+{
+ uint8_t new;
+ uint8_t old;
+ int err;
+ struct urf_dev *lp = dp->private;
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /* select duplex: do nothing */
+
+ /* select speed: do nothing */
+
+ /* flow control */
+ IN(dp, MSR, &old, &err, usberr);
+
+
+ /* setup flow control */
+ new = old & ~(MSR_TXFCE | MSR_RXFCE);
+ switch (dp->flow_control) {
+ case FLOW_CONTROL_SYMMETRIC:
+ new |= MSR_TXFCE | MSR_RXFCE;
+ break;
+
+ case FLOW_CONTROL_TX_PAUSE:
+ new |= MSR_TXFCE;
+ break;
+
+ case FLOW_CONTROL_RX_PAUSE:
+ new |= MSR_RXFCE;
+ break;
+
+ case FLOW_CONTROL_NONE:
+ default:
+ break;
+ }
+
+ if (new != old) {
+ OUTB(dp, MSR, new, &err, usberr);
+ }
+ DPRINTF(2, (CE_CONT, "!%s: %s: returned", dp->name, __func__));
+ return (USB_SUCCESS);
+
+usberr:
+ cmn_err(CE_NOTE, "!%s: %s: usberr detected", dp->name, __func__);
+ return (USB_FAILURE);
+}
+
+/*
+ * send/receive packet check
+ */
+static mblk_t *
+urf_tx_make_packet(struct usbgem_dev *dp, mblk_t *mp)
+{
+ size_t len;
+ mblk_t *new;
+ mblk_t *tp;
+ uint8_t *bp;
+ uint8_t *last_pos;
+
+ len = msgdsize(mp);
+
+ if (len < ETHERMIN || mp->b_cont != NULL || (len & 0x3f) == 0) {
+ /*
+ * re-allocate mp
+ */
+ len = max(len, ETHERMIN);
+
+ if ((len & 0x3f) == 0) {
+ /* workaround for buggy USB hba */
+ len++;
+ }
+
+ if ((new = allocb(len, 0)) == NULL) {
+ return (NULL);
+ }
+
+ /* copy contents of the buffer */
+ new->b_wptr = new->b_rptr + len;
+ bp = new->b_rptr;
+ for (tp = mp; tp; tp = tp->b_cont) {
+ len = tp->b_wptr - tp->b_rptr;
+ bcopy(tp->b_rptr, bp, len);
+ bp += len;
+ }
+
+ last_pos = new->b_wptr;
+ while (bp < last_pos) {
+ *bp++ = 0;
+ }
+
+ mp = new;
+ }
+
+ return (mp);
+}
+
+static void
+urf_dump_packet(struct usbgem_dev *dp, uint8_t *bp, int n)
+{
+ int i;
+
+ for (i = 0; i < n; i += 8, bp += 8) {
+ cmn_err(CE_CONT, "%02x %02x %02x %02x %02x %02x %02x %02x",
+ bp[0], bp[1], bp[2], bp[3], bp[4], bp[5], bp[6], bp[7]);
+ }
+}
+
+static mblk_t *
+urf_rx_make_packet(struct usbgem_dev *dp, mblk_t *mp)
+{
+ uint8_t *p;
+ uint16_t rxhd;
+ uint_t len;
+
+ ASSERT(mp != NULL);
+ len = msgdsize(mp);
+#ifdef DEBUG_LEVEL
+ DPRINTF(2, (CE_CONT, "!%s: time:%d %s: len:%d cont:%p",
+ dp->name, ddi_get_lbolt(), __func__, len, mp->b_cont));
+
+ if (urf_debug > 2) {
+ urf_dump_packet(dp, mp->b_rptr, max(6, len));
+ }
+#endif
+ if (len < ETHERMIN + ETHERFCSL) {
+ /* Too short */
+ dp->stats.runt++;
+ dp->stats.errrcv++;
+ return (NULL);
+ }
+
+ /* get Rx header which is placed at tail of the packet. */
+ p = mp->b_wptr - 4;
+ rxhd = (p[1] << 8) | p[0];
+ len = rxhd & RXHD_BYTECNT;
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: rsr:%b len:%d",
+ dp->name, __func__, rxhd, RXHD_BITS, len));
+
+ /* check if error happen */
+ if ((rxhd & (RXHD_VALID)) == 0) {
+ DPRINTF(-1, (CE_CONT, "!%s: %s: rxhd:%b",
+ dp->name, __func__, rxhd, RXHD_BITS));
+ if (rxhd & RXHD_RUNT) {
+ dp->stats.runt++;
+ }
+
+ dp->stats.errrcv++;
+ return (NULL);
+ }
+#ifdef notdef
+ /* check packet size */
+ if (len > ETHERMAX + ETHERFCSL) {
+ /* too long */
+ dp->stats.frame_too_long++;
+ dp->stats.errrcv++;
+ return (NULL);
+ } else if (len < ETHERMIN + ETHERFCSL) {
+ dp->stats.runt++;
+ dp->stats.errrcv++;
+ return (NULL);
+ }
+#endif
+ /* remove tailing crc field */
+ mp->b_wptr -= ETHERFCSL;
+ return (mp);
+}
+
+/*
+ * MII Interfaces
+ */
+static uint16_t
+urf_mii_read(struct usbgem_dev *dp, uint_t index, int *errp)
+{
+ int reg;
+ uint16_t val;
+
+ DPRINTF(4, (CE_CONT, "!%s: %s: called, ix:%d",
+ dp->name, __func__, index));
+
+ *errp = USB_SUCCESS;
+
+ switch (index) {
+ case MII_CONTROL:
+ reg = BMCR;
+ break;
+
+ case MII_STATUS:
+ reg = BMSR;
+ break;
+
+ case MII_AN_ADVERT:
+ reg = ANAR;
+ break;
+
+ case MII_AN_LPABLE:
+ reg = ANLP;
+ break;
+
+ case MII_AN_EXPANSION:
+ reg = ANER;
+ break;
+
+ default:
+ return (0);
+ }
+
+ IN(dp, reg, &val, errp, usberr);
+
+ if (index == MII_STATUS) {
+ uint8_t msr;
+ /*
+ * Fix MII status register as it does't have LINKUP and
+ * MFPRMBLSUPR bits.
+ */
+ IN(dp, MSR, &msr, errp, usberr);
+
+ val |= (MII_STATUS_MFPRMBLSUPR | MII_STATUS_LINKUP);
+ if ((msr & MSR_LINK) == 0) {
+ val &= ~MII_STATUS_LINKUP;
+ }
+ }
+
+ return (val);
+
+usberr:
+ cmn_err(CE_CONT,
+ "!%s: %s: usberr(%d) detected", dp->name, __func__, *errp);
+
+ return (0);
+}
+
+static void
+urf_mii_write(struct usbgem_dev *dp, uint_t index, uint16_t val, int *errp)
+{
+ int reg;
+
+ DPRINTF(5, (CE_CONT, "!%s: %s called", dp->name, __func__));
+
+ *errp = USB_SUCCESS;
+
+ switch (index) {
+ case MII_CONTROL:
+ reg = BMCR;
+ break;
+
+ case MII_STATUS:
+ reg = BMSR;
+ break;
+
+ case MII_AN_ADVERT:
+ reg = ANAR;
+ break;
+
+ case MII_AN_LPABLE:
+ reg = ANLP;
+ break;
+
+ case MII_AN_EXPANSION:
+ reg = ANER;
+ break;
+
+ default:
+ return;
+ }
+
+ OUTW(dp, reg, val, errp, usberr);
+usberr:
+ ;
+}
+
+/* ======================================================== */
+/*
+ * OS depend (device driver DKI) routine
+ */
+/* ======================================================== */
+static void
+urf_eeprom_dump(struct usbgem_dev *dp, int size)
+{
+ int i;
+ int err;
+ uint16_t w0, w1, w2, w3;
+
+ cmn_err(CE_CONT, "!%s: eeprom dump:", dp->name);
+ for (i = URF_EEPROM_BASE; i < size + URF_EEPROM_BASE; i += 8) {
+ IN(dp, i + 0, &w0, &err, usberr);
+ IN(dp, i + 2, &w1, &err, usberr);
+ IN(dp, i + 4, &w2, &err, usberr);
+ IN(dp, i + 6, &w3, &err, usberr);
+ cmn_err(CE_CONT, "!0x%02x: 0x%04x 0x%04x 0x%04x 0x%04x",
+ i - URF_EEPROM_BASE, w0, w1, w2, w3);
+ }
+usberr:
+ ;
+}
+
+static int
+urf_attach_chip(struct usbgem_dev *dp)
+{
+ int i;
+ uint8_t old;
+ uint_t new;
+ uint8_t reg;
+ int err;
+ struct urf_dev *lp = dp->private;
+
+ /*
+ * setup flow control bit in eeprom
+ */
+ IN(dp, URF_EEPROM_BASE + 9, &old, &err, usberr);
+
+ DPRINTF(0, (CE_CONT, "!%s: eeprom offset 9: %02x", dp->name, old));
+
+ if (dp->ugc.usbgc_flow_control != FLOW_CONTROL_NONE) {
+ /* enable PAUSE bit */
+ new = old | 0x04;
+ } else {
+ /* clear PAUSE bit */
+ new = old & ~0x04;
+ }
+ if (new != old) {
+ /* make eeprom writable */
+ OUTB(dp, CR, lp->cr | CR_WEPROM, &err, usberr);
+
+ /* eerom allows only word access for writing */
+ IN(dp, URF_EEPROM_BASE + 8, &reg, &err, usberr);
+ new = (new << 8) | reg;
+
+ OUTW(dp, URF_EEPROM_BASE + 8, new, &err, usberr);
+
+ /* make eeprom non-writable */
+ OUTB(dp, CR, lp->cr, &err, usberr);
+ }
+
+ /*
+ * load EEPROM contents into nic
+ */
+ OUTB(dp, CR, lp->cr | CR_AUTOLOAD, &err, usberr);
+ CHECK_AND_JUMP(err, usberr);
+
+ for (i = 0; i < 100; i++) {
+ IN(dp, CR, &reg, &err, usberr);
+ if ((reg & CR_AUTOLOAD) == 0) {
+ goto autoload_done;
+ }
+ }
+ /* timeout */
+ cmn_err(CE_WARN, "%s: %s: failed to autoload: timeout",
+ dp->name, __func__);
+ goto usberr;
+
+autoload_done:
+ /*
+ * mac address in EEPROM has loaded to ID registers.
+ */
+ INS(dp, IDR, dp->dev_addr.ether_addr_octet, ETHERADDRL, &err, usberr);
+
+ /* no need to scan phy */
+ dp->mii_phy_addr = -1;
+
+#if DEBUG_LEVEL > 2
+ urf_eeprom_dump(dp, 0x80);
+#endif
+
+#ifdef CONFIG_VLAN
+ dp->misc_flag = USBGEM_VLAN;
+#endif
+ return (USB_SUCCESS);
+
+usberr:
+ cmn_err(CE_WARN, "%s: urf_attach_chip: usb error detected", dp->name);
+ return (USB_FAILURE);
+}
+
+static int
+urfattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int i;
+ ddi_iblock_cookie_t c;
+ int ret;
+ int unit;
+ struct chip_info *p;
+ const char *drv_name;
+ struct usbgem_dev *dp;
+ void *base;
+ struct usbgem_conf *ugcp;
+ struct urf_dev *lp;
+
+ unit = ddi_get_instance(dip);
+ drv_name = ddi_driver_name(dip);
+
+ DPRINTF(3, (CE_CONT, "!%s%d: %s: called, cmd:%d",
+ drv_name, __func__, unit, cmd));
+
+ if (cmd == DDI_ATTACH) {
+ /*
+ * Check if the chip is supported.
+ */
+
+ /*
+ * Check the chip if it is really realtek rtl8150
+ */
+
+ /*
+ * construct usbgem configration
+ */
+ ugcp = kmem_zalloc(sizeof (*ugcp), KM_SLEEP);
+
+ /* name */
+ sprintf(ugcp->usbgc_name,
+ "%s%d(ppa=%d)", drv_name, unit, urf_ppa);
+#ifdef USBGEM_CONFIG_GLDv3
+ ugcp->usbgc_ppa = urf_ppa;
+#else
+ ugcp->usbgc_ppa = unit;
+#endif
+ ugcp->usbgc_ifnum = 0;
+ ugcp->usbgc_alt = 0;
+
+ ugcp->usbgc_tx_list_max = 16;
+
+ /* the rx status partially replaces FCS */
+ ugcp->usbgc_rx_header_len = 0;
+ ugcp->usbgc_rx_list_max = 64;
+
+ /* time out parameters */
+ ugcp->usbgc_tx_timeout = USBGEM_TX_TIMEOUT;
+ ugcp->usbgc_tx_timeout_interval = ONESEC;
+
+ /* flow control */
+ ugcp->usbgc_flow_control = FLOW_CONTROL_RX_PAUSE;
+
+ /* MII timeout parameters */
+ ugcp->usbgc_mii_link_watch_interval = ONESEC;
+ ugcp->usbgc_mii_an_watch_interval = ONESEC/5;
+ ugcp->usbgc_mii_reset_timeout = MII_RESET_TIMEOUT; /* 1 sec */
+ ugcp->usbgc_mii_an_timeout = MII_AN_TIMEOUT; /* 5 sec */
+ ugcp->usbgc_mii_an_wait = (25*ONESEC)/10;
+ ugcp->usbgc_mii_linkdown_timeout = MII_LINKDOWN_TIMEOUT;
+
+ ugcp->usbgc_mii_an_delay = ONESEC/10;
+ ugcp->usbgc_mii_linkdown_action = MII_ACTION_RSA;
+ ugcp->usbgc_mii_linkdown_timeout_action = MII_ACTION_RESET;
+ ugcp->usbgc_mii_dont_reset = B_FALSE;
+
+ /* I/O methods */
+
+ /* mac operation */
+ ugcp->usbgc_attach_chip = &urf_attach_chip;
+ ugcp->usbgc_reset_chip = &urf_reset_chip;
+ ugcp->usbgc_init_chip = &urf_init_chip;
+ ugcp->usbgc_start_chip = &urf_start_chip;
+ ugcp->usbgc_stop_chip = &urf_stop_chip;
+ ugcp->usbgc_multicast_hash = &urf_mcast_hash;
+
+ ugcp->usbgc_set_rx_filter = &urf_set_rx_filter;
+ ugcp->usbgc_set_media = &urf_set_media;
+ ugcp->usbgc_get_stats = &urf_get_stats;
+#ifdef notdef
+ ugcp->usbgc_interrupt = &urf_interrupt;
+#else
+ ugcp->usbgc_interrupt = NULL;
+#endif
+ /* packet operation */
+ ugcp->usbgc_tx_make_packet = &urf_tx_make_packet;
+ ugcp->usbgc_rx_make_packet = &urf_rx_make_packet;
+
+ /* mii operations */
+ ugcp->usbgc_mii_probe = &usbgem_mii_probe_default;
+ ugcp->usbgc_mii_init = &usbgem_mii_init_default;
+ ugcp->usbgc_mii_config = &usbgem_mii_config_default;
+ ugcp->usbgc_mii_read = &urf_mii_read;
+ ugcp->usbgc_mii_write = &urf_mii_write;
+
+ /* mtu */
+ ugcp->usbgc_min_mtu = ETHERMTU;
+ ugcp->usbgc_max_mtu = ETHERMTU;
+ ugcp->usbgc_default_mtu = ETHERMTU;
+
+ lp = kmem_zalloc(sizeof (struct urf_dev), KM_SLEEP);
+ lp->chip = p;
+
+ ddi_set_driver_private(dip, NULL);
+
+ dp = usbgem_do_attach(dip, ugcp, lp, sizeof (struct urf_dev));
+
+ kmem_free(ugcp, sizeof (*ugcp));
+
+ if (dp != NULL) {
+ urf_ppa++;
+ return (DDI_SUCCESS);
+ }
+
+err_free_mem:
+ kmem_free(lp, sizeof (struct urf_dev));
+err_close_pipe:
+err:
+ return (DDI_FAILURE);
+ }
+ if (cmd == DDI_RESUME) {
+ return (usbgem_resume(dip));
+ }
+ return (DDI_FAILURE);
+}
+
+static int
+urfdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ int ret;
+
+ if (cmd == DDI_DETACH) {
+ ret = usbgem_do_detach(dip);
+ if (ret != DDI_SUCCESS) {
+ return (DDI_FAILURE);
+ }
+ urf_ppa--;
+ return (DDI_SUCCESS);
+ }
+ if (cmd == DDI_SUSPEND) {
+ return (usbgem_suspend(dip));
+ }
+ return (DDI_FAILURE);
+}
+
+/* ======================================================== */
+/*
+ * OS depend (loadable streams driver) routine
+ */
+/* ======================================================== */
+#ifdef USBGEM_CONFIG_GLDv3
+USBGEM_STREAM_OPS(urf_ops, urfattach, urfdetach);
+#else
+static struct module_info urfminfo = {
+ 0, /* mi_idnum */
+ "urf", /* mi_idname */
+ 0, /* mi_minpsz */
+ ETHERMTU, /* mi_maxpsz */
+ ETHERMTU*128, /* mi_hiwat */
+ 1, /* mi_lowat */
+};
+
+static struct qinit urfrinit = {
+ (int (*)()) NULL, /* qi_putp */
+ usbgem_rsrv, /* qi_srvp */
+ usbgem_open, /* qi_qopen */
+ usbgem_close, /* qi_qclose */
+ (int (*)()) NULL, /* qi_qadmin */
+ &urfminfo, /* qi_minfo */
+ NULL /* qi_mstat */
+};
+
+static struct qinit urfwinit = {
+ usbgem_wput, /* qi_putp */
+ usbgem_wsrv, /* qi_srvp */
+ (int (*)()) NULL, /* qi_qopen */
+ (int (*)()) NULL, /* qi_qclose */
+ (int (*)()) NULL, /* qi_qadmin */
+ &urfminfo, /* qi_minfo */
+ NULL /* qi_mstat */
+};
+
+static struct streamtab urf_info = {
+ &urfrinit, /* st_rdinit */
+ &urfwinit, /* st_wrinit */
+ NULL, /* st_muxrinit */
+ NULL /* st_muxwrinit */
+};
+
+static struct cb_ops cb_urf_ops = {
+ nulldev, /* cb_open */
+ nulldev, /* cb_close */
+ nodev, /* cb_strategy */
+ nodev, /* cb_print */
+ nodev, /* cb_dump */
+ nodev, /* cb_read */
+ nodev, /* cb_write */
+ nodev, /* cb_ioctl */
+ nodev, /* cb_devmap */
+ nodev, /* cb_mmap */
+ nodev, /* cb_segmap */
+ nochpoll, /* cb_chpoll */
+ ddi_prop_op, /* cb_prop_op */
+ &urf_info, /* cb_stream */
+ D_NEW|D_MP /* cb_flag */
+};
+
+static struct dev_ops urf_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* devo_refcnt */
+ usbgem_getinfo, /* devo_getinfo */
+ nulldev, /* devo_identify */
+ nulldev, /* devo_probe */
+ urfattach, /* devo_attach */
+ urfdetach, /* devo_detach */
+ nodev, /* devo_reset */
+ &cb_urf_ops, /* devo_cb_ops */
+ NULL, /* devo_bus_ops */
+ usbgem_power, /* devo_power */
+#if DEVO_REV >= 4
+ usbgem_quiesce, /* devo_quiesce */
+#endif
+
+};
+#endif
+
+static struct modldrv modldrv = {
+ &mod_driverops, /* Type of module. This one is a driver */
+ ident,
+ &urf_ops, /* driver ops */
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, &modldrv, NULL
+};
+
+/* ======================================================== */
+/*
+ * _init : done
+ */
+/* ======================================================== */
+int
+_init(void)
+{
+ int status;
+
+ DPRINTF(2, (CE_CONT, "!urf: _init: called"));
+
+ status = usbgem_mod_init(&urf_ops, "urf");
+ if (status != DDI_SUCCESS) {
+ return (status);
+ }
+ status = mod_install(&modlinkage);
+ if (status != DDI_SUCCESS) {
+ usbgem_mod_fini(&urf_ops);
+ }
+ return (status);
+}
+
+/*
+ * _fini : done
+ */
+int
+_fini(void)
+{
+ int status;
+
+ DPRINTF(2, (CE_CONT, "!urf: _fini: called"));
+ status = mod_remove(&modlinkage);
+ if (status == DDI_SUCCESS) {
+ usbgem_mod_fini(&urf_ops);
+ }
+ return (status);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
diff --git a/usr/src/uts/common/io/usb/usba/usba_ugen.c b/usr/src/uts/common/io/usb/usba/usba_ugen.c
index cb20c24270..5852e40799 100644
--- a/usr/src/uts/common/io/usb/usba/usba_ugen.c
+++ b/usr/src/uts/common/io/usb/usba/usba_ugen.c
@@ -23,6 +23,10 @@
*/
/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+/*
* UGEN: USB Generic Driver support code
*
* This code provides entry points called by the ugen driver or other
@@ -1082,7 +1086,10 @@ usb_ugen_poll(usb_ugen_hdl_t usb_ugen_hdl, dev_t dev, short events,
((epp->ep_state &
UGEN_EP_STATE_INTR_IN_POLLING_ON) == 0)) {
*reventsp |= POLLIN;
- } else if (!anyyet) {
+ }
+
+ if ((!*reventsp && !anyyet) ||
+ (events & POLLET)) {
*phpp = &epp->ep_pollhead;
epp->ep_state |=
UGEN_EP_STATE_INTR_IN_POLL_PENDING;
@@ -1101,7 +1108,10 @@ usb_ugen_poll(usb_ugen_hdl_t usb_ugen_hdl, dev_t dev, short events,
((epp->ep_state &
UGEN_EP_STATE_ISOC_IN_POLLING_ON) == 0)) {
*reventsp |= POLLIN;
- } else if (!anyyet) {
+ }
+
+ if ((!*reventsp && !anyyet) ||
+ (events & POLLET)) {
*phpp = &epp->ep_pollhead;
epp->ep_state |=
UGEN_EP_STATE_ISOC_IN_POLL_PENDING;
@@ -1115,9 +1125,10 @@ usb_ugen_poll(usb_ugen_hdl_t usb_ugen_hdl, dev_t dev, short events,
break;
case UGEN_MINOR_DEV_STAT_NODE:
- if (ugenp->ug_ds.dev_stat & UGEN_DEV_STATUS_CHANGED) {
+ if (ugenp->ug_ds.dev_stat & UGEN_DEV_STATUS_CHANGED)
*reventsp |= POLLIN;
- } else if (!anyyet) {
+
+ if ((!*reventsp && !anyyet) || (events & POLLET)) {
*phpp = &ugenp->ug_ds.dev_pollhead;
ugenp->ug_ds.dev_stat |=
UGEN_DEV_STATUS_POLL_PENDING;
@@ -1131,9 +1142,10 @@ usb_ugen_poll(usb_ugen_hdl_t usb_ugen_hdl, dev_t dev, short events,
break;
}
} else {
- if (ugenp->ug_ds.dev_stat & UGEN_DEV_STATUS_CHANGED) {
+ if (ugenp->ug_ds.dev_stat & UGEN_DEV_STATUS_CHANGED)
*reventsp |= POLLHUP|POLLIN;
- } else if (!anyyet) {
+
+ if ((!*reventsp && !anyyet) || (events & POLLET)) {
*phpp = &ugenp->ug_ds.dev_pollhead;
ugenp->ug_ds.dev_stat |=
UGEN_DEV_STATUS_POLL_PENDING;
diff --git a/usr/src/uts/common/io/usbgem/usbgem.c b/usr/src/uts/common/io/usbgem/usbgem.c
new file mode 100644
index 0000000000..a42f7119ef
--- /dev/null
+++ b/usr/src/uts/common/io/usbgem/usbgem.c
@@ -0,0 +1,6389 @@
+/*
+ * usbgem.c: General USB to Fast Ethernet mac driver framework
+ *
+ * Copyright (c) 2002-2012 Masayuki Murayama. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the author nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+#pragma ident "@(#)usbgem.c 1.6 12/02/09"
+
+/*
+ * Change log
+ */
+
+/*
+ * TODO:
+ * implement DELAYED_START
+ */
+
+/*
+ * System Header files.
+ */
+#include <sys/types.h>
+#include <sys/conf.h>
+#include <sys/debug.h>
+#include <sys/kmem.h>
+#include <sys/vtrace.h>
+#include <sys/ethernet.h>
+#include <sys/modctl.h>
+#include <sys/errno.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#ifndef USBGEM_CONFIG_GLDv3
+#include <sys/dlpi.h>
+#include <sys/strsubr.h>
+#endif
+#include <sys/stream.h> /* required for MBLK* */
+#include <sys/strsun.h> /* required for mionack() */
+#include <sys/byteorder.h>
+
+#include <sys/usb/usba.h>
+#ifdef USBGEM_CONFIG_GLDv3
+#include <inet/common.h>
+#include <inet/led.h>
+#include <inet/mi.h>
+#include <inet/nd.h>
+#endif
+
+/* supplement definitions */
+extern const char *usb_str_cr(usb_cr_t);
+
+#ifndef USBGEM_CONFIG_GLDv3
+#pragma weak gld_linkstate
+#endif
+#include <sys/note.h>
+
+#include "usbgem_mii.h"
+#include "usbgem.h"
+
+#ifdef MODULE
+char ident[] = "usb general ethernet mac driver v" VERSION;
+#else
+extern char ident[];
+#endif
+
+/* Debugging support */
+#ifdef USBGEM_DEBUG_LEVEL
+static int usbgem_debug = USBGEM_DEBUG_LEVEL;
+#define DPRINTF(n, args) if (usbgem_debug > (n)) cmn_err args
+#else
+#define DPRINTF(n, args)
+#endif
+
+/*
+ * Useful macros and typedefs
+ */
+#define ROUNDUP(x, a) (((x) + (a) - 1) & ~((a) - 1))
+#define DEFAULT_PIPE(dp) ((dp)->reg_data->dev_default_ph)
+#define VTAG_SIZE 4
+#define BOOLEAN(x) ((x) != 0)
+/*
+ * configuration parameters
+ */
+#define USBDRV_MAJOR_VER 2
+#define USBDRV_MINOR_VER 0
+
+#define ETHERHEADERL (sizeof (struct ether_header))
+#define MAXPKTLEN(dp) ((dp)->mtu + ETHERHEADERL)
+#define MAXPKTBUF(dp) ((dp)->mtu + ETHERHEADERL + ETHERFCSL)
+
+#define WATCH_INTERVAL_FAST drv_usectohz(100*1000)
+
+#define STOP_GRACEFUL B_TRUE
+
+/*
+ * Private functions
+ */
+static int usbgem_open_pipes(struct usbgem_dev *dp);
+static int usbgem_close_pipes(struct usbgem_dev *dp);
+static void usbgem_intr_cb(usb_pipe_handle_t, usb_intr_req_t *);
+static void usbgem_bulkin_cb(usb_pipe_handle_t, usb_bulk_req_t *);
+static void usbgem_bulkout_cb(usb_pipe_handle_t, usb_bulk_req_t *);
+
+static int usbgem_mii_start(struct usbgem_dev *);
+static void usbgem_mii_stop(struct usbgem_dev *);
+
+/* local buffer management */
+static int usbgem_init_rx_buf(struct usbgem_dev *);
+
+/* internal mac interfaces */
+static void usbgem_tx_timeout(struct usbgem_dev *);
+static void usbgem_mii_link_watcher(struct usbgem_dev *);
+static int usbgem_mac_init(struct usbgem_dev *);
+static int usbgem_mac_start(struct usbgem_dev *);
+static int usbgem_mac_stop(struct usbgem_dev *, int, boolean_t);
+static void usbgem_mac_ioctl(struct usbgem_dev *, queue_t *, mblk_t *);
+
+int usbgem_speed_value[] = {10, 100, 1000};
+
+static int usbgem_ctrl_retry = 5;
+
+/* usb event support */
+static int usbgem_disconnect_cb(dev_info_t *dip);
+static int usbgem_reconnect_cb(dev_info_t *dip);
+int usbgem_suspend(dev_info_t *dip);
+int usbgem_resume(dev_info_t *dip);
+
+static uint8_t usbgem_bcastaddr[] = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+
+#ifdef MODULE
+extern struct mod_ops mod_miscops;
+
+static struct modlmisc modlmisc = {
+ &mod_miscops,
+ "usbgem v" VERSION,
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, &modlmisc, NULL
+};
+
+/*
+ * _init : done
+ */
+int
+_init(void)
+{
+ int status;
+
+ DPRINTF(2, (CE_CONT, "!usbgem: _init: called"));
+ status = mod_install(&modlinkage);
+
+ return (status);
+}
+
+/*
+ * _fini : done
+ */
+int
+_fini(void)
+{
+ int status;
+
+ DPRINTF(2, (CE_CONT, "!usbgem: _fini: called"));
+ status = mod_remove(&modlinkage);
+ return (status);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+#endif /* MODULE */
+
+/* ============================================================== */
+/*
+ * Ether CRC calculation utilities
+ */
+/* ============================================================== */
+/*
+ * Ether CRC calculation according to 21143 data sheet
+ */
+#define CRC32_POLY_LE 0xedb88320
+uint32_t
+usbgem_ether_crc_le(const uint8_t *addr)
+{
+ int idx;
+ int bit;
+ uint_t data;
+ uint32_t crc = 0xffffffff;
+
+ crc = 0xffffffff;
+ for (idx = 0; idx < ETHERADDRL; idx++) {
+ for (data = *addr++, bit = 0; bit < 8; bit++, data >>= 1) {
+ crc = (crc >> 1) ^
+ (((crc ^ data) & 1) ? CRC32_POLY_LE : 0);
+ }
+ }
+ return (crc);
+}
+
+#define CRC32_POLY_BE 0x04c11db7
+uint32_t
+usbgem_ether_crc_be(const uint8_t *addr)
+{
+ int idx;
+ int bit;
+ uint_t data;
+ uint32_t crc;
+
+ crc = 0xffffffff;
+ for (idx = 0; idx < ETHERADDRL; idx++) {
+ for (data = *addr++, bit = 0; bit < 8; bit++, data >>= 1) {
+ crc = (crc << 1) ^
+ ((((crc >> 31) ^ data) & 1) ? CRC32_POLY_BE : 0);
+ }
+ }
+ return (crc);
+}
+
+int
+usbgem_prop_get_int(struct usbgem_dev *dp, char *prop_template, int def_val)
+{
+ char propname[32];
+
+ (void) sprintf(propname, prop_template, dp->name);
+
+ return (ddi_prop_get_int(DDI_DEV_T_ANY, dp->dip,
+ DDI_PROP_DONTPASS, propname, def_val));
+}
+
+static int
+usbgem_population(uint32_t x)
+{
+ int i;
+ int cnt;
+
+ cnt = 0;
+ for (i = 0; i < 32; i++) {
+ if (x & (1 << i)) {
+ cnt++;
+ }
+ }
+ return (cnt);
+}
+
+static clock_t
+usbgem_timestamp_nz()
+{
+ clock_t now;
+ now = ddi_get_lbolt();
+ return (now ? now : (clock_t)1);
+}
+
+#ifdef USBGEM_DEBUG_LEVEL
+#ifdef USBGEM_DEBUG_VLAN
+#ifdef notdef
+#include <netinet/in.h>
+#endif
+static void
+usbgem_dump_packet(struct usbgem_dev *dp, char *title, mblk_t *mp,
+ boolean_t check_cksum)
+{
+ char msg[180];
+ uint8_t buf[18+20+20];
+ uint8_t *p;
+ size_t offset;
+ uint_t ethertype;
+ uint_t proto;
+ uint_t ipproto = 0;
+ uint_t iplen;
+ uint_t iphlen;
+ uint_t tcplen;
+ uint_t udplen;
+ uint_t cksum;
+ int rest;
+ int len;
+ char *bp;
+ mblk_t *tp;
+ extern uint_t ip_cksum(mblk_t *, int, uint32_t);
+
+ msg[0] = 0;
+ bp = msg;
+
+ rest = sizeof (buf);
+ offset = 0;
+ for (tp = mp; tp; tp = tp->b_cont) {
+ len = tp->b_wptr - tp->b_rptr;
+ len = min(rest, len);
+ bcopy(tp->b_rptr, &buf[offset], len);
+ rest -= len;
+ offset += len;
+ if (rest == 0) {
+ break;
+ }
+ }
+
+ offset = 0;
+ p = &buf[offset];
+
+ /* ethernet address */
+ sprintf(bp,
+ "ether: %02x:%02x:%02x:%02x:%02x:%02x"
+ " -> %02x:%02x:%02x:%02x:%02x:%02x",
+ p[6], p[7], p[8], p[9], p[10], p[11],
+ p[0], p[1], p[2], p[3], p[4], p[5]);
+ bp = &msg[strlen(msg)];
+
+ /* vlag tag and etherrtype */
+ ethertype = GET_ETHERTYPE(p);
+ if (ethertype == VTAG_TPID) {
+ sprintf(bp, " vtag:0x%04x", GET_NET16(&p[14]));
+ bp = &msg[strlen(msg)];
+
+ offset += VTAG_SIZE;
+ p = &buf[offset];
+ ethertype = GET_ETHERTYPE(p);
+ }
+ sprintf(bp, " type:%04x", ethertype);
+ bp = &msg[strlen(msg)];
+
+ /* ethernet packet length */
+ sprintf(bp, " mblklen:%d", msgdsize(mp));
+ bp = &msg[strlen(msg)];
+ if (mp->b_cont) {
+ sprintf(bp, "(");
+ bp = &msg[strlen(msg)];
+ for (tp = mp; tp; tp = tp->b_cont) {
+ if (tp == mp) {
+ sprintf(bp, "%d", tp->b_wptr - tp->b_rptr);
+ } else {
+ sprintf(bp, "+%d", tp->b_wptr - tp->b_rptr);
+ }
+ bp = &msg[strlen(msg)];
+ }
+ sprintf(bp, ")");
+ bp = &msg[strlen(msg)];
+ }
+
+ if (ethertype != ETHERTYPE_IP) {
+ goto x;
+ }
+
+ /* ip address */
+ offset += sizeof (struct ether_header);
+ p = &buf[offset];
+ ipproto = p[9];
+ iplen = GET_NET16(&p[2]);
+ sprintf(bp, ", ip: %d.%d.%d.%d -> %d.%d.%d.%d proto:%d iplen:%d",
+ p[12], p[13], p[14], p[15],
+ p[16], p[17], p[18], p[19],
+ ipproto, iplen);
+ bp = (void *)&msg[strlen(msg)];
+
+ iphlen = (p[0] & 0xf) * 4;
+
+ /* cksum for psuedo header */
+ cksum = *(uint16_t *)&p[12];
+ cksum += *(uint16_t *)&p[14];
+ cksum += *(uint16_t *)&p[16];
+ cksum += *(uint16_t *)&p[18];
+ cksum += BE_16(ipproto);
+
+ /* tcp or udp protocol header */
+ offset += iphlen;
+ p = &buf[offset];
+ if (ipproto == IPPROTO_TCP) {
+ tcplen = iplen - iphlen;
+ sprintf(bp, ", tcp: len:%d cksum:%x",
+ tcplen, GET_NET16(&p[16]));
+ bp = (void *)&msg[strlen(msg)];
+
+ if (check_cksum) {
+ cksum += BE_16(tcplen);
+ cksum = (uint16_t)ip_cksum(mp, offset, cksum);
+ sprintf(bp, " (%s)",
+ (cksum == 0 || cksum == 0xffff) ? "ok" : "ng");
+ bp = (void *)&msg[strlen(msg)];
+ }
+ } else if (ipproto == IPPROTO_UDP) {
+ udplen = GET_NET16(&p[4]);
+ sprintf(bp, ", udp: len:%d cksum:%x",
+ udplen, GET_NET16(&p[6]));
+ bp = (void *)&msg[strlen(msg)];
+
+ if (GET_NET16(&p[6]) && check_cksum) {
+ cksum += *(uint16_t *)&p[4];
+ cksum = (uint16_t)ip_cksum(mp, offset, cksum);
+ sprintf(bp, " (%s)",
+ (cksum == 0 || cksum == 0xffff) ? "ok" : "ng");
+ bp = (void *)&msg[strlen(msg)];
+ }
+ }
+x:
+ cmn_err(CE_CONT, "!%s: %s: %s", dp->name, title, msg);
+}
+#endif /* USBGEM_DEBUG_VLAN */
+#endif /* USBGEM_DEBUG_LEVEL */
+
+#ifdef GEM_GCC_RUNTIME
+/*
+ * gcc3 runtime routines
+ */
+#pragma weak memcmp
+int
+memcmp(const void *s1, const void *s2, size_t n)
+{
+ int i;
+ int ret;
+
+ ret = 0;
+ for (i = 0; i < n; i++) {
+ ret = (int)((uint8_t *)s1)[i] - (int)((uint8_t *)s2)[i];
+ if (ret) {
+ return (ret);
+ }
+ }
+ return (0);
+}
+
+#pragma weak memset
+void *
+memset(void *s, int c, size_t n)
+{
+ if ((c & 0xff) == 0) {
+ bzero(s, n);
+ } else {
+ while (n--) {
+ ((uint8_t *)s)[n] = c;
+ }
+ }
+ return (s);
+}
+
+#pragma weak _memcpy = memcpy
+#pragma weak memcpy
+void *
+memcpy(void *s1, const void *s2, size_t n)
+{
+ bcopy(s2, s1, n);
+ return (s1);
+}
+#endif /* GEM_GCC_RUNTIME */
+/* ============================================================== */
+/*
+ * hardware operations
+ */
+/* ============================================================== */
+static int
+usbgem_hal_reset_chip(struct usbgem_dev *dp)
+{
+ int err;
+
+ sema_p(&dp->hal_op_lock);
+ err = (*dp->ugc.usbgc_reset_chip)(dp);
+ sema_v(&dp->hal_op_lock);
+ return (err);
+}
+
+static int
+usbgem_hal_init_chip(struct usbgem_dev *dp)
+{
+ int err;
+
+ sema_p(&dp->hal_op_lock);
+ err = (*dp->ugc.usbgc_init_chip)(dp);
+ sema_v(&dp->hal_op_lock);
+ return (err);
+}
+
+static int
+usbgem_hal_attach_chip(struct usbgem_dev *dp)
+{
+ int err;
+
+ sema_p(&dp->hal_op_lock);
+ err = (*dp->ugc.usbgc_attach_chip)(dp);
+ sema_v(&dp->hal_op_lock);
+ return (err);
+}
+
+static int
+usbgem_hal_set_rx_filter(struct usbgem_dev *dp)
+{
+ int err;
+
+ sema_p(&dp->hal_op_lock);
+ err = (*dp->ugc.usbgc_set_rx_filter)(dp);
+ sema_v(&dp->hal_op_lock);
+ return (err);
+}
+
+static int
+usbgem_hal_set_media(struct usbgem_dev *dp)
+{
+ int err;
+
+ sema_p(&dp->hal_op_lock);
+ err = (*dp->ugc.usbgc_set_media)(dp);
+ sema_v(&dp->hal_op_lock);
+ return (err);
+}
+
+static int
+usbgem_hal_start_chip(struct usbgem_dev *dp)
+{
+ int err;
+
+ sema_p(&dp->hal_op_lock);
+ err = (*dp->ugc.usbgc_start_chip)(dp);
+ sema_v(&dp->hal_op_lock);
+ return (err);
+}
+
+static int
+usbgem_hal_stop_chip(struct usbgem_dev *dp)
+{
+ int err;
+
+ sema_p(&dp->hal_op_lock);
+ err = (*dp->ugc.usbgc_stop_chip)(dp);
+ sema_v(&dp->hal_op_lock);
+ return (err);
+}
+
+static int
+usbgem_hal_get_stats(struct usbgem_dev *dp)
+{
+ int err;
+
+ sema_p(&dp->hal_op_lock);
+ err = (*dp->ugc.usbgc_get_stats)(dp);
+ sema_v(&dp->hal_op_lock);
+ return (err);
+}
+
+
+/* ============================================================== */
+/*
+ * USB pipe management
+ */
+/* ============================================================== */
+static boolean_t
+usbgem_rx_start_unit(struct usbgem_dev *dp, usb_bulk_req_t *req)
+{
+ mblk_t *mp;
+ int err;
+ usb_flags_t flags;
+
+ ASSERT(req);
+
+ mp = allocb(dp->rx_buf_len, BPRI_MED);
+ if (mp == NULL) {
+ cmn_err(CE_WARN, "!%s: %s: failed to allocate mblk",
+ dp->name, __func__);
+ goto err;
+ }
+
+ req->bulk_len = dp->rx_buf_len;
+ req->bulk_data = mp;
+ req->bulk_client_private = (usb_opaque_t)dp;
+ req->bulk_timeout = 0;
+ req->bulk_attributes = USB_ATTRS_SHORT_XFER_OK;
+ req->bulk_cb = usbgem_bulkin_cb;
+ req->bulk_exc_cb = usbgem_bulkin_cb;
+ req->bulk_completion_reason = 0;
+ req->bulk_cb_flags = 0;
+
+ flags = 0;
+ err = usb_pipe_bulk_xfer(dp->bulkin_pipe, req, flags);
+
+ if (err != USB_SUCCESS) {
+ cmn_err(CE_WARN, "%s: failed to bulk_xfer for rx, err:%d",
+ dp->name, err);
+
+ /* free req and mp */
+ usb_free_bulk_req(req);
+ goto err;
+ }
+ return (B_TRUE);
+err:
+ return (B_FALSE);
+}
+
+/* ============================================================== */
+/*
+ * Rx/Tx buffer management
+ */
+/* ============================================================== */
+static int
+usbgem_init_rx_buf(struct usbgem_dev *dp)
+{
+ int i;
+ usb_bulk_req_t *req;
+
+ ASSERT(dp->mac_state == MAC_STATE_ONLINE);
+
+ for (i = 0; i < dp->ugc.usbgc_rx_list_max; i++) {
+ req = usb_alloc_bulk_req(dp->dip, 0, USB_FLAGS_SLEEP);
+ if (req == NULL) {
+ cmn_err(CE_WARN,
+ "!%s: %s: failed to allocate bulkreq for rx",
+ dp->name, __func__);
+ return (USB_FAILURE);
+ }
+ if (!usbgem_rx_start_unit(dp, req)) {
+ return (USB_FAILURE);
+ }
+ mutex_enter(&dp->rxlock);
+ dp->rx_busy_cnt++;
+ mutex_exit(&dp->rxlock);
+ }
+ return (USB_SUCCESS);
+}
+
+/* ============================================================== */
+/*
+ * memory resource management
+ */
+/* ============================================================== */
+static int
+usbgem_free_memory(struct usbgem_dev *dp)
+{
+ usb_bulk_req_t *req;
+
+ /* free all tx requst structure */
+ while ((req = dp->tx_free_list) != NULL) {
+ dp->tx_free_list =
+ (usb_bulk_req_t *)req->bulk_client_private;
+ req->bulk_data = NULL;
+ usb_free_bulk_req(req);
+ }
+ return (USB_SUCCESS);
+}
+
+static int
+usbgem_alloc_memory(struct usbgem_dev *dp)
+{
+ int i;
+ usb_bulk_req_t *req;
+
+ /* allocate tx requests */
+ dp->tx_free_list = NULL;
+ for (i = 0; i < dp->ugc.usbgc_tx_list_max; i++) {
+ req = usb_alloc_bulk_req(dp->dip, 0, USB_FLAGS_SLEEP);
+ if (req == NULL) {
+ cmn_err(CE_WARN,
+ "%s:%s failed to allocate tx requests",
+ dp->name, __func__);
+
+ /* free partially allocated tx requests */
+ (void) usbgem_free_memory(dp);
+ return (USB_FAILURE);
+ }
+
+ /* add the new one allocated into tx free list */
+ req->bulk_client_private = (usb_opaque_t)dp->tx_free_list;
+ dp->tx_free_list = req;
+ }
+
+ return (USB_SUCCESS);
+}
+
+/* ========================================================== */
+/*
+ * Start transmission.
+ * Return zero on success,
+ */
+/* ========================================================== */
+
+#ifdef TXTIMEOUT_TEST
+static int usbgem_send_cnt = 0;
+#endif
+
+/*
+ * usbgem_send is used only to send data packet into ethernet line.
+ */
+static mblk_t *
+usbgem_send_common(struct usbgem_dev *dp, mblk_t *mp, uint32_t flags)
+{
+ int err;
+ mblk_t *new;
+ usb_bulk_req_t *req;
+ int mcast;
+ int bcast;
+ int len;
+ boolean_t intr;
+ usb_flags_t usb_flags = 0;
+#ifdef USBGEM_DEBUG_LEVEL
+ usb_pipe_state_t p_state;
+#endif
+ DPRINTF(2, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ intr = (flags & 1) != 0;
+ len = msgdsize(mp);
+ bcast = 0;
+ mcast = 0;
+ if (mp->b_rptr[0] & 1) {
+ if (bcmp(mp->b_rptr, &usbgem_bcastaddr, ETHERADDRL) == 0) {
+ bcast = 1;
+ } else {
+ mcast = 1;
+ }
+ }
+ new = (*dp->ugc.usbgc_tx_make_packet)(dp, mp);
+ if (new == NULL) {
+ /*
+ * no memory resource. we don't stop downstream,
+ * we just discard the packet.
+ */
+ DPRINTF(0, (CE_CONT, "!%s: %s: no memory",
+ dp->name, __func__));
+ freemsg(mp);
+
+ mutex_enter(&dp->txlock);
+ dp->stats.noxmtbuf++;
+ dp->stats.errxmt++;
+ mutex_exit(&dp->txlock);
+
+ return (NULL);
+ }
+
+ ASSERT(new->b_cont == NULL);
+
+ mutex_enter(&dp->txlock);
+ if (dp->tx_free_list == NULL) {
+ /*
+ * no tx free slot
+ */
+ ASSERT(dp->tx_busy_cnt == dp->ugc.usbgc_tx_list_max);
+ mutex_exit(&dp->txlock);
+
+ DPRINTF(4, (CE_CONT, "!%s: %s: no free slot",
+ dp->name, __func__));
+ if (new && new != mp) {
+ /* free reallocated message */
+ freemsg(new);
+ }
+ return (mp);
+ }
+ req = dp->tx_free_list;
+ dp->tx_free_list = (usb_bulk_req_t *)req->bulk_client_private;
+ dp->tx_busy_cnt++;
+
+ if (dp->tx_free_list == NULL) {
+ intr = B_TRUE;
+ }
+ if (intr) {
+ dp->tx_intr_pended++;
+ }
+ DB_TCI(new) = intr;
+#ifdef USBGEM_DEBUG_LEVEL
+ new->b_datap->db_cksum32 = dp->tx_seq_num;
+ dp->tx_seq_num++;
+#endif
+ dp->stats.obytes += len;
+ dp->stats.opackets++;
+ if (bcast | mcast) {
+ dp->stats.obcast += bcast;
+ dp->stats.omcast += mcast;
+ }
+ mutex_exit(&dp->txlock);
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: sending", dp->name, __func__));
+
+ req->bulk_len = (long)new->b_wptr - (long)new->b_rptr;
+ req->bulk_data = new;
+ req->bulk_client_private = (usb_opaque_t)dp;
+ req->bulk_timeout = dp->bulkout_timeout; /* in second */
+ req->bulk_attributes = 0;
+ req->bulk_cb = usbgem_bulkout_cb;
+ req->bulk_exc_cb = usbgem_bulkout_cb;
+ req->bulk_completion_reason = 0;
+ req->bulk_cb_flags = 0;
+
+ if (intr) {
+ usb_flags = USB_FLAGS_SLEEP;
+ }
+ if ((err = usb_pipe_bulk_xfer(dp->bulkout_pipe, req, usb_flags))
+ != USB_SUCCESS) {
+
+ /* failed to transfer the packet, discard it. */
+ freemsg(new);
+ req->bulk_data = NULL;
+
+ /* recycle the request block */
+ mutex_enter(&dp->txlock);
+ dp->tx_busy_cnt--;
+ req->bulk_client_private = (usb_opaque_t)dp->tx_free_list;
+ dp->tx_free_list = req;
+ mutex_exit(&dp->txlock);
+
+ cmn_err(CE_NOTE,
+ "%s: %s: usb_pipe_bulk_xfer: failed: err:%d",
+ dp->name, __func__, err);
+
+ /* we use another flag to indicate error state. */
+ if (dp->fatal_error == (clock_t)0) {
+ dp->fatal_error = usbgem_timestamp_nz();
+ }
+ } else {
+ /* record the start time */
+ dp->tx_start_time = ddi_get_lbolt();
+ }
+
+ if (err == USB_SUCCESS && (usb_flags & USB_FLAGS_SLEEP)) {
+ usbgem_bulkout_cb(dp->bulkout_pipe, req);
+ }
+
+ if (new != mp) {
+ freemsg(mp);
+ }
+ return (NULL);
+}
+
+int
+usbgem_restart_nic(struct usbgem_dev *dp)
+{
+ int ret;
+ int flags = 0;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ ASSERT(dp->mac_state != MAC_STATE_DISCONNECTED);
+
+ /*
+ * ensure to stop the nic
+ */
+ if (dp->mac_state == MAC_STATE_ONLINE) {
+ (void) usbgem_mac_stop(dp, MAC_STATE_STOPPED, STOP_GRACEFUL);
+ }
+
+ /* now the nic become quiescent, reset the chip */
+ if (usbgem_hal_reset_chip(dp) != USB_SUCCESS) {
+ cmn_err(CE_WARN, "%s: %s: failed to reset chip",
+ dp->name, __func__);
+ goto err;
+ }
+
+ /*
+ * restore the nic state step by step
+ */
+ if (dp->nic_state < NIC_STATE_INITIALIZED) {
+ goto done;
+ }
+
+ if (usbgem_mac_init(dp) != USB_SUCCESS) {
+ cmn_err(CE_WARN, "%s: %s: failed to initialize chip",
+ dp->name, __func__);
+ goto err;
+ }
+
+ /* setup mac address and enable rx filter */
+ sema_p(&dp->rxfilter_lock);
+ dp->rxmode |= RXMODE_ENABLE;
+ ret = usbgem_hal_set_rx_filter(dp);
+ sema_v(&dp->rxfilter_lock);
+ if (ret != USB_SUCCESS) {
+ goto err;
+ }
+
+ /*
+ * update the link state asynchronously
+ */
+ cv_signal(&dp->link_watcher_wait_cv);
+
+ /*
+ * XXX - a panic happened because of linkdown.
+ * We must check mii_state here, because the link can be down just
+ * before the restart event happen. If the link is down now,
+ * gem_mac_start() will be called from gem_mii_link_check() when
+ * the link become up later.
+ */
+ if (dp->mii_state == MII_STATE_LINKUP) {
+ if (usbgem_hal_set_media(dp) != USB_SUCCESS) {
+ goto err;
+ }
+ if (dp->nic_state < NIC_STATE_ONLINE) {
+ goto done;
+ }
+
+ (void) usbgem_mac_start(dp);
+
+ }
+done:
+ return (USB_SUCCESS);
+err:
+#ifdef GEM_CONFIG_FMA
+ ddi_fm_service_impact(dp->dip, DDI_SERVICE_DEGRADED);
+#endif
+ return (USB_FAILURE);
+}
+
+static void
+usbgem_tx_timeout(struct usbgem_dev *dp)
+{
+ int ret;
+ uint_t rwlock;
+ clock_t now;
+
+ for (; ; ) {
+ mutex_enter(&dp->tx_watcher_lock);
+ ret = cv_timedwait(&dp->tx_watcher_cv, &dp->tx_watcher_lock,
+ dp->tx_watcher_interval + ddi_get_lbolt());
+ mutex_exit(&dp->tx_watcher_lock);
+
+ if (dp->tx_watcher_stop) {
+ break;
+ }
+
+ now = ddi_get_lbolt();
+
+ rwlock = RW_READER;
+again:
+ rw_enter(&dp->dev_state_lock, rwlock);
+
+ if ((dp->mac_state != MAC_STATE_DISCONNECTED &&
+ dp->fatal_error &&
+ now - dp->fatal_error >= dp->ugc.usbgc_tx_timeout) ||
+ (dp->mac_state == MAC_STATE_ONLINE &&
+ dp->mii_state == MII_STATE_LINKUP &&
+ dp->tx_busy_cnt != 0 &&
+ now - dp->tx_start_time >= dp->ugc.usbgc_tx_timeout)) {
+ if (rwlock == RW_READER) {
+ /*
+ * Upgrade dev_state_lock from shared mode
+ * to exclusive mode to restart nic
+ */
+ rwlock = RW_WRITER;
+ rw_exit(&dp->dev_state_lock);
+ goto again;
+ }
+ cmn_err(CE_WARN, "%s: %s: restarting the nic:"
+ " fatal_error:%ld nic_state:%d"
+ " mac_state:%d starttime:%ld",
+ dp->name, __func__,
+ dp->fatal_error ? now - dp->fatal_error: 0,
+ dp->nic_state, dp->mac_state,
+ dp->tx_busy_cnt ? now - dp->tx_start_time : 0);
+
+ (void) usbgem_restart_nic(dp);
+ }
+
+ rw_exit(&dp->dev_state_lock);
+ }
+}
+
+static int
+usbgem_tx_watcher_start(struct usbgem_dev *dp)
+{
+ int err;
+ kthread_t *wdth;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /* make a first call of uwgem_lw_link_check() */
+ dp->tx_watcher_stop = 0;
+ dp->tx_watcher_interval = drv_usectohz(1000*1000);
+
+ wdth = thread_create(NULL, 0, usbgem_tx_timeout, dp, 0, &p0,
+ TS_RUN, minclsyspri);
+ if (wdth == NULL) {
+ cmn_err(CE_WARN,
+ "!%s: %s: failed to create a tx_watcher thread",
+ dp->name, __func__);
+ return (USB_FAILURE);
+ }
+ dp->tx_watcher_did = wdth->t_did;
+
+ return (USB_SUCCESS);
+}
+
+static void
+usbgem_tx_watcher_stop(struct usbgem_dev *dp)
+{
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+ if (dp->tx_watcher_did) {
+ /* Ensure timer routine stopped */
+ dp->tx_watcher_stop = 1;
+ cv_signal(&dp->tx_watcher_cv);
+ thread_join(dp->tx_watcher_did);
+ dp->tx_watcher_did = NULL;
+ }
+}
+
+/* ================================================================== */
+/*
+ * Callback handlers
+ */
+/* ================================================================== */
+static void
+usbgem_bulkin_cb(usb_pipe_handle_t pipe, usb_bulk_req_t *req)
+{
+ mblk_t *newmp;
+ mblk_t *mp;
+ mblk_t *tp;
+ int len = 0;
+ int pkts = 0;
+ int bcast = 0;
+ int mcast = 0;
+ boolean_t busy;
+ struct usbgem_dev *dp;
+
+ dp = (struct usbgem_dev *)req->bulk_client_private;
+ mp = req->bulk_data;
+ req->bulk_data = NULL;
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: mp:%p, cr:%s(%d)",
+ dp->name, __func__, mp,
+ usb_str_cr(req->bulk_completion_reason),
+ req->bulk_completion_reason));
+
+ /*
+ * we cannot acquire dev_state_lock because the routine
+ * must be executed during usbgem_mac_stop() to avoid
+ * dead lock.
+ * we use a simle membar operation to get the state correctly.
+ */
+ membar_consumer();
+
+ if (req->bulk_completion_reason == USB_CR_OK &&
+ dp->nic_state == NIC_STATE_ONLINE) {
+ newmp = (*dp->ugc.usbgc_rx_make_packet)(dp, mp);
+
+ if (newmp != mp) {
+ /* the message has been reallocated, free old one */
+ freemsg(mp);
+ }
+
+ /* the message may includes one or more ethernet packets */
+ for (tp = newmp; tp; tp = tp->b_next) {
+ len += tp->b_wptr - tp->b_rptr;
+ pkts++;
+ if (tp->b_rptr[0] & 1) {
+ if (bcmp(tp->b_rptr, &usbgem_bcastaddr,
+ ETHERADDRL) == 0) {
+ bcast++;
+ } else {
+ mcast++;
+ }
+ }
+ }
+
+ /* send up if it is a valid packet */
+#ifdef USBGEM_CONFIG_GLDv3
+ mac_rx(dp->mh, NULL, newmp);
+#else
+ while (newmp) {
+ tp = newmp;
+ newmp = newmp->b_next;
+ tp->b_next = NULL;
+ gld_recv(dp->macinfo, tp);
+ }
+#endif
+ } else {
+ freemsg(mp);
+ len = 0;
+ }
+
+ mutex_enter(&dp->rxlock);
+ /* update rx_active */
+ if (dp->rx_active) {
+ dp->rx_active = dp->mac_state == MAC_STATE_ONLINE;
+ }
+
+ dp->stats.rbytes += len;
+ dp->stats.rpackets += pkts;
+ if (bcast | mcast) {
+ dp->stats.rbcast += bcast;
+ dp->stats.rmcast += mcast;
+ }
+ mutex_exit(&dp->rxlock);
+
+ if (dp->rx_active) {
+ /* prepare to receive the next packets */
+ if (usbgem_rx_start_unit(dp, req)) {
+ /* we successed */
+ goto done;
+ }
+ cmn_err(CE_WARN,
+ "!%s: %s: failed to fill next rx packet",
+ dp->name, __func__);
+ /*
+ * we use another flag to indicate error state.
+ * if we acquire dev_state_lock for RW_WRITER here,
+ * usbgem_mac_stop() may hang.
+ */
+ if (dp->fatal_error == (clock_t)0) {
+ dp->fatal_error = usbgem_timestamp_nz();
+ }
+ } else {
+ /* no need to prepare the next packets */
+ usb_free_bulk_req(req);
+ }
+
+ mutex_enter(&dp->rxlock);
+ dp->rx_active = B_FALSE;
+ dp->rx_busy_cnt--;
+ if (dp->rx_busy_cnt == 0) {
+ /* wake up someone waits for me */
+ cv_broadcast(&dp->rx_drain_cv);
+ }
+ mutex_exit(&dp->rxlock);
+done:
+ ;
+}
+
+static void
+usbgem_bulkout_cb(usb_pipe_handle_t pipe, usb_bulk_req_t *req)
+{
+ boolean_t intr;
+ boolean_t tx_sched;
+ struct usbgem_dev *dp;
+
+ dp = (struct usbgem_dev *)req->bulk_client_private;
+ tx_sched = B_FALSE;
+
+ DPRINTF(2, (CE_CONT,
+ "!%s: %s: cr:%s(%d) cb_flags:0x%x head:%d tail:%d",
+ dp->name, __func__,
+ usb_str_cr(req->bulk_completion_reason),
+ req->bulk_completion_reason,
+ req->bulk_cb_flags,
+ dp->tx_busy_cnt));
+
+ /* we have finished to transfer the packet into tx fifo */
+ intr = DB_TCI(req->bulk_data);
+ freemsg(req->bulk_data);
+
+ if (req->bulk_completion_reason != USB_CR_OK &&
+ dp->fatal_error == (clock_t)0) {
+ dp->fatal_error = usbgem_timestamp_nz();
+ }
+
+ mutex_enter(&dp->txlock);
+
+ if (intr) {
+ ASSERT(dp->tx_intr_pended > 0);
+ /* find the last interrupt we have scheduled */
+ if (--(dp->tx_intr_pended) == 0) {
+ tx_sched = B_TRUE;
+ }
+ }
+
+ ASSERT(dp->tx_busy_cnt > 0);
+ req->bulk_client_private = (usb_opaque_t)dp->tx_free_list;
+ dp->tx_free_list = req;
+ dp->tx_busy_cnt--;
+
+#ifdef CONFIG_TX_LIMITER
+ if (tx_sched) {
+ dp->tx_max_packets =
+ min(dp->tx_max_packets + 1, dp->ugc.usbgc_tx_list_max);
+ }
+#endif
+ if (dp->mac_state != MAC_STATE_ONLINE && dp->tx_busy_cnt == 0) {
+ cv_broadcast(&dp->tx_drain_cv);
+ }
+
+ mutex_exit(&dp->txlock);
+
+ if (tx_sched) {
+#ifdef USBGEM_CONFIG_GLDv3
+ mac_tx_update(dp->mh);
+#else
+ gld_sched(dp->macinfo);
+#endif
+ }
+}
+
+static void
+usbgem_intr_cb(usb_pipe_handle_t ph, usb_intr_req_t *req)
+{
+ struct usbgem_dev *dp;
+
+ dp = (struct usbgem_dev *)req->intr_client_private;
+ dp->stats.intr++;
+
+ if (req->intr_completion_reason == USB_CR_OK) {
+ (*dp->ugc.usbgc_interrupt)(dp, req->intr_data);
+ }
+
+ /* free the request and data */
+ usb_free_intr_req(req);
+}
+
+/* ======================================================================== */
+/*
+ * MII support routines
+ */
+/* ======================================================================== */
+static void
+usbgem_choose_forcedmode(struct usbgem_dev *dp)
+{
+ /* choose media mode */
+ if (dp->anadv_1000fdx || dp->anadv_1000hdx) {
+ dp->speed = USBGEM_SPD_1000;
+ dp->full_duplex = dp->anadv_1000fdx;
+ } else if (dp->anadv_100fdx || dp->anadv_100t4) {
+ dp->speed = USBGEM_SPD_100;
+ dp->full_duplex = B_TRUE;
+ } else if (dp->anadv_100hdx) {
+ dp->speed = USBGEM_SPD_100;
+ dp->full_duplex = B_FALSE;
+ } else {
+ dp->speed = USBGEM_SPD_10;
+ dp->full_duplex = dp->anadv_10fdx;
+ }
+}
+
+static uint16_t
+usbgem_mii_read(struct usbgem_dev *dp, uint_t reg, int *errp)
+{
+ uint16_t val;
+
+ sema_p(&dp->hal_op_lock);
+ val = (*dp->ugc.usbgc_mii_read)(dp, reg, errp);
+ sema_v(&dp->hal_op_lock);
+
+ return (val);
+}
+
+static void
+usbgem_mii_write(struct usbgem_dev *dp, uint_t reg, uint16_t val, int *errp)
+{
+ sema_p(&dp->hal_op_lock);
+ (*dp->ugc.usbgc_mii_write)(dp, reg, val, errp);
+ sema_v(&dp->hal_op_lock);
+}
+
+static int
+usbgem_mii_probe(struct usbgem_dev *dp)
+{
+ int err;
+
+ err = (*dp->ugc.usbgc_mii_probe)(dp);
+ return (err);
+}
+
+static int
+usbgem_mii_init(struct usbgem_dev *dp)
+{
+ int err;
+
+ err = (*dp->ugc.usbgc_mii_init)(dp);
+ return (err);
+}
+
+#define fc_cap_decode(x) \
+ ((((x) & MII_ABILITY_PAUSE) != 0 ? 1 : 0) | \
+ (((x) & MII_ABILITY_ASM_DIR) != 0 ? 2 : 0))
+
+int
+usbgem_mii_config_default(struct usbgem_dev *dp, int *errp)
+{
+ uint16_t mii_stat;
+ uint16_t val;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /*
+ * Configure bits in advertisement register
+ */
+ mii_stat = dp->mii_status;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: MII_STATUS reg:%b",
+ dp->name, __func__, mii_stat, MII_STATUS_BITS));
+
+ if ((mii_stat & MII_STATUS_ABILITY_TECH) == 0) {
+ /* it's funny */
+ cmn_err(CE_WARN, "!%s: wrong ability bits: mii_status:%b",
+ dp->name, mii_stat, MII_STATUS_BITS);
+ return (USB_FAILURE);
+ }
+
+ /* Do not change the rest of ability bits in advert reg */
+ val = usbgem_mii_read(dp, MII_AN_ADVERT, errp) & ~MII_ABILITY_ALL;
+ if (*errp != USB_SUCCESS) {
+ goto usberr;
+ }
+
+ DPRINTF(0, (CE_CONT,
+ "!%s: %s: 100T4:%d 100F:%d 100H:%d 10F:%d 10H:%d",
+ dp->name, __func__,
+ dp->anadv_100t4, dp->anadv_100fdx, dp->anadv_100hdx,
+ dp->anadv_10fdx, dp->anadv_10hdx));
+
+ /* set technology bits */
+ if (dp->anadv_100t4) {
+ val |= MII_ABILITY_100BASE_T4;
+ }
+ if (dp->anadv_100fdx) {
+ val |= MII_ABILITY_100BASE_TX_FD;
+ }
+ if (dp->anadv_100hdx) {
+ val |= MII_ABILITY_100BASE_TX;
+ }
+ if (dp->anadv_10fdx) {
+ val |= MII_ABILITY_10BASE_T_FD;
+ }
+ if (dp->anadv_10hdx) {
+ val |= MII_ABILITY_10BASE_T;
+ }
+
+ /* set flow control capabilities */
+ if (dp->anadv_pause) {
+ val |= MII_ABILITY_PAUSE;
+ }
+ if (dp->anadv_asmpause) {
+ val |= MII_ABILITY_ASM_DIR;
+ }
+
+ DPRINTF(0, (CE_CONT,
+ "!%s: %s: setting MII_AN_ADVERT reg:%b, pause:%d, asmpause:%d",
+ dp->name, __func__, val, MII_ABILITY_BITS,
+ dp->anadv_pause, dp->anadv_asmpause));
+
+ usbgem_mii_write(dp, MII_AN_ADVERT, val, errp);
+ if (*errp != USB_SUCCESS) {
+ goto usberr;
+ }
+
+ if (dp->mii_status & MII_STATUS_XSTATUS) {
+ /*
+ * 1000Base-T GMII support
+ */
+ if (!dp->anadv_autoneg) {
+ /* enable manual configuration */
+ val = MII_1000TC_CFG_EN;
+ if (dp->anadv_1000t_ms == 2) {
+ val |= MII_1000TC_CFG_VAL;
+ }
+ } else {
+ val = 0;
+ if (dp->anadv_1000fdx) {
+ val |= MII_1000TC_ADV_FULL;
+ }
+ if (dp->anadv_1000hdx) {
+ val |= MII_1000TC_ADV_HALF;
+ }
+ switch (dp->anadv_1000t_ms) {
+ case 1:
+ /* slave */
+ val |= MII_1000TC_CFG_EN;
+ break;
+
+ case 2:
+ /* master */
+ val |= MII_1000TC_CFG_EN | MII_1000TC_CFG_VAL;
+ break;
+
+ default:
+ /* auto: do nothing */
+ break;
+ }
+ }
+ DPRINTF(0, (CE_CONT,
+ "!%s: %s: setting MII_1000TC reg:%b",
+ dp->name, __func__, val, MII_1000TC_BITS));
+
+ usbgem_mii_write(dp, MII_1000TC, val, errp);
+ if (*errp != USB_SUCCESS) {
+ goto usberr;
+ }
+ }
+ return (USB_SUCCESS);
+
+usberr:
+ return (*errp);
+}
+
+static char *usbgem_fc_type[] = {
+ "without",
+ "with symmetric",
+ "with tx",
+ "with rx",
+};
+
+#ifdef USBGEM_CONFIG_GLDv3
+#define USBGEM_LINKUP(dp) mac_link_update((dp)->mh, LINK_STATE_UP)
+#define USBGEM_LINKDOWN(dp) mac_link_update((dp)->mh, LINK_STATE_DOWN)
+#else
+#define USBGEM_LINKUP(dp) \
+ if (gld_linkstate) { \
+ gld_linkstate((dp)->macinfo, GLD_LINKSTATE_UP); \
+ }
+#define USBGEM_LINKDOWN(dp) \
+ if (gld_linkstate) { \
+ gld_linkstate((dp)->macinfo, GLD_LINKSTATE_DOWN); \
+ }
+#endif
+
+static uint8_t usbgem_fc_result[4 /* my cap */][4 /* lp cap */] = {
+/* none symm tx rx/symm */
+/* none */
+ {FLOW_CONTROL_NONE,
+ FLOW_CONTROL_NONE,
+ FLOW_CONTROL_NONE,
+ FLOW_CONTROL_NONE},
+/* sym */
+ {FLOW_CONTROL_NONE,
+ FLOW_CONTROL_SYMMETRIC,
+ FLOW_CONTROL_NONE,
+ FLOW_CONTROL_SYMMETRIC},
+/* tx */
+ {FLOW_CONTROL_NONE,
+ FLOW_CONTROL_NONE,
+ FLOW_CONTROL_NONE,
+ FLOW_CONTROL_TX_PAUSE},
+/* rx/symm */
+ {FLOW_CONTROL_NONE,
+ FLOW_CONTROL_SYMMETRIC,
+ FLOW_CONTROL_RX_PAUSE,
+ FLOW_CONTROL_SYMMETRIC},
+};
+
+static boolean_t
+usbgem_mii_link_check(struct usbgem_dev *dp, int *oldstatep, int *newstatep)
+{
+ boolean_t tx_sched = B_FALSE;
+ uint16_t status;
+ uint16_t advert;
+ uint16_t lpable;
+ uint16_t exp;
+ uint16_t ctl1000;
+ uint16_t stat1000;
+ uint16_t val;
+ clock_t now;
+ clock_t diff;
+ int linkdown_action;
+ boolean_t fix_phy = B_FALSE;
+ int err;
+ uint_t rwlock;
+
+ DPRINTF(4, (CE_CONT, "!%s: %s: time:%d state:%d",
+ dp->name, __func__, ddi_get_lbolt(), dp->mii_state));
+
+ if (dp->mii_state != MII_STATE_LINKUP) {
+ rwlock = RW_WRITER;
+ } else {
+ rwlock = RW_READER;
+ }
+again:
+ rw_enter(&dp->dev_state_lock, rwlock);
+
+ /* save old mii state */
+ *oldstatep = dp->mii_state;
+
+ if (dp->mac_state == MAC_STATE_DISCONNECTED) {
+ /* stop periodic execution of the link watcher */
+ dp->mii_interval = 0;
+ tx_sched = B_FALSE;
+ goto next;
+ }
+
+ now = ddi_get_lbolt();
+ diff = now - dp->mii_last_check;
+ dp->mii_last_check = now;
+
+ /*
+ * For NWAM, don't show linkdown state right
+ * when the device is attached.
+ */
+ if (dp->linkup_delay > 0) {
+ if (dp->linkup_delay > diff) {
+ dp->linkup_delay -= diff;
+ } else {
+ /* link up timeout */
+ dp->linkup_delay = -1;
+ }
+ }
+
+next_nowait:
+ switch (dp->mii_state) {
+ case MII_STATE_UNKNOWN:
+ goto reset_phy;
+
+ case MII_STATE_RESETTING:
+ dp->mii_timer -= diff;
+ if (dp->mii_timer > 0) {
+ /* don't read phy registers in resetting */
+ dp->mii_interval = WATCH_INTERVAL_FAST;
+ goto next;
+ }
+
+ val = usbgem_mii_read(dp, MII_CONTROL, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ if (val & MII_CONTROL_RESET) {
+ cmn_err(CE_NOTE,
+ "!%s: time:%ld resetting phy not complete."
+ " mii_control:0x%b",
+ dp->name, ddi_get_lbolt(),
+ val, MII_CONTROL_BITS);
+ }
+
+ /* ensure neither isolated nor pwrdown nor auto-nego mode */
+ usbgem_mii_write(dp, MII_CONTROL, 0, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+#if USBGEM_DEBUG_LEVEL > 10
+ val = usbgem_mii_read(dp, MII_CONTROL, &err);
+ cmn_err(CE_CONT, "!%s: readback control %b",
+ dp->name, val, MII_CONTROL_BITS);
+#endif
+ /* As resetting PHY has completed, configure PHY registers */
+ if ((*dp->ugc.usbgc_mii_config)(dp, &err) != USB_SUCCESS) {
+ /* we failed to configure PHY */
+ goto usberr;
+ }
+
+ /* prepare for forced mode */
+ usbgem_choose_forcedmode(dp);
+
+ dp->mii_lpable = 0;
+ dp->mii_advert = 0;
+ dp->mii_exp = 0;
+ dp->mii_ctl1000 = 0;
+ dp->mii_stat1000 = 0;
+
+ dp->flow_control = FLOW_CONTROL_NONE;
+
+ if (!dp->anadv_autoneg) {
+ /* skip auto-negotiation phase */
+ dp->mii_state = MII_STATE_MEDIA_SETUP;
+ dp->mii_timer = dp->ugc.usbgc_mii_linkdown_timeout;
+ goto next_nowait;
+ }
+
+ /* issue an auto-negotiation command */
+ goto autonego;
+
+ case MII_STATE_AUTONEGOTIATING:
+ /*
+ * Autonegotiation in progress
+ */
+ dp->mii_timer -= diff;
+ if (dp->mii_timer -
+ (dp->ugc.usbgc_mii_an_timeout - dp->ugc.usbgc_mii_an_wait)
+ > 0) {
+ /* wait for minimum time (2.3 - 2.5 sec) */
+ dp->mii_interval = WATCH_INTERVAL_FAST;
+ goto next;
+ }
+
+ /* read PHY status */
+ status = usbgem_mii_read(dp, MII_STATUS, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ DPRINTF(4, (CE_CONT,
+ "!%s: %s: called: mii_state:%d MII_STATUS reg:%b",
+ dp->name, __func__, dp->mii_state,
+ status, MII_STATUS_BITS));
+
+ if (status & MII_STATUS_REMFAULT) {
+ /*
+ * The link parnert told me something wrong happend.
+ * What do we do ?
+ */
+ cmn_err(CE_CONT,
+ "!%s: auto-negotiation failed: remote fault",
+ dp->name);
+ goto autonego;
+ }
+
+ if ((status & MII_STATUS_ANDONE) == 0) {
+ if (dp->mii_timer <= 0) {
+ /*
+ * Auto-negotiation has been timed out,
+ * Reset PHY and try again.
+ */
+ if (!dp->mii_supress_msg) {
+ cmn_err(CE_WARN,
+ "!%s: auto-negotiation failed:"
+ " timeout",
+ dp->name);
+ dp->mii_supress_msg = B_TRUE;
+ }
+ goto autonego;
+ }
+ /*
+ * Auto-negotiation is in progress. Wait for a while.
+ */
+ dp->mii_interval = dp->ugc.usbgc_mii_an_watch_interval;
+ goto next;
+ }
+
+ /*
+ * Auto-negotiation has been completed. Let's go to AN_DONE.
+ */
+ dp->mii_state = MII_STATE_AN_DONE;
+ dp->mii_supress_msg = B_FALSE;
+ DPRINTF(0, (CE_CONT,
+ "!%s: auto-negotiation completed, MII_STATUS:%b",
+ dp->name, status, MII_STATUS_BITS));
+
+ if (dp->ugc.usbgc_mii_an_delay > 0) {
+ dp->mii_timer = dp->ugc.usbgc_mii_an_delay;
+ dp->mii_interval = drv_usectohz(20*1000);
+ goto next;
+ }
+
+ dp->mii_timer = 0;
+ diff = 0;
+ goto next_nowait;
+
+ case MII_STATE_AN_DONE:
+ /*
+ * Auto-negotiation has done. Now we can set up media.
+ */
+ dp->mii_timer -= diff;
+ if (dp->mii_timer > 0) {
+ /* wait for a while */
+ dp->mii_interval = WATCH_INTERVAL_FAST;
+ goto next;
+ }
+
+ /*
+ * Setup speed and duplex mode according with
+ * the result of auto negotiation.
+ */
+
+ /*
+ * Read registers required to determin current
+ * duplex mode and media speed.
+ */
+ if (dp->ugc.usbgc_mii_an_delay > 0) {
+ /* the 'status' variable is not initialized yet */
+ status = usbgem_mii_read(dp, MII_STATUS, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ }
+ advert = usbgem_mii_read(dp, MII_AN_ADVERT, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ lpable = usbgem_mii_read(dp, MII_AN_LPABLE, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ exp = usbgem_mii_read(dp, MII_AN_EXPANSION, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ if (exp == 0xffff) {
+ /* some phys don't have exp register */
+ exp = 0;
+ }
+
+ ctl1000 = 0;
+ stat1000 = 0;
+ if (dp->mii_status & MII_STATUS_XSTATUS) {
+ ctl1000 = usbgem_mii_read(dp, MII_1000TC, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ stat1000 = usbgem_mii_read(dp, MII_1000TS, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ }
+ dp->mii_lpable = lpable;
+ dp->mii_advert = advert;
+ dp->mii_exp = exp;
+ dp->mii_ctl1000 = ctl1000;
+ dp->mii_stat1000 = stat1000;
+
+ cmn_err(CE_CONT,
+ "!%s: auto-negotiation done: "
+ "status:%b, advert:%b, lpable:%b, exp:%b",
+ dp->name,
+ status, MII_STATUS_BITS,
+ advert, MII_ABILITY_BITS,
+ lpable, MII_ABILITY_BITS,
+ exp, MII_AN_EXP_BITS);
+
+ DPRINTF(0, (CE_CONT, "!%s: MII_STATUS:%b",
+ dp->name, status, MII_STATUS_BITS));
+
+ if (dp->mii_status & MII_STATUS_XSTATUS) {
+ cmn_err(CE_CONT,
+ "! MII_1000TC reg:%b, MII_1000TS reg:%b",
+ ctl1000, MII_1000TC_BITS,
+ stat1000, MII_1000TS_BITS);
+ }
+
+ if (usbgem_population(lpable) <= 1 &&
+ (exp & MII_AN_EXP_LPCANAN) == 0) {
+ if ((advert & MII_ABILITY_TECH) != lpable) {
+ cmn_err(CE_WARN,
+ "!%s: but the link partner doesn't seem"
+ " to have auto-negotiation capability."
+ " please check the link configuration.",
+ dp->name);
+ }
+ /*
+ * it should be a result of pararell detection,
+ * which cannot detect duplex mode.
+ */
+ if ((advert & lpable) == 0 &&
+ lpable & MII_ABILITY_10BASE_T) {
+ /* no common technology, try 10M half mode */
+ lpable |= advert & MII_ABILITY_10BASE_T;
+ fix_phy = B_TRUE;
+ }
+ } else if (lpable == 0) {
+ cmn_err(CE_WARN, "!%s: wrong lpable.", dp->name);
+ goto reset_phy;
+ }
+ /*
+ * configure current link mode according to AN priority.
+ */
+ val = advert & lpable;
+ if ((ctl1000 & MII_1000TC_ADV_FULL) &&
+ (stat1000 & MII_1000TS_LP_FULL)) {
+ /* 1000BaseT & full duplex */
+ dp->speed = USBGEM_SPD_1000;
+ dp->full_duplex = B_TRUE;
+ } else if ((ctl1000 & MII_1000TC_ADV_HALF) &&
+ (stat1000 & MII_1000TS_LP_HALF)) {
+ /* 1000BaseT & half duplex */
+ dp->speed = USBGEM_SPD_1000;
+ dp->full_duplex = B_FALSE;
+ } else if ((val & MII_ABILITY_100BASE_TX_FD)) {
+ /* 100BaseTx & fullduplex */
+ dp->speed = USBGEM_SPD_100;
+ dp->full_duplex = B_TRUE;
+ } else if ((val & MII_ABILITY_100BASE_T4)) {
+ /* 100BaseTx & fullduplex */
+ dp->speed = USBGEM_SPD_100;
+ dp->full_duplex = B_TRUE;
+ } else if ((val & MII_ABILITY_100BASE_TX)) {
+ /* 100BaseTx & half duplex */
+ dp->speed = USBGEM_SPD_100;
+ dp->full_duplex = B_FALSE;
+ } else if ((val & MII_ABILITY_10BASE_T_FD)) {
+ /* 10BaseT & full duplex */
+ dp->speed = USBGEM_SPD_10;
+ dp->full_duplex = B_TRUE;
+ } else if ((val & MII_ABILITY_10BASE_T)) {
+ /* 10BaseT & half duplex */
+ dp->speed = USBGEM_SPD_10;
+ dp->full_duplex = B_FALSE;
+ } else {
+ /*
+ * the link partner doesn't seem to have
+ * auto-negotiation capability and our PHY
+ * could not report current mode correctly.
+ * We guess current mode by mii_control register.
+ */
+ val = usbgem_mii_read(dp, MII_CONTROL, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+
+ /* select 100m half or 10m half */
+ dp->speed = (val & MII_CONTROL_100MB) ?
+ USBGEM_SPD_100 : USBGEM_SPD_10;
+ dp->full_duplex = B_FALSE;
+ fix_phy = B_TRUE;
+
+ cmn_err(CE_NOTE,
+ "!%s: auto-negotiation done but "
+ "common ability not found.\n"
+ "PHY state: control:%b advert:%b lpable:%b\n"
+ "guessing %d Mbps %s duplex mode",
+ dp->name,
+ val, MII_CONTROL_BITS,
+ advert, MII_ABILITY_BITS,
+ lpable, MII_ABILITY_BITS,
+ usbgem_speed_value[dp->speed],
+ dp->full_duplex ? "full" : "half");
+ }
+
+ if (dp->full_duplex) {
+ dp->flow_control =
+ usbgem_fc_result[fc_cap_decode(advert)]
+ [fc_cap_decode(lpable)];
+ } else {
+ dp->flow_control = FLOW_CONTROL_NONE;
+ }
+ dp->mii_state = MII_STATE_MEDIA_SETUP;
+ dp->mii_timer = dp->ugc.usbgc_mii_linkdown_timeout;
+ goto next_nowait;
+
+ case MII_STATE_MEDIA_SETUP:
+ DPRINTF(2, (CE_CONT, "!%s: setup midia mode", dp->name));
+
+ /* assume the link state is down */
+ dp->mii_state = MII_STATE_LINKDOWN;
+ dp->mii_supress_msg = B_FALSE;
+
+ /* use short interval */
+ dp->mii_interval = WATCH_INTERVAL_FAST;
+
+ if ((!dp->anadv_autoneg) ||
+ dp->ugc.usbgc_mii_an_oneshot || fix_phy) {
+
+ /*
+ * write the result of auto negotiation back.
+ */
+ val = usbgem_mii_read(dp, MII_CONTROL, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ val &= ~(MII_CONTROL_SPEED | MII_CONTROL_FDUPLEX |
+ MII_CONTROL_ANE | MII_CONTROL_RSAN);
+
+ if (dp->full_duplex) {
+ val |= MII_CONTROL_FDUPLEX;
+ }
+
+ switch (dp->speed) {
+ case USBGEM_SPD_1000:
+ val |= MII_CONTROL_1000MB;
+ break;
+
+ case USBGEM_SPD_100:
+ val |= MII_CONTROL_100MB;
+ break;
+
+ default:
+ cmn_err(CE_WARN, "%s: unknown speed:%d",
+ dp->name, dp->speed);
+ /* FALLTHROUGH */
+
+ case USBGEM_SPD_10:
+ /* for USBGEM_SPD_10, do nothing */
+ break;
+ }
+
+ if (dp->mii_status & MII_STATUS_XSTATUS) {
+ usbgem_mii_write(dp,
+ MII_1000TC, MII_1000TC_CFG_EN, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ }
+ usbgem_mii_write(dp, MII_CONTROL, val, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ }
+ /*
+ * XXX -- nic state should be one of
+ * NIC_STATE_DISCONNECTED
+ * NIC_STATE_STOPPED
+ * NIC_STATE_INITIALIZED
+ * NIC_STATE_ONLINE
+ */
+ if (dp->nic_state >= NIC_STATE_INITIALIZED) {
+ /* notify the result of autonegotiation to mac */
+ if (usbgem_hal_set_media(dp) != USB_SUCCESS) {
+ goto usberr;
+ }
+ }
+ goto next_nowait;
+
+ case MII_STATE_LINKDOWN:
+ status = usbgem_mii_read(dp, MII_STATUS, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ if (status & MII_STATUS_LINKUP) {
+ /*
+ * Link is going up
+ */
+ dp->mii_state = MII_STATE_LINKUP;
+ dp->mii_supress_msg = B_FALSE;
+
+ DPRINTF(0, (CE_CONT,
+ "!%s: link up detected: status:%b",
+ dp->name, status, MII_STATUS_BITS));
+
+ /*
+ * MII_CONTROL_100MB and MII_CONTROL_FDUPLEX are
+ * ignored when MII_CONTROL_ANE is set.
+ */
+ cmn_err(CE_CONT,
+ "!%s: Link up: %d Mbps %s duplex %s flow control",
+ dp->name,
+ usbgem_speed_value[dp->speed],
+ dp->full_duplex ? "full" : "half",
+ usbgem_fc_type[dp->flow_control]);
+
+ dp->mii_interval =
+ dp->ugc.usbgc_mii_link_watch_interval;
+
+ if (dp->ugc.usbgc_mii_hw_link_detection &&
+ dp->nic_state == NIC_STATE_ONLINE) {
+ dp->mii_interval = 0;
+ }
+
+ if (dp->nic_state == NIC_STATE_ONLINE) {
+ if (dp->mac_state == MAC_STATE_INITIALIZED) {
+ (void) usbgem_mac_start(dp);
+ }
+ tx_sched = B_TRUE;
+ }
+
+ goto next;
+ }
+
+ dp->mii_supress_msg = B_TRUE;
+ if (dp->anadv_autoneg) {
+ dp->mii_timer -= diff;
+ if (dp->mii_timer <= 0) {
+ /*
+ * the link down timer expired.
+ * need to restart auto-negotiation.
+ */
+ linkdown_action =
+ dp->ugc.usbgc_mii_linkdown_timeout_action;
+ goto restart_autonego;
+ }
+ }
+ /* don't change mii_state */
+ goto next;
+
+ case MII_STATE_LINKUP:
+ if (rwlock == RW_READER) {
+ /* first pass, read mii status */
+ status = usbgem_mii_read(dp, MII_STATUS, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ }
+ if ((status & MII_STATUS_LINKUP) == 0) {
+ /*
+ * Link is going down
+ */
+ cmn_err(CE_NOTE,
+ "!%s: link down detected: status:%b",
+ dp->name, status, MII_STATUS_BITS);
+ /*
+ * Acquire exclusive lock to change mii_state
+ */
+ if (rwlock == RW_READER) {
+ rwlock = RW_WRITER;
+ rw_exit(&dp->dev_state_lock);
+ goto again;
+ }
+
+ dp->mii_state = MII_STATE_LINKDOWN;
+ dp->mii_timer = dp->ugc.usbgc_mii_linkdown_timeout;
+
+ /*
+ * As we may change the state of the device,
+ * let us acquire exclusive lock for the state.
+ */
+ if (dp->nic_state == NIC_STATE_ONLINE &&
+ dp->mac_state == MAC_STATE_ONLINE &&
+ dp->ugc.usbgc_mii_stop_mac_on_linkdown) {
+ (void) usbgem_restart_nic(dp);
+ /* drain tx */
+ tx_sched = B_TRUE;
+ }
+
+ if (dp->anadv_autoneg) {
+ /* need to restart auto-negotiation */
+ linkdown_action =
+ dp->ugc.usbgc_mii_linkdown_action;
+ goto restart_autonego;
+ }
+ /*
+ * don't use hw link down detection until the link
+ * status become stable for a while.
+ */
+ dp->mii_interval =
+ dp->ugc.usbgc_mii_link_watch_interval;
+
+ goto next;
+ }
+
+ /*
+ * still link up, no need to change mii_state
+ */
+ if (dp->ugc.usbgc_mii_hw_link_detection &&
+ dp->nic_state == NIC_STATE_ONLINE) {
+ /*
+ * no need to check link status periodicly
+ * if nic can generate interrupts when link go down.
+ */
+ dp->mii_interval = 0;
+ }
+ goto next;
+ }
+ /* NOTREACHED */
+ cmn_err(CE_PANIC, "!%s: %s: not reached", dp->name, __func__);
+
+ /*
+ * Actions for new state.
+ */
+restart_autonego:
+ switch (linkdown_action) {
+ case MII_ACTION_RESET:
+ if (!dp->mii_supress_msg) {
+ cmn_err(CE_CONT, "!%s: resetting PHY", dp->name);
+ }
+ dp->mii_supress_msg = B_TRUE;
+ goto reset_phy;
+
+ case MII_ACTION_NONE:
+ dp->mii_supress_msg = B_TRUE;
+ if (dp->ugc.usbgc_mii_an_oneshot) {
+ goto autonego;
+ }
+ /* PHY will restart autonego automatically */
+ dp->mii_state = MII_STATE_AUTONEGOTIATING;
+ dp->mii_timer = dp->ugc.usbgc_mii_an_timeout;
+ dp->mii_interval = dp->ugc.usbgc_mii_an_watch_interval;
+ goto next;
+
+ case MII_ACTION_RSA:
+ if (!dp->mii_supress_msg) {
+ cmn_err(CE_CONT, "!%s: restarting auto-negotiation",
+ dp->name);
+ }
+ dp->mii_supress_msg = B_TRUE;
+ goto autonego;
+
+ default:
+ cmn_err(CE_PANIC, "!%s: unknowm linkdown action: %d",
+ dp->name, dp->ugc.usbgc_mii_linkdown_action);
+ dp->mii_supress_msg = B_TRUE;
+ }
+ /* NOTREACHED */
+
+reset_phy:
+ if (!dp->mii_supress_msg) {
+ cmn_err(CE_CONT, "!%s: resetting PHY", dp->name);
+ }
+ dp->mii_state = MII_STATE_RESETTING;
+ dp->mii_timer = dp->ugc.usbgc_mii_reset_timeout;
+ if (!dp->ugc.usbgc_mii_dont_reset) {
+ usbgem_mii_write(dp, MII_CONTROL, MII_CONTROL_RESET, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ }
+ dp->mii_interval = WATCH_INTERVAL_FAST;
+ goto next;
+
+autonego:
+ if (!dp->mii_supress_msg) {
+ cmn_err(CE_CONT, "!%s: auto-negotiation started", dp->name);
+ }
+ dp->mii_state = MII_STATE_AUTONEGOTIATING;
+ dp->mii_timer = dp->ugc.usbgc_mii_an_timeout;
+
+ /* start/restart autoneg */
+ val = usbgem_mii_read(dp, MII_CONTROL, &err) &
+ ~(MII_CONTROL_ISOLATE | MII_CONTROL_PWRDN | MII_CONTROL_RESET);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ if (val & MII_CONTROL_ANE) {
+ val |= MII_CONTROL_RSAN;
+ }
+ usbgem_mii_write(dp, MII_CONTROL,
+ val | dp->ugc.usbgc_mii_an_cmd | MII_CONTROL_ANE, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+
+ dp->mii_interval = dp->ugc.usbgc_mii_an_watch_interval;
+ goto next;
+
+usberr:
+ dp->mii_state = MII_STATE_UNKNOWN;
+ dp->mii_interval = dp->ugc.usbgc_mii_link_watch_interval;
+ tx_sched = B_TRUE;
+
+next:
+ *newstatep = dp->mii_state;
+ rw_exit(&dp->dev_state_lock);
+ return (tx_sched);
+}
+
+static void
+usbgem_mii_link_watcher(struct usbgem_dev *dp)
+{
+ int old_mii_state;
+ int new_mii_state;
+ boolean_t tx_sched;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ for (; ; ) {
+
+ mutex_enter(&dp->link_watcher_lock);
+ if (dp->mii_interval) {
+ (void) cv_timedwait(&dp->link_watcher_wait_cv,
+ &dp->link_watcher_lock,
+ dp->mii_interval + ddi_get_lbolt());
+ } else {
+ cv_wait(&dp->link_watcher_wait_cv,
+ &dp->link_watcher_lock);
+ }
+ mutex_exit(&dp->link_watcher_lock);
+
+ if (dp->link_watcher_stop) {
+ break;
+ }
+
+ /* we block callbacks from disconnect/suspend and restart */
+ tx_sched = usbgem_mii_link_check(dp,
+ &old_mii_state, &new_mii_state);
+
+ /*
+ * gld v2 notifier functions are not able to
+ * be called with any locks in this layer.
+ */
+ if (tx_sched) {
+ /* kick potentially stopped downstream */
+#ifdef USBGEM_CONFIG_GLDv3
+ mac_tx_update(dp->mh);
+#else
+ gld_sched(dp->macinfo);
+#endif
+ }
+
+ if (old_mii_state != new_mii_state) {
+ /* notify new mii link state */
+ if (new_mii_state == MII_STATE_LINKUP) {
+ dp->linkup_delay = 0;
+ USBGEM_LINKUP(dp);
+ } else if (dp->linkup_delay <= 0) {
+ USBGEM_LINKDOWN(dp);
+ }
+ } else if (dp->linkup_delay < 0) {
+ /* first linkup timeout */
+ dp->linkup_delay = 0;
+ USBGEM_LINKDOWN(dp);
+ }
+ }
+
+ thread_exit();
+}
+
+void
+usbgem_mii_update_link(struct usbgem_dev *dp)
+{
+ cv_signal(&dp->link_watcher_wait_cv);
+}
+
+int
+usbgem_mii_probe_default(struct usbgem_dev *dp)
+{
+ int phy;
+ uint16_t status;
+ uint16_t xstatus;
+ int err;
+ uint16_t adv;
+ uint16_t adv_org;
+
+ DPRINTF(3, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /*
+ * Scan PHY
+ */
+ dp->mii_status = 0;
+
+ /* Try default phy first */
+ if (dp->mii_phy_addr) {
+ status = usbgem_mii_read(dp, MII_STATUS, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ if (status != 0xffff && status != 0x0000) {
+ goto PHY_found;
+ }
+
+ if (dp->mii_phy_addr < 0) {
+ cmn_err(CE_NOTE,
+ "!%s: failed to probe default internal and/or non-MII PHY",
+ dp->name);
+ return (USB_FAILURE);
+ }
+
+ cmn_err(CE_NOTE,
+ "!%s: failed to probe default MII PHY at %d",
+ dp->name, dp->mii_phy_addr);
+ }
+
+ /* Try all possible address */
+ for (phy = dp->ugc.usbgc_mii_addr_min; phy < 32; phy++) {
+ dp->mii_phy_addr = phy;
+ status = usbgem_mii_read(dp, MII_STATUS, &err);
+ if (err != USB_SUCCESS) {
+ DPRINTF(0, (CE_CONT,
+ "!%s: %s: mii_read(status) failed",
+ dp->name, __func__));
+ goto usberr;
+ }
+
+ if (status != 0xffff && status != 0x0000) {
+ usbgem_mii_write(dp, MII_CONTROL, 0, &err);
+ if (err != USB_SUCCESS) {
+ DPRINTF(0, (CE_CONT,
+ "!%s: %s: mii_write(control) failed",
+ dp->name, __func__));
+ goto usberr;
+ }
+ goto PHY_found;
+ }
+ }
+ for (phy = dp->ugc.usbgc_mii_addr_min; phy < 32; phy++) {
+ dp->mii_phy_addr = phy;
+ usbgem_mii_write(dp, MII_CONTROL, 0, &err);
+ if (err != USB_SUCCESS) {
+ DPRINTF(0, (CE_CONT,
+ "!%s: %s: mii_write(control) failed",
+ dp->name, __func__));
+ goto usberr;
+ }
+ status = usbgem_mii_read(dp, MII_STATUS, &err);
+ if (err != USB_SUCCESS) {
+ DPRINTF(0, (CE_CONT,
+ "!%s: %s: mii_read(status) failed",
+ dp->name, __func__));
+ goto usberr;
+ }
+
+ if (status != 0xffff && status != 0) {
+ goto PHY_found;
+ }
+ }
+
+ cmn_err(CE_NOTE, "!%s: no MII PHY found", dp->name);
+ return (USB_FAILURE);
+
+PHY_found:
+ dp->mii_status = status;
+ dp->mii_status_ro = ~status;
+ dp->mii_phy_id = usbgem_mii_read(dp, MII_PHYIDH, &err) << 16;
+ if (err != USB_SUCCESS) {
+ DPRINTF(0, (CE_CONT,
+ "!%s: %s: mii_read(PHYIDH) failed",
+ dp->name, __func__));
+ goto usberr;
+ }
+ dp->mii_phy_id |= usbgem_mii_read(dp, MII_PHYIDL, &err);
+ if (err != USB_SUCCESS) {
+ DPRINTF(0, (CE_CONT,
+ "!%s: %s: mii_read(PHYIDL) failed",
+ dp->name, __func__));
+ goto usberr;
+ }
+
+ if (dp->mii_phy_addr < 0) {
+ cmn_err(CE_CONT, "!%s: using internal/non-MII PHY(0x%08x)",
+ dp->name, dp->mii_phy_id);
+ } else {
+ cmn_err(CE_CONT, "!%s: MII PHY (0x%08x) found at %d",
+ dp->name, dp->mii_phy_id, dp->mii_phy_addr);
+ }
+
+ cmn_err(CE_CONT,
+ "!%s: PHY control:%b, status:%b, advert:%b, lpar:%b, exp:%b",
+ dp->name,
+ usbgem_mii_read(dp, MII_CONTROL, &err), MII_CONTROL_BITS,
+ status, MII_STATUS_BITS,
+ usbgem_mii_read(dp, MII_AN_ADVERT, &err), MII_ABILITY_BITS,
+ usbgem_mii_read(dp, MII_AN_LPABLE, &err), MII_ABILITY_BITS,
+ usbgem_mii_read(dp, MII_AN_EXPANSION, &err), MII_AN_EXP_BITS);
+
+ dp->mii_xstatus = 0;
+ if (status & MII_STATUS_XSTATUS) {
+ dp->mii_xstatus = usbgem_mii_read(dp, MII_XSTATUS, &err);
+
+ cmn_err(CE_CONT, "!%s: xstatus:%b",
+ dp->name, dp->mii_xstatus, MII_XSTATUS_BITS);
+ }
+ dp->mii_xstatus_ro = ~dp->mii_xstatus;
+
+ /* check if the phy can advertize pause abilities */
+ adv_org = usbgem_mii_read(dp, MII_AN_ADVERT, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+
+ usbgem_mii_write(dp, MII_AN_ADVERT,
+ MII_ABILITY_PAUSE | MII_ABILITY_ASM_DIR, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+
+ adv = usbgem_mii_read(dp, MII_AN_ADVERT, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+
+ if ((adv & MII_ABILITY_PAUSE) == 0) {
+ dp->ugc.usbgc_flow_control &= ~1;
+ }
+
+ if ((adv & MII_ABILITY_ASM_DIR) == 0) {
+ dp->ugc.usbgc_flow_control &= ~2;
+ }
+
+ usbgem_mii_write(dp, MII_AN_ADVERT, adv_org, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ return (USB_SUCCESS);
+
+usberr:
+ return (USB_FAILURE);
+}
+
+int
+usbgem_mii_init_default(struct usbgem_dev *dp)
+{
+ /* ENPTY */
+ return (USB_SUCCESS);
+}
+
+static int
+usbgem_mii_start(struct usbgem_dev *dp)
+{
+ int err;
+ kthread_t *lwth;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /* make a first call of usbgem_mii_link_check() */
+ dp->link_watcher_stop = 0;
+ dp->mii_state = MII_STATE_UNKNOWN;
+ dp->mii_interval = drv_usectohz(1000*1000); /* 1sec */
+ dp->mii_last_check = ddi_get_lbolt();
+ dp->linkup_delay = 600 * drv_usectohz(1000*1000); /* 10 minutes */
+
+ lwth = thread_create(NULL, 0, usbgem_mii_link_watcher, dp, 0, &p0,
+ TS_RUN, minclsyspri);
+ if (lwth == NULL) {
+ cmn_err(CE_WARN,
+ "!%s: %s: failed to create a link watcher thread",
+ dp->name, __func__);
+ return (USB_FAILURE);
+ }
+ dp->link_watcher_did = lwth->t_did;
+
+ return (USB_SUCCESS);
+}
+
+static void
+usbgem_mii_stop(struct usbgem_dev *dp)
+{
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /* Ensure timer routine stopped */
+ dp->link_watcher_stop = 1;
+ cv_signal(&dp->link_watcher_wait_cv);
+ thread_join(dp->link_watcher_did);
+}
+
+/* ============================================================== */
+/*
+ * internal mac register operation interface
+ */
+/* ============================================================== */
+/*
+ * usbgem_mac_init: cold start
+ */
+static int
+usbgem_mac_init(struct usbgem_dev *dp)
+{
+ int err;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ if (dp->mac_state == MAC_STATE_DISCONNECTED) {
+ /* pretend we succeeded */
+ return (USB_SUCCESS);
+ }
+
+ ASSERT(dp->mac_state == MAC_STATE_STOPPED);
+
+ /* reset fatal error timestamp */
+ dp->fatal_error = (clock_t)0;
+
+ /* reset tx side state */
+ mutex_enter(&dp->txlock);
+ dp->tx_busy_cnt = 0;
+ dp->tx_max_packets = dp->ugc.usbgc_tx_list_max;
+ mutex_exit(&dp->txlock);
+
+ /* reset rx side state */
+ mutex_enter(&dp->rxlock);
+ dp->rx_busy_cnt = 0;
+ mutex_exit(&dp->rxlock);
+
+ err = usbgem_hal_init_chip(dp);
+ if (err == USB_SUCCESS) {
+ dp->mac_state = MAC_STATE_INITIALIZED;
+ }
+
+ return (err);
+}
+
+/*
+ * usbgem_mac_start: warm start
+ */
+static int
+usbgem_mac_start(struct usbgem_dev *dp)
+{
+ int err;
+ int i;
+ usb_flags_t flags = 0;
+ usb_intr_req_t *req;
+#ifdef USBGEM_DEBUG_LEVEL
+ usb_pipe_state_t p_state;
+#endif
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ if (dp->mac_state == MAC_STATE_DISCONNECTED) {
+ /* do nothing but don't return failure */
+ return (USB_SUCCESS);
+ }
+
+ if (dp->mac_state != MAC_STATE_INITIALIZED) {
+ /* don't return failer */
+ DPRINTF(0, (CE_CONT,
+ "!%s: %s: mac_state(%d) is not MAC_STATE_INITIALIZED",
+ dp->name, __func__, dp->mac_state));
+ goto x;
+ }
+
+ dp->mac_state = MAC_STATE_ONLINE;
+
+ if (usbgem_hal_start_chip(dp) != USB_SUCCESS) {
+ cmn_err(CE_NOTE,
+ "!%s: %s: usb error was detected during start_chip",
+ dp->name, __func__);
+ goto x;
+ }
+
+#ifdef USBGEM_DEBUG_LEVEL
+ usb_pipe_get_state(dp->intr_pipe, &p_state, 0);
+ ASSERT(p_state == USB_PIPE_STATE_IDLE);
+#endif /* USBGEM_DEBUG_LEVEL */
+
+ if (dp->ugc.usbgc_interrupt && dp->intr_pipe) {
+
+ /* make a request for interrupt */
+
+ req = usb_alloc_intr_req(dp->dip, 0, USB_FLAGS_SLEEP);
+ if (req == NULL) {
+ cmn_err(CE_WARN, "!%s: %s: failed to allocate intreq",
+ dp->name, __func__);
+ goto x;
+ }
+ req->intr_data = NULL;
+ req->intr_client_private = (usb_opaque_t)dp;
+ req->intr_timeout = 0;
+ req->intr_attributes =
+ USB_ATTRS_SHORT_XFER_OK | USB_ATTRS_AUTOCLEARING;
+ req->intr_len = dp->ep_intr->wMaxPacketSize;
+ req->intr_cb = usbgem_intr_cb;
+ req->intr_exc_cb = usbgem_intr_cb;
+ req->intr_completion_reason = 0;
+ req->intr_cb_flags = 0;
+
+ err = usb_pipe_intr_xfer(dp->intr_pipe, req, flags);
+ if (err != USB_SUCCESS) {
+ cmn_err(CE_WARN,
+ "%s: err:%d failed to start polling of intr pipe",
+ dp->name, err);
+ goto x;
+ }
+ }
+
+ /* kick to receive the first packet */
+ if (usbgem_init_rx_buf(dp) != USB_SUCCESS) {
+ goto err_stop_intr;
+ }
+ dp->rx_active = B_TRUE;
+
+ return (USB_SUCCESS);
+
+err_stop_intr:
+ /* stop the interrupt pipe */
+ DPRINTF(0, (CE_CONT, "!%s: %s: FAULURE", dp->name, __func__));
+ if (dp->ugc.usbgc_interrupt && dp->intr_pipe) {
+ usb_pipe_stop_intr_polling(dp->intr_pipe, USB_FLAGS_SLEEP);
+ }
+x:
+ ASSERT(dp->mac_state == MAC_STATE_ONLINE);
+ /* we use another flag to indicate error state. */
+ if (dp->fatal_error == (clock_t)0) {
+ dp->fatal_error = usbgem_timestamp_nz();
+ }
+ return (USB_FAILURE);
+}
+
+static int
+usbgem_mac_stop(struct usbgem_dev *dp, int new_state, boolean_t graceful)
+{
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /*
+ * we must have writer lock for dev_state_lock
+ */
+ ASSERT(new_state == MAC_STATE_STOPPED
+ || new_state == MAC_STATE_DISCONNECTED);
+
+ /* stop polling interrupt pipe */
+ if (dp->ugc.usbgc_interrupt && dp->intr_pipe) {
+ usb_pipe_stop_intr_polling(dp->intr_pipe, USB_FLAGS_SLEEP);
+ }
+
+ if (new_state == MAC_STATE_STOPPED || graceful) {
+ /* stop the nic hardware completely */
+ if (usbgem_hal_stop_chip(dp) != USB_SUCCESS) {
+ (void) usbgem_hal_reset_chip(dp);
+ }
+ }
+
+ /* stop preparing new rx packets and sending new packets */
+ dp->mac_state = new_state;
+
+ /* other processors must get mac_state correctly after here */
+ membar_producer();
+
+ /* cancel all requests we have sent */
+ usb_pipe_reset(dp->dip, dp->bulkin_pipe, USB_FLAGS_SLEEP, NULL, 0);
+ usb_pipe_reset(dp->dip, dp->bulkout_pipe, USB_FLAGS_SLEEP, NULL, 0);
+
+ DPRINTF(0, (CE_CONT,
+ "!%s: %s: rx_busy_cnt:%d tx_busy_cnt:%d",
+ dp->name, __func__, dp->rx_busy_cnt, dp->tx_busy_cnt));
+
+ /*
+ * Here all rx packets has been cancelled and their call back
+ * function has been exeuted, because we called usb_pipe_reset
+ * synchronously.
+ * So actually we just ensure rx_busy_cnt == 0.
+ */
+ mutex_enter(&dp->rxlock);
+ while (dp->rx_busy_cnt > 0) {
+ cv_wait(&dp->rx_drain_cv, &dp->rxlock);
+ }
+ mutex_exit(&dp->rxlock);
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: rx_busy_cnt is %d now",
+ dp->name, __func__, dp->rx_busy_cnt));
+
+ mutex_enter(&dp->txlock);
+ while (dp->tx_busy_cnt > 0) {
+ cv_wait(&dp->tx_drain_cv, &dp->txlock);
+ }
+ mutex_exit(&dp->txlock);
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: tx_busy_cnt is %d now",
+ dp->name, __func__, dp->tx_busy_cnt));
+
+ return (USB_SUCCESS);
+}
+
+static int
+usbgem_add_multicast(struct usbgem_dev *dp, const uint8_t *ep)
+{
+ int cnt;
+ int err;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ sema_p(&dp->rxfilter_lock);
+ if (dp->mc_count_req++ < USBGEM_MAXMC) {
+ /* append the new address at the end of the mclist */
+ cnt = dp->mc_count;
+ bcopy(ep, dp->mc_list[cnt].addr.ether_addr_octet,
+ ETHERADDRL);
+ if (dp->ugc.usbgc_multicast_hash) {
+ dp->mc_list[cnt].hash =
+ (*dp->ugc.usbgc_multicast_hash)(dp, ep);
+ }
+ dp->mc_count = cnt + 1;
+ }
+
+ if (dp->mc_count_req != dp->mc_count) {
+ /* multicast address list overflow */
+ dp->rxmode |= RXMODE_MULTI_OVF;
+ } else {
+ dp->rxmode &= ~RXMODE_MULTI_OVF;
+ }
+
+ if (dp->mac_state != MAC_STATE_DISCONNECTED) {
+ /* tell new multicast list to the hardware */
+ err = usbgem_hal_set_rx_filter(dp);
+ }
+ sema_v(&dp->rxfilter_lock);
+
+ return (err);
+}
+
+static int
+usbgem_remove_multicast(struct usbgem_dev *dp, const uint8_t *ep)
+{
+ size_t len;
+ int i;
+ int cnt;
+ int err;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ sema_p(&dp->rxfilter_lock);
+ dp->mc_count_req--;
+ cnt = dp->mc_count;
+ for (i = 0; i < cnt; i++) {
+ if (bcmp(ep, &dp->mc_list[i].addr, ETHERADDRL)) {
+ continue;
+ }
+ /* shrink the mclist by copying forward */
+ len = (cnt - (i + 1)) * sizeof (*dp->mc_list);
+ if (len > 0) {
+ bcopy(&dp->mc_list[i+1], &dp->mc_list[i], len);
+ }
+ dp->mc_count--;
+ break;
+ }
+
+ if (dp->mc_count_req != dp->mc_count) {
+ /* multicast address list overflow */
+ dp->rxmode |= RXMODE_MULTI_OVF;
+ } else {
+ dp->rxmode &= ~RXMODE_MULTI_OVF;
+ }
+
+ if (dp->mac_state != MAC_STATE_DISCONNECTED) {
+ err = usbgem_hal_set_rx_filter(dp);
+ }
+ sema_v(&dp->rxfilter_lock);
+
+ return (err);
+}
+
+
+/* ============================================================== */
+/*
+ * ioctl
+ */
+/* ============================================================== */
+enum ioc_reply {
+ IOC_INVAL = -1, /* bad, NAK with EINVAL */
+ IOC_DONE, /* OK, reply sent */
+ IOC_ACK, /* OK, just send ACK */
+ IOC_REPLY, /* OK, just send reply */
+ IOC_RESTART_ACK, /* OK, restart & ACK */
+ IOC_RESTART_REPLY /* OK, restart & reply */
+};
+
+
+#ifdef USBGEM_CONFIG_MAC_PROP
+static int
+usbgem_get_def_val(struct usbgem_dev *dp, mac_prop_id_t pr_num,
+ uint_t pr_valsize, void *pr_val)
+{
+ link_flowctrl_t fl;
+ int err = 0;
+
+ ASSERT(pr_valsize > 0);
+ switch (pr_num) {
+ case MAC_PROP_AUTONEG:
+ *(uint8_t *)pr_val =
+ BOOLEAN(dp->mii_status & MII_STATUS_CANAUTONEG);
+ break;
+
+ case MAC_PROP_FLOWCTRL:
+ if (pr_valsize < sizeof (link_flowctrl_t)) {
+ return (EINVAL);
+ }
+ switch (dp->ugc.usbgc_flow_control) {
+ case FLOW_CONTROL_NONE:
+ fl = LINK_FLOWCTRL_NONE;
+ break;
+ case FLOW_CONTROL_SYMMETRIC:
+ fl = LINK_FLOWCTRL_BI;
+ break;
+ case FLOW_CONTROL_TX_PAUSE:
+ fl = LINK_FLOWCTRL_TX;
+ break;
+ case FLOW_CONTROL_RX_PAUSE:
+ fl = LINK_FLOWCTRL_RX;
+ break;
+ }
+ bcopy(&fl, pr_val, sizeof (fl));
+ break;
+
+ case MAC_PROP_ADV_1000FDX_CAP:
+ case MAC_PROP_EN_1000FDX_CAP:
+ *(uint8_t *)pr_val =
+ (dp->mii_xstatus & MII_XSTATUS_1000BASET_FD) ||
+ (dp->mii_xstatus & MII_XSTATUS_1000BASEX_FD);
+ break;
+
+ case MAC_PROP_ADV_1000HDX_CAP:
+ case MAC_PROP_EN_1000HDX_CAP:
+ *(uint8_t *)pr_val =
+ (dp->mii_xstatus & MII_XSTATUS_1000BASET) ||
+ (dp->mii_xstatus & MII_XSTATUS_1000BASEX);
+ break;
+
+ case MAC_PROP_ADV_100T4_CAP:
+ case MAC_PROP_EN_100T4_CAP:
+ *(uint8_t *)pr_val =
+ BOOLEAN(dp->mii_status & MII_STATUS_100_BASE_T4);
+ break;
+
+ case MAC_PROP_ADV_100FDX_CAP:
+ case MAC_PROP_EN_100FDX_CAP:
+ *(uint8_t *)pr_val =
+ BOOLEAN(dp->mii_status & MII_STATUS_100_BASEX_FD);
+ break;
+
+ case MAC_PROP_ADV_100HDX_CAP:
+ case MAC_PROP_EN_100HDX_CAP:
+ *(uint8_t *)pr_val =
+ BOOLEAN(dp->mii_status & MII_STATUS_100_BASEX);
+ break;
+
+ case MAC_PROP_ADV_10FDX_CAP:
+ case MAC_PROP_EN_10FDX_CAP:
+ *(uint8_t *)pr_val =
+ BOOLEAN(dp->mii_status & MII_STATUS_10_FD);
+ break;
+
+ case MAC_PROP_ADV_10HDX_CAP:
+ case MAC_PROP_EN_10HDX_CAP:
+ *(uint8_t *)pr_val =
+ BOOLEAN(dp->mii_status & MII_STATUS_10);
+ break;
+
+ default:
+ err = ENOTSUP;
+ break;
+ }
+ return (err);
+}
+
+#ifdef MAC_VERSION_V1
+static void
+usbgem_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+ mac_prop_info_handle_t prh)
+{
+ struct usbgem_dev *dp = arg;
+ link_flowctrl_t fl;
+
+ /*
+ * By default permissions are read/write unless specified
+ * otherwise by the driver.
+ */
+
+ switch (pr_num) {
+ case MAC_PROP_DUPLEX:
+ case MAC_PROP_SPEED:
+ case MAC_PROP_STATUS:
+ case MAC_PROP_ADV_1000FDX_CAP:
+ case MAC_PROP_ADV_1000HDX_CAP:
+ case MAC_PROP_ADV_100FDX_CAP:
+ case MAC_PROP_ADV_100HDX_CAP:
+ case MAC_PROP_ADV_10FDX_CAP:
+ case MAC_PROP_ADV_10HDX_CAP:
+ case MAC_PROP_ADV_100T4_CAP:
+ case MAC_PROP_EN_100T4_CAP:
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ break;
+
+ case MAC_PROP_EN_1000FDX_CAP:
+ if ((dp->mii_xstatus_ro & MII_XSTATUS_1000BASET_FD) == 0) {
+ mac_prop_info_set_default_uint8(prh,
+ BOOLEAN(
+ dp->mii_xstatus & MII_XSTATUS_1000BASET_FD));
+ } else if ((dp->mii_xstatus_ro & MII_XSTATUS_1000BASEX_FD)
+ == 0) {
+ mac_prop_info_set_default_uint8(prh,
+ BOOLEAN(
+ dp->mii_xstatus & MII_XSTATUS_1000BASEX_FD));
+ } else {
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ }
+ break;
+
+ case MAC_PROP_EN_1000HDX_CAP:
+ if ((dp->mii_xstatus_ro & MII_XSTATUS_1000BASET) == 0) {
+ mac_prop_info_set_default_uint8(prh,
+ BOOLEAN(
+ dp->mii_xstatus & MII_XSTATUS_1000BASET));
+ } else if ((dp->mii_xstatus_ro & MII_XSTATUS_1000BASEX) == 0) {
+ mac_prop_info_set_default_uint8(prh,
+ BOOLEAN(
+ dp->mii_xstatus & MII_XSTATUS_1000BASEX));
+ } else {
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ }
+ break;
+
+ case MAC_PROP_EN_100FDX_CAP:
+ if ((dp->mii_status_ro & MII_STATUS_100_BASEX_FD) == 0) {
+ mac_prop_info_set_default_uint8(prh,
+ BOOLEAN(dp->mii_status & MII_STATUS_100_BASEX_FD));
+ } else {
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ }
+ break;
+
+ case MAC_PROP_EN_100HDX_CAP:
+ if ((dp->mii_status_ro & MII_STATUS_100_BASEX) == 0) {
+ mac_prop_info_set_default_uint8(prh,
+ BOOLEAN(dp->mii_status & MII_STATUS_100_BASEX));
+ } else {
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ }
+ break;
+
+ case MAC_PROP_EN_10FDX_CAP:
+ if ((dp->mii_status_ro & MII_STATUS_10_FD) == 0) {
+ mac_prop_info_set_default_uint8(prh,
+ BOOLEAN(dp->mii_status & MII_STATUS_10_FD));
+ } else {
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ }
+ break;
+
+ case MAC_PROP_EN_10HDX_CAP:
+ if ((dp->mii_status_ro & MII_STATUS_10) == 0) {
+ mac_prop_info_set_default_uint8(prh,
+ BOOLEAN(dp->mii_status & MII_STATUS_10));
+ } else {
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ }
+ break;
+
+ case MAC_PROP_AUTONEG:
+ if ((dp->mii_status_ro & MII_STATUS_CANAUTONEG) == 0) {
+ mac_prop_info_set_default_uint8(prh,
+ BOOLEAN(dp->mii_status & MII_STATUS_CANAUTONEG));
+ } else {
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ }
+ break;
+
+ case MAC_PROP_FLOWCTRL:
+ switch (dp->ugc.usbgc_flow_control) {
+ case FLOW_CONTROL_NONE:
+ fl = LINK_FLOWCTRL_NONE;
+ break;
+ case FLOW_CONTROL_SYMMETRIC:
+ fl = LINK_FLOWCTRL_BI;
+ break;
+ case FLOW_CONTROL_TX_PAUSE:
+ fl = LINK_FLOWCTRL_TX;
+ break;
+ case FLOW_CONTROL_RX_PAUSE:
+ fl = LINK_FLOWCTRL_RX;
+ break;
+ }
+ mac_prop_info_set_default_link_flowctrl(prh, fl);
+ break;
+
+ case MAC_PROP_MTU:
+ mac_prop_info_set_range_uint32(prh,
+ dp->ugc.usbgc_min_mtu, dp->ugc.usbgc_max_mtu);
+ break;
+
+ case MAC_PROP_PRIVATE:
+ break;
+ }
+}
+#endif
+
+static int
+usbgem_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+ uint_t pr_valsize, const void *pr_val)
+{
+ struct usbgem_dev *dp = arg;
+ int err = 0;
+ boolean_t update = B_FALSE;
+ link_flowctrl_t flowctrl;
+ uint32_t cur_mtu, new_mtu;
+
+ rw_enter(&dp->dev_state_lock, RW_WRITER);
+ switch (pr_num) {
+ case MAC_PROP_EN_1000FDX_CAP:
+ if ((dp->mii_xstatus_ro & MII_XSTATUS_1000BASET_FD) == 0 ||
+ (dp->mii_xstatus_ro & MII_XSTATUS_1000BASEX_FD) == 0) {
+ if (dp->anadv_1000fdx != *(uint8_t *)pr_val) {
+ dp->anadv_1000fdx = *(uint8_t *)pr_val;
+ update = B_TRUE;
+ }
+ } else {
+ err = ENOTSUP;
+ }
+ break;
+
+ case MAC_PROP_EN_1000HDX_CAP:
+ if ((dp->mii_xstatus_ro & MII_XSTATUS_1000BASET) == 0 ||
+ (dp->mii_xstatus_ro & MII_XSTATUS_1000BASEX) == 0) {
+ if (dp->anadv_1000hdx != *(uint8_t *)pr_val) {
+ dp->anadv_1000hdx = *(uint8_t *)pr_val;
+ update = B_TRUE;
+ }
+ } else {
+ err = ENOTSUP;
+ }
+ break;
+
+ case MAC_PROP_EN_100FDX_CAP:
+ if ((dp->mii_status_ro & MII_STATUS_100_BASEX_FD) == 0) {
+ if (dp->anadv_100fdx != *(uint8_t *)pr_val) {
+ dp->anadv_100fdx = *(uint8_t *)pr_val;
+ update = B_TRUE;
+ }
+ } else {
+ err = ENOTSUP;
+ }
+ break;
+
+ case MAC_PROP_EN_100HDX_CAP:
+ if ((dp->mii_status_ro & MII_STATUS_100_BASEX) == 0) {
+ if (dp->anadv_100hdx != *(uint8_t *)pr_val) {
+ dp->anadv_100hdx = *(uint8_t *)pr_val;
+ update = B_TRUE;
+ }
+ } else {
+ err = ENOTSUP;
+ }
+ break;
+
+ case MAC_PROP_EN_10FDX_CAP:
+ if ((dp->mii_status_ro & MII_STATUS_10_FD) == 0) {
+ if (dp->anadv_10fdx != *(uint8_t *)pr_val) {
+ dp->anadv_10fdx = *(uint8_t *)pr_val;
+ update = B_TRUE;
+ }
+ } else {
+ err = ENOTSUP;
+ }
+ break;
+
+ case MAC_PROP_EN_10HDX_CAP:
+ if ((dp->mii_status_ro & MII_STATUS_10_FD) == 0) {
+ if (dp->anadv_10hdx != *(uint8_t *)pr_val) {
+ dp->anadv_10hdx = *(uint8_t *)pr_val;
+ update = B_TRUE;
+ }
+ } else {
+ err = ENOTSUP;
+ }
+ break;
+
+ case MAC_PROP_AUTONEG:
+ if ((dp->mii_status_ro & MII_STATUS_CANAUTONEG) == 0) {
+ if (dp->anadv_autoneg != *(uint8_t *)pr_val) {
+ dp->anadv_autoneg = *(uint8_t *)pr_val;
+ update = B_TRUE;
+ }
+ } else {
+ err = ENOTSUP;
+ }
+ break;
+
+ case MAC_PROP_FLOWCTRL:
+ bcopy(pr_val, &flowctrl, sizeof (flowctrl));
+
+ switch (flowctrl) {
+ default:
+ err = EINVAL;
+ break;
+
+ case LINK_FLOWCTRL_NONE:
+ if (dp->flow_control != FLOW_CONTROL_NONE) {
+ dp->flow_control = FLOW_CONTROL_NONE;
+ update = B_TRUE;
+ }
+ break;
+
+ case LINK_FLOWCTRL_RX:
+ if (dp->flow_control != FLOW_CONTROL_RX_PAUSE) {
+ dp->flow_control = FLOW_CONTROL_RX_PAUSE;
+ update = B_TRUE;
+ }
+ break;
+
+ case LINK_FLOWCTRL_TX:
+ if (dp->flow_control != FLOW_CONTROL_TX_PAUSE) {
+ dp->flow_control = FLOW_CONTROL_TX_PAUSE;
+ update = B_TRUE;
+ }
+ break;
+
+ case LINK_FLOWCTRL_BI:
+ if (dp->flow_control != FLOW_CONTROL_SYMMETRIC) {
+ dp->flow_control = FLOW_CONTROL_SYMMETRIC;
+ update = B_TRUE;
+ }
+ break;
+ }
+ break;
+
+ case MAC_PROP_ADV_1000FDX_CAP:
+ case MAC_PROP_ADV_1000HDX_CAP:
+ case MAC_PROP_ADV_100FDX_CAP:
+ case MAC_PROP_ADV_100HDX_CAP:
+ case MAC_PROP_ADV_10FDX_CAP:
+ case MAC_PROP_ADV_10HDX_CAP:
+ case MAC_PROP_STATUS:
+ case MAC_PROP_SPEED:
+ case MAC_PROP_DUPLEX:
+ err = ENOTSUP; /* read-only prop. Can't set this. */
+ break;
+
+ case MAC_PROP_MTU:
+ bcopy(pr_val, &new_mtu, sizeof (new_mtu));
+ if (new_mtu != dp->mtu) {
+ err = EINVAL;
+ }
+ break;
+
+ case MAC_PROP_PRIVATE:
+ err = ENOTSUP;
+ break;
+
+ default:
+ err = ENOTSUP;
+ break;
+ }
+
+ if (update) {
+ /* sync with PHY */
+ usbgem_choose_forcedmode(dp);
+ dp->mii_state = MII_STATE_UNKNOWN;
+ cv_signal(&dp->link_watcher_wait_cv);
+ }
+ rw_exit(&dp->dev_state_lock);
+ return (err);
+}
+
+static int
+#ifdef MAC_VERSION_V1
+usbgem_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+ uint_t pr_valsize, void *pr_val)
+#else
+usbgem_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+ uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm)
+#endif
+{
+ struct usbgem_dev *dp = arg;
+ int err = 0;
+ link_flowctrl_t flowctrl;
+ uint64_t tmp = 0;
+
+ if (pr_valsize == 0) {
+ return (EINVAL);
+ }
+#ifndef MAC_VERSION_V1
+ *perm = MAC_PROP_PERM_RW;
+#endif
+ bzero(pr_val, pr_valsize);
+#ifndef MAC_VERSION_V1
+ if ((pr_flags & MAC_PROP_DEFAULT) && (pr_num != MAC_PROP_PRIVATE)) {
+ return (usbgem_get_def_val(dp, pr_num, pr_valsize, pr_val));
+ }
+#endif
+ rw_enter(&dp->dev_state_lock, RW_READER);
+ switch (pr_num) {
+ case MAC_PROP_DUPLEX:
+#ifndef MAC_VERSION_V1
+ *perm = MAC_PROP_PERM_READ;
+#endif
+ if (pr_valsize >= sizeof (link_duplex_t)) {
+ if (dp->mii_state != MII_STATE_LINKUP) {
+ *(link_duplex_t *)pr_val = LINK_DUPLEX_UNKNOWN;
+ } else if (dp->full_duplex) {
+ *(link_duplex_t *)pr_val = LINK_DUPLEX_FULL;
+ } else {
+ *(link_duplex_t *)pr_val = LINK_DUPLEX_HALF;
+ }
+ } else {
+ err = EINVAL;
+ }
+ break;
+ case MAC_PROP_SPEED:
+#ifndef MAC_VERSION_V1
+ *perm = MAC_PROP_PERM_READ;
+#endif
+ if (pr_valsize >= sizeof (uint64_t)) {
+ switch (dp->speed) {
+ case USBGEM_SPD_1000:
+ tmp = 1000000000;
+ break;
+ case USBGEM_SPD_100:
+ tmp = 100000000;
+ break;
+ case USBGEM_SPD_10:
+ tmp = 10000000;
+ break;
+ default:
+ tmp = 0;
+ }
+ bcopy(&tmp, pr_val, sizeof (tmp));
+ } else {
+ err = EINVAL;
+ }
+ break;
+
+ case MAC_PROP_AUTONEG:
+#ifndef MAC_VERSION_V1
+ if (dp->mii_status_ro & MII_STATUS_CANAUTONEG) {
+ *perm = MAC_PROP_PERM_READ;
+ }
+#endif
+ *(uint8_t *)pr_val = dp->anadv_autoneg;
+ break;
+
+ case MAC_PROP_FLOWCTRL:
+ if (pr_valsize >= sizeof (link_flowctrl_t)) {
+ switch (dp->flow_control) {
+ case FLOW_CONTROL_NONE:
+ flowctrl = LINK_FLOWCTRL_NONE;
+ break;
+ case FLOW_CONTROL_RX_PAUSE:
+ flowctrl = LINK_FLOWCTRL_RX;
+ break;
+ case FLOW_CONTROL_TX_PAUSE:
+ flowctrl = LINK_FLOWCTRL_TX;
+ break;
+ case FLOW_CONTROL_SYMMETRIC:
+ flowctrl = LINK_FLOWCTRL_BI;
+ break;
+ }
+ bcopy(&flowctrl, pr_val, sizeof (flowctrl));
+ } else {
+ err = EINVAL;
+ }
+ break;
+
+ case MAC_PROP_ADV_1000FDX_CAP:
+ case MAC_PROP_ADV_1000HDX_CAP:
+ case MAC_PROP_ADV_100FDX_CAP:
+ case MAC_PROP_ADV_100HDX_CAP:
+ case MAC_PROP_ADV_10FDX_CAP:
+ case MAC_PROP_ADV_10HDX_CAP:
+ case MAC_PROP_ADV_100T4_CAP:
+ usbgem_get_def_val(dp, pr_num, pr_valsize, pr_val);
+ break;
+
+ case MAC_PROP_EN_1000FDX_CAP:
+#ifndef MAC_VERSION_V1
+ if ((dp->mii_xstatus_ro & MII_XSTATUS_1000BASET_FD) &&
+ (dp->mii_xstatus_ro & MII_XSTATUS_1000BASEX_FD)) {
+ *perm = MAC_PROP_PERM_READ;
+ }
+#endif
+ *(uint8_t *)pr_val = dp->anadv_1000fdx;
+ break;
+
+ case MAC_PROP_EN_1000HDX_CAP:
+#ifndef MAC_VERSION_V1
+ if ((dp->mii_xstatus_ro & MII_XSTATUS_1000BASET) &&
+ (dp->mii_xstatus_ro & MII_XSTATUS_1000BASEX)) {
+ *perm = MAC_PROP_PERM_READ;
+ }
+#endif
+ *(uint8_t *)pr_val = dp->anadv_1000hdx;
+ break;
+
+ case MAC_PROP_EN_100FDX_CAP:
+#ifndef MAC_VERSION_V1
+ if (dp->mii_status_ro & MII_STATUS_100_BASEX_FD) {
+ *perm = MAC_PROP_PERM_READ;
+ }
+#endif
+ *(uint8_t *)pr_val = dp->anadv_100fdx;
+ break;
+
+ case MAC_PROP_EN_100HDX_CAP:
+#ifndef MAC_VERSION_V1
+ if (dp->mii_status_ro & MII_STATUS_100_BASEX) {
+ *perm = MAC_PROP_PERM_READ;
+ }
+#endif
+ *(uint8_t *)pr_val = dp->anadv_100hdx;
+ break;
+
+ case MAC_PROP_EN_10FDX_CAP:
+#ifndef MAC_VERSION_V1
+ if (dp->mii_status_ro & MII_STATUS_10_FD) {
+ *perm = MAC_PROP_PERM_READ;
+ }
+#endif
+ *(uint8_t *)pr_val = dp->anadv_10fdx;
+ break;
+
+ case MAC_PROP_EN_10HDX_CAP:
+#ifndef MAC_VERSION_V1
+ if (dp->mii_status_ro & MII_STATUS_10) {
+ *perm = MAC_PROP_PERM_READ;
+ }
+#endif
+ *(uint8_t *)pr_val = dp->anadv_10hdx;
+ break;
+
+ case MAC_PROP_EN_100T4_CAP:
+#ifndef MAC_VERSION_V1
+ if (dp->mii_status_ro & MII_STATUS_100_BASE_T4) {
+ *perm = MAC_PROP_PERM_READ;
+ }
+#endif
+ *(uint8_t *)pr_val = dp->anadv_100t4;
+ break;
+
+ case MAC_PROP_PRIVATE:
+ err = ENOTSUP;
+ break;
+
+#ifndef MAC_VERSION_V1
+ case MAC_PROP_MTU: {
+ mac_propval_range_t range;
+ if (!(pr_flags & MAC_PROP_POSSIBLE)) {
+ err = ENOTSUP;
+ break;
+ }
+ if (pr_valsize < sizeof (mac_propval_range_t)) {
+ err = EINVAL;
+ break;
+ }
+ range.mpr_count = 1;
+ range.mpr_type = MAC_PROPVAL_UINT32;
+ range.range_uint32[0].mpur_min = ETHERMTU;
+ range.range_uint32[0].mpur_max = dp->mtu;
+ bcopy(&range, pr_val, sizeof (range));
+ break;
+ }
+#endif
+ default:
+ err = ENOTSUP;
+ break;
+ }
+
+ rw_exit(&dp->dev_state_lock);
+ return (err);
+}
+#endif /* USBGEM_CONFIG_MAC_PROP */
+
+#ifdef USBGEM_CONFIG_ND
+/* ============================================================== */
+/*
+ * ND interface
+ */
+/* ============================================================== */
+enum {
+ PARAM_AUTONEG_CAP,
+ PARAM_PAUSE_CAP,
+ PARAM_ASYM_PAUSE_CAP,
+ PARAM_1000FDX_CAP,
+ PARAM_1000HDX_CAP,
+ PARAM_100T4_CAP,
+ PARAM_100FDX_CAP,
+ PARAM_100HDX_CAP,
+ PARAM_10FDX_CAP,
+ PARAM_10HDX_CAP,
+
+ PARAM_ADV_AUTONEG_CAP,
+ PARAM_ADV_PAUSE_CAP,
+ PARAM_ADV_ASYM_PAUSE_CAP,
+ PARAM_ADV_1000FDX_CAP,
+ PARAM_ADV_1000HDX_CAP,
+ PARAM_ADV_100T4_CAP,
+ PARAM_ADV_100FDX_CAP,
+ PARAM_ADV_100HDX_CAP,
+ PARAM_ADV_10FDX_CAP,
+ PARAM_ADV_10HDX_CAP,
+ PARAM_ADV_1000T_MS,
+
+ PARAM_LP_AUTONEG_CAP,
+ PARAM_LP_PAUSE_CAP,
+ PARAM_LP_ASYM_PAUSE_CAP,
+ PARAM_LP_1000FDX_CAP,
+ PARAM_LP_1000HDX_CAP,
+ PARAM_LP_100T4_CAP,
+ PARAM_LP_100FDX_CAP,
+ PARAM_LP_100HDX_CAP,
+ PARAM_LP_10FDX_CAP,
+ PARAM_LP_10HDX_CAP,
+
+ PARAM_LINK_STATUS,
+ PARAM_LINK_SPEED,
+ PARAM_LINK_DUPLEX,
+
+ PARAM_LINK_AUTONEG,
+ PARAM_LINK_RX_PAUSE,
+ PARAM_LINK_TX_PAUSE,
+
+ PARAM_LOOP_MODE,
+ PARAM_MSI_CNT,
+#ifdef DEBUG_RESUME
+ PARAM_RESUME_TEST,
+#endif
+
+ PARAM_COUNT
+};
+
+struct usbgem_nd_arg {
+ struct usbgem_dev *dp;
+ int item;
+};
+
+static int
+usbgem_param_get(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *credp)
+{
+ struct usbgem_dev *dp = ((struct usbgem_nd_arg *)(void *)arg)->dp;
+ int item = ((struct usbgem_nd_arg *)(void *)arg)->item;
+ long val;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called, item:%d",
+ dp->name, __func__, item));
+
+ switch (item) {
+ case PARAM_AUTONEG_CAP:
+ val = BOOLEAN(dp->mii_status & MII_STATUS_CANAUTONEG);
+ DPRINTF(1, (CE_CONT, "autoneg_cap:%d", val));
+ break;
+
+ case PARAM_PAUSE_CAP:
+ val = dp->ugc.usbgc_flow_control != FLOW_CONTROL_NONE;
+ break;
+
+ case PARAM_ASYM_PAUSE_CAP:
+ val = dp->ugc.usbgc_flow_control > FLOW_CONTROL_SYMMETRIC;
+ break;
+
+ case PARAM_1000FDX_CAP:
+ val = (dp->mii_xstatus & MII_XSTATUS_1000BASET_FD) ||
+ (dp->mii_xstatus & MII_XSTATUS_1000BASEX_FD);
+ break;
+
+ case PARAM_1000HDX_CAP:
+ val = (dp->mii_xstatus & MII_XSTATUS_1000BASET) ||
+ (dp->mii_xstatus & MII_XSTATUS_1000BASEX);
+ break;
+
+ case PARAM_100T4_CAP:
+ val = BOOLEAN(dp->mii_status & MII_STATUS_100_BASE_T4);
+ break;
+
+ case PARAM_100FDX_CAP:
+ val = BOOLEAN(dp->mii_status & MII_STATUS_100_BASEX_FD);
+ break;
+
+ case PARAM_100HDX_CAP:
+ val = BOOLEAN(dp->mii_status & MII_STATUS_100_BASEX);
+ break;
+
+ case PARAM_10FDX_CAP:
+ val = BOOLEAN(dp->mii_status & MII_STATUS_10_FD);
+ break;
+
+ case PARAM_10HDX_CAP:
+ val = BOOLEAN(dp->mii_status & MII_STATUS_10);
+ break;
+
+ case PARAM_ADV_AUTONEG_CAP:
+ val = dp->anadv_autoneg;
+ break;
+
+ case PARAM_ADV_PAUSE_CAP:
+ val = dp->anadv_pause;
+ break;
+
+ case PARAM_ADV_ASYM_PAUSE_CAP:
+ val = dp->anadv_asmpause;
+ break;
+
+ case PARAM_ADV_1000FDX_CAP:
+ val = dp->anadv_1000fdx;
+ break;
+
+ case PARAM_ADV_1000HDX_CAP:
+ val = dp->anadv_1000hdx;
+ break;
+
+ case PARAM_ADV_100T4_CAP:
+ val = dp->anadv_100t4;
+ break;
+
+ case PARAM_ADV_100FDX_CAP:
+ val = dp->anadv_100fdx;
+ break;
+
+ case PARAM_ADV_100HDX_CAP:
+ val = dp->anadv_100hdx;
+ break;
+
+ case PARAM_ADV_10FDX_CAP:
+ val = dp->anadv_10fdx;
+ break;
+
+ case PARAM_ADV_10HDX_CAP:
+ val = dp->anadv_10hdx;
+ break;
+
+ case PARAM_ADV_1000T_MS:
+ val = dp->anadv_1000t_ms;
+ break;
+
+ case PARAM_LP_AUTONEG_CAP:
+ val = BOOLEAN(dp->mii_exp & MII_AN_EXP_LPCANAN);
+ break;
+
+ case PARAM_LP_PAUSE_CAP:
+ val = BOOLEAN(dp->mii_lpable & MII_ABILITY_PAUSE);
+ break;
+
+ case PARAM_LP_ASYM_PAUSE_CAP:
+ val = BOOLEAN(dp->mii_lpable & MII_ABILITY_ASM_DIR);
+ break;
+
+ case PARAM_LP_1000FDX_CAP:
+ val = BOOLEAN(dp->mii_stat1000 & MII_1000TS_LP_FULL);
+ break;
+
+ case PARAM_LP_1000HDX_CAP:
+ val = BOOLEAN(dp->mii_stat1000 & MII_1000TS_LP_HALF);
+ break;
+
+ case PARAM_LP_100T4_CAP:
+ val = BOOLEAN(dp->mii_lpable & MII_ABILITY_100BASE_T4);
+ break;
+
+ case PARAM_LP_100FDX_CAP:
+ val = BOOLEAN(dp->mii_lpable & MII_ABILITY_100BASE_TX_FD);
+ break;
+
+ case PARAM_LP_100HDX_CAP:
+ val = BOOLEAN(dp->mii_lpable & MII_ABILITY_100BASE_TX);
+ break;
+
+ case PARAM_LP_10FDX_CAP:
+ val = BOOLEAN(dp->mii_lpable & MII_ABILITY_10BASE_T_FD);
+ break;
+
+ case PARAM_LP_10HDX_CAP:
+ val = BOOLEAN(dp->mii_lpable & MII_ABILITY_10BASE_T);
+ break;
+
+ case PARAM_LINK_STATUS:
+ val = (dp->mii_state == MII_STATE_LINKUP);
+ break;
+
+ case PARAM_LINK_SPEED:
+ val = usbgem_speed_value[dp->speed];
+ break;
+
+ case PARAM_LINK_DUPLEX:
+ val = 0;
+ if (dp->mii_state == MII_STATE_LINKUP) {
+ val = dp->full_duplex ? 2 : 1;
+ }
+ break;
+
+ case PARAM_LINK_AUTONEG:
+ val = BOOLEAN(dp->mii_exp & MII_AN_EXP_LPCANAN);
+ break;
+
+ case PARAM_LINK_RX_PAUSE:
+ val = (dp->flow_control == FLOW_CONTROL_SYMMETRIC) ||
+ (dp->flow_control == FLOW_CONTROL_RX_PAUSE);
+ break;
+
+ case PARAM_LINK_TX_PAUSE:
+ val = (dp->flow_control == FLOW_CONTROL_SYMMETRIC) ||
+ (dp->flow_control == FLOW_CONTROL_TX_PAUSE);
+ break;
+
+#ifdef DEBUG_RESUME
+ case PARAM_RESUME_TEST:
+ val = 0;
+ break;
+#endif
+ default:
+ cmn_err(CE_WARN, "%s: unimplemented ndd control (%d)",
+ dp->name, item);
+ break;
+ }
+
+ (void) mi_mpprintf(mp, "%ld", val);
+
+ return (0);
+}
+
+static int
+usbgem_param_set(queue_t *q,
+ mblk_t *mp, char *value, caddr_t arg, cred_t *credp)
+{
+ struct usbgem_dev *dp = ((struct usbgem_nd_arg *)(void *)arg)->dp;
+ int item = ((struct usbgem_nd_arg *)(void *)arg)->item;
+ long val;
+ char *end;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+ if (ddi_strtol(value, &end, 10, &val)) {
+ return (EINVAL);
+ }
+ if (end == value) {
+ return (EINVAL);
+ }
+
+ switch (item) {
+ case PARAM_ADV_AUTONEG_CAP:
+ if (val != 0 && val != 1) {
+ goto err;
+ }
+ if (val && (dp->mii_status & MII_STATUS_CANAUTONEG) == 0) {
+ goto err;
+ }
+ dp->anadv_autoneg = (int)val;
+ break;
+
+ case PARAM_ADV_PAUSE_CAP:
+ if (val != 0 && val != 1) {
+ goto err;
+ }
+ if (val && dp->ugc.usbgc_flow_control == FLOW_CONTROL_NONE) {
+ goto err;
+ }
+ dp->anadv_pause = (int)val;
+ break;
+
+ case PARAM_ADV_ASYM_PAUSE_CAP:
+ if (val != 0 && val != 1) {
+ goto err;
+ }
+ if (val &&
+ dp->ugc.usbgc_flow_control <= FLOW_CONTROL_SYMMETRIC) {
+ goto err;
+ }
+ dp->anadv_asmpause = (int)val;
+ break;
+
+ case PARAM_ADV_1000FDX_CAP:
+ if (val != 0 && val != 1) {
+ goto err;
+ }
+ if (val && (dp->mii_xstatus &
+ (MII_XSTATUS_1000BASET_FD |
+ MII_XSTATUS_1000BASEX_FD)) == 0) {
+ goto err;
+ }
+ dp->anadv_1000fdx = (int)val;
+ break;
+
+ case PARAM_ADV_1000HDX_CAP:
+ if (val != 0 && val != 1) {
+ goto err;
+ }
+ if (val && (dp->mii_xstatus &
+ (MII_XSTATUS_1000BASET | MII_XSTATUS_1000BASEX)) == 0) {
+ goto err;
+ }
+ dp->anadv_1000hdx = (int)val;
+ break;
+
+ case PARAM_ADV_100T4_CAP:
+ if (val != 0 && val != 1) {
+ goto err;
+ }
+ if (val && (dp->mii_status & MII_STATUS_100_BASE_T4) == 0) {
+ goto err;
+ }
+ dp->anadv_100t4 = (int)val;
+ break;
+
+ case PARAM_ADV_100FDX_CAP:
+ if (val != 0 && val != 1) {
+ goto err;
+ }
+ if (val && (dp->mii_status & MII_STATUS_100_BASEX_FD) == 0) {
+ goto err;
+ }
+ dp->anadv_100fdx = (int)val;
+ break;
+
+ case PARAM_ADV_100HDX_CAP:
+ if (val != 0 && val != 1) {
+ goto err;
+ }
+ if (val && (dp->mii_status & MII_STATUS_100_BASEX) == 0) {
+ goto err;
+ }
+ dp->anadv_100hdx = (int)val;
+ break;
+
+ case PARAM_ADV_10FDX_CAP:
+ if (val != 0 && val != 1) {
+ goto err;
+ }
+ if (val && (dp->mii_status & MII_STATUS_10_FD) == 0) {
+ goto err;
+ }
+ dp->anadv_10fdx = (int)val;
+ break;
+
+ case PARAM_ADV_10HDX_CAP:
+ if (val != 0 && val != 1) {
+ goto err;
+ }
+ if (val && (dp->mii_status & MII_STATUS_10) == 0) {
+ goto err;
+ }
+ dp->anadv_10hdx = (int)val;
+ break;
+
+ case PARAM_ADV_1000T_MS:
+ if (val != 0 && val != 1 && val != 2) {
+ goto err;
+ }
+ if (val && (dp->mii_xstatus &
+ (MII_XSTATUS_1000BASET | MII_XSTATUS_1000BASET_FD)) == 0) {
+ goto err;
+ }
+ dp->anadv_1000t_ms = (int)val;
+ break;
+
+#ifdef DEBUG_RESUME
+ case PARAM_RESUME_TEST:
+ mutex_exit(&dp->xmitlock);
+ mutex_exit(&dp->intrlock);
+ gem_suspend(dp->dip);
+ gem_resume(dp->dip);
+ mutex_enter(&dp->intrlock);
+ mutex_enter(&dp->xmitlock);
+ break;
+#endif
+ }
+
+ /* sync with PHY */
+ usbgem_choose_forcedmode(dp);
+
+ dp->mii_state = MII_STATE_UNKNOWN;
+ if (dp->ugc.usbgc_mii_hw_link_detection) {
+ /* wake up link watcher possiblely sleeps */
+ cv_signal(&dp->link_watcher_wait_cv);
+ }
+
+ return (0);
+err:
+ return (EINVAL);
+}
+
+static void
+usbgem_nd_load(struct usbgem_dev *dp,
+ char *name, ndgetf_t gf, ndsetf_t sf, int item)
+{
+ struct usbgem_nd_arg *arg;
+
+ ASSERT(item >= 0);
+ ASSERT(item < PARAM_COUNT);
+
+ arg = &((struct usbgem_nd_arg *)(void *)dp->nd_arg_p)[item];
+ arg->dp = dp;
+ arg->item = item;
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: name:%s, item:%d",
+ dp->name, __func__, name, item));
+ (void) nd_load(&dp->nd_data_p, name, gf, sf, (caddr_t)arg);
+}
+
+static void
+usbgem_nd_setup(struct usbgem_dev *dp)
+{
+ DPRINTF(1, (CE_CONT, "!%s: %s: called, mii_status:0x%b",
+ dp->name, __func__, dp->mii_status, MII_STATUS_BITS));
+
+ ASSERT(dp->nd_arg_p == NULL);
+
+ dp->nd_arg_p =
+ kmem_zalloc(sizeof (struct usbgem_nd_arg) * PARAM_COUNT, KM_SLEEP);
+
+#define SETFUNC(x) ((x) ? usbgem_param_set : NULL)
+
+ usbgem_nd_load(dp, "autoneg_cap",
+ usbgem_param_get, NULL, PARAM_AUTONEG_CAP);
+ usbgem_nd_load(dp, "pause_cap",
+ usbgem_param_get, NULL, PARAM_PAUSE_CAP);
+ usbgem_nd_load(dp, "asym_pause_cap",
+ usbgem_param_get, NULL, PARAM_ASYM_PAUSE_CAP);
+ usbgem_nd_load(dp, "1000fdx_cap",
+ usbgem_param_get, NULL, PARAM_1000FDX_CAP);
+ usbgem_nd_load(dp, "1000hdx_cap",
+ usbgem_param_get, NULL, PARAM_1000HDX_CAP);
+ usbgem_nd_load(dp, "100T4_cap",
+ usbgem_param_get, NULL, PARAM_100T4_CAP);
+ usbgem_nd_load(dp, "100fdx_cap",
+ usbgem_param_get, NULL, PARAM_100FDX_CAP);
+ usbgem_nd_load(dp, "100hdx_cap",
+ usbgem_param_get, NULL, PARAM_100HDX_CAP);
+ usbgem_nd_load(dp, "10fdx_cap",
+ usbgem_param_get, NULL, PARAM_10FDX_CAP);
+ usbgem_nd_load(dp, "10hdx_cap",
+ usbgem_param_get, NULL, PARAM_10HDX_CAP);
+
+ /* Our advertised capabilities */
+ usbgem_nd_load(dp, "adv_autoneg_cap", usbgem_param_get,
+ SETFUNC(dp->mii_status & MII_STATUS_CANAUTONEG),
+ PARAM_ADV_AUTONEG_CAP);
+ usbgem_nd_load(dp, "adv_pause_cap", usbgem_param_get,
+ SETFUNC(dp->ugc.usbgc_flow_control & 1),
+ PARAM_ADV_PAUSE_CAP);
+ usbgem_nd_load(dp, "adv_asym_pause_cap", usbgem_param_get,
+ SETFUNC(dp->ugc.usbgc_flow_control & 2),
+ PARAM_ADV_ASYM_PAUSE_CAP);
+ usbgem_nd_load(dp, "adv_1000fdx_cap", usbgem_param_get,
+ SETFUNC(dp->mii_xstatus &
+ (MII_XSTATUS_1000BASEX_FD | MII_XSTATUS_1000BASET_FD)),
+ PARAM_ADV_1000FDX_CAP);
+ usbgem_nd_load(dp, "adv_1000hdx_cap", usbgem_param_get,
+ SETFUNC(dp->mii_xstatus &
+ (MII_XSTATUS_1000BASEX | MII_XSTATUS_1000BASET)),
+ PARAM_ADV_1000HDX_CAP);
+ usbgem_nd_load(dp, "adv_100T4_cap", usbgem_param_get,
+ SETFUNC((dp->mii_status & MII_STATUS_100_BASE_T4) &&
+ !dp->mii_advert_ro),
+ PARAM_ADV_100T4_CAP);
+ usbgem_nd_load(dp, "adv_100fdx_cap", usbgem_param_get,
+ SETFUNC((dp->mii_status & MII_STATUS_100_BASEX_FD) &&
+ !dp->mii_advert_ro),
+ PARAM_ADV_100FDX_CAP);
+ usbgem_nd_load(dp, "adv_100hdx_cap", usbgem_param_get,
+ SETFUNC((dp->mii_status & MII_STATUS_100_BASEX) &&
+ !dp->mii_advert_ro),
+ PARAM_ADV_100HDX_CAP);
+ usbgem_nd_load(dp, "adv_10fdx_cap", usbgem_param_get,
+ SETFUNC((dp->mii_status & MII_STATUS_10_FD) &&
+ !dp->mii_advert_ro),
+ PARAM_ADV_10FDX_CAP);
+ usbgem_nd_load(dp, "adv_10hdx_cap", usbgem_param_get,
+ SETFUNC((dp->mii_status & MII_STATUS_10) &&
+ !dp->mii_advert_ro),
+ PARAM_ADV_10HDX_CAP);
+ usbgem_nd_load(dp, "adv_1000t_ms", usbgem_param_get,
+ SETFUNC(dp->mii_xstatus &
+ (MII_XSTATUS_1000BASET_FD | MII_XSTATUS_1000BASET)),
+ PARAM_ADV_1000T_MS);
+
+
+ /* Partner's advertised capabilities */
+ usbgem_nd_load(dp, "lp_autoneg_cap",
+ usbgem_param_get, NULL, PARAM_LP_AUTONEG_CAP);
+ usbgem_nd_load(dp, "lp_pause_cap",
+ usbgem_param_get, NULL, PARAM_LP_PAUSE_CAP);
+ usbgem_nd_load(dp, "lp_asym_pause_cap",
+ usbgem_param_get, NULL, PARAM_LP_ASYM_PAUSE_CAP);
+ usbgem_nd_load(dp, "lp_1000fdx_cap",
+ usbgem_param_get, NULL, PARAM_LP_1000FDX_CAP);
+ usbgem_nd_load(dp, "lp_1000hdx_cap",
+ usbgem_param_get, NULL, PARAM_LP_1000HDX_CAP);
+ usbgem_nd_load(dp, "lp_100T4_cap",
+ usbgem_param_get, NULL, PARAM_LP_100T4_CAP);
+ usbgem_nd_load(dp, "lp_100fdx_cap",
+ usbgem_param_get, NULL, PARAM_LP_100FDX_CAP);
+ usbgem_nd_load(dp, "lp_100hdx_cap",
+ usbgem_param_get, NULL, PARAM_LP_100HDX_CAP);
+ usbgem_nd_load(dp, "lp_10fdx_cap",
+ usbgem_param_get, NULL, PARAM_LP_10FDX_CAP);
+ usbgem_nd_load(dp, "lp_10hdx_cap",
+ usbgem_param_get, NULL, PARAM_LP_10HDX_CAP);
+
+ /* Current operating modes */
+ usbgem_nd_load(dp, "link_status",
+ usbgem_param_get, NULL, PARAM_LINK_STATUS);
+ usbgem_nd_load(dp, "link_speed",
+ usbgem_param_get, NULL, PARAM_LINK_SPEED);
+ usbgem_nd_load(dp, "link_duplex",
+ usbgem_param_get, NULL, PARAM_LINK_DUPLEX);
+ usbgem_nd_load(dp, "link_autoneg",
+ usbgem_param_get, NULL, PARAM_LINK_AUTONEG);
+ usbgem_nd_load(dp, "link_rx_pause",
+ usbgem_param_get, NULL, PARAM_LINK_RX_PAUSE);
+ usbgem_nd_load(dp, "link_tx_pause",
+ usbgem_param_get, NULL, PARAM_LINK_TX_PAUSE);
+#ifdef DEBUG_RESUME
+ usbgem_nd_load(dp, "resume_test",
+ usbgem_param_get, usbgem_param_set, PARAM_RESUME_TEST);
+#endif
+#undef SETFUNC
+}
+
+static
+enum ioc_reply
+usbgem_nd_ioctl(struct usbgem_dev *dp,
+ queue_t *wq, mblk_t *mp, struct iocblk *iocp)
+{
+ boolean_t ok;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ switch (iocp->ioc_cmd) {
+ case ND_GET:
+ ok = nd_getset(wq, dp->nd_data_p, mp);
+ DPRINTF(1, (CE_CONT,
+ "%s: get %s", dp->name, ok ? "OK" : "FAIL"));
+ return (ok ? IOC_REPLY : IOC_INVAL);
+
+ case ND_SET:
+ ok = nd_getset(wq, dp->nd_data_p, mp);
+
+ DPRINTF(1, (CE_CONT, "%s: set %s err %d",
+ dp->name, ok ? "OK" : "FAIL", iocp->ioc_error));
+
+ if (!ok) {
+ return (IOC_INVAL);
+ }
+
+ if (iocp->ioc_error) {
+ return (IOC_REPLY);
+ }
+
+ return (IOC_RESTART_REPLY);
+ }
+
+ cmn_err(CE_WARN, "%s: invalid cmd 0x%x", dp->name, iocp->ioc_cmd);
+
+ return (IOC_INVAL);
+}
+
+static void
+usbgem_nd_cleanup(struct usbgem_dev *dp)
+{
+ ASSERT(dp->nd_data_p != NULL);
+ ASSERT(dp->nd_arg_p != NULL);
+
+ nd_free(&dp->nd_data_p);
+
+ kmem_free(dp->nd_arg_p, sizeof (struct usbgem_nd_arg) * PARAM_COUNT);
+ dp->nd_arg_p = NULL;
+}
+#endif /* USBGEM_CONFIG_ND */
+
+static void
+usbgem_mac_ioctl(struct usbgem_dev *dp, queue_t *wq, mblk_t *mp)
+{
+ struct iocblk *iocp;
+ enum ioc_reply status;
+ int cmd;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /*
+ * Validate the command before bothering with the mutex ...
+ */
+ iocp = (void *)mp->b_rptr;
+ iocp->ioc_error = 0;
+ cmd = iocp->ioc_cmd;
+
+ DPRINTF(1, (CE_CONT, "%s: %s cmd:0x%x", dp->name, __func__, cmd));
+
+#ifdef USBGEM_CONFIG_ND
+ switch (cmd) {
+ default:
+ _NOTE(NOTREACHED)
+ status = IOC_INVAL;
+ break;
+
+ case ND_GET:
+ case ND_SET:
+ status = usbgem_nd_ioctl(dp, wq, mp, iocp);
+ break;
+ }
+
+ /*
+ * Finally, decide how to reply
+ */
+ switch (status) {
+ default:
+ case IOC_INVAL:
+ /*
+ * Error, reply with a NAK and EINVAL or the specified error
+ */
+ miocnak(wq, mp, 0, iocp->ioc_error == 0 ?
+ EINVAL : iocp->ioc_error);
+ break;
+
+ case IOC_DONE:
+ /*
+ * OK, reply already sent
+ */
+ break;
+
+ case IOC_RESTART_ACK:
+ case IOC_ACK:
+ /*
+ * OK, reply with an ACK
+ */
+ miocack(wq, mp, 0, 0);
+ break;
+
+ case IOC_RESTART_REPLY:
+ case IOC_REPLY:
+ /*
+ * OK, send prepared reply as ACK or NAK
+ */
+ mp->b_datap->db_type =
+ iocp->ioc_error == 0 ? M_IOCACK : M_IOCNAK;
+ qreply(wq, mp);
+ break;
+ }
+#else
+ miocnak(wq, mp, 0, EINVAL);
+ return;
+#endif /* USBGEM_CONFIG_GLDv3 */
+}
+
+#ifndef SYS_MAC_H
+#define XCVR_UNDEFINED 0
+#define XCVR_NONE 1
+#define XCVR_10 2
+#define XCVR_100T4 3
+#define XCVR_100X 4
+#define XCVR_100T2 5
+#define XCVR_1000X 6
+#define XCVR_1000T 7
+#endif
+static int
+usbgem_mac_xcvr_inuse(struct usbgem_dev *dp)
+{
+ int val = XCVR_UNDEFINED;
+
+ if ((dp->mii_status & MII_STATUS_XSTATUS) == 0) {
+ if (dp->mii_status & MII_STATUS_100_BASE_T4) {
+ val = XCVR_100T4;
+ } else if (dp->mii_status &
+ (MII_STATUS_100_BASEX_FD |
+ MII_STATUS_100_BASEX)) {
+ val = XCVR_100X;
+ } else if (dp->mii_status &
+ (MII_STATUS_100_BASE_T2_FD |
+ MII_STATUS_100_BASE_T2)) {
+ val = XCVR_100T2;
+ } else if (dp->mii_status &
+ (MII_STATUS_10_FD | MII_STATUS_10)) {
+ val = XCVR_10;
+ }
+ } else if (dp->mii_xstatus &
+ (MII_XSTATUS_1000BASET_FD | MII_XSTATUS_1000BASET)) {
+ val = XCVR_1000T;
+ } else if (dp->mii_xstatus &
+ (MII_XSTATUS_1000BASEX_FD | MII_XSTATUS_1000BASEX)) {
+ val = XCVR_1000X;
+ }
+
+ return (val);
+}
+
+#ifdef USBGEM_CONFIG_GLDv3
+/* ============================================================== */
+/*
+ * GLDv3 interface
+ */
+/* ============================================================== */
+static int usbgem_m_getstat(void *, uint_t, uint64_t *);
+static int usbgem_m_start(void *);
+static void usbgem_m_stop(void *);
+static int usbgem_m_setpromisc(void *, boolean_t);
+static int usbgem_m_multicst(void *, boolean_t, const uint8_t *);
+static int usbgem_m_unicst(void *, const uint8_t *);
+static mblk_t *usbgem_m_tx(void *, mblk_t *);
+static void usbgem_m_ioctl(void *, queue_t *, mblk_t *);
+#ifdef GEM_CONFIG_MAC_PROP
+static int usbgem_m_setprop(void *, const char *, mac_prop_id_t,
+ uint_t, const void *);
+#ifdef MAC_VERSION_V1
+static int usbgem_m_getprop(void *, const char *, mac_prop_id_t,
+ uint_t, void *);
+#else
+static int usbgem_m_getprop(void *, const char *, mac_prop_id_t,
+ uint_t, uint_t, void *, uint_t *);
+#endif
+#endif
+
+#ifdef _SYS_MAC_PROVIDER_H
+#define GEM_M_CALLBACK_FLAGS (MC_IOCTL)
+#else
+#define GEM_M_CALLBACK_FLAGS (MC_IOCTL)
+#endif
+
+static mac_callbacks_t gem_m_callbacks = {
+#ifdef USBGEM_CONFIG_MAC_PROP
+#ifdef MAC_VERSION_V1
+ GEM_M_CALLBACK_FLAGS | MC_SETPROP | MC_GETPROP | MC_PROPINFO,
+#else
+ GEM_M_CALLBACK_FLAGS | MC_SETPROP | MC_GETPROP,
+#endif
+#else
+ GEM_M_CALLBACK_FLAGS,
+#endif
+ usbgem_m_getstat,
+ usbgem_m_start,
+ usbgem_m_stop,
+ usbgem_m_setpromisc,
+ usbgem_m_multicst,
+ usbgem_m_unicst,
+ usbgem_m_tx,
+#ifdef _SYS_MAC_PROVIDER_H
+#ifdef MAC_VERSION_V1
+ NULL,
+#endif
+#else
+ NULL, /* m_resources */
+#endif
+ usbgem_m_ioctl,
+ NULL, /* m_getcapab */
+#ifdef USBGEM_CONFIG_MAC_PROP
+ NULL,
+ NULL,
+ usbgem_m_setprop,
+ usbgem_m_getprop,
+#endif
+#ifdef MAC_VERSION_V1
+ usbgem_m_propinfo,
+#endif
+};
+
+static int
+usbgem_m_start(void *arg)
+{
+ int ret;
+ int err;
+ struct usbgem_dev *dp = arg;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ err = EIO;
+
+ rw_enter(&dp->dev_state_lock, RW_WRITER);
+ dp->nic_state = NIC_STATE_ONLINE;
+
+ if (dp->mac_state == MAC_STATE_DISCONNECTED) {
+ err = 0;
+ goto x;
+ }
+ if (usbgem_mac_init(dp) != USB_SUCCESS) {
+ goto x;
+ }
+
+ /* initialize rx filter state */
+ sema_p(&dp->rxfilter_lock);
+ dp->mc_count = 0;
+ dp->mc_count_req = 0;
+
+ bcopy(dp->dev_addr.ether_addr_octet,
+ dp->cur_addr.ether_addr_octet, ETHERADDRL);
+ dp->rxmode |= RXMODE_ENABLE;
+
+ ret = usbgem_hal_set_rx_filter(dp);
+ sema_v(&dp->rxfilter_lock);
+
+ if (ret != USB_SUCCESS) {
+ goto x;
+ }
+
+ if (dp->mii_state == MII_STATE_LINKUP) {
+ /* setup media mode if the link have been up */
+ if (usbgem_hal_set_media(dp) != USB_SUCCESS) {
+ goto x;
+ }
+ if (usbgem_mac_start(dp) != USB_SUCCESS) {
+ goto x;
+ }
+ }
+
+ err = 0;
+x:
+ rw_exit(&dp->dev_state_lock);
+ return (err);
+}
+
+static void
+usbgem_m_stop(void *arg)
+{
+ struct usbgem_dev *dp = arg;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /* stop rx gracefully */
+ rw_enter(&dp->dev_state_lock, RW_READER);
+ sema_p(&dp->rxfilter_lock);
+ dp->rxmode &= ~RXMODE_ENABLE;
+
+ if (dp->mac_state != MAC_STATE_DISCONNECTED) {
+ (void) usbgem_hal_set_rx_filter(dp);
+ }
+ sema_v(&dp->rxfilter_lock);
+ rw_exit(&dp->dev_state_lock);
+
+ /* make the nic state inactive */
+ rw_enter(&dp->dev_state_lock, RW_WRITER);
+ dp->nic_state = NIC_STATE_STOPPED;
+
+ /* stop mac completely */
+ if (dp->mac_state != MAC_STATE_DISCONNECTED) {
+ (void) usbgem_mac_stop(dp, MAC_STATE_STOPPED, STOP_GRACEFUL);
+ }
+ rw_exit(&dp->dev_state_lock);
+}
+
+static int
+usbgem_m_multicst(void *arg, boolean_t add, const uint8_t *ep)
+{
+ int err;
+ int ret;
+ struct usbgem_dev *dp = arg;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ rw_enter(&dp->dev_state_lock, RW_READER);
+ if (add) {
+ ret = usbgem_add_multicast(dp, ep);
+ } else {
+ ret = usbgem_remove_multicast(dp, ep);
+ }
+ rw_exit(&dp->dev_state_lock);
+
+ err = 0;
+ if (ret != USB_SUCCESS) {
+#ifdef GEM_CONFIG_FMA
+ ddi_fm_service_impact(dp->dip, DDI_SERVICE_DEGRADED);
+#endif
+ err = EIO;
+ }
+
+ return (err);
+}
+
+static int
+usbgem_m_setpromisc(void *arg, boolean_t on)
+{
+ int err;
+ struct usbgem_dev *dp = arg;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ rw_enter(&dp->dev_state_lock, RW_READER);
+
+ sema_p(&dp->rxfilter_lock);
+ if (on) {
+ dp->rxmode |= RXMODE_PROMISC;
+ } else {
+ dp->rxmode &= ~RXMODE_PROMISC;
+ }
+
+ err = 0;
+ if (dp->mac_state != MAC_STATE_DISCONNECTED) {
+ if (usbgem_hal_set_rx_filter(dp) != USB_SUCCESS) {
+ err = EIO;
+ }
+ }
+ sema_v(&dp->rxfilter_lock);
+
+ rw_exit(&dp->dev_state_lock);
+
+#ifdef GEM_CONFIG_FMA
+ if (err != 0) {
+ ddi_fm_service_impact(dp->dip, DDI_SERVICE_DEGRADED);
+ }
+#endif
+ return (err);
+}
+
+int
+usbgem_m_getstat(void *arg, uint_t stat, uint64_t *valp)
+{
+ int ret;
+ uint64_t val;
+ struct usbgem_dev *dp = arg;
+ struct usbgem_stats *gstp = &dp->stats;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ rw_enter(&dp->dev_state_lock, RW_READER);
+ if (dp->mac_state == MAC_STATE_DISCONNECTED) {
+ rw_exit(&dp->dev_state_lock);
+ return (0);
+ }
+ ret = usbgem_hal_get_stats(dp);
+ rw_exit(&dp->dev_state_lock);
+
+#ifdef GEM_CONFIG_FMA
+ if (ret != USB_SUCCESS) {
+ ddi_fm_service_impact(dp->dip, DDI_SERVICE_DEGRADED);
+ return (EIO);
+ }
+#endif
+
+ switch (stat) {
+ case MAC_STAT_IFSPEED:
+ val = usbgem_speed_value[dp->speed] *1000000ull;
+ break;
+
+ case MAC_STAT_MULTIRCV:
+ val = gstp->rmcast;
+ break;
+
+ case MAC_STAT_BRDCSTRCV:
+ val = gstp->rbcast;
+ break;
+
+ case MAC_STAT_MULTIXMT:
+ val = gstp->omcast;
+ break;
+
+ case MAC_STAT_BRDCSTXMT:
+ val = gstp->obcast;
+ break;
+
+ case MAC_STAT_NORCVBUF:
+ val = gstp->norcvbuf + gstp->missed;
+ break;
+
+ case MAC_STAT_IERRORS:
+ val = gstp->errrcv;
+ break;
+
+ case MAC_STAT_NOXMTBUF:
+ val = gstp->noxmtbuf;
+ break;
+
+ case MAC_STAT_OERRORS:
+ val = gstp->errxmt;
+ break;
+
+ case MAC_STAT_COLLISIONS:
+ val = gstp->collisions;
+ break;
+
+ case MAC_STAT_RBYTES:
+ val = gstp->rbytes;
+ break;
+
+ case MAC_STAT_IPACKETS:
+ val = gstp->rpackets;
+ break;
+
+ case MAC_STAT_OBYTES:
+ val = gstp->obytes;
+ break;
+
+ case MAC_STAT_OPACKETS:
+ val = gstp->opackets;
+ break;
+
+ case MAC_STAT_UNDERFLOWS:
+ val = gstp->underflow;
+ break;
+
+ case MAC_STAT_OVERFLOWS:
+ val = gstp->overflow;
+ break;
+
+ case ETHER_STAT_ALIGN_ERRORS:
+ val = gstp->frame;
+ break;
+
+ case ETHER_STAT_FCS_ERRORS:
+ val = gstp->crc;
+ break;
+
+ case ETHER_STAT_FIRST_COLLISIONS:
+ val = gstp->first_coll;
+ break;
+
+ case ETHER_STAT_MULTI_COLLISIONS:
+ val = gstp->multi_coll;
+ break;
+
+ case ETHER_STAT_SQE_ERRORS:
+ val = gstp->sqe;
+ break;
+
+ case ETHER_STAT_DEFER_XMTS:
+ val = gstp->defer;
+ break;
+
+ case ETHER_STAT_TX_LATE_COLLISIONS:
+ val = gstp->xmtlatecoll;
+ break;
+
+ case ETHER_STAT_EX_COLLISIONS:
+ val = gstp->excoll;
+ break;
+
+ case ETHER_STAT_MACXMT_ERRORS:
+ val = gstp->xmit_internal_err;
+ break;
+
+ case ETHER_STAT_CARRIER_ERRORS:
+ val = gstp->nocarrier;
+ break;
+
+ case ETHER_STAT_TOOLONG_ERRORS:
+ val = gstp->frame_too_long;
+ break;
+
+ case ETHER_STAT_MACRCV_ERRORS:
+ val = gstp->rcv_internal_err;
+ break;
+
+ case ETHER_STAT_XCVR_ADDR:
+ val = dp->mii_phy_addr;
+ break;
+
+ case ETHER_STAT_XCVR_ID:
+ val = dp->mii_phy_id;
+ break;
+
+ case ETHER_STAT_XCVR_INUSE:
+ val = usbgem_mac_xcvr_inuse(dp);
+ break;
+
+ case ETHER_STAT_CAP_1000FDX:
+ val = (dp->mii_xstatus & MII_XSTATUS_1000BASET_FD) ||
+ (dp->mii_xstatus & MII_XSTATUS_1000BASEX_FD);
+ break;
+
+ case ETHER_STAT_CAP_1000HDX:
+ val = (dp->mii_xstatus & MII_XSTATUS_1000BASET) ||
+ (dp->mii_xstatus & MII_XSTATUS_1000BASEX);
+ break;
+
+ case ETHER_STAT_CAP_100FDX:
+ val = BOOLEAN(dp->mii_status & MII_STATUS_100_BASEX_FD);
+ break;
+
+ case ETHER_STAT_CAP_100HDX:
+ val = BOOLEAN(dp->mii_status & MII_STATUS_100_BASEX);
+ break;
+
+ case ETHER_STAT_CAP_10FDX:
+ val = BOOLEAN(dp->mii_status & MII_STATUS_10_FD);
+ break;
+
+ case ETHER_STAT_CAP_10HDX:
+ val = BOOLEAN(dp->mii_status & MII_STATUS_10);
+ break;
+
+ case ETHER_STAT_CAP_ASMPAUSE:
+ val = dp->ugc.usbgc_flow_control > FLOW_CONTROL_SYMMETRIC;
+ break;
+
+ case ETHER_STAT_CAP_PAUSE:
+ val = dp->ugc.usbgc_flow_control != FLOW_CONTROL_NONE;
+ break;
+
+ case ETHER_STAT_CAP_AUTONEG:
+ val = BOOLEAN(dp->mii_status & MII_STATUS_CANAUTONEG);
+ break;
+
+ case ETHER_STAT_ADV_CAP_1000FDX:
+ val = dp->anadv_1000fdx;
+ break;
+
+ case ETHER_STAT_ADV_CAP_1000HDX:
+ val = dp->anadv_1000hdx;
+ break;
+
+ case ETHER_STAT_ADV_CAP_100FDX:
+ val = dp->anadv_100fdx;
+ break;
+
+ case ETHER_STAT_ADV_CAP_100HDX:
+ val = dp->anadv_100hdx;
+ break;
+
+ case ETHER_STAT_ADV_CAP_10FDX:
+ val = dp->anadv_10fdx;
+ break;
+
+ case ETHER_STAT_ADV_CAP_10HDX:
+ val = dp->anadv_10hdx;
+ break;
+
+ case ETHER_STAT_ADV_CAP_ASMPAUSE:
+ val = dp->anadv_asmpause;
+ break;
+
+ case ETHER_STAT_ADV_CAP_PAUSE:
+ val = dp->anadv_pause;
+ break;
+
+ case ETHER_STAT_ADV_CAP_AUTONEG:
+ val = dp->anadv_autoneg;
+ break;
+
+ case ETHER_STAT_LP_CAP_1000FDX:
+ val = BOOLEAN(dp->mii_stat1000 & MII_1000TS_LP_FULL);
+ break;
+
+ case ETHER_STAT_LP_CAP_1000HDX:
+ val = BOOLEAN(dp->mii_stat1000 & MII_1000TS_LP_HALF);
+ break;
+
+ case ETHER_STAT_LP_CAP_100FDX:
+ val = BOOLEAN(dp->mii_lpable & MII_ABILITY_100BASE_TX_FD);
+ break;
+
+ case ETHER_STAT_LP_CAP_100HDX:
+ val = BOOLEAN(dp->mii_lpable & MII_ABILITY_100BASE_TX);
+ break;
+
+ case ETHER_STAT_LP_CAP_10FDX:
+ val = BOOLEAN(dp->mii_lpable & MII_ABILITY_10BASE_T_FD);
+ break;
+
+ case ETHER_STAT_LP_CAP_10HDX:
+ val = BOOLEAN(dp->mii_lpable & MII_ABILITY_10BASE_T);
+ break;
+
+ case ETHER_STAT_LP_CAP_ASMPAUSE:
+ val = BOOLEAN(dp->mii_lpable & MII_ABILITY_ASM_DIR);
+ break;
+
+ case ETHER_STAT_LP_CAP_PAUSE:
+ val = BOOLEAN(dp->mii_lpable & MII_ABILITY_PAUSE);
+ break;
+
+ case ETHER_STAT_LP_CAP_AUTONEG:
+ val = BOOLEAN(dp->mii_exp & MII_AN_EXP_LPCANAN);
+ break;
+
+ case ETHER_STAT_LINK_ASMPAUSE:
+ val = BOOLEAN(dp->flow_control & 2);
+ break;
+
+ case ETHER_STAT_LINK_PAUSE:
+ val = BOOLEAN(dp->flow_control & 1);
+ break;
+
+ case ETHER_STAT_LINK_AUTONEG:
+ val = dp->anadv_autoneg &&
+ BOOLEAN(dp->mii_exp & MII_AN_EXP_LPCANAN);
+ break;
+
+ case ETHER_STAT_LINK_DUPLEX:
+ val = (dp->mii_state == MII_STATE_LINKUP) ?
+ (dp->full_duplex ? 2 : 1) : 0;
+ break;
+
+ case ETHER_STAT_TOOSHORT_ERRORS:
+ val = gstp->runt;
+ break;
+#ifdef NEVER /* it doesn't make sense */
+ case ETHER_STAT_CAP_REMFAULT:
+ val = B_TRUE;
+ break;
+
+ case ETHER_STAT_ADV_REMFAULT:
+ val = dp->anadv_remfault;
+ break;
+#endif
+ case ETHER_STAT_LP_REMFAULT:
+ val = BOOLEAN(dp->mii_lpable & MII_AN_ADVERT_REMFAULT);
+ break;
+
+ case ETHER_STAT_JABBER_ERRORS:
+ val = gstp->jabber;
+ break;
+
+ case ETHER_STAT_CAP_100T4:
+ val = BOOLEAN(dp->mii_status & MII_STATUS_100_BASE_T4);
+ break;
+
+ case ETHER_STAT_ADV_CAP_100T4:
+ val = dp->anadv_100t4;
+ break;
+
+ case ETHER_STAT_LP_CAP_100T4:
+ val = BOOLEAN(dp->mii_lpable & MII_ABILITY_100BASE_T4);
+ break;
+
+ default:
+#if GEM_DEBUG_LEVEL > 2
+ cmn_err(CE_WARN,
+ "%s: unrecognized parameter value = %d",
+ __func__, stat);
+#endif
+ *valp = 0;
+ return (ENOTSUP);
+ }
+
+ *valp = val;
+
+ return (0);
+}
+
+static int
+usbgem_m_unicst(void *arg, const uint8_t *mac)
+{
+ int err;
+ struct usbgem_dev *dp = arg;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ rw_enter(&dp->dev_state_lock, RW_READER);
+
+ sema_p(&dp->rxfilter_lock);
+ bcopy(mac, dp->cur_addr.ether_addr_octet, ETHERADDRL);
+ dp->rxmode |= RXMODE_ENABLE;
+
+ err = 0;
+ if (dp->mac_state != MAC_STATE_DISCONNECTED) {
+ if (usbgem_hal_set_rx_filter(dp) != USB_SUCCESS) {
+ err = EIO;
+ }
+ }
+ sema_v(&dp->rxfilter_lock);
+ rw_exit(&dp->dev_state_lock);
+
+#ifdef GEM_CONFIG_FMA
+ if (err != 0) {
+ ddi_fm_service_impact(dp->dip, DDI_SERVICE_DEGRADED);
+ }
+#endif
+ return (err);
+}
+
+/*
+ * usbgem_m_tx is used only for sending data packets into ethernet wire.
+ */
+static mblk_t *
+usbgem_m_tx(void *arg, mblk_t *mp_head)
+{
+ int limit;
+ mblk_t *mp;
+ mblk_t *nmp;
+ uint32_t flags;
+ struct usbgem_dev *dp = arg;
+
+ DPRINTF(4, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ mp = mp_head;
+ flags = 0;
+
+ rw_enter(&dp->dev_state_lock, RW_READER);
+
+ if (dp->mii_state != MII_STATE_LINKUP ||
+ dp->mac_state != MAC_STATE_ONLINE) {
+ /* some nics hate to send packets during the link is down */
+ for (; mp; mp = nmp) {
+ nmp = mp->b_next;
+ mp->b_next = NULL;
+ freemsg(mp);
+ }
+ goto x;
+ }
+
+ ASSERT(dp->nic_state == NIC_STATE_ONLINE);
+
+ limit = dp->tx_max_packets;
+ for (; limit-- && mp; mp = nmp) {
+ nmp = mp->b_next;
+ mp->b_next = NULL;
+ if (usbgem_send_common(dp, mp,
+ (limit == 0 && nmp) ? 1 : 0)) {
+ mp->b_next = nmp;
+ break;
+ }
+ }
+#ifdef CONFIG_TX_LIMITER
+ if (mp == mp_head) {
+ /* no packets were sent, descrease allocation limit */
+ mutex_enter(&dp->txlock);
+ dp->tx_max_packets = max(dp->tx_max_packets - 1, 1);
+ mutex_exit(&dp->txlock);
+ }
+#endif
+x:
+ rw_exit(&dp->dev_state_lock);
+
+ return (mp);
+}
+
+static void
+usbgem_m_ioctl(void *arg, queue_t *wq, mblk_t *mp)
+{
+ struct usbgem_dev *dp = arg;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called",
+ ((struct usbgem_dev *)arg)->name, __func__));
+
+ rw_enter(&dp->dev_state_lock, RW_READER);
+ usbgem_mac_ioctl((struct usbgem_dev *)arg, wq, mp);
+ rw_exit(&dp->dev_state_lock);
+}
+
+static void
+usbgem_gld3_init(struct usbgem_dev *dp, mac_register_t *macp)
+{
+ macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
+ macp->m_driver = dp;
+ macp->m_dip = dp->dip;
+ macp->m_src_addr = dp->dev_addr.ether_addr_octet;
+ macp->m_callbacks = &gem_m_callbacks;
+ macp->m_min_sdu = 0;
+ macp->m_max_sdu = dp->mtu;
+
+ if (dp->misc_flag & USBGEM_VLAN) {
+ macp->m_margin = VTAG_SIZE;
+ }
+}
+#else
+/* ============================================================== */
+/*
+ * GLDv2 interface
+ */
+/* ============================================================== */
+static int usbgem_gld_reset(gld_mac_info_t *);
+static int usbgem_gld_start(gld_mac_info_t *);
+static int usbgem_gld_stop(gld_mac_info_t *);
+static int usbgem_gld_set_mac_address(gld_mac_info_t *, uint8_t *);
+static int usbgem_gld_set_multicast(gld_mac_info_t *, uint8_t *, int);
+static int usbgem_gld_set_promiscuous(gld_mac_info_t *, int);
+static int usbgem_gld_get_stats(gld_mac_info_t *, struct gld_stats *);
+static int usbgem_gld_send(gld_mac_info_t *, mblk_t *);
+static int usbgem_gld_send_tagged(gld_mac_info_t *, mblk_t *, uint32_t);
+
+static int
+usbgem_gld_reset(gld_mac_info_t *macinfo)
+{
+ int err;
+ struct usbgem_dev *dp;
+
+ err = GLD_SUCCESS;
+ dp = (struct usbgem_dev *)macinfo->gldm_private;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ rw_enter(&dp->dev_state_lock, RW_WRITER);
+ if (usbgem_mac_init(dp) != USB_SUCCESS) {
+ err = GLD_FAILURE;
+ goto x;
+ }
+
+ dp->nic_state = NIC_STATE_INITIALIZED;
+
+ /* setup media mode if the link have been up */
+ if (dp->mii_state == MII_STATE_LINKUP) {
+ if (dp->mac_state != MAC_STATE_DISCONNECTED) {
+ (void) usbgem_hal_set_media(dp);
+ }
+ }
+x:
+ rw_exit(&dp->dev_state_lock);
+ return (err);
+}
+
+static int
+usbgem_gld_start(gld_mac_info_t *macinfo)
+{
+ int err;
+ struct usbgem_dev *dp;
+
+ dp = (struct usbgem_dev *)macinfo->gldm_private;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ rw_enter(&dp->dev_state_lock, RW_WRITER);
+
+ dp->nic_state = NIC_STATE_ONLINE;
+
+ if (dp->mii_state == MII_STATE_LINKUP) {
+ if (usbgem_mac_start(dp) != USB_SUCCESS) {
+ /* sema_v(&dp->mii_lock); */
+ err = GLD_FAILURE;
+ goto x;
+ }
+ }
+
+ /*
+ * XXX - don't call gld_linkstate() here,
+ * otherwise it cause recursive mutex call.
+ */
+ err = GLD_SUCCESS;
+x:
+ rw_exit(&dp->dev_state_lock);
+
+ return (err);
+}
+
+static int
+usbgem_gld_stop(gld_mac_info_t *macinfo)
+{
+ int err = GLD_SUCCESS;
+ struct usbgem_dev *dp;
+
+ dp = (struct usbgem_dev *)macinfo->gldm_private;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /* try to stop rx gracefully */
+ rw_enter(&dp->dev_state_lock, RW_READER);
+ sema_p(&dp->rxfilter_lock);
+ dp->rxmode &= ~RXMODE_ENABLE;
+
+ if (dp->mac_state != MAC_STATE_DISCONNECTED) {
+ (void) usbgem_hal_set_rx_filter(dp);
+ }
+ sema_v(&dp->rxfilter_lock);
+ rw_exit(&dp->dev_state_lock);
+
+ /* make the nic state inactive */
+ rw_enter(&dp->dev_state_lock, RW_WRITER);
+ dp->nic_state = NIC_STATE_STOPPED;
+
+ if (dp->mac_state != MAC_STATE_DISCONNECTED) {
+ if (usbgem_mac_stop(dp, MAC_STATE_STOPPED, STOP_GRACEFUL)
+ != USB_SUCCESS) {
+ err = GLD_FAILURE;
+ }
+ }
+ rw_exit(&dp->dev_state_lock);
+
+ return (err);
+}
+
+static int
+usbgem_gld_set_multicast(gld_mac_info_t *macinfo, uint8_t *ep, int flag)
+{
+ int err;
+ int ret;
+ struct usbgem_dev *dp;
+
+ dp = (struct usbgem_dev *)macinfo->gldm_private;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ rw_enter(&dp->dev_state_lock, RW_READER);
+ if (flag == GLD_MULTI_ENABLE) {
+ ret = usbgem_add_multicast(dp, ep);
+ } else {
+ ret = usbgem_remove_multicast(dp, ep);
+ }
+ rw_exit(&dp->dev_state_lock);
+
+ err = GLD_SUCCESS;
+ if (ret != USB_SUCCESS) {
+#ifdef GEM_CONFIG_FMA
+ ddi_fm_service_impact(dp->dip, DDI_SERVICE_DEGRADED);
+#endif
+ err = GLD_FAILURE;
+ }
+ return (err);
+}
+
+static int
+usbgem_gld_set_promiscuous(gld_mac_info_t *macinfo, int flag)
+{
+ boolean_t need_to_change = B_TRUE;
+ struct usbgem_dev *dp;
+
+ dp = (struct usbgem_dev *)macinfo->gldm_private;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ sema_p(&dp->rxfilter_lock);
+ if (flag == GLD_MAC_PROMISC_NONE) {
+ dp->rxmode &= ~(RXMODE_PROMISC | RXMODE_ALLMULTI_REQ);
+ } else if (flag == GLD_MAC_PROMISC_MULTI) {
+ dp->rxmode |= RXMODE_ALLMULTI_REQ;
+ } else if (flag == GLD_MAC_PROMISC_PHYS) {
+ dp->rxmode |= RXMODE_PROMISC;
+ } else {
+ /* mode unchanged */
+ need_to_change = B_FALSE;
+ }
+
+ if (need_to_change) {
+ if (dp->mac_state != MAC_STATE_DISCONNECTED) {
+ (void) usbgem_hal_set_rx_filter(dp);
+ }
+ }
+ sema_v(&dp->rxfilter_lock);
+
+ return (GLD_SUCCESS);
+}
+
+static int
+usbgem_gld_set_mac_address(gld_mac_info_t *macinfo, uint8_t *mac)
+{
+ struct usbgem_dev *dp;
+ dp = (struct usbgem_dev *)macinfo->gldm_private;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ sema_p(&dp->rxfilter_lock);
+ bcopy(mac, dp->cur_addr.ether_addr_octet, ETHERADDRL);
+ dp->rxmode |= RXMODE_ENABLE;
+
+ if (dp->mac_state != MAC_STATE_DISCONNECTED) {
+ (void) usbgem_hal_set_rx_filter(dp);
+ }
+ sema_v(&dp->rxfilter_lock);
+
+ return (GLD_SUCCESS);
+}
+
+static int
+usbgem_gld_get_stats(gld_mac_info_t *macinfo, struct gld_stats *gs)
+{
+ struct usbgem_dev *dp;
+ struct usbgem_stats *vs;
+
+ dp = (struct usbgem_dev *)macinfo->gldm_private;
+
+ if ((*dp->ugc.usbgc_get_stats)(dp) != USB_SUCCESS) {
+#ifdef GEM_CONFIG_FMA
+ ddi_fm_service_impact(dp->dip, DDI_SERVICE_DEGRADED);
+#endif
+ return (USB_FAILURE);
+ }
+
+ vs = &dp->stats;
+
+ gs->glds_errxmt = vs->errxmt;
+ gs->glds_errrcv = vs->errrcv;
+ gs->glds_collisions = vs->collisions;
+
+ gs->glds_excoll = vs->excoll;
+ gs->glds_defer = vs->defer;
+ gs->glds_frame = vs->frame;
+ gs->glds_crc = vs->crc;
+
+ gs->glds_overflow = vs->overflow; /* fifo err,underrun,rbufovf */
+ gs->glds_underflow = vs->underflow;
+ gs->glds_short = vs->runt;
+ gs->glds_missed = vs->missed; /* missed pkts while rbuf ovf */
+ gs->glds_xmtlatecoll = vs->xmtlatecoll;
+ gs->glds_nocarrier = vs->nocarrier;
+ gs->glds_norcvbuf = vs->norcvbuf; /* OS resource exaust */
+ gs->glds_intr = vs->intr;
+
+ /* all before here must be kept in place for v0 compatibility */
+ gs->glds_speed = usbgem_speed_value[dp->speed] * 1000000;
+ gs->glds_media = GLDM_PHYMII;
+ gs->glds_duplex = dp->full_duplex ? GLD_DUPLEX_FULL : GLD_DUPLEX_HALF;
+
+ /* gs->glds_media_specific */
+ gs->glds_dot3_first_coll = vs->first_coll;
+ gs->glds_dot3_multi_coll = vs->multi_coll;
+ gs->glds_dot3_sqe_error = 0;
+ gs->glds_dot3_mac_xmt_error = 0;
+ gs->glds_dot3_mac_rcv_error = 0;
+ gs->glds_dot3_frame_too_long = vs->frame_too_long;
+
+ return (GLD_SUCCESS);
+}
+
+static int
+usbgem_gld_ioctl(gld_mac_info_t *macinfo, queue_t *wq, mblk_t *mp)
+{
+ struct usbgem_dev *dp;
+
+ dp = (struct usbgem_dev *)macinfo->gldm_private;
+ usbgem_mac_ioctl(dp, wq, mp);
+
+ return (GLD_SUCCESS);
+}
+
+/*
+ * gem_gld_send is used only for sending data packets into ethernet wire.
+ */
+static int
+usbgem_gld_send(gld_mac_info_t *macinfo, mblk_t *mp)
+{
+ int ret;
+ uint32_t flags = 0;
+ struct usbgem_dev *dp;
+
+ dp = (struct usbgem_dev *)macinfo->gldm_private;
+
+ /* nic state must be online of suspended */
+ rw_enter(&dp->dev_state_lock, RW_READER);
+
+ ASSERT(dp->nic_state == NIC_STATE_ONLINE);
+ ASSERT(mp->b_next == NULL);
+
+ if (dp->mii_state != MII_STATE_LINKUP) {
+ /* Some nics hate to send packets while the link is down. */
+ /* we discard the untransmitted packets silently */
+ rw_exit(&dp->dev_state_lock);
+
+ freemsg(mp);
+#ifdef GEM_CONFIG_FMA
+ /* FIXME - should we ignore the error? */
+ ddi_fm_service_impact(dp->dip, DDI_SERVICE_DEGRADED);
+#endif
+ return (GLD_SUCCESS);
+ }
+
+ ret = (usbgem_send_common(dp, mp, flags) == NULL)
+ ? GLD_SUCCESS : GLD_NORESOURCES;
+ rw_exit(&dp->dev_state_lock);
+
+ return (ret);
+}
+
+/*
+ * usbgem_gld_send is used only for sending data packets into ethernet wire.
+ */
+static int
+usbgem_gld_send_tagged(gld_mac_info_t *macinfo, mblk_t *mp, uint32_t vtag)
+{
+ uint32_t flags;
+ struct usbgem_dev *dp;
+
+ dp = (struct usbgem_dev *)macinfo->gldm_private;
+
+ /*
+ * Some nics hate to send packets while the link is down.
+ */
+ if (dp->mii_state != MII_STATE_LINKUP) {
+ /* we dicard the untransmitted packets silently */
+ freemsg(mp);
+#ifdef GEM_CONFIG_FMA
+ /* FIXME - should we ignore the error? */
+ ddi_fm_service_impact(dp->dip, DDI_SERVICE_UNAFFECTED);
+#endif
+ return (GLD_SUCCESS);
+ }
+#ifdef notyet
+ flags = GLD_VTAG_TCI(vtag) << GEM_SEND_VTAG_SHIFT;
+#endif
+ return ((usbgem_send_common(dp, mp, 0) == NULL) ?
+ GLD_SUCCESS : GLD_NORESOURCES);
+}
+
+static void
+usbgem_gld_init(struct usbgem_dev *dp, gld_mac_info_t *macinfo, char *ident)
+{
+ /*
+ * configure GLD
+ */
+ macinfo->gldm_devinfo = dp->dip;
+ macinfo->gldm_private = (caddr_t)dp;
+
+ macinfo->gldm_reset = usbgem_gld_reset;
+ macinfo->gldm_start = usbgem_gld_start;
+ macinfo->gldm_stop = usbgem_gld_stop;
+ macinfo->gldm_set_mac_addr = usbgem_gld_set_mac_address;
+ macinfo->gldm_send = usbgem_gld_send;
+ macinfo->gldm_set_promiscuous = usbgem_gld_set_promiscuous;
+ macinfo->gldm_get_stats = usbgem_gld_get_stats;
+ macinfo->gldm_ioctl = usbgem_gld_ioctl;
+ macinfo->gldm_set_multicast = usbgem_gld_set_multicast;
+ macinfo->gldm_intr = NULL;
+ macinfo->gldm_mctl = NULL;
+
+ macinfo->gldm_ident = ident;
+ macinfo->gldm_type = DL_ETHER;
+ macinfo->gldm_minpkt = 0;
+ macinfo->gldm_maxpkt = dp->mtu;
+ macinfo->gldm_addrlen = ETHERADDRL;
+ macinfo->gldm_saplen = -2;
+ macinfo->gldm_ppa = ddi_get_instance(dp->dip);
+#ifdef GLD_CAP_LINKSTATE
+ macinfo->gldm_capabilities = GLD_CAP_LINKSTATE;
+#endif
+ macinfo->gldm_vendor_addr = dp->dev_addr.ether_addr_octet;
+ macinfo->gldm_broadcast_addr = usbgem_bcastaddr;
+}
+#endif /* USBGEM_CONFIG_GLDv3 */
+
+
+/* ======================================================================== */
+/*
+ * .conf interface
+ */
+/* ======================================================================== */
+void
+usbgem_generate_macaddr(struct usbgem_dev *dp, uint8_t *mac)
+{
+ extern char hw_serial[];
+ char *hw_serial_p;
+ int i;
+ uint64_t val;
+ uint64_t key;
+
+ cmn_err(CE_NOTE,
+ "!%s: using temp ether address,"
+ " do not use this for long time",
+ dp->name);
+
+ /* prefer a fixed address for DHCP */
+ hw_serial_p = &hw_serial[0];
+ val = stoi(&hw_serial_p);
+
+ key = 0;
+ for (i = 0; i < USBGEM_NAME_LEN; i++) {
+ if (dp->name[i] == 0) {
+ break;
+ }
+ key ^= dp->name[i];
+ }
+ key ^= ddi_get_instance(dp->dip);
+ val ^= key << 32;
+
+ /* generate a local address */
+ mac[0] = 0x02;
+ mac[1] = (uint8_t)(val >> 32);
+ mac[2] = (uint8_t)(val >> 24);
+ mac[3] = (uint8_t)(val >> 16);
+ mac[4] = (uint8_t)(val >> 8);
+ mac[5] = (uint8_t)val;
+}
+
+boolean_t
+usbgem_get_mac_addr_conf(struct usbgem_dev *dp)
+{
+ char propname[32];
+ char *valstr;
+ uint8_t mac[ETHERADDRL];
+ char *cp;
+ int c;
+ int i;
+ int j;
+ uint8_t v;
+ uint8_t d;
+ uint8_t ored;
+
+ DPRINTF(3, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+ /*
+ * Get ethernet address from .conf file
+ */
+ (void) sprintf(propname, "mac-addr");
+ if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, dp->dip,
+ DDI_PROP_DONTPASS, propname, &valstr)) != DDI_PROP_SUCCESS) {
+ return (B_FALSE);
+ }
+
+ if (strlen(valstr) != ETHERADDRL*3-1) {
+ goto syntax_err;
+ }
+
+ cp = valstr;
+ j = 0;
+ ored = 0;
+ for (;;) {
+ v = 0;
+ for (i = 0; i < 2; i++) {
+ c = *cp++;
+
+ if (c >= 'a' && c <= 'f') {
+ d = c - 'a' + 10;
+ } else if (c >= 'A' && c <= 'F') {
+ d = c - 'A' + 10;
+ } else if (c >= '0' && c <= '9') {
+ d = c - '0';
+ } else {
+ goto syntax_err;
+ }
+ v = (v << 4) | d;
+ }
+
+ mac[j++] = v;
+ ored |= v;
+ if (j == ETHERADDRL) {
+ /* done */
+ break;
+ }
+
+ c = *cp++;
+ if (c != ':') {
+ goto syntax_err;
+ }
+ }
+
+ if (ored == 0) {
+ usbgem_generate_macaddr(dp, mac);
+ }
+ for (i = 0; i < ETHERADDRL; i++) {
+ dp->dev_addr.ether_addr_octet[i] = mac[i];
+ }
+ ddi_prop_free(valstr);
+ return (B_TRUE);
+
+syntax_err:
+ cmn_err(CE_CONT,
+ "!%s: read mac addr: trying .conf: syntax err %s",
+ dp->name, valstr);
+ ddi_prop_free(valstr);
+
+ return (B_FALSE);
+}
+
+static void
+usbgem_read_conf(struct usbgem_dev *dp)
+{
+ int val;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /*
+ * Get media mode infomation from .conf file
+ */
+ dp->anadv_autoneg = usbgem_prop_get_int(dp, "adv_autoneg_cap", 1) != 0;
+ dp->anadv_1000fdx = usbgem_prop_get_int(dp, "adv_1000fdx_cap", 1) != 0;
+ dp->anadv_1000hdx = usbgem_prop_get_int(dp, "adv_1000hdx_cap", 1) != 0;
+ dp->anadv_100t4 = usbgem_prop_get_int(dp, "adv_100T4_cap", 1) != 0;
+ dp->anadv_100fdx = usbgem_prop_get_int(dp, "adv_100fdx_cap", 1) != 0;
+ dp->anadv_100hdx = usbgem_prop_get_int(dp, "adv_100hdx_cap", 1) != 0;
+ dp->anadv_10fdx = usbgem_prop_get_int(dp, "adv_10fdx_cap", 1) != 0;
+ dp->anadv_10hdx = usbgem_prop_get_int(dp, "adv_10hdx_cap", 1) != 0;
+ dp->anadv_1000t_ms = usbgem_prop_get_int(dp, "adv_1000t_ms", 0);
+
+ if ((ddi_prop_exists(DDI_DEV_T_ANY, dp->dip,
+ DDI_PROP_DONTPASS, "full-duplex"))) {
+ dp->full_duplex =
+ usbgem_prop_get_int(dp, "full-duplex", 1) != 0;
+ dp->anadv_autoneg = B_FALSE;
+ if (dp->full_duplex) {
+ dp->anadv_1000hdx = B_FALSE;
+ dp->anadv_100hdx = B_FALSE;
+ dp->anadv_10hdx = B_FALSE;
+ } else {
+ dp->anadv_1000fdx = B_FALSE;
+ dp->anadv_100fdx = B_FALSE;
+ dp->anadv_10fdx = B_FALSE;
+ }
+ }
+
+ if ((val = usbgem_prop_get_int(dp, "speed", 0)) > 0) {
+ dp->anadv_autoneg = B_FALSE;
+ switch (val) {
+ case 1000:
+ dp->speed = USBGEM_SPD_1000;
+ dp->anadv_100t4 = B_FALSE;
+ dp->anadv_100fdx = B_FALSE;
+ dp->anadv_100hdx = B_FALSE;
+ dp->anadv_10fdx = B_FALSE;
+ dp->anadv_10hdx = B_FALSE;
+ break;
+ case 100:
+ dp->speed = USBGEM_SPD_100;
+ dp->anadv_1000fdx = B_FALSE;
+ dp->anadv_1000hdx = B_FALSE;
+ dp->anadv_10fdx = B_FALSE;
+ dp->anadv_10hdx = B_FALSE;
+ break;
+ case 10:
+ dp->speed = USBGEM_SPD_10;
+ dp->anadv_1000fdx = B_FALSE;
+ dp->anadv_1000hdx = B_FALSE;
+ dp->anadv_100t4 = B_FALSE;
+ dp->anadv_100fdx = B_FALSE;
+ dp->anadv_100hdx = B_FALSE;
+ break;
+ default:
+ cmn_err(CE_WARN,
+ "!%s: property %s: illegal value:%d",
+ dp->name, "speed", val);
+ dp->anadv_autoneg = B_TRUE;
+ break;
+ }
+ }
+ val = usbgem_prop_get_int(dp,
+ "adv_pause", dp->ugc.usbgc_flow_control & 1);
+ val |= usbgem_prop_get_int(dp,
+ "adv_asmpause", BOOLEAN(dp->ugc.usbgc_flow_control & 2)) << 1;
+ if (val > FLOW_CONTROL_RX_PAUSE || val < FLOW_CONTROL_NONE) {
+ cmn_err(CE_WARN,
+ "!%s: property %s: illegal value:%d",
+ dp->name, "flow-control", val);
+ } else {
+ val = min(val, dp->ugc.usbgc_flow_control);
+ }
+ dp->anadv_pause = BOOLEAN(val & 1);
+ dp->anadv_asmpause = BOOLEAN(val & 2);
+
+ dp->mtu = usbgem_prop_get_int(dp, "mtu", dp->mtu);
+ dp->txthr = usbgem_prop_get_int(dp, "txthr", dp->txthr);
+ dp->rxthr = usbgem_prop_get_int(dp, "rxthr", dp->rxthr);
+ dp->txmaxdma = usbgem_prop_get_int(dp, "txmaxdma", dp->txmaxdma);
+ dp->rxmaxdma = usbgem_prop_get_int(dp, "rxmaxdma", dp->rxmaxdma);
+#ifdef GEM_CONFIG_POLLING
+ dp->poll_pkt_delay =
+ usbgem_prop_get_int(dp, "pkt_delay", dp->poll_pkt_delay);
+
+ dp->max_poll_interval[GEM_SPD_10] =
+ usbgem_prop_get_int(dp, "max_poll_interval_10",
+ dp->max_poll_interval[GEM_SPD_10]);
+ dp->max_poll_interval[GEM_SPD_100] =
+ usbgem_prop_get_int(dp, "max_poll_interval_100",
+ dp->max_poll_interval[GEM_SPD_100]);
+ dp->max_poll_interval[GEM_SPD_1000] =
+ usbgem_prop_get_int(dp, "max_poll_interval_1000",
+ dp->max_poll_interval[GEM_SPD_1000]);
+
+ dp->min_poll_interval[GEM_SPD_10] =
+ usbgem_prop_get_int(dp, "min_poll_interval_10",
+ dp->min_poll_interval[GEM_SPD_10]);
+ dp->min_poll_interval[GEM_SPD_100] =
+ usbgem_prop_get_int(dp, "min_poll_interval_100",
+ dp->min_poll_interval[GEM_SPD_100]);
+ dp->min_poll_interval[GEM_SPD_1000] =
+ usbgem_prop_get_int(dp, "min_poll_interval_1000",
+ dp->min_poll_interval[GEM_SPD_1000]);
+#endif
+}
+
+/*
+ * usbem kstat support
+ */
+#ifndef GEM_CONFIG_GLDv3
+/* kstat items based from dmfe driver */
+
+struct usbgem_kstat_named {
+ struct kstat_named ks_xcvr_addr;
+ struct kstat_named ks_xcvr_id;
+ struct kstat_named ks_xcvr_inuse;
+ struct kstat_named ks_link_up;
+ struct kstat_named ks_link_duplex; /* 0:unknwon, 1:half, 2:full */
+ struct kstat_named ks_cap_1000fdx;
+ struct kstat_named ks_cap_1000hdx;
+ struct kstat_named ks_cap_100fdx;
+ struct kstat_named ks_cap_100hdx;
+ struct kstat_named ks_cap_10fdx;
+ struct kstat_named ks_cap_10hdx;
+#ifdef NEVER
+ struct kstat_named ks_cap_remfault;
+#endif
+ struct kstat_named ks_cap_autoneg;
+
+ struct kstat_named ks_adv_cap_1000fdx;
+ struct kstat_named ks_adv_cap_1000hdx;
+ struct kstat_named ks_adv_cap_100fdx;
+ struct kstat_named ks_adv_cap_100hdx;
+ struct kstat_named ks_adv_cap_10fdx;
+ struct kstat_named ks_adv_cap_10hdx;
+#ifdef NEVER
+ struct kstat_named ks_adv_cap_remfault;
+#endif
+ struct kstat_named ks_adv_cap_autoneg;
+ struct kstat_named ks_lp_cap_1000fdx;
+ struct kstat_named ks_lp_cap_1000hdx;
+ struct kstat_named ks_lp_cap_100fdx;
+ struct kstat_named ks_lp_cap_100hdx;
+ struct kstat_named ks_lp_cap_10fdx;
+ struct kstat_named ks_lp_cap_10hdx;
+ struct kstat_named ks_lp_cap_remfault;
+ struct kstat_named ks_lp_cap_autoneg;
+};
+
+static int
+usbgem_kstat_update(kstat_t *ksp, int rw)
+{
+ struct usbgem_kstat_named *knp;
+ struct usbgem_dev *dp = (struct usbgem_dev *)ksp->ks_private;
+
+ if (rw != KSTAT_READ) {
+ return (0);
+ }
+
+ knp = (struct usbgem_kstat_named *)ksp->ks_data;
+
+ knp->ks_xcvr_addr.value.ul = dp->mii_phy_addr;
+ knp->ks_xcvr_id.value.ul = dp->mii_phy_id;
+ knp->ks_xcvr_inuse.value.ul = usbgem_mac_xcvr_inuse(dp);
+ knp->ks_link_up.value.ul = dp->mii_state == MII_STATE_LINKUP;
+ knp->ks_link_duplex.value.ul =
+ (dp->mii_state == MII_STATE_LINKUP) ?
+ (dp->full_duplex ? 2 : 1) : 0;
+
+ knp->ks_cap_1000fdx.value.ul =
+ (dp->mii_xstatus & MII_XSTATUS_1000BASET_FD) ||
+ (dp->mii_xstatus & MII_XSTATUS_1000BASEX_FD);
+ knp->ks_cap_1000hdx.value.ul =
+ (dp->mii_xstatus & MII_XSTATUS_1000BASET) ||
+ (dp->mii_xstatus & MII_XSTATUS_1000BASEX);
+ knp->ks_cap_100fdx.value.ul =
+ BOOLEAN(dp->mii_status & MII_STATUS_100_BASEX_FD);
+ knp->ks_cap_100hdx.value.ul =
+ BOOLEAN(dp->mii_status & MII_STATUS_100_BASEX);
+ knp->ks_cap_10fdx.value.ul =
+ BOOLEAN(dp->mii_status & MII_STATUS_10_FD);
+ knp->ks_cap_10hdx.value.ul =
+ BOOLEAN(dp->mii_status & MII_STATUS_10);
+#ifdef NEVER
+ knp->ks_cap_remfault.value.ul = B_TRUE;
+#endif
+ knp->ks_cap_autoneg.value.ul =
+ BOOLEAN(dp->mii_status & MII_STATUS_CANAUTONEG);
+
+ knp->ks_adv_cap_1000fdx.value.ul = dp->anadv_1000fdx;
+ knp->ks_adv_cap_1000hdx.value.ul = dp->anadv_1000hdx;
+ knp->ks_adv_cap_100fdx.value.ul = dp->anadv_100fdx;
+ knp->ks_adv_cap_100hdx.value.ul = dp->anadv_100hdx;
+ knp->ks_adv_cap_10fdx.value.ul = dp->anadv_10fdx;
+ knp->ks_adv_cap_10hdx.value.ul = dp->anadv_10hdx;
+#ifdef NEVER
+ knp->ks_adv_cap_remfault.value.ul = 0;
+#endif
+ knp->ks_adv_cap_autoneg.value.ul = dp->anadv_autoneg;
+
+ knp->ks_lp_cap_1000fdx.value.ul =
+ BOOLEAN(dp->mii_stat1000 & MII_1000TS_LP_FULL);
+ knp->ks_lp_cap_1000hdx.value.ul =
+ BOOLEAN(dp->mii_stat1000 & MII_1000TS_LP_HALF);
+ knp->ks_lp_cap_100fdx.value.ul =
+ BOOLEAN(dp->mii_lpable & MII_ABILITY_100BASE_TX_FD);
+ knp->ks_lp_cap_100hdx.value.ul =
+ BOOLEAN(dp->mii_lpable & MII_ABILITY_100BASE_TX);
+ knp->ks_lp_cap_10fdx.value.ul =
+ BOOLEAN(dp->mii_lpable & MII_ABILITY_10BASE_T_FD);
+ knp->ks_lp_cap_10hdx.value.ul =
+ BOOLEAN(dp->mii_lpable & MII_ABILITY_10BASE_T);
+ knp->ks_lp_cap_remfault.value.ul =
+ BOOLEAN(dp->mii_exp & MII_AN_EXP_PARFAULT);
+ knp->ks_lp_cap_autoneg.value.ul =
+ BOOLEAN(dp->mii_exp & MII_AN_EXP_LPCANAN);
+
+ return (0);
+}
+
+
+static int
+usbgem_kstat_init(struct usbgem_dev *dp)
+{
+ int i;
+ kstat_t *ksp;
+ struct usbgem_kstat_named *knp;
+
+ ksp = kstat_create(
+ (char *)ddi_driver_name(dp->dip), ddi_get_instance(dp->dip),
+ "mii", "net", KSTAT_TYPE_NAMED,
+ sizeof (*knp) / sizeof (knp->ks_xcvr_addr), 0);
+
+ if (ksp == NULL) {
+ cmn_err(CE_WARN, "%s: %s() for mii failed",
+ dp->name, __func__);
+ return (USB_FAILURE);
+ }
+
+ knp = (struct usbgem_kstat_named *)ksp->ks_data;
+
+ kstat_named_init(&knp->ks_xcvr_addr, "xcvr_addr",
+ KSTAT_DATA_INT32);
+ kstat_named_init(&knp->ks_xcvr_id, "xcvr_id",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_xcvr_inuse, "xcvr_inuse",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_link_up, "link_up",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_link_duplex, "link_duplex",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_cap_1000fdx, "cap_1000fdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_cap_1000hdx, "cap_1000hdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_cap_100fdx, "cap_100fdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_cap_100hdx, "cap_100hdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_cap_10fdx, "cap_10fdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_cap_10hdx, "cap_10hdx",
+ KSTAT_DATA_UINT32);
+#ifdef NEVER
+ kstat_named_init(&knp->ks_cap_remfault, "cap_rem_fault",
+ KSTAT_DATA_UINT32);
+#endif
+ kstat_named_init(&knp->ks_cap_autoneg, "cap_autoneg",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_adv_cap_1000fdx, "adv_cap_1000fdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_adv_cap_1000hdx, "adv_cap_1000hdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_adv_cap_100fdx, "adv_cap_100fdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_adv_cap_100hdx, "adv_cap_100hdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_adv_cap_10fdx, "adv_cap_10fdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_adv_cap_10hdx, "adv_cap_10hdx",
+ KSTAT_DATA_UINT32);
+#ifdef NEVER
+ kstat_named_init(&knp->ks_adv_cap_remfault, "adv_rem_fault",
+ KSTAT_DATA_UINT32);
+#endif
+ kstat_named_init(&knp->ks_adv_cap_autoneg, "adv_cap_autoneg",
+ KSTAT_DATA_UINT32);
+
+ kstat_named_init(&knp->ks_lp_cap_1000fdx, "lp_cap_1000fdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_lp_cap_1000hdx, "lp_cap_1000hdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_lp_cap_100fdx, "lp_cap_100fdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_lp_cap_100hdx, "lp_cap_100hdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_lp_cap_10fdx, "lp_cap_10fdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_lp_cap_10hdx, "lp_cap_10hdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_lp_cap_remfault, "lp_cap_rem_fault",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_lp_cap_autoneg, "lp_cap_autoneg",
+ KSTAT_DATA_UINT32);
+
+ ksp->ks_private = (void *) dp;
+ ksp->ks_update = usbgem_kstat_update;
+ dp->ksp = ksp;
+
+ kstat_install(ksp);
+
+ return (USB_SUCCESS);
+}
+#endif /* GEM_CONFIG_GLDv3 */
+/* ======================================================================== */
+/*
+ * attach/detatch/usb support
+ */
+/* ======================================================================== */
+int
+usbgem_ctrl_out(struct usbgem_dev *dp,
+ uint8_t reqt, uint8_t req, uint16_t val, uint16_t ix, uint16_t len,
+ void *bp, int size)
+{
+ mblk_t *data;
+ usb_ctrl_setup_t setup;
+ usb_cr_t completion_reason;
+ usb_cb_flags_t cb_flags;
+ usb_flags_t flags;
+ int i;
+ int ret;
+
+ DPRINTF(4, (CE_CONT, "!%s: %s "
+ "reqt:0x%02x req:0x%02x val:0x%04x ix:0x%04x len:0x%02x "
+ "bp:0x%p nic_state:%d",
+ dp->name, __func__, reqt, req, val, ix, len, bp, dp->nic_state));
+
+ if (dp->mac_state == MAC_STATE_DISCONNECTED) {
+ return (USB_PIPE_ERROR);
+ }
+
+ data = NULL;
+ if (size > 0) {
+ if ((data = allocb(size, 0)) == NULL) {
+ return (USB_FAILURE);
+ }
+
+ bcopy(bp, data->b_rptr, size);
+ data->b_wptr = data->b_rptr + size;
+ }
+
+ setup.bmRequestType = reqt;
+ setup.bRequest = req;
+ setup.wValue = val;
+ setup.wIndex = ix;
+ setup.wLength = len;
+ setup.attrs = 0; /* attributes */
+
+ for (i = usbgem_ctrl_retry; i > 0; i--) {
+ completion_reason = 0;
+ cb_flags = 0;
+
+ ret = usb_pipe_ctrl_xfer_wait(DEFAULT_PIPE(dp),
+ &setup, &data, &completion_reason, &cb_flags, 0);
+
+ if (ret == USB_SUCCESS) {
+ break;
+ }
+ if (i == 1) {
+ cmn_err(CE_WARN,
+ "!%s: %s failed: "
+ "reqt:0x%x req:0x%x val:0x%x ix:0x%x len:0x%x "
+ "ret:%d cr:%s(%d), cb_flags:0x%x %s",
+ dp->name, __func__, reqt, req, val, ix, len,
+ ret, usb_str_cr(completion_reason),
+ completion_reason,
+ cb_flags,
+ (i > 1) ? "retrying..." : "fatal");
+ }
+ }
+
+ if (data != NULL) {
+ freemsg(data);
+ }
+
+ return (ret);
+}
+
+int
+usbgem_ctrl_in(struct usbgem_dev *dp,
+ uint8_t reqt, uint8_t req, uint16_t val, uint16_t ix, uint16_t len,
+ void *bp, int size)
+{
+ mblk_t *data;
+ usb_ctrl_setup_t setup;
+ usb_cr_t completion_reason;
+ usb_cb_flags_t cb_flags;
+ int i;
+ int ret;
+ int reclen;
+
+ DPRINTF(4, (CE_CONT,
+ "!%s: %s:"
+ " reqt:0x%02x req:0x%02x val:0x%04x ix:0x%04x len:0x%02x"
+ " bp:x%p mac_state:%d",
+ dp->name, __func__, reqt, req, val, ix, len, bp, dp->mac_state));
+
+ if (dp->mac_state == MAC_STATE_DISCONNECTED) {
+ return (USB_PIPE_ERROR);
+ }
+
+ data = NULL;
+
+ setup.bmRequestType = reqt;
+ setup.bRequest = req;
+ setup.wValue = val;
+ setup.wIndex = ix;
+ setup.wLength = len;
+ setup.attrs = USB_ATTRS_AUTOCLEARING; /* XXX */
+
+ for (i = usbgem_ctrl_retry; i > 0; i--) {
+ completion_reason = 0;
+ cb_flags = 0;
+ ret = usb_pipe_ctrl_xfer_wait(DEFAULT_PIPE(dp), &setup, &data,
+ &completion_reason, &cb_flags, 0);
+
+ if (ret == USB_SUCCESS) {
+ reclen = msgdsize(data);
+ bcopy(data->b_rptr, bp, min(reclen, size));
+ break;
+ }
+ if (i == 1) {
+ cmn_err(CE_WARN,
+ "!%s: %s failed: "
+ "reqt:0x%x req:0x%x val:0x%x ix:0x%x len:0x%x "
+ "ret:%d cr:%s(%d) cb_flags:0x%x %s",
+ dp->name, __func__,
+ reqt, req, val, ix, len,
+ ret, usb_str_cr(completion_reason),
+ completion_reason,
+ cb_flags,
+ (i > 1) ? "retrying..." : "fatal");
+ }
+ }
+
+ if (data) {
+ freemsg(data);
+ }
+
+ return (ret);
+}
+
+int
+usbgem_ctrl_out_val(struct usbgem_dev *dp,
+ uint8_t reqt, uint8_t req, uint16_t val, uint16_t ix, uint16_t len,
+ uint32_t v)
+{
+ uint8_t buf[4];
+
+ /* convert to little endian from native byte order */
+ switch (len) {
+ case 4:
+ buf[3] = v >> 24;
+ buf[2] = v >> 16;
+ /* fall thru */
+ case 2:
+ buf[1] = v >> 8;
+ /* fall thru */
+ case 1:
+ buf[0] = v;
+ }
+
+ return (usbgem_ctrl_out(dp, reqt, req, val, ix, len, buf, len));
+}
+
+int
+usbgem_ctrl_in_val(struct usbgem_dev *dp,
+ uint8_t reqt, uint8_t req, uint16_t val, uint16_t ix, uint16_t len,
+ void *valp)
+{
+ uint8_t buf[4];
+ uint_t v;
+ int err;
+
+#ifdef SANITY
+ bzero(buf, sizeof (buf));
+#endif
+ err = usbgem_ctrl_in(dp, reqt, req, val, ix, len, buf, len);
+ if (err == USB_SUCCESS) {
+ v = 0;
+ switch (len) {
+ case 4:
+ v |= buf[3] << 24;
+ v |= buf[2] << 16;
+ /* FALLTHROUGH */
+ case 2:
+ v |= buf[1] << 8;
+ /* FALLTHROUGH */
+ case 1:
+ v |= buf[0];
+ }
+
+ switch (len) {
+ case 4:
+ *(uint32_t *)valp = v;
+ break;
+ case 2:
+ *(uint16_t *)valp = v;
+ break;
+ case 1:
+ *(uint8_t *)valp = v;
+ break;
+ }
+ }
+ return (err);
+}
+
+/*
+ * Attach / detach / disconnect / reconnect management
+ */
+static int
+usbgem_open_pipes(struct usbgem_dev *dp)
+{
+ int i;
+ int ret;
+ int ifnum;
+ int alt;
+ usb_client_dev_data_t *reg_data;
+ usb_ep_data_t *ep_tree_node;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ ifnum = dp->ugc.usbgc_ifnum;
+ alt = dp->ugc.usbgc_alt;
+
+ ep_tree_node = usb_lookup_ep_data(dp->dip, dp->reg_data, ifnum, alt,
+ 0, USB_EP_ATTR_BULK, USB_EP_DIR_IN);
+ if (ep_tree_node == NULL) {
+ cmn_err(CE_WARN, "!%s: %s: ep_bulkin is NULL",
+ dp->name, __func__);
+ goto err;
+ }
+ dp->ep_bulkin = &ep_tree_node->ep_descr;
+
+ ep_tree_node = usb_lookup_ep_data(dp->dip, dp->reg_data, ifnum, alt,
+ 0, USB_EP_ATTR_BULK, USB_EP_DIR_OUT);
+ if (ep_tree_node == NULL) {
+ cmn_err(CE_WARN, "!%s: %s: ep_bulkout is NULL",
+ dp->name, __func__);
+ goto err;
+ }
+ dp->ep_bulkout = &ep_tree_node->ep_descr;
+
+ ep_tree_node = usb_lookup_ep_data(dp->dip, dp->reg_data, ifnum, alt,
+ 0, USB_EP_ATTR_INTR, USB_EP_DIR_IN);
+ if (ep_tree_node) {
+ dp->ep_intr = &ep_tree_node->ep_descr;
+ } else {
+ /* don't care */
+ DPRINTF(1, (CE_CONT, "!%s: %s: ep_intr is NULL",
+ dp->name, __func__));
+ dp->ep_intr = NULL;
+ }
+
+ /* XXX -- no need to open default pipe */
+
+ /* open bulk out pipe */
+ bzero(&dp->policy_bulkout, sizeof (usb_pipe_policy_t));
+ dp->policy_bulkout.pp_max_async_reqs = 1;
+
+ if ((ret = usb_pipe_open(dp->dip,
+ dp->ep_bulkout, &dp->policy_bulkout, USB_FLAGS_SLEEP,
+ &dp->bulkout_pipe)) != USB_SUCCESS) {
+ cmn_err(CE_WARN,
+ "!%s: %s: err:%x: failed to open bulk-out pipe",
+ dp->name, __func__, ret);
+ dp->bulkout_pipe = NULL;
+ goto err;
+ }
+ DPRINTF(1, (CE_CONT, "!%s: %s: bulkout_pipe opened successfully",
+ dp->name, __func__));
+
+ /* open bulk in pipe */
+ bzero(&dp->policy_bulkin, sizeof (usb_pipe_policy_t));
+ dp->policy_bulkin.pp_max_async_reqs = 1;
+ if ((ret = usb_pipe_open(dp->dip,
+ dp->ep_bulkin, &dp->policy_bulkin, USB_FLAGS_SLEEP,
+ &dp->bulkin_pipe)) != USB_SUCCESS) {
+ cmn_err(CE_WARN,
+ "!%s: %s: ret:%x failed to open bulk-in pipe",
+ dp->name, __func__, ret);
+ dp->bulkin_pipe = NULL;
+ goto err;
+ }
+ DPRINTF(1, (CE_CONT, "!%s: %s: bulkin_pipe opened successfully",
+ dp->name, __func__));
+
+ if (dp->ep_intr) {
+ /* open interrupt pipe */
+ bzero(&dp->policy_interrupt, sizeof (usb_pipe_policy_t));
+ dp->policy_interrupt.pp_max_async_reqs = 1;
+ if ((ret = usb_pipe_open(dp->dip, dp->ep_intr,
+ &dp->policy_interrupt, USB_FLAGS_SLEEP,
+ &dp->intr_pipe)) != USB_SUCCESS) {
+ cmn_err(CE_WARN,
+ "!%s: %s: ret:%x failed to open interrupt pipe",
+ dp->name, __func__, ret);
+ dp->intr_pipe = NULL;
+ goto err;
+ }
+ }
+ DPRINTF(1, (CE_CONT, "!%s: %s: intr_pipe opened successfully",
+ dp->name, __func__));
+
+ return (USB_SUCCESS);
+
+err:
+ if (dp->bulkin_pipe) {
+ usb_pipe_close(dp->dip,
+ dp->bulkin_pipe, USB_FLAGS_SLEEP, NULL, 0);
+ dp->bulkin_pipe = NULL;
+ }
+ if (dp->bulkout_pipe) {
+ usb_pipe_close(dp->dip,
+ dp->bulkout_pipe, USB_FLAGS_SLEEP, NULL, 0);
+ dp->bulkout_pipe = NULL;
+ }
+ if (dp->intr_pipe) {
+ usb_pipe_close(dp->dip,
+ dp->intr_pipe, USB_FLAGS_SLEEP, NULL, 0);
+ dp->intr_pipe = NULL;
+ }
+
+ return (USB_FAILURE);
+}
+
+static int
+usbgem_close_pipes(struct usbgem_dev *dp)
+{
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ if (dp->intr_pipe) {
+ usb_pipe_close(dp->dip,
+ dp->intr_pipe, USB_FLAGS_SLEEP, NULL, 0);
+ dp->intr_pipe = NULL;
+ }
+ DPRINTF(1, (CE_CONT, "!%s: %s: 1", dp->name, __func__));
+
+ ASSERT(dp->bulkin_pipe);
+ usb_pipe_close(dp->dip, dp->bulkin_pipe, USB_FLAGS_SLEEP, NULL, 0);
+ dp->bulkin_pipe = NULL;
+ DPRINTF(1, (CE_CONT, "!%s: %s: 2", dp->name, __func__));
+
+ ASSERT(dp->bulkout_pipe);
+ usb_pipe_close(dp->dip, dp->bulkout_pipe, USB_FLAGS_SLEEP, NULL, 0);
+ dp->bulkout_pipe = NULL;
+ DPRINTF(1, (CE_CONT, "!%s: %s: 3", dp->name, __func__));
+
+ return (USB_SUCCESS);
+}
+
+#define FREEZE_GRACEFUL (B_TRUE)
+#define FREEZE_NO_GRACEFUL (B_FALSE)
+static int
+usbgem_freeze_device(struct usbgem_dev *dp, boolean_t graceful)
+{
+ DPRINTF(0, (CE_NOTE, "!%s: %s: called", dp->name, __func__));
+
+ /* stop nic activity */
+ (void) usbgem_mac_stop(dp, MAC_STATE_DISCONNECTED, graceful);
+
+ /*
+ * Here we free all memory resource allocated, because it will
+ * cause to panic the system that we free usb_bulk_req objects
+ * during the usb device is disconnected.
+ */
+ (void) usbgem_free_memory(dp);
+
+ return (USB_SUCCESS);
+}
+
+static int
+usbgem_disconnect_cb(dev_info_t *dip)
+{
+ int ret;
+ struct usbgem_dev *dp;
+
+ dp = USBGEM_GET_DEV(dip);
+
+ cmn_err(CE_NOTE, "!%s: the usb device was disconnected (dp=%p)",
+ dp->name, dp);
+
+ /* start serialize */
+ rw_enter(&dp->dev_state_lock, RW_WRITER);
+
+ ret = usbgem_freeze_device(dp, 0);
+
+ /* end of serialize */
+ rw_exit(&dp->dev_state_lock);
+
+ return (ret);
+}
+
+static int
+usbgem_recover_device(struct usbgem_dev *dp)
+{
+ int err;
+
+ DPRINTF(0, (CE_NOTE, "!%s: %s: called", dp->name, __func__));
+
+ err = USB_SUCCESS;
+
+ /* reinitialize the usb connection */
+ usbgem_close_pipes(dp);
+ if ((err = usbgem_open_pipes(dp)) != USB_SUCCESS) {
+ goto x;
+ }
+
+ /* initialize nic state */
+ dp->mac_state = MAC_STATE_STOPPED;
+ dp->mii_state = MII_STATE_UNKNOWN;
+
+ /* allocate memory resources again */
+ if ((err = usbgem_alloc_memory(dp)) != USB_SUCCESS) {
+ goto x;
+ }
+
+ /* restart nic and recover state */
+ (void) usbgem_restart_nic(dp);
+
+ usbgem_mii_init(dp);
+
+ /* kick potentially stopped house keeping thread */
+ cv_signal(&dp->link_watcher_wait_cv);
+x:
+ return (err);
+}
+
+static int
+usbgem_reconnect_cb(dev_info_t *dip)
+{
+ int err = USB_SUCCESS;
+ struct usbgem_dev *dp;
+
+ dp = USBGEM_GET_DEV(dip);
+ DPRINTF(0, (CE_CONT, "!%s: dp=%p", ddi_get_name(dip), dp));
+#ifdef notdef
+ /* check device changes after disconnect */
+ if (usb_check_same_device(dp->dip, NULL, USB_LOG_L2, -1,
+ USB_CHK_BASIC | USB_CHK_CFG, NULL) != USB_SUCCESS) {
+ cmn_err(CE_CONT,
+ "!%s: no or different device installed", dp->name);
+ return (DDI_SUCCESS);
+ }
+#endif
+ cmn_err(CE_NOTE, "%s: the usb device was reconnected", dp->name);
+
+ /* start serialize */
+ rw_enter(&dp->dev_state_lock, RW_WRITER);
+
+ if (dp->mac_state == MAC_STATE_DISCONNECTED) {
+ err = usbgem_recover_device(dp);
+ }
+
+ /* end of serialize */
+ rw_exit(&dp->dev_state_lock);
+
+ return (err == USB_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
+}
+
+int
+usbgem_suspend(dev_info_t *dip)
+{
+ int err = USB_SUCCESS;
+ struct usbgem_dev *dp;
+
+ dp = USBGEM_GET_DEV(dip);
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: callded", dp->name, __func__));
+
+ /* start serialize */
+ rw_enter(&dp->dev_state_lock, RW_WRITER);
+
+ if (dp->mac_state == MAC_STATE_DISCONNECTED) {
+ err = usbgem_freeze_device(dp, STOP_GRACEFUL);
+ }
+
+ /* end of serialize */
+ rw_exit(&dp->dev_state_lock);
+
+ return (err == USB_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
+}
+
+int
+usbgem_resume(dev_info_t *dip)
+{
+ int err = USB_SUCCESS;
+ struct usbgem_dev *dp;
+
+ dp = USBGEM_GET_DEV(dip);
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: callded", dp->name, __func__));
+#ifdef notdef
+ /* check device changes after disconnect */
+ if (usb_check_same_device(dp->dip, NULL, USB_LOG_L2, -1,
+ USB_CHK_BASIC | USB_CHK_CFG, NULL) != USB_SUCCESS) {
+ cmn_err(CE_CONT,
+ "!%s: no or different device installed", dp->name);
+ return (DDI_SUCCESS);
+ }
+#endif
+ /* start serialize */
+ rw_enter(&dp->dev_state_lock, RW_WRITER);
+
+ if (dp->mac_state == MAC_STATE_DISCONNECTED) {
+ err = usbgem_recover_device(dp);
+ }
+
+ /* end of serialize */
+ rw_exit(&dp->dev_state_lock);
+
+ return (err == USB_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
+}
+
+#define USBGEM_LOCAL_DATA_SIZE(gc) \
+ (sizeof (struct usbgem_dev) + USBGEM_MCALLOC)
+
+struct usbgem_dev *
+usbgem_do_attach(dev_info_t *dip,
+ struct usbgem_conf *gc, void *lp, int lmsize)
+{
+ struct usbgem_dev *dp;
+ int i;
+#ifdef USBGEM_CONFIG_GLDv3
+ mac_register_t *macp = NULL;
+#else
+ gld_mac_info_t *macinfo;
+ void *tmp;
+#endif
+ int ret;
+ int unit;
+ int err;
+
+ unit = ddi_get_instance(dip);
+
+ DPRINTF(2, (CE_CONT, "!usbgem%d: %s: called", unit, __func__));
+
+ /*
+ * Allocate soft data structure
+ */
+ dp = kmem_zalloc(USBGEM_LOCAL_DATA_SIZE(gc), KM_SLEEP);
+ if (dp == NULL) {
+#ifndef USBGEM_CONFIG_GLDv3
+ gld_mac_free(macinfo);
+#endif
+ return (NULL);
+ }
+#ifdef USBGEM_CONFIG_GLDv3
+ if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
+ cmn_err(CE_WARN, "!gem%d: %s: mac_alloc failed",
+ unit, __func__);
+ return (NULL);
+ }
+#else
+ macinfo = gld_mac_alloc(dip);
+ dp->macinfo = macinfo;
+#endif
+
+ /* link to private area */
+ dp->private = lp;
+ dp->priv_size = lmsize;
+ dp->mc_list = (struct mcast_addr *)&dp[1];
+
+ dp->dip = dip;
+ bcopy(gc->usbgc_name, dp->name, USBGEM_NAME_LEN);
+
+ /*
+ * register with usb service
+ */
+ if (usb_client_attach(dip, USBDRV_VERSION, 0) != USB_SUCCESS) {
+ cmn_err(CE_WARN,
+ "%s: %s: usb_client_attach failed",
+ dp->name, __func__);
+ goto err_free_private;
+ }
+
+ if (usb_get_dev_data(dip, &dp->reg_data,
+ USB_PARSE_LVL_ALL, 0) != USB_SUCCESS) {
+ dp->reg_data = NULL;
+ goto err_unregister_client;
+ }
+#ifdef USBGEM_DEBUG_LEVEL
+ usb_print_descr_tree(dp->dip, dp->reg_data);
+#endif
+
+ if (usbgem_open_pipes(dp) != USB_SUCCESS) {
+ /* failed to open pipes */
+ cmn_err(CE_WARN, "!%s: %s: failed to open pipes",
+ dp->name, __func__);
+ goto err_unregister_client;
+ }
+
+ /*
+ * Initialize mutexs and condition variables
+ */
+ mutex_init(&dp->rxlock, NULL, MUTEX_DRIVER, NULL);
+ mutex_init(&dp->txlock, NULL, MUTEX_DRIVER, NULL);
+ cv_init(&dp->rx_drain_cv, NULL, CV_DRIVER, NULL);
+ cv_init(&dp->tx_drain_cv, NULL, CV_DRIVER, NULL);
+ rw_init(&dp->dev_state_lock, NULL, RW_DRIVER, NULL);
+ mutex_init(&dp->link_watcher_lock, NULL, MUTEX_DRIVER, NULL);
+ cv_init(&dp->link_watcher_wait_cv, NULL, CV_DRIVER, NULL);
+ sema_init(&dp->hal_op_lock, 1, NULL, SEMA_DRIVER, NULL);
+ sema_init(&dp->rxfilter_lock, 1, NULL, SEMA_DRIVER, NULL);
+
+ /*
+ * Initialize configuration
+ */
+ dp->ugc = *gc;
+
+ dp->mtu = ETHERMTU;
+ dp->rxmode = 0;
+ dp->speed = USBGEM_SPD_10; /* default is 10Mbps */
+ dp->full_duplex = B_FALSE; /* default is half */
+ dp->flow_control = FLOW_CONTROL_NONE;
+
+ dp->nic_state = NIC_STATE_STOPPED;
+ dp->mac_state = MAC_STATE_STOPPED;
+ dp->mii_state = MII_STATE_UNKNOWN;
+
+ /* performance tuning parameters */
+ dp->txthr = ETHERMAX; /* tx fifo threshoold */
+ dp->txmaxdma = 16*4; /* tx max dma burst size */
+ dp->rxthr = 128; /* rx fifo threshoold */
+ dp->rxmaxdma = 16*4; /* rx max dma burst size */
+
+ /*
+ * Get media mode infomation from .conf file
+ */
+ usbgem_read_conf(dp);
+
+ /* rx_buf_len depend on MTU */
+ dp->rx_buf_len = MAXPKTBUF(dp) + dp->ugc.usbgc_rx_header_len;
+
+ /*
+ * Reset the chip
+ */
+ if (usbgem_hal_reset_chip(dp) != USB_SUCCESS) {
+ cmn_err(CE_WARN,
+ "!%s: %s: failed to reset the usb device",
+ dp->name, __func__);
+ goto err_destroy_locks;
+ }
+
+ /*
+ * HW dependant paremeter initialization
+ */
+ if (usbgem_hal_attach_chip(dp) != USB_SUCCESS) {
+ cmn_err(CE_WARN,
+ "!%s: %s: failed to attach the usb device",
+ dp->name, __func__);
+ goto err_destroy_locks;
+ }
+
+ /* allocate resources */
+ if (usbgem_alloc_memory(dp) != USB_SUCCESS) {
+ goto err_destroy_locks;
+ }
+
+ DPRINTF(0, (CE_CONT,
+ "!%s: %02x:%02x:%02x:%02x:%02x:%02x",
+ dp->name,
+ dp->dev_addr.ether_addr_octet[0],
+ dp->dev_addr.ether_addr_octet[1],
+ dp->dev_addr.ether_addr_octet[2],
+ dp->dev_addr.ether_addr_octet[3],
+ dp->dev_addr.ether_addr_octet[4],
+ dp->dev_addr.ether_addr_octet[5]));
+
+ /* copy mac address */
+ dp->cur_addr = dp->dev_addr;
+
+ /* pre-calculated tx timeout in second for performance */
+ dp->bulkout_timeout =
+ dp->ugc.usbgc_tx_timeout / drv_usectohz(1000*1000);
+
+#ifdef USBGEM_CONFIG_GLDv3
+ usbgem_gld3_init(dp, macp);
+#else
+ usbgem_gld_init(dp, macinfo, ident);
+#endif
+
+ /* Probe MII phy (scan phy) */
+ dp->mii_lpable = 0;
+ dp->mii_advert = 0;
+ dp->mii_exp = 0;
+ dp->mii_ctl1000 = 0;
+ dp->mii_stat1000 = 0;
+
+ dp->mii_status_ro = 0;
+ dp->mii_xstatus_ro = 0;
+
+ if (usbgem_mii_probe(dp) != USB_SUCCESS) {
+ cmn_err(CE_WARN, "!%s: %s: mii_probe failed",
+ dp->name, __func__);
+ goto err_free_memory;
+ }
+
+ /* mask unsupported abilities */
+ dp->anadv_autoneg &= BOOLEAN(dp->mii_status & MII_STATUS_CANAUTONEG);
+ dp->anadv_1000fdx &=
+ BOOLEAN(dp->mii_xstatus &
+ (MII_XSTATUS_1000BASEX_FD | MII_XSTATUS_1000BASET_FD));
+ dp->anadv_1000hdx &=
+ BOOLEAN(dp->mii_xstatus &
+ (MII_XSTATUS_1000BASEX | MII_XSTATUS_1000BASET));
+ dp->anadv_100t4 &= BOOLEAN(dp->mii_status & MII_STATUS_100_BASE_T4);
+ dp->anadv_100fdx &= BOOLEAN(dp->mii_status & MII_STATUS_100_BASEX_FD);
+ dp->anadv_100hdx &= BOOLEAN(dp->mii_status & MII_STATUS_100_BASEX);
+ dp->anadv_10fdx &= BOOLEAN(dp->mii_status & MII_STATUS_10_FD);
+ dp->anadv_10hdx &= BOOLEAN(dp->mii_status & MII_STATUS_10);
+
+ if (usbgem_mii_init(dp) != USB_SUCCESS) {
+ cmn_err(CE_WARN, "!%s: %s: mii_init failed",
+ dp->name, __func__);
+ goto err_free_memory;
+ }
+
+ /*
+ * initialize kstats including mii statistics
+ */
+#ifdef USBGEM_CONFIG_GLDv3
+#ifdef USBGEM_CONFIG_ND
+ usbgem_nd_setup(dp);
+#endif
+#else
+ if (usbgem_kstat_init(dp) != USB_SUCCESS) {
+ goto err_free_memory;
+ }
+#endif
+
+ /*
+ * Add interrupt to system.
+ */
+#ifdef USBGEM_CONFIG_GLDv3
+ if (ret = mac_register(macp, &dp->mh)) {
+ cmn_err(CE_WARN, "!%s: mac_register failed, error:%d",
+ dp->name, ret);
+ goto err_release_stats;
+ }
+ mac_free(macp);
+ macp = NULL;
+#else
+ /* gld_register will corrupts driver_private */
+ tmp = ddi_get_driver_private(dip);
+ if (gld_register(dip,
+ (char *)ddi_driver_name(dip), macinfo) != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "!%s: %s: gld_register failed",
+ dp->name, __func__);
+ ddi_set_driver_private(dip, tmp);
+ goto err_release_stats;
+ }
+ /* restore driver private */
+ ddi_set_driver_private(dip, tmp);
+#endif /* USBGEM_CONFIG_GLDv3 */
+ if (usb_register_hotplug_cbs(dip,
+ usbgem_suspend, usbgem_resume) != USB_SUCCESS) {
+ cmn_err(CE_WARN,
+ "!%s: %s: failed to register hotplug cbs",
+ dp->name, __func__);
+ goto err_unregister_gld;
+ }
+
+ /* reset mii and start mii link watcher */
+ if (usbgem_mii_start(dp) != USB_SUCCESS) {
+ goto err_unregister_hotplug;
+ }
+
+ /* start tx watchdow watcher */
+ if (usbgem_tx_watcher_start(dp)) {
+ goto err_usbgem_mii_stop;
+ }
+
+ ddi_set_driver_private(dip, (caddr_t)dp);
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: return: success", dp->name, __func__));
+
+ return (dp);
+
+err_usbgem_mii_stop:
+ usbgem_mii_stop(dp);
+
+err_unregister_hotplug:
+ usb_unregister_hotplug_cbs(dip);
+
+err_unregister_gld:
+#ifdef USBGEM_CONFIG_GLDv3
+ mac_unregister(dp->mh);
+#else
+ gld_unregister(macinfo);
+#endif
+
+err_release_stats:
+#ifdef USBGEM_CONFIG_GLDv3
+#ifdef USBGEM_CONFIG_ND
+ /* release NDD resources */
+ usbgem_nd_cleanup(dp);
+#endif
+#else
+ kstat_delete(dp->ksp);
+#endif
+
+err_free_memory:
+ usbgem_free_memory(dp);
+
+err_destroy_locks:
+ cv_destroy(&dp->tx_drain_cv);
+ cv_destroy(&dp->rx_drain_cv);
+ mutex_destroy(&dp->txlock);
+ mutex_destroy(&dp->rxlock);
+ rw_destroy(&dp->dev_state_lock);
+ mutex_destroy(&dp->link_watcher_lock);
+ cv_destroy(&dp->link_watcher_wait_cv);
+ sema_destroy(&dp->hal_op_lock);
+ sema_destroy(&dp->rxfilter_lock);
+
+err_close_pipes:
+ (void) usbgem_close_pipes(dp);
+
+err_unregister_client:
+ usb_client_detach(dp->dip, dp->reg_data);
+
+err_free_private:
+#ifdef USBGEM_CONFIG_GLDv3
+ if (macp) {
+ mac_free(macp);
+ }
+#else
+ gld_mac_free(macinfo);
+#endif
+ kmem_free((caddr_t)dp, USBGEM_LOCAL_DATA_SIZE(gc));
+
+ return (NULL);
+}
+
+int
+usbgem_do_detach(dev_info_t *dip)
+{
+ struct usbgem_dev *dp;
+
+ dp = USBGEM_GET_DEV(dip);
+
+#ifdef USBGEM_CONFIG_GLDv3
+ /* unregister with gld v3 */
+ if (mac_unregister(dp->mh) != DDI_SUCCESS) {
+ return (DDI_FAILURE);
+ }
+#else
+ /* unregister with gld v2 */
+ if (gld_unregister(dp->macinfo) != DDI_SUCCESS) {
+ return (DDI_FAILURE);
+ }
+#endif
+ /* unregister with hotplug service */
+ usb_unregister_hotplug_cbs(dip);
+
+ /* stop tx watchdog watcher*/
+ usbgem_tx_watcher_stop(dp);
+
+ /* stop the link manager */
+ usbgem_mii_stop(dp);
+
+ /* unregister with usb service */
+ (void) usbgem_free_memory(dp);
+ (void) usbgem_close_pipes(dp);
+ usb_client_detach(dp->dip, dp->reg_data);
+ dp->reg_data = NULL;
+
+ /* unregister with kernel statistics */
+#ifdef USBGEM_CONFIG_GLDv3
+#ifdef USBGEM_CONFIG_ND
+ /* release ndd resources */
+ usbgem_nd_cleanup(dp);
+#endif
+#else
+ /* destroy kstat objects */
+ kstat_delete(dp->ksp);
+#endif
+
+ /* release locks and condition variables */
+ mutex_destroy(&dp->txlock);
+ mutex_destroy(&dp->rxlock);
+ cv_destroy(&dp->tx_drain_cv);
+ cv_destroy(&dp->rx_drain_cv);
+ rw_destroy(&dp->dev_state_lock);
+ mutex_destroy(&dp->link_watcher_lock);
+ cv_destroy(&dp->link_watcher_wait_cv);
+ sema_destroy(&dp->hal_op_lock);
+ sema_destroy(&dp->rxfilter_lock);
+
+ /* release basic memory resources */
+#ifndef USBGEM_CONFIG_GLDv3
+ gld_mac_free(dp->macinfo);
+#endif
+ kmem_free((caddr_t)(dp->private), dp->priv_size);
+ kmem_free((caddr_t)dp, USBGEM_LOCAL_DATA_SIZE(&dp->ugc));
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: return: success",
+ ddi_driver_name(dip), __func__));
+
+ return (DDI_SUCCESS);
+}
+
+int
+usbgem_mod_init(struct dev_ops *dop, char *name)
+{
+#ifdef USBGEM_CONFIG_GLDv3
+ major_t major;
+ major = ddi_name_to_major(name);
+ if (major == DDI_MAJOR_T_NONE) {
+ return (DDI_FAILURE);
+ }
+ mac_init_ops(dop, name);
+#endif
+ return (DDI_SUCCESS);
+}
+
+void
+usbgem_mod_fini(struct dev_ops *dop)
+{
+#ifdef USBGEM_CONFIG_GLDv3
+ mac_fini_ops(dop);
+#endif
+}
+
+int
+usbgem_quiesce(dev_info_t *dip)
+{
+ struct usbgem_dev *dp;
+
+ dp = USBGEM_GET_DEV(dip);
+
+ ASSERT(dp != NULL);
+
+ if (dp->mac_state != MAC_STATE_DISCONNECTED &&
+ dp->mac_state != MAC_STATE_STOPPED) {
+ if (usbgem_hal_stop_chip(dp) != USB_SUCCESS) {
+ (void) usbgem_hal_reset_chip(dp);
+ }
+ }
+
+ /* devo_quiesce() must return DDI_SUCCESS always */
+ return (DDI_SUCCESS);
+}
diff --git a/usr/src/uts/common/io/usbgem/usbgem.h b/usr/src/uts/common/io/usbgem/usbgem.h
new file mode 100644
index 0000000000..80b89a260e
--- /dev/null
+++ b/usr/src/uts/common/io/usbgem/usbgem.h
@@ -0,0 +1,428 @@
+/*
+ * usbgem.h: General USB to Ethernet MAC driver framework
+ * @(#)usbgem.h 1.4 12/02/09
+ * (C) Copyright 2003-2009 Masayuki Murayama KHF04453@nifty.ne.jp
+ */
+
+#ifndef __USBGEM_H__
+#define __USBGEM_H__
+
+#pragma ident "@(#)usbgem.h 1.4 12/02/09"
+
+#ifdef USBGEM_CONFIG_GLDv3
+#include <sys/mac.h>
+#ifndef MAC_VERSION
+#include <sys/mac_provider.h>
+#endif
+#include <sys/mac_ether.h>
+#else
+#include <sys/gld.h>
+#endif /* GLDv3 */
+
+/*
+ * Useful macros and typedefs
+ */
+#define USBGEM_NAME_LEN 32
+
+#define USBGEM_TX_TIMEOUT (drv_usectohz(3*1000000))
+#define USBGEM_TX_TIMEOUT_INTERVAL (drv_usectohz(1*1000000))
+#define USBGEM_LINK_WATCH_INTERVAL (drv_usectohz(1*1000000))
+
+/* general return code */
+#define USBGEM_SUCCESS 0
+#define USBGEM_FAILURE 1
+
+/* return code of usbgem_tx_done */
+#define INTR_RESTART_TX 0x80000000U
+
+struct usbgem_stats {
+ uint32_t intr;
+
+ uint32_t crc;
+ uint32_t errrcv;
+ uint32_t overflow;
+ uint32_t frame;
+ uint32_t missed;
+ uint32_t runt;
+ uint32_t frame_too_long;
+ uint32_t norcvbuf;
+ uint32_t sqe;
+
+ uint32_t collisions;
+ uint32_t first_coll;
+ uint32_t multi_coll;
+ uint32_t excoll;
+ uint32_t xmit_internal_err;
+ uint32_t nocarrier;
+ uint32_t defer;
+ uint32_t errxmt;
+ uint32_t underflow;
+ uint32_t xmtlatecoll;
+ uint32_t noxmtbuf;
+ uint32_t jabber;
+
+
+ uint64_t rbytes;
+ uint64_t obytes;
+ uint64_t rpackets;
+ uint64_t opackets;
+ uint32_t rbcast;
+ uint32_t obcast;
+ uint32_t rmcast;
+ uint32_t omcast;
+ uint32_t rcv_internal_err;
+};
+
+struct mcast_addr {
+ struct ether_addr addr;
+ uint32_t hash;
+};
+
+#define USBGEM_MAXMC 64
+#define USBGEM_MCALLOC (sizeof(struct mcast_addr) * USBGEM_MAXMC)
+
+#define SLOT(dp, n) ((n) % (dp)->ugc.usbgc_tx_list_max)
+
+/*
+ * mac soft state
+ */
+struct usbgem_dev {
+ dev_info_t *dip;
+#ifdef USBGEM_CONFIG_GLDv3
+ mac_handle_t mh;
+#else
+ void *macinfo; /* opaque handle for upper layer */
+#endif
+ char name[USBGEM_NAME_LEN];
+
+ /* pointer to usb private data */
+ usb_client_dev_data_t *reg_data;
+
+ /* usb handles */
+ usb_pipe_handle_t default_pipe;
+ usb_pipe_handle_t bulkin_pipe;
+ usb_pipe_handle_t bulkout_pipe;
+ usb_pipe_handle_t intr_pipe;
+
+ /* usb endpoints */
+ usb_ep_descr_t *ep_default;
+ usb_ep_descr_t *ep_bulkin;
+ usb_ep_descr_t *ep_bulkout;
+ usb_ep_descr_t *ep_intr;
+
+ /* usb policies */
+ usb_pipe_policy_t policy_default;
+ usb_pipe_policy_t policy_bulkin;
+ usb_pipe_policy_t policy_bulkout;
+ usb_pipe_policy_t policy_interrupt;
+
+ /* MAC address information */
+ struct ether_addr cur_addr;
+ struct ether_addr dev_addr;
+
+ /* RX state and resource management */
+ kmutex_t rxlock;
+ int rx_busy_cnt;
+ boolean_t rx_active;
+ kcondvar_t rx_drain_cv;
+
+ /* RX buffer management */
+ int rx_buf_len;
+
+ /* TX state and resource management */
+ kmutex_t txlock;
+ int tx_busy_cnt;
+ usb_bulk_req_t *tx_free_list;
+ kcondvar_t tx_drain_cv;
+ clock_t tx_start_time;
+ int bulkout_timeout; /* in second */
+ int tx_max_packets;
+ int tx_seq_num;
+ int tx_intr_pended;
+
+ /* NIC state from OS view */
+ int nic_state;
+#define NIC_STATE_UNKNOWN 0
+#define NIC_STATE_STOPPED 1
+#define NIC_STATE_INITIALIZED 2
+#define NIC_STATE_ONLINE 3
+
+ /* MAC state from hardware view */
+ int mac_state;
+#define MAC_STATE_DISCONNECTED 0 /* it includes suspended state too */
+#define MAC_STATE_STOPPED 1 /* powered up / buf not initialized */
+#define MAC_STATE_INITIALIZED 2 /* initialized */
+#define MAC_STATE_ONLINE 3 /* working correctly */
+#define MAC_STATE_ERROR 4 /* need to restart nic */
+
+ clock_t fatal_error;
+
+ /* robustness: timer and watchdog */
+ uint_t tx_watcher_stop;
+ kt_did_t tx_watcher_did;
+ kcondvar_t tx_watcher_cv;
+ kmutex_t tx_watcher_lock;
+ clock_t tx_watcher_timeout;
+ clock_t tx_watcher_interval;
+
+ /* MII mamagement */
+ boolean_t anadv_autoneg:1;
+ boolean_t anadv_1000fdx:1;
+ boolean_t anadv_1000hdx:1;
+ boolean_t anadv_100t4:1;
+ boolean_t anadv_100fdx:1;
+ boolean_t anadv_100hdx:1;
+ boolean_t anadv_10fdx:1;
+ boolean_t anadv_10hdx:1;
+ boolean_t anadv_1000t_ms:2;
+ boolean_t anadv_pause:1;
+ boolean_t anadv_asmpause:1;
+ boolean_t mii_advert_ro:1;
+
+ boolean_t full_duplex:1;
+ int speed:3;
+#define USBGEM_SPD_10 0
+#define USBGEM_SPD_100 1
+#define USBGEM_SPD_1000 2
+#define USBGEM_SPD_NUM 3
+ unsigned int flow_control:2;
+#define FLOW_CONTROL_NONE 0
+#define FLOW_CONTROL_SYMMETRIC 1
+#define FLOW_CONTROL_TX_PAUSE 2
+#define FLOW_CONTROL_RX_PAUSE 3
+
+ boolean_t mii_supress_msg:1;
+
+ uint32_t mii_phy_id;
+ uint16_t mii_status;
+ uint16_t mii_advert;
+ uint16_t mii_lpable;
+ uint16_t mii_exp;
+ uint16_t mii_ctl1000;
+ uint16_t mii_stat1000;
+ uint16_t mii_xstatus;
+ int8_t mii_phy_addr; /* must be signed */
+
+ uint16_t mii_status_ro;
+ uint16_t mii_xstatus_ro;
+
+ int mii_state;
+#define MII_STATE_UNKNOWN 0
+#define MII_STATE_RESETTING 1
+#define MII_STATE_AUTONEGOTIATING 2
+#define MII_STATE_AN_DONE 3
+#define MII_STATE_MEDIA_SETUP 4
+#define MII_STATE_LINKUP 5
+#define MII_STATE_LINKDOWN 6
+
+ clock_t mii_last_check; /* in tick */
+ clock_t mii_timer; /* in tick */
+#define MII_RESET_TIMEOUT drv_usectohz(1000*1000)
+#define MII_AN_TIMEOUT drv_usectohz(5000*1000)
+#define MII_LINKDOWN_TIMEOUT drv_usectohz(10000*1000)
+
+ clock_t mii_interval; /* in tick */
+ clock_t linkup_delay; /* in tick */
+
+ uint_t link_watcher_stop;
+ kt_did_t link_watcher_did;
+ kcondvar_t link_watcher_wait_cv;
+ kmutex_t link_watcher_lock;
+
+ krwlock_t dev_state_lock; /* mac_state and nic_state */
+ ksema_t hal_op_lock; /* serialize hw operations */
+ ksema_t drv_op_lock; /* hotplug op lock */
+
+ /* multcast list */
+ ksema_t rxfilter_lock;
+ int mc_count;
+ int mc_count_req;
+ struct mcast_addr *mc_list;
+ int rxmode;
+#define RXMODE_PROMISC 0x01
+#define RXMODE_ALLMULTI_REQ 0x02
+#define RXMODE_MULTI_OVF 0x04
+#define RXMODE_ENABLE 0x08
+#define RXMODE_ALLMULTI (RXMODE_ALLMULTI_REQ | RXMODE_MULTI_OVF)
+#define RXMODE_BITS \
+ "\020" \
+ "\004ENABLE" \
+ "\003MULTI_OVF" \
+ "\002ALLMULTI_REQ" \
+ "\001PROMISC"
+
+ /* statistcs */
+ struct usbgem_stats stats;
+
+ /* pointer to local structure */
+ void *private;
+ int priv_size;
+
+ /* configuration */
+ struct usbgem_conf {
+ /* name */
+ char usbgc_name[USBGEM_NAME_LEN];
+ int usbgc_ppa;
+
+ /* specification on usb */
+ int usbgc_ifnum; /* interface number */
+ int usbgc_alt; /* alternate */
+
+ /* specification on tx engine */
+ int usbgc_tx_list_max;
+
+ /* specification on rx engine */
+ int usbgc_rx_header_len;
+ int usbgc_rx_list_max;
+
+ /* time out parameters */
+ clock_t usbgc_tx_timeout;
+ clock_t usbgc_tx_timeout_interval;
+
+ /* flow control */
+ int usbgc_flow_control;
+
+ /* MII timeout parameters */
+ clock_t usbgc_mii_linkdown_timeout;
+ clock_t usbgc_mii_link_watch_interval;
+ clock_t usbgc_mii_reset_timeout;
+
+ clock_t usbgc_mii_an_watch_interval;
+ clock_t usbgc_mii_an_timeout;
+ clock_t usbgc_mii_an_wait;
+ clock_t usbgc_mii_an_delay;
+
+ /* MII configuration */
+ int usbgc_mii_addr_min;
+ int usbgc_mii_linkdown_action;
+ int usbgc_mii_linkdown_timeout_action;
+#define MII_ACTION_NONE 0
+#define MII_ACTION_RESET 1
+#define MII_ACTION_RSA 2
+ boolean_t usbgc_mii_dont_reset:1;
+ boolean_t usbgc_mii_an_oneshot:1;
+ boolean_t usbgc_mii_hw_link_detection:1;
+ boolean_t usbgc_mii_stop_mac_on_linkdown:1;
+ uint16_t usbgc_mii_an_cmd;
+
+ /* I/O methods */
+
+ /* mac operation */
+ int (*usbgc_attach_chip)(struct usbgem_dev *dp);
+ int (*usbgc_reset_chip)(struct usbgem_dev *dp);
+ int (*usbgc_init_chip)(struct usbgem_dev *dp);
+ int (*usbgc_start_chip)(struct usbgem_dev *dp);
+ int (*usbgc_stop_chip)(struct usbgem_dev *dp);
+ uint32_t (*usbgc_multicast_hash)(struct usbgem_dev *dp,
+ const uint8_t *);
+ int (*usbgc_set_rx_filter)(struct usbgem_dev *dp);
+ int (*usbgc_set_media)(struct usbgem_dev *dp);
+ int (*usbgc_get_stats)(struct usbgem_dev *dp);
+ void (*usbgc_interrupt)(struct usbgem_dev *dp, mblk_t *mp);
+
+ /* packet manupilation */
+ mblk_t *(*usbgc_tx_make_packet)(struct usbgem_dev *dp, mblk_t *mp);
+ mblk_t *(*usbgc_rx_make_packet)(struct usbgem_dev *dp, mblk_t *mp);
+ /* mii operations */
+ int (*usbgc_mii_probe)(struct usbgem_dev *dp);
+ int (*usbgc_mii_init)(struct usbgem_dev *dp);
+ int (*usbgc_mii_config)(struct usbgem_dev *dp, int *errp);
+ uint16_t (*usbgc_mii_read)(struct usbgem_dev *dp, uint_t reg, int *errp);
+ void (*usbgc_mii_write)(struct usbgem_dev *dp, uint_t reg, uint16_t val, int *errp);
+
+ /* jumbo frame */
+ int usbgc_max_mtu;
+ int usbgc_default_mtu;
+ int usbgc_min_mtu;
+ } ugc;
+
+ int misc_flag;
+#define USBGEM_VLAN 0x0001
+ timeout_id_t intr_watcher_id;
+
+ /* buffer size */
+ uint_t mtu;
+
+ /* performance tuning parameters */
+ uint_t txthr; /* tx fifo threshoold */
+ uint_t txmaxdma; /* tx max dma burst size */
+ uint_t rxthr; /* rx fifo threshoold */
+ uint_t rxmaxdma; /* tx max dma burst size */
+
+ /* kstat stuff */
+ kstat_t *ksp;
+
+ /* ndd stuff */
+ caddr_t nd_data_p;
+ caddr_t nd_arg_p;
+
+#ifdef USBGEM_DEBUG_LEVEL
+ int tx_cnt;
+#endif
+};
+
+/*
+ * Exported functions
+ */
+int usbgem_ctrl_out(struct usbgem_dev *dp,
+ uint8_t reqt, uint8_t req, uint16_t val, uint16_t ix, uint16_t len,
+ void *bp, int size);
+
+int usbgem_ctrl_in(struct usbgem_dev *dp,
+ uint8_t reqt, uint8_t req, uint16_t val, uint16_t ix, uint16_t len,
+ void *bp, int size);
+
+int usbgem_ctrl_out_val(struct usbgem_dev *dp,
+ uint8_t reqt, uint8_t req, uint16_t val, uint16_t ix, uint16_t len,
+ uint32_t v);
+
+int usbgem_ctrl_in_val(struct usbgem_dev *dp,
+ uint8_t reqt, uint8_t req, uint16_t val, uint16_t ix, uint16_t len,
+ void *valp);
+
+void usbgem_generate_macaddr(struct usbgem_dev *, uint8_t *);
+boolean_t usbgem_get_mac_addr_conf(struct usbgem_dev *);
+int usbgem_mii_probe_default(struct usbgem_dev *);
+int usbgem_mii_init_default(struct usbgem_dev *);
+int usbgem_mii_config_default(struct usbgem_dev *, int *errp);
+void usbgem_mii_update_link(struct usbgem_dev *);
+void usbgem_restart_tx(struct usbgem_dev *);
+boolean_t usbgem_tx_done(struct usbgem_dev *, int);
+void usbgem_receive(struct usbgem_dev *);
+struct usbgem_dev *usbgem_do_attach(dev_info_t *,
+ struct usbgem_conf *, void *, int);
+int usbgem_do_detach(dev_info_t *);
+
+uint32_t usbgem_ether_crc_le(const uint8_t *addr);
+uint32_t usbgem_ether_crc_be(const uint8_t *addr);
+
+int usbgem_resume(dev_info_t *);
+int usbgem_suspend(dev_info_t *);
+int usbgem_quiesce(dev_info_t *);
+
+#ifdef USBGEM_CONFIG_GLDv3
+#if DEVO_REV < 4
+#define USBGEM_STREAM_OPS(dev_ops, attach, detach) \
+ DDI_DEFINE_STREAM_OPS(dev_ops, nulldev, nulldev, attach, detach, \
+ nodev, NULL, D_MP, NULL)
+#else
+#define USBGEM_STREAM_OPS(dev_ops, attach, detach) \
+ DDI_DEFINE_STREAM_OPS(dev_ops, nulldev, nulldev, attach, detach, \
+ nodev, NULL, D_MP, NULL, usbgem_quiesce)
+#endif
+#else
+#define usbgem_getinfo gld_getinfo
+#define usbgem_open gld_open
+#define usbgem_close gld_close
+#define usbgem_wput gld_wput
+#define usbgem_wsrv gld_wsrv
+#define usbgem_rsrv gld_rsrv
+#define usbgem_power NULL
+#endif
+int usbgem_mod_init(struct dev_ops *, char *);
+void usbgem_mod_fini(struct dev_ops *);
+
+#define USBGEM_GET_DEV(dip) \
+ ((struct usbgem_dev *)(ddi_get_driver_private(dip)))
+
+#endif /* __USBGEM_H__ */
diff --git a/usr/src/uts/common/io/usbgem/usbgem_mii.h b/usr/src/uts/common/io/usbgem/usbgem_mii.h
new file mode 100644
index 0000000000..2b4176a340
--- /dev/null
+++ b/usr/src/uts/common/io/usbgem/usbgem_mii.h
@@ -0,0 +1,242 @@
+/*
+ * gem_mii.h: mii header for gem
+ *
+ * Copyright (c) 2002-2007 Masayuki Murayama. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the author nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+#pragma ident "@(#)gem_mii.h 1.4 07/11/30"
+
+/*
+ * gem_mii.h : MII registers
+ */
+#ifndef _GEM_MII_H_
+#define _GEM_MII_H_
+
+#ifdef GEM_CONFIG_GLDv3
+#include <sys/miiregs.h>
+#else
+#define MII_CONTROL 0
+#define MII_STATUS 1
+#define MII_PHYIDH 2
+#define MII_PHYIDL 3
+#define MII_AN_ADVERT 4
+#define MII_AN_LPABLE 5
+#define MII_AN_EXPANSION 6
+#define MII_AN_NXTPGXMIT 7
+#endif /* GEM_CONFIG_GLDv3 */
+
+#define MII_AN_LPANXT 8
+#define MII_MS_CONTROL 9
+#define MII_MS_STATUS 10
+#define MII_XSTATUS 15
+
+/* for 1000BaseT support */
+#define MII_1000TC MII_MS_CONTROL
+#define MII_1000TS MII_MS_STATUS
+#ifndef GEM_CONFIG_GLDv3
+#define MII_CONTROL_RESET 0x8000
+#define MII_CONTROL_LOOPBACK 0x4000
+#define MII_CONTROL_100MB 0x2000
+#define MII_CONTROL_ANE 0x1000
+#define MII_CONTROL_PWRDN 0x0800
+#define MII_CONTROL_ISOLATE 0x0400
+#define MII_CONTROL_RSAN 0x0200
+#define MII_CONTROL_FDUPLEX 0x0100
+#define MII_CONTROL_COLTST 0x0080
+#endif /* !GEM_CONFIG_GLDv3 */
+#define MII_CONTROL_SPEED 0x2040
+
+#define MII_CONTROL_10MB 0x0000
+#define MII_CONTROL_1000MB 0x0040
+
+#define MII_CONTROL_BITS \
+ "\020" \
+ "\020RESET" \
+ "\017LOOPBACK" \
+ "\016100MB" \
+ "\015ANE" \
+ "\014PWRDN" \
+ "\013ISOLATE" \
+ "\012RSAN" \
+ "\011FDUPLEX" \
+ "\010COLTST" \
+ "\0071000M"
+#ifndef GEM_CONFIG_GLDv3
+#define MII_STATUS_100_BASE_T4 0x8000
+#define MII_STATUS_100_BASEX_FD 0x4000
+#define MII_STATUS_100_BASEX 0x2000
+#define MII_STATUS_10_FD 0x1000
+#define MII_STATUS_10 0x0800
+#define MII_STATUS_MFPRMBLSUPR 0x0040
+#define MII_STATUS_ANDONE 0x0020
+#define MII_STATUS_REMFAULT 0x0010
+#define MII_STATUS_CANAUTONEG 0x0008
+#define MII_STATUS_LINKUP 0x0004
+#define MII_STATUS_JABBERING 0x0002
+#define MII_STATUS_EXTENDED 0x0001
+#endif /* !GEM_CONFIG_GLDv3 */
+#define MII_STATUS_XSTATUS 0x0100
+#define MII_STATUS_100_BASE_T2_FD 0x0400
+#define MII_STATUS_100_BASE_T2 0x0200
+
+#define MII_STATUS_ABILITY_TECH \
+ (MII_STATUS_100_BASE_T4 | \
+ MII_STATUS_100_BASEX_FD | \
+ MII_STATUS_100_BASEX | \
+ MII_STATUS_10 | \
+ MII_STATUS_10_FD)
+
+
+#define MII_STATUS_BITS \
+ "\020" \
+ "\020100_BASE_T4" \
+ "\017100_BASEX_FD" \
+ "\016100_BASEX" \
+ "\01510_BASE_FD" \
+ "\01410_BASE" \
+ "\013100_BASE_T2_FD" \
+ "\012100_BASE_T2" \
+ "\011XSTATUS" \
+ "\007MFPRMBLSUPR" \
+ "\006ANDONE" \
+ "\005REMFAULT" \
+ "\004CANAUTONEG" \
+ "\003LINKUP" \
+ "\002JABBERING" \
+ "\001EXTENDED"
+#ifndef GEM_CONFIG_GLDv3
+#define MII_AN_ADVERT_NP 0x8000
+#define MII_AN_ADVERT_REMFAULT 0x2000
+#define MII_AN_ADVERT_SELECTOR 0x001f
+#endif /* !GEM_CONFIG_GLDv3 */
+
+#define MII_ABILITY_ASM_DIR 0x0800 /* for annex 28B */
+#ifndef MII_ABILITY_PAUSE
+#define MII_ABILITY_PAUSE 0x0400 /* for IEEE 802.3x */
+#endif
+#ifndef GEM_CONFIG_GLDv3
+#define MII_ABILITY_100BASE_T4 0x0200
+#define MII_ABILITY_100BASE_TX_FD 0x0100
+#define MII_ABILITY_100BASE_TX 0x0080
+#define MII_ABILITY_10BASE_T_FD 0x0040
+#define MII_ABILITY_10BASE_T 0x0020
+#endif /* !GEM_CONFIG_GLDv3 */
+
+#define MII_AN_LPABLE_NP 0x8000
+
+#define MII_ABILITY_TECH \
+ (MII_ABILITY_100BASE_T4 | \
+ MII_ABILITY_100BASE_TX_FD | \
+ MII_ABILITY_100BASE_TX | \
+ MII_ABILITY_10BASE_T | \
+ MII_ABILITY_10BASE_T_FD)
+
+#define MII_ABILITY_ALL \
+ (MII_AN_ADVERT_REMFAULT | \
+ MII_ABILITY_ASM_DIR | \
+ MII_ABILITY_PAUSE | \
+ MII_ABILITY_TECH)
+
+
+#define MII_ABILITY_BITS \
+ "\020" \
+ "\016REMFAULT" \
+ "\014ASM_DIR" \
+ "\013PAUSE" \
+ "\012100BASE_T4" \
+ "\011100BASE_TX_FD" \
+ "\010100BASE_TX" \
+ "\00710BASE_T_FD" \
+ "\00610BASE_T"
+#ifndef GEM_CONFIG_GLDv3
+#define MII_AN_EXP_PARFAULT 0x0010
+#define MII_AN_EXP_LPCANNXTP 0x0008
+#define MII_AN_EXP_CANNXTPP 0x0004
+#define MII_AN_EXP_PAGERCVD 0x0002
+#define MII_AN_EXP_LPCANAN 0x0001
+#endif /* !GEM_CONFIG_GLDv3 */
+
+#define MII_AN_EXP_BITS \
+ "\020" \
+ "\005PARFAULT" \
+ "\004LPCANNXTP" \
+ "\003CANNXTPP" \
+ "\002PAGERCVD" \
+ "\001LPCANAN"
+
+#define MII_1000TC_TESTMODE 0xe000
+#define MII_1000TC_CFG_EN 0x1000
+#define MII_1000TC_CFG_VAL 0x0800
+#define MII_1000TC_PORTTYPE 0x0400
+#define MII_1000TC_ADV_FULL 0x0200
+#define MII_1000TC_ADV_HALF 0x0100
+
+#define MII_1000TC_BITS \
+ "\020" \
+ "\015CFG_EN" \
+ "\014CFG_VAL" \
+ "\013PORTTYPE" \
+ "\012FULL" \
+ "\011HALF"
+
+#define MII_1000TS_CFG_FAULT 0x8000
+#define MII_1000TS_CFG_MASTER 0x4000
+#define MII_1000TS_LOCALRXOK 0x2000
+#define MII_1000TS_REMOTERXOK 0x1000
+#define MII_1000TS_LP_FULL 0x0800
+#define MII_1000TS_LP_HALF 0x0400
+
+#define MII_1000TS_BITS \
+ "\020" \
+ "\020CFG_FAULT" \
+ "\017CFG_MASTER" \
+ "\014CFG_LOCALRXOK" \
+ "\013CFG_REMOTERXOK" \
+ "\012LP_FULL" \
+ "\011LP_HALF"
+
+#define MII_XSTATUS_1000BASEX_FD 0x8000
+#define MII_XSTATUS_1000BASEX 0x4000
+#define MII_XSTATUS_1000BASET_FD 0x2000
+#define MII_XSTATUS_1000BASET 0x1000
+
+#define MII_XSTATUS_BITS \
+ "\020" \
+ "\0201000BASEX_FD" \
+ "\0171000BASEX" \
+ "\0161000BASET_FD" \
+ "\0151000BASET"
+
+#define MII_READ_CMD(p, r) \
+ ((6<<(18+5+5)) | ((p)<<(18+5)) | ((r)<<18))
+
+#define MII_WRITE_CMD(p, r, v) \
+ ((5<<(18+5+5)) | ((p)<<(18+5)) | ((r)<<18) | (2 << 16) | (v))
+
+#endif /* _GEM_MII_H_ */
diff --git a/usr/src/uts/common/io/vnd/frameio.c b/usr/src/uts/common/io/vnd/frameio.c
new file mode 100644
index 0000000000..e4e700fa12
--- /dev/null
+++ b/usr/src/uts/common/io/vnd/frameio.c
@@ -0,0 +1,464 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Frame I/O utility functions
+ */
+
+#include <sys/frameio.h>
+
+#include <sys/file.h>
+#include <sys/types.h>
+#include <sys/kmem.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/stream.h>
+#include <sys/strsun.h>
+#include <sys/sysmacros.h>
+#include <sys/inttypes.h>
+
+static kmem_cache_t *frameio_cache;
+
+int
+frameio_init(void)
+{
+ frameio_cache = kmem_cache_create("frameio_cache",
+ sizeof (frameio_t) + sizeof (framevec_t) * FRAMEIO_NVECS_MAX,
+ 0, NULL, NULL, NULL, NULL, NULL, 0);
+ if (frameio_cache == NULL)
+ return (1);
+
+ return (0);
+}
+
+void
+frameio_fini(void)
+{
+ if (frameio_cache != NULL)
+ kmem_cache_destroy(frameio_cache);
+}
+
+frameio_t *
+frameio_alloc(int kmflags)
+{
+ return (kmem_cache_alloc(frameio_cache, kmflags));
+}
+
+void
+frameio_free(frameio_t *fio)
+{
+ return (kmem_cache_free(frameio_cache, fio));
+}
+
+/*
+ * Ensure that we don't see any garbage in the framevecs that we're nominally
+ * supposed to work with. Specifically we want to make sure that the buflen and
+ * the address are not zero.
+ */
+static int
+frameio_hdr_check_vecs(frameio_t *fio)
+{
+ int i;
+ for (i = 0; i < fio->fio_nvecs; i++)
+ if (fio->fio_vecs[i].fv_buf == NULL ||
+ fio->fio_vecs[i].fv_buflen == 0)
+ return (EINVAL);
+
+ return (0);
+}
+
+/*
+ * We have to copy in framevec32_t's. To work around the data model issues and
+ * trying not to copy memory we first copy in the framevec32_t data into the
+ * standard fio_vec space. Next we work backwards copying a given framevec32_t
+ * to a temporaory framevec_t and then overwrite the frameio_t's data. Note that
+ * it is important that we do this in reverse so as to ensure that we don't
+ * clobber data as the framevec_t is larger than the framevec32_t.
+ */
+static int
+frameio_hdr_copyin_ilp32(frameio_t *fio, const void *addr)
+{
+ framevec32_t *vec32p;
+ framevec_t fv;
+ int i;
+
+ vec32p = (framevec32_t *)&fio->fio_vecs[0];
+
+ if (ddi_copyin(addr, vec32p, sizeof (framevec32_t) * fio->fio_nvecs,
+ 0) != 0)
+ return (EFAULT);
+
+ for (i = fio->fio_nvecs - 1; i >= 0; i--) {
+ fv.fv_buf = (void *)(uintptr_t)vec32p[i].fv_buf;
+ fv.fv_buflen = vec32p[i].fv_buflen;
+ fv.fv_actlen = vec32p[i].fv_actlen;
+ fio->fio_vecs[i].fv_buf = fv.fv_buf;
+ fio->fio_vecs[i].fv_buflen = fv.fv_buflen;
+ fio->fio_vecs[i].fv_actlen = fv.fv_actlen;
+ }
+
+ return (frameio_hdr_check_vecs(fio));
+}
+
+/*
+ * Copy in a frame io header into fio with space for up to nvecs. If the frameio
+ * contains more vectors than specified it will be ignored. mode should contain
+ * information about the datamodel.
+ */
+int
+frameio_hdr_copyin(frameio_t *fio, int max_vecs, const void *addr, uint_t mode)
+{
+ int model = ddi_model_convert_from(mode & FMODELS);
+ int cpf = mode & FKIOCTL ? FKIOCTL : 0;
+ size_t fsize = model == DDI_MODEL_ILP32 ?
+ sizeof (frameio32_t) : sizeof (frameio_t);
+
+ /*
+ * The start of the header is the same in all data models for the
+ * current verison.
+ */
+ if (ddi_copyin(addr, fio, fsize, cpf) != 0)
+ return (EFAULT);
+
+ if (fio->fio_version != FRAMEIO_VERSION_ONE)
+ return (EINVAL);
+
+ if (fio->fio_nvecs > FRAMEIO_NVECS_MAX || fio->fio_nvecs == 0)
+ return (EINVAL);
+
+ if (fio->fio_nvpf == 0)
+ return (EINVAL);
+
+ if (fio->fio_nvecs % fio->fio_nvpf != 0)
+ return (EINVAL);
+
+ if (fio->fio_nvecs > max_vecs)
+ return (EOVERFLOW);
+
+ addr = (void *)((uintptr_t)addr + fsize);
+ if (model == DDI_MODEL_ILP32) {
+ if (cpf != 0)
+ return (EINVAL);
+ return (frameio_hdr_copyin_ilp32(fio, addr));
+ }
+
+ if (ddi_copyin(addr, &fio->fio_vecs[0],
+ sizeof (framevec_t) * fio->fio_nvecs, cpf) != 0)
+ return (EFAULT);
+
+ return (frameio_hdr_check_vecs(fio));
+}
+
+static mblk_t *
+frameio_allocb(size_t sz)
+{
+ mblk_t *mp;
+
+ mp = allocb(sz, 0);
+ if (mp == NULL)
+ return (NULL);
+
+ mp->b_datap->db_type = M_DATA;
+ return (mp);
+}
+
+static int
+framevec_mblk_read(framevec_t *fv, mblk_t **mpp, int cpf)
+{
+ mblk_t *mp;
+ cpf = cpf != 0 ? FKIOCTL : 0;
+
+ mp = frameio_allocb(fv->fv_buflen);
+
+ if (mp == NULL) {
+ freemsg(mp);
+ return (EAGAIN);
+ }
+
+ if (ddi_copyin(fv->fv_buf, mp->b_wptr, fv->fv_buflen,
+ cpf) != 0) {
+ freemsg(mp);
+ return (EFAULT);
+ }
+
+ mp->b_wptr += fv->fv_buflen;
+ *mpp = mp;
+ return (0);
+}
+
+/*
+ * Read a set of frame vectors that make up a single message boundary and return
+ * that as a single message in *mpp that consists of multiple data parts.
+ */
+static int
+frameio_mblk_read(frameio_t *fio, framevec_t *fv, mblk_t **mpp, int cpf)
+{
+ int nparts = fio->fio_nvpf;
+ int part, error;
+ mblk_t *mp;
+
+ *mpp = NULL;
+ cpf = cpf != 0 ? FKIOCTL : 0;
+
+ /*
+ * Construct the initial frame
+ */
+ for (part = 0; part < nparts; part++) {
+ error = framevec_mblk_read(fv, &mp, cpf);
+ if (error != 0) {
+ freemsg(*mpp);
+ return (error);
+ }
+
+ if (*mpp == NULL)
+ *mpp = mp;
+ else
+ linkb(*mpp, mp);
+ fv++;
+ }
+
+ return (0);
+}
+
+/*
+ * Read data from a series of frameio vectors into a message block chain. A
+ * given frameio request has a number of discrete messages divided into
+ * individual vectors based on fio->fio_nvcspframe. Each discrete message will
+ * be constructed into a message block chain pointed to by b_next.
+ *
+ * If we get an EAGAIN while trying to construct a given message block what we
+ * return depends on what else we've done so far. If we have succesfully
+ * completed at least one message then we free everything else we've done so
+ * far and return that. If no messages have been completed we return EAGAIN. If
+ * instead we encounter a different error, say EFAULT, then all of the fv_actlen
+ * entries values are undefined.
+ */
+int
+frameio_mblk_chain_read(frameio_t *fio, mblk_t **mpp, int *nvecs, int cpf)
+{
+ int error = ENOTSUP;
+ int nframes = fio->fio_nvecs / fio->fio_nvpf;
+ int frame;
+ framevec_t *fv;
+ mblk_t *mp, *bmp = NULL;
+
+ /*
+ * Protect against bogus kernel subsystems.
+ */
+ VERIFY(fio->fio_nvecs > 0);
+ VERIFY(fio->fio_nvecs % fio->fio_nvpf == 0);
+
+ *mpp = NULL;
+ cpf = cpf != 0 ? FKIOCTL : 0;
+
+ fv = &fio->fio_vecs[0];
+ for (frame = 0; frame < nframes; frame++) {
+ error = frameio_mblk_read(fio, fv, &mp, cpf);
+ if (error != 0)
+ goto failed;
+
+ if (bmp != NULL)
+ bmp->b_next = mp;
+ else
+ *mpp = mp;
+ bmp = mp;
+ }
+
+ *nvecs = nframes;
+ return (0);
+failed:
+ /*
+ * On EAGAIN we've already taken care of making sure that we have no
+ * leftover messages, eg. they were never linked in.
+ */
+ if (error == EAGAIN) {
+ if (frame != 0)
+ error = 0;
+ if (*nvecs != NULL)
+ *nvecs = frame;
+ ASSERT(*mpp != NULL);
+ } else {
+ for (mp = *mpp; mp != NULL; mp = bmp) {
+ bmp = mp->b_next;
+ freemsg(mp);
+ }
+ if (nvecs != NULL)
+ *nvecs = 0;
+ *mpp = NULL;
+ }
+ return (error);
+}
+
+size_t
+frameio_frame_length(frameio_t *fio, framevec_t *fv)
+{
+ int i;
+ size_t len = 0;
+
+ for (i = 0; i < fio->fio_nvpf; i++, fv++)
+ len += fv->fv_buflen;
+
+ return (len);
+}
+
+/*
+ * Write a portion of an mblk to the current.
+ */
+static int
+framevec_write_mblk_part(framevec_t *fv, mblk_t *mp, size_t len, size_t moff,
+ size_t foff, int cpf)
+{
+ ASSERT(len <= MBLKL(mp) - moff);
+ ASSERT(len <= fv->fv_buflen - fv->fv_actlen);
+ cpf = cpf != 0 ? FKIOCTL : 0;
+
+ if (ddi_copyout(mp->b_rptr + moff, fv->fv_buf + foff, len, cpf) != 0)
+ return (EFAULT);
+ fv->fv_actlen += len;
+
+ return (0);
+}
+
+/*
+ * Because copying this out to the user might fail we don't want to update the
+ * b_rptr in case we need to copy it out again.
+ */
+static int
+framevec_map_blk(frameio_t *fio, framevec_t *fv, mblk_t *mp, int cpf)
+{
+ int err;
+ size_t msize, blksize, len, moff, foff;
+
+ msize = msgsize(mp);
+ if (msize > frameio_frame_length(fio, fv))
+ return (EOVERFLOW);
+
+ moff = 0;
+ foff = 0;
+ blksize = MBLKL(mp);
+ fv->fv_actlen = 0;
+ while (msize != 0) {
+ len = MIN(blksize, fv->fv_buflen - fv->fv_actlen);
+ err = framevec_write_mblk_part(fv, mp, len, moff, foff, cpf);
+ if (err != 0)
+ return (err);
+
+ msize -= len;
+ blksize -= len;
+ moff += len;
+ foff += len;
+
+ if (blksize == 0 && msize != 0) {
+ mp = mp->b_cont;
+ ASSERT(mp != NULL);
+ moff = 0;
+ blksize = MBLKL(mp);
+ }
+
+ if (fv->fv_buflen == fv->fv_actlen && msize != 0) {
+ fv++;
+ fv->fv_actlen = 0;
+ foff = 0;
+ }
+ }
+
+ return (0);
+}
+
+int
+frameio_mblk_chain_write(frameio_t *fio, frameio_write_mblk_map_t map,
+ mblk_t *mp, int *nwrite, int cpf)
+{
+ int mcount = 0;
+ int ret = 0;
+
+ if (map != MAP_BLK_FRAME)
+ return (EINVAL);
+
+ while (mp != NULL && mcount < fio->fio_nvecs) {
+ ret = framevec_map_blk(fio, &fio->fio_vecs[mcount], mp, cpf);
+ if (ret != 0)
+ break;
+ mcount += fio->fio_nvpf;
+ mp = mp->b_next;
+ }
+
+ if (ret != 0 && mcount == 0) {
+ if (nwrite != NULL)
+ *nwrite = 0;
+ return (ret);
+ }
+
+ if (nwrite != NULL)
+ *nwrite = mcount / fio->fio_nvpf;
+
+ return (0);
+}
+
+/*
+ * Copy out nframes worth of frameio header data back to userland.
+ */
+int
+frameio_hdr_copyout(frameio_t *fio, int nframes, void *addr, uint_t mode)
+{
+ int i;
+ int model = ddi_model_convert_from(mode & FMODELS);
+ framevec32_t *vec32p;
+ framevec32_t f;
+
+ if (fio->fio_nvecs / fio->fio_nvpf < nframes)
+ return (EINVAL);
+
+ fio->fio_nvecs = nframes * fio->fio_nvpf;
+
+ if (model == DDI_MODEL_NONE) {
+ if (ddi_copyout(fio, addr,
+ sizeof (frameio_t) + fio->fio_nvecs * sizeof (framevec_t),
+ mode & FKIOCTL) != 0)
+ return (EFAULT);
+ return (0);
+ }
+
+ ASSERT(model == DDI_MODEL_ILP32);
+
+ vec32p = (framevec32_t *)&fio->fio_vecs[0];
+ for (i = 0; i < fio->fio_nvecs; i++) {
+ f.fv_buf = (caddr32_t)(uintptr_t)fio->fio_vecs[i].fv_buf;
+ if (fio->fio_vecs[i].fv_buflen > UINT_MAX ||
+ fio->fio_vecs[i].fv_actlen > UINT_MAX)
+ return (EOVERFLOW);
+ f.fv_buflen = fio->fio_vecs[i].fv_buflen;
+ f.fv_actlen = fio->fio_vecs[i].fv_actlen;
+ vec32p[i].fv_buf = f.fv_buf;
+ vec32p[i].fv_buflen = f.fv_buflen;
+ vec32p[i].fv_actlen = f.fv_actlen;
+ }
+
+ if (ddi_copyout(fio, addr,
+ sizeof (frameio32_t) + fio->fio_nvecs * sizeof (framevec32_t),
+ mode & FKIOCTL) != 0)
+ return (EFAULT);
+ return (0);
+}
+
+void
+frameio_mark_consumed(frameio_t *fio, int nframes)
+{
+ int i;
+
+ ASSERT(fio->fio_nvecs / fio->fio_nvpf >= nframes);
+ for (i = 0; i < nframes * fio->fio_nvpf; i++)
+ fio->fio_vecs[i].fv_actlen = fio->fio_vecs[i].fv_buflen;
+}
diff --git a/usr/src/uts/common/io/vnd/vnd.c b/usr/src/uts/common/io/vnd/vnd.c
new file mode 100644
index 0000000000..d62c80c1c6
--- /dev/null
+++ b/usr/src/uts/common/io/vnd/vnd.c
@@ -0,0 +1,5584 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * vnd - virtual (machine) networking datapath
+ *
+ * vnd's purpose is to provide a highly performant data path for Layer 2 network
+ * traffic and exist side by side an active IP netstack, each servicing
+ * different datalinks. vnd provides many of the same capabilities as the
+ * current TCP/IP stack does and some specific to layer two. Specifically:
+ *
+ * o Use of the DLD fastpath
+ * o Packet capture hooks
+ * o Ability to use hardware capabilities
+ * o Useful interfaces for handling multiple frames
+ *
+ * The following image shows where vnd fits into today's networking stack:
+ *
+ * +---------+----------+----------+
+ * | libdlpi | libvnd | libsocket|
+ * +---------+----------+----------+
+ * | · · VFS |
+ * | VFS · VFS +----------+
+ * | · | sockfs |
+ * +---------+----------+----------+
+ * | | VND | IP |
+ * | +----------+----------+
+ * | DLD/DLS |
+ * +-------------------------------+
+ * | MAC |
+ * +-------------------------------+
+ * | GLDv3 |
+ * +-------------------------------+
+ *
+ * -----------------------------------------
+ * A Tale of Two Devices - DDI Device Basics
+ * -----------------------------------------
+ *
+ * vnd presents itself to userland as a character device; however, it also is a
+ * STREAMS device so that it can interface with dld and the rest of the
+ * networking stack. Users never interface with the STREAMs devices directly and
+ * they are purely an implementation detail of vnd. Opening the STREAMS device
+ * require kcred and as such userland cannot interact with it or push it onto
+ * the stream head.
+ *
+ * The main vnd character device, /dev/vnd/ctl, is a self-cloning device. Every
+ * clone gets its own minor number; however, minor nodes are not created in the
+ * devices tree for these instances. In this state a user may do two different
+ * things. They may issue ioctls that affect global state or they may issue
+ * ioctls that try to attach it to a given datalink. Once a minor device has
+ * been attached to a datalink, all operations on it are scoped to that context,
+ * therefore subsequent global operations are not permitted.
+ *
+ * A given device can be linked into the /devices and /dev name space via a link
+ * ioctl. That ioctl causes a minor node to be created in /devices and then it
+ * will also appear under /dev/vnd/ due to vnd's sdev plugin. This is similar
+ * to, but simpler than, IP's persistence mechanism.
+ *
+ * ---------------------
+ * Binding to a datalink
+ * ---------------------
+ *
+ * Datalinks are backed by the dld (datalink device) and dls (datalink services)
+ * drivers. These drivers provide a STREAMS device for datalinks on the system
+ * which are exposed through /dev/net. Userland generally manipulates datalinks
+ * through libdlpi. When an IP interface is being plumbed up what actually
+ * happens is that someone does a dlpi_open(3DLPI) of the underlying datalink
+ * and then pushes on the ip STREAMS module with an I_PUSH ioctl. Modules may
+ * then can negotiate with dld and dls to obtain access to various capabilities
+ * and fast paths via a series of STREAMS messages.
+ *
+ * In vnd, we do the same thing, but we leave our STREAMS module as an
+ * implementation detail of the system. We don't want users to be able to
+ * arbitrarily push vnd STREAMS module onto any stream, so we explicitly require
+ * kcred to manipulate it. Thus, when a user issues a request to attach a
+ * datalink to a minor instance of the character device, that vnd minor instance
+ * itself does a layered open (ldi_open_by_name(9F)) of the specified datalink.
+ * vnd does that open using the passed in credentials from the ioctl, not kcred.
+ * This ensures that users who doesn't have permissions to open the device
+ * cannot. Once that's been opened, we push on the vnd streams module.
+ *
+ * Once the vnd STREAMS instance has been created for this device, eg. the
+ * I_PUSH ioctl returns, we explicitly send a STREAMS ioctl
+ * (VND_STRIOC_ASSOCIATE) to associate the vnd STREAMS and character devices.
+ * This association begins the STREAM device's initialization. We start up an
+ * asynchronous state machine that takes care of all the different aspects of
+ * plumbing up the device with dld and dls and enabling the MAC fast path. We
+ * need to guarantee to consumers of the character device that by the time their
+ * ioctl returns, the data path has been fully initialized.
+ *
+ * The state progression is fairly linear. There are two general steady states.
+ * The first is VND_S_ONLINE, which means that everything is jacked up and good
+ * to go. The alternative is VND_S_ZOMBIE, which means that the streams device
+ * encountered an error or we have finished tearing it down and the character
+ * device can clean it up. The following is our state progression and the
+ * meaning of each state:
+ *
+ * |
+ * |
+ * V
+ * +---------------+
+ * | VNS_S_INITIAL | This is our initial state. Every
+ * +---------------+ vnd STREAMS device starts here.
+ * | While in this state, only dlpi
+ * | M_PROTO and M_IOCTL messages can be
+ * | sent or received. All STREAMS based
+ * | data messages are dropped.
+ * | We transition out of this state by
+ * | sending a DL_INFO_REQ to obtain
+ * | information about the underlying
+ * | link.
+ * v
+ * +-----------------+
+ * +--<-| VNS_S_INFO_SENT | In this state, we verify and
+ * | +-----------------+ record information about the
+ * | | underlying device. If the device is
+ * | | not suitable, eg. not of type
+ * v | DL_ETHER, then we immediately
+ * | | become a ZOMBIE. To leave this
+ * | | state we request exclusive active
+ * | | access to the device via
+ * v | DL_EXCLUSIVE_REQ.
+ * | v
+ * | +----------------------+
+ * +--<-| VNS_S_EXCLUSIVE_SENT | In this state, we verify whether
+ * | +----------------------+ or not we were able to obtain
+ * | | | exclusive access to the device. If
+ * | | | we were not able to, then we leave,
+ * v | | as that means that something like
+ * | | | IP is already plumbed up on top of
+ * | | | the datalink. We leave this state
+ * | | | by progressing through to the
+ * | | | appropriate DLPI primitive, either
+ * v | | DLPI_ATTACH_REQ or DLPI_BIND_REQ
+ * | | | depending on the style of the
+ * | | | datalink.
+ * | | v
+ * | | +-------------------+
+ * +------ |--<-| VNS_S_ATTACH_SENT | In this state, we verify we were
+ * | | +-------------------+ able to perform a standard DLPI
+ * | | | attach and if so, go ahead and
+ * v | | send a DLPI_BIND_REQ.
+ * | v v
+ * | +-------------------+
+ * +--<-| VNS_S_BIND_SENT | In this state we see the result of
+ * | +-------------------+ our attempt to bind to PPA 0 of the
+ * v | underlying device. Because we're
+ * | | trying to be a layer two datapath,
+ * | | the specific attachment point isn't
+ * | | too important as we're going to
+ * v | have to enable promiscuous mode. We
+ * | | transition out of this by sending
+ * | | our first of three promiscuous mode
+ * | | requests.
+ * v v
+ * | +------------------------+
+ * +--<-| VNS_S_SAP_PROMISC_SENT | In this state we verify that we
+ * | +------------------------+ were able to enable promiscuous
+ * | | mode at the physical level. We
+ * | | transition out of this by enabling
+ * | | multicast and broadcast promiscuous
+ * v | mode.
+ * | v
+ * | +--------------------------+
+ * +--<-| VNS_S_MULTI_PROMISC_SENT | In this state we verify that we
+ * | +--------------------------+ have enabled DL_PROMISC_MULTI and
+ * v | move onto the final promiscuous
+ * | | mode request.
+ * | v
+ * | +----------------------------+
+ * +--<-| VNS_S_RX_ONLY_PROMISC_SENT | In this state we verify that we
+ * | +----------------------------+ enabled RX_ONLY promiscuous mode.
+ * | | We specifically do this as we don't
+ * v | want to receive our own traffic
+ * | | that we'll send out. We leave this
+ * | | state by requesting the set of
+ * | | dld/dls capabilities that we can
+ * v | process.
+ * | |
+ * | v
+ * | +--------------------+
+ * +--<-| VNS_S_CAPAB_Q_SENT | We loop over the set of
+ * | +--------------------+ capabilities that dld advertised
+ * | | and enable the ones that currently
+ * v | support for use. See the section
+ * | | later on regarding capabilities
+ * | | for more information. We leave this
+ * | | state by sending an enable request.
+ * v v
+ * | +--------------------+
+ * +--<-| VNS_S_CAPAB_E_SENT | Here we finish all capability
+ * | +--------------------+ initialization. Once finished, we
+ * | | transition to the next state. If
+ * v | the dld fast path is not available,
+ * | | we become a zombie.
+ * | v
+ * | +--------------+
+ * +--<-| VNS_S_ONLINE | This is a vnd STREAMS device's
+ * | +--------------+ steady state. It will normally
+ * | | reside in this state while it is in
+ * | | active use. It will only transition
+ * v | to the next state when the STREAMS
+ * | | device is closed by the character
+ * | | device. In this state, all data
+ * | | flows over the dld fast path.
+ * | v
+ * | +---------------------+
+ * +--<-| VNS_S_SHUTTING_DOWN | This vnd state takes care of
+ * | +---------------------+ disabling capabilities and then
+ * | | transitions to zombie state to
+ * v | indicate that it is finished.
+ * | v
+ * | +--------------+
+ * +--->| VNS_S_ZOMBIE | In this state, the vnd STREAMS
+ * +--------------+ device is waiting to finished being
+ * reaped.
+ *
+ * If the stream association fails for any reason the state machine reaches
+ * VNS_S_ZOMBIE. A more detailed vnd_errno_t will propagate back through the
+ * STREAMS ioctl to the character device. That will fail the user ioctl and
+ * propagate the vnd_errno_t back to userland. If, on the other hand, the
+ * association succeeds, then the vnd STREAMS device will be fully plumbed up
+ * and ready to transmit and receive message blocks. Consumers will be able to
+ * start using the other cbops(9E) entry points once the attach has fully
+ * finished, which will occur after the original user attach ioctl to the
+ * character device returns.
+ *
+ * --------------------
+ * General Architecture
+ * --------------------
+ *
+ * There are several different devices and structures in the vnd driver. There
+ * is a per-netstack component, pieces related to the character device that
+ * consumers see, the internal STREAMS device state, and the data queues
+ * themselves. The following ASCII art picture describes their relationships and
+ * some of the major pieces of data that contain them. These are not exhaustive,
+ * eg. synchronization primitives are left out.
+ *
+ * +----------------+ +-----------------+
+ * | global | | global |
+ * | device list | | netstack list |
+ * | vnd_dev_list | | vnd_nsd_list |
+ * +----------------+ +-----------------+
+ * | |
+ * | v
+ * | +-------------------+ +-------------------+
+ * | | per-netstack data | ---> | per-netstack data | --> ...
+ * | | vnd_pnsd_t | | vnd_pnsd_t |
+ * | | | +-------------------+
+ * | | |
+ * | | nestackid_t ---+----> Netstack ID
+ * | | vnd_pnsd_flags_t -+----> Status flags
+ * | | zoneid_t ---+----> Zone ID for this netstack
+ * | | hook_family_t ---+----> VND IPv4 Hooks
+ * | | hook_family_t ---+----> VND IPv6 Hooks
+ * | | list_t ----+ |
+ * | +------------+------+
+ * | |
+ * | v
+ * | +------------------+ +------------------+
+ * | | character device | ---> | character device | -> ...
+ * +---------->| vnd_dev_t | | vnd_dev_t |
+ * | | +------------------+
+ * | |
+ * | minor_t ---+--> device minor number
+ * | ldi_handle_t ---+--> handle to /dev/net/%datalink
+ * | vnd_dev_flags_t -+--> device flags, non blocking, etc.
+ * | char[] ---+--> name if linked
+ * | vnd_str_t * -+ |
+ * +--------------+---+
+ * |
+ * v
+ * +-------------------------+
+ * | STREAMS device |
+ * | vnd_str_t |
+ * | |
+ * | vnd_str_state_t ---+---> State machine state
+ * | gsqueue_t * ---+---> mblk_t Serialization queue
+ * | vnd_str_stat_t ---+---> per-device kstats
+ * | vnd_str_capab_t ---+----------------------------+
+ * | vnd_data_queue_t ---+ | |
+ * | vnd_data_queue_t -+ | | v
+ * +-------------------+-+---+ +---------------------+
+ * | | | Stream capabilities |
+ * | | | vnd_str_capab_t |
+ * | | | |
+ * | | supported caps <--+-- vnd_capab_flags_t |
+ * | | dld cap handle <--+-- void * |
+ * | | direct tx func <--+-- vnd_dld_tx_t |
+ * | | +---------------------+
+ * | |
+ * +----------------+ +-------------+
+ * | |
+ * v v
+ * +-------------------+ +-------------------+
+ * | Read data queue | | Write data queue |
+ * | vnd_data_queue_t | | vnd_data_queue_t |
+ * | | | |
+ * | size_t ----+--> Current size | size_t ----+--> Current size
+ * | size_t ----+--> Max size | size_t ----+--> Max size
+ * | mblk_t * ----+--> Queue head | mblk_t * ----+--> Queue head
+ * | mblk_t * ----+--> Queue tail | mblk_t * ----+--> Queue tail
+ * +-------------------+ +-------------------+
+ *
+ *
+ * Globally, we maintain two lists. One list contains all of the character
+ * device soft states. The other maintains a list of all our netstack soft
+ * states. Each netstack maintains a list of active devices that have been
+ * associated with a datalink in its netstack.
+ *
+ * Recall that a given minor instance of the character device exists in one of
+ * two modes. It can either be a cloned open of /dev/vnd/ctl, the control node,
+ * or it can be associated with a given datalink. When minor instances are in
+ * the former state, they do not exist in a given vnd_pnsd_t's list of devices.
+ * As part of attaching to a datalink, the given vnd_dev_t will be inserted into
+ * the appropriate vnd_pnsd_t. In addition, this will cause a STREAMS device, a
+ * vnd_str_t, to be created and associated to a vnd_dev_t.
+ *
+ * The character device, and its vnd_dev_t, is the interface to the rest of the
+ * system. The vnd_dev_t keeps track of various aspects like whether various
+ * operations, such as read, write and the frameio ioctls, are considered
+ * blocking or non-blocking in the O_NONBLOCK sense. It also is responsible for
+ * keeping track of things like the name of the device, if any, in /dev. The
+ * vnd_str_t, on the other hand manages aspects like buffer sizes and the actual
+ * data queues. However, ioctls that manipulate these properties all go through
+ * the vnd_dev_t to its associated vnd_str_t.
+ *
+ * Each of the STREAMS devices, the vnd_str_t, maintains two data queues. One
+ * for frames to transmit (write queue) and one for frames received (read
+ * queue). These data queues have a maximum size and attempting to add data
+ * beyond that maximum size will result in data being dropped. The sizes are
+ * configurable via ioctls VND_IOC_SETTXBUF, VND_IOC_SETRXBUF. Data either sits
+ * in those buffers or has a reservation in those buffers while they are in vnd
+ * and waiting to be consumed by the user or by mac.
+ *
+ * Finally, the vnd_str_t also has a vnd_str_capab_t which we use to manage the
+ * available, negotiated, and currently active features.
+ *
+ * ----------------------
+ * Data Path and gsqueues
+ * ----------------------
+ *
+ * There's a lot of plumbing in vnd to get to the point where we can send data,
+ * but vnd's bread and butter is the data path, so it's worth diving into it in
+ * more detail. Data enters and exits the system from two ends.
+ *
+ * The first end is the vnd consumer. This comes in the form of read and write
+ * system calls as well as the frame I/O ioctls. The read and write system calls
+ * operate on a single frame at a time. Think of a frame as a single message
+ * that has come in off the wire, which may itself comprise multiple mblk_t's
+ * linked together in the kernel. readv(2) and writev(2) have the same
+ * limitations as read(2) and write(2). We enforce this as the system is
+ * required to fill up every uio(9S) buffer before moving onto the next one.
+ * This means that if you have a MTU sized buffer and two frames come in which
+ * are less than half of the MTU they must fill up the given iovec. Even if we
+ * didn't want to do this, we have no way of informing the supplier of the
+ * iovecs that they were only partially filled or where one frame ends and
+ * another begins. That's life, as such we have frame I/O which solves this
+ * problem. It allows for multiple frames to be consumed as well as for frames
+ * to be broken down into multiple vector components.
+ *
+ * The second end is the mac direct calls. As part of negotiating capabilities
+ * via dld, we give mac a function of ours to call when packets are received
+ * [vnd_mac_input()] and a callback to indicate that flow has been restored
+ * [vnd_mac_flow_control()]. In turn, we also get a function pointer that we can
+ * transmit data with. As part of the contract with mac, mac is allowed to flow
+ * control us by returning a cookie to the transmit function. When that happens,
+ * all outbound traffic is halted until our callback function is called and we
+ * can schedule drains.
+ *
+ * It's worth looking at these in further detail. We'll start with the rx path.
+ *
+ *
+ * |
+ * * . . . packets from gld
+ * |
+ * v
+ * +-------------+
+ * | mac |
+ * +-------------+
+ * |
+ * v
+ * +-------------+
+ * | dld |
+ * +-------------+
+ * |
+ * * . . . dld direct callback
+ * |
+ * v
+ * +---------------+
+ * | vnd_mac_input |
+ * +---------------+
+ * |
+ * v
+ * +---------+ +-------------+
+ * | dropped |<--*---------| vnd_hooks |
+ * | by | . +-------------+
+ * | hooks | . drop probe |
+ * +---------+ kstat bump * . . . Do we have free
+ * | buffer space?
+ * |
+ * no . | . yes
+ * . + .
+ * +---*--+------*-------+
+ * | |
+ * * . . drop probe * . . recv probe
+ * | kstat bump | kstat bump
+ * v |
+ * +---------+ * . . fire pollin
+ * | freemsg | v
+ * +---------+ +-----------------------+
+ * | vnd_str_t`vns_dq_read |
+ * +-----------------------+
+ * ^ ^
+ * +----------+ | | +---------+
+ * | read(9E) |-->-+ +--<--| frameio |
+ * +----------+ +---------+
+ *
+ * The rx path is rather linear. Packets come into us from mac. We always run
+ * them through the various hooks, and if they come out of that, we inspect the
+ * read data queue. If there is not enough space for a packet, we drop it.
+ * Otherwise, we append it to the data queue, and fire read notifications
+ * targetting anyone polling or doing blocking I/O on this device. Those
+ * consumers then drain the head of the data queue.
+ *
+ * The tx path is more complicated due to mac flow control. After any call into
+ * mac, we may have to potentially suspend writes and buffer data for an
+ * arbitrary amount of time. As such, we need to carefully track the total
+ * amount of outstanding data so that we don't waste kernel memory. This is
+ * further complicated by the fact that mac will asynchronously tell us when our
+ * flow has been resumed.
+ *
+ * For data to be able to enter the system, it needs to be able to take a
+ * reservation from the write data queue. Once the reservation has been
+ * obtained, we enter the gsqueue so that we can actually append it. We use
+ * gsqueues (serialization queues) to ensure that packets are manipulated in
+ * order as we deal with the draining and appending packets. We also leverage
+ * its worker thread to help us do draining after mac has restorted our flow.
+ *
+ * The following image describes the flow:
+ *
+ * +-----------+ +--------------+ +-------------------------+ +------+
+ * | write(9E) |-->| Space in the |--*--->| gsqueue_enter_one() |-->| Done |
+ * | frameio | | write queue? | . | +->vnd_squeue_tx_append | +------+
+ * +-----------+ +--------------+ . +-------------------------+
+ * | ^ .
+ * | | . reserve space from gsqueue
+ * | | |
+ * queue . . . * | space v
+ * full | * . . . avail +------------------------+
+ * v | | vnd_squeue_tx_append() |
+ * +--------+ +------------+ +------------------------+
+ * | EAGAIN |<--*------| Non-block? |<-+ |
+ * +--------+ . +------------+ | v
+ * . yes v | wait +--------------+
+ * no . .* * . . for | append chain |
+ * +----+ space | to outgoing |
+ * | mblk chain |
+ * from gsqueue +--------------+
+ * | |
+ * | +-------------------------------------------------+
+ * | |
+ * | | yes . . .
+ * v v .
+ * +-----------------------+ +--------------+ . +------+
+ * | vnd_squeue_tx_drain() |--->| mac blocked? |----*---->| Done |
+ * +-----------------------+ +--------------+ +------+
+ * | |
+ * +---------------------------------|---------------------+
+ * | | tx |
+ * | no . . * queue . . *
+ * | flow controlled . | empty * . fire pollout
+ * | . v | if mblk_t's
+ * +-------------+ . +---------------------+ | sent
+ * | set blocked |<----*------| vnd_squeue_tx_one() |--------^-------+
+ * | flags | +---------------------+ |
+ * +-------------+ More data | | | More data |
+ * and limit ^ v * . . and limit ^
+ * not reached . . * | | reached |
+ * +----+ | |
+ * v |
+ * +----------+ +-------------+ +---------------------------+
+ * | mac flow |--------->| remove mac |--->| gsqueue_enter_one() with |
+ * | control | | block flags | | vnd_squeue_tx_drain() and |
+ * | callback | +-------------+ | GSQUEUE_FILL flag, iff |
+ * +----------+ | not already scheduled |
+ * +---------------------------+
+ *
+ * The final path taken for a given write(9E)/frameio ioctl depends on whether
+ * or not the vnd_dev_t is non-blocking. That controls the initial path of
+ * trying to take a reservation in write data queue. If the device is in
+ * non-blocking mode, we'll return EAGAIN when there is not enough space
+ * available, otherwise, the calling thread blocks on the data queue.
+ *
+ * Today when we call into vnd_squeue_tx_drain() we will not try to drain the
+ * entire queue, as that could be quite large and we don't want to necessarily
+ * keep the thread that's doing the drain until it's been finished. Not only
+ * could more data be coming in, but the draining thread could be a userland
+ * thread that has more work to do. We have two limits today. There is an upper
+ * bound on the total amount of data and the total number of mblk_t chains. If
+ * we hit either limit, then we will schedule another drain in the gsqueue and
+ * go from there.
+ *
+ * It's worth taking some time to describe how we interact with gsqueues. vnd
+ * has a gsqueue_set_t for itself. It's important that it has its own set, as
+ * the profile of work that vnd does is different from other sub-systems in the
+ * kernel. When we open a STREAMS device in vnd_s_open, we get a random gsqueue.
+ * Unlike TCP/IP which uses an gsqueue for per TCP connection, we end up
+ * maintaining one for a given device. Because of that, we want to use a
+ * pseudo-random one to try and spread out the load, and picking one at random
+ * is likely to be just as good as any fancy algorithm we might come up with,
+ * especially as any two devices could have radically different transmit
+ * profiles.
+ *
+ * While some of the write path may seem complicated, it does allow us to
+ * maintain an important property. Once we have acknowledged a write(9E) or
+ * frameio ioctl, we will not drop the packet, excepting something like ipf via
+ * the firewall hooks.
+ *
+ * There is one other source of flow control that can exist in the system which
+ * is in the form of a barrier. The barrier is an internal mechanism used for
+ * ensuring that an gsqueue is drained for a given device. We use this as part
+ * of tearing down. Specifically we disable the write path so nothing new can be
+ * inserted into the gsqueue and then insert a barrier block. Once the barrier
+ * block comes out of the gsqueue, then we know nothing else in the gsqueue that
+ * could refer to the vnd_str_t, being destroyed, exists.
+ *
+ * ---------------------
+ * vnd, zones, netstacks
+ * ---------------------
+ *
+ * vnd devices are scoped to datalinks and datalinks are scoped to a netstack.
+ * Because of that, vnd is also a netstack module. It registers with the
+ * netstack sub-system and receives callbacks every time a netstack is created,
+ * being shutdown, and destroyed. The netstack callbacks drive the creation and
+ * destruction of the vnd_pnsd_t structures.
+ *
+ * Recall from the earlier architecture diagrams that every vnd device is scoped
+ * to a netstack and known about by a given vnd_pnsd_t. When that netstack is
+ * torn down, we also tear down any vnd devices that are hanging around. When
+ * the netstack is torn down, we know that any zones that are scoped to that
+ * netstack are being shut down and have no processes remaining. This is going
+ * to be the case whether they are shared or exclusive stack zones. We have to
+ * perform a careful dance.
+ *
+ * There are two different callbacks that happen on tear down, the first is a
+ * shutdown callback, the second is a destroy callback. When the shutdown
+ * callback is fired we need to prepare for the netstack to go away and ensure
+ * that nothing can continue to persist itself.
+ *
+ * More specifically, when we get notice of a stack being shutdown we first
+ * remove the netstack from the global netstack list to ensure that no one new
+ * can come in and find the netstack and get a reference to it. After that, we
+ * notify the neti hooks that they're going away. Once that's all done, we get
+ * to the heart of the matter.
+ *
+ * When shutting down there could be any number of outstanding contexts that
+ * have a reference on the vnd_pnsd_t and on the individual links. However, we
+ * know that no one new will be able to find the vnd_pnsd_t. To account for
+ * things that have existing references we mark the vnd_pnsd_t`vpnd_flags with
+ * VND_NS_CONDEMNED. This is checked by code paths that wish to append a device
+ * to the netstack's list. If this is set, then they must not append to it.
+ * Once this is set, we know that the netstack's list of devices can never grow,
+ * only shrink.
+ *
+ * Next, for each device we tag it with VND_D_ZONE_DYING. This indicates that
+ * the container for the device is being destroyed and that we should not allow
+ * additional references to the device to be created, whether via open, or
+ * linking. The presence of this bit also allows things like the list ioctl and
+ * sdev to know not to consider its existence. At the conclusion of this being
+ * set, we know that no one else should be able to obtain a new reference to the
+ * device.
+ *
+ * Once that has been set for all devices, we go through and remove any existing
+ * links that have been established in sdev. Because doing that may cause the
+ * final reference for the device to be dropped, which still has a reference to
+ * the netstack, we have to restart our walk due to dropped locks. We know that
+ * this walk will eventually complete because the device cannot be relinked and
+ * no new devices will be attached in this netstack due to VND_NS_CONDEMNED.
+ * Once that's finished, the shutdown callback returns.
+ *
+ * When we reach the destroy callback, we simply wait for references on the
+ * netstack to disappear. Because the zone has been shut down, all processes in
+ * it that have open references have been terminated and reaped. Any threads
+ * that are newly trying to reference it will fail. However, there is one thing
+ * that can halt this that we have no control over, which is the global zone
+ * holding open a reference to the device. In this case the zone halt will hang
+ * in vnd_stack_destroy. Once the last references is dropped we finish destroy
+ * the netinfo hooks and free the vnd_pnsd_t.
+ *
+ * ----
+ * sdev
+ * ----
+ *
+ * vnd registers a sdev plugin which allows it to dynamically fill out /dev/vnd
+ * for both the global and non-global zones. In any given zone we always supply
+ * a control node via /dev/vnd/ctl. This is the self-cloning node. Each zone
+ * will also have an entry per-link in that zone under /dev/vnd/%datalink, eg.
+ * if a link was named net0, there would be a /dev/vnd/net0. The global zone can
+ * also see every link for every zone, ala /dev/net, under
+ * /dev/vnd/%zonename/%datalink, eg. if a zone named 'turin' had a vnd device
+ * named net0, the global zone would have /dev/vnd/turin/net0.
+ *
+ * The sdev plugin has three interfaces that it supplies back to sdev. One is to
+ * validate that a given node is still valid. The next is a callback from sdev
+ * to say that it is no longer using the node. The third and final one is from
+ * sdev where it asks us to fill a directory. All of the heavy lifting is done
+ * in directory filling and in valiation. We opt not to maintain a reference on
+ * the device while there is an sdev node present. This makes the removal of
+ * nodes much simpler and most of the possible failure modes shouldn't cause any
+ * real problems. For example, the open path has to handle both dev_t's which no
+ * longer exist and which are no longer linked.
+ *
+ * -----
+ * hooks
+ * -----
+ *
+ * Like IP, vnd sends all L3 packets through its firewall hooks. Currently vnd
+ * provides these for L3 IP and IPv6 traffic. Each netstack provides these hooks
+ * in a minimal fashion. While we will allow traffic to be filtered through the
+ * hooks, we do not provide means for packet injection or additional inspection
+ * at this time. There are a total of four different events created:
+ *
+ * o IPv4 physical in
+ * o IPv4 physical out
+ * o IPv6 physical in
+ * o IPv6 physical out
+ *
+ * ---------------
+ * Synchronization
+ * ---------------
+ *
+ * To make our synchronization simpler, we've put more effort into making the
+ * metadata/setup paths do more work. That work allows the data paths to make
+ * assumptions around synchronization that simplify the general case. Each major
+ * structure, the vnd_pnsd_t, vnd_dev_t, vnd_str_t, and vnd_data_queue_t is
+ * annotated with the protection that its members receives. The following
+ * annotations are used:
+ *
+ * A Atomics; these values are only modified using atomics values.
+ * Currently this only applies to kstat values.
+ * E Existence; no lock is needed to access this member, it does not
+ * change while the structure is valid.
+ * GL Global Lock; these members are protected by the global
+ * vnd_dev_lock.
+ * L Locked; access to the member is controlled by a lock that is in
+ * the structure.
+ * NSL netstack lock; this member is protected by the containing
+ * netstack. This only applies to the vnd_dev_t`vdd_nslink.
+ * X This member is special, and is discussed in this section.
+ *
+ * In addition to locking, we also have reference counts on the vnd_dev_t and
+ * the vnd_pnsd_t. The reference counts describe the lifetimes of the structure.
+ * With rare exception, once a reference count is decremented, the consumer
+ * should not assume that the data is valid any more. The only exception to this
+ * is the case where we're removing an extant reference count from a link into
+ * /devices or /dev. Reference counts are obtained on these structures as a part
+ * of looking them up.
+ *
+ * # Global Lock Ordering
+ * ######################
+ *
+ * The following is the order that you must take locks in vnd:
+ *
+ * 1) vnd`vnd_dev_lock
+ * 2) vnd_pnsd_t`vpnd_lock
+ * 3) vnd_dev_t`vnd_lock
+ * 4) vnd_str_t`vns_lock
+ * 5) vnd_data_queue_t`vdq_lock
+ *
+ * One must adhere to the following rules:
+ *
+ * o You must acquire a lower numbered lock before a high numbered lock.
+ * o It is NOT legal to hold two locks of the same level concurrently, eg. you
+ * can not hold two different vnd_dev_t's vnd_lock at the same time.
+ * o You may release locks in any order.
+ * o If you release a lock, you must honor the locking rules before acquiring
+ * it again.
+ * o You should not hold any locks when calling any of the rele functions.
+ *
+ * # Special Considerations
+ * ########################
+ *
+ * While most of the locking is what's expected, it's worth going into the
+ * special nature that a few members hold. Today, only two structures have
+ * special considerations: the vnd_dev_t and the vnd_str_t. All members with
+ * special considerations have an additional annotation that describes how you
+ * should interact with it.
+ *
+ * vnd_dev_t: The vdd_nsd and vdd_cr are only valid when the minor node is
+ * attached or in the process of attaching. If the code path that goes through
+ * requires an attached vnd_dev_t, eg. the data path and tear down path, then it
+ * is always legal to dereference that member without a lock held. When they are
+ * added to the system, they should be done under the vdd_lock and done as part
+ * of setting the VND_D_ATTACH_INFLIGHT flag. These should not change during the
+ * lifetime of the vnd_dev_t.
+ *
+ * vnd_dev_t: The vdd_ldih is similar to the vdd_nsd and vdd_cr, except that it
+ * always exists as it is a part of the structure. The only time that it's valid
+ * to be using it is during the attach path with the VND_D_ATTACH_INFLIGHT flag
+ * set or during tear down. Outside of those paths which are naturally
+ * serialized, there is no explicit locking around the member.
+ *
+ * vnd_str_t: The vns_dev and vns_nsd work in similar ways. They are not
+ * initially set as part of creating the structure, but are set as part of
+ * responding to the association ioctl. Anything in the data path or metadata
+ * path that requires association may assume that they exist, as we do not kick
+ * off the state machine until they're set.
+ *
+ * vnd_str_t: The vns_drainblk and vns_barrierblk are similarly special. The
+ * members are designed to be used as part of various operations with the
+ * gsqueues. A lock isn't needed to use them, but to work with them, the
+ * appropriate flag in the vnd_str_t`vns_flags must have been set by the current
+ * thread. Otherwise, it is always fair game to refer to their addresses. Their
+ * contents are ignored by vnd, but some members are manipulated by the gsqueue
+ * subsystem.
+ */
+
+#include <sys/conf.h>
+#include <sys/devops.h>
+#include <sys/modctl.h>
+#include <sys/stat.h>
+#include <sys/file.h>
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/open.h>
+#include <sys/ddi.h>
+#include <sys/ethernet.h>
+#include <sys/stropts.h>
+#include <sys/sunddi.h>
+#include <sys/stream.h>
+#include <sys/strsun.h>
+#include <sys/ksynch.h>
+#include <sys/taskq_impl.h>
+#include <sys/sdt.h>
+#include <sys/debug.h>
+#include <sys/sysmacros.h>
+#include <sys/dlpi.h>
+#include <sys/cred.h>
+#include <sys/id_space.h>
+#include <sys/list.h>
+#include <sys/ctype.h>
+#include <sys/policy.h>
+#include <sys/sunldi.h>
+#include <sys/cred.h>
+#include <sys/strsubr.h>
+#include <sys/poll.h>
+#include <sys/neti.h>
+#include <sys/hook.h>
+#include <sys/hook_event.h>
+#include <sys/vlan.h>
+#include <sys/dld.h>
+#include <sys/mac_client.h>
+#include <sys/netstack.h>
+#include <sys/fs/sdev_plugin.h>
+#include <sys/kstat.h>
+#include <sys/atomic.h>
+#include <sys/disp.h>
+#include <sys/random.h>
+#include <sys/gsqueue.h>
+
+#include <inet/ip.h>
+#include <inet/ip6.h>
+
+#include <sys/vnd.h>
+
+/*
+ * Globals
+ */
+static dev_info_t *vnd_dip;
+static taskq_t *vnd_taskq;
+static kmem_cache_t *vnd_str_cache;
+static kmem_cache_t *vnd_dev_cache;
+static kmem_cache_t *vnd_pnsd_cache;
+static id_space_t *vnd_minors;
+static int vnd_list_init = 0;
+static sdev_plugin_hdl_t vnd_sdev_hdl;
+static gsqueue_set_t *vnd_sqset;
+
+static kmutex_t vnd_dev_lock;
+static list_t vnd_dev_list; /* Protected by the vnd_dev_lock */
+static list_t vnd_nsd_list; /* Protected by the vnd_dev_lock */
+
+/*
+ * STREAMs ioctls
+ *
+ * The STREAMs ioctls are internal to vnd. No one should be seeing them, as such
+ * they aren't a part of the header file.
+ */
+#define VND_STRIOC (('v' << 24) | ('n' << 16) | ('d' << 8) | 0x80)
+
+/*
+ * Private ioctl to associate a given streams instance with a minor instance of
+ * the character device.
+ */
+#define VND_STRIOC_ASSOCIATE (VND_STRIOC | 0x1)
+
+typedef struct vnd_strioc_associate {
+ minor_t vsa_minor; /* minor device node */
+ netstackid_t vsa_nsid; /* netstack id */
+ vnd_errno_t vsa_errno; /* errno */
+} vnd_strioc_associate_t;
+
+typedef enum vnd_strioc_state {
+ VSS_UNKNOWN = 0,
+ VSS_COPYIN = 1,
+ VSS_COPYOUT = 2,
+} vnd_strioc_state_t;
+
+typedef struct vnd_strioc {
+ vnd_strioc_state_t vs_state;
+ caddr_t vs_addr;
+} vnd_strioc_t;
+
+/*
+ * VND SQUEUE TAGS, start at 0x42 so we don't overlap with extent tags. Though
+ * really, overlap is at the end of the day, inevitable.
+ */
+#define VND_SQUEUE_TAG_TX_DRAIN 0x42
+#define VND_SQUEUE_TAG_MAC_FLOW_CONTROL 0x43
+#define VND_SQUEUE_TAG_VND_WRITE 0x44
+#define VND_SQUEUE_TAG_ND_FRAMEIO_WRITE 0x45
+#define VND_SQUEUE_TAG_STRBARRIER 0x46
+
+/*
+ * vnd reserved names. These are names which are reserved by vnd and thus
+ * shouldn't be used by some external program.
+ */
+static char *vnd_reserved_names[] = {
+ "ctl",
+ "zone",
+ NULL
+};
+
+/*
+ * vnd's DTrace probe macros
+ *
+ * DTRACE_VND* are all for a stable provider. We also have an unstable internal
+ * set of probes for reference count manipulation.
+ */
+#define DTRACE_VND3(name, type1, arg1, type2, arg2, type3, arg3) \
+ DTRACE_PROBE3(__vnd_##name, type1, arg1, type2, arg2, type3, arg3);
+
+#define DTRACE_VND4(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4) \
+ DTRACE_PROBE4(__vnd_##name, type1, arg1, type2, arg2, type3, arg3, \
+ type4, arg4);
+
+#define DTRACE_VND5(name, type1, arg1, type2, arg2, type3, arg3, \
+ type4, arg4, type5, arg5) \
+ DTRACE_PROBE5(__vnd_##name, type1, arg1, type2, arg2, type3, arg3, \
+ type4, arg4, type5, arg5);
+
+#define DTRACE_VND_REFINC(vdp) \
+ DTRACE_PROBE2(vnd__ref__inc, vnd_dev_t *, vdp, int, vdp->vdd_ref);
+#define DTRACE_VND_REFDEC(vdp) \
+ DTRACE_PROBE2(vnd__ref__dec, vnd_dev_t *, vdp, int, vdp->vdd_ref);
+
+
+/*
+ * Tunables
+ */
+size_t vnd_vdq_default_size = 1024 * 64; /* 64 KB */
+size_t vnd_vdq_hard_max = 1024 * 1024 * 4; /* 4 MB */
+
+/*
+ * These numbers are designed as per-device tunables that are applied when a new
+ * vnd device is attached. They're a rough stab at what may be a reasonable
+ * amount of work to do in one burst in an squeue.
+ */
+size_t vnd_flush_burst_size = 1520 * 10; /* 10 1500 MTU packets */
+size_t vnd_flush_nburst = 10; /* 10 frames */
+
+/*
+ * Constants related to our sdev plugins
+ */
+#define VND_SDEV_NAME "vnd"
+#define VND_SDEV_ROOT "/dev/vnd"
+#define VND_SDEV_ZROOT "/dev/vnd/zone"
+
+/*
+ * Statistic macros
+ */
+#define VND_STAT_INC(vsp, field, val) \
+ atomic_add_64(&(vsp)->vns_ksdata.field.value.ui64, val)
+#define VND_LATENCY_1MS 1000000
+#define VND_LATENCY_10MS 10000000
+#define VND_LATENCY_100MS 100000000
+#define VND_LATENCY_1S 1000000000
+#define VND_LATENCY_10S 10000000000
+
+/*
+ * Constants for vnd hooks
+ */
+static uint8_t vnd_bcast_addr[6] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
+#define IPV4_MCAST_LEN 3
+static uint8_t vnd_ipv4_mcast[3] = { 0x01, 0x00, 0x5E };
+#define IPV6_MCAST_LEN 2
+static uint8_t vnd_ipv6_mcast[2] = { 0x33, 0x33 };
+
+/*
+ * vnd internal data structures and types
+ */
+
+struct vnd_str;
+struct vnd_dev;
+struct vnd_pnsd;
+
+/*
+ * As part of opening the device stream we need to properly communicate with our
+ * underlying stream. This is a bit of an asynchronous dance and we need to
+ * properly work with dld to get everything set up. We have to initiate the
+ * conversation with dld and as such we keep track of our state here.
+ */
+typedef enum vnd_str_state {
+ VNS_S_INITIAL = 0,
+ VNS_S_INFO_SENT,
+ VNS_S_EXCLUSIVE_SENT,
+ VNS_S_ATTACH_SENT,
+ VNS_S_BIND_SENT,
+ VNS_S_SAP_PROMISC_SENT,
+ VNS_S_MULTI_PROMISC_SENT,
+ VNS_S_RX_ONLY_PROMISC_SENT,
+ VNS_S_CAPAB_Q_SENT,
+ VNS_S_CAPAB_E_SENT,
+ VNS_S_ONLINE,
+ VNS_S_SHUTTING_DOWN,
+ VNS_S_ZOMBIE
+} vnd_str_state_t;
+
+typedef enum vnd_str_flags {
+ VNS_F_NEED_ZONE = 0x1,
+ VNS_F_TASKQ_DISPATCHED = 0x2,
+ VNS_F_CONDEMNED = 0x4,
+ VNS_F_FLOW_CONTROLLED = 0x8,
+ VNS_F_DRAIN_SCHEDULED = 0x10,
+ VNS_F_BARRIER = 0x20,
+ VNS_F_BARRIER_DONE = 0x40
+} vnd_str_flags_t;
+
+typedef enum vnd_capab_flags {
+ VNS_C_HCKSUM = 0x1,
+ VNS_C_DLD = 0x2,
+ VNS_C_DIRECT = 0x4,
+ VNS_C_HCKSUM_BADVERS = 0x8
+} vnd_capab_flags_t;
+
+/*
+ * Definitions to interact with direct callbacks
+ */
+typedef void (*vnd_rx_t)(struct vnd_str *, mac_resource_t *, mblk_t *,
+ mac_header_info_t *);
+typedef uintptr_t vnd_mac_cookie_t;
+/* DLD Direct capability function */
+typedef int (*vnd_dld_cap_t)(void *, uint_t, void *, uint_t);
+/* DLD Direct tx function */
+typedef vnd_mac_cookie_t (*vnd_dld_tx_t)(void *, mblk_t *, uint64_t, uint16_t);
+/* DLD Direct function to set flow control callback */
+typedef void *(*vnd_dld_set_fcb_t)(void *, void (*)(void *, vnd_mac_cookie_t),
+ void *);
+/* DLD Direct function to see if flow controlled still */
+typedef int (*vnd_dld_is_fc_t)(void *, vnd_mac_cookie_t);
+
+/*
+ * The vnd_str_capab_t is always protected by the vnd_str_t it's a member of.
+ */
+typedef struct vnd_str_capab {
+ vnd_capab_flags_t vsc_flags;
+ t_uscalar_t vsc_hcksum_opts;
+ vnd_dld_cap_t vsc_capab_f;
+ void *vsc_capab_hdl;
+ vnd_dld_tx_t vsc_tx_f;
+ void *vsc_tx_hdl;
+ vnd_dld_set_fcb_t vsc_set_fcb_f;
+ void *vsc_set_fcb_hdl;
+ vnd_dld_is_fc_t vsc_is_fc_f;
+ void *vsc_is_fc_hdl;
+ vnd_mac_cookie_t vsc_fc_cookie;
+ void *vsc_tx_fc_hdl;
+} vnd_str_capab_t;
+
+/*
+ * The vnd_data_queue is a simple construct for storing a series of messages in
+ * a queue.
+ *
+ * See synchronization section of the big theory statement for member
+ * annotations.
+ */
+typedef struct vnd_data_queue {
+ struct vnd_str *vdq_vns; /* E */
+ kmutex_t vdq_lock;
+ kcondvar_t vdq_ready; /* Uses vdq_lock */
+ ssize_t vdq_max; /* L */
+ ssize_t vdq_cur; /* L */
+ mblk_t *vdq_head; /* L */
+ mblk_t *vdq_tail; /* L */
+} vnd_data_queue_t;
+
+typedef struct vnd_str_stat {
+ kstat_named_t vks_rbytes;
+ kstat_named_t vks_rpackets;
+ kstat_named_t vks_obytes;
+ kstat_named_t vks_opackets;
+ kstat_named_t vks_nhookindrops;
+ kstat_named_t vks_nhookoutdrops;
+ kstat_named_t vks_ndlpidrops;
+ kstat_named_t vks_ndataindrops;
+ kstat_named_t vks_ndataoutdrops;
+ kstat_named_t vks_tdrops;
+ kstat_named_t vks_linkname;
+ kstat_named_t vks_zonename;
+ kstat_named_t vks_nmacflow;
+ kstat_named_t vks_tmacflow;
+ kstat_named_t vks_mac_flow_1ms;
+ kstat_named_t vks_mac_flow_10ms;
+ kstat_named_t vks_mac_flow_100ms;
+ kstat_named_t vks_mac_flow_1s;
+ kstat_named_t vks_mac_flow_10s;
+} vnd_str_stat_t;
+
+/*
+ * vnd stream structure
+ *
+ * See synchronization section of the big theory statement for member
+ * annotations.
+ */
+typedef struct vnd_str {
+ kmutex_t vns_lock;
+ kcondvar_t vns_cancelcv; /* Uses vns_lock */
+ kcondvar_t vns_barriercv; /* Uses vns_lock */
+ kcondvar_t vns_stcv; /* Uses vns_lock */
+ vnd_str_state_t vns_state; /* L */
+ vnd_str_state_t vns_laststate; /* L */
+ vnd_errno_t vns_errno; /* L */
+ vnd_str_flags_t vns_flags; /* L */
+ vnd_str_capab_t vns_caps; /* L */
+ taskq_ent_t vns_tqe; /* L */
+ vnd_data_queue_t vns_dq_read; /* E */
+ vnd_data_queue_t vns_dq_write; /* E */
+ mblk_t *vns_dlpi_inc; /* L */
+ queue_t *vns_rq; /* E */
+ queue_t *vns_wq; /* E */
+ queue_t *vns_lrq; /* E */
+ t_uscalar_t vns_dlpi_style; /* L */
+ t_uscalar_t vns_minwrite; /* L */
+ t_uscalar_t vns_maxwrite; /* L */
+ hrtime_t vns_fclatch; /* L */
+ hrtime_t vns_fcupdate; /* L */
+ kstat_t *vns_kstat; /* E */
+ gsqueue_t *vns_squeue; /* E */
+ mblk_t vns_drainblk; /* E + X */
+ mblk_t vns_barrierblk; /* E + X */
+ vnd_str_stat_t vns_ksdata; /* A */
+ size_t vns_nflush; /* L */
+ size_t vns_bsize; /* L */
+ struct vnd_dev *vns_dev; /* E + X */
+ struct vnd_pnsd *vns_nsd; /* E + X */
+} vnd_str_t;
+
+typedef enum vnd_dev_flags {
+ VND_D_ATTACH_INFLIGHT = 0x001,
+ VND_D_ATTACHED = 0x002,
+ VND_D_LINK_INFLIGHT = 0x004,
+ VND_D_LINKED = 0x008,
+ VND_D_CONDEMNED = 0x010,
+ VND_D_ZONE_DYING = 0x020,
+ VND_D_OPENED = 0x040
+} vnd_dev_flags_t;
+
+/*
+ * This represents the data associated with a minor device instance.
+ *
+ * See synchronization section of the big theory statement for member
+ * annotations.
+ */
+typedef struct vnd_dev {
+ kmutex_t vdd_lock;
+ list_node_t vdd_link; /* GL */
+ list_node_t vdd_nslink; /* NSL */
+ int vdd_ref; /* L */
+ vnd_dev_flags_t vdd_flags; /* L */
+ minor_t vdd_minor; /* E */
+ dev_t vdd_devid; /* E */
+ ldi_ident_t vdd_ldiid; /* E */
+ ldi_handle_t vdd_ldih; /* X */
+ cred_t *vdd_cr; /* X */
+ vnd_str_t *vdd_str; /* L */
+ struct pollhead vdd_ph; /* E */
+ struct vnd_pnsd *vdd_nsd; /* E + X */
+ char vdd_datalink[VND_NAMELEN]; /* L */
+ char vdd_lname[VND_NAMELEN]; /* L */
+} vnd_dev_t;
+
+typedef enum vnd_pnsd_flags {
+ VND_NS_CONDEMNED = 0x1
+} vnd_pnsd_flags_t;
+
+/*
+ * Per netstack data structure.
+ *
+ * See synchronization section of the big theory statement for member
+ * annotations.
+ */
+typedef struct vnd_pnsd {
+ list_node_t vpnd_link; /* protected by global dev lock */
+ zoneid_t vpnd_zid; /* E */
+ netstackid_t vpnd_nsid; /* E */
+ boolean_t vpnd_hooked; /* E */
+ net_handle_t vpnd_neti_v4; /* E */
+ hook_family_t vpnd_family_v4; /* E */
+ hook_event_t vpnd_event_in_v4; /* E */
+ hook_event_t vpnd_event_out_v4; /* E */
+ hook_event_token_t vpnd_token_in_v4; /* E */
+ hook_event_token_t vpnd_token_out_v4; /* E */
+ net_handle_t vpnd_neti_v6; /* E */
+ hook_family_t vpnd_family_v6; /* E */
+ hook_event_t vpnd_event_in_v6; /* E */
+ hook_event_t vpnd_event_out_v6; /* E */
+ hook_event_token_t vpnd_token_in_v6; /* E */
+ hook_event_token_t vpnd_token_out_v6; /* E */
+ kmutex_t vpnd_lock; /* Protects remaining members */
+ kcondvar_t vpnd_ref_change; /* Uses vpnd_lock */
+ int vpnd_ref; /* L */
+ vnd_pnsd_flags_t vpnd_flags; /* L */
+ list_t vpnd_dev_list; /* L */
+} vnd_pnsd_t;
+
+static void vnd_squeue_tx_drain(void *, mblk_t *, gsqueue_t *, void *);
+
+/*
+ * Drop function signature.
+ */
+typedef void (*vnd_dropper_f)(vnd_str_t *, mblk_t *, const char *);
+
+static void
+vnd_drop_ctl(vnd_str_t *vsp, mblk_t *mp, const char *reason)
+{
+ DTRACE_VND4(drop__ctl, mblk_t *, mp, vnd_str_t *, vsp, mblk_t *,
+ mp, const char *, reason);
+ if (mp != NULL) {
+ freemsg(mp);
+ }
+ VND_STAT_INC(vsp, vks_ndlpidrops, 1);
+ VND_STAT_INC(vsp, vks_tdrops, 1);
+}
+
+static void
+vnd_drop_in(vnd_str_t *vsp, mblk_t *mp, const char *reason)
+{
+ DTRACE_VND4(drop__in, mblk_t *, mp, vnd_str_t *, vsp, mblk_t *,
+ mp, const char *, reason);
+ if (mp != NULL) {
+ freemsg(mp);
+ }
+ VND_STAT_INC(vsp, vks_ndataindrops, 1);
+ VND_STAT_INC(vsp, vks_tdrops, 1);
+}
+
+static void
+vnd_drop_out(vnd_str_t *vsp, mblk_t *mp, const char *reason)
+{
+ DTRACE_VND4(drop__out, mblk_t *, mp, vnd_str_t *, vsp, mblk_t *,
+ mp, const char *, reason);
+ if (mp != NULL) {
+ freemsg(mp);
+ }
+ VND_STAT_INC(vsp, vks_ndataoutdrops, 1);
+ VND_STAT_INC(vsp, vks_tdrops, 1);
+}
+
+static void
+vnd_drop_hook_in(vnd_str_t *vsp, mblk_t *mp, const char *reason)
+{
+ DTRACE_VND4(drop__in, mblk_t *, mp, vnd_str_t *, vsp, mblk_t *,
+ mp, const char *, reason);
+ if (mp != NULL) {
+ freemsg(mp);
+ }
+ VND_STAT_INC(vsp, vks_nhookindrops, 1);
+ VND_STAT_INC(vsp, vks_tdrops, 1);
+}
+
+static void
+vnd_drop_hook_out(vnd_str_t *vsp, mblk_t *mp, const char *reason)
+{
+ DTRACE_VND4(drop__out, mblk_t *, mp, vnd_str_t *, vsp, mblk_t *,
+ mp, const char *, reason);
+ if (mp != NULL) {
+ freemsg(mp);
+ }
+ VND_STAT_INC(vsp, vks_nhookoutdrops, 1);
+ VND_STAT_INC(vsp, vks_tdrops, 1);
+}
+
+static void
+vnd_drop_panic(vnd_str_t *vsp, mblk_t *mp, const char *reason)
+{
+ panic("illegal vnd drop");
+}
+
+static void
+vnd_mac_drop_input(vnd_str_t *vsp, mac_resource_t *unused, mblk_t *mp_chain,
+ mac_header_info_t *mhip)
+{
+ mblk_t *mp;
+
+ while (mp_chain != NULL) {
+ mp = mp_chain;
+ mp_chain = mp->b_next;
+ vnd_drop_hook_in(vsp, mp, "stream not associated");
+ }
+}
+
+static vnd_pnsd_t *
+vnd_nsd_lookup(netstackid_t nsid)
+{
+ vnd_pnsd_t *nsp;
+
+ mutex_enter(&vnd_dev_lock);
+ for (nsp = list_head(&vnd_nsd_list); nsp != NULL;
+ nsp = list_next(&vnd_nsd_list, nsp)) {
+ if (nsp->vpnd_nsid == nsid) {
+ mutex_enter(&nsp->vpnd_lock);
+ VERIFY(nsp->vpnd_ref >= 0);
+ nsp->vpnd_ref++;
+ mutex_exit(&nsp->vpnd_lock);
+ break;
+ }
+ }
+ mutex_exit(&vnd_dev_lock);
+ return (nsp);
+}
+
+static vnd_pnsd_t *
+vnd_nsd_lookup_by_zid(zoneid_t zid)
+{
+ netstack_t *ns;
+ vnd_pnsd_t *nsp;
+ ns = netstack_find_by_zoneid(zid);
+ if (ns == NULL)
+ return (NULL);
+ nsp = vnd_nsd_lookup(ns->netstack_stackid);
+ netstack_rele(ns);
+ return (nsp);
+}
+
+static vnd_pnsd_t *
+vnd_nsd_lookup_by_zonename(char *zname)
+{
+ zone_t *zonep;
+ vnd_pnsd_t *nsp;
+
+ zonep = zone_find_by_name(zname);
+ if (zonep == NULL)
+ return (NULL);
+
+ nsp = vnd_nsd_lookup_by_zid(zonep->zone_id);
+ zone_rele(zonep);
+ return (nsp);
+}
+
+static void
+vnd_nsd_ref(vnd_pnsd_t *nsp)
+{
+ mutex_enter(&nsp->vpnd_lock);
+ /*
+ * This can only be used on something that has been obtained through
+ * some other means. As such, the caller should already have a reference
+ * before adding another one. This function should not be used as a
+ * means of creating the initial reference.
+ */
+ VERIFY(nsp->vpnd_ref > 0);
+ nsp->vpnd_ref++;
+ mutex_exit(&nsp->vpnd_lock);
+ cv_broadcast(&nsp->vpnd_ref_change);
+}
+
+static void
+vnd_nsd_rele(vnd_pnsd_t *nsp)
+{
+ mutex_enter(&nsp->vpnd_lock);
+ VERIFY(nsp->vpnd_ref > 0);
+ nsp->vpnd_ref--;
+ mutex_exit(&nsp->vpnd_lock);
+ cv_broadcast(&nsp->vpnd_ref_change);
+}
+
+static vnd_dev_t *
+vnd_dev_lookup(minor_t m)
+{
+ vnd_dev_t *vdp;
+ mutex_enter(&vnd_dev_lock);
+ for (vdp = list_head(&vnd_dev_list); vdp != NULL;
+ vdp = list_next(&vnd_dev_list, vdp)) {
+ if (vdp->vdd_minor == m) {
+ mutex_enter(&vdp->vdd_lock);
+ VERIFY(vdp->vdd_ref > 0);
+ vdp->vdd_ref++;
+ DTRACE_VND_REFINC(vdp);
+ mutex_exit(&vdp->vdd_lock);
+ break;
+ }
+ }
+ mutex_exit(&vnd_dev_lock);
+ return (vdp);
+}
+
+static void
+vnd_dev_free(vnd_dev_t *vdp)
+{
+ /*
+ * When the STREAM exists we need to go through and make sure
+ * communication gets torn down. As part of closing the stream, we
+ * guarantee that nothing else should be able to enter the stream layer
+ * at this point. That means no one should be able to call
+ * read(),write() or one of the frameio ioctls.
+ */
+ if (vdp->vdd_flags & VND_D_ATTACHED) {
+ ldi_close(vdp->vdd_ldih, FREAD | FWRITE, vdp->vdd_cr);
+ crfree(vdp->vdd_cr);
+ vdp->vdd_cr = NULL;
+
+ /*
+ * We have to remove ourselves from our parents list now. It is
+ * really quite important that we have already set the condemend
+ * flag here so that our containing netstack basically knows
+ * that we're on the way down and knows not to wait for us. It's
+ * also important that we do that before we put a rele on the
+ * the device as that is the point at which it will check again.
+ */
+ mutex_enter(&vdp->vdd_nsd->vpnd_lock);
+ list_remove(&vdp->vdd_nsd->vpnd_dev_list, vdp);
+ mutex_exit(&vdp->vdd_nsd->vpnd_lock);
+ vnd_nsd_rele(vdp->vdd_nsd);
+ vdp->vdd_nsd = NULL;
+ }
+ ASSERT(vdp->vdd_flags & VND_D_CONDEMNED);
+ id_free(vnd_minors, vdp->vdd_minor);
+ mutex_destroy(&vdp->vdd_lock);
+ kmem_cache_free(vnd_dev_cache, vdp);
+}
+
+static void
+vnd_dev_ref(vnd_dev_t *vdp)
+{
+ mutex_enter(&vdp->vdd_lock);
+ VERIFY(vdp->vdd_ref > 0);
+ vdp->vdd_ref++;
+ DTRACE_VND_REFINC(vdp);
+ mutex_exit(&vdp->vdd_lock);
+}
+
+/*
+ * As part of releasing the hold on this we may tear down a given vnd_dev_t As
+ * such we need to make sure that we grab the list lock first before grabbing
+ * the vnd_dev_t's lock to ensure proper lock ordering.
+ */
+static void
+vnd_dev_rele(vnd_dev_t *vdp)
+{
+ mutex_enter(&vnd_dev_lock);
+ mutex_enter(&vdp->vdd_lock);
+ VERIFY(vdp->vdd_ref > 0);
+ vdp->vdd_ref--;
+ DTRACE_VND_REFDEC(vdp);
+ if (vdp->vdd_ref > 0) {
+ mutex_exit(&vdp->vdd_lock);
+ mutex_exit(&vnd_dev_lock);
+ return;
+ }
+
+ /*
+ * Now that we've removed this from the list, we can go ahead and
+ * drop the list lock. No one else can find this device and reference
+ * it. As its reference count is zero, it by definition does not have
+ * any remaining entries in /devices that could lead someone back to
+ * this.
+ */
+ vdp->vdd_flags |= VND_D_CONDEMNED;
+ list_remove(&vnd_dev_list, vdp);
+ mutex_exit(&vdp->vdd_lock);
+ mutex_exit(&vnd_dev_lock);
+
+ vnd_dev_free(vdp);
+}
+
+/*
+ * Insert a mesage block chain if there's space, otherwise drop it. Return one
+ * so someone who was waiting for data would now end up having found it. eg.
+ * caller should consider a broadcast.
+ */
+static int
+vnd_dq_push(vnd_data_queue_t *vqp, mblk_t *mp, boolean_t reserved,
+ vnd_dropper_f dropf)
+{
+ size_t msize;
+
+ ASSERT(MUTEX_HELD(&vqp->vdq_lock));
+ if (reserved == B_FALSE) {
+ msize = msgsize(mp);
+ if (vqp->vdq_cur + msize > vqp->vdq_max) {
+ dropf(vqp->vdq_vns, mp, "buffer full");
+ return (0);
+ }
+ vqp->vdq_cur += msize;
+ }
+
+ if (vqp->vdq_head == NULL) {
+ ASSERT(vqp->vdq_tail == NULL);
+ vqp->vdq_head = mp;
+ vqp->vdq_tail = mp;
+ } else {
+ vqp->vdq_tail->b_next = mp;
+ vqp->vdq_tail = mp;
+ }
+
+ return (1);
+}
+
+/*
+ * Remove a message message block chain. If the amount of space in the buffer
+ * has changed we return 1. We have no way of knowing whether or not there is
+ * enough space overall for a given writer who is blocked, so we always end up
+ * having to return true and thus tell consumers that they should consider
+ * signalling.
+ */
+static int
+vnd_dq_pop(vnd_data_queue_t *vqp, mblk_t **mpp)
+{
+ size_t msize;
+ mblk_t *mp;
+
+ ASSERT(MUTEX_HELD(&vqp->vdq_lock));
+ ASSERT(mpp != NULL);
+ if (vqp->vdq_head == NULL) {
+ ASSERT(vqp->vdq_tail == NULL);
+ *mpp = NULL;
+ return (0);
+ }
+
+ mp = vqp->vdq_head;
+ msize = msgsize(mp);
+
+ vqp->vdq_cur -= msize;
+ if (mp->b_next == NULL) {
+ vqp->vdq_head = NULL;
+ vqp->vdq_tail = NULL;
+ /*
+ * We can't be certain that this is always going to be zero.
+ * Someone may have basically taken a reservation of space on
+ * the data queue, eg. claimed spae but not yet pushed it on
+ * yet.
+ */
+ ASSERT(vqp->vdq_cur >= 0);
+ } else {
+ vqp->vdq_head = mp->b_next;
+ ASSERT(vqp->vdq_cur > 0);
+ }
+ mp->b_next = NULL;
+ *mpp = mp;
+ return (1);
+}
+
+/*
+ * Reserve space in the queue. This will bump up the size of the queue and
+ * entitle the user to push something on later without bumping the space.
+ */
+static int
+vnd_dq_reserve(vnd_data_queue_t *vqp, ssize_t size)
+{
+ ASSERT(MUTEX_HELD(&vqp->vdq_lock));
+ ASSERT(size >= 0);
+
+ if (size == 0)
+ return (0);
+
+ if (size + vqp->vdq_cur > vqp->vdq_max)
+ return (0);
+
+ vqp->vdq_cur += size;
+ return (1);
+}
+
+static void
+vnd_dq_unreserve(vnd_data_queue_t *vqp, ssize_t size)
+{
+ ASSERT(MUTEX_HELD(&vqp->vdq_lock));
+ ASSERT(size > 0);
+ ASSERT(size <= vqp->vdq_cur);
+
+ vqp->vdq_cur -= size;
+}
+
+static void
+vnd_dq_flush(vnd_data_queue_t *vqp, vnd_dropper_f dropf)
+{
+ mblk_t *mp, *next;
+
+ mutex_enter(&vqp->vdq_lock);
+ for (mp = vqp->vdq_head; mp != NULL; mp = next) {
+ next = mp->b_next;
+ mp->b_next = NULL;
+ dropf(vqp->vdq_vns, mp, "vnd_dq_flush");
+ }
+ vqp->vdq_cur = 0;
+ vqp->vdq_head = NULL;
+ vqp->vdq_tail = NULL;
+ mutex_exit(&vqp->vdq_lock);
+}
+
+static boolean_t
+vnd_dq_is_empty(vnd_data_queue_t *vqp)
+{
+ boolean_t ret;
+
+ mutex_enter(&vqp->vdq_lock);
+ if (vqp->vdq_head == NULL)
+ ret = B_TRUE;
+ else
+ ret = B_FALSE;
+ mutex_exit(&vqp->vdq_lock);
+
+ return (ret);
+}
+
+/*
+ * Get a network uint16_t from the message and translate it into something the
+ * host understands.
+ */
+static int
+vnd_mbc_getu16(mblk_t *mp, off_t off, uint16_t *out)
+{
+ size_t mpsize;
+ uint8_t *bp;
+
+ mpsize = msgsize(mp);
+ /* Check for overflow */
+ if (off + sizeof (uint16_t) > mpsize)
+ return (1);
+
+ mpsize = MBLKL(mp);
+ while (off >= mpsize) {
+ mp = mp->b_cont;
+ off -= mpsize;
+ mpsize = MBLKL(mp);
+ }
+
+ /*
+ * Data is in network order. Note the second byte of data might be in
+ * the next mp.
+ */
+ bp = mp->b_rptr + off;
+ *out = *bp << 8;
+ if (off + 1 == mpsize) {
+ mp = mp->b_cont;
+ bp = mp->b_rptr;
+ } else {
+ bp++;
+ }
+
+ *out |= *bp;
+ return (0);
+}
+
+/*
+ * Given an mblk chain find the mblk and address of a particular offset.
+ */
+static int
+vnd_mbc_getoffset(mblk_t *mp, off_t off, mblk_t **mpp, uintptr_t *offp)
+{
+ size_t mpsize;
+
+ if (off >= msgsize(mp))
+ return (1);
+
+ mpsize = MBLKL(mp);
+ while (off >= mpsize) {
+ mp = mp->b_cont;
+ off -= mpsize;
+ mpsize = MBLKL(mp);
+ }
+ *mpp = mp;
+ *offp = (uintptr_t)mp->b_rptr + off;
+
+ return (0);
+}
+
+/*
+ * Fetch the destination mac address. Set *dstp to that mac address. If the data
+ * is not contiguous in the first mblk_t, fill in datap and set *dstp to it.
+ */
+static int
+vnd_mbc_getdstmac(mblk_t *mp, uint8_t **dstpp, uint8_t *datap)
+{
+ int i;
+
+ if (MBLKL(mp) >= ETHERADDRL) {
+ *dstpp = mp->b_rptr;
+ return (0);
+ }
+
+ *dstpp = datap;
+ for (i = 0; i < ETHERADDRL; i += 2, datap += 2) {
+ if (vnd_mbc_getu16(mp, i, (uint16_t *)datap) != 0)
+ return (1);
+ }
+
+ return (0);
+}
+
+static int
+vnd_hook(vnd_str_t *vsp, mblk_t **mpp, net_handle_t netiv4, hook_event_t hev4,
+ hook_event_token_t hetv4, net_handle_t netiv6, hook_event_t hev6,
+ hook_event_token_t hetv6, vnd_dropper_f hdrop, vnd_dropper_f ddrop)
+{
+ uint16_t etype;
+ int vlan = 0;
+ hook_pkt_event_t info;
+ size_t offset, mblen;
+ uint8_t *dstp;
+ uint8_t dstaddr[6];
+ hook_event_t he;
+ hook_event_token_t het;
+ net_handle_t neti;
+
+ /*
+ * Before we can ask if we're interested we have to do enough work to
+ * determine the ethertype.
+ */
+
+ /* Byte 12 is either the VLAN tag or the ethertype */
+ if (vnd_mbc_getu16(*mpp, 12, &etype) != 0) {
+ ddrop(vsp, *mpp, "packet has incomplete ethernet header");
+ *mpp = NULL;
+ return (1);
+ }
+
+ if (etype == ETHERTYPE_VLAN) {
+ vlan = 1;
+ /* Actual ethertype is another four bytes in */
+ if (vnd_mbc_getu16(*mpp, 16, &etype) != 0) {
+ ddrop(vsp, *mpp,
+ "packet has incomplete ethernet vlan header");
+ *mpp = NULL;
+ return (1);
+ }
+ offset = sizeof (struct ether_vlan_header);
+ } else {
+ offset = sizeof (struct ether_header);
+ }
+
+ /*
+ * At the moment we only hook on the kinds of things that the IP module
+ * would normally.
+ */
+ if (etype != ETHERTYPE_IP && etype != ETHERTYPE_IPV6)
+ return (0);
+
+ if (etype == ETHERTYPE_IP) {
+ neti = netiv4;
+ he = hev4;
+ het = hetv4;
+ } else {
+ neti = netiv6;
+ he = hev6;
+ het = hetv6;
+ }
+
+ if (!he.he_interested)
+ return (0);
+
+
+ if (vnd_mbc_getdstmac(*mpp, &dstp, dstaddr) != 0) {
+ ddrop(vsp, *mpp, "packet has incomplete ethernet header");
+ *mpp = NULL;
+ return (1);
+ }
+
+ /*
+ * Now that we know we're interested, we have to do some additional
+ * sanity checking for IPF's sake, ala ip_check_length(). Specifically
+ * we need to check to make sure that the remaining packet size,
+ * excluding MAC, is at least the size of an IP header.
+ */
+ mblen = msgsize(*mpp);
+ if ((etype == ETHERTYPE_IP &&
+ mblen - offset < IP_SIMPLE_HDR_LENGTH) ||
+ (etype == ETHERTYPE_IPV6 && mblen - offset < IPV6_HDR_LEN)) {
+ ddrop(vsp, *mpp, "packet has invalid IP header");
+ *mpp = NULL;
+ return (1);
+ }
+
+ info.hpe_protocol = neti;
+ info.hpe_ifp = (phy_if_t)vsp;
+ info.hpe_ofp = (phy_if_t)vsp;
+ info.hpe_mp = mpp;
+ info.hpe_flags = 0;
+
+ if (bcmp(vnd_bcast_addr, dstp, ETHERADDRL) == 0)
+ info.hpe_flags |= HPE_BROADCAST;
+ else if (etype == ETHERTYPE_IP &&
+ bcmp(vnd_ipv4_mcast, vnd_bcast_addr, IPV4_MCAST_LEN) == 0)
+ info.hpe_flags |= HPE_MULTICAST;
+ else if (etype == ETHERTYPE_IPV6 &&
+ bcmp(vnd_ipv6_mcast, vnd_bcast_addr, IPV6_MCAST_LEN) == 0)
+ info.hpe_flags |= HPE_MULTICAST;
+
+ if (vnd_mbc_getoffset(*mpp, offset, &info.hpe_mb,
+ (uintptr_t *)&info.hpe_hdr) != 0) {
+ ddrop(vsp, *mpp, "packet too small -- "
+ "unable to find payload");
+ *mpp = NULL;
+ return (1);
+ }
+
+ if (hook_run(neti->netd_hooks, het, (hook_data_t)&info) != 0) {
+ hdrop(vsp, *mpp, "drooped by hooks");
+ return (1);
+ }
+
+ return (0);
+}
+
+/*
+ * This should not be used for DL_INFO_REQ.
+ */
+static mblk_t *
+vnd_dlpi_alloc(size_t len, t_uscalar_t prim)
+{
+ mblk_t *mp;
+ mp = allocb(len, BPRI_MED);
+ if (mp == NULL)
+ return (NULL);
+
+ mp->b_datap->db_type = M_PROTO;
+ mp->b_wptr = mp->b_rptr + len;
+ bzero(mp->b_rptr, len);
+ ((dl_unitdata_req_t *)mp->b_rptr)->dl_primitive = prim;
+
+ return (mp);
+}
+
+static void
+vnd_dlpi_inc_push(vnd_str_t *vsp, mblk_t *mp)
+{
+ mblk_t **mpp;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ ASSERT(mp->b_next == NULL);
+ mpp = &vsp->vns_dlpi_inc;
+ while (*mpp != NULL)
+ mpp = &((*mpp)->b_next);
+ *mpp = mp;
+}
+
+static mblk_t *
+vnd_dlpi_inc_pop(vnd_str_t *vsp)
+{
+ mblk_t *mp;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ mp = vsp->vns_dlpi_inc;
+ if (mp != NULL) {
+ VERIFY(mp->b_next == NULL || mp->b_next != mp);
+ vsp->vns_dlpi_inc = mp->b_next;
+ mp->b_next = NULL;
+ }
+ return (mp);
+}
+
+static int
+vnd_st_sinfo(vnd_str_t *vsp)
+{
+ mblk_t *mp;
+ dl_info_req_t *dlir;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ mp = allocb(MAX(sizeof (dl_info_req_t), sizeof (dl_info_ack_t)),
+ BPRI_HI);
+ if (mp == NULL) {
+ vsp->vns_errno = VND_E_NOMEM;
+ return (1);
+ }
+ vsp->vns_state = VNS_S_INFO_SENT;
+ cv_broadcast(&vsp->vns_stcv);
+
+ mp->b_datap->db_type = M_PCPROTO;
+ dlir = (dl_info_req_t *)mp->b_rptr;
+ mp->b_wptr = (uchar_t *)&dlir[1];
+ dlir->dl_primitive = DL_INFO_REQ;
+ putnext(vsp->vns_wq, mp);
+
+ return (0);
+}
+
+static int
+vnd_st_info(vnd_str_t *vsp)
+{
+ dl_info_ack_t *dlia;
+ mblk_t *mp;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ mp = vnd_dlpi_inc_pop(vsp);
+ dlia = (dl_info_ack_t *)mp->b_rptr;
+ vsp->vns_dlpi_style = dlia->dl_provider_style;
+ vsp->vns_minwrite = dlia->dl_min_sdu;
+ vsp->vns_maxwrite = dlia->dl_max_sdu;
+
+ /*
+ * At this time we only support DL_ETHER devices.
+ */
+ if (dlia->dl_mac_type != DL_ETHER) {
+ freemsg(mp);
+ vsp->vns_errno = VND_E_NOTETHER;
+ return (1);
+ }
+
+ /*
+ * Because vnd operates on entire packets, we need to manually account
+ * for the ethernet header information. We add the size of the
+ * ether_vlan_header to account for this, regardless if it is using
+ * vlans or not.
+ */
+ vsp->vns_maxwrite += sizeof (struct ether_vlan_header);
+
+ freemsg(mp);
+ return (0);
+}
+
+static int
+vnd_st_sexclusive(vnd_str_t *vsp)
+{
+ mblk_t *mp;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ mp = vnd_dlpi_alloc(sizeof (dl_attach_req_t), DL_EXCLUSIVE_REQ);
+ if (mp == NULL) {
+ vsp->vns_errno = VND_E_NOMEM;
+ return (1);
+ }
+
+ vsp->vns_state = VNS_S_EXCLUSIVE_SENT;
+ cv_broadcast(&vsp->vns_stcv);
+ putnext(vsp->vns_wq, mp);
+ return (0);
+}
+
+static int
+vnd_st_exclusive(vnd_str_t *vsp)
+{
+ mblk_t *mp;
+ t_uscalar_t prim, cprim;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ mp = vnd_dlpi_inc_pop(vsp);
+ prim = ((dl_error_ack_t *)mp->b_rptr)->dl_primitive;
+ cprim = ((dl_ok_ack_t *)mp->b_rptr)->dl_correct_primitive;
+
+ if (prim != DL_OK_ACK && prim != DL_ERROR_ACK) {
+ vnd_drop_ctl(vsp, mp,
+ "wrong dlpi primitive for vnd_st_exclusive");
+ vsp->vns_errno = VND_E_DLPIINVAL;
+ return (1);
+ }
+
+ if (cprim != DL_EXCLUSIVE_REQ) {
+ vnd_drop_ctl(vsp, mp,
+ "vnd_st_exclusive: got ack/nack for wrong primitive");
+ vsp->vns_errno = VND_E_DLPIINVAL;
+ return (1);
+ }
+
+ if (prim == DL_ERROR_ACK)
+ vsp->vns_errno = VND_E_DLEXCL;
+
+ freemsg(mp);
+ return (prim == DL_ERROR_ACK);
+}
+
+/*
+ * Send down a DLPI_ATTACH_REQ.
+ */
+static int
+vnd_st_sattach(vnd_str_t *vsp)
+{
+ mblk_t *mp;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ mp = vnd_dlpi_alloc(sizeof (dl_attach_req_t), DL_ATTACH_REQ);
+ if (mp == NULL) {
+ vsp->vns_errno = VND_E_NOMEM;
+ return (1);
+ }
+
+ ((dl_attach_req_t *)mp->b_rptr)->dl_ppa = 0;
+ vsp->vns_state = VNS_S_ATTACH_SENT;
+ cv_broadcast(&vsp->vns_stcv);
+ putnext(vsp->vns_wq, mp);
+
+ return (0);
+}
+
+static int
+vnd_st_attach(vnd_str_t *vsp)
+{
+ mblk_t *mp;
+ t_uscalar_t prim, cprim;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ mp = vnd_dlpi_inc_pop(vsp);
+ prim = ((dl_ok_ack_t *)mp->b_rptr)->dl_primitive;
+ cprim = ((dl_ok_ack_t *)mp->b_rptr)->dl_correct_primitive;
+
+
+ if (prim != DL_OK_ACK && prim != DL_ERROR_ACK) {
+ vnd_drop_ctl(vsp, mp, "vnd_st_attach: unknown primitive type");
+ vsp->vns_errno = VND_E_DLPIINVAL;
+ return (1);
+ }
+
+ if (cprim != DL_ATTACH_REQ) {
+ vnd_drop_ctl(vsp, mp,
+ "vnd_st_attach: Got ack/nack for wrong primitive");
+ vsp->vns_errno = VND_E_DLPIINVAL;
+ return (1);
+ }
+
+ if (prim == DL_ERROR_ACK)
+ vsp->vns_errno = VND_E_ATTACHFAIL;
+
+ freemsg(mp);
+ return (prim == DL_ERROR_ACK);
+}
+
+static int
+vnd_st_sbind(vnd_str_t *vsp)
+{
+ mblk_t *mp;
+ dl_bind_req_t *dbrp;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ mp = vnd_dlpi_alloc(sizeof (dl_bind_req_t) + sizeof (long),
+ DL_BIND_REQ);
+ if (mp == NULL) {
+ vsp->vns_errno = VND_E_NOMEM;
+ return (1);
+ }
+ dbrp = (dl_bind_req_t *)(mp->b_rptr);
+ dbrp->dl_sap = 0;
+ dbrp->dl_service_mode = DL_CLDLS;
+
+ vsp->vns_state = VNS_S_BIND_SENT;
+ cv_broadcast(&vsp->vns_stcv);
+ putnext(vsp->vns_wq, mp);
+
+ return (0);
+}
+
+static int
+vnd_st_bind(vnd_str_t *vsp)
+{
+ mblk_t *mp;
+ t_uscalar_t prim;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ mp = vnd_dlpi_inc_pop(vsp);
+ prim = ((dl_error_ack_t *)mp->b_rptr)->dl_primitive;
+
+ if (prim != DL_BIND_ACK && prim != DL_ERROR_ACK) {
+ vnd_drop_ctl(vsp, mp, "wrong dlpi primitive for vnd_st_bind");
+ vsp->vns_errno = VND_E_DLPIINVAL;
+ return (1);
+ }
+
+ if (prim == DL_ERROR_ACK)
+ vsp->vns_errno = VND_E_BINDFAIL;
+
+ freemsg(mp);
+ return (prim == DL_ERROR_ACK);
+}
+
+static int
+vnd_st_spromisc(vnd_str_t *vsp, int type, vnd_str_state_t next)
+{
+ mblk_t *mp;
+ dl_promiscon_req_t *dprp;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ mp = vnd_dlpi_alloc(sizeof (dl_promiscon_req_t), DL_PROMISCON_REQ);
+ if (mp == NULL) {
+ vsp->vns_errno = VND_E_NOMEM;
+ return (1);
+ }
+
+ dprp = (dl_promiscon_req_t *)mp->b_rptr;
+ dprp->dl_level = type;
+
+ vsp->vns_state = next;
+ cv_broadcast(&vsp->vns_stcv);
+ putnext(vsp->vns_wq, mp);
+
+ return (0);
+}
+
+static int
+vnd_st_promisc(vnd_str_t *vsp)
+{
+ mblk_t *mp;
+ t_uscalar_t prim, cprim;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ mp = vnd_dlpi_inc_pop(vsp);
+ prim = ((dl_ok_ack_t *)mp->b_rptr)->dl_primitive;
+ cprim = ((dl_ok_ack_t *)mp->b_rptr)->dl_correct_primitive;
+
+ if (prim != DL_OK_ACK && prim != DL_ERROR_ACK) {
+ vnd_drop_ctl(vsp, mp,
+ "wrong dlpi primitive for vnd_st_promisc");
+ vsp->vns_errno = VND_E_DLPIINVAL;
+ return (1);
+ }
+
+ if (cprim != DL_PROMISCON_REQ) {
+ vnd_drop_ctl(vsp, mp,
+ "vnd_st_promisc: Got ack/nack for wrong primitive");
+ vsp->vns_errno = VND_E_DLPIINVAL;
+ return (1);
+ }
+
+ if (prim == DL_ERROR_ACK)
+ vsp->vns_errno = VND_E_PROMISCFAIL;
+
+ freemsg(mp);
+ return (prim == DL_ERROR_ACK);
+}
+
+static int
+vnd_st_scapabq(vnd_str_t *vsp)
+{
+ mblk_t *mp;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+
+ mp = vnd_dlpi_alloc(sizeof (dl_capability_req_t), DL_CAPABILITY_REQ);
+ if (mp == NULL) {
+ vsp->vns_errno = VND_E_NOMEM;
+ return (1);
+ }
+
+ vsp->vns_state = VNS_S_CAPAB_Q_SENT;
+ cv_broadcast(&vsp->vns_stcv);
+ putnext(vsp->vns_wq, mp);
+
+ return (0);
+}
+
+static void
+vnd_mac_input(vnd_str_t *vsp, mac_resource_t *unused, mblk_t *mp_chain,
+ mac_header_info_t *mhip)
+{
+ int signal = 0;
+ mblk_t *mp;
+ vnd_pnsd_t *nsp = vsp->vns_nsd;
+
+ ASSERT(vsp != NULL);
+ ASSERT(mp_chain != NULL);
+
+ for (mp = mp_chain; mp != NULL; mp = mp_chain) {
+ uint16_t vid;
+ mp_chain = mp->b_next;
+ mp->b_next = NULL;
+
+ /*
+ * If we were operating in a traditional dlpi context then we
+ * would have enabled DLIOCRAW and rather than the fast path we
+ * would come through dld_str_rx_raw. That function does two
+ * things that we have to consider doing ourselves. The first is
+ * that it adjusts the b_rptr back to account for dld bumping us
+ * past the mac header. It also tries to account for cases where
+ * mac provides an illusion of the mac header. Fortunately, dld
+ * only allows the fastpath when the media type is the same as
+ * the native type. Therefore all we have to do here is adjust
+ * the b_rptr.
+ */
+ ASSERT(mp->b_rptr >= DB_BASE(mp) + mhip->mhi_hdrsize);
+ mp->b_rptr -= mhip->mhi_hdrsize;
+ vid = VLAN_ID(mhip->mhi_tci);
+ if (mhip->mhi_istagged && vid != VLAN_ID_NONE) {
+ bcopy(mp->b_rptr, mp->b_rptr + 4, 12);
+ mp->b_rptr += 4;
+ }
+
+ if (nsp->vpnd_hooked && vnd_hook(vsp, &mp, nsp->vpnd_neti_v4,
+ nsp->vpnd_event_in_v4, nsp->vpnd_token_in_v4,
+ nsp->vpnd_neti_v6, nsp->vpnd_event_in_v6,
+ nsp->vpnd_token_in_v6, vnd_drop_hook_in, vnd_drop_in) != 0)
+ continue;
+
+ VND_STAT_INC(vsp, vks_rpackets, 1);
+ VND_STAT_INC(vsp, vks_rbytes, msgsize(mp));
+ DTRACE_VND5(recv, mblk_t *, mp, void *, NULL, void *, NULL,
+ vnd_str_t *, vsp, mblk_t *, mp);
+ mutex_enter(&vsp->vns_dq_read.vdq_lock);
+ signal |= vnd_dq_push(&vsp->vns_dq_read, mp, B_FALSE,
+ vnd_drop_in);
+ mutex_exit(&vsp->vns_dq_read.vdq_lock);
+
+ }
+
+ if (signal != 0) {
+ cv_broadcast(&vsp->vns_dq_read.vdq_ready);
+ pollwakeup(&vsp->vns_dev->vdd_ph, POLLIN | POLLRDNORM);
+ }
+
+}
+
+static void
+vnd_mac_flow_control_stat(vnd_str_t *vsp, hrtime_t diff)
+{
+ VND_STAT_INC(vsp, vks_nmacflow, 1);
+ VND_STAT_INC(vsp, vks_tmacflow, diff);
+ if (diff >= VND_LATENCY_1MS)
+ VND_STAT_INC(vsp, vks_mac_flow_1ms, 1);
+ if (diff >= VND_LATENCY_10MS)
+ VND_STAT_INC(vsp, vks_mac_flow_10ms, 1);
+ if (diff >= VND_LATENCY_100MS)
+ VND_STAT_INC(vsp, vks_mac_flow_100ms, 1);
+ if (diff >= VND_LATENCY_1S)
+ VND_STAT_INC(vsp, vks_mac_flow_1s, 1);
+ if (diff >= VND_LATENCY_10S)
+ VND_STAT_INC(vsp, vks_mac_flow_10s, 1);
+}
+
+/*
+ * This is a callback from MAC that indicates that we are allowed to send
+ * packets again.
+ */
+static void
+vnd_mac_flow_control(void *arg, vnd_mac_cookie_t cookie)
+{
+ vnd_str_t *vsp = arg;
+ hrtime_t now, diff;
+
+ mutex_enter(&vsp->vns_lock);
+ now = gethrtime();
+
+ /*
+ * Check for the case that we beat vnd_squeue_tx_one to the punch.
+ * There's also an additional case here that we got notified because
+ * we're sharing a device that ran out of tx descriptors, even though it
+ * wasn't because of us.
+ */
+ if (!(vsp->vns_flags & VNS_F_FLOW_CONTROLLED)) {
+ vsp->vns_fcupdate = now;
+ mutex_exit(&vsp->vns_lock);
+ return;
+ }
+
+ ASSERT(vsp->vns_flags & VNS_F_FLOW_CONTROLLED);
+ ASSERT(vsp->vns_caps.vsc_fc_cookie == cookie);
+ vsp->vns_flags &= ~VNS_F_FLOW_CONTROLLED;
+ vsp->vns_caps.vsc_fc_cookie = NULL;
+ diff = now - vsp->vns_fclatch;
+ vsp->vns_fclatch = 0;
+ DTRACE_VND3(flow__resumed, vnd_str_t *, vsp, uint64_t,
+ vsp->vns_dq_write.vdq_cur, uintptr_t, cookie);
+ /*
+ * If someone has asked to flush the squeue and thus inserted a barrier,
+ * than we shouldn't schedule a drain.
+ */
+ if (!(vsp->vns_flags & (VNS_F_DRAIN_SCHEDULED | VNS_F_BARRIER))) {
+ vsp->vns_flags |= VNS_F_DRAIN_SCHEDULED;
+ gsqueue_enter_one(vsp->vns_squeue, &vsp->vns_drainblk,
+ vnd_squeue_tx_drain, vsp, GSQUEUE_FILL,
+ VND_SQUEUE_TAG_MAC_FLOW_CONTROL);
+ }
+ mutex_exit(&vsp->vns_lock);
+}
+
+static void
+vnd_mac_enter(vnd_str_t *vsp, mac_perim_handle_t *mphp)
+{
+ ASSERT(MUTEX_HELD(&vsp->vns_lock));
+ VERIFY(vsp->vns_caps.vsc_capab_f(vsp->vns_caps.vsc_capab_hdl,
+ DLD_CAPAB_PERIM, mphp, DLD_ENABLE) == 0);
+}
+
+static void
+vnd_mac_exit(vnd_str_t *vsp, mac_perim_handle_t mph)
+{
+ ASSERT(MUTEX_HELD(&vsp->vns_lock));
+ VERIFY(vsp->vns_caps.vsc_capab_f(vsp->vns_caps.vsc_capab_hdl,
+ DLD_CAPAB_PERIM, mph, DLD_DISABLE) == 0);
+}
+
+static int
+vnd_dld_cap_enable(vnd_str_t *vsp, vnd_rx_t rxfunc)
+{
+ int ret;
+ dld_capab_direct_t d;
+ mac_perim_handle_t mph;
+ vnd_str_capab_t *c = &vsp->vns_caps;
+
+ bzero(&d, sizeof (d));
+ d.di_rx_cf = (uintptr_t)rxfunc;
+ d.di_rx_ch = vsp;
+ d.di_flags = DI_DIRECT_RAW;
+
+ vnd_mac_enter(vsp, &mph);
+
+ /*
+ * If we're coming in here for a second pass, we need to make sure that
+ * we remove an existing flow control notification callback, otherwise
+ * we'll create a duplicate that will remain with garbage data.
+ */
+ if (c->vsc_tx_fc_hdl != NULL) {
+ ASSERT(c->vsc_set_fcb_hdl != NULL);
+ (void) c->vsc_set_fcb_f(c->vsc_set_fcb_hdl, NULL,
+ c->vsc_tx_fc_hdl);
+ c->vsc_tx_fc_hdl = NULL;
+ }
+
+ if (vsp->vns_caps.vsc_capab_f(c->vsc_capab_hdl,
+ DLD_CAPAB_DIRECT, &d, DLD_ENABLE) == 0) {
+ c->vsc_tx_f = (vnd_dld_tx_t)d.di_tx_df;
+ c->vsc_tx_hdl = d.di_tx_dh;
+ c->vsc_set_fcb_f = (vnd_dld_set_fcb_t)d.di_tx_cb_df;
+ c->vsc_set_fcb_hdl = d.di_tx_cb_dh;
+ c->vsc_is_fc_f = (vnd_dld_is_fc_t)d.di_tx_fctl_df;
+ c->vsc_is_fc_hdl = d.di_tx_fctl_dh;
+ c->vsc_tx_fc_hdl = c->vsc_set_fcb_f(c->vsc_set_fcb_hdl,
+ vnd_mac_flow_control, vsp);
+ c->vsc_flags |= VNS_C_DIRECT;
+ ret = 0;
+ } else {
+ vsp->vns_errno = VND_E_DIRECTFAIL;
+ ret = 1;
+ }
+ vnd_mac_exit(vsp, mph);
+ return (ret);
+}
+
+static int
+vnd_st_capabq(vnd_str_t *vsp)
+{
+ mblk_t *mp;
+ dl_capability_ack_t *cap;
+ dl_capability_sub_t *subp;
+ dl_capab_hcksum_t *hck;
+ dl_capab_dld_t *dld;
+ unsigned char *rp;
+ int ret = 0;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ mp = vnd_dlpi_inc_pop(vsp);
+
+ rp = mp->b_rptr;
+ cap = (dl_capability_ack_t *)rp;
+ if (cap->dl_sub_length == 0)
+ goto done;
+
+ /* Don't try to process something too big */
+ if (sizeof (dl_capability_ack_t) + cap->dl_sub_length > MBLKL(mp)) {
+ VND_STAT_INC(vsp, vks_ndlpidrops, 1);
+ VND_STAT_INC(vsp, vks_tdrops, 1);
+ vsp->vns_errno = VND_E_CAPACKINVAL;
+ ret = 1;
+ goto done;
+ }
+
+ rp += cap->dl_sub_offset;
+
+ while (cap->dl_sub_length > 0) {
+ subp = (dl_capability_sub_t *)rp;
+ /* Sanity check something crazy from down below */
+ if (subp->dl_length + sizeof (dl_capability_sub_t) >
+ cap->dl_sub_length) {
+ VND_STAT_INC(vsp, vks_ndlpidrops, 1);
+ VND_STAT_INC(vsp, vks_tdrops, 1);
+ vsp->vns_errno = VND_E_SUBCAPINVAL;
+ ret = 1;
+ goto done;
+ }
+
+ switch (subp->dl_cap) {
+ case DL_CAPAB_HCKSUM:
+ hck = (dl_capab_hcksum_t *)(rp +
+ sizeof (dl_capability_sub_t));
+ if (hck->hcksum_version != HCKSUM_CURRENT_VERSION) {
+ vsp->vns_caps.vsc_flags |= VNS_C_HCKSUM_BADVERS;
+ break;
+ }
+ if (dlcapabcheckqid(&hck->hcksum_mid, vsp->vns_lrq) !=
+ B_TRUE) {
+ vsp->vns_errno = VND_E_CAPABPASS;
+ ret = 1;
+ goto done;
+ }
+ vsp->vns_caps.vsc_flags |= VNS_C_HCKSUM;
+ vsp->vns_caps.vsc_hcksum_opts = hck->hcksum_txflags;
+ break;
+ case DL_CAPAB_DLD:
+ dld = (dl_capab_dld_t *)(rp +
+ sizeof (dl_capability_sub_t));
+ if (dld->dld_version != DLD_CURRENT_VERSION) {
+ vsp->vns_errno = VND_E_DLDBADVERS;
+ ret = 1;
+ goto done;
+ }
+ if (dlcapabcheckqid(&dld->dld_mid, vsp->vns_lrq) !=
+ B_TRUE) {
+ vsp->vns_errno = VND_E_CAPABPASS;
+ ret = 1;
+ goto done;
+ }
+ vsp->vns_caps.vsc_flags |= VNS_C_DLD;
+ vsp->vns_caps.vsc_capab_f =
+ (vnd_dld_cap_t)dld->dld_capab;
+ vsp->vns_caps.vsc_capab_hdl =
+ (void *)dld->dld_capab_handle;
+ /*
+ * At this point in time, we have to set up a direct
+ * function that drops all input. This validates that
+ * we'll be able to set up direct input and that we can
+ * easily switch it earlier to the real data function
+ * when we've plumbed everything up.
+ */
+ if (vnd_dld_cap_enable(vsp, vnd_mac_drop_input) != 0) {
+ /* vns_errno set by vnd_dld_cap_enable */
+ ret = 1;
+ goto done;
+ }
+ break;
+ default:
+ /* Ignore unsupported cap */
+ break;
+ }
+
+ rp += sizeof (dl_capability_sub_t) + subp->dl_length;
+ cap->dl_sub_length -= sizeof (dl_capability_sub_t) +
+ subp->dl_length;
+ }
+
+done:
+ /* Make sure we enabled direct callbacks */
+ if (ret == 0 && !(vsp->vns_caps.vsc_flags & VNS_C_DIRECT)) {
+ vsp->vns_errno = VND_E_DIRECTNOTSUP;
+ ret = 1;
+ }
+
+ freemsg(mp);
+ return (ret);
+}
+
+static void
+vnd_st_sonline(vnd_str_t *vsp)
+{
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ vsp->vns_state = VNS_S_ONLINE;
+ cv_broadcast(&vsp->vns_stcv);
+}
+
+static void
+vnd_st_shutdown(vnd_str_t *vsp)
+{
+ mac_perim_handle_t mph;
+ vnd_str_capab_t *vsc = &vsp->vns_caps;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+
+ /*
+ * At this point in time we know that there is no one transmitting as
+ * our final reference has been torn down and that vnd_s_close inserted
+ * a barrier to validate that everything is flushed.
+ */
+ if (vsc->vsc_flags & VNS_C_DIRECT) {
+ vnd_mac_enter(vsp, &mph);
+ vsc->vsc_flags &= ~VNS_C_DIRECT;
+ (void) vsc->vsc_set_fcb_f(vsc->vsc_set_fcb_hdl, NULL,
+ vsc->vsc_tx_fc_hdl);
+ vsc->vsc_tx_fc_hdl = NULL;
+ (void) vsc->vsc_capab_f(vsc->vsc_capab_hdl, DLD_CAPAB_DIRECT,
+ NULL, DLD_DISABLE);
+ vnd_mac_exit(vsp, mph);
+ }
+
+ /*
+ * We could send an unbind, but dld also does that for us. As we add
+ * more capabilities and the like, we should revisit this.
+ */
+ vsp->vns_state = VNS_S_ZOMBIE;
+ cv_broadcast(&vsp->vns_stcv);
+}
+
+/*
+ * Perform state transitions. This is a one way shot down the flow chart
+ * described in the big theory statement.
+ */
+static void
+vnd_str_state_transition(void *arg)
+{
+ boolean_t died = B_FALSE;
+ vnd_str_t *vsp = arg;
+ mblk_t *mp;
+
+ mutex_enter(&vsp->vns_lock);
+ if (vsp->vns_dlpi_inc == NULL && (vsp->vns_state != VNS_S_INITIAL &&
+ vsp->vns_state != VNS_S_SHUTTING_DOWN)) {
+ mutex_exit(&vsp->vns_lock);
+ return;
+ }
+ DTRACE_PROBE2(vnd__state__transition, uintptr_t, vsp,
+ vnd_str_state_t, vsp->vns_state);
+ switch (vsp->vns_state) {
+ case VNS_S_INITIAL:
+ VERIFY(vsp->vns_dlpi_inc == NULL);
+ if (vnd_st_sinfo(vsp) != 0)
+ died = B_TRUE;
+ break;
+ case VNS_S_INFO_SENT:
+ VERIFY(vsp->vns_dlpi_inc != NULL);
+ if (vnd_st_info(vsp) == 0) {
+ if (vnd_st_sexclusive(vsp) != 0)
+ died = B_TRUE;
+ } else {
+ died = B_TRUE;
+ }
+ break;
+ case VNS_S_EXCLUSIVE_SENT:
+ VERIFY(vsp->vns_dlpi_inc != NULL);
+ if (vnd_st_exclusive(vsp) == 0) {
+ if (vsp->vns_dlpi_style == DL_STYLE2) {
+ if (vnd_st_sattach(vsp) != 0)
+ died = B_TRUE;
+ } else {
+ if (vnd_st_sbind(vsp) != 0)
+ died = B_TRUE;
+ }
+ } else {
+ died = B_TRUE;
+ }
+ break;
+ case VNS_S_ATTACH_SENT:
+ VERIFY(vsp->vns_dlpi_inc != NULL);
+ if (vnd_st_attach(vsp) == 0) {
+ if (vnd_st_sbind(vsp) != 0)
+ died = B_TRUE;
+ } else {
+ died = B_TRUE;
+ }
+ break;
+ case VNS_S_BIND_SENT:
+ VERIFY(vsp->vns_dlpi_inc != NULL);
+ if (vnd_st_bind(vsp) == 0) {
+ if (vnd_st_spromisc(vsp, DL_PROMISC_SAP,
+ VNS_S_SAP_PROMISC_SENT) != 0)
+ died = B_TRUE;
+ } else {
+ died = B_TRUE;
+ }
+ break;
+ case VNS_S_SAP_PROMISC_SENT:
+ VERIFY(vsp->vns_dlpi_inc != NULL);
+ if (vnd_st_promisc(vsp) == 0) {
+ if (vnd_st_spromisc(vsp, DL_PROMISC_MULTI,
+ VNS_S_MULTI_PROMISC_SENT) != 0)
+ died = B_TRUE;
+ } else {
+ died = B_TRUE;
+ }
+ break;
+ case VNS_S_MULTI_PROMISC_SENT:
+ VERIFY(vsp->vns_dlpi_inc != NULL);
+ if (vnd_st_promisc(vsp) == 0) {
+ if (vnd_st_spromisc(vsp, DL_PROMISC_RX_ONLY,
+ VNS_S_RX_ONLY_PROMISC_SENT) != 0)
+ died = B_TRUE;
+ } else {
+ died = B_TRUE;
+ }
+ break;
+ case VNS_S_RX_ONLY_PROMISC_SENT:
+ VERIFY(vsp->vns_dlpi_inc != NULL);
+ if (vnd_st_promisc(vsp) == 0) {
+ if (vnd_st_scapabq(vsp) != 0)
+ died = B_TRUE;
+ } else {
+ died = B_TRUE;
+ }
+ break;
+ case VNS_S_CAPAB_Q_SENT:
+ if (vnd_st_capabq(vsp) != 0)
+ died = B_TRUE;
+ else
+ vnd_st_sonline(vsp);
+ break;
+ case VNS_S_SHUTTING_DOWN:
+ vnd_st_shutdown(vsp);
+ break;
+ case VNS_S_ZOMBIE:
+ while ((mp = vnd_dlpi_inc_pop(vsp)) != NULL)
+ vnd_drop_ctl(vsp, mp, "vsp committed suicide");
+ break;
+ default:
+ panic("vnd_str_t entered an unknown state");
+ }
+
+ if (died == B_TRUE) {
+ ASSERT(vsp->vns_errno != VND_E_SUCCESS);
+ vsp->vns_laststate = vsp->vns_state;
+ vsp->vns_state = VNS_S_ZOMBIE;
+ cv_broadcast(&vsp->vns_stcv);
+ }
+
+ mutex_exit(&vsp->vns_lock);
+}
+
+static void
+vnd_dlpi_taskq_dispatch(void *arg)
+{
+ vnd_str_t *vsp = arg;
+ int run = 1;
+
+ while (run != 0) {
+ vnd_str_state_transition(vsp);
+ mutex_enter(&vsp->vns_lock);
+ if (vsp->vns_flags & VNS_F_CONDEMNED ||
+ vsp->vns_dlpi_inc == NULL) {
+ run = 0;
+ vsp->vns_flags &= ~VNS_F_TASKQ_DISPATCHED;
+ }
+ if (vsp->vns_flags & VNS_F_CONDEMNED)
+ cv_signal(&vsp->vns_cancelcv);
+ mutex_exit(&vsp->vns_lock);
+ }
+}
+
+static int
+vnd_neti_getifname(net_handle_t neti, phy_if_t phy, char *buf, const size_t len)
+{
+ return (-1);
+}
+
+static int
+vnd_neti_getmtu(net_handle_t neti, phy_if_t phy, lif_if_t ifdata)
+{
+ return (-1);
+}
+
+static int
+vnd_neti_getptmue(net_handle_t neti)
+{
+ return (-1);
+}
+
+static int
+vnd_neti_getlifaddr(net_handle_t neti, phy_if_t phy, lif_if_t ifdata,
+ size_t nelem, net_ifaddr_t type[], void *storage)
+{
+ return (-1);
+}
+
+static int
+vnd_neti_getlifzone(net_handle_t neti, phy_if_t phy, lif_if_t ifdata,
+ zoneid_t *zid)
+{
+ return (-1);
+}
+
+static int
+vnd_neti_getlifflags(net_handle_t neti, phy_if_t phy, lif_if_t ifdata,
+ uint64_t *flags)
+{
+ return (-1);
+}
+
+static phy_if_t
+vnd_neti_phygetnext(net_handle_t neti, phy_if_t phy)
+{
+ return (-1);
+}
+
+static phy_if_t
+vnd_neti_phylookup(net_handle_t neti, const char *name)
+{
+ return (-1);
+}
+
+static lif_if_t
+vnd_neti_lifgetnext(net_handle_t neti, phy_if_t phy, lif_if_t ifdata)
+{
+ return (-1);
+}
+
+static int
+vnd_neti_inject(net_handle_t neti, inject_t style, net_inject_t *packet)
+{
+ return (-1);
+}
+
+static phy_if_t
+vnd_neti_route(net_handle_t neti, struct sockaddr *address,
+ struct sockaddr *next)
+{
+ return ((phy_if_t)-1);
+}
+
+static int
+vnd_neti_ispchksum(net_handle_t neti, mblk_t *mp)
+{
+ return (-1);
+}
+
+static int
+vnd_neti_isvchksum(net_handle_t neti, mblk_t *mp)
+{
+ return (-1);
+}
+
+static net_protocol_t vnd_neti_info_v4 = {
+ NETINFO_VERSION,
+ NHF_VND_INET,
+ vnd_neti_getifname,
+ vnd_neti_getmtu,
+ vnd_neti_getptmue,
+ vnd_neti_getlifaddr,
+ vnd_neti_getlifzone,
+ vnd_neti_getlifflags,
+ vnd_neti_phygetnext,
+ vnd_neti_phylookup,
+ vnd_neti_lifgetnext,
+ vnd_neti_inject,
+ vnd_neti_route,
+ vnd_neti_ispchksum,
+ vnd_neti_isvchksum
+};
+
+static net_protocol_t vnd_neti_info_v6 = {
+ NETINFO_VERSION,
+ NHF_VND_INET6,
+ vnd_neti_getifname,
+ vnd_neti_getmtu,
+ vnd_neti_getptmue,
+ vnd_neti_getlifaddr,
+ vnd_neti_getlifzone,
+ vnd_neti_getlifflags,
+ vnd_neti_phygetnext,
+ vnd_neti_phylookup,
+ vnd_neti_lifgetnext,
+ vnd_neti_inject,
+ vnd_neti_route,
+ vnd_neti_ispchksum,
+ vnd_neti_isvchksum
+};
+
+
+static int
+vnd_netinfo_init(vnd_pnsd_t *nsp)
+{
+ nsp->vpnd_neti_v4 = net_protocol_register(nsp->vpnd_nsid,
+ &vnd_neti_info_v4);
+ ASSERT(nsp->vpnd_neti_v4 != NULL);
+
+ nsp->vpnd_neti_v6 = net_protocol_register(nsp->vpnd_nsid,
+ &vnd_neti_info_v6);
+ ASSERT(nsp->vpnd_neti_v6 != NULL);
+
+ nsp->vpnd_family_v4.hf_version = HOOK_VERSION;
+ nsp->vpnd_family_v4.hf_name = "vnd_inet";
+
+ if (net_family_register(nsp->vpnd_neti_v4, &nsp->vpnd_family_v4) != 0) {
+ net_protocol_unregister(nsp->vpnd_neti_v4);
+ net_protocol_unregister(nsp->vpnd_neti_v6);
+ cmn_err(CE_NOTE, "vnd_netinfo_init: net_family_register "
+ "failed for stack %d", nsp->vpnd_nsid);
+ return (1);
+ }
+
+ nsp->vpnd_family_v6.hf_version = HOOK_VERSION;
+ nsp->vpnd_family_v6.hf_name = "vnd_inet6";
+
+ if (net_family_register(nsp->vpnd_neti_v6, &nsp->vpnd_family_v6) != 0) {
+ net_family_unregister(nsp->vpnd_neti_v4, &nsp->vpnd_family_v4);
+ net_protocol_unregister(nsp->vpnd_neti_v4);
+ net_protocol_unregister(nsp->vpnd_neti_v6);
+ cmn_err(CE_NOTE, "vnd_netinfo_init: net_family_register "
+ "failed for stack %d", nsp->vpnd_nsid);
+ return (1);
+ }
+
+ nsp->vpnd_event_in_v4.he_version = HOOK_VERSION;
+ nsp->vpnd_event_in_v4.he_name = NH_PHYSICAL_IN;
+ nsp->vpnd_event_in_v4.he_flags = 0;
+ nsp->vpnd_event_in_v4.he_interested = B_FALSE;
+
+ nsp->vpnd_token_in_v4 = net_event_register(nsp->vpnd_neti_v4,
+ &nsp->vpnd_event_in_v4);
+ if (nsp->vpnd_token_in_v4 == NULL) {
+ net_family_unregister(nsp->vpnd_neti_v4, &nsp->vpnd_family_v4);
+ net_family_unregister(nsp->vpnd_neti_v6, &nsp->vpnd_family_v6);
+ net_protocol_unregister(nsp->vpnd_neti_v4);
+ net_protocol_unregister(nsp->vpnd_neti_v6);
+ cmn_err(CE_NOTE, "vnd_netinfo_init: net_event_register "
+ "failed for stack %d", nsp->vpnd_nsid);
+ return (1);
+ }
+
+ nsp->vpnd_event_in_v6.he_version = HOOK_VERSION;
+ nsp->vpnd_event_in_v6.he_name = NH_PHYSICAL_IN;
+ nsp->vpnd_event_in_v6.he_flags = 0;
+ nsp->vpnd_event_in_v6.he_interested = B_FALSE;
+
+ nsp->vpnd_token_in_v6 = net_event_register(nsp->vpnd_neti_v6,
+ &nsp->vpnd_event_in_v6);
+ if (nsp->vpnd_token_in_v6 == NULL) {
+ net_event_shutdown(nsp->vpnd_neti_v4, &nsp->vpnd_event_in_v4);
+ net_event_unregister(nsp->vpnd_neti_v4, &nsp->vpnd_event_in_v4);
+ net_family_unregister(nsp->vpnd_neti_v4, &nsp->vpnd_family_v4);
+ net_family_unregister(nsp->vpnd_neti_v6, &nsp->vpnd_family_v6);
+ net_protocol_unregister(nsp->vpnd_neti_v4);
+ net_protocol_unregister(nsp->vpnd_neti_v6);
+ cmn_err(CE_NOTE, "vnd_netinfo_init: net_event_register "
+ "failed for stack %d", nsp->vpnd_nsid);
+ return (1);
+ }
+
+ nsp->vpnd_event_out_v4.he_version = HOOK_VERSION;
+ nsp->vpnd_event_out_v4.he_name = NH_PHYSICAL_OUT;
+ nsp->vpnd_event_out_v4.he_flags = 0;
+ nsp->vpnd_event_out_v4.he_interested = B_FALSE;
+
+ nsp->vpnd_token_out_v4 = net_event_register(nsp->vpnd_neti_v4,
+ &nsp->vpnd_event_out_v4);
+ if (nsp->vpnd_token_out_v4 == NULL) {
+ net_event_shutdown(nsp->vpnd_neti_v6, &nsp->vpnd_event_in_v6);
+ net_event_unregister(nsp->vpnd_neti_v6, &nsp->vpnd_event_in_v6);
+ net_event_shutdown(nsp->vpnd_neti_v4, &nsp->vpnd_event_in_v4);
+ net_event_unregister(nsp->vpnd_neti_v4, &nsp->vpnd_event_in_v4);
+ net_family_unregister(nsp->vpnd_neti_v4, &nsp->vpnd_family_v4);
+ net_family_unregister(nsp->vpnd_neti_v6, &nsp->vpnd_family_v6);
+ net_protocol_unregister(nsp->vpnd_neti_v4);
+ net_protocol_unregister(nsp->vpnd_neti_v6);
+ cmn_err(CE_NOTE, "vnd_netinfo_init: net_event_register "
+ "failed for stack %d", nsp->vpnd_nsid);
+ return (1);
+ }
+
+ nsp->vpnd_event_out_v6.he_version = HOOK_VERSION;
+ nsp->vpnd_event_out_v6.he_name = NH_PHYSICAL_OUT;
+ nsp->vpnd_event_out_v6.he_flags = 0;
+ nsp->vpnd_event_out_v6.he_interested = B_FALSE;
+
+ nsp->vpnd_token_out_v6 = net_event_register(nsp->vpnd_neti_v6,
+ &nsp->vpnd_event_out_v6);
+ if (nsp->vpnd_token_out_v6 == NULL) {
+ net_event_shutdown(nsp->vpnd_neti_v6, &nsp->vpnd_event_in_v6);
+ net_event_unregister(nsp->vpnd_neti_v6, &nsp->vpnd_event_in_v6);
+ net_event_shutdown(nsp->vpnd_neti_v6, &nsp->vpnd_event_in_v6);
+ net_event_unregister(nsp->vpnd_neti_v6, &nsp->vpnd_event_in_v6);
+ net_event_shutdown(nsp->vpnd_neti_v4, &nsp->vpnd_event_in_v4);
+ net_event_unregister(nsp->vpnd_neti_v4, &nsp->vpnd_event_in_v4);
+ net_family_unregister(nsp->vpnd_neti_v4, &nsp->vpnd_family_v4);
+ net_family_unregister(nsp->vpnd_neti_v6, &nsp->vpnd_family_v6);
+ net_protocol_unregister(nsp->vpnd_neti_v4);
+ net_protocol_unregister(nsp->vpnd_neti_v6);
+ cmn_err(CE_NOTE, "vnd_netinfo_init: net_event_register "
+ "failed for stack %d", nsp->vpnd_nsid);
+ return (1);
+ }
+
+ return (0);
+}
+
+static void
+vnd_netinfo_shutdown(vnd_pnsd_t *nsp)
+{
+ int ret;
+
+ ret = net_event_shutdown(nsp->vpnd_neti_v4, &nsp->vpnd_event_in_v4);
+ VERIFY(ret == 0);
+ ret = net_event_shutdown(nsp->vpnd_neti_v4, &nsp->vpnd_event_out_v4);
+ VERIFY(ret == 0);
+ ret = net_event_shutdown(nsp->vpnd_neti_v6, &nsp->vpnd_event_in_v6);
+ VERIFY(ret == 0);
+ ret = net_event_shutdown(nsp->vpnd_neti_v6, &nsp->vpnd_event_out_v6);
+ VERIFY(ret == 0);
+}
+
+static void
+vnd_netinfo_fini(vnd_pnsd_t *nsp)
+{
+ int ret;
+
+ ret = net_event_unregister(nsp->vpnd_neti_v4, &nsp->vpnd_event_in_v4);
+ VERIFY(ret == 0);
+ ret = net_event_unregister(nsp->vpnd_neti_v4, &nsp->vpnd_event_out_v4);
+ VERIFY(ret == 0);
+ ret = net_event_unregister(nsp->vpnd_neti_v6, &nsp->vpnd_event_in_v6);
+ VERIFY(ret == 0);
+ ret = net_event_unregister(nsp->vpnd_neti_v6, &nsp->vpnd_event_out_v6);
+ VERIFY(ret == 0);
+ ret = net_family_unregister(nsp->vpnd_neti_v4, &nsp->vpnd_family_v4);
+ VERIFY(ret == 0);
+ ret = net_family_unregister(nsp->vpnd_neti_v6, &nsp->vpnd_family_v6);
+ VERIFY(ret == 0);
+ ret = net_protocol_unregister(nsp->vpnd_neti_v4);
+ VERIFY(ret == 0);
+ ret = net_protocol_unregister(nsp->vpnd_neti_v6);
+ VERIFY(ret == 0);
+}
+
+static void
+vnd_strbarrier_cb(void *arg, mblk_t *bmp, gsqueue_t *gsp, void *dummy)
+{
+ vnd_str_t *vsp = arg;
+
+ VERIFY(bmp == &vsp->vns_barrierblk);
+ mutex_enter(&vsp->vns_lock);
+ VERIFY(vsp->vns_flags & VNS_F_BARRIER);
+ VERIFY(!(vsp->vns_flags & VNS_F_BARRIER_DONE));
+ vsp->vns_flags |= VNS_F_BARRIER_DONE;
+ mutex_exit(&vsp->vns_lock);
+
+ /*
+ * For better or worse, we have to broadcast here as we could have a
+ * thread that's blocked for completion as well as one that's blocked
+ * waiting to do a barrier itself.
+ */
+ cv_broadcast(&vsp->vns_barriercv);
+}
+
+/*
+ * This is a data barrier for the stream while it is in fastpath mode. It blocks
+ * and ensures that there is nothing else in the squeue.
+ */
+static void
+vnd_strbarrier(vnd_str_t *vsp)
+{
+ mutex_enter(&vsp->vns_lock);
+ while (vsp->vns_flags & VNS_F_BARRIER)
+ cv_wait(&vsp->vns_barriercv, &vsp->vns_lock);
+ vsp->vns_flags |= VNS_F_BARRIER;
+ mutex_exit(&vsp->vns_lock);
+
+ gsqueue_enter_one(vsp->vns_squeue, &vsp->vns_barrierblk,
+ vnd_strbarrier_cb, vsp, GSQUEUE_PROCESS, VND_SQUEUE_TAG_STRBARRIER);
+
+ mutex_enter(&vsp->vns_lock);
+ while (!(vsp->vns_flags & VNS_F_BARRIER_DONE))
+ cv_wait(&vsp->vns_barriercv, &vsp->vns_lock);
+ vsp->vns_flags &= ~VNS_F_BARRIER;
+ vsp->vns_flags &= ~VNS_F_BARRIER_DONE;
+ mutex_exit(&vsp->vns_lock);
+
+ /*
+ * We have to broadcast in case anyone is waiting for the barrier
+ * themselves.
+ */
+ cv_broadcast(&vsp->vns_barriercv);
+}
+
+/*
+ * Based on the type of message that we're dealing with we're going to want to
+ * do one of several things. Basically if it looks like it's something we know
+ * about, we should probably handle it in one of our transition threads.
+ * Otherwise, we should just simply putnext.
+ */
+static int
+vnd_s_rput(queue_t *q, mblk_t *mp)
+{
+ t_uscalar_t prim;
+ int dispatch = 0;
+ vnd_str_t *vsp = q->q_ptr;
+
+ switch (DB_TYPE(mp)) {
+ case M_PROTO:
+ case M_PCPROTO:
+ if (MBLKL(mp) < sizeof (t_uscalar_t)) {
+ vnd_drop_ctl(vsp, mp, "PROTO message too short");
+ break;
+ }
+
+ prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;
+ if (prim == DL_UNITDATA_REQ || prim == DL_UNITDATA_IND) {
+ vnd_drop_ctl(vsp, mp,
+ "recieved an unsupported dlpi DATA req");
+ break;
+ }
+
+ /*
+ * Enqueue the entry and fire off a taskq dispatch.
+ */
+ mutex_enter(&vsp->vns_lock);
+ vnd_dlpi_inc_push(vsp, mp);
+ if (!(vsp->vns_flags & VNS_F_TASKQ_DISPATCHED)) {
+ dispatch = 1;
+ vsp->vns_flags |= VNS_F_TASKQ_DISPATCHED;
+ }
+ mutex_exit(&vsp->vns_lock);
+ if (dispatch != 0)
+ taskq_dispatch_ent(vnd_taskq, vnd_dlpi_taskq_dispatch,
+ vsp, 0, &vsp->vns_tqe);
+ break;
+ case M_DATA:
+ vnd_drop_in(vsp, mp, "M_DATA via put(9E)");
+ break;
+ default:
+ putnext(vsp->vns_rq, mp);
+ }
+ return (0);
+}
+
+static void
+vnd_strioctl(queue_t *q, vnd_str_t *vsp, mblk_t *mp, struct iocblk *iocp)
+{
+ int error;
+ vnd_strioc_t *visp;
+
+ if (iocp->ioc_cmd != VND_STRIOC_ASSOCIATE ||
+ iocp->ioc_count != TRANSPARENT) {
+ error = EINVAL;
+ goto nak;
+ }
+
+ /*
+ * All streams ioctls that we support must use kcred as a means to
+ * distinguish that this is a layered open by the kernel as opposed to
+ * one by a user who has done an I_PUSH of the module.
+ */
+ if (iocp->ioc_cr != kcred) {
+ error = EPERM;
+ goto nak;
+ }
+
+ if (mp->b_cont == NULL) {
+ error = EAGAIN;
+ goto nak;
+ }
+
+ visp = kmem_alloc(sizeof (vnd_strioc_t), KM_SLEEP);
+ ASSERT(MBLKL(mp->b_cont) == sizeof (caddr_t));
+ visp->vs_addr = *(caddr_t *)mp->b_cont->b_rptr;
+ visp->vs_state = VSS_COPYIN;
+
+ mcopyin(mp, (void *)visp, sizeof (vnd_strioc_associate_t), NULL);
+ qreply(q, mp);
+
+ return;
+
+nak:
+ if (mp->b_cont != NULL) {
+ freemsg(mp->b_cont);
+ mp->b_cont = NULL;
+ }
+
+ iocp->ioc_error = error;
+ mp->b_datap->db_type = M_IOCNAK;
+ iocp->ioc_count = 0;
+ qreply(q, mp);
+}
+
+static void
+vnd_striocdata(queue_t *q, vnd_str_t *vsp, mblk_t *mp, struct copyresp *csp)
+{
+ int error;
+ vnd_str_state_t state;
+ struct copyreq *crp;
+ vnd_strioc_associate_t *vss;
+ vnd_dev_t *vdp = NULL;
+ vnd_pnsd_t *nsp = NULL;
+ char iname[2*VND_NAMELEN];
+ zone_t *zone;
+ vnd_strioc_t *visp;
+
+ visp = (vnd_strioc_t *)csp->cp_private;
+
+ /* If it's not ours, it's not our problem */
+ if (csp->cp_cmd != VND_STRIOC_ASSOCIATE) {
+ if (q->q_next != NULL) {
+ putnext(q, mp);
+ } else {
+ VND_STAT_INC(vsp, vks_ndlpidrops, 1);
+ VND_STAT_INC(vsp, vks_tdrops, 1);
+ vnd_drop_ctl(vsp, mp, "uknown cmd for M_IOCDATA");
+ }
+ kmem_free(visp, sizeof (vnd_strioc_t));
+ return;
+ }
+
+ /* The nak is already sent for us */
+ if (csp->cp_rval != 0) {
+ vnd_drop_ctl(vsp, mp, "M_COPYIN failed");
+ kmem_free(visp, sizeof (vnd_strioc_t));
+ return;
+ }
+
+ /* Data is sitting for us in b_cont */
+ if (mp->b_cont == NULL ||
+ MBLKL(mp->b_cont) != sizeof (vnd_strioc_associate_t)) {
+ kmem_free(visp, sizeof (vnd_strioc_t));
+ miocnak(q, mp, 0, EINVAL);
+ qreply(q, mp);
+ return;
+ }
+
+ vss = (vnd_strioc_associate_t *)mp->b_cont->b_rptr;
+ vdp = vnd_dev_lookup(vss->vsa_minor);
+ if (vdp == NULL) {
+ error = EIO;
+ vss->vsa_errno = VND_E_NODEV;
+ goto nak;
+ }
+
+ nsp = vnd_nsd_lookup(vss->vsa_nsid);
+ if (nsp == NULL) {
+ error = EIO;
+ vss->vsa_errno = VND_E_NONETSTACK;
+ goto nak;
+ }
+
+ mutex_enter(&vsp->vns_lock);
+ if (!(vsp->vns_flags & VNS_F_NEED_ZONE)) {
+ mutex_exit(&vsp->vns_lock);
+ error = EEXIST;
+ vss->vsa_errno = VND_E_ASSOCIATED;
+ goto nak;
+ }
+
+ vsp->vns_nsd = nsp;
+ vsp->vns_flags &= ~VNS_F_NEED_ZONE;
+ vsp->vns_flags |= VNS_F_TASKQ_DISPATCHED;
+ mutex_exit(&vsp->vns_lock);
+
+ taskq_dispatch_ent(vnd_taskq, vnd_dlpi_taskq_dispatch, vsp, 0,
+ &vsp->vns_tqe);
+
+
+ /* At this point we need to wait until we have transitioned to ONLINE */
+ mutex_enter(&vsp->vns_lock);
+ while (vsp->vns_state != VNS_S_ONLINE && vsp->vns_state != VNS_S_ZOMBIE)
+ cv_wait(&vsp->vns_stcv, &vsp->vns_lock);
+ state = vsp->vns_state;
+ mutex_exit(&vsp->vns_lock);
+
+ if (state == VNS_S_ZOMBIE) {
+ vss->vsa_errno = vsp->vns_errno;
+ error = EIO;
+ goto nak;
+ }
+
+ mutex_enter(&vdp->vdd_lock);
+ mutex_enter(&vsp->vns_lock);
+ VERIFY(vdp->vdd_str == NULL);
+ /*
+ * Now initialize the remaining kstat properties and let's go ahead and
+ * create it.
+ */
+ (void) snprintf(iname, sizeof (iname), "z%d_%d",
+ vdp->vdd_nsd->vpnd_zid, vdp->vdd_minor);
+ vsp->vns_kstat = kstat_create_zone("vnd", vdp->vdd_minor, iname, "net",
+ KSTAT_TYPE_NAMED, sizeof (vnd_str_stat_t) / sizeof (kstat_named_t),
+ KSTAT_FLAG_VIRTUAL, GLOBAL_ZONEID);
+ if (vsp->vns_kstat == NULL) {
+ error = EIO;
+ vss->vsa_errno = VND_E_KSTATCREATE;
+ mutex_exit(&vsp->vns_lock);
+ mutex_exit(&vdp->vdd_lock);
+ goto nak;
+ }
+ vdp->vdd_str = vsp;
+ vsp->vns_dev = vdp;
+
+ /*
+ * Now, it's time to do the las thing that can fail, changing out the
+ * input function. After this we know that we can receive data, so we
+ * should make sure that we're ready.
+ */
+ if (vnd_dld_cap_enable(vsp, vnd_mac_input) != 0) {
+ error = EIO;
+ vss->vsa_errno = VND_E_DIRECTFAIL;
+ vdp->vdd_str = NULL;
+ vsp->vns_dev = NULL;
+ mutex_exit(&vsp->vns_lock);
+ mutex_exit(&vdp->vdd_lock);
+ goto nak;
+ }
+
+ zone = zone_find_by_id(vdp->vdd_nsd->vpnd_zid);
+ ASSERT(zone != NULL);
+ vsp->vns_kstat->ks_data = &vsp->vns_ksdata;
+ /* Account for zone name */
+ vsp->vns_kstat->ks_data_size += strlen(zone->zone_name) + 1;
+ /* Account for eventual link name */
+ vsp->vns_kstat->ks_data_size += VND_NAMELEN;
+ kstat_named_setstr(&vsp->vns_ksdata.vks_zonename, zone->zone_name);
+ kstat_named_setstr(&vdp->vdd_str->vns_ksdata.vks_linkname,
+ vdp->vdd_lname);
+ zone_rele(zone);
+ kstat_install(vsp->vns_kstat);
+
+ mutex_exit(&vsp->vns_lock);
+ mutex_exit(&vdp->vdd_lock);
+
+ /*
+ * Note that the vnd_str_t does not keep a permanent hold on the
+ * vnd_pnsd_t. We leave that up to the vnd_dev_t as that's also what
+ * the nestack goes through to take care of everything.
+ */
+ vss->vsa_errno = VND_E_SUCCESS;
+nak:
+ if (vdp != NULL)
+ vnd_dev_rele(vdp);
+ if (nsp != NULL)
+ vnd_nsd_rele(nsp);
+ /*
+ * Change the copyin request to a copyout. Note that we can't use
+ * mcopyout here as it only works when the DB_TYPE is M_IOCTL. That's
+ * okay, as the copyin vs. copyout is basically the same.
+ */
+ DB_TYPE(mp) = M_COPYOUT;
+ visp->vs_state = VSS_COPYOUT;
+ crp = (struct copyreq *)mp->b_rptr;
+ crp->cq_private = (void *)visp;
+ crp->cq_addr = visp->vs_addr;
+ crp->cq_size = sizeof (vnd_strioc_associate_t);
+ qreply(q, mp);
+}
+
+static void
+vnd_stroutdata(queue_t *q, vnd_str_t *vsp, mblk_t *mp, struct copyresp *csp)
+{
+ ASSERT(csp->cp_private != NULL);
+ kmem_free(csp->cp_private, sizeof (vnd_strioc_t));
+ if (csp->cp_cmd != VND_STRIOC_ASSOCIATE) {
+ if (q->q_next != NULL) {
+ putnext(q, mp);
+ } else {
+ VND_STAT_INC(vsp, vks_ndlpidrops, 1);
+ VND_STAT_INC(vsp, vks_tdrops, 1);
+ vnd_drop_ctl(vsp, mp, "uknown cmd for M_IOCDATA");
+ }
+ return;
+ }
+
+ /* The nak is already sent for us */
+ if (csp->cp_rval != 0) {
+ vnd_drop_ctl(vsp, mp, "M_COPYOUT failed");
+ return;
+ }
+
+ /* Ack and let's be done with it all */
+ miocack(q, mp, 0, 0);
+}
+
+static int
+vnd_s_wput(queue_t *q, mblk_t *mp)
+{
+ vnd_str_t *vsp = q->q_ptr;
+ struct copyresp *crp;
+ vnd_strioc_state_t vstate;
+ vnd_strioc_t *visp;
+
+ switch (DB_TYPE(mp)) {
+ case M_IOCTL:
+ vnd_strioctl(q, vsp, mp, (struct iocblk *)mp->b_rptr);
+ return (0);
+ case M_IOCDATA:
+ crp = (struct copyresp *)mp->b_rptr;
+ ASSERT(crp->cp_private != NULL);
+ visp = (vnd_strioc_t *)crp->cp_private;
+ vstate = visp->vs_state;
+ ASSERT(vstate == VSS_COPYIN || vstate == VSS_COPYOUT);
+ if (vstate == VSS_COPYIN)
+ vnd_striocdata(q, vsp, mp,
+ (struct copyresp *)mp->b_rptr);
+ else
+ vnd_stroutdata(q, vsp, mp,
+ (struct copyresp *)mp->b_rptr);
+ return (0);
+ default:
+ break;
+ }
+ if (q->q_next != NULL)
+ putnext(q, mp);
+ else
+ vnd_drop_ctl(vsp, mp, "!M_IOCTL in wput");
+
+ return (0);
+}
+
+static int
+vnd_s_open(queue_t *q, dev_t *devp, int oflag, int sflag, cred_t *credp)
+{
+ vnd_str_t *vsp;
+ uint_t rand;
+
+ if (q->q_ptr != NULL)
+ return (EINVAL);
+
+ if (!(sflag & MODOPEN))
+ return (ENXIO);
+
+ if (credp != kcred)
+ return (EPERM);
+
+ vsp = kmem_cache_alloc(vnd_str_cache, KM_SLEEP);
+ bzero(vsp, sizeof (*vsp));
+ mutex_init(&vsp->vns_lock, NULL, MUTEX_DRIVER, NULL);
+ cv_init(&vsp->vns_cancelcv, NULL, CV_DRIVER, NULL);
+ cv_init(&vsp->vns_barriercv, NULL, CV_DRIVER, NULL);
+ cv_init(&vsp->vns_stcv, NULL, CV_DRIVER, NULL);
+ vsp->vns_state = VNS_S_INITIAL;
+
+ mutex_init(&vsp->vns_dq_read.vdq_lock, NULL, MUTEX_DRIVER, NULL);
+ mutex_init(&vsp->vns_dq_write.vdq_lock, NULL, MUTEX_DRIVER, NULL);
+ mutex_enter(&vnd_dev_lock);
+ vsp->vns_dq_read.vdq_max = vnd_vdq_default_size;
+ vsp->vns_dq_read.vdq_vns = vsp;
+ vsp->vns_dq_write.vdq_max = vnd_vdq_default_size;
+ vsp->vns_dq_write.vdq_vns = vsp;
+ mutex_exit(&vnd_dev_lock);
+ vsp->vns_rq = q;
+ vsp->vns_wq = WR(q);
+ q->q_ptr = WR(q)->q_ptr = vsp;
+ vsp->vns_flags = VNS_F_NEED_ZONE;
+ vsp->vns_nflush = vnd_flush_nburst;
+ vsp->vns_bsize = vnd_flush_burst_size;
+
+ (void) random_get_pseudo_bytes((uint8_t *)&rand, sizeof (rand));
+ vsp->vns_squeue = gsqueue_set_get(vnd_sqset, rand);
+
+ /*
+ * We create our kstat and initialize all of its fields now, but we
+ * don't install it until we actually do the zone association so we can
+ * get everything.
+ */
+ kstat_named_init(&vsp->vns_ksdata.vks_rbytes, "rbytes",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_rpackets, "rpackets",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_obytes, "obytes",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_opackets, "opackets",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_nhookindrops, "nhookindrops",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_nhookoutdrops, "nhookoutdrops",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_ndlpidrops, "ndlpidrops",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_ndataindrops, "ndataindrops",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_ndataoutdrops, "ndataoutdrops",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_tdrops, "total_drops",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_linkname, "linkname",
+ KSTAT_DATA_STRING);
+ kstat_named_init(&vsp->vns_ksdata.vks_zonename, "zonename",
+ KSTAT_DATA_STRING);
+ kstat_named_init(&vsp->vns_ksdata.vks_nmacflow, "flowcontrol_events",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_tmacflow, "flowcontrol_time",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_mac_flow_1ms, "flowcontrol_1ms",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_mac_flow_10ms, "flowcontrol_10ms",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_mac_flow_100ms,
+ "flowcontrol_100ms", KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_mac_flow_1s, "flowcontrol_1s",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_mac_flow_10s, "flowcontrol_10s",
+ KSTAT_DATA_UINT64);
+ qprocson(q);
+ /*
+ * Now that we've called qprocson, grab the lower module for making sure
+ * that we don't have any pass through modules.
+ */
+ vsp->vns_lrq = RD(vsp->vns_wq->q_next);
+
+ return (0);
+}
+
+static int
+vnd_s_close(queue_t *q, int flag, cred_t *credp)
+{
+ vnd_str_t *vsp;
+ mblk_t *mp;
+
+ VERIFY(WR(q)->q_next != NULL);
+
+ vsp = q->q_ptr;
+ ASSERT(vsp != NULL);
+
+ /*
+ * We need to transition ourselves down. This means that we have a few
+ * important different things to do in the process of tearing down our
+ * input and output buffers, making sure we've drained the current
+ * squeue, and disabling the fast path. Before we disable the fast path,
+ * we should make sure the squeue is drained. Because we're in streams
+ * close, we know that no packets can come into us from userland, but we
+ * can receive more. As such, the following is the exact order of things
+ * that we do:
+ *
+ * 1) flush the vns_dq_read
+ * 2) Insert the drain mblk
+ * 3) When it's been received, tear down the fast path by kicking
+ * off the state machine.
+ * 4) One final flush of both the vns_dq_read,vns_dq_write
+ */
+
+ vnd_dq_flush(&vsp->vns_dq_read, vnd_drop_in);
+ vnd_strbarrier(vsp);
+ mutex_enter(&vsp->vns_lock);
+ vsp->vns_state = VNS_S_SHUTTING_DOWN;
+ if (!(vsp->vns_flags & VNS_F_TASKQ_DISPATCHED)) {
+ vsp->vns_flags |= VNS_F_TASKQ_DISPATCHED;
+ taskq_dispatch_ent(vnd_taskq, vnd_dlpi_taskq_dispatch, vsp,
+ 0, &vsp->vns_tqe);
+ }
+ while (vsp->vns_state != VNS_S_ZOMBIE)
+ cv_wait(&vsp->vns_stcv, &vsp->vns_lock);
+ mutex_exit(&vsp->vns_lock);
+
+ qprocsoff(q);
+ mutex_enter(&vsp->vns_lock);
+ vsp->vns_flags |= VNS_F_CONDEMNED;
+ while (vsp->vns_flags & VNS_F_TASKQ_DISPATCHED)
+ cv_wait(&vsp->vns_cancelcv, &vsp->vns_lock);
+
+ while ((mp = vnd_dlpi_inc_pop(vsp)) != NULL)
+ vnd_drop_ctl(vsp, mp, "vnd_s_close");
+ mutex_exit(&vsp->vns_lock);
+
+ q->q_ptr = NULL;
+ vnd_dq_flush(&vsp->vns_dq_read, vnd_drop_in);
+ vnd_dq_flush(&vsp->vns_dq_write, vnd_drop_out);
+ mutex_destroy(&vsp->vns_dq_read.vdq_lock);
+ mutex_destroy(&vsp->vns_dq_write.vdq_lock);
+
+ if (vsp->vns_kstat != NULL)
+ kstat_delete(vsp->vns_kstat);
+ mutex_destroy(&vsp->vns_lock);
+ cv_destroy(&vsp->vns_stcv);
+ cv_destroy(&vsp->vns_barriercv);
+ cv_destroy(&vsp->vns_cancelcv);
+ kmem_cache_free(vnd_str_cache, vsp);
+
+ return (0);
+}
+
+static vnd_mac_cookie_t
+vnd_squeue_tx_one(vnd_str_t *vsp, mblk_t *mp)
+{
+ hrtime_t txtime;
+ vnd_mac_cookie_t vc;
+
+ VND_STAT_INC(vsp, vks_opackets, 1);
+ VND_STAT_INC(vsp, vks_obytes, msgsize(mp));
+ DTRACE_VND5(send, mblk_t *, mp, void *, NULL, void *, NULL,
+ vnd_str_t *, vsp, mblk_t *, mp);
+ /* Actually tx now */
+ txtime = gethrtime();
+ vc = vsp->vns_caps.vsc_tx_f(vsp->vns_caps.vsc_tx_hdl,
+ mp, 0, MAC_DROP_ON_NO_DESC);
+
+ /*
+ * We need to check two different conditions before we immediately set
+ * the flow control lock. The first thing that we need to do is verify
+ * that this is an instance of hard flow control, so to say. The flow
+ * control callbacks won't always fire in cases where we still get a
+ * cookie returned. The explicit check for flow control will guarantee
+ * us that we'll get a subsequent notification callback.
+ *
+ * The second case comes about because we do not hold the
+ * vnd_str_t`vns_lock across calls to tx, we need to determine if a flow
+ * control notification already came across for us in a different thread
+ * calling vnd_mac_flow_control(). To deal with this, we record a
+ * timestamp every time that we change the flow control state. We grab
+ * txtime here before we transmit because that guarantees that the
+ * hrtime_t of the call to vnd_mac_flow_control() will be after txtime.
+ *
+ * If the flow control notification beat us to the punch, the value of
+ * vns_fcupdate will be larger than the value of txtime, and we should
+ * just record the statistics. However, if we didn't beat it to the
+ * punch (txtime > vns_fcupdate), then we know that it's safe to wait
+ * for a notification.
+ */
+ if (vc != NULL) {
+ hrtime_t diff;
+
+ if (vsp->vns_caps.vsc_is_fc_f(vsp->vns_caps.vsc_is_fc_hdl,
+ vc) == 0)
+ return (NULL);
+ mutex_enter(&vsp->vns_lock);
+ diff = vsp->vns_fcupdate - txtime;
+ if (diff > 0) {
+ mutex_exit(&vsp->vns_lock);
+ vnd_mac_flow_control_stat(vsp, diff);
+ return (NULL);
+ }
+ vsp->vns_flags |= VNS_F_FLOW_CONTROLLED;
+ vsp->vns_caps.vsc_fc_cookie = vc;
+ vsp->vns_fclatch = txtime;
+ vsp->vns_fcupdate = txtime;
+ DTRACE_VND3(flow__blocked, vnd_str_t *, vsp,
+ uint64_t, vsp->vns_dq_write.vdq_cur, uintptr_t, vc);
+ mutex_exit(&vsp->vns_lock);
+ }
+
+ return (vc);
+}
+
+static void
+vnd_squeue_tx_drain(void *arg, mblk_t *drain_mp, gsqueue_t *gsp, void *dummy)
+{
+ mblk_t *mp;
+ int nmps;
+ size_t mptot, nflush, bsize;
+ boolean_t blocked, empty;
+ vnd_data_queue_t *vqp;
+ vnd_str_t *vsp = arg;
+
+ mutex_enter(&vsp->vns_lock);
+ /*
+ * We either enter here via an squeue or via vnd_squeue_tx_append(). In
+ * the former case we need to mark that there is no longer an active
+ * user of the drain block.
+ */
+ if (drain_mp != NULL) {
+ VERIFY(drain_mp == &vsp->vns_drainblk);
+ VERIFY(vsp->vns_flags & VNS_F_DRAIN_SCHEDULED);
+ vsp->vns_flags &= ~VNS_F_DRAIN_SCHEDULED;
+ }
+
+ /*
+ * If we're still flow controlled or under a flush barrier, nothing to
+ * do.
+ */
+ if (vsp->vns_flags & (VNS_F_FLOW_CONTROLLED | VNS_F_BARRIER)) {
+ mutex_exit(&vsp->vns_lock);
+ return;
+ }
+
+ nflush = vsp->vns_nflush;
+ bsize = vsp->vns_bsize;
+ mutex_exit(&vsp->vns_lock);
+
+ nmps = 0;
+ mptot = 0;
+ blocked = B_FALSE;
+ vqp = &vsp->vns_dq_write;
+ while (nmps < nflush && mptot <= bsize) {
+ mutex_enter(&vqp->vdq_lock);
+ if (vnd_dq_pop(vqp, &mp) == 0) {
+ mutex_exit(&vqp->vdq_lock);
+ break;
+ }
+ mutex_exit(&vqp->vdq_lock);
+
+ nmps++;
+ mptot += msgsize(mp);
+ if (vnd_squeue_tx_one(vsp, mp) != NULL) {
+ blocked = B_TRUE;
+ break;
+ }
+ }
+
+ empty = vnd_dq_is_empty(&vsp->vns_dq_write);
+
+ /*
+ * If the queue is not empty, we're not blocked, and there isn't a drain
+ * scheduled, put it into the squeue with the drain block and
+ * GSQUEUE_FILL.
+ */
+ if (blocked == B_FALSE && empty == B_FALSE) {
+ mutex_enter(&vsp->vns_lock);
+ if (!(vsp->vns_flags & VNS_F_DRAIN_SCHEDULED)) {
+ mblk_t *mp = &vsp->vns_drainblk;
+ vsp->vns_flags |= VNS_F_DRAIN_SCHEDULED;
+ gsqueue_enter_one(vsp->vns_squeue,
+ mp, vnd_squeue_tx_drain, vsp,
+ GSQUEUE_FILL, VND_SQUEUE_TAG_TX_DRAIN);
+ }
+ mutex_exit(&vsp->vns_lock);
+ }
+
+ /*
+ * If we drained some amount of data, we need to signal the data queue.
+ */
+ if (nmps > 0) {
+ cv_broadcast(&vsp->vns_dq_write.vdq_ready);
+ pollwakeup(&vsp->vns_dev->vdd_ph, POLLOUT);
+ }
+}
+
+static void
+vnd_squeue_tx_append(void *arg, mblk_t *mp, gsqueue_t *gsp, void *dummy)
+{
+ vnd_str_t *vsp = arg;
+ vnd_data_queue_t *vqp = &vsp->vns_dq_write;
+ vnd_pnsd_t *nsp = vsp->vns_nsd;
+ size_t len = msgsize(mp);
+
+ /*
+ * Before we append this packet, we should run it through the firewall
+ * rules.
+ */
+ if (nsp->vpnd_hooked && vnd_hook(vsp, &mp, nsp->vpnd_neti_v4,
+ nsp->vpnd_event_out_v4, nsp->vpnd_token_out_v4, nsp->vpnd_neti_v6,
+ nsp->vpnd_event_out_v6, nsp->vpnd_token_out_v6, vnd_drop_hook_out,
+ vnd_drop_out) != 0) {
+ /*
+ * Because we earlier reserved space for this packet and it's
+ * not making the cut, we need to go through and unreserve that
+ * space. Also note that the message block will likely be freed
+ * by the time we return from vnd_hook so we cannot rely on it.
+ */
+ mutex_enter(&vqp->vdq_lock);
+ vnd_dq_unreserve(vqp, len);
+ mutex_exit(&vqp->vdq_lock);
+ return;
+ }
+
+ /*
+ * We earlier reserved space for this packet. So for now simply append
+ * it and call drain. We know that no other drain can be going on right
+ * now thanks to the squeue.
+ */
+ mutex_enter(&vqp->vdq_lock);
+ (void) vnd_dq_push(&vsp->vns_dq_write, mp, B_TRUE, vnd_drop_panic);
+ mutex_exit(&vqp->vdq_lock);
+ vnd_squeue_tx_drain(vsp, NULL, NULL, NULL);
+}
+
+/*
+ * We need to see if this is a valid name of sorts for us. That means a few
+ * things. First off, we can't assume that what we've been given has actually
+ * been null terminated. More importantly, that it's a valid name as far as
+ * ddi_create_minor_node is concerned (that means no '@', '/', or ' '). We
+ * further constrain ourselves to simply alphanumeric characters and a few
+ * additional ones, ':', '-', and '_'.
+ */
+static int
+vnd_validate_name(const char *buf, size_t buflen)
+{
+ int i, len;
+
+ /* First make sure a null terminator exists */
+ for (i = 0; i < buflen; i++)
+ if (buf[i] == '\0')
+ break;
+ len = i;
+ if (i == 0 || i == buflen)
+ return (0);
+
+ for (i = 0; i < len; i++)
+ if (!isalnum(buf[i]) && buf[i] != ':' && buf[i] != '-' &&
+ buf[i] != '_')
+ return (0);
+
+ return (1);
+}
+
+static int
+vnd_ioctl_attach(vnd_dev_t *vdp, uintptr_t arg, cred_t *credp, int cpflag)
+{
+ vnd_ioc_attach_t via;
+ vnd_strioc_associate_t vss;
+ vnd_pnsd_t *nsp;
+ zone_t *zonep;
+ zoneid_t zid;
+ char buf[2*VND_NAMELEN];
+ int ret, rp;
+
+ if (secpolicy_net_config(credp, B_FALSE) != 0)
+ return (EPERM);
+
+ if (secpolicy_net_rawaccess(credp) != 0)
+ return (EPERM);
+
+ if (ddi_copyin((void *)arg, &via, sizeof (via), cpflag) != 0)
+ return (EFAULT);
+ via.via_errno = VND_E_SUCCESS;
+
+ if (vnd_validate_name(via.via_name, VND_NAMELEN) == 0) {
+ via.via_errno = VND_E_BADNAME;
+ ret = EIO;
+ goto errcopyout;
+ }
+
+ /*
+ * Only the global zone can request to create a device in a different
+ * zone.
+ */
+ zid = crgetzoneid(credp);
+ if (zid != GLOBAL_ZONEID && via.via_zoneid != -1 &&
+ zid != via.via_zoneid) {
+ via.via_errno = VND_E_PERM;
+ ret = EIO;
+ goto errcopyout;
+ }
+
+ if (via.via_zoneid == -1)
+ via.via_zoneid = zid;
+
+ /*
+ * Establish the name we'll use now. We want to be extra paranoid about
+ * the device we're opening so check that now.
+ */
+ if (zid == GLOBAL_ZONEID && via.via_zoneid != zid) {
+ zonep = zone_find_by_id(via.via_zoneid);
+ if (zonep == NULL) {
+ via.via_errno = VND_E_NOZONE;
+ ret = EIO;
+ goto errcopyout;
+ }
+ if (snprintf(NULL, 0, "/dev/net/zone/%s/%s", zonep->zone_name,
+ via.via_name) >= sizeof (buf)) {
+ zone_rele(zonep);
+ via.via_errno = VND_E_BADNAME;
+ ret = EIO;
+ goto errcopyout;
+ }
+ (void) snprintf(buf, sizeof (buf), "/dev/net/zone/%s/%s",
+ zonep->zone_name, via.via_name);
+ zone_rele(zonep);
+ zonep = NULL;
+ } else {
+ if (snprintf(NULL, 0, "/dev/net/%s", via.via_name) >=
+ sizeof (buf)) {
+ via.via_errno = VND_E_BADNAME;
+ ret = EIO;
+ goto errcopyout;
+ }
+ (void) snprintf(buf, sizeof (buf), "/dev/net/%s", via.via_name);
+ }
+
+ /*
+ * If our zone is dying then the netstack will have been removed from
+ * this list.
+ */
+ nsp = vnd_nsd_lookup_by_zid(via.via_zoneid);
+ if (nsp == NULL) {
+ via.via_errno = VND_E_NOZONE;
+ ret = EIO;
+ goto errcopyout;
+ }
+
+ /*
+ * Note we set the attached handle even though we haven't actually
+ * finished the process of attaching the ldi handle.
+ */
+ mutex_enter(&vdp->vdd_lock);
+ if (vdp->vdd_flags & (VND_D_ATTACHED | VND_D_ATTACH_INFLIGHT)) {
+ mutex_exit(&vdp->vdd_lock);
+ vnd_nsd_rele(nsp);
+ via.via_errno = VND_E_ATTACHED;
+ ret = EIO;
+ goto errcopyout;
+ }
+ vdp->vdd_flags |= VND_D_ATTACH_INFLIGHT;
+ ASSERT(vdp->vdd_cr == NULL);
+ crhold(credp);
+ vdp->vdd_cr = credp;
+ ASSERT(vdp->vdd_nsd == NULL);
+ vdp->vdd_nsd = nsp;
+ mutex_exit(&vdp->vdd_lock);
+
+ /*
+ * Place an additional hold on the vnd_pnsd_t as we go through and do
+ * all of the rest of our work. This will be the hold that we keep for
+ * as long as this thing is attached.
+ */
+ vnd_nsd_ref(nsp);
+
+ ret = ldi_open_by_name(buf, FREAD | FWRITE, vdp->vdd_cr,
+ &vdp->vdd_ldih, vdp->vdd_ldiid);
+ if (ret != 0) {
+ if (ret == ENODEV)
+ via.via_errno = VND_E_NODATALINK;
+ goto err;
+ }
+
+ /*
+ * Unfortunately the I_PUSH interface doesn't allow us a way to detect
+ * whether or not we're coming in from a layered device. We really want
+ * to make sure that a normal user can't push on our streams module.
+ * Currently the only idea I have for this is to make sure that the
+ * credp is kcred which is really terrible.
+ */
+ ret = ldi_ioctl(vdp->vdd_ldih, I_PUSH, (intptr_t)"vnd", FKIOCTL,
+ kcred, &rp);
+ if (ret != 0) {
+ rp = ldi_close(vdp->vdd_ldih, FREAD | FWRITE, vdp->vdd_cr);
+ VERIFY(rp == 0);
+ via.via_errno = VND_E_STRINIT;
+ ret = EIO;
+ goto err;
+ }
+
+ vss.vsa_minor = vdp->vdd_minor;
+ vss.vsa_nsid = nsp->vpnd_nsid;
+
+ ret = ldi_ioctl(vdp->vdd_ldih, VND_STRIOC_ASSOCIATE, (intptr_t)&vss,
+ FKIOCTL, kcred, &rp);
+ if (ret != 0 || vss.vsa_errno != VND_E_SUCCESS) {
+ rp = ldi_close(vdp->vdd_ldih, FREAD | FWRITE, vdp->vdd_cr);
+ VERIFY(rp == 0);
+ if (ret == 0) {
+ via.via_errno = vss.vsa_errno;
+ ret = EIO;
+ }
+ goto err;
+ }
+
+ mutex_enter(&vdp->vdd_nsd->vpnd_lock);
+
+ /*
+ * There's a chance that our netstack was condemned while we've had a
+ * hold on it. As such we need to check and if so, error out.
+ */
+ if (vdp->vdd_nsd->vpnd_flags & VND_NS_CONDEMNED) {
+ mutex_exit(&vdp->vdd_nsd->vpnd_lock);
+ rp = ldi_close(vdp->vdd_ldih, FREAD | FWRITE, vdp->vdd_cr);
+ VERIFY(rp == 0);
+ ret = EIO;
+ via.via_errno = VND_E_NOZONE;
+ goto err;
+ }
+
+ mutex_enter(&vdp->vdd_lock);
+ VERIFY(vdp->vdd_str != NULL);
+ vdp->vdd_flags &= ~VND_D_ATTACH_INFLIGHT;
+ vdp->vdd_flags |= VND_D_ATTACHED;
+ (void) strlcpy(vdp->vdd_datalink, via.via_name,
+ sizeof (vdp->vdd_datalink));
+ list_insert_tail(&vdp->vdd_nsd->vpnd_dev_list, vdp);
+ mutex_exit(&vdp->vdd_lock);
+ mutex_exit(&vdp->vdd_nsd->vpnd_lock);
+ vnd_nsd_rele(nsp);
+
+ return (0);
+
+err:
+ mutex_enter(&vdp->vdd_lock);
+ vdp->vdd_flags &= ~VND_D_ATTACH_INFLIGHT;
+ crfree(vdp->vdd_cr);
+ vdp->vdd_cr = NULL;
+ vdp->vdd_nsd = NULL;
+ mutex_exit(&vdp->vdd_lock);
+
+ /*
+ * We have two holds to drop here. One for our original reference and
+ * one for the hold this operation would have represented.
+ */
+ vnd_nsd_rele(nsp);
+ vnd_nsd_rele(nsp);
+errcopyout:
+ if (ddi_copyout(&via, (void *)arg, sizeof (via), cpflag) != 0)
+ ret = EFAULT;
+
+ return (ret);
+}
+
+static int
+vnd_ioctl_link(vnd_dev_t *vdp, intptr_t arg, cred_t *credp, int cpflag)
+{
+ int ret = 0;
+ vnd_ioc_link_t vil;
+ char mname[2*VND_NAMELEN];
+ char **c;
+ vnd_dev_t *v;
+ zoneid_t zid;
+
+ /* Not anyone can link something */
+ if (secpolicy_net_config(credp, B_FALSE) != 0)
+ return (EPERM);
+
+ if (ddi_copyin((void *)arg, &vil, sizeof (vil), cpflag) != 0)
+ return (EFAULT);
+
+ if (vnd_validate_name(vil.vil_name, VND_NAMELEN) == 0) {
+ ret = EIO;
+ vil.vil_errno = VND_E_BADNAME;
+ goto errcopyout;
+ }
+
+ c = vnd_reserved_names;
+ while (*c != NULL) {
+ if (strcmp(vil.vil_name, *c) == 0) {
+ ret = EIO;
+ vil.vil_errno = VND_E_BADNAME;
+ goto errcopyout;
+ }
+ c++;
+ }
+
+ mutex_enter(&vdp->vdd_lock);
+ if (!(vdp->vdd_flags & VND_D_ATTACHED)) {
+ mutex_exit(&vdp->vdd_lock);
+ ret = EIO;
+ vil.vil_errno = VND_E_NOTATTACHED;
+ goto errcopyout;
+ }
+
+ if (vdp->vdd_flags & VND_D_ZONE_DYING) {
+ mutex_exit(&vdp->vdd_lock);
+ ret = EIO;
+ vil.vil_errno = VND_E_NOZONE;
+ goto errcopyout;
+ }
+
+ if (vdp->vdd_flags & (VND_D_LINK_INFLIGHT | VND_D_LINKED)) {
+ mutex_exit(&vdp->vdd_lock);
+ ret = EIO;
+ vil.vil_errno = VND_E_LINKED;
+ goto errcopyout;
+ }
+ vdp->vdd_flags |= VND_D_LINK_INFLIGHT;
+ zid = vdp->vdd_nsd->vpnd_zid;
+ mutex_exit(&vdp->vdd_lock);
+
+ if (snprintf(NULL, 0, "z%d:%s", zid, vil.vil_name) >=
+ sizeof (mname)) {
+ ret = EIO;
+ vil.vil_errno = VND_E_BADNAME;
+ goto errcopyout;
+ }
+
+ mutex_enter(&vnd_dev_lock);
+ for (v = list_head(&vnd_dev_list); v != NULL;
+ v = list_next(&vnd_dev_list, v)) {
+ if (!(v->vdd_flags & VND_D_LINKED))
+ continue;
+
+ if (v->vdd_nsd->vpnd_zid == zid &&
+ strcmp(v->vdd_lname, vil.vil_name) == 0) {
+ mutex_exit(&vnd_dev_lock);
+ ret = EIO;
+ vil.vil_errno = VND_E_LINKEXISTS;
+ goto error;
+ }
+ }
+
+ /*
+ * We set the name and mark ourselves attached while holding the list
+ * lock to ensure that no other user can mistakingly find our name.
+ */
+ (void) snprintf(mname, sizeof (mname), "z%d:%s", zid,
+ vil.vil_name);
+ mutex_enter(&vdp->vdd_lock);
+
+ /*
+ * Because we dropped our lock, we need to double check whether or not
+ * the zone was marked as dying while we were here. If it hasn't, then
+ * it's safe for us to link it in.
+ */
+ if (vdp->vdd_flags & VND_D_ZONE_DYING) {
+ mutex_exit(&vdp->vdd_lock);
+ mutex_exit(&vnd_dev_lock);
+ ret = EIO;
+ vil.vil_errno = VND_E_NOZONE;
+ goto error;
+ }
+
+ (void) strlcpy(vdp->vdd_lname, vil.vil_name, sizeof (vdp->vdd_lname));
+ if (ddi_create_minor_node(vnd_dip, mname, S_IFCHR, vdp->vdd_minor,
+ DDI_PSEUDO, 0) != DDI_SUCCESS) {
+ ret = EIO;
+ vil.vil_errno = VND_E_MINORNODE;
+ } else {
+ vdp->vdd_flags &= ~VND_D_LINK_INFLIGHT;
+ vdp->vdd_flags |= VND_D_LINKED;
+ kstat_named_setstr(&vdp->vdd_str->vns_ksdata.vks_linkname,
+ vdp->vdd_lname);
+ ret = 0;
+ }
+ mutex_exit(&vdp->vdd_lock);
+ mutex_exit(&vnd_dev_lock);
+
+ if (ret == 0) {
+ /*
+ * Add a reference to represent that this device is linked into
+ * the file system name space to ensure that it doesn't
+ * disappear.
+ */
+ vnd_dev_ref(vdp);
+ return (0);
+ }
+
+error:
+ mutex_enter(&vdp->vdd_lock);
+ vdp->vdd_flags &= ~VND_D_LINK_INFLIGHT;
+ vdp->vdd_lname[0] = '\0';
+ mutex_exit(&vdp->vdd_lock);
+
+errcopyout:
+ if (ddi_copyout(&vil, (void *)arg, sizeof (vil), cpflag) != 0)
+ ret = EFAULT;
+ return (ret);
+}
+
+/*
+ * Common unlink function. This is used both from the ioctl path and from the
+ * netstack shutdown path. The caller is required to hold the mutex on the
+ * vnd_dev_t, but they basically will have it relinquished for them. The only
+ * thing the caller is allowed to do afterward is to potentially rele the
+ * vnd_dev_t if they have their own hold. Note that only the ioctl path has its
+ * own hold.
+ */
+static void
+vnd_dev_unlink(vnd_dev_t *vdp)
+{
+ char mname[2*VND_NAMELEN];
+
+ ASSERT(MUTEX_HELD(&vdp->vdd_lock));
+
+ (void) snprintf(mname, sizeof (mname), "z%d:%s",
+ vdp->vdd_nsd->vpnd_zid, vdp->vdd_lname);
+ ddi_remove_minor_node(vnd_dip, mname);
+ vdp->vdd_lname[0] = '\0';
+ vdp->vdd_flags &= ~VND_D_LINKED;
+ kstat_named_setstr(&vdp->vdd_str->vns_ksdata.vks_linkname,
+ vdp->vdd_lname);
+ mutex_exit(&vdp->vdd_lock);
+
+ /*
+ * This rele corresponds to the reference that we took in
+ * vnd_ioctl_link.
+ */
+ vnd_dev_rele(vdp);
+}
+
+static int
+vnd_ioctl_unlink(vnd_dev_t *vdp, intptr_t arg, cred_t *credp, int cpflag)
+{
+ int ret;
+ zoneid_t zid;
+ vnd_ioc_unlink_t viu;
+
+ /* Not anyone can unlink something */
+ if (secpolicy_net_config(credp, B_FALSE) != 0)
+ return (EPERM);
+
+ zid = crgetzoneid(credp);
+
+ if (ddi_copyin((void *)arg, &viu, sizeof (viu), cpflag) != 0)
+ return (EFAULT);
+
+ viu.viu_errno = VND_E_SUCCESS;
+
+ mutex_enter(&vdp->vdd_lock);
+ if (!(vdp->vdd_flags & VND_D_LINKED)) {
+ mutex_exit(&vdp->vdd_lock);
+ ret = EIO;
+ viu.viu_errno = VND_E_NOTLINKED;
+ goto err;
+ }
+ VERIFY(vdp->vdd_flags & VND_D_ATTACHED);
+
+ if (zid != GLOBAL_ZONEID && zid != vdp->vdd_nsd->vpnd_zid) {
+ mutex_exit(&vdp->vdd_lock);
+ ret = EIO;
+ viu.viu_errno = VND_E_PERM;
+ goto err;
+ }
+
+ /* vnd_dev_unlink releases the vdp mutex for us */
+ vnd_dev_unlink(vdp);
+ ret = 0;
+err:
+ if (ddi_copyout(&viu, (void *)arg, sizeof (viu), cpflag) != 0)
+ return (EFAULT);
+
+ return (ret);
+}
+
+static int
+vnd_ioctl_setrxbuf(vnd_dev_t *vdp, intptr_t arg, int cpflag)
+{
+ int ret;
+ vnd_ioc_buf_t vib;
+
+ if (ddi_copyin((void *)arg, &vib, sizeof (vib), cpflag) != 0)
+ return (EFAULT);
+
+ mutex_enter(&vnd_dev_lock);
+ if (vib.vib_size > vnd_vdq_hard_max) {
+ mutex_exit(&vnd_dev_lock);
+ vib.vib_errno = VND_E_BUFTOOBIG;
+ ret = EIO;
+ goto err;
+ }
+ mutex_exit(&vnd_dev_lock);
+
+ mutex_enter(&vdp->vdd_lock);
+ if (!(vdp->vdd_flags & VND_D_ATTACHED)) {
+ mutex_exit(&vdp->vdd_lock);
+ vib.vib_errno = VND_E_NOTATTACHED;
+ ret = EIO;
+ goto err;
+ }
+
+ mutex_enter(&vdp->vdd_str->vns_lock);
+ if (vib.vib_size < vdp->vdd_str->vns_minwrite) {
+ mutex_exit(&vdp->vdd_str->vns_lock);
+ mutex_exit(&vdp->vdd_lock);
+ vib.vib_errno = VND_E_BUFTOOSMALL;
+ ret = EIO;
+ goto err;
+ }
+
+ mutex_exit(&vdp->vdd_str->vns_lock);
+ mutex_enter(&vdp->vdd_str->vns_dq_read.vdq_lock);
+ vdp->vdd_str->vns_dq_read.vdq_max = vib.vib_size;
+ mutex_exit(&vdp->vdd_str->vns_dq_read.vdq_lock);
+ mutex_exit(&vdp->vdd_lock);
+ ret = 0;
+
+err:
+ if (ddi_copyout(&vib, (void *)arg, sizeof (vib), cpflag) != 0)
+ return (EFAULT);
+
+ return (ret);
+}
+
+static int
+vnd_ioctl_getrxbuf(vnd_dev_t *vdp, intptr_t arg, int cpflag)
+{
+ int ret;
+ vnd_ioc_buf_t vib;
+
+ mutex_enter(&vdp->vdd_lock);
+ if (!(vdp->vdd_flags & VND_D_ATTACHED)) {
+ mutex_exit(&vdp->vdd_lock);
+ vib.vib_errno = VND_E_NOTATTACHED;
+ ret = EIO;
+ goto err;
+ }
+
+ mutex_enter(&vdp->vdd_str->vns_dq_read.vdq_lock);
+ vib.vib_size = vdp->vdd_str->vns_dq_read.vdq_max;
+ mutex_exit(&vdp->vdd_str->vns_dq_read.vdq_lock);
+ mutex_exit(&vdp->vdd_lock);
+ ret = 0;
+
+err:
+ if (ddi_copyout(&vib, (void *)arg, sizeof (vib), cpflag) != 0)
+ return (EFAULT);
+
+ return (ret);
+}
+
+static int
+vnd_ioctl_getmaxbuf(vnd_dev_t *vdp, intptr_t arg, int cpflag)
+{
+ vnd_ioc_buf_t vib;
+
+ mutex_enter(&vnd_dev_lock);
+ vib.vib_size = vnd_vdq_hard_max;
+ mutex_exit(&vnd_dev_lock);
+
+ if (ddi_copyout(&vib, (void *)arg, sizeof (vib), cpflag) != 0)
+ return (EFAULT);
+
+ return (0);
+}
+
+static int
+vnd_ioctl_gettxbuf(vnd_dev_t *vdp, intptr_t arg, int cpflag)
+{
+ int ret;
+ vnd_ioc_buf_t vib;
+
+ mutex_enter(&vdp->vdd_lock);
+ if (!(vdp->vdd_flags & VND_D_ATTACHED)) {
+ mutex_exit(&vdp->vdd_lock);
+ vib.vib_errno = VND_E_NOTATTACHED;
+ ret = EIO;
+ goto err;
+ }
+
+ mutex_enter(&vdp->vdd_str->vns_dq_write.vdq_lock);
+ vib.vib_size = vdp->vdd_str->vns_dq_write.vdq_max;
+ mutex_exit(&vdp->vdd_str->vns_dq_write.vdq_lock);
+ mutex_exit(&vdp->vdd_lock);
+ ret = 0;
+
+err:
+ if (ddi_copyout(&vib, (void *)arg, sizeof (vib), cpflag) != 0)
+ return (EFAULT);
+
+ return (ret);
+}
+
+static int
+vnd_ioctl_settxbuf(vnd_dev_t *vdp, intptr_t arg, int cpflag)
+{
+ int ret;
+ vnd_ioc_buf_t vib;
+
+ if (ddi_copyin((void *)arg, &vib, sizeof (vib), cpflag) != 0)
+ return (EFAULT);
+
+ mutex_enter(&vnd_dev_lock);
+ if (vib.vib_size > vnd_vdq_hard_max) {
+ mutex_exit(&vnd_dev_lock);
+ vib.vib_errno = VND_E_BUFTOOBIG;
+ ret = EIO;
+ goto err;
+ }
+ mutex_exit(&vnd_dev_lock);
+
+ mutex_enter(&vdp->vdd_lock);
+ if (!(vdp->vdd_flags & VND_D_ATTACHED)) {
+ mutex_exit(&vdp->vdd_lock);
+ vib.vib_errno = VND_E_NOTATTACHED;
+ ret = EIO;
+ goto err;
+ }
+
+ mutex_enter(&vdp->vdd_str->vns_lock);
+ if (vib.vib_size < vdp->vdd_str->vns_minwrite) {
+ mutex_exit(&vdp->vdd_str->vns_lock);
+ mutex_exit(&vdp->vdd_lock);
+ vib.vib_errno = VND_E_BUFTOOSMALL;
+ ret = EIO;
+ goto err;
+ }
+ mutex_exit(&vdp->vdd_str->vns_lock);
+
+ mutex_enter(&vdp->vdd_str->vns_dq_write.vdq_lock);
+ vdp->vdd_str->vns_dq_write.vdq_max = vib.vib_size;
+ mutex_exit(&vdp->vdd_str->vns_dq_write.vdq_lock);
+ mutex_exit(&vdp->vdd_lock);
+ ret = 0;
+
+err:
+ if (ddi_copyout(&vib, (void *)arg, sizeof (vib), cpflag) != 0)
+ return (EFAULT);
+
+ return (ret);
+}
+
+static int
+vnd_ioctl_gettu(vnd_dev_t *vdp, intptr_t arg, int mode, boolean_t min)
+{
+ vnd_ioc_buf_t vib;
+
+ vib.vib_errno = 0;
+ mutex_enter(&vdp->vdd_lock);
+ if (vdp->vdd_flags & VND_D_ATTACHED) {
+ mutex_enter(&vdp->vdd_str->vns_lock);
+ if (min == B_TRUE)
+ vib.vib_size = vdp->vdd_str->vns_minwrite;
+ else
+ vib.vib_size = vdp->vdd_str->vns_maxwrite;
+ mutex_exit(&vdp->vdd_str->vns_lock);
+ } else {
+ vib.vib_errno = VND_E_NOTATTACHED;
+ }
+ mutex_exit(&vdp->vdd_lock);
+
+ if (ddi_copyout(&vib, (void *)arg, sizeof (vib), mode & FKIOCTL) != 0)
+ return (EFAULT);
+
+ return (0);
+}
+
+static int
+vnd_frameio_read(vnd_dev_t *vdp, intptr_t addr, int mode)
+{
+ int ret, nonblock, nwrite;
+ frameio_t *fio;
+ vnd_data_queue_t *vqp;
+ mblk_t *mp;
+
+ fio = frameio_alloc(KM_NOSLEEP | KM_NORMALPRI);
+ if (fio == NULL)
+ return (EAGAIN);
+
+ ret = frameio_hdr_copyin(fio, FRAMEIO_NVECS_MAX, (const void *)addr,
+ mode);
+ if (ret != 0) {
+ frameio_free(fio);
+ return (ret);
+ }
+
+ mutex_enter(&vdp->vdd_lock);
+ if (!(vdp->vdd_flags & VND_D_ATTACHED)) {
+ mutex_exit(&vdp->vdd_lock);
+ frameio_free(fio);
+ return (ENXIO);
+ }
+ mutex_exit(&vdp->vdd_lock);
+
+ nonblock = mode & (FNONBLOCK | FNDELAY);
+
+ vqp = &vdp->vdd_str->vns_dq_read;
+ mutex_enter(&vqp->vdq_lock);
+
+ /* Check empty case */
+ if (vqp->vdq_cur == 0) {
+ if (nonblock != 0) {
+ mutex_exit(&vqp->vdq_lock);
+ frameio_free(fio);
+ return (EWOULDBLOCK);
+ }
+ while (vqp->vdq_cur == 0) {
+ if (cv_wait_sig(&vqp->vdq_ready, &vqp->vdq_lock) <= 0) {
+ mutex_exit(&vqp->vdq_lock);
+ frameio_free(fio);
+ return (EINTR);
+ }
+ }
+ }
+
+ ret = frameio_mblk_chain_write(fio, MAP_BLK_FRAME, vqp->vdq_head,
+ &nwrite, mode & FKIOCTL);
+ if (ret != 0) {
+ mutex_exit(&vqp->vdq_lock);
+ frameio_free(fio);
+ return (ret);
+ }
+
+ ret = frameio_hdr_copyout(fio, nwrite, (void *)addr, mode);
+ if (ret != 0) {
+ mutex_exit(&vqp->vdq_lock);
+ frameio_free(fio);
+ return (ret);
+ }
+
+ while (nwrite > 0) {
+ (void) vnd_dq_pop(vqp, &mp);
+ freemsg(mp);
+ nwrite--;
+ }
+ mutex_exit(&vqp->vdq_lock);
+ frameio_free(fio);
+
+ return (0);
+}
+
+static int
+vnd_frameio_write(vnd_dev_t *vdp, intptr_t addr, int mode)
+{
+ frameio_t *fio;
+ int ret, nonblock, nframes, i, nread;
+ size_t maxwrite, minwrite, total, flen;
+ mblk_t *mp_chain, *mp, *nmp;
+ vnd_data_queue_t *vqp;
+
+ fio = frameio_alloc(KM_NOSLEEP | KM_NORMALPRI);
+ if (fio == NULL)
+ return (EAGAIN);
+
+ ret = frameio_hdr_copyin(fio, FRAMEIO_NVECS_MAX, (void *)addr, mode);
+ if (ret != 0) {
+ frameio_free(fio);
+ return (ret);
+ }
+
+ mutex_enter(&vdp->vdd_lock);
+ if (!(vdp->vdd_flags & VND_D_ATTACHED)) {
+ mutex_exit(&vdp->vdd_lock);
+ frameio_free(fio);
+ return (ENXIO);
+ }
+ mutex_exit(&vdp->vdd_lock);
+
+ nonblock = mode & (FNONBLOCK | FNDELAY);
+
+ /*
+ * Make sure no single frame is larger than we can accept.
+ */
+ mutex_enter(&vdp->vdd_str->vns_lock);
+ minwrite = vdp->vdd_str->vns_minwrite;
+ maxwrite = vdp->vdd_str->vns_maxwrite;
+ mutex_exit(&vdp->vdd_str->vns_lock);
+
+ nframes = fio->fio_nvpf / fio->fio_nvecs;
+ total = 0;
+ for (i = 0; i < nframes; i++) {
+ flen = frameio_frame_length(fio,
+ &fio->fio_vecs[i*fio->fio_nvpf]);
+ if (flen < minwrite || flen > maxwrite) {
+ frameio_free(fio);
+ return (ERANGE);
+ }
+ total += flen;
+ }
+
+ vqp = &vdp->vdd_str->vns_dq_write;
+ mutex_enter(&vqp->vdq_lock);
+ while (vnd_dq_reserve(vqp, total) == 0) {
+ if (nonblock != 0) {
+ frameio_free(fio);
+ mutex_exit(&vqp->vdq_lock);
+ return (EAGAIN);
+ }
+ if (cv_wait_sig(&vqp->vdq_ready, &vqp->vdq_lock) <= 0) {
+ mutex_exit(&vqp->vdq_lock);
+ frameio_free(fio);
+ return (EINTR);
+ }
+ }
+ mutex_exit(&vqp->vdq_lock);
+
+ /*
+ * We've reserved our space, let's copyin and go from here.
+ */
+ ret = frameio_mblk_chain_read(fio, &mp_chain, &nread, mode & FKIOCTL);
+ if (ret != 0) {
+ frameio_free(fio);
+ vnd_dq_unreserve(vqp, total);
+ cv_broadcast(&vqp->vdq_ready);
+ pollwakeup(&vdp->vdd_ph, POLLOUT);
+ return (ret);
+ }
+
+ for (mp = mp_chain; mp != NULL; mp = nmp) {
+ nmp = mp->b_next;
+ mp->b_next = NULL;
+ gsqueue_enter_one(vdp->vdd_str->vns_squeue, mp,
+ vnd_squeue_tx_append, vdp->vdd_str, GSQUEUE_PROCESS,
+ VND_SQUEUE_TAG_VND_WRITE);
+ }
+
+ /*
+ * Update the frameio structure to indicate that we wrote those frames.
+ */
+ frameio_mark_consumed(fio, nread);
+ ret = frameio_hdr_copyout(fio, nread, (void *)addr, mode);
+ frameio_free(fio);
+
+ return (ret);
+}
+
+static int
+vnd_ioctl_list_copy_info(vnd_dev_t *vdp, vnd_ioc_info_t *arg, int mode)
+{
+ const char *link;
+ uint32_t vers = 1;
+ ASSERT(MUTEX_HELD(&vdp->vdd_lock));
+
+ /*
+ * Copy all of the members out to userland.
+ */
+ if (ddi_copyout(&vers, &arg->vii_version, sizeof (uint32_t),
+ mode & FKIOCTL) != 0)
+ return (EFAULT);
+
+ if (vdp->vdd_flags & VND_D_LINKED)
+ link = vdp->vdd_lname;
+ else
+ link = "<anonymous>";
+ if (ddi_copyout(link, arg->vii_name, sizeof (arg->vii_name),
+ mode & FKIOCTL) != 0)
+ return (EFAULT);
+
+ if (ddi_copyout(vdp->vdd_datalink, arg->vii_datalink,
+ sizeof (arg->vii_datalink), mode & FKIOCTL) != 0)
+ return (EFAULT);
+
+ if (ddi_copyout(&vdp->vdd_nsd->vpnd_zid, &arg->vii_zone,
+ sizeof (zoneid_t), mode & FKIOCTL) != 0)
+ return (EFAULT);
+ return (0);
+}
+
+static int
+vnd_ioctl_list(intptr_t arg, cred_t *credp, int mode)
+{
+ vnd_ioc_list_t vl;
+ vnd_ioc_list32_t vl32;
+ zoneid_t zid;
+ vnd_dev_t *vdp;
+ vnd_ioc_info_t *vip;
+ int found, cancopy, ret;
+
+ if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
+ if (ddi_copyin((void *)arg, &vl32, sizeof (vnd_ioc_list32_t),
+ mode & FKIOCTL) != 0)
+ return (EFAULT);
+ vl.vl_nents = vl32.vl_nents;
+ vl.vl_actents = vl32.vl_actents;
+ vl.vl_ents = (void *)(uintptr_t)vl32.vl_ents;
+ } else {
+ if (ddi_copyin((void *)arg, &vl, sizeof (vnd_ioc_list_t),
+ mode & FKIOCTL) != 0)
+ return (EFAULT);
+ }
+
+ cancopy = vl.vl_nents;
+ vip = vl.vl_ents;
+ found = 0;
+ zid = crgetzoneid(credp);
+ mutex_enter(&vnd_dev_lock);
+ for (vdp = list_head(&vnd_dev_list); vdp != NULL;
+ vdp = list_next(&vnd_dev_list, vdp)) {
+ mutex_enter(&vdp->vdd_lock);
+ if (vdp->vdd_flags & VND_D_ATTACHED &&
+ !(vdp->vdd_flags & (VND_D_CONDEMNED | VND_D_ZONE_DYING)) &&
+ (zid == GLOBAL_ZONEID || zid == vdp->vdd_nsd->vpnd_zid)) {
+ found++;
+ if (cancopy > 0) {
+ ret = vnd_ioctl_list_copy_info(vdp, vip, mode);
+ if (ret != 0) {
+ mutex_exit(&vdp->vdd_lock);
+ mutex_exit(&vnd_dev_lock);
+ return (ret);
+ }
+ cancopy--;
+ vip++;
+ }
+ }
+ mutex_exit(&vdp->vdd_lock);
+ }
+ mutex_exit(&vnd_dev_lock);
+
+ if (ddi_copyout(&found, &((vnd_ioc_list_t *)arg)->vl_actents,
+ sizeof (uint_t), mode & FKIOCTL) != 0)
+ return (EFAULT);
+
+ return (0);
+}
+
+
+static int
+vnd_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
+ int *rvalp)
+{
+ int ret;
+ minor_t m;
+ vnd_dev_t *vdp;
+
+ m = getminor(dev);
+ ASSERT(m != 0);
+
+ /*
+ * Make sure no one has come in on an ioctl from the strioc case.
+ */
+ if ((cmd & VND_STRIOC) == VND_STRIOC)
+ return (ENOTTY);
+
+ /*
+ * Like close, seems like if this minor isn't found, it's a programmer
+ * error somehow.
+ */
+ vdp = vnd_dev_lookup(m);
+ if (vdp == NULL)
+ return (ENXIO);
+
+ switch (cmd) {
+ case VND_IOC_ATTACH:
+ if (!(mode & FWRITE)) {
+ ret = EBADF;
+ break;
+ }
+ ret = vnd_ioctl_attach(vdp, arg, credp, mode);
+ break;
+ case VND_IOC_LINK:
+ if (!(mode & FWRITE)) {
+ ret = EBADF;
+ break;
+ }
+ ret = vnd_ioctl_link(vdp, arg, credp, mode);
+ break;
+ case VND_IOC_UNLINK:
+ if (!(mode & FWRITE)) {
+ ret = EBADF;
+ break;
+ }
+ ret = vnd_ioctl_unlink(vdp, arg, credp, mode);
+ break;
+ case VND_IOC_GETRXBUF:
+ if (!(mode & FREAD)) {
+ ret = EBADF;
+ break;
+ }
+ ret = vnd_ioctl_getrxbuf(vdp, arg, mode);
+ break;
+ case VND_IOC_SETRXBUF:
+ if (!(mode & FWRITE)) {
+ ret = EBADF;
+ break;
+ }
+ ret = vnd_ioctl_setrxbuf(vdp, arg, mode);
+ break;
+ case VND_IOC_GETTXBUF:
+ if (!(mode & FREAD)) {
+ ret = EBADF;
+ break;
+ }
+ ret = vnd_ioctl_gettxbuf(vdp, arg, mode);
+ break;
+ case VND_IOC_SETTXBUF:
+ if (!(mode & FWRITE)) {
+ ret = EBADF;
+ break;
+ }
+ ret = vnd_ioctl_settxbuf(vdp, arg, mode);
+ break;
+ case VND_IOC_GETMAXBUF:
+ if (!(mode & FREAD)) {
+ ret = EBADF;
+ break;
+ }
+ if (crgetzoneid(credp) != GLOBAL_ZONEID) {
+ ret = EPERM;
+ break;
+ }
+ ret = vnd_ioctl_getmaxbuf(vdp, arg, mode);
+ break;
+ case VND_IOC_GETMINTU:
+ if (!(mode & FREAD)) {
+ ret = EBADF;
+ break;
+ }
+ ret = vnd_ioctl_gettu(vdp, arg, mode, B_TRUE);
+ break;
+ case VND_IOC_GETMAXTU:
+ if (!(mode & FREAD)) {
+ ret = EBADF;
+ break;
+ }
+ ret = vnd_ioctl_gettu(vdp, arg, mode, B_FALSE);
+ break;
+ case VND_IOC_FRAMEIO_READ:
+ if (!(mode & FREAD)) {
+ ret = EBADF;
+ break;
+ }
+ ret = vnd_frameio_read(vdp, arg, mode);
+ break;
+ case VND_IOC_FRAMEIO_WRITE:
+ if (!(mode & FWRITE)) {
+ ret = EBADF;
+ break;
+ }
+ ret = vnd_frameio_write(vdp, arg, mode);
+ break;
+ case VND_IOC_LIST:
+ if (!(mode & FREAD)) {
+ ret = EBADF;
+ break;
+ }
+ ret = vnd_ioctl_list(arg, credp, mode);
+ break;
+ default:
+ ret = ENOTTY;
+ break;
+ }
+
+ vnd_dev_rele(vdp);
+ return (ret);
+}
+
+static int
+vnd_open(dev_t *devp, int flag, int otyp, cred_t *credp)
+{
+ vnd_dev_t *vdp;
+ minor_t m;
+ zoneid_t zid;
+
+ if (flag & (FEXCL | FNDELAY))
+ return (ENOTSUP);
+
+ if (otyp & OTYP_BLK)
+ return (ENOTSUP);
+
+ zid = crgetzoneid(credp);
+ m = getminor(*devp);
+
+ /*
+ * If we have an open of a non-zero instance then we need to look that
+ * up in our list of entries.
+ */
+ if (m != 0) {
+
+ /*
+ * We don't check for rawaccess globally as a user could be
+ * doing a list ioctl on the control node which doesn't require
+ * this privilege.
+ */
+ if (secpolicy_net_rawaccess(credp) != 0)
+ return (EPERM);
+
+
+ vdp = vnd_dev_lookup(m);
+ if (vdp == NULL)
+ return (ENOENT);
+
+ /*
+ * We need to check to make sure that the user is allowed to
+ * open this node. At this point it should be an attached handle
+ * as that's all we're allowed to access.
+ */
+ mutex_enter(&vdp->vdd_lock);
+ if (!(vdp->vdd_flags & VND_D_LINKED)) {
+ mutex_exit(&vdp->vdd_lock);
+ vnd_dev_rele(vdp);
+ return (ENOENT);
+ }
+
+ if (vdp->vdd_flags & VND_D_ZONE_DYING) {
+ mutex_exit(&vdp->vdd_lock);
+ vnd_dev_rele(vdp);
+ return (ENOENT);
+ }
+
+ if (zid != GLOBAL_ZONEID && zid != vdp->vdd_nsd->vpnd_zid) {
+ mutex_exit(&vdp->vdd_lock);
+ vnd_dev_rele(vdp);
+ return (ENOENT);
+ }
+
+ if ((flag & FEXCL) && (vdp->vdd_flags & VND_D_OPENED)) {
+ mutex_exit(&vdp->vdd_lock);
+ vnd_dev_rele(vdp);
+ return (EBUSY);
+ }
+
+ if (!(vdp->vdd_flags & VND_D_OPENED)) {
+ vdp->vdd_flags |= VND_D_OPENED;
+ vdp->vdd_ref++;
+ DTRACE_VND_REFINC(vdp);
+ }
+ mutex_exit(&vdp->vdd_lock);
+ vnd_dev_rele(vdp);
+
+ return (0);
+ }
+
+ if (flag & FEXCL)
+ return (ENOTSUP);
+
+ /*
+ * We need to clone ourselves and set up new a state.
+ */
+ vdp = kmem_cache_alloc(vnd_dev_cache, KM_SLEEP);
+ bzero(vdp, sizeof (vnd_dev_t));
+
+ if (ldi_ident_from_dev(*devp, &vdp->vdd_ldiid) != 0) {
+ kmem_cache_free(vnd_dev_cache, vdp);
+ return (EINVAL);
+ }
+
+ vdp->vdd_minor = id_alloc(vnd_minors);
+ mutex_init(&vdp->vdd_lock, NULL, MUTEX_DRIVER, NULL);
+ list_link_init(&vdp->vdd_link);
+ vdp->vdd_ref = 1;
+ *devp = makedevice(getmajor(*devp), vdp->vdd_minor);
+ vdp->vdd_devid = *devp;
+ DTRACE_VND_REFINC(vdp);
+ vdp->vdd_flags |= VND_D_OPENED;
+
+ mutex_enter(&vnd_dev_lock);
+ list_insert_head(&vnd_dev_list, vdp);
+ mutex_exit(&vnd_dev_lock);
+
+ return (0);
+}
+
+static int
+vnd_close(dev_t dev, int flag, int otyp, cred_t *credp)
+{
+ minor_t m;
+ vnd_dev_t *vdp;
+
+ m = getminor(dev);
+ if (m == 0)
+ return (ENXIO);
+
+ vdp = vnd_dev_lookup(m);
+ if (vdp == NULL)
+ return (ENXIO);
+
+ mutex_enter(&vdp->vdd_lock);
+ VERIFY(vdp->vdd_flags & VND_D_OPENED);
+ vdp->vdd_flags &= ~VND_D_OPENED;
+ mutex_exit(&vdp->vdd_lock);
+
+ /* Remove the hold from the previous open. */
+ vnd_dev_rele(vdp);
+
+ /* And now from lookup */
+ vnd_dev_rele(vdp);
+ return (0);
+}
+
+static int
+vnd_read(dev_t dev, struct uio *uiop, cred_t *credp)
+{
+ int nonblock, error = 0;
+ size_t mpsize;
+ vnd_dev_t *vdp;
+ vnd_data_queue_t *vqp;
+ mblk_t *mp = NULL;
+ offset_t u_loffset;
+
+ /*
+ * If we have more than one uio we refuse to do anything. That's for
+ * frameio.
+ */
+ if (uiop->uio_iovcnt > 1)
+ return (EINVAL);
+
+ vdp = vnd_dev_lookup(getminor(dev));
+ if (vdp == NULL)
+ return (ENXIO);
+
+ mutex_enter(&vdp->vdd_lock);
+ if (!(vdp->vdd_flags & VND_D_ATTACHED)) {
+ mutex_exit(&vdp->vdd_lock);
+ vnd_dev_rele(vdp);
+ return (ENXIO);
+ }
+ mutex_exit(&vdp->vdd_lock);
+ nonblock = uiop->uio_fmode & (FNONBLOCK | FNDELAY);
+
+ vqp = &vdp->vdd_str->vns_dq_read;
+ mutex_enter(&vqp->vdq_lock);
+
+ /* Check empty case */
+ if (vqp->vdq_cur == 0) {
+ if (nonblock != 0) {
+ error = EWOULDBLOCK;
+ goto err;
+ }
+ while (vqp->vdq_cur == 0) {
+ if (cv_wait_sig(&vqp->vdq_ready, &vqp->vdq_lock) <= 0) {
+ error = EINTR;
+ goto err;
+ }
+ }
+ }
+
+ /* Ensure our buffer is big enough */
+ mp = vqp->vdq_head;
+ ASSERT(mp != NULL);
+ mpsize = msgsize(mp);
+ if (mpsize > uiop->uio_resid) {
+ error = EOVERFLOW;
+ goto err;
+ }
+
+ u_loffset = uiop->uio_loffset;
+ while (mp != NULL) {
+ if (uiomove(mp->b_rptr, MBLKL(mp), UIO_READ, uiop) != 0) {
+ error = EFAULT;
+ uiop->uio_loffset = u_loffset;
+ mp = NULL;
+ goto err;
+ }
+ mpsize -= MBLKL(mp);
+ mp = mp->b_cont;
+ }
+ ASSERT(mpsize == 0);
+ (void) vnd_dq_pop(vqp, &mp);
+ freemsg(mp);
+err:
+ mutex_exit(&vqp->vdq_lock);
+ vnd_dev_rele(vdp);
+
+ return (error);
+}
+
+static int
+vnd_write(dev_t dev, struct uio *uiop, cred_t *credp)
+{
+ int nonblock, error;
+ vnd_dev_t *vdp;
+ mblk_t *mp;
+ ssize_t iosize, origsize;
+ vnd_data_queue_t *vqp;
+
+ if (uiop->uio_iovcnt > 1)
+ return (EINVAL);
+
+ vdp = vnd_dev_lookup(getminor(dev));
+ if (vdp == NULL)
+ return (ENXIO);
+
+ mutex_enter(&vdp->vdd_lock);
+ if (!(vdp->vdd_flags & VND_D_ATTACHED)) {
+ mutex_exit(&vdp->vdd_lock);
+ vnd_dev_rele(vdp);
+ return (ENXIO);
+ }
+ mutex_exit(&vdp->vdd_lock);
+ nonblock = uiop->uio_fmode & (FNONBLOCK | FNDELAY);
+
+ mutex_enter(&vdp->vdd_str->vns_lock);
+ if (uiop->uio_resid > vdp->vdd_str->vns_maxwrite ||
+ uiop->uio_resid < vdp->vdd_str->vns_minwrite) {
+ mutex_exit(&vdp->vdd_str->vns_lock);
+ vnd_dev_rele(vdp);
+ return (ERANGE);
+ }
+ mutex_exit(&vdp->vdd_str->vns_lock);
+ VERIFY(vdp->vdd_str != NULL);
+
+ /*
+ * Reserve space in the data queue if we can. If we can't, block or
+ * return EAGAIN. If we can, go and squeue_enter.
+ */
+ vqp = &vdp->vdd_str->vns_dq_write;
+ mutex_enter(&vqp->vdq_lock);
+ while (vnd_dq_reserve(vqp, uiop->uio_resid) == 0) {
+ if (nonblock != 0) {
+ mutex_exit(&vqp->vdq_lock);
+ vnd_dev_rele(vdp);
+ return (EAGAIN);
+ }
+ if (cv_wait_sig(&vqp->vdq_ready, &vqp->vdq_lock) <= 0) {
+ mutex_exit(&vqp->vdq_lock);
+ vnd_dev_rele(vdp);
+ return (EINTR);
+ }
+ }
+ mutex_exit(&vqp->vdq_lock);
+
+ /*
+ * Now that we've reserved the space, try to allocate kernel space for
+ * and copy in the block. To take care of all this we use the
+ * strmakedata subroutine for now.
+ */
+ origsize = iosize = uiop->uio_resid;
+ error = strmakedata(&iosize, uiop, vdp->vdd_str->vns_wq->q_stream, 0,
+ &mp);
+
+ /*
+ * strmakedata() will return an error or it may only consume a portion
+ * of the data.
+ */
+ if (error != 0 || uiop->uio_resid != 0) {
+ vnd_dq_unreserve(vqp, origsize);
+ cv_broadcast(&vqp->vdq_ready);
+ pollwakeup(&vdp->vdd_ph, POLLOUT);
+ vnd_dev_rele(vdp);
+ return (ENOSR);
+ }
+
+ gsqueue_enter_one(vdp->vdd_str->vns_squeue, mp,
+ vnd_squeue_tx_append, vdp->vdd_str, GSQUEUE_PROCESS,
+ VND_SQUEUE_TAG_VND_WRITE);
+
+ vnd_dev_rele(vdp);
+ return (0);
+}
+
+static int
+vnd_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
+ struct pollhead **phpp)
+{
+ int ready = 0;
+ vnd_dev_t *vdp;
+ vnd_data_queue_t *vqp;
+
+ vdp = vnd_dev_lookup(getminor(dev));
+ if (vdp == NULL)
+ return (ENXIO);
+
+ mutex_enter(&vdp->vdd_lock);
+ if (!(vdp->vdd_flags & VND_D_ATTACHED)) {
+ mutex_exit(&vdp->vdd_lock);
+ vnd_dev_rele(vdp);
+ return (ENXIO);
+ }
+ mutex_exit(&vdp->vdd_lock);
+
+ if ((events & POLLIN) || (events & POLLRDNORM)) {
+ vqp = &vdp->vdd_str->vns_dq_read;
+ mutex_enter(&vqp->vdq_lock);
+ if (vqp->vdq_head != NULL)
+ ready |= events & (POLLIN | POLLRDNORM);
+ mutex_exit(&vqp->vdq_lock);
+ }
+
+ if (events & POLLOUT) {
+ vqp = &vdp->vdd_str->vns_dq_write;
+ mutex_enter(&vqp->vdq_lock);
+ if (vqp->vdq_cur != vqp->vdq_max)
+ ready |= POLLOUT;
+ mutex_exit(&vqp->vdq_lock);
+ }
+
+ if (ready != 0) {
+ *reventsp = ready;
+ vnd_dev_rele(vdp);
+ return (0);
+ }
+
+ *reventsp = 0;
+ if (!anyyet)
+ *phpp = &vdp->vdd_ph;
+
+ vnd_dev_rele(vdp);
+ return (0);
+}
+
+static void *
+vnd_stack_init(netstackid_t stackid, netstack_t *ns)
+{
+ vnd_pnsd_t *nsp;
+
+ nsp = kmem_cache_alloc(vnd_pnsd_cache, KM_SLEEP);
+ bzero(nsp, sizeof (*nsp));
+ nsp->vpnd_nsid = stackid;
+ nsp->vpnd_zid = netstackid_to_zoneid(stackid);
+ nsp->vpnd_flags = 0;
+ mutex_init(&nsp->vpnd_lock, NULL, MUTEX_DRIVER, NULL);
+ list_create(&nsp->vpnd_dev_list, sizeof (vnd_dev_t),
+ offsetof(vnd_dev_t, vdd_nslink));
+ if (vnd_netinfo_init(nsp) == 0)
+ nsp->vpnd_hooked = B_TRUE;
+
+ mutex_enter(&vnd_dev_lock);
+ list_insert_tail(&vnd_nsd_list, nsp);
+ mutex_exit(&vnd_dev_lock);
+
+ return (nsp);
+}
+
+static void
+vnd_stack_shutdown(netstackid_t stackid, void *arg)
+{
+ vnd_pnsd_t *nsp = arg;
+ vnd_dev_t *vdp;
+
+ ASSERT(nsp != NULL);
+ /*
+ * After shut down no one should be able to find their way to this
+ * netstack again.
+ */
+ mutex_enter(&vnd_dev_lock);
+ list_remove(&vnd_nsd_list, nsp);
+ mutex_exit(&vnd_dev_lock);
+
+ /*
+ * Make sure hooks know that they're going away.
+ */
+ if (nsp->vpnd_hooked == B_TRUE)
+ vnd_netinfo_shutdown(nsp);
+
+ /*
+ * Now we need to go through and notify each zone that they are in
+ * teardown phase. See the big theory statement section on vnd, zones,
+ * netstacks, and sdev for more information about this.
+ */
+ mutex_enter(&nsp->vpnd_lock);
+ nsp->vpnd_flags |= VND_NS_CONDEMNED;
+ for (vdp = list_head(&nsp->vpnd_dev_list); vdp != NULL;
+ vdp = list_next(&nsp->vpnd_dev_list, vdp)) {
+ mutex_enter(&vdp->vdd_lock);
+ if (!(vdp->vdd_flags & VND_D_CONDEMNED))
+ vdp->vdd_flags |= VND_D_ZONE_DYING;
+ mutex_exit(&vdp->vdd_lock);
+ }
+ mutex_exit(&nsp->vpnd_lock);
+
+ /*
+ * Next we remove all the links as we know nothing new can be added to
+ * the list and that none of the extent devices can obtain additional
+ * links.
+ */
+restart:
+ mutex_enter(&nsp->vpnd_lock);
+ for (vdp = list_head(&nsp->vpnd_dev_list); vdp != NULL;
+ vdp = list_next(&nsp->vpnd_dev_list, vdp)) {
+ mutex_enter(&vdp->vdd_lock);
+ if ((vdp->vdd_flags & VND_D_CONDEMNED) ||
+ !(vdp->vdd_flags & VND_D_LINKED)) {
+ mutex_exit(&vdp->vdd_lock);
+ continue;
+ }
+
+ /*
+ * We drop our lock here and restart afterwards. Note that as
+ * part of unlinking we end up doing a rele of the vnd_dev_t. If
+ * this is the final hold on the vnd_dev_t then it might try and
+ * remove itself. Our locking rules requires not to be holding
+ * any locks when we call any of the rele functions.
+ *
+ * Note that the unlink function requires holders to call into
+ * it with the vnd_dev_t->vdd_lock held and will take care of it
+ * for us. Because we don't have a hold on it, we're done at
+ * this point.
+ */
+ mutex_exit(&nsp->vpnd_lock);
+ /* Forcibly unlink */
+ vnd_dev_unlink(vdp);
+ goto restart;
+ }
+ mutex_exit(&nsp->vpnd_lock);
+}
+
+static void
+vnd_stack_destroy(netstackid_t stackid, void *arg)
+{
+ vnd_pnsd_t *nsp = arg;
+
+ ASSERT(nsp != NULL);
+
+ /*
+ * Now that we've unlinked everything we just have to hang out for
+ * it to finish exiting. Now that it's no longer the kernel itself
+ * that's doing this we just need to wait for our reference count to
+ * equal zero and then we're free. If the global zone is holding open a
+ * reference to a vnd device for another zone, that's bad, but there's
+ * nothing much we can do. See the section on 'vnd, zones, netstacks' in
+ * the big theory statement for more information.
+ */
+ mutex_enter(&nsp->vpnd_lock);
+ while (nsp->vpnd_ref != 0)
+ cv_wait(&nsp->vpnd_ref_change, &nsp->vpnd_lock);
+ mutex_exit(&nsp->vpnd_lock);
+
+ /*
+ * During shutdown we removed ourselves from the list and now we have no
+ * more references so we can safely say that there is nothing left and
+ * destroy everything that we had sitting around.
+ */
+ if (nsp->vpnd_hooked == B_TRUE)
+ vnd_netinfo_fini(nsp);
+
+ mutex_destroy(&nsp->vpnd_lock);
+ list_destroy(&nsp->vpnd_dev_list);
+ kmem_cache_free(vnd_pnsd_cache, nsp);
+}
+
+/*
+ * Convert a node with a name of the form /dev/vnd/zone/%zonename and
+ * /dev/vnd/zone/%zonename/%linkname to the corresponding vnd netstack.
+ */
+static vnd_pnsd_t *
+vnd_sdev_ctx_to_ns(sdev_ctx_t ctx)
+{
+ enum vtype vt;
+ const char *path = sdev_ctx_path(ctx);
+ char *zstart, *dup;
+ size_t duplen;
+ vnd_pnsd_t *nsp;
+
+ vt = sdev_ctx_vtype(ctx);
+ ASSERT(strncmp(path, VND_SDEV_ZROOT, strlen(VND_SDEV_ZROOT)) == 0);
+
+ if (vt == VDIR) {
+ zstart = strrchr(path, '/');
+ ASSERT(zstart != NULL);
+ zstart++;
+ return (vnd_nsd_lookup_by_zonename(zstart));
+ }
+
+ ASSERT(vt == VCHR);
+
+ dup = strdup(path);
+ duplen = strlen(dup) + 1;
+ zstart = strrchr(dup, '/');
+ *zstart = '\0';
+ zstart--;
+ zstart = strrchr(dup, '/');
+ zstart++;
+ nsp = vnd_nsd_lookup_by_zonename(zstart);
+ kmem_free(dup, duplen);
+
+ return (nsp);
+}
+
+static sdev_plugin_validate_t
+vnd_sdev_validate_dir(sdev_ctx_t ctx)
+{
+ vnd_pnsd_t *nsp;
+
+ if (strcmp(sdev_ctx_path(ctx), VND_SDEV_ROOT) == 0)
+ return (SDEV_VTOR_VALID);
+
+ if (strcmp(sdev_ctx_path(ctx), VND_SDEV_ZROOT) == 0) {
+ ASSERT(getzoneid() == GLOBAL_ZONEID);
+ ASSERT(sdev_ctx_flags(ctx) & SDEV_CTX_GLOBAL);
+ return (SDEV_VTOR_VALID);
+ }
+
+ nsp = vnd_sdev_ctx_to_ns(ctx);
+ if (nsp == NULL)
+ return (SDEV_VTOR_INVALID);
+ vnd_nsd_rele(nsp);
+
+ return (SDEV_VTOR_VALID);
+}
+
+static sdev_plugin_validate_t
+vnd_sdev_validate(sdev_ctx_t ctx)
+{
+ enum vtype vt;
+ dev_t dev;
+ vnd_dev_t *vdp;
+
+ vt = sdev_ctx_vtype(ctx);
+ if (vt == VDIR)
+ return (vnd_sdev_validate_dir(ctx));
+ ASSERT(vt == VCHR);
+
+ if (strcmp("ctl", sdev_ctx_name(ctx)) == 0)
+ return (SDEV_VTOR_VALID);
+
+ dev = (uintptr_t)sdev_ctx_vtype_data(ctx);
+ vdp = vnd_dev_lookup(getminor(dev));
+ if (vdp == NULL)
+ return (SDEV_VTOR_STALE);
+
+ mutex_enter(&vdp->vdd_lock);
+ if (!(vdp->vdd_flags & VND_D_LINKED) ||
+ (vdp->vdd_flags & (VND_D_CONDEMNED | VND_D_ZONE_DYING))) {
+ mutex_exit(&vdp->vdd_lock);
+ vnd_dev_rele(vdp);
+ return (SDEV_VTOR_STALE);
+ }
+
+ if (strcmp(sdev_ctx_name(ctx), vdp->vdd_lname) != 0) {
+ mutex_exit(&vdp->vdd_lock);
+ vnd_dev_rele(vdp);
+ return (SDEV_VTOR_STALE);
+ }
+
+ mutex_exit(&vdp->vdd_lock);
+ vnd_dev_rele(vdp);
+ return (SDEV_VTOR_VALID);
+}
+
+/*
+ * This function is a no-op. sdev never has holds on our devices as they can go
+ * away at any time and specfs has to deal with that fact.
+ */
+static void
+vnd_sdev_inactive(sdev_ctx_t ctx)
+{
+}
+
+static int
+vnd_sdev_fillzone(vnd_pnsd_t *nsp, sdev_ctx_t ctx)
+{
+ int ret;
+ vnd_dev_t *vdp;
+
+ mutex_enter(&nsp->vpnd_lock);
+ for (vdp = list_head(&nsp->vpnd_dev_list); vdp != NULL;
+ vdp = list_next(&nsp->vpnd_dev_list, vdp)) {
+ mutex_enter(&vdp->vdd_lock);
+ if ((vdp->vdd_flags & VND_D_LINKED) &&
+ !(vdp->vdd_flags & (VND_D_CONDEMNED | VND_D_ZONE_DYING))) {
+ ret = sdev_plugin_mknod(ctx, vdp->vdd_lname, S_IFCHR,
+ vdp->vdd_devid);
+ if (ret != 0 && ret != EEXIST) {
+ mutex_exit(&vdp->vdd_lock);
+ mutex_exit(&nsp->vpnd_lock);
+ vnd_nsd_rele(nsp);
+ return (ret);
+ }
+ }
+ mutex_exit(&vdp->vdd_lock);
+ }
+ mutex_exit(&nsp->vpnd_lock);
+
+ return (0);
+}
+
+static int
+vnd_sdev_filldir_root(sdev_ctx_t ctx)
+{
+ zoneid_t zid;
+ vnd_pnsd_t *nsp;
+ int ret;
+
+ zid = getzoneid();
+ nsp = vnd_nsd_lookup(zoneid_to_netstackid(zid));
+ ASSERT(nsp != NULL);
+ ret = vnd_sdev_fillzone(nsp, ctx);
+ vnd_nsd_rele(nsp);
+ if (ret != 0)
+ return (ret);
+
+ /*
+ * Checking the zone id is not sufficient as the global zone could be
+ * reaching down into a non-global zone's mounted /dev.
+ */
+ if (zid == GLOBAL_ZONEID && (sdev_ctx_flags(ctx) & SDEV_CTX_GLOBAL)) {
+ ret = sdev_plugin_mkdir(ctx, "zone");
+ if (ret != 0 && ret != EEXIST)
+ return (ret);
+ }
+
+ /*
+ * Always add a reference to the control node. There's no need to
+ * reference it since it always exists and is always what we clone from.
+ */
+ ret = sdev_plugin_mknod(ctx, "ctl", S_IFCHR,
+ makedevice(ddi_driver_major(vnd_dip), 0));
+ if (ret != 0 && ret != EEXIST)
+ return (ret);
+
+ return (0);
+}
+
+static int
+vnd_sdev_filldir_zroot(sdev_ctx_t ctx)
+{
+ int ret;
+ vnd_pnsd_t *nsp;
+ zone_t *zonep;
+
+ ASSERT(getzoneid() == GLOBAL_ZONEID);
+ ASSERT(sdev_ctx_flags(ctx) & SDEV_CTX_GLOBAL);
+
+ mutex_enter(&vnd_dev_lock);
+ for (nsp = list_head(&vnd_nsd_list); nsp != NULL;
+ nsp = list_next(&vnd_nsd_list, nsp)) {
+ mutex_enter(&nsp->vpnd_lock);
+ if (list_is_empty(&nsp->vpnd_dev_list)) {
+ mutex_exit(&nsp->vpnd_lock);
+ continue;
+ }
+ mutex_exit(&nsp->vpnd_lock);
+ zonep = zone_find_by_id(nsp->vpnd_zid);
+ /*
+ * This zone must be being torn down, so skip it.
+ */
+ if (zonep == NULL)
+ continue;
+ ret = sdev_plugin_mkdir(ctx, zonep->zone_name);
+ zone_rele(zonep);
+ if (ret != 0 && ret != EEXIST) {
+ mutex_exit(&vnd_dev_lock);
+ return (ret);
+ }
+ }
+ mutex_exit(&vnd_dev_lock);
+ return (0);
+}
+
+static int
+vnd_sdev_filldir(sdev_ctx_t ctx)
+{
+ int ret;
+ vnd_pnsd_t *nsp;
+
+ ASSERT(sdev_ctx_vtype(ctx) == VDIR);
+ if (strcmp(VND_SDEV_ROOT, sdev_ctx_path(ctx)) == 0)
+ return (vnd_sdev_filldir_root(ctx));
+
+ if (strcmp(VND_SDEV_ZROOT, sdev_ctx_path(ctx)) == 0)
+ return (vnd_sdev_filldir_zroot(ctx));
+
+ ASSERT(strncmp(VND_SDEV_ZROOT, sdev_ctx_path(ctx),
+ strlen(VND_SDEV_ZROOT)) == 0);
+ nsp = vnd_sdev_ctx_to_ns(ctx);
+ if (nsp == NULL)
+ return (0);
+
+ ret = vnd_sdev_fillzone(nsp, ctx);
+ vnd_nsd_rele(nsp);
+
+ return (ret);
+}
+
+static sdev_plugin_ops_t vnd_sdev_ops = {
+ SDEV_PLUGIN_VERSION,
+ SDEV_PLUGIN_SUBDIR,
+ vnd_sdev_validate,
+ vnd_sdev_filldir,
+ vnd_sdev_inactive
+};
+
+static int
+vnd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int errp = 0;
+
+ if (cmd != DDI_ATTACH)
+ return (DDI_FAILURE);
+
+ /*
+ * Only allow one instance.
+ */
+ if (vnd_dip != NULL)
+ return (DDI_FAILURE);
+
+ vnd_dip = dip;
+ if (ddi_create_minor_node(vnd_dip, "vnd", S_IFCHR, 0, DDI_PSEUDO, 0) !=
+ DDI_SUCCESS) {
+ vnd_dip = NULL;
+ return (DDI_FAILURE);
+ }
+
+ if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
+ DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) {
+ ddi_remove_minor_node(vnd_dip, NULL);
+ vnd_dip = NULL;
+ return (DDI_FAILURE);
+ }
+
+ vnd_sdev_hdl = sdev_plugin_register(VND_SDEV_NAME, &vnd_sdev_ops,
+ &errp);
+ if (vnd_sdev_hdl == NULL) {
+ ddi_remove_minor_node(vnd_dip, NULL);
+ ddi_prop_remove_all(vnd_dip);
+ vnd_dip = NULL;
+ return (DDI_FAILURE);
+ }
+
+ vnd_sqset = gsqueue_set_create(GSQUEUE_DEFAULT_WAIT,
+ GSQUEUE_DEFAULT_PRIORITY);
+
+ return (DDI_SUCCESS);
+}
+
+static int
+vnd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ if (cmd != DDI_DETACH)
+ return (DDI_FAILURE);
+
+ mutex_enter(&vnd_dev_lock);
+ if (!list_is_empty(&vnd_dev_list)) {
+ mutex_exit(&vnd_dev_lock);
+ return (DDI_FAILURE);
+ }
+ mutex_exit(&vnd_dev_lock);
+
+ return (DDI_FAILURE);
+}
+
+static int
+vnd_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
+{
+ int error;
+
+ switch (cmd) {
+ case DDI_INFO_DEVT2DEVINFO:
+ *result = (void *)vnd_dip;
+ error = DDI_SUCCESS;
+ break;
+ case DDI_INFO_DEVT2INSTANCE:
+ *result = (void *)0;
+ error = DDI_SUCCESS;
+ break;
+ default:
+ error = DDI_FAILURE;
+ break;
+ }
+ return (error);
+}
+
+
+
+static void
+vnd_ddi_fini(void)
+{
+ netstack_unregister(NS_VND);
+ if (vnd_taskq != NULL)
+ taskq_destroy(vnd_taskq);
+ if (vnd_str_cache != NULL)
+ kmem_cache_destroy(vnd_str_cache);
+ if (vnd_dev_cache != NULL)
+ kmem_cache_destroy(vnd_dev_cache);
+ if (vnd_pnsd_cache != NULL)
+ kmem_cache_destroy(vnd_pnsd_cache);
+ if (vnd_minors != NULL)
+ id_space_destroy(vnd_minors);
+ if (vnd_list_init != 0) {
+ list_destroy(&vnd_nsd_list);
+ list_destroy(&vnd_dev_list);
+ mutex_destroy(&vnd_dev_lock);
+ vnd_list_init = 0;
+ }
+ frameio_fini();
+}
+
+static int
+vnd_ddi_init(void)
+{
+ if (frameio_init() != 0)
+ return (DDI_FAILURE);
+
+ vnd_str_cache = kmem_cache_create("vnd_str_cache", sizeof (vnd_str_t),
+ 0, NULL, NULL, NULL, NULL, NULL, 0);
+ if (vnd_str_cache == NULL) {
+ frameio_fini();
+ return (DDI_FAILURE);
+ }
+ vnd_dev_cache = kmem_cache_create("vnd_dev_cache", sizeof (vnd_dev_t),
+ 0, NULL, NULL, NULL, NULL, NULL, 0);
+ if (vnd_dev_cache == NULL) {
+ kmem_cache_destroy(vnd_str_cache);
+ frameio_fini();
+ return (DDI_FAILURE);
+ }
+ vnd_pnsd_cache = kmem_cache_create("vnd_pnsd_cache",
+ sizeof (vnd_pnsd_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
+ if (vnd_pnsd_cache == NULL) {
+ kmem_cache_destroy(vnd_dev_cache);
+ kmem_cache_destroy(vnd_str_cache);
+ frameio_fini();
+ return (DDI_FAILURE);
+ }
+
+ vnd_taskq = taskq_create_instance("vnd", -1, 1, minclsyspri, 0, 0, 0);
+ if (vnd_taskq == NULL) {
+ kmem_cache_destroy(vnd_pnsd_cache);
+ kmem_cache_destroy(vnd_dev_cache);
+ kmem_cache_destroy(vnd_str_cache);
+ frameio_fini();
+ return (DDI_FAILURE);
+ }
+
+ vnd_minors = id_space_create("vnd_minors", 1, INT32_MAX);
+ if (vnd_minors == NULL) {
+ taskq_destroy(vnd_taskq);
+ kmem_cache_destroy(vnd_pnsd_cache);
+ kmem_cache_destroy(vnd_dev_cache);
+ kmem_cache_destroy(vnd_str_cache);
+ frameio_fini();
+ return (DDI_FAILURE);
+ }
+
+ mutex_init(&vnd_dev_lock, NULL, MUTEX_DRIVER, NULL);
+ list_create(&vnd_dev_list, sizeof (vnd_dev_t),
+ offsetof(vnd_dev_t, vdd_link));
+ list_create(&vnd_nsd_list, sizeof (vnd_pnsd_t),
+ offsetof(vnd_pnsd_t, vpnd_link));
+ vnd_list_init = 1;
+
+ netstack_register(NS_VND, vnd_stack_init, vnd_stack_shutdown,
+ vnd_stack_destroy);
+
+ return (DDI_SUCCESS);
+}
+
+static struct module_info vnd_minfo = {
+ 0, /* module id */
+ "vnd", /* module name */
+ 1, /* smallest packet size */
+ INFPSZ, /* largest packet size (infinite) */
+ 1, /* high watermark */
+ 0 /* low watermark */
+};
+
+static struct qinit vnd_r_qinit = {
+ vnd_s_rput,
+ NULL,
+ vnd_s_open,
+ vnd_s_close,
+ NULL,
+ &vnd_minfo,
+ NULL
+};
+
+static struct qinit vnd_w_qinit = {
+ vnd_s_wput,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ &vnd_minfo,
+ NULL
+};
+
+static struct streamtab vnd_strtab = {
+ &vnd_r_qinit,
+ &vnd_w_qinit,
+ NULL,
+ NULL
+};
+
+
+static struct cb_ops vnd_cb_ops = {
+ vnd_open, /* open */
+ vnd_close, /* close */
+ nulldev, /* strategy */
+ nulldev, /* print */
+ nodev, /* dump */
+ vnd_read, /* read */
+ vnd_write, /* write */
+ vnd_ioctl, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ nodev, /* segmap */
+ vnd_chpoll, /* poll */
+ ddi_prop_op, /* cb_prop_op */
+ NULL, /* streamtab */
+ D_MP /* Driver compatibility flag */
+};
+
+static struct dev_ops vnd_dev_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* refcnt */
+ vnd_info, /* get_dev_info */
+ nulldev, /* identify */
+ nulldev, /* probe */
+ vnd_attach, /* attach */
+ vnd_detach, /* detach */
+ nodev, /* reset */
+ &vnd_cb_ops, /* driver operations */
+ NULL, /* bus operations */
+ nodev, /* dev power */
+ ddi_quiesce_not_needed /* quiesce */
+};
+
+static struct modldrv vnd_modldrv = {
+ &mod_driverops,
+ "Virtual Networking Datapath Driver",
+ &vnd_dev_ops
+};
+
+static struct fmodsw vnd_fmodfsw = {
+ "vnd",
+ &vnd_strtab,
+ D_NEW | D_MP
+};
+
+static struct modlstrmod vnd_modlstrmod = {
+ &mod_strmodops,
+ "Virtual Networking Datapath Driver",
+ &vnd_fmodfsw
+};
+
+static struct modlinkage vnd_modlinkage = {
+ MODREV_1,
+ &vnd_modldrv,
+ &vnd_modlstrmod,
+ NULL
+};
+
+int
+_init(void)
+{
+ int error;
+
+ /*
+ * We need to do all of our global initialization in init as opposed to
+ * attach and detach. The problem here is that because vnd can be used
+ * from a stream context while being detached, we can not rely on having
+ * run attach to create everything, alas. so it goes in _init, just like
+ * our friend ip.
+ */
+ if ((error = vnd_ddi_init()) != DDI_SUCCESS)
+ return (error);
+ error = mod_install((&vnd_modlinkage));
+ if (error != 0)
+ vnd_ddi_fini();
+ return (error);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&vnd_modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ int error;
+
+ error = mod_remove(&vnd_modlinkage);
+ if (error == 0)
+ vnd_ddi_fini();
+ return (error);
+}
diff --git a/usr/src/uts/common/io/vnd/vnd.conf b/usr/src/uts/common/io/vnd/vnd.conf
new file mode 100644
index 0000000000..65872e1ddf
--- /dev/null
+++ b/usr/src/uts/common/io/vnd/vnd.conf
@@ -0,0 +1,16 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014, Joyent, Inc. All rights reserved.
+#
+
+name="vnd" parent="pseudo" instance=0;
diff --git a/usr/src/uts/common/io/vnic/vnic_dev.c b/usr/src/uts/common/io/vnic/vnic_dev.c
index 4615f00e51..c183a9c4f5 100644
--- a/usr/src/uts/common/io/vnic/vnic_dev.c
+++ b/usr/src/uts/common/io/vnic/vnic_dev.c
@@ -52,6 +52,7 @@
#include <sys/vlan.h>
#include <sys/vnic.h>
#include <sys/vnic_impl.h>
+#include <sys/mac_impl.h>
#include <sys/mac_flow_impl.h>
#include <inet/ip_impl.h>
@@ -1059,6 +1060,18 @@ vnic_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
err = mac_maxsdu_update(vn->vn_mh, mtu);
break;
}
+ case MAC_PROP_VN_PROMISC_FILTERED: {
+ boolean_t filtered;
+
+ if (pr_valsize < sizeof (filtered)) {
+ err = EINVAL;
+ break;
+ }
+
+ bcopy(pr_val, &filtered, sizeof (filtered));
+ mac_set_promisc_filtered(vn->vn_mch, filtered);
+ break;
+ }
case MAC_PROP_SECONDARY_ADDRS: {
mac_secondary_addr_t msa;
@@ -1080,8 +1093,14 @@ vnic_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
{
vnic_t *vn = arg;
int ret = 0;
+ boolean_t out;
switch (pr_num) {
+ case MAC_PROP_VN_PROMISC_FILTERED:
+ out = mac_get_promisc_filtered(vn->vn_mch);
+ ASSERT(pr_valsize >= sizeof (boolean_t));
+ bcopy(&out, pr_val, sizeof (boolean_t));
+ break;
case MAC_PROP_SECONDARY_ADDRS:
ret = vnic_get_secondary_macs(vn, pr_valsize, pr_val);
break;
diff --git a/usr/src/uts/common/krtld/bootrd.c b/usr/src/uts/common/krtld/bootrd.c
index 08e5d98c09..35ad67da96 100644
--- a/usr/src/uts/common/krtld/bootrd.c
+++ b/usr/src/uts/common/krtld/bootrd.c
@@ -21,6 +21,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2013 Joyent, Inc. All rights reserved.
*/
@@ -36,17 +37,95 @@
extern void (*_kobj_printf)(void *, const char *fmt, ...);
extern int get_weakish_int(int *);
extern struct bootops *ops;
-extern struct boot_fs_ops bufs_ops, bhsfs_ops;
+extern struct boot_fs_ops bufs_ops, bhsfs_ops, bbootfs_ops;
extern int kmem_ready;
static uint64_t rd_start, rd_end;
struct boot_fs_ops *bfs_ops;
-struct boot_fs_ops *bfs_tab[] = {&bufs_ops, &bhsfs_ops, NULL};
+struct boot_fs_ops *bfs_tab[] = {&bufs_ops, &bhsfs_ops, &bbootfs_ops, NULL};
static uintptr_t scratch_max = 0;
#define _kmem_ready get_weakish_int(&kmem_ready)
+int
+BRD_MOUNTROOT(struct boot_fs_ops *ops, char *str)
+{
+ return (ops->fsw_mountroot(str));
+}
+
+int
+BRD_UNMOUNTROOT(struct boot_fs_ops *ops)
+{
+ if (bfs_ops != &bbootfs_ops)
+ bbootfs_ops.fsw_closeall(1);
+
+ return (ops->fsw_unmountroot());
+}
+
+int
+BRD_OPEN(struct boot_fs_ops *ops, char *file, int flags)
+{
+ int len = strlen(SYSTEM_BOOT_PATH);
+ int fd;
+
+ /*
+ * Our policy is that we try bootfs first. If bootfs is the only
+ * filesystem, that's the end of it. Otherwise we will fall back to
+ * the normal root (i.e., ramdisk) filesystem at this point and try
+ * again if the file does not exist in bootfs.
+ */
+ fd = bbootfs_ops.fsw_open(file, flags);
+
+ if (bfs_ops == &bbootfs_ops)
+ return (fd);
+
+ if (strncmp(file, SYSTEM_BOOT_PATH, len) == 0 || fd >= 0)
+ return ((fd < 0) ? fd : (fd | BFD_F_SYSTEM_BOOT));
+
+ return (ops->fsw_open(file, flags));
+}
+
+int
+BRD_CLOSE(struct boot_fs_ops *ops, int fd)
+{
+ if (fd & BFD_F_SYSTEM_BOOT)
+ return (bbootfs_ops.fsw_close(fd & ~BFD_F_SYSTEM_BOOT));
+
+ return (ops->fsw_close(fd));
+}
+
+ssize_t
+BRD_READ(struct boot_fs_ops *ops, int fd, caddr_t buf, size_t len)
+{
+ if (fd & BFD_F_SYSTEM_BOOT) {
+ return (bbootfs_ops.fsw_read(fd & ~BFD_F_SYSTEM_BOOT,
+ buf, len));
+ }
+
+ return (ops->fsw_read(fd, buf, len));
+}
+
+off_t
+BRD_SEEK(struct boot_fs_ops *ops, int fd, off_t addr, int whence)
+{
+ if (fd & BFD_F_SYSTEM_BOOT) {
+ return (bbootfs_ops.fsw_lseek(fd & ~BFD_F_SYSTEM_BOOT,
+ addr, whence));
+ }
+
+ return (ops->fsw_lseek(fd, addr, whence));
+}
+
+int
+BRD_FSTAT(struct boot_fs_ops *ops, int fd, struct bootstat *bsp)
+{
+ if (fd & BFD_F_SYSTEM_BOOT)
+ return (bbootfs_ops.fsw_fstat(fd & ~BFD_F_SYSTEM_BOOT, bsp));
+
+ return (ops->fsw_fstat(fd, bsp));
+}
+
/*
* This one reads the ramdisk. If fi_memp is set, we copy the
* ramdisk content to the designated buffer. Otherwise, we
diff --git a/usr/src/uts/common/os/bio.c b/usr/src/uts/common/os/bio.c
index 96502b8230..b8d2e29058 100644
--- a/usr/src/uts/common/os/bio.c
+++ b/usr/src/uts/common/os/bio.c
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -1320,6 +1321,9 @@ pageio_setup(struct page *pp, size_t len, struct vnode *vp, int flags)
cpup = CPU; /* get pointer AFTER preemption is disabled */
CPU_STATS_ADDQ(cpup, vm, pgin, 1);
CPU_STATS_ADDQ(cpup, vm, pgpgin, btopr(len));
+
+ atomic_add_64(&curzone->zone_pgpgin, btopr(len));
+
if ((flags & B_ASYNC) == 0) {
klwp_t *lwp = ttolwp(curthread);
if (lwp != NULL)
@@ -1336,13 +1340,19 @@ pageio_setup(struct page *pp, size_t len, struct vnode *vp, int flags)
if (pp != NULL && pp->p_vnode != NULL) {
if (IS_SWAPFSVP(pp->p_vnode)) {
CPU_STATS_ADDQ(cpup, vm, anonpgin, btopr(len));
+ atomic_add_64(&curzone->zone_anonpgin,
+ btopr(len));
} else {
if (pp->p_vnode->v_flag & VVMEXEC) {
CPU_STATS_ADDQ(cpup, vm, execpgin,
btopr(len));
+ atomic_add_64(&curzone->zone_execpgin,
+ btopr(len));
} else {
CPU_STATS_ADDQ(cpup, vm, fspgin,
btopr(len));
+ atomic_add_64(&curzone->zone_fspgin,
+ btopr(len));
}
}
}
diff --git a/usr/src/uts/common/os/brand.c b/usr/src/uts/common/os/brand.c
index eb8c6e730a..078298c517 100644
--- a/usr/src/uts/common/os/brand.c
+++ b/usr/src/uts/common/os/brand.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#include <sys/kmem.h>
@@ -45,7 +46,7 @@ struct brand_mach_ops native_mach_ops = {
};
#else /* !__sparcv9 */
struct brand_mach_ops native_mach_ops = {
- NULL, NULL, NULL, NULL
+ NULL, NULL, NULL, NULL, NULL, NULL
};
#endif /* !__sparcv9 */
@@ -53,7 +54,8 @@ brand_t native_brand = {
BRAND_VER_1,
"native",
NULL,
- &native_mach_ops
+ &native_mach_ops,
+ 0,
};
/*
@@ -602,14 +604,14 @@ int
brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
intpdata_t *idatap, int level, long *execsz, int setid, caddr_t exec_file,
cred_t *cred, int brand_action, struct brand *pbrand, char *bname,
- char *brandlib, char *brandlib32, char *brandlinker, char *brandlinker32)
+ char *brandlib, char *brandlib32)
{
vnode_t *nvp;
Ehdr ehdr;
Addr uphdr_vaddr;
intptr_t voffset;
- int interp;
+ char *interp;
int i, err;
struct execenv env;
struct execenv origenv;
@@ -619,7 +621,6 @@ brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
klwp_t *lwp = ttolwp(curthread);
brand_proc_data_t *spd;
brand_elf_data_t sed, *sedp;
- char *linker;
uintptr_t lddata; /* lddata of executable's linker */
ASSERT(curproc->p_brand == pbrand);
@@ -636,12 +637,10 @@ brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
*/
if (args->to_model == DATAMODEL_NATIVE) {
args->emulator = brandlib;
- linker = brandlinker;
}
#if defined(_LP64)
else {
args->emulator = brandlib32;
- linker = brandlinker32;
}
#endif /* _LP64 */
@@ -725,7 +724,7 @@ brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
if (args->to_model == DATAMODEL_NATIVE) {
err = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr,
&voffset, exec_file, &interp, &env.ex_bssbase,
- &env.ex_brkbase, &env.ex_brksize, NULL);
+ &env.ex_brkbase, &env.ex_brksize, NULL, NULL);
}
#if defined(_LP64)
else {
@@ -733,7 +732,7 @@ brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
Elf32_Addr uphdr_vaddr32;
err = mapexec32_brand(vp, args, &ehdr32, &uphdr_vaddr32,
&voffset, exec_file, &interp, &env.ex_bssbase,
- &env.ex_brkbase, &env.ex_brksize, NULL);
+ &env.ex_brkbase, &env.ex_brksize, NULL, NULL);
Ehdr32to64(&ehdr32, &ehdr);
if (uphdr_vaddr32 == (Elf32_Addr)-1)
@@ -744,6 +743,10 @@ brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
#endif /* _LP64 */
if (err != 0) {
restoreexecenv(&origenv, &orig_sigaltstack);
+
+ if (interp != NULL)
+ kmem_free(interp, MAXPATHLEN);
+
return (err);
}
@@ -761,7 +764,7 @@ brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
sedp->sed_phent = ehdr.e_phentsize;
sedp->sed_phnum = ehdr.e_phnum;
- if (interp) {
+ if (interp != NULL) {
if (ehdr.e_type == ET_DYN) {
/*
* This is a shared object executable, so we
@@ -777,16 +780,20 @@ brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
* it in and store relevant information about it in the
* aux vector, where the brand library can find it.
*/
- if ((err = lookupname(linker, UIO_SYSSPACE,
+ if ((err = lookupname(interp, UIO_SYSSPACE,
FOLLOW, NULLVPP, &nvp)) != 0) {
- uprintf("%s: not found.", brandlinker);
+ uprintf("%s: not found.", interp);
restoreexecenv(&origenv, &orig_sigaltstack);
+ kmem_free(interp, MAXPATHLEN);
return (err);
}
+
+ kmem_free(interp, MAXPATHLEN);
+
if (args->to_model == DATAMODEL_NATIVE) {
err = mapexec_brand(nvp, args, &ehdr,
&uphdr_vaddr, &voffset, exec_file, &interp,
- NULL, NULL, NULL, &lddata);
+ NULL, NULL, NULL, &lddata, NULL);
}
#if defined(_LP64)
else {
@@ -794,7 +801,7 @@ brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
Elf32_Addr uphdr_vaddr32;
err = mapexec32_brand(nvp, args, &ehdr32,
&uphdr_vaddr32, &voffset, exec_file, &interp,
- NULL, NULL, NULL, &lddata);
+ NULL, NULL, NULL, &lddata, NULL);
Ehdr32to64(&ehdr32, &ehdr);
if (uphdr_vaddr32 == (Elf32_Addr)-1)
@@ -934,9 +941,9 @@ brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
/*
* Third, the /proc aux vectors set up by elfexec() point to
- * brand emulation library and it's linker. Copy these to the
+ * brand emulation library and its linker. Copy these to the
* /proc brand specific aux vector, and update the regular
- * /proc aux vectors to point to the executable (and it's
+ * /proc aux vectors to point to the executable (and its
* linker). This will enable debuggers to access the
* executable via the usual /proc or elf notes aux vectors.
*
diff --git a/usr/src/uts/common/os/core.c b/usr/src/uts/common/os/core.c
index d4dddbe477..3ca17e1f17 100644
--- a/usr/src/uts/common/os/core.c
+++ b/usr/src/uts/common/os/core.c
@@ -64,6 +64,7 @@
#include <sys/contract/process_impl.h>
#include <sys/ddi.h>
+extern int yield(void);
/*
* Processes running within a zone potentially dump core in 3 locations,
* based on the per-process, per-zone, and the global zone's core settings.
diff --git a/usr/src/uts/common/os/cred.c b/usr/src/uts/common/os/cred.c
index 733fd03a92..b0098946b3 100644
--- a/usr/src/uts/common/os/cred.c
+++ b/usr/src/uts/common/os/cred.c
@@ -727,6 +727,14 @@ crgetzoneid(const cred_t *cr)
cr->cr_zone->zone_id);
}
+zoneid_t
+crgetzonedid(const cred_t *cr)
+{
+ return (cr->cr_zone == NULL ?
+ (cr->cr_uid == -1 ? (zoneid_t)-1 : GLOBAL_ZONEID) :
+ cr->cr_zone->zone_did);
+}
+
projid_t
crgetprojid(const cred_t *cr)
{
diff --git a/usr/src/uts/common/os/ddi_intr_irm.c b/usr/src/uts/common/os/ddi_intr_irm.c
index c3c0481e7f..a4b35dcb5b 100644
--- a/usr/src/uts/common/os/ddi_intr_irm.c
+++ b/usr/src/uts/common/os/ddi_intr_irm.c
@@ -1320,7 +1320,7 @@ i_ddi_irm_notify(ddi_irm_pool_t *pool_p, ddi_irm_req_t *req_p)
/* Log callback errors */
if (ret != DDI_SUCCESS) {
- cmn_err(CE_WARN, "%s%d: failed callback (action=%d, ret=%d)\n",
+ cmn_err(CE_WARN, "!%s%d: failed callback (action=%d, ret=%d)\n",
ddi_driver_name(req_p->ireq_dip),
ddi_get_instance(req_p->ireq_dip), (int)action, ret);
}
diff --git a/usr/src/uts/common/os/exec.c b/usr/src/uts/common/os/exec.c
index 994c034fe0..e896bbd1e5 100644
--- a/usr/src/uts/common/os/exec.c
+++ b/usr/src/uts/common/os/exec.c
@@ -97,6 +97,7 @@ uint_t auxv_hwcap32_2 = 0; /* 32-bit version of auxv_hwcap2 */
#endif
#define PSUIDFLAGS (SNOCD|SUGID)
+#define RANDOM_LEN 16 /* 16 bytes for AT_RANDOM aux entry */
/*
* exece() - system call wrapper around exec_common()
@@ -858,8 +859,22 @@ gexec(
if (pp->p_plist || (pp->p_proc_flag & P_PR_TRACE))
args->traceinval = 1;
}
- if (pp->p_proc_flag & P_PR_PTRACE)
- psignal(pp, SIGTRAP);
+
+ /*
+ * If legacy ptrace is enabled, defer to the brand as to the
+ * behavior as to the SIGTRAP generated during exec(). (If
+ * we're not branded or the brand isn't interested in changing
+ * the default behavior, we generate the SIGTRAP.)
+ */
+ if (pp->p_proc_flag & P_PR_PTRACE) {
+ if (PROC_IS_BRANDED(pp) &&
+ BROP(pp)->b_ptrace_exectrap != NULL) {
+ BROP(pp)->b_ptrace_exectrap(pp);
+ } else {
+ psignal(pp, SIGTRAP);
+ }
+ }
+
if (args->traceinval)
prinvalidate(&pp->p_user);
}
@@ -1498,6 +1513,27 @@ stk_add(uarg_t *args, const char *sp, enum uio_seg segflg)
return (0);
}
+/*
+ * Add a fixed size byte array to the stack (only from kernel space).
+ */
+static int
+stk_byte_add(uarg_t *args, const uint8_t *sp, size_t len)
+{
+ int error;
+
+ if (STK_AVAIL(args) < sizeof (int))
+ return (E2BIG);
+ *--args->stk_offp = args->stk_strp - args->stk_base;
+
+ if (len > STK_AVAIL(args))
+ return (E2BIG);
+ bcopy(sp, args->stk_strp, len);
+
+ args->stk_strp += len;
+
+ return (0);
+}
+
static int
stk_getptr(uarg_t *args, char *src, char **dst)
{
@@ -1534,6 +1570,9 @@ stk_copyin(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp)
size_t size, pad;
char *argv = (char *)uap->argp;
char *envp = (char *)uap->envp;
+ uint32_t rvals;
+ uint8_t *rtp;
+ uint8_t rdata[RANDOM_LEN];
/*
* Copy interpreter's name and argument to argv[0] and argv[1].
@@ -1603,8 +1642,9 @@ stk_copyin(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp)
args->ne = args->na - argc;
/*
- * Add AT_SUN_PLATFORM, AT_SUN_EXECNAME, AT_SUN_BRANDNAME, and
- * AT_SUN_EMULATOR strings to the stack.
+ * Add AT_SUN_PLATFORM, AT_SUN_EXECNAME, AT_SUN_BRANDNAME,
+ * AT_SUN_BRAND_NROOT, and AT_SUN_EMULATOR strings, as well as AT_RANDOM
+ * array, to the stack.
*/
if (auxvpp != NULL && *auxvpp != NULL) {
if ((error = stk_add(args, platform, UIO_SYSSPACE)) != 0)
@@ -1617,6 +1657,33 @@ stk_copyin(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp)
if (args->emulator != NULL &&
(error = stk_add(args, args->emulator, UIO_SYSSPACE)) != 0)
return (error);
+
+ /*
+ * For the AT_RANDOM aux vector we provide 16 bytes of random
+ * data. However, we don't want to depend on
+ * kcf_rnd_get_pseudo_bytes for early processes, so just jumble
+ * some bits from hrtime to get some (pseudo)randomness.
+ */
+ rvals = (uint32_t)gethrtime();
+ rtp = (uint8_t *)&rvals;
+ rdata[0] = rdata[11] = *rtp++;
+ rdata[1] = rdata[15] = *rtp++;
+ rdata[2] = rdata[9] = *rtp++;
+ rdata[3] = rdata[12] = *rtp;
+
+ rtp = (uint8_t *)&rvals;
+ rdata[4] = rdata[8] = ~(*rtp++);
+ rdata[5] = rdata[14] = ~(*rtp++);
+ rdata[6] = rdata[13] = ~(*rtp++);
+ rdata[7] = rdata[10] = ~(*rtp);
+
+ if ((error = stk_byte_add(args, rdata, sizeof (rdata))) != 0)
+ return (error);
+
+ if (args->brand_nroot != NULL &&
+ (error = stk_add(args, args->brand_nroot,
+ UIO_SYSSPACE)) != 0)
+ return (error);
}
/*
@@ -1723,7 +1790,7 @@ stk_copyout(uarg_t *args, char *usrstack, void **auxvpp, user_t *up)
/*
* Fill in the aux vector now that we know the user stack addresses
* for the AT_SUN_PLATFORM, AT_SUN_EXECNAME, AT_SUN_BRANDNAME and
- * AT_SUN_EMULATOR strings.
+ * AT_SUN_EMULATOR strings, as well as the AT_RANDOM array.
*/
if (auxvpp != NULL && *auxvpp != NULL) {
if (args->to_model == DATAMODEL_NATIVE) {
@@ -1736,6 +1803,11 @@ stk_copyout(uarg_t *args, char *usrstack, void **auxvpp, user_t *up)
if (args->emulator != NULL)
ADDAUX(*a,
AT_SUN_EMULATOR, (long)&ustrp[*--offp])
+ ADDAUX(*a, AT_RANDOM, (long)&ustrp[*--offp])
+ if (args->brand_nroot != NULL) {
+ ADDAUX(*a,
+ AT_SUN_BRAND_NROOT, (long)&ustrp[*--offp])
+ }
} else {
auxv32_t **a = (auxv32_t **)auxvpp;
ADDAUX(*a,
@@ -1748,6 +1820,11 @@ stk_copyout(uarg_t *args, char *usrstack, void **auxvpp, user_t *up)
if (args->emulator != NULL)
ADDAUX(*a, AT_SUN_EMULATOR,
(int)(uintptr_t)&ustrp[*--offp])
+ ADDAUX(*a, AT_RANDOM, (int)(uintptr_t)&ustrp[*--offp])
+ if (args->brand_nroot != NULL) {
+ ADDAUX(*a, AT_SUN_BRAND_NROOT,
+ (int)(uintptr_t)&ustrp[*--offp])
+ }
}
}
diff --git a/usr/src/uts/common/os/exit.c b/usr/src/uts/common/os/exit.c
index c5d54b5978..15087ae0f7 100644
--- a/usr/src/uts/common/os/exit.c
+++ b/usr/src/uts/common/os/exit.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, Joyent, Inc. All rights reserved.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -345,6 +345,8 @@ proc_exit(int why, int what)
refstr_t *cwd;
hrtime_t hrutime, hrstime;
int evaporate;
+ brand_t *orig_brand = NULL;
+ void *brand_data = NULL;
/*
* Stop and discard the process's lwps except for the current one,
@@ -370,14 +372,22 @@ proc_exit(int why, int what)
DTRACE_PROC1(exit, int, why);
/*
- * Will perform any brand specific proc exit processing, since this
- * is always the last lwp, will also perform lwp_exit and free brand
- * data
+ * Will perform any brand specific proc exit processing. Since this
+ * is always the last lwp, will also perform lwp_exit and free
+ * brand_data, except in the case that the brand has a b_exit_with_sig
+ * handler. In this case we free the brand_data later within this
+ * function.
*/
+ mutex_enter(&p->p_lock);
if (PROC_IS_BRANDED(p)) {
+ orig_brand = p->p_brand;
+ if (p->p_brand_data != NULL && orig_brand->b_data_size > 0) {
+ brand_data = p->p_brand_data;
+ }
lwp_detach_brand_hdlrs(lwp);
brand_clearbrand(p, B_FALSE);
}
+ mutex_exit(&p->p_lock);
/*
* Don't let init exit unless zone_start_init() failed its exec, or
@@ -393,10 +403,10 @@ proc_exit(int why, int what)
if (z->zone_restart_init == B_TRUE) {
if (restart_init(what, why) == 0)
return (0);
- } else {
- (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL,
- CRED());
}
+
+ z->zone_init_status = wstat(why, what);
+ (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL, CRED());
}
/*
@@ -839,8 +849,60 @@ proc_exit(int why, int what)
mutex_exit(&p->p_lock);
if (!evaporate) {
- p->p_pidflag &= ~CLDPEND;
- sigcld(p, sqp);
+ /*
+ * The brand specific code only happens when the brand has a
+ * function to call in place of sigcld, the data itself still
+ * existed, and the parent of the exiting process is not the
+ * global zone init. If the parent is the global zone init,
+ * then the process was reparented, and we don't want brand
+ * code delivering possibly strange signals to init. Also, init
+ * is not branded, so any brand specific exit data will not be
+ * picked up by init anyway.
+ * It is assumed by this code that any brand where
+ * b_exit_with_sig == NULL, will free its own brand_data rather
+ * than letting this piece of code free it.
+ */
+ if (orig_brand != NULL &&
+ orig_brand->b_ops->b_exit_with_sig != NULL &&
+ brand_data != NULL && p->p_ppid != 1) {
+ /*
+ * The code for _fini that could unload the brand_t
+ * blocks until the count of zones using the module
+ * reaches zero. Zones decrement the refcount on their
+ * brands only after all user tasks in that zone have
+ * exited and been waited on. The decrement on the
+ * brand's refcount happen in zone_destroy(). That
+ * depends on zone_shutdown() having been completed.
+ * zone_shutdown() includes a call to zone_empty(),
+ * where the zone waits for itself to reach the state
+ * ZONE_IS_EMPTY. This state is only set in either
+ * zone_shutdown(), when there are no user processes as
+ * the zone enters this function, or in
+ * zone_task_rele(). zone_task_rele() is called from
+ * code triggered by waiting on processes, not by the
+ * processes exiting through proc_exit(). This means
+ * all the branded processes that could exist for a
+ * specific brand_t must exit and get reaped before the
+ * refcount on the brand_t can reach 0. _fini will
+ * never unload the corresponding brand module before
+ * proc_exit finishes execution for all processes
+ * branded with a particular brand_t, which makes the
+ * operation below safe to do. Brands that wish to use
+ * this mechanism must wait in _fini as described
+ * above.
+ */
+ orig_brand->b_ops->b_exit_with_sig(p,
+ sqp, brand_data);
+ } else {
+ p->p_pidflag &= ~CLDPEND;
+ sigcld(p, sqp);
+ }
+ if (brand_data != NULL) {
+ kmem_free(brand_data, orig_brand->b_data_size);
+ brand_data = NULL;
+ orig_brand = NULL;
+ }
+
} else {
/*
* Do what sigcld() would do if the disposition
@@ -950,7 +1012,8 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
pp = ttoproc(curthread);
/*
- * lock parent mutex so that sibling chain can be searched.
+ * Anytime you are looking for a process, you take pidlock to prevent
+ * things from changing as you look.
*/
mutex_enter(&pidlock);
@@ -981,6 +1044,11 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
continue;
if (idtype == P_PGID && id != cp->p_pgrp)
continue;
+ if (PROC_IS_BRANDED(pp)) {
+ if (BROP(pp)->b_wait_filter != NULL &&
+ BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
+ continue;
+ }
switch (cp->p_wcode) {
@@ -1031,6 +1099,11 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
continue;
if (idtype == P_PGID && id != cp->p_pgrp)
continue;
+ if (PROC_IS_BRANDED(pp)) {
+ if (BROP(pp)->b_wait_filter != NULL &&
+ BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
+ continue;
+ }
switch (cp->p_wcode) {
case CLD_TRAPPED:
diff --git a/usr/src/uts/common/os/grow.c b/usr/src/uts/common/os/grow.c
index f5e92cfd94..0c4c0bcad6 100644
--- a/usr/src/uts/common/os/grow.c
+++ b/usr/src/uts/common/os/grow.c
@@ -19,7 +19,10 @@
* CDDL HEADER END
*/
-/* Copyright 2013 OmniTI Computer Consulting, Inc. All rights reserved. */
+/*
+ * Copyright 2013 OmniTI Computer Consulting, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
@@ -52,6 +55,7 @@
#include <sys/fcntl.h>
#include <sys/lwpchan_impl.h>
#include <sys/nbmlock.h>
+#include <sys/brand.h>
#include <vm/hat.h>
#include <vm/as.h>
@@ -522,6 +526,20 @@ choose_addr(struct as *as, caddr_t *addrp, size_t len, offset_t off,
return (0);
}
+caddr_t
+map_userlimit(proc_t *pp, struct as *as, int flags)
+{
+ if (flags & _MAP_LOW32) {
+ if (PROC_IS_BRANDED(pp) && BROP(pp)->b_map32limit != NULL) {
+ return ((caddr_t)(uintptr_t)BROP(pp)->b_map32limit(pp));
+ } else {
+ return ((caddr_t)_userlimit32);
+ }
+ }
+
+ return (as->a_userlimit);
+}
+
/*
* Used for MAP_ANON - fast way to get anonymous pages
@@ -537,8 +555,6 @@ zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags,
return (EACCES);
if ((flags & MAP_FIXED) != 0) {
- caddr_t userlimit;
-
/*
* Use the user address. First verify that
* the address to be used is page aligned.
@@ -547,9 +563,8 @@ zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags,
if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
return (EINVAL);
- userlimit = flags & _MAP_LOW32 ?
- (caddr_t)USERLIMIT32 : as->a_userlimit;
- switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
+ switch (valid_usr_range(*addrp, len, uprot, as,
+ map_userlimit(as->a_proc, as, flags))) {
case RANGE_OKAY:
break;
case RANGE_BADPROT:
@@ -717,8 +732,6 @@ smmap_common(caddr_t *addrp, size_t len,
* If the user specified an address, do some simple checks here
*/
if ((flags & MAP_FIXED) != 0) {
- caddr_t userlimit;
-
/*
* Use the user address. First verify that
* the address to be used is page aligned.
@@ -726,10 +739,8 @@ smmap_common(caddr_t *addrp, size_t len,
*/
if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
return (EINVAL);
-
- userlimit = flags & _MAP_LOW32 ?
- (caddr_t)USERLIMIT32 : as->a_userlimit;
- switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
+ switch (valid_usr_range(*addrp, len, uprot, as,
+ map_userlimit(curproc, as, flags))) {
case RANGE_OKAY:
break;
case RANGE_BADPROT:
diff --git a/usr/src/uts/common/os/logsubr.c b/usr/src/uts/common/os/logsubr.c
index 149f5f8a88..86e9045887 100644
--- a/usr/src/uts/common/os/logsubr.c
+++ b/usr/src/uts/common/os/logsubr.c
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2013 Gary Mills
* Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -249,8 +250,7 @@ log_init(void)
*/
printf("\rSunOS Release %s Version %s %u-bit\n",
utsname.release, utsname.version, NBBY * (uint_t)sizeof (void *));
- printf("Copyright (c) 1983, 2010, Oracle and/or its affiliates. "
- "All rights reserved.\n");
+ printf("Copyright (c) 2010-2014, Joyent Inc. All rights reserved.\n");
#ifdef DEBUG
printf("DEBUG enabled\n");
#endif
diff --git a/usr/src/uts/common/os/msacct.c b/usr/src/uts/common/os/msacct.c
index 928c6b3bb4..66994321f7 100644
--- a/usr/src/uts/common/os/msacct.c
+++ b/usr/src/uts/common/os/msacct.c
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -566,27 +567,18 @@ cpu_update_pct(kthread_t *t, hrtime_t newtime)
*/
do {
- if (T_ONPROC(t) && t->t_waitrq == 0) {
- hrlb = t->t_hrtime;
+ pctcpu = t->t_pctcpu;
+ hrlb = t->t_hrtime;
+ delta = newtime - hrlb;
+ if (delta < 0) {
+ newtime = gethrtime_unscaled();
delta = newtime - hrlb;
- if (delta < 0) {
- newtime = gethrtime_unscaled();
- delta = newtime - hrlb;
- }
- t->t_hrtime = newtime;
- scalehrtime(&delta);
- pctcpu = t->t_pctcpu;
+ }
+ t->t_hrtime = newtime;
+ scalehrtime(&delta);
+ if (T_ONPROC(t) && t->t_waitrq == 0) {
npctcpu = cpu_grow(pctcpu, delta);
} else {
- hrlb = t->t_hrtime;
- delta = newtime - hrlb;
- if (delta < 0) {
- newtime = gethrtime_unscaled();
- delta = newtime - hrlb;
- }
- t->t_hrtime = newtime;
- scalehrtime(&delta);
- pctcpu = t->t_pctcpu;
npctcpu = cpu_decay(pctcpu, delta);
}
} while (atomic_cas_32(&t->t_pctcpu, pctcpu, npctcpu) != pctcpu);
diff --git a/usr/src/uts/common/os/netstack.c b/usr/src/uts/common/os/netstack.c
index b8467fbe13..5c556bffef 100644
--- a/usr/src/uts/common/os/netstack.c
+++ b/usr/src/uts/common/os/netstack.c
@@ -22,6 +22,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
#include <sys/param.h>
@@ -220,10 +221,37 @@ netstack_unregister(int moduleid)
* instances can use this module.
*/
for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
+ boolean_t created = B_FALSE;
nm_state_t *nms = &ns->netstack_m_state[moduleid];
mutex_enter(&ns->netstack_lock);
- if (ns_reg[moduleid].nr_shutdown != NULL &&
+
+ /*
+ * We need to be careful here. We could actually have a netstack
+ * being created as we speak waiting for us to let go of this
+ * lock to proceed. It may have set NSS_CREATE_NEEDED, but not
+ * have gotten to the point of completing it yet. If
+ * NSS_CREATE_NEEDED, we can safely just remove it here and
+ * never create the module. However, if NSS_CREATE_INPROGRESS is
+ * set, we need to still flag this module for shutdown and
+ * deletion, just as though it had reached NSS_CREATE_COMPLETED.
+ *
+ * It is safe to do that because of two different guarantees
+ * that exist in the system. The first is that before we do a
+ * create, shutdown, or destroy, we ensure that nothing else is
+ * in progress in the system for this netstack and wait for it
+ * to complete. Secondly, because the zone is being created, we
+ * know that the following call to apply_all_netstack will block
+ * on the zone finishing its initialization.
+ */
+ if (nms->nms_flags & NSS_CREATE_NEEDED)
+ nms->nms_flags &= ~NSS_CREATE_NEEDED;
+
+ if (nms->nms_flags & NSS_CREATE_INPROGRESS ||
+ nms->nms_flags & NSS_CREATE_COMPLETED)
+ created = B_TRUE;
+
+ if (ns_reg[moduleid].nr_shutdown != NULL && created &&
(nms->nms_flags & NSS_CREATE_COMPLETED) &&
(nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
@@ -231,8 +259,7 @@ netstack_unregister(int moduleid)
netstack_t *, ns, int, moduleid);
}
if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) &&
- ns_reg[moduleid].nr_destroy != NULL &&
- (nms->nms_flags & NSS_CREATE_COMPLETED) &&
+ ns_reg[moduleid].nr_destroy != NULL && created &&
(nms->nms_flags & NSS_DESTROY_ALL) == 0) {
nms->nms_flags |= NSS_DESTROY_NEEDED;
DTRACE_PROBE2(netstack__destroy__needed,
diff --git a/usr/src/uts/common/os/pid.c b/usr/src/uts/common/os/pid.c
index b555bb82b7..39db5cb27d 100644
--- a/usr/src/uts/common/os/pid.c
+++ b/usr/src/uts/common/os/pid.c
@@ -112,6 +112,18 @@ pid_lookup(pid_t pid)
return (pidp);
}
+struct pid *
+pid_find(pid_t pid)
+{
+ struct pid *pidp;
+
+ mutex_enter(&pidlinklock);
+ pidp = pid_lookup(pid);
+ mutex_exit(&pidlinklock);
+
+ return (pidp);
+}
+
void
pid_setmin(void)
{
diff --git a/usr/src/uts/common/os/policy.c b/usr/src/uts/common/os/policy.c
index 05deaaf43b..40db66fb8d 100644
--- a/usr/src/uts/common/os/policy.c
+++ b/usr/src/uts/common/os/policy.c
@@ -2575,3 +2575,12 @@ secpolicy_ppp_config(const cred_t *cr)
return (secpolicy_net_config(cr, B_FALSE));
return (PRIV_POLICY(cr, PRIV_SYS_PPP_CONFIG, B_FALSE, EPERM, NULL));
}
+
+int
+secpolicy_hyprlofs_control(const cred_t *cr)
+{
+ if (PRIV_POLICY(cr, PRIV_HYPRLOFS_CONTROL, B_FALSE, EPERM, NULL))
+ return (EPERM);
+ return (0);
+}
+
diff --git a/usr/src/uts/common/os/priv_defs b/usr/src/uts/common/os/priv_defs
index bfacce1739..f510050a01 100644
--- a/usr/src/uts/common/os/priv_defs
+++ b/usr/src/uts/common/os/priv_defs
@@ -177,6 +177,10 @@ privilege PRIV_GRAPHICS_MAP
Allows a process to perform privileged mappings through a
graphics device.
+privilege PRIV_HYPRLOFS_CONTROL
+
+ Allows a process to manage hyprlofs entries.
+
privilege PRIV_IPC_DAC_READ
Allows a process to read a System V IPC
diff --git a/usr/src/uts/common/os/sig.c b/usr/src/uts/common/os/sig.c
index 0b79c3765a..6a94a2eecd 100644
--- a/usr/src/uts/common/os/sig.c
+++ b/usr/src/uts/common/os/sig.c
@@ -22,7 +22,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2014, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -60,6 +60,7 @@
#include <sys/cyclic.h>
#include <sys/dtrace.h>
#include <sys/sdt.h>
+#include <sys/brand.h>
const k_sigset_t nullsmask = {0, 0, 0};
@@ -1413,6 +1414,9 @@ psig(void)
DTRACE_PROC3(signal__handle, int, sig, k_siginfo_t *,
sip, void (*)(void), func);
+ if (PROC_IS_BRANDED(p) && BROP(p)->b_psig_to_proc)
+ BROP(p)->b_psig_to_proc(p, t, sig);
+
lwp->lwp_cursig = 0;
lwp->lwp_extsig = 0;
if (lwp->lwp_curinfo) {
diff --git a/usr/src/uts/common/os/streamio.c b/usr/src/uts/common/os/streamio.c
index c6ebe8b110..4c99f92411 100644
--- a/usr/src/uts/common/os/streamio.c
+++ b/usr/src/uts/common/os/streamio.c
@@ -24,6 +24,7 @@
/*
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -3177,6 +3178,7 @@ job_control_type(int cmd)
case JAGENT: /* Obsolete */
case JTRUN: /* Obsolete */
case JXTPROTO: /* Obsolete */
+ case TIOCSETLD:
return (JCSETP);
}
@@ -8311,7 +8313,7 @@ chkrd:
}
*reventsp = (short)retevents;
- if (retevents) {
+ if (retevents && !(events & POLLET)) {
if (headlocked)
mutex_exit(&stp->sd_lock);
return (0);
diff --git a/usr/src/uts/common/os/sysent.c b/usr/src/uts/common/os/sysent.c
index e3965f7d82..b9c2ea492e 100644
--- a/usr/src/uts/common/os/sysent.c
+++ b/usr/src/uts/common/os/sysent.c
@@ -23,6 +23,7 @@
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012 Milan Jurik. All rights reserved.
* Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
+ * Copyright 2014, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -1089,18 +1090,20 @@ char **syscallnames;
systrace_sysent_t *systrace_sysent;
void (*systrace_probe)(dtrace_id_t, uintptr_t, uintptr_t,
- uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+ uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
/*ARGSUSED*/
void
systrace_stub(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
- uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5)
+ uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5,
+ uintptr_t arg6, uintptr_t arg7)
{}
/*ARGSUSED*/
int64_t
dtrace_systrace_syscall(uintptr_t arg0, uintptr_t arg1, uintptr_t arg2,
- uintptr_t arg3, uintptr_t arg4, uintptr_t arg5)
+ uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6,
+ uintptr_t arg7)
{
systrace_sysent_t *sy = &systrace_sysent[curthread->t_sysnum];
dtrace_id_t id;
@@ -1108,7 +1111,8 @@ dtrace_systrace_syscall(uintptr_t arg0, uintptr_t arg1, uintptr_t arg2,
proc_t *p;
if ((id = sy->stsy_entry) != DTRACE_IDNONE)
- (*systrace_probe)(id, arg0, arg1, arg2, arg3, arg4, arg5);
+ (*systrace_probe)(id, arg0, arg1, arg2, arg3, arg4, arg5,
+ arg6, arg7);
/*
* We want to explicitly allow DTrace consumers to stop a process
@@ -1122,14 +1126,15 @@ dtrace_systrace_syscall(uintptr_t arg0, uintptr_t arg1, uintptr_t arg2,
}
mutex_exit(&p->p_lock);
- rval = (*sy->stsy_underlying)(arg0, arg1, arg2, arg3, arg4, arg5);
+ rval = (*sy->stsy_underlying)(arg0, arg1, arg2, arg3, arg4, arg5,
+ arg6, arg7);
if (ttolwp(curthread)->lwp_errno != 0)
rval = -1;
if ((id = sy->stsy_return) != DTRACE_IDNONE)
(*systrace_probe)(id, (uintptr_t)rval, (uintptr_t)rval,
- (uintptr_t)((int64_t)rval >> 32), 0, 0, 0);
+ (uintptr_t)((int64_t)rval >> 32), 0, 0, 0, 0, 0);
return (rval);
}
@@ -1141,7 +1146,8 @@ systrace_sysent_t *systrace_sysent32;
/*ARGSUSED*/
int64_t
dtrace_systrace_syscall32(uintptr_t arg0, uintptr_t arg1, uintptr_t arg2,
- uintptr_t arg3, uintptr_t arg4, uintptr_t arg5)
+ uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6,
+ uintptr_t arg7)
{
systrace_sysent_t *sy = &systrace_sysent32[curthread->t_sysnum];
dtrace_id_t id;
@@ -1149,7 +1155,8 @@ dtrace_systrace_syscall32(uintptr_t arg0, uintptr_t arg1, uintptr_t arg2,
proc_t *p;
if ((id = sy->stsy_entry) != DTRACE_IDNONE)
- (*systrace_probe)(id, arg0, arg1, arg2, arg3, arg4, arg5);
+ (*systrace_probe)(id, arg0, arg1, arg2, arg3, arg4, arg5, arg6,
+ arg7);
/*
* We want to explicitly allow DTrace consumers to stop a process
@@ -1163,14 +1170,15 @@ dtrace_systrace_syscall32(uintptr_t arg0, uintptr_t arg1, uintptr_t arg2,
}
mutex_exit(&p->p_lock);
- rval = (*sy->stsy_underlying)(arg0, arg1, arg2, arg3, arg4, arg5);
+ rval = (*sy->stsy_underlying)(arg0, arg1, arg2, arg3, arg4, arg5, arg6,
+ arg7);
if (ttolwp(curthread)->lwp_errno != 0)
rval = -1;
if ((id = sy->stsy_return) != DTRACE_IDNONE)
(*systrace_probe)(id, (uintptr_t)rval, (uintptr_t)rval,
- (uintptr_t)((uint64_t)rval >> 32), 0, 0, 0);
+ (uintptr_t)((uint64_t)rval >> 32), 0, 0, 0, 0, 0);
return (rval);
}
@@ -1198,5 +1206,5 @@ dtrace_systrace_rtt(void)
}
if ((id = sy->stsy_return) != DTRACE_IDNONE)
- (*systrace_probe)(id, 0, 0, 0, 0, 0, 0);
+ (*systrace_probe)(id, 0, 0, 0, 0, 0, 0, 0, 0);
}
diff --git a/usr/src/uts/common/os/vmem.c b/usr/src/uts/common/os/vmem.c
index a554f8c3f3..0a6fe0ef96 100644
--- a/usr/src/uts/common/os/vmem.c
+++ b/usr/src/uts/common/os/vmem.c
@@ -1618,7 +1618,7 @@ vmem_destroy(vmem_t *vmp)
leaked = vmem_size(vmp, VMEM_ALLOC);
if (leaked != 0)
- cmn_err(CE_WARN, "vmem_destroy('%s'): leaked %lu %s",
+ cmn_err(CE_WARN, "!vmem_destroy('%s'): leaked %lu %s",
vmp->vm_name, leaked, (vmp->vm_cflags & VMC_IDENTIFIER) ?
"identifiers" : "bytes");
diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c
index 5d146c03b5..c4f6934cb4 100644
--- a/usr/src/uts/common/os/zone.c
+++ b/usr/src/uts/common/os/zone.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2013, Joyent Inc. All rights reserved.
+ * Copyright 2014, Joyent Inc. All rights reserved.
*/
/*
@@ -370,21 +370,18 @@ static char *zone_ref_subsys_names[] = {
rctl_hndl_t rc_zone_cpu_shares;
rctl_hndl_t rc_zone_locked_mem;
rctl_hndl_t rc_zone_max_swap;
+rctl_hndl_t rc_zone_phys_mem;
rctl_hndl_t rc_zone_max_lofi;
rctl_hndl_t rc_zone_cpu_cap;
+rctl_hndl_t rc_zone_cpu_baseline;
+rctl_hndl_t rc_zone_cpu_burst_time;
+rctl_hndl_t rc_zone_zfs_io_pri;
rctl_hndl_t rc_zone_nlwps;
rctl_hndl_t rc_zone_nprocs;
rctl_hndl_t rc_zone_shmmax;
rctl_hndl_t rc_zone_shmmni;
rctl_hndl_t rc_zone_semmni;
rctl_hndl_t rc_zone_msgmni;
-/*
- * Synchronization primitives used to synchronize between mounts and zone
- * creation/destruction.
- */
-static int mounts_in_progress;
-static kcondvar_t mount_cv;
-static kmutex_t mount_lock;
const char * const zone_default_initname = "/sbin/init";
static char * const zone_prefix = "/zone/";
@@ -424,23 +421,27 @@ static boolean_t zsd_wait_for_inprogress(zone_t *, struct zsd_entry *,
* Version 5 alters the zone_boot system call, and converts its old
* bootargs parameter to be set by the zone_setattr API instead.
* Version 6 adds the flag argument to zone_create.
+ * Version 7 adds the requested zoneid to zone_create.
*/
-static const int ZONE_SYSCALL_API_VERSION = 6;
+static const int ZONE_SYSCALL_API_VERSION = 7;
/*
* Certain filesystems (such as NFS and autofs) need to know which zone
* the mount is being placed in. Because of this, we need to be able to
- * ensure that a zone isn't in the process of being created such that
- * nfs_mount() thinks it is in the global zone, while by the time it
- * gets added the list of mounted zones, it ends up on zoneA's mount
- * list.
+ * ensure that a zone isn't in the process of being created/destroyed such
+ * that nfs_mount() thinks it is in the global/NGZ zone, while by the time
+ * it gets added the list of mounted zones, it ends up on the wrong zone's
+ * mount list. Since a zone can't reside on an NFS file system, we don't
+ * have to worry about the zonepath itself.
*
* The following functions: block_mounts()/resume_mounts() and
* mount_in_progress()/mount_completed() are used by zones and the VFS
- * layer (respectively) to synchronize zone creation and new mounts.
+ * layer (respectively) to synchronize zone state transitions and new
+ * mounts within a zone. This syncronization is on a per-zone basis, so
+ * activity for one zone will not interfere with activity for another zone.
*
* The semantics are like a reader-reader lock such that there may
- * either be multiple mounts (or zone creations, if that weren't
+ * either be multiple mounts (or zone state transitions, if that weren't
* serialized by zonehash_lock) in progress at the same time, but not
* both.
*
@@ -448,10 +449,8 @@ static const int ZONE_SYSCALL_API_VERSION = 6;
* taking too long.
*
* The semantics are such that there is unfair bias towards the
- * "current" operation. This means that zone creations may starve if
- * there is a rapid succession of new mounts coming in to the system, or
- * there is a remote possibility that zones will be created at such a
- * rate that new mounts will not be able to proceed.
+ * "current" operation. This means that zone halt may starve if
+ * there is a rapid succession of new mounts coming in to the zone.
*/
/*
* Prevent new mounts from progressing to the point of calling
@@ -459,7 +458,7 @@ static const int ZONE_SYSCALL_API_VERSION = 6;
* them to complete.
*/
static int
-block_mounts(void)
+block_mounts(zone_t *zp)
{
int retval = 0;
@@ -468,19 +467,21 @@ block_mounts(void)
* called with zonehash_lock held.
*/
ASSERT(MUTEX_NOT_HELD(&zonehash_lock));
- mutex_enter(&mount_lock);
- while (mounts_in_progress > 0) {
- if (cv_wait_sig(&mount_cv, &mount_lock) == 0)
+ mutex_enter(&zp->zone_mount_lock);
+ while (zp->zone_mounts_in_progress > 0) {
+ if (cv_wait_sig(&zp->zone_mount_cv, &zp->zone_mount_lock) == 0)
goto signaled;
}
/*
* A negative value of mounts_in_progress indicates that mounts
- * have been blocked by (-mounts_in_progress) different callers.
+ * have been blocked by (-mounts_in_progress) different callers
+ * (remotely possible if two threads enter zone_shutdown at the same
+ * time).
*/
- mounts_in_progress--;
+ zp->zone_mounts_in_progress--;
retval = 1;
signaled:
- mutex_exit(&mount_lock);
+ mutex_exit(&zp->zone_mount_lock);
return (retval);
}
@@ -489,26 +490,26 @@ signaled:
* Allow them to progress if we were the last obstacle.
*/
static void
-resume_mounts(void)
+resume_mounts(zone_t *zp)
{
- mutex_enter(&mount_lock);
- if (++mounts_in_progress == 0)
- cv_broadcast(&mount_cv);
- mutex_exit(&mount_lock);
+ mutex_enter(&zp->zone_mount_lock);
+ if (++zp->zone_mounts_in_progress == 0)
+ cv_broadcast(&zp->zone_mount_cv);
+ mutex_exit(&zp->zone_mount_lock);
}
/*
- * The VFS layer is busy with a mount; zones should wait until all
- * mounts are completed to progress.
+ * The VFS layer is busy with a mount; this zone should wait until all
+ * of its mounts are completed to progress.
*/
void
-mount_in_progress(void)
+mount_in_progress(zone_t *zp)
{
- mutex_enter(&mount_lock);
- while (mounts_in_progress < 0)
- cv_wait(&mount_cv, &mount_lock);
- mounts_in_progress++;
- mutex_exit(&mount_lock);
+ mutex_enter(&zp->zone_mount_lock);
+ while (zp->zone_mounts_in_progress < 0)
+ cv_wait(&zp->zone_mount_cv, &zp->zone_mount_lock);
+ zp->zone_mounts_in_progress++;
+ mutex_exit(&zp->zone_mount_lock);
}
/*
@@ -516,12 +517,12 @@ mount_in_progress(void)
* callers if this is the last mount.
*/
void
-mount_completed(void)
+mount_completed(zone_t *zp)
{
- mutex_enter(&mount_lock);
- if (--mounts_in_progress == 0)
- cv_broadcast(&mount_cv);
- mutex_exit(&mount_lock);
+ mutex_enter(&zp->zone_mount_lock);
+ if (--zp->zone_mounts_in_progress == 0)
+ cv_broadcast(&zp->zone_mount_cv);
+ mutex_exit(&zp->zone_mount_lock);
}
/*
@@ -1381,6 +1382,114 @@ static rctl_ops_t zone_cpu_cap_ops = {
/*ARGSUSED*/
static rctl_qty_t
+zone_cpu_base_get(rctl_t *rctl, struct proc *p)
+{
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ return (cpucaps_zone_get_base(p->p_zone));
+}
+
+/*
+ * The zone cpu base is used to set the baseline CPU for the zone
+ * so we can track when the zone is bursting.
+ */
+/*ARGSUSED*/
+static int
+zone_cpu_base_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
+ rctl_qty_t nv)
+{
+ zone_t *zone = e->rcep_p.zone;
+
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ ASSERT(e->rcep_t == RCENTITY_ZONE);
+
+ if (zone == NULL)
+ return (0);
+
+ return (cpucaps_zone_set_base(zone, nv));
+}
+
+static rctl_ops_t zone_cpu_base_ops = {
+ rcop_no_action,
+ zone_cpu_base_get,
+ zone_cpu_base_set,
+ rcop_no_test
+};
+
+/*ARGSUSED*/
+static rctl_qty_t
+zone_cpu_burst_time_get(rctl_t *rctl, struct proc *p)
+{
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ return (cpucaps_zone_get_burst_time(p->p_zone));
+}
+
+/*
+ * The zone cpu burst time is used to set the amount of time CPU(s) can be
+ * bursting for the zone.
+ */
+/*ARGSUSED*/
+static int
+zone_cpu_burst_time_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
+ rctl_qty_t nv)
+{
+ zone_t *zone = e->rcep_p.zone;
+
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ ASSERT(e->rcep_t == RCENTITY_ZONE);
+
+ if (zone == NULL)
+ return (0);
+
+ return (cpucaps_zone_set_burst_time(zone, nv));
+}
+
+static rctl_ops_t zone_cpu_burst_time_ops = {
+ rcop_no_action,
+ zone_cpu_burst_time_get,
+ zone_cpu_burst_time_set,
+ rcop_no_test
+};
+
+/*
+ * zone.zfs-io-pri resource control support (IO priority).
+ */
+/*ARGSUSED*/
+static rctl_qty_t
+zone_zfs_io_pri_get(rctl_t *rctl, struct proc *p)
+{
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ return (p->p_zone->zone_zfs_io_pri);
+}
+
+/*ARGSUSED*/
+static int
+zone_zfs_io_pri_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
+ rctl_qty_t nv)
+{
+ zone_t *zone = e->rcep_p.zone;
+
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ ASSERT(e->rcep_t == RCENTITY_ZONE);
+
+ if (zone == NULL)
+ return (0);
+
+ /*
+ * set priority to the new value.
+ */
+ zone->zone_zfs_io_pri = nv;
+ return (0);
+}
+
+static rctl_ops_t zone_zfs_io_pri_ops = {
+ rcop_no_action,
+ zone_zfs_io_pri_get,
+ zone_zfs_io_pri_set,
+ rcop_no_test
+};
+
+/*ARGSUSED*/
+static rctl_qty_t
zone_lwps_usage(rctl_t *r, proc_t *p)
{
rctl_qty_t nlwps;
@@ -1675,6 +1784,39 @@ static rctl_ops_t zone_max_swap_ops = {
/*ARGSUSED*/
static rctl_qty_t
+zone_phys_mem_usage(rctl_t *rctl, struct proc *p)
+{
+ rctl_qty_t q;
+ zone_t *z = p->p_zone;
+
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ /* No additional lock because not enforced in the kernel */
+ q = z->zone_phys_mem;
+ return (q);
+}
+
+/*ARGSUSED*/
+static int
+zone_phys_mem_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
+ rctl_qty_t nv)
+{
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ ASSERT(e->rcep_t == RCENTITY_ZONE);
+ if (e->rcep_p.zone == NULL)
+ return (0);
+ e->rcep_p.zone->zone_phys_mem_ctl = nv;
+ return (0);
+}
+
+static rctl_ops_t zone_phys_mem_ops = {
+ rcop_no_action,
+ zone_phys_mem_usage,
+ zone_phys_mem_set,
+ rcop_no_test
+};
+
+/*ARGSUSED*/
+static rctl_qty_t
zone_max_lofi_usage(rctl_t *rctl, struct proc *p)
{
rctl_qty_t q;
@@ -1768,6 +1910,20 @@ zone_lockedmem_kstat_update(kstat_t *ksp, int rw)
}
static int
+zone_physmem_kstat_update(kstat_t *ksp, int rw)
+{
+ zone_t *zone = ksp->ks_private;
+ zone_kstat_t *zk = ksp->ks_data;
+
+ if (rw == KSTAT_WRITE)
+ return (EACCES);
+
+ zk->zk_usage.value.ui64 = zone->zone_phys_mem;
+ zk->zk_value.value.ui64 = zone->zone_phys_mem_ctl;
+ return (0);
+}
+
+static int
zone_nprocs_kstat_update(kstat_t *ksp, int rw)
{
zone_t *zone = ksp->ks_private;
@@ -1796,7 +1952,7 @@ zone_swapresv_kstat_update(kstat_t *ksp, int rw)
}
static kstat_t *
-zone_kstat_create_common(zone_t *zone, char *name,
+zone_rctl_kstat_create_common(zone_t *zone, char *name,
int (*updatefunc) (kstat_t *, int))
{
kstat_t *ksp;
@@ -1822,6 +1978,230 @@ zone_kstat_create_common(zone_t *zone, char *name,
}
static int
+zone_vfs_kstat_update(kstat_t *ksp, int rw)
+{
+ zone_t *zone = ksp->ks_private;
+ zone_vfs_kstat_t *zvp = ksp->ks_data;
+ kstat_io_t *kiop = &zone->zone_vfs_rwstats;
+
+ if (rw == KSTAT_WRITE)
+ return (EACCES);
+
+ /*
+ * Extract the VFS statistics from the kstat_io_t structure used by
+ * kstat_runq_enter() and related functions. Since the slow ops
+ * counters are updated directly by the VFS layer, there's no need to
+ * copy those statistics here.
+ *
+ * Note that kstat_runq_enter() and the related functions use
+ * gethrtime_unscaled(), so scale the time here.
+ */
+ zvp->zv_nread.value.ui64 = kiop->nread;
+ zvp->zv_reads.value.ui64 = kiop->reads;
+ zvp->zv_rtime.value.ui64 = kiop->rtime;
+ zvp->zv_rlentime.value.ui64 = kiop->rlentime;
+ zvp->zv_nwritten.value.ui64 = kiop->nwritten;
+ zvp->zv_writes.value.ui64 = kiop->writes;
+ zvp->zv_wtime.value.ui64 = kiop->wtime;
+ zvp->zv_wlentime.value.ui64 = kiop->wlentime;
+
+ scalehrtime((hrtime_t *)&zvp->zv_rtime.value.ui64);
+ scalehrtime((hrtime_t *)&zvp->zv_rlentime.value.ui64);
+ scalehrtime((hrtime_t *)&zvp->zv_wtime.value.ui64);
+ scalehrtime((hrtime_t *)&zvp->zv_wlentime.value.ui64);
+
+ return (0);
+}
+
+static kstat_t *
+zone_vfs_kstat_create(zone_t *zone)
+{
+ kstat_t *ksp;
+ zone_vfs_kstat_t *zvp;
+
+ if ((ksp = kstat_create_zone("zone_vfs", zone->zone_id,
+ zone->zone_name, "zone_vfs", KSTAT_TYPE_NAMED,
+ sizeof (zone_vfs_kstat_t) / sizeof (kstat_named_t),
+ KSTAT_FLAG_VIRTUAL, zone->zone_id)) == NULL)
+ return (NULL);
+
+ if (zone->zone_id != GLOBAL_ZONEID)
+ kstat_zone_add(ksp, GLOBAL_ZONEID);
+
+ zvp = ksp->ks_data = kmem_zalloc(sizeof (zone_vfs_kstat_t), KM_SLEEP);
+ ksp->ks_data_size += strlen(zone->zone_name) + 1;
+ ksp->ks_lock = &zone->zone_vfs_lock;
+ zone->zone_vfs_stats = zvp;
+
+ /* The kstat "name" field is not large enough for a full zonename */
+ kstat_named_init(&zvp->zv_zonename, "zonename", KSTAT_DATA_STRING);
+ kstat_named_setstr(&zvp->zv_zonename, zone->zone_name);
+ kstat_named_init(&zvp->zv_nread, "nread", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_reads, "reads", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_rtime, "rtime", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_rlentime, "rlentime", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_nwritten, "nwritten", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_writes, "writes", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_wtime, "wtime", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_wlentime, "wlentime", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_10ms_ops, "10ms_ops", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_100ms_ops, "100ms_ops", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_1s_ops, "1s_ops", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_10s_ops, "10s_ops", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_delay_cnt, "delay_cnt", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_delay_time, "delay_time", KSTAT_DATA_UINT64);
+
+ ksp->ks_update = zone_vfs_kstat_update;
+ ksp->ks_private = zone;
+
+ kstat_install(ksp);
+ return (ksp);
+}
+
+static int
+zone_zfs_kstat_update(kstat_t *ksp, int rw)
+{
+ zone_t *zone = ksp->ks_private;
+ zone_zfs_kstat_t *zzp = ksp->ks_data;
+ kstat_io_t *kiop = &zone->zone_zfs_rwstats;
+
+ if (rw == KSTAT_WRITE)
+ return (EACCES);
+
+ /*
+ * Extract the ZFS statistics from the kstat_io_t structure used by
+ * kstat_runq_enter() and related functions. Since the I/O throttle
+ * counters are updated directly by the ZFS layer, there's no need to
+ * copy those statistics here.
+ *
+ * Note that kstat_runq_enter() and the related functions use
+ * gethrtime_unscaled(), so scale the time here.
+ */
+ zzp->zz_nread.value.ui64 = kiop->nread;
+ zzp->zz_reads.value.ui64 = kiop->reads;
+ zzp->zz_rtime.value.ui64 = kiop->rtime;
+ zzp->zz_rlentime.value.ui64 = kiop->rlentime;
+ zzp->zz_nwritten.value.ui64 = kiop->nwritten;
+ zzp->zz_writes.value.ui64 = kiop->writes;
+
+ scalehrtime((hrtime_t *)&zzp->zz_rtime.value.ui64);
+ scalehrtime((hrtime_t *)&zzp->zz_rlentime.value.ui64);
+
+ return (0);
+}
+
+static kstat_t *
+zone_zfs_kstat_create(zone_t *zone)
+{
+ kstat_t *ksp;
+ zone_zfs_kstat_t *zzp;
+
+ if ((ksp = kstat_create_zone("zone_zfs", zone->zone_id,
+ zone->zone_name, "zone_zfs", KSTAT_TYPE_NAMED,
+ sizeof (zone_zfs_kstat_t) / sizeof (kstat_named_t),
+ KSTAT_FLAG_VIRTUAL, zone->zone_id)) == NULL)
+ return (NULL);
+
+ if (zone->zone_id != GLOBAL_ZONEID)
+ kstat_zone_add(ksp, GLOBAL_ZONEID);
+
+ zzp = ksp->ks_data = kmem_zalloc(sizeof (zone_zfs_kstat_t), KM_SLEEP);
+ ksp->ks_data_size += strlen(zone->zone_name) + 1;
+ ksp->ks_lock = &zone->zone_zfs_lock;
+ zone->zone_zfs_stats = zzp;
+
+ /* The kstat "name" field is not large enough for a full zonename */
+ kstat_named_init(&zzp->zz_zonename, "zonename", KSTAT_DATA_STRING);
+ kstat_named_setstr(&zzp->zz_zonename, zone->zone_name);
+ kstat_named_init(&zzp->zz_nread, "nread", KSTAT_DATA_UINT64);
+ kstat_named_init(&zzp->zz_reads, "reads", KSTAT_DATA_UINT64);
+ kstat_named_init(&zzp->zz_rtime, "rtime", KSTAT_DATA_UINT64);
+ kstat_named_init(&zzp->zz_rlentime, "rlentime", KSTAT_DATA_UINT64);
+ kstat_named_init(&zzp->zz_nwritten, "nwritten", KSTAT_DATA_UINT64);
+ kstat_named_init(&zzp->zz_writes, "writes", KSTAT_DATA_UINT64);
+ kstat_named_init(&zzp->zz_waittime, "waittime", KSTAT_DATA_UINT64);
+
+ ksp->ks_update = zone_zfs_kstat_update;
+ ksp->ks_private = zone;
+
+ kstat_install(ksp);
+ return (ksp);
+}
+
+static int
+zone_mcap_kstat_update(kstat_t *ksp, int rw)
+{
+ zone_t *zone = ksp->ks_private;
+ zone_mcap_kstat_t *zmp = ksp->ks_data;
+
+ if (rw == KSTAT_WRITE)
+ return (EACCES);
+
+ zmp->zm_rss.value.ui64 = zone->zone_phys_mem;
+ zmp->zm_phys_cap.value.ui64 = zone->zone_phys_mem_ctl;
+ zmp->zm_swap.value.ui64 = zone->zone_max_swap;
+ zmp->zm_swap_cap.value.ui64 = zone->zone_max_swap_ctl;
+ zmp->zm_nover.value.ui64 = zone->zone_mcap_nover;
+ zmp->zm_pagedout.value.ui64 = zone->zone_mcap_pagedout;
+ zmp->zm_pgpgin.value.ui64 = zone->zone_pgpgin;
+ zmp->zm_anonpgin.value.ui64 = zone->zone_anonpgin;
+ zmp->zm_execpgin.value.ui64 = zone->zone_execpgin;
+ zmp->zm_fspgin.value.ui64 = zone->zone_fspgin;
+ zmp->zm_anon_alloc_fail.value.ui64 = zone->zone_anon_alloc_fail;
+ zmp->zm_pf_throttle.value.ui64 = zone->zone_pf_throttle;
+ zmp->zm_pf_throttle_usec.value.ui64 = zone->zone_pf_throttle_usec;
+
+ return (0);
+}
+
+static kstat_t *
+zone_mcap_kstat_create(zone_t *zone)
+{
+ kstat_t *ksp;
+ zone_mcap_kstat_t *zmp;
+
+ if ((ksp = kstat_create_zone("memory_cap", zone->zone_id,
+ zone->zone_name, "zone_memory_cap", KSTAT_TYPE_NAMED,
+ sizeof (zone_mcap_kstat_t) / sizeof (kstat_named_t),
+ KSTAT_FLAG_VIRTUAL, zone->zone_id)) == NULL)
+ return (NULL);
+
+ if (zone->zone_id != GLOBAL_ZONEID)
+ kstat_zone_add(ksp, GLOBAL_ZONEID);
+
+ zmp = ksp->ks_data = kmem_zalloc(sizeof (zone_mcap_kstat_t), KM_SLEEP);
+ ksp->ks_data_size += strlen(zone->zone_name) + 1;
+ ksp->ks_lock = &zone->zone_mcap_lock;
+ zone->zone_mcap_stats = zmp;
+
+ /* The kstat "name" field is not large enough for a full zonename */
+ kstat_named_init(&zmp->zm_zonename, "zonename", KSTAT_DATA_STRING);
+ kstat_named_setstr(&zmp->zm_zonename, zone->zone_name);
+ kstat_named_init(&zmp->zm_rss, "rss", KSTAT_DATA_UINT64);
+ kstat_named_init(&zmp->zm_phys_cap, "physcap", KSTAT_DATA_UINT64);
+ kstat_named_init(&zmp->zm_swap, "swap", KSTAT_DATA_UINT64);
+ kstat_named_init(&zmp->zm_swap_cap, "swapcap", KSTAT_DATA_UINT64);
+ kstat_named_init(&zmp->zm_nover, "nover", KSTAT_DATA_UINT64);
+ kstat_named_init(&zmp->zm_pagedout, "pagedout", KSTAT_DATA_UINT64);
+ kstat_named_init(&zmp->zm_pgpgin, "pgpgin", KSTAT_DATA_UINT64);
+ kstat_named_init(&zmp->zm_anonpgin, "anonpgin", KSTAT_DATA_UINT64);
+ kstat_named_init(&zmp->zm_execpgin, "execpgin", KSTAT_DATA_UINT64);
+ kstat_named_init(&zmp->zm_fspgin, "fspgin", KSTAT_DATA_UINT64);
+ kstat_named_init(&zmp->zm_anon_alloc_fail, "anon_alloc_fail",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&zmp->zm_pf_throttle, "n_pf_throttle",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&zmp->zm_pf_throttle_usec, "n_pf_throttle_usec",
+ KSTAT_DATA_UINT64);
+
+ ksp->ks_update = zone_mcap_kstat_update;
+ ksp->ks_private = zone;
+
+ kstat_install(ksp);
+ return (ksp);
+}
+
+static int
zone_misc_kstat_update(kstat_t *ksp, int rw)
{
zone_t *zone = ksp->ks_private;
@@ -1900,13 +2280,30 @@ zone_misc_kstat_create(zone_t *zone)
static void
zone_kstat_create(zone_t *zone)
{
- zone->zone_lockedmem_kstat = zone_kstat_create_common(zone,
+ zone->zone_lockedmem_kstat = zone_rctl_kstat_create_common(zone,
"lockedmem", zone_lockedmem_kstat_update);
- zone->zone_swapresv_kstat = zone_kstat_create_common(zone,
+ zone->zone_swapresv_kstat = zone_rctl_kstat_create_common(zone,
"swapresv", zone_swapresv_kstat_update);
- zone->zone_nprocs_kstat = zone_kstat_create_common(zone,
+ zone->zone_physmem_kstat = zone_rctl_kstat_create_common(zone,
+ "physicalmem", zone_physmem_kstat_update);
+ zone->zone_nprocs_kstat = zone_rctl_kstat_create_common(zone,
"nprocs", zone_nprocs_kstat_update);
+ if ((zone->zone_vfs_ksp = zone_vfs_kstat_create(zone)) == NULL) {
+ zone->zone_vfs_stats = kmem_zalloc(
+ sizeof (zone_vfs_kstat_t), KM_SLEEP);
+ }
+
+ if ((zone->zone_zfs_ksp = zone_zfs_kstat_create(zone)) == NULL) {
+ zone->zone_zfs_stats = kmem_zalloc(
+ sizeof (zone_zfs_kstat_t), KM_SLEEP);
+ }
+
+ if ((zone->zone_mcap_ksp = zone_mcap_kstat_create(zone)) == NULL) {
+ zone->zone_mcap_stats = kmem_zalloc(
+ sizeof (zone_mcap_kstat_t), KM_SLEEP);
+ }
+
if ((zone->zone_misc_ksp = zone_misc_kstat_create(zone)) == NULL) {
zone->zone_misc_stats = kmem_zalloc(
sizeof (zone_misc_kstat_t), KM_SLEEP);
@@ -1933,8 +2330,17 @@ zone_kstat_delete(zone_t *zone)
sizeof (zone_kstat_t));
zone_kstat_delete_common(&zone->zone_swapresv_kstat,
sizeof (zone_kstat_t));
+ zone_kstat_delete_common(&zone->zone_physmem_kstat,
+ sizeof (zone_kstat_t));
zone_kstat_delete_common(&zone->zone_nprocs_kstat,
sizeof (zone_kstat_t));
+
+ zone_kstat_delete_common(&zone->zone_vfs_ksp,
+ sizeof (zone_vfs_kstat_t));
+ zone_kstat_delete_common(&zone->zone_zfs_ksp,
+ sizeof (zone_zfs_kstat_t));
+ zone_kstat_delete_common(&zone->zone_mcap_ksp,
+ sizeof (zone_mcap_kstat_t));
zone_kstat_delete_common(&zone->zone_misc_ksp,
sizeof (zone_misc_kstat_t));
}
@@ -1970,6 +2376,8 @@ zone_zsd_init(void)
zone0.zone_locked_mem_ctl = UINT64_MAX;
ASSERT(zone0.zone_max_swap == 0);
zone0.zone_max_swap_ctl = UINT64_MAX;
+ zone0.zone_phys_mem = 0;
+ zone0.zone_phys_mem_ctl = UINT64_MAX;
zone0.zone_max_lofi = 0;
zone0.zone_max_lofi_ctl = UINT64_MAX;
zone0.zone_shmmax = 0;
@@ -1993,8 +2401,9 @@ zone_zsd_init(void)
zone0.zone_initname = initname;
zone0.zone_lockedmem_kstat = NULL;
zone0.zone_swapresv_kstat = NULL;
+ zone0.zone_physmem_kstat = NULL;
zone0.zone_nprocs_kstat = NULL;
-
+ zone0.zone_zfs_io_pri = 1;
zone0.zone_stime = 0;
zone0.zone_utime = 0;
zone0.zone_wtime = 0;
@@ -2105,6 +2514,21 @@ zone_init(void)
RCTL_GLOBAL_INFINITE,
MAXCAP, MAXCAP, &zone_cpu_cap_ops);
+ rc_zone_cpu_baseline = rctl_register("zone.cpu-baseline",
+ RCENTITY_ZONE, RCTL_GLOBAL_SIGNAL_NEVER | RCTL_GLOBAL_DENY_NEVER |
+ RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT | RCTL_GLOBAL_SYSLOG_NEVER,
+ MAXCAP, MAXCAP, &zone_cpu_base_ops);
+
+ rc_zone_cpu_burst_time = rctl_register("zone.cpu-burst-time",
+ RCENTITY_ZONE, RCTL_GLOBAL_SIGNAL_NEVER | RCTL_GLOBAL_DENY_NEVER |
+ RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT | RCTL_GLOBAL_SYSLOG_NEVER,
+ INT_MAX, INT_MAX, &zone_cpu_burst_time_ops);
+
+ rc_zone_zfs_io_pri = rctl_register("zone.zfs-io-priority",
+ RCENTITY_ZONE, RCTL_GLOBAL_SIGNAL_NEVER | RCTL_GLOBAL_DENY_NEVER |
+ RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT | RCTL_GLOBAL_SYSLOG_NEVER,
+ 1024, 1024, &zone_zfs_io_pri_ops);
+
rc_zone_nlwps = rctl_register("zone.max-lwps", RCENTITY_ZONE,
RCTL_GLOBAL_NOACTION | RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT,
INT_MAX, INT_MAX, &zone_lwps_ops);
@@ -2146,6 +2570,20 @@ zone_init(void)
rde = rctl_dict_lookup("zone.cpu-shares");
(void) rctl_val_list_insert(&rde->rcd_default_value, dval);
+ /*
+ * Create a rctl_val with PRIVILEGED, NOACTION, value = 1. Then attach
+ * this at the head of the rctl_dict_entry for ``zone.zfs-io-priority'.
+ */
+ dval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
+ bzero(dval, sizeof (rctl_val_t));
+ dval->rcv_value = 1;
+ dval->rcv_privilege = RCPRIV_PRIVILEGED;
+ dval->rcv_flagaction = RCTL_LOCAL_NOACTION;
+ dval->rcv_action_recip_pid = -1;
+
+ rde = rctl_dict_lookup("zone.zfs-io-priority");
+ (void) rctl_val_list_insert(&rde->rcd_default_value, dval);
+
rc_zone_locked_mem = rctl_register("zone.max-locked-memory",
RCENTITY_ZONE, RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_BYTES |
RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX,
@@ -2156,6 +2594,11 @@ zone_init(void)
RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX,
&zone_max_swap_ops);
+ rc_zone_phys_mem = rctl_register("zone.max-physical-memory",
+ RCENTITY_ZONE, RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_BYTES |
+ RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX,
+ &zone_phys_mem_ops);
+
rc_zone_max_lofi = rctl_register("zone.max-lofi",
RCENTITY_ZONE, RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT |
RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX,
@@ -2177,6 +2620,7 @@ zone_init(void)
zone0.zone_ntasks = 1;
mutex_exit(&p0.p_lock);
zone0.zone_restart_init = B_TRUE;
+ zone0.zone_init_status = -1;
zone0.zone_brand = &native_brand;
rctl_prealloc_destroy(gp);
/*
@@ -2256,6 +2700,8 @@ zone_init(void)
static void
zone_free(zone_t *zone)
{
+ zone_dl_t *zdl;
+
ASSERT(zone != global_zone);
ASSERT(zone->zone_ntasks == 0);
ASSERT(zone->zone_nlwps == 0);
@@ -2284,6 +2730,19 @@ zone_free(zone_t *zone)
list_destroy(&zone->zone_ref_list);
zone_free_zsd(zone);
zone_free_datasets(zone);
+
+ /*
+ * While dlmgmtd should have removed all of these, it could have left
+ * something behind or crashed. In which case it's not safe for us to
+ * assume that the list is empty which list_destroy() will ASSERT. We
+ * clean up for our userland comrades which may have crashed, or worse,
+ * been disabled by SMF.
+ */
+ while ((zdl = list_remove_head(&zone->zone_dl_list)) != NULL) {
+ if (zdl->zdl_net != NULL)
+ nvlist_free(zdl->zdl_net);
+ kmem_free(zdl, sizeof (zone_dl_t));
+ }
list_destroy(&zone->zone_dl_list);
if (zone->zone_rootvp != NULL)
@@ -2328,12 +2787,18 @@ zone_free(zone_t *zone)
static void
zone_status_set(zone_t *zone, zone_status_t status)
{
+ timestruc_t now;
+ uint64_t t;
nvlist_t *nvl = NULL;
ASSERT(MUTEX_HELD(&zone_status_lock));
ASSERT(status > ZONE_MIN_STATE && status <= ZONE_MAX_STATE &&
status >= zone_status_get(zone));
+ /* Current time since Jan 1 1970 but consumers expect NS */
+ gethrestime(&now);
+ t = (now.tv_sec * NANOSEC) + now.tv_nsec;
+
if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) ||
nvlist_add_string(nvl, ZONE_CB_NAME, zone->zone_name) ||
nvlist_add_string(nvl, ZONE_CB_NEWSTATE,
@@ -2341,7 +2806,7 @@ zone_status_set(zone_t *zone, zone_status_t status)
nvlist_add_string(nvl, ZONE_CB_OLDSTATE,
zone_status_table[zone->zone_status]) ||
nvlist_add_int32(nvl, ZONE_CB_ZONEID, zone->zone_id) ||
- nvlist_add_uint64(nvl, ZONE_CB_TIMESTAMP, (uint64_t)gethrtime()) ||
+ nvlist_add_uint64(nvl, ZONE_CB_TIMESTAMP, t) ||
sysevent_evc_publish(zone_event_chan, ZONE_EVENT_STATUS_CLASS,
ZONE_EVENT_STATUS_SUBCLASS, "sun.com", "kernel", nvl, EVCH_SLEEP)) {
#ifdef DEBUG
@@ -2467,14 +2932,65 @@ zone_set_initname(zone_t *zone, const char *zone_initname)
return (0);
}
+/*
+ * The zone_set_mcap_nover and zone_set_mcap_pageout functions are used
+ * to provide the physical memory capping kstats. Since physical memory
+ * capping is currently implemented in userland, that code uses the setattr
+ * entry point to increment the kstats. We always simply increment nover
+ * every time that setattr is called and we always add in the input value
+ * to zone_mcap_pagedout every time that is called.
+ */
+/*ARGSUSED*/
static int
-zone_set_phys_mcap(zone_t *zone, const uint64_t *zone_mcap)
+zone_set_mcap_nover(zone_t *zone, const uint64_t *zone_nover)
{
- uint64_t mcap;
- int err = 0;
+ zone->zone_mcap_nover++;
+
+ return (0);
+}
+
+static int
+zone_set_mcap_pageout(zone_t *zone, const uint64_t *zone_pageout)
+{
+ uint64_t pageout;
+ int err;
- if ((err = copyin(zone_mcap, &mcap, sizeof (uint64_t))) == 0)
- zone->zone_phys_mcap = mcap;
+ if ((err = copyin(zone_pageout, &pageout, sizeof (uint64_t))) == 0)
+ zone->zone_mcap_pagedout += pageout;
+
+ return (err);
+}
+
+/*
+ * The zone_set_page_fault_delay function is used to set the number of usecs
+ * to throttle page faults. This is normally 0 but can be set to a non-0 value
+ * by the user-land memory capping code when the zone is over its physcial
+ * memory cap.
+ */
+static int
+zone_set_page_fault_delay(zone_t *zone, const uint32_t *pfdelay)
+{
+ uint32_t dusec;
+ int err;
+
+ if ((err = copyin(pfdelay, &dusec, sizeof (uint32_t))) == 0)
+ zone->zone_pg_flt_delay = dusec;
+
+ return (err);
+}
+
+/*
+ * The zone_set_rss function is used to set the zone's RSS when we do the
+ * fast, approximate calculation in user-land.
+ */
+static int
+zone_set_rss(zone_t *zone, const uint64_t *prss)
+{
+ uint64_t rss;
+ int err;
+
+ if ((err = copyin(prss, &rss, sizeof (uint64_t))) == 0)
+ zone->zone_phys_mem = rss;
return (err);
}
@@ -2886,6 +3402,12 @@ getzoneid(void)
return (curproc->p_zone->zone_id);
}
+zoneid_t
+getzonedid(void)
+{
+ return (curproc->p_zone->zone_did);
+}
+
/*
* Internal versions of zone_find_by_*(). These don't zone_hold() or
* check the validity of a zone's state.
@@ -4262,7 +4784,7 @@ zone_create(const char *zone_name, const char *zone_root,
caddr_t rctlbuf, size_t rctlbufsz,
caddr_t zfsbuf, size_t zfsbufsz, int *extended_error,
int match, uint32_t doi, const bslabel_t *label,
- int flags)
+ int flags, zoneid_t zone_did)
{
struct zsched_arg zarg;
nvlist_t *rctls = NULL;
@@ -4285,6 +4807,7 @@ zone_create(const char *zone_name, const char *zone_root,
zone = kmem_zalloc(sizeof (zone_t), KM_SLEEP);
zoneid = zone->zone_id = id_alloc(zoneid_space);
+ zone->zone_did = zone_did;
zone->zone_status = ZONE_IS_UNINITIALIZED;
zone->zone_pool = pool_default;
zone->zone_pool_mod = gethrtime();
@@ -4292,6 +4815,7 @@ zone_create(const char *zone_name, const char *zone_root,
zone->zone_ncpus = 0;
zone->zone_ncpus_online = 0;
zone->zone_restart_init = B_TRUE;
+ zone->zone_init_status = -1;
zone->zone_brand = &native_brand;
zone->zone_initname = NULL;
mutex_init(&zone->zone_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -4353,10 +4877,14 @@ zone_create(const char *zone_name, const char *zone_root,
zone->zone_locked_mem_ctl = UINT64_MAX;
zone->zone_max_swap = 0;
zone->zone_max_swap_ctl = UINT64_MAX;
+ zone->zone_phys_mem = 0;
+ zone->zone_phys_mem_ctl = UINT64_MAX;
zone->zone_max_lofi = 0;
zone->zone_max_lofi_ctl = UINT64_MAX;
- zone0.zone_lockedmem_kstat = NULL;
- zone0.zone_swapresv_kstat = NULL;
+ zone->zone_lockedmem_kstat = NULL;
+ zone->zone_swapresv_kstat = NULL;
+ zone->zone_physmem_kstat = NULL;
+ zone->zone_zfs_io_pri = 1;
/*
* Zsched initializes the rctls.
@@ -4410,7 +4938,7 @@ zone_create(const char *zone_name, const char *zone_root,
return (zone_create_error(error, 0, extended_error));
}
- if (block_mounts() == 0) {
+ if (block_mounts(zone) == 0) {
mutex_enter(&pp->p_lock);
if (curthread != pp->p_agenttp)
continuelwps(pp);
@@ -4561,7 +5089,7 @@ zone_create(const char *zone_name, const char *zone_root,
/*
* The zone is fully visible, so we can let mounts progress.
*/
- resume_mounts();
+ resume_mounts(zone);
if (rctls)
nvlist_free(rctls);
@@ -4577,7 +5105,7 @@ errout:
continuelwps(pp);
mutex_exit(&pp->p_lock);
- resume_mounts();
+ resume_mounts(zone);
if (rctls)
nvlist_free(rctls);
/*
@@ -4655,6 +5183,7 @@ zone_boot(zoneid_t zoneid)
static int
zone_empty(zone_t *zone)
{
+ int cnt = 0;
int waitstatus;
/*
@@ -4665,7 +5194,16 @@ zone_empty(zone_t *zone)
ASSERT(MUTEX_NOT_HELD(&zonehash_lock));
while ((waitstatus = zone_status_timedwait_sig(zone,
ddi_get_lbolt() + hz, ZONE_IS_EMPTY)) == -1) {
- killall(zone->zone_id);
+ boolean_t force = B_FALSE;
+
+ /* Every 30 seconds, try harder */
+ if (cnt++ >= 30) {
+ cmn_err(CE_WARN, "attempt to force kill zone %d\n",
+ zone->zone_id);
+ force = B_TRUE;
+ cnt = 0;
+ }
+ killall(zone->zone_id, force);
}
/*
* return EINTR if we were signaled
@@ -4732,15 +5270,6 @@ zone_shutdown(zoneid_t zoneid)
if (zoneid < MIN_USERZONEID || zoneid > MAX_ZONEID)
return (set_errno(EINVAL));
- /*
- * Block mounts so that VFS_MOUNT() can get an accurate view of
- * the zone's status with regards to ZONE_IS_SHUTTING down.
- *
- * e.g. NFS can fail the mount if it determines that the zone
- * has already begun the shutdown sequence.
- */
- if (block_mounts() == 0)
- return (set_errno(EINTR));
mutex_enter(&zonehash_lock);
/*
* Look for zone under hash lock to prevent races with other
@@ -4748,9 +5277,30 @@ zone_shutdown(zoneid_t zoneid)
*/
if ((zone = zone_find_all_by_id(zoneid)) == NULL) {
mutex_exit(&zonehash_lock);
- resume_mounts();
return (set_errno(EINVAL));
}
+
+ /*
+ * We have to drop zonehash_lock before calling block_mounts.
+ * Hold the zone so we can continue to use the zone_t.
+ */
+ zone_hold(zone);
+ mutex_exit(&zonehash_lock);
+
+ /*
+ * Block mounts so that VFS_MOUNT() can get an accurate view of
+ * the zone's status with regards to ZONE_IS_SHUTTING down.
+ *
+ * e.g. NFS can fail the mount if it determines that the zone
+ * has already begun the shutdown sequence.
+ *
+ */
+ if (block_mounts(zone) == 0) {
+ zone_rele(zone);
+ return (set_errno(EINTR));
+ }
+
+ mutex_enter(&zonehash_lock);
mutex_enter(&zone_status_lock);
status = zone_status_get(zone);
/*
@@ -4759,7 +5309,8 @@ zone_shutdown(zoneid_t zoneid)
if (status < ZONE_IS_READY) {
mutex_exit(&zone_status_lock);
mutex_exit(&zonehash_lock);
- resume_mounts();
+ resume_mounts(zone);
+ zone_rele(zone);
return (set_errno(EINVAL));
}
/*
@@ -4769,7 +5320,8 @@ zone_shutdown(zoneid_t zoneid)
if (status >= ZONE_IS_DOWN) {
mutex_exit(&zone_status_lock);
mutex_exit(&zonehash_lock);
- resume_mounts();
+ resume_mounts(zone);
+ zone_rele(zone);
return (0);
}
/*
@@ -4804,10 +5356,9 @@ zone_shutdown(zoneid_t zoneid)
}
}
}
- zone_hold(zone); /* so we can use the zone_t later */
mutex_exit(&zone_status_lock);
mutex_exit(&zonehash_lock);
- resume_mounts();
+ resume_mounts(zone);
if (error = zone_empty(zone)) {
zone_rele(zone);
@@ -5403,14 +5954,6 @@ zone_getattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize)
error = EFAULT;
}
break;
- case ZONE_ATTR_PHYS_MCAP:
- size = sizeof (zone->zone_phys_mcap);
- if (bufsize > size)
- bufsize = size;
- if (buf != NULL &&
- copyout(&zone->zone_phys_mcap, buf, bufsize) != 0)
- error = EFAULT;
- break;
case ZONE_ATTR_SCHED_CLASS:
mutex_enter(&class_lock);
@@ -5465,6 +6008,14 @@ zone_getattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize)
}
kmem_free(zbuf, bufsize);
break;
+ case ZONE_ATTR_DID:
+ size = sizeof (zoneid_t);
+ if (bufsize > size)
+ bufsize = size;
+
+ if (buf != NULL && copyout(&zone->zone_did, buf, bufsize) != 0)
+ error = EFAULT;
+ break;
default:
if ((attr >= ZONE_ATTR_BRAND_ATTRS) && ZONE_IS_BRANDED(zone)) {
size = bufsize;
@@ -5496,10 +6047,11 @@ zone_setattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize)
return (set_errno(EPERM));
/*
- * Only the ZONE_ATTR_PHYS_MCAP attribute can be set on the
- * global zone.
+ * Only the ZONE_ATTR_PMCAP_NOVER and ZONE_ATTR_PMCAP_PAGEOUT
+ * attributes can be set on the global zone.
*/
- if (zoneid == GLOBAL_ZONEID && attr != ZONE_ATTR_PHYS_MCAP) {
+ if (zoneid == GLOBAL_ZONEID &&
+ attr != ZONE_ATTR_PMCAP_NOVER && attr != ZONE_ATTR_PMCAP_PAGEOUT) {
return (set_errno(EINVAL));
}
@@ -5516,7 +6068,9 @@ zone_setattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize)
* non-global zones.
*/
zone_status = zone_status_get(zone);
- if (attr != ZONE_ATTR_PHYS_MCAP && zone_status > ZONE_IS_READY) {
+ if (attr != ZONE_ATTR_PMCAP_NOVER && attr != ZONE_ATTR_PMCAP_PAGEOUT &&
+ attr != ZONE_ATTR_PG_FLT_DELAY && attr != ZONE_ATTR_RSS &&
+ zone_status > ZONE_IS_READY) {
err = EINVAL;
goto done;
}
@@ -5538,8 +6092,17 @@ zone_setattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize)
case ZONE_ATTR_FS_ALLOWED:
err = zone_set_fs_allowed(zone, (const char *)buf);
break;
- case ZONE_ATTR_PHYS_MCAP:
- err = zone_set_phys_mcap(zone, (const uint64_t *)buf);
+ case ZONE_ATTR_PMCAP_NOVER:
+ err = zone_set_mcap_nover(zone, (const uint64_t *)buf);
+ break;
+ case ZONE_ATTR_PMCAP_PAGEOUT:
+ err = zone_set_mcap_pageout(zone, (const uint64_t *)buf);
+ break;
+ case ZONE_ATTR_PG_FLT_DELAY:
+ err = zone_set_page_fault_delay(zone, (const uint32_t *)buf);
+ break;
+ case ZONE_ATTR_RSS:
+ err = zone_set_rss(zone, (const uint64_t *)buf);
break;
case ZONE_ATTR_SCHED_CLASS:
err = zone_set_sched_class(zone, (const char *)buf);
@@ -6260,6 +6823,7 @@ zone(int cmd, void *arg1, void *arg2, void *arg3, void *arg4)
zs.doi = zs32.doi;
zs.label = (const bslabel_t *)(uintptr_t)zs32.label;
zs.flags = zs32.flags;
+ zs.zoneid = zs32.zoneid;
#else
panic("get_udatamodel() returned bogus result\n");
#endif
@@ -6270,7 +6834,7 @@ zone(int cmd, void *arg1, void *arg2, void *arg3, void *arg4)
(caddr_t)zs.rctlbuf, zs.rctlbufsz,
(caddr_t)zs.zfsbuf, zs.zfsbufsz,
zs.extended_error, zs.match, zs.doi,
- zs.label, zs.flags));
+ zs.label, zs.flags, zs.zoneid));
case ZONE_BOOT:
return (zone_boot((zoneid_t)(uintptr_t)arg1));
case ZONE_DESTROY:
@@ -6371,6 +6935,7 @@ zone_ki_call_zoneadmd(struct zarg *zargp)
bcopy(zone->zone_name, zone_name, zone_namelen);
zoneid = zone->zone_id;
uniqid = zone->zone_uniqid;
+ arg.status = zone->zone_init_status;
/*
* zoneadmd may be down, but at least we can empty out the zone.
* We can ignore the return value of zone_empty() since we're called
@@ -6548,7 +7113,7 @@ zone_kadmin(int cmd, int fcn, const char *mdep, cred_t *credp)
* zone_ki_call_zoneadmd() will do a more thorough job of this
* later.
*/
- killall(zone->zone_id);
+ killall(zone->zone_id, B_FALSE);
/*
* Now, create the thread to contact zoneadmd and do the rest of the
* work. This thread can't be created in our zone otherwise
diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile
index 656494c228..ce449efd20 100644
--- a/usr/src/uts/common/sys/Makefile
+++ b/usr/src/uts/common/sys/Makefile
@@ -22,6 +22,7 @@
# Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright 2013, Joyent, Inc. All rights reserved.
# Copyright 2013 Garrett D'Amore <garrett@damore.org>
+# Copyright (c) 2014, Joyent, Inc. All rights reserved.
#
include $(SRC)/uts/Makefile.uts
@@ -212,6 +213,7 @@ CHKHDRS= \
emul64cmd.h \
emul64var.h \
epm.h \
+ epoll.h \
errno.h \
errorq.h \
errorq_impl.h \
@@ -243,6 +245,7 @@ CHKHDRS= \
flock.h \
flock_impl.h \
fork.h \
+ frameio.h \
fss.h \
fsspriocntl.h \
fsid.h \
@@ -268,6 +271,7 @@ CHKHDRS= \
idmap.h \
ieeefp.h \
id_space.h \
+ inotify.h \
instance.h \
int_const.h \
int_fmtio.h \
@@ -635,6 +639,8 @@ CHKHDRS= \
vmem.h \
vmem_impl.h \
vmsystm.h \
+ vnd.h \
+ vnd_errno.h \
vnic.h \
vnic_impl.h \
vnode.h \
@@ -852,13 +858,14 @@ FSHDRS= \
cachefs_log.h \
decomp.h \
dv_node.h \
- sdev_impl.h \
fifonode.h \
hsfs_isospec.h \
hsfs_node.h \
hsfs_rrip.h \
hsfs_spec.h \
hsfs_susp.h \
+ hyprlofs.h \
+ hyprlofs_info.h \
lofs_info.h \
lofs_node.h \
mntdata.h \
@@ -868,6 +875,8 @@ FSHDRS= \
pc_label.h \
pc_node.h \
pxfs_ki.h \
+ sdev_impl.h \
+ sdev_plugin.h \
snode.h \
swapnode.h \
tmp.h \
diff --git a/usr/src/uts/common/sys/aggr_impl.h b/usr/src/uts/common/sys/aggr_impl.h
index 5b3bba08c2..a02240ecea 100644
--- a/usr/src/uts/common/sys/aggr_impl.h
+++ b/usr/src/uts/common/sys/aggr_impl.h
@@ -21,6 +21,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 OmniTI Computer Consulting, Inc All rights reserved.
*/
#ifndef _SYS_AGGR_IMPL_H
@@ -307,6 +308,8 @@ extern boolean_t aggr_port_notify_link(aggr_grp_t *, aggr_port_t *);
extern void aggr_port_init_callbacks(aggr_port_t *);
extern void aggr_recv_cb(void *, mac_resource_handle_t, mblk_t *, boolean_t);
+extern void aggr_recv_promisc_cb(void *, mac_resource_handle_t, mblk_t *,
+ boolean_t);
extern void aggr_tx_ring_update(void *, uintptr_t);
extern void aggr_tx_notify_thread(void *);
diff --git a/usr/src/uts/common/sys/auxv.h b/usr/src/uts/common/sys/auxv.h
index 3a2e705850..e875cd10d9 100644
--- a/usr/src/uts/common/sys/auxv.h
+++ b/usr/src/uts/common/sys/auxv.h
@@ -29,7 +29,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_AUXV_H
@@ -78,6 +78,9 @@ typedef struct {
#define AT_FLAGS 8 /* processor flags */
#define AT_ENTRY 9 /* a.out entry point */
+/* First introduced on Linux */
+#define AT_RANDOM 25 /* address of 16 random bytes */
+
/*
* These relate to the original PPC ABI document; Linux reused
* the values for other things (see below), so disambiguation of
@@ -110,6 +113,16 @@ typedef struct {
* AT_UCACHEBSIZE 21 (moved from 12)
*
* AT_IGNOREPPC 22
+ *
+ * On Linux:
+ * AT_* values 18 through 22 are reserved
+ * AT_SECURE 23 secure mode boolean
+ * AT_BASE_PLATFORM 24 string identifying real platform, may
+ * differ from AT_PLATFORM.
+ * AT_HWCAP2 26 extension of AT_HWCAP
+ * AT_EXECFN 31 filename of program
+ * AT_SYSINFO 32
+ * AT_SYSINFO_EHDR 33 The vDSO location
*/
/*
@@ -186,6 +199,8 @@ extern uint_t getisax(uint32_t *, uint_t);
#define AT_SUN_BRAND_AUX1 2020
#define AT_SUN_BRAND_AUX2 2021
#define AT_SUN_BRAND_AUX3 2022
+#define AT_SUN_BRAND_AUX4 2025
+#define AT_SUN_BRAND_NROOT 2024
/*
* Note that 2023 is reserved for the AT_SUN_HWCAP2 word defined above.
diff --git a/usr/src/uts/common/sys/brand.h b/usr/src/uts/common/sys/brand.h
index badc3faff8..3486ae864d 100644
--- a/usr/src/uts/common/sys/brand.h
+++ b/usr/src/uts/common/sys/brand.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_BRAND_H
@@ -106,7 +107,7 @@ struct brand_ops {
void (*b_init_brand_data)(zone_t *);
void (*b_free_brand_data)(zone_t *);
int (*b_brandsys)(int, int64_t *, uintptr_t, uintptr_t, uintptr_t,
- uintptr_t, uintptr_t, uintptr_t);
+ uintptr_t, uintptr_t);
void (*b_setbrand)(struct proc *);
int (*b_getattr)(zone_t *, int, void *, size_t *);
int (*b_setattr)(zone_t *, int, void *, size_t);
@@ -124,7 +125,13 @@ struct brand_ops {
struct cred *cred, int brand_action);
void (*b_sigset_native_to_brand)(sigset_t *);
void (*b_sigset_brand_to_native)(sigset_t *);
+ void (*b_psig_to_proc)(proc_t *, kthread_t *, int);
int b_nsig;
+ void (*b_exit_with_sig)(proc_t *, sigqueue_t *, void *);
+ boolean_t (*b_wait_filter)(proc_t *, proc_t *);
+ boolean_t (*b_native_exec)(uint8_t, const char **);
+ void (*b_ptrace_exectrap)(proc_t *);
+ uint32_t (*b_map32limit)(proc_t *);
};
/*
@@ -135,6 +142,7 @@ typedef struct brand {
char *b_name;
struct brand_ops *b_ops;
struct brand_mach_ops *b_machops;
+ size_t b_data_size;
} brand_t;
extern brand_t native_brand;
@@ -179,7 +187,7 @@ extern void brand_solaris_copy_procdata(proc_t *, proc_t *,
struct brand *);
extern int brand_solaris_elfexec(vnode_t *, execa_t *, uarg_t *,
intpdata_t *, int, long *, int, caddr_t, cred_t *, int,
- struct brand *, char *, char *, char *, char *, char *);
+ struct brand *, char *, char *, char *);
extern void brand_solaris_exec(struct brand *);
extern int brand_solaris_fini(char **, struct modlinkage *,
struct brand *);
diff --git a/usr/src/uts/common/sys/buf.h b/usr/src/uts/common/sys/buf.h
index a9191aed7c..cb8a6012fc 100644
--- a/usr/src/uts/common/sys/buf.h
+++ b/usr/src/uts/common/sys/buf.h
@@ -21,6 +21,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -186,6 +187,7 @@ struct biostats {
#define B_STARTED 0x2000000 /* io:::start probe called for buf */
#define B_ABRWRITE 0x4000000 /* Application based recovery active */
#define B_PAGE_NOWAIT 0x8000000 /* Skip the page if it is locked */
+#define B_INVALCURONLY 0x10000000 /* invalidate only for curproc */
/*
* There is some confusion over the meaning of B_FREE and B_INVAL and what
@@ -198,6 +200,12 @@ struct biostats {
* between the sole use of these two flags. In both cases, IO will be done
* if the page is not yet committed to storage.
*
+ * The B_INVALCURONLY flag modifies the behavior of the B_INVAL flag and is
+ * intended to be used in conjunction with B_INVAL. B_INVALCURONLY has no
+ * meaning on its own. When both B_INVALCURONLY and B_INVAL are set, then
+ * the mapping for the page is only invalidated for the current process.
+ * In this case, the page is not destroyed unless this was the final mapping.
+ *
* In order to discard pages without writing them back, (B_INVAL | B_TRUNC)
* should be used.
*
diff --git a/usr/src/uts/common/sys/cpucaps.h b/usr/src/uts/common/sys/cpucaps.h
index 6063ff4380..6bc042108c 100644
--- a/usr/src/uts/common/sys/cpucaps.h
+++ b/usr/src/uts/common/sys/cpucaps.h
@@ -22,6 +22,7 @@
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011, 2012, Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_CPUCAPS_H
@@ -84,12 +85,16 @@ extern void cpucaps_zone_remove(zone_t *);
*/
extern int cpucaps_project_set(kproject_t *, rctl_qty_t);
extern int cpucaps_zone_set(zone_t *, rctl_qty_t);
+extern int cpucaps_zone_set_base(zone_t *, rctl_qty_t);
+extern int cpucaps_zone_set_burst_time(zone_t *, rctl_qty_t);
/*
* Get current CPU usage for a project/zone.
*/
extern rctl_qty_t cpucaps_project_get(kproject_t *);
extern rctl_qty_t cpucaps_zone_get(zone_t *);
+extern rctl_qty_t cpucaps_zone_get_base(zone_t *);
+extern rctl_qty_t cpucaps_zone_get_burst_time(zone_t *);
/*
* Scheduling class hooks into CPU caps framework.
diff --git a/usr/src/uts/common/sys/cpucaps_impl.h b/usr/src/uts/common/sys/cpucaps_impl.h
index 95afd21827..2cd4ed644d 100644
--- a/usr/src/uts/common/sys/cpucaps_impl.h
+++ b/usr/src/uts/common/sys/cpucaps_impl.h
@@ -22,6 +22,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011, 2012, Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_CPUCAPS_IMPL_H
@@ -66,8 +67,12 @@ typedef struct cpucap {
waitq_t cap_waitq; /* waitq for capped threads */
kstat_t *cap_kstat; /* cpucaps specific kstat */
int64_t cap_gen; /* zone cap specific */
+ hrtime_t cap_chk_value; /* effective CPU usage cap */
hrtime_t cap_value; /* scaled CPU usage cap */
hrtime_t cap_usage; /* current CPU usage */
+ hrtime_t cap_base; /* base CPU for burst */
+ u_longlong_t cap_burst_limit; /* max secs (in tics) for a burst */
+ u_longlong_t cap_bursting; /* # of ticks currently bursting */
disp_lock_t cap_usagelock; /* protects cap_usage above */
/*
* Per cap statistics.
@@ -75,6 +80,7 @@ typedef struct cpucap {
hrtime_t cap_maxusage; /* maximum cap usage */
u_longlong_t cap_below; /* # of ticks spend below the cap */
u_longlong_t cap_above; /* # of ticks spend above the cap */
+ u_longlong_t cap_above_base; /* # of ticks spent above the base */
} cpucap_t;
/*
diff --git a/usr/src/uts/common/sys/cred.h b/usr/src/uts/common/sys/cred.h
index 5056f9a511..914f132dc0 100644
--- a/usr/src/uts/common/sys/cred.h
+++ b/usr/src/uts/common/sys/cred.h
@@ -93,6 +93,7 @@ extern gid_t crgetgid(const cred_t *);
extern gid_t crgetrgid(const cred_t *);
extern gid_t crgetsgid(const cred_t *);
extern zoneid_t crgetzoneid(const cred_t *);
+extern zoneid_t crgetzonedid(const cred_t *);
extern projid_t crgetprojid(const cred_t *);
extern cred_t *crgetmapped(const cred_t *);
diff --git a/usr/src/uts/common/sys/ctf_api.h b/usr/src/uts/common/sys/ctf_api.h
index 04d73c3181..ab648d5489 100644
--- a/usr/src/uts/common/sys/ctf_api.h
+++ b/usr/src/uts/common/sys/ctf_api.h
@@ -169,7 +169,9 @@ extern const char *ctf_errmsg(int);
extern int ctf_version(int);
extern int ctf_func_info(ctf_file_t *, ulong_t, ctf_funcinfo_t *);
+extern int ctf_func_info_by_id(ctf_file_t *, ctf_id_t, ctf_funcinfo_t *);
extern int ctf_func_args(ctf_file_t *, ulong_t, uint_t, ctf_id_t *);
+extern int ctf_func_args_by_id(ctf_file_t *, ctf_id_t, uint_t, ctf_id_t *);
extern ctf_id_t ctf_lookup_by_name(ctf_file_t *, const char *);
extern ctf_id_t ctf_lookup_by_symbol(ctf_file_t *, ulong_t);
@@ -240,6 +242,34 @@ extern ctf_file_t *ctf_modopen(struct module *, int *);
#endif
+typedef struct ctf_ihelem {
+ ushort_t ih_type; /* type ID number */
+ ushort_t ih_value; /* type ID number in a different fp */
+ ushort_t ih_next; /* index of next element in hash chain */
+} ctf_ihelem_t;
+
+typedef struct ctf_idhash {
+ ushort_t *ih_buckets; /* hash bucket array (chain indices) */
+ ctf_ihelem_t *ih_chains; /* hash chains buffer */
+ ushort_t ih_nbuckets; /* number of elements in bucket array */
+ ushort_t ih_nelems; /* number of elements in hash table */
+ uint_t ih_free; /* index of next free hash element */
+} ctf_idhash_t;
+
+extern int ctf_idhash_create(ctf_idhash_t *, ulong_t);
+extern void ctf_idhash_clear(ctf_idhash_t *);
+extern int ctf_idhash_define(ctf_idhash_t *, ushort_t, ushort_t);
+extern int ctf_idhash_insert(ctf_idhash_t *, ushort_t, ushort_t);
+extern ctf_ihelem_t *ctf_idhash_lookup(ctf_idhash_t *, ushort_t);
+extern uint_t ctf_idhash_size(const ctf_idhash_t *);
+extern void ctf_idhash_destroy(ctf_idhash_t *);
+
+typedef struct ctf_idhash_iter ctf_idhash_iter_t;
+extern int ctf_idhash_iter_init(ctf_idhash_t *, ctf_idhash_iter_t **);
+extern const ctf_ihelem_t *ctf_idhash_iter(ctf_idhash_t *,
+ ctf_idhash_iter_t *);
+extern void ctf_idhash_iter_fini(ctf_idhash_t *, ctf_idhash_iter_t *);
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/sys/devpoll.h b/usr/src/uts/common/sys/devpoll.h
index 36c815c69f..4e4c76d9b0 100644
--- a/usr/src/uts/common/sys/devpoll.h
+++ b/usr/src/uts/common/sys/devpoll.h
@@ -24,11 +24,13 @@
* All rights reserved.
*/
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
#ifndef _SYS_DEVPOLL_H
#define _SYS_DEVPOLL_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/poll_impl.h>
#include <sys/types32.h>
@@ -39,8 +41,10 @@ extern "C" {
/* /dev/poll ioctl */
#define DPIOC (0xD0 << 8)
-#define DP_POLL (DPIOC | 1) /* poll on fds in cached in /dev/poll */
+#define DP_POLL (DPIOC | 1) /* poll on fds cached via /dev/poll */
#define DP_ISPOLLED (DPIOC | 2) /* is this fd cached in /dev/poll */
+#define DP_PPOLL (DPIOC | 3) /* ppoll on fds cached via /dev/poll */
+#define DP_EPOLLCOMPAT (DPIOC | 4) /* turn on epoll compatibility */
#define DEVPOLLSIZE 1000 /* /dev/poll table size increment */
@@ -51,14 +55,21 @@ typedef struct dvpoll {
pollfd_t *dp_fds; /* pollfd array */
nfds_t dp_nfds; /* num of pollfd's in dp_fds[] */
int dp_timeout; /* time out in milisec */
+ sigset_t *dp_setp; /* sigset, if any */
} dvpoll_t;
typedef struct dvpoll32 {
caddr32_t dp_fds; /* pollfd array */
uint32_t dp_nfds; /* num of pollfd's in dp_fds[] */
int32_t dp_timeout; /* time out in milisec */
+ caddr32_t dp_setp; /* sigset, if any */
} dvpoll32_t;
+typedef struct dvpoll_epollfd {
+ pollfd_t dpep_pollfd; /* must be first member */
+ uint64_t dpep_data; /* data payload */
+} dvpoll_epollfd_t;
+
#ifdef _KERNEL
typedef struct dp_entry {
@@ -71,6 +82,7 @@ typedef struct dp_entry {
} dp_entry_t;
#define DP_WRITER_PRESENT 0x1 /* a write is in progress */
+#define DP_ISEPOLLCOMPAT 0x2 /* epoll compatibility mode */
#define DP_REFRELE(dpep) { \
mutex_enter(&(dpep)->dpe_lock); \
diff --git a/usr/src/uts/common/sys/dktp/dadk.h b/usr/src/uts/common/sys/dktp/dadk.h
index f5c990e7c0..2178ad1f0d 100644
--- a/usr/src/uts/common/sys/dktp/dadk.h
+++ b/usr/src/uts/common/sys/dktp/dadk.h
@@ -65,6 +65,8 @@ struct dadk {
kstat_t *dad_errstats; /* error stats */
kmutex_t dad_cmd_mutex;
int dad_cmd_count;
+ uint32_t dad_err_cnt; /* number of recent errors */
+ hrtime_t dad_last_log; /* time of last error log */
};
#define DAD_SECSIZ dad_phyg.g_secsiz
diff --git a/usr/src/uts/common/sys/dld.h b/usr/src/uts/common/sys/dld.h
index fb2a0749d3..4cd93be56e 100644
--- a/usr/src/uts/common/sys/dld.h
+++ b/usr/src/uts/common/sys/dld.h
@@ -21,6 +21,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011 Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_DLD_H
@@ -191,6 +192,7 @@ typedef struct dld_ioc_rename {
datalink_id_t dir_linkid1;
datalink_id_t dir_linkid2;
char dir_link[MAXLINKNAMELEN];
+ boolean_t dir_zoneinit;
} dld_ioc_rename_t;
/*
@@ -203,6 +205,7 @@ typedef struct dld_ioc_rename {
typedef struct dld_ioc_zid {
zoneid_t diz_zid;
datalink_id_t diz_linkid;
+ boolean_t diz_transient;
} dld_ioc_zid_t;
/*
@@ -350,6 +353,7 @@ typedef struct dld_hwgrpinfo {
*/
typedef int (*dld_capab_func_t)(void *, uint_t, void *, uint_t);
+#define DI_DIRECT_RAW 0x1
/*
* Direct Tx/Rx capability.
*/
@@ -374,6 +378,9 @@ typedef struct dld_capab_direct_s {
/* flow control "can I put on a ring" callback */
uintptr_t di_tx_fctl_df; /* canput-like callback */
void *di_tx_fctl_dh;
+
+ /* flags that control our behavior */
+ uint_t di_flags;
} dld_capab_direct_t;
/*
diff --git a/usr/src/uts/common/sys/dld_impl.h b/usr/src/uts/common/sys/dld_impl.h
index a76a927e59..81708aad38 100644
--- a/usr/src/uts/common/sys/dld_impl.h
+++ b/usr/src/uts/common/sys/dld_impl.h
@@ -53,7 +53,8 @@ typedef enum {
typedef enum {
DLD_UNINITIALIZED,
DLD_PASSIVE,
- DLD_ACTIVE
+ DLD_ACTIVE,
+ DLD_EXCLUSIVE
} dld_passivestate_t;
/*
@@ -256,6 +257,8 @@ extern void dld_str_rx_unitdata(void *, mac_resource_handle_t,
extern void dld_str_notify_ind(dld_str_t *);
extern mac_tx_cookie_t str_mdata_fastpath_put(dld_str_t *, mblk_t *,
uintptr_t, uint16_t);
+extern mac_tx_cookie_t str_mdata_raw_fastpath_put(dld_str_t *, mblk_t *,
+ uintptr_t, uint16_t);
extern int dld_flow_ctl_callb(dld_str_t *, uint64_t,
int (*func)(), void *);
diff --git a/usr/src/uts/common/sys/dlpi.h b/usr/src/uts/common/sys/dlpi.h
index 5bc2bd41c5..dddac5b878 100644
--- a/usr/src/uts/common/sys/dlpi.h
+++ b/usr/src/uts/common/sys/dlpi.h
@@ -107,6 +107,7 @@ typedef struct dl_ipnetinfo {
#define DL_PASSIVE_REQ 0x114 /* Allow access to aggregated link */
#define DL_INTR_MODE_REQ 0x115 /* Request Rx processing in INTR mode */
#define DL_NOTIFY_CONF 0x116 /* Notification from upstream */
+#define DL_EXCLUSIVE_REQ 0x117 /* Make bind active */
/*
* Primitives used for Connectionless Service
@@ -388,6 +389,7 @@ typedef struct dl_ipnetinfo {
#define DL_PROMISC_PHYS 0x01 /* promiscuous mode at phys level */
#define DL_PROMISC_SAP 0x02 /* promiscuous mode at sap level */
#define DL_PROMISC_MULTI 0x03 /* promiscuous mode for multicast */
+#define DL_PROMISC_RX_ONLY 0x04 /* above only enabled for rx */
/*
* DLPI notification codes for DL_NOTIFY_REQ primitives.
@@ -1107,6 +1109,13 @@ typedef struct {
} dl_intr_mode_req_t;
/*
+ * DL_EXCLUSIVE_REQ, M_PROTO type
+ */
+typedef struct {
+ t_uscalar_t dl_primitive;
+} dl_exclusive_req_t;
+
+/*
* CONNECTION-ORIENTED SERVICE PRIMITIVES
*/
@@ -1528,6 +1537,7 @@ union DL_primitives {
dl_control_ack_t control_ack;
dl_passive_req_t passive_req;
dl_intr_mode_req_t intr_mode_req;
+ dl_exclusive_req_t exclusive_req;
};
#define DL_INFO_REQ_SIZE sizeof (dl_info_req_t)
@@ -1596,6 +1606,7 @@ union DL_primitives {
#define DL_CONTROL_ACK_SIZE sizeof (dl_control_ack_t)
#define DL_PASSIVE_REQ_SIZE sizeof (dl_passive_req_t)
#define DL_INTR_MODE_REQ_SIZE sizeof (dl_intr_mode_req_t)
+#define DL_EXCLUSIVE_REQ_SIZE sizeof (dl_exclusive_req_t)
#ifdef _KERNEL
/*
diff --git a/usr/src/uts/common/sys/dls.h b/usr/src/uts/common/sys/dls.h
index 6bd2bbe35a..8e99d0e9d8 100644
--- a/usr/src/uts/common/sys/dls.h
+++ b/usr/src/uts/common/sys/dls.h
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011 Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_DLS_H
@@ -85,6 +86,7 @@ typedef struct dls_link_s dls_link_t;
#define DLS_PROMISC_SAP 0x00000001
#define DLS_PROMISC_MULTI 0x00000002
#define DLS_PROMISC_PHYS 0x00000004
+#define DLS_PROMISC_RX_ONLY 0x00000008
extern int dls_open(dls_link_t *, dls_dl_handle_t, dld_str_t *);
extern void dls_close(dld_str_t *);
@@ -106,11 +108,13 @@ extern void str_notify(void *, mac_notify_type_t);
extern int dls_devnet_open(const char *,
dls_dl_handle_t *, dev_t *);
+extern int dls_devnet_open_in_zone(const char *,
+ dls_dl_handle_t *, dev_t *, zoneid_t);
extern void dls_devnet_close(dls_dl_handle_t);
extern boolean_t dls_devnet_rebuild();
extern int dls_devnet_rename(datalink_id_t, datalink_id_t,
- const char *);
+ const char *, boolean_t);
extern int dls_devnet_create(mac_handle_t, datalink_id_t,
zoneid_t);
extern int dls_devnet_destroy(mac_handle_t, datalink_id_t *,
@@ -127,7 +131,7 @@ extern uint16_t dls_devnet_vid(dls_dl_handle_t);
extern datalink_id_t dls_devnet_linkid(dls_dl_handle_t);
extern int dls_devnet_dev2linkid(dev_t, datalink_id_t *);
extern int dls_devnet_phydev(datalink_id_t, dev_t *);
-extern int dls_devnet_setzid(dls_dl_handle_t, zoneid_t);
+extern int dls_devnet_setzid(dls_dl_handle_t, zoneid_t, boolean_t);
extern zoneid_t dls_devnet_getzid(dls_dl_handle_t);
extern zoneid_t dls_devnet_getownerzid(dls_dl_handle_t);
extern boolean_t dls_devnet_islinkvisible(datalink_id_t, zoneid_t);
@@ -141,6 +145,8 @@ extern int dls_mgmt_update(const char *, uint32_t, boolean_t,
extern int dls_mgmt_get_linkinfo(datalink_id_t, char *,
datalink_class_t *, uint32_t *, uint32_t *);
extern int dls_mgmt_get_linkid(const char *, datalink_id_t *);
+extern int dls_mgmt_get_linkid_in_zone(const char *,
+ datalink_id_t *, zoneid_t);
extern datalink_id_t dls_mgmt_get_next(datalink_id_t, datalink_class_t,
datalink_media_t, uint32_t);
extern int dls_devnet_macname2linkid(const char *,
diff --git a/usr/src/uts/common/sys/dls_impl.h b/usr/src/uts/common/sys/dls_impl.h
index 60f51c47b5..d502b36a2d 100644
--- a/usr/src/uts/common/sys/dls_impl.h
+++ b/usr/src/uts/common/sys/dls_impl.h
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011 Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_DLS_IMPL_H
@@ -61,6 +62,7 @@ struct dls_link_s { /* Protected by */
uint_t dl_zone_ref;
link_tagmode_t dl_tagmode; /* atomic */
uint_t dl_nonip_cnt; /* SL */
+ uint_t dl_exclusive; /* SL */
};
typedef struct dls_head_s {
@@ -96,7 +98,8 @@ extern void dls_create_str_kstats(dld_str_t *);
extern int dls_stat_update(kstat_t *, dls_link_t *, int);
extern int dls_stat_create(const char *, int, const char *,
zoneid_t, int (*)(struct kstat *, int), void *,
- kstat_t **);
+ kstat_t **, zoneid_t);
+extern void dls_stat_delete(kstat_t *);
extern int dls_devnet_open_by_dev(dev_t, dls_link_t **,
dls_dl_handle_t *);
@@ -126,6 +129,7 @@ extern void dls_mgmt_init(void);
extern void dls_mgmt_fini(void);
extern int dls_mgmt_get_phydev(datalink_id_t, dev_t *);
+extern int dls_exclusive_set(dld_str_t *, boolean_t);
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/sys/dls_mgmt.h b/usr/src/uts/common/sys/dls_mgmt.h
index b4032c24d6..4f73d92118 100644
--- a/usr/src/uts/common/sys/dls_mgmt.h
+++ b/usr/src/uts/common/sys/dls_mgmt.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent Inc. All rights reserved.
*/
#ifndef _DLS_MGMT_H
@@ -165,6 +166,7 @@ typedef struct dlmgmt_door_getname {
typedef struct dlmgmt_door_getlinkid {
int ld_cmd;
char ld_link[MAXLINKNAMELEN];
+ zoneid_t ld_zoneid;
} dlmgmt_door_getlinkid_t;
typedef struct dlmgmt_door_getnext_s {
diff --git a/usr/src/uts/common/sys/epoll.h b/usr/src/uts/common/sys/epoll.h
new file mode 100644
index 0000000000..f2e4b90ab7
--- /dev/null
+++ b/usr/src/uts/common/sys/epoll.h
@@ -0,0 +1,89 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_EPOLL_H
+#define _SYS_EPOLL_H
+
+#include <sys/types.h>
+#include <sys/poll.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef union epoll_data {
+ void *ptr;
+ int fd;
+ uint32_t u32;
+ uint64_t u64;
+} epoll_data_t;
+
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack(4)
+#endif
+
+typedef struct epoll_event {
+ uint32_t events; /* events */
+ epoll_data_t data; /* user-specified data */
+} epoll_event_t;
+
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack()
+#endif
+
+/*
+ * Define the EPOLL* constants in terms of their poll(2)/poll(7) equivalents.
+ * Note that the values match the equivalents in Linux to allow for any binary
+ * compatibility layers to not need to translate them.
+ */
+#define EPOLLIN 0x0001
+#define EPOLLPRI 0x0002
+#define EPOLLOUT 0x0004
+#define EPOLLRDNORM 0x0040
+#define EPOLLRDBAND 0x0080
+#define EPOLLWRNORM 0x0100
+#define EPOLLWRBAND 0x0200
+#define EPOLLMSG 0x0400 /* not used */
+#define EPOLLERR 0x0008
+#define EPOLLHUP 0x0010
+#define EPOLLRDHUP 0x2000
+
+#define EPOLLWAKEUP (1UL << 29) /* no meaning; silently ignored */
+#define EPOLLONESHOT (1UL << 30) /* translated to POLLONESHOT */
+#define EPOLLET (1UL << 31) /* translated to POLLET */
+
+#define EPOLL_CTL_ADD 1
+#define EPOLL_CTL_DEL 2
+#define EPOLL_CTL_MOD 3
+
+#define EPOLL_CLOEXEC 02000000
+
+#if !defined(_KERNEL)
+
+extern int epoll_create(int size);
+extern int epoll_create1(int flags);
+extern int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event);
+extern int epoll_wait(int epfd, struct epoll_event *events,
+ int maxevents, int timeout);
+extern int epoll_pwait(int epfd, struct epoll_event *events,
+ int maxevents, int timeout, const sigset_t *sigmask);
+
+#endif /* !_KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_EPOLL_H */
diff --git a/usr/src/uts/common/sys/exec.h b/usr/src/uts/common/sys/exec.h
index d36bc20481..73c5cccee7 100644
--- a/usr/src/uts/common/sys/exec.h
+++ b/usr/src/uts/common/sys/exec.h
@@ -26,6 +26,10 @@
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
#ifndef _SYS_EXEC_H
#define _SYS_EXEC_H
@@ -102,6 +106,7 @@ typedef struct uarg {
vnode_t *ex_vp;
char *emulator;
char *brandname;
+ const char *brand_nroot;
char *auxp_auxflags; /* addr of auxflags auxv on the user stack */
char *auxp_brand; /* address of first brand auxv on user stack */
cred_t *pfcred;
@@ -239,14 +244,16 @@ extern void exec_set_sp(size_t);
extern int elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int,
long *, int, caddr_t, cred_t *, int);
extern int mapexec_brand(vnode_t *, uarg_t *, Ehdr *, Addr *,
- intptr_t *, caddr_t, int *, caddr_t *, caddr_t *, size_t *, uintptr_t *);
+ intptr_t *, caddr_t, char **, caddr_t *, caddr_t *, size_t *,
+ uintptr_t *, uintptr_t *);
#endif /* !_ELF32_COMPAT */
#if defined(_LP64)
extern int elf32exec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int,
long *, int, caddr_t, cred_t *, int);
extern int mapexec32_brand(vnode_t *, uarg_t *, Elf32_Ehdr *, Elf32_Addr *,
- intptr_t *, caddr_t, int *, caddr_t *, caddr_t *, size_t *, uintptr_t *);
+ intptr_t *, caddr_t, char **, caddr_t *, caddr_t *, size_t *,
+ uintptr_t *, uintptr_t *);
#endif /* _LP64 */
/*
diff --git a/usr/src/uts/common/sys/frameio.h b/usr/src/uts/common/sys/frameio.h
new file mode 100644
index 0000000000..54e6dbeedf
--- /dev/null
+++ b/usr/src/uts/common/sys/frameio.h
@@ -0,0 +1,107 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_FRAMEIO_H
+#define _SYS_FRAMEIO_H
+
+/*
+ * Frame I/O definitions
+ */
+
+#include <sys/types.h>
+
+#ifdef _KERNEL
+/* Kernel only headers */
+#include <sys/stream.h>
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * An individual frame vector component. Collections of these are used to make
+ * ioctls.
+ */
+typedef struct framevec {
+ void *fv_buf; /* Buffer with data */
+ size_t fv_buflen; /* Size of the buffer */
+ size_t fv_actlen; /* Amount of buffer consumed, ignore on error */
+} framevec_t;
+
+/*
+ * The base unit used with frameio.
+ */
+typedef struct frameio {
+ uint_t fio_version; /* Should always be FRAMEIO_CURRENT_VERSION */
+ uint_t fio_nvpf; /* How many vectors make up one frame */
+ uint_t fio_nvecs; /* The total number of vectors */
+ framevec_t fio_vecs[]; /* C99 VLA */
+} frameio_t;
+
+
+#define FRAMEIO_VERSION_ONE 1
+#define FRAMEIO_CURRENT_VERSION FRAMEIO_VERSION_ONE
+
+#define FRAMEIO_NVECS_MAX 32
+
+/*
+ * Definitions for kernel modules to include as helpers. These are consolidation
+ * private.
+ */
+#ifdef _KERNEL
+
+/*
+ * 32-bit versions for 64-bit kernels
+ */
+typedef struct framevec32 {
+ caddr32_t fv_buf;
+ size32_t fv_buflen;
+ size32_t fv_actlen;
+} framevec32_t;
+
+typedef struct frameio32 {
+ uint_t fio_version;
+ uint_t fio_vecspframe;
+ uint_t fio_nvecs;
+ framevec32_t fio_vecs[];
+} frameio32_t;
+
+/*
+ * Describe the different ways that vectors should map to frames.
+ */
+typedef enum frameio_write_mblk_map {
+ MAP_BLK_FRAME
+} frameio_write_mblk_map_t;
+
+int frameio_init(void);
+void frameio_fini(void);
+frameio_t *frameio_alloc(int);
+void frameio_free(frameio_t *);
+int frameio_hdr_copyin(frameio_t *, int, const void *, uint_t);
+int frameio_mblk_chain_read(frameio_t *, mblk_t **, int *, int);
+int frameio_mblk_chain_write(frameio_t *, frameio_write_mblk_map_t, mblk_t *,
+ int *, int);
+int frameio_hdr_copyout(frameio_t *, int, void *, uint_t);
+size_t frameio_frame_length(frameio_t *, framevec_t *);
+void frameio_mark_consumed(frameio_t *, int);
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_FRAMEIO_H */
diff --git a/usr/src/uts/common/sys/fs/bootfs_impl.h b/usr/src/uts/common/sys/fs/bootfs_impl.h
new file mode 100644
index 0000000000..5726f1428a
--- /dev/null
+++ b/usr/src/uts/common/sys/fs/bootfs_impl.h
@@ -0,0 +1,81 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_FS_BOOTFS_IMPL_H
+#define _SYS_FS_BOOTFS_IMPL_H
+
+#include <sys/types.h>
+#include <sys/list.h>
+#include <sys/avl.h>
+#include <sys/vnode.h>
+#include <sys/vfs_opreg.h>
+#include <sys/kstat.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * The bootfs node is the file system specific version of the vnode for the
+ * bootfs file system. Because the bootfs file system is entirely a read-only
+ * file system, this structure requires no locking as the contents are
+ * immutable.
+ */
+typedef struct bootfs_node {
+ char *bvn_name; /* entry name */
+ struct vnode *bvn_vnp; /* Corresponding vnode */
+ avl_tree_t bvn_dir; /* directory entries, if VDIR */
+ avl_node_t bvn_link; /* dirent link */
+ list_node_t bvn_alink; /* link for all nodes */
+ uint64_t bvn_addr; /* Address in pmem */
+ uint64_t bvn_size; /* Size of the file */
+ struct bootfs_node *bvn_parent; /* .. */
+ vattr_t bvn_attr; /* attributes for the node */
+} bootfs_node_t;
+
+typedef struct bootfs_stat {
+ kstat_named_t bfss_nfiles;
+ kstat_named_t bfss_ndirs;
+ kstat_named_t bfss_nbytes;
+ kstat_named_t bfss_ndups;
+ kstat_named_t bfss_ndiscards;
+} bootfs_stat_t;
+
+typedef struct bootfs {
+ vfs_t *bfs_vfsp;
+ char *bfs_mntpath;
+ bootfs_node_t *bfs_rootvn;
+ kstat_t *bfs_kstat;
+ list_t bfs_nodes;
+ minor_t bfs_minor;
+ uint_t bfs_ninode;
+ bootfs_stat_t bfs_stat;
+} bootfs_t;
+
+extern void bootfs_construct(bootfs_t *);
+extern void bootfs_destruct(bootfs_t *);
+extern int bootfs_node_constructor(void *, void *, int);
+extern void bootfs_node_destructor(void *, void *);
+
+extern struct vnodeops *bootfs_vnodeops;
+extern const fs_operation_def_t bootfs_vnodeops_template[];
+extern kmem_cache_t *bootfs_node_cache;
+extern major_t bootfs_major;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_FS_BOOTFS_IMPL_H */
diff --git a/usr/src/uts/common/sys/fs/hyprlofs.h b/usr/src/uts/common/sys/fs/hyprlofs.h
new file mode 100644
index 0000000000..b8c4149df2
--- /dev/null
+++ b/usr/src/uts/common/sys/fs/hyprlofs.h
@@ -0,0 +1,91 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2012, Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_FS_HYPRLOFS_H
+#define _SYS_FS_HYPRLOFS_H
+
+#include <sys/param.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * hyprlofs ioctl numbers.
+ */
+#define HYPRLOFS_IOC ('H' << 8)
+
+#define HYPRLOFS_ADD_ENTRIES (HYPRLOFS_IOC | 1)
+#define HYPRLOFS_RM_ENTRIES (HYPRLOFS_IOC | 2)
+#define HYPRLOFS_RM_ALL (HYPRLOFS_IOC | 3)
+#define HYPRLOFS_GET_ENTRIES (HYPRLOFS_IOC | 4)
+
+typedef struct {
+ char *hle_path;
+ uint_t hle_plen;
+ char *hle_name;
+ uint_t hle_nlen;
+} hyprlofs_entry_t;
+
+typedef struct {
+ hyprlofs_entry_t *hle_entries;
+ uint_t hle_len;
+} hyprlofs_entries_t;
+
+typedef struct {
+ char hce_path[MAXPATHLEN];
+ char hce_name[MAXPATHLEN];
+} hyprlofs_curr_entry_t;
+
+typedef struct {
+ hyprlofs_curr_entry_t *hce_entries;
+ uint_t hce_cnt;
+} hyprlofs_curr_entries_t;
+
+#ifdef _KERNEL
+typedef struct {
+ caddr32_t hle_path;
+ uint_t hle_plen;
+ caddr32_t hle_name;
+ uint_t hle_nlen;
+} hyprlofs_entry32_t;
+
+typedef struct {
+ caddr32_t hle_entries;
+ uint_t hle_len;
+} hyprlofs_entries32_t;
+
+typedef struct {
+ caddr32_t hce_entries;
+ uint_t hce_cnt;
+} hyprlofs_curr_entries32_t;
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_FS_HYPRLOFS_H */
diff --git a/usr/src/uts/common/sys/fs/hyprlofs_info.h b/usr/src/uts/common/sys/fs/hyprlofs_info.h
new file mode 100644
index 0000000000..38389f77d9
--- /dev/null
+++ b/usr/src/uts/common/sys/fs/hyprlofs_info.h
@@ -0,0 +1,174 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2012, Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_FS_HYPRLOFS_INFO_H
+#define _SYS_FS_HYPRLOFS_INFO_H
+
+#include <sys/t_lock.h>
+#include <vm/seg.h>
+#include <vm/seg_vn.h>
+#include <sys/vfs_opreg.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * hlnode is the file system dependent node for hyprlofs.
+ * It is modeled on the tmpfs tmpnode.
+ *
+ * hln_rwlock protects access of the directory list at hln_dir
+ * as well as syncronizing read/writes to directory hlnodes.
+ * hln_tlock protects updates to hln_mode and hln_nlink.
+ * hln_tlock doesn't require any hlnode locks.
+ */
+typedef struct hlnode {
+ struct hlnode *hln_back; /* linked list of hlnodes */
+ struct hlnode *hln_forw; /* linked list of hlnodes */
+ union {
+ struct {
+ struct hldirent *un_dirlist; /* dirent list */
+ uint_t un_dirents; /* number of dirents */
+ } un_dirstruct;
+ vnode_t *un_realvp; /* real vnode */
+ } un_hlnode;
+ vnode_t *hln_vnode; /* vnode for this hlnode */
+ int hln_gen; /* pseudo gen num for hlfid */
+ int hln_looped; /* flag indicating loopback */
+ vattr_t hln_attr; /* attributes */
+ krwlock_t hln_rwlock; /* rw - serialize mods and */
+ /* directory updates */
+ kmutex_t hln_tlock; /* time, flag, and nlink lock */
+} hlnode_t;
+
+/*
+ * hyprlofs per-mount data structure.
+ * All fields are protected by hlm_contents.
+ */
+typedef struct {
+ vfs_t *hlm_vfsp; /* filesystem's vfs struct */
+ hlnode_t *hlm_rootnode; /* root hlnode */
+ char *hlm_mntpath; /* name of hyprlofs mount point */
+ dev_t hlm_dev; /* unique dev # of mounted `device' */
+ uint_t hlm_gen; /* pseudo generation number for files */
+ kmutex_t hlm_contents; /* lock for hlfsmount structure */
+} hlfsmount_t;
+
+/*
+ * hyprlofs directories are made up of a linked list of hldirent structures
+ * hanging off directory hlnodes. File names are not fixed length,
+ * but are null terminated.
+ */
+typedef struct hldirent {
+ hlnode_t *hld_hlnode; /* hlnode for this file */
+ struct hldirent *hld_next; /* next directory entry */
+ struct hldirent *hld_prev; /* prev directory entry */
+ uint_t hld_offset; /* "offset" of dir entry */
+ uint_t hld_hash; /* a hash of td_name */
+ struct hldirent *hld_link; /* linked via the hash table */
+ hlnode_t *hld_parent; /* parent, dir we are in */
+ char *hld_name; /* must be null terminated */
+ /* max length is MAXNAMELEN */
+} hldirent_t;
+
+/*
+ * hlfid overlays the fid structure (for VFS_VGET)
+ */
+typedef struct {
+ uint16_t hlfid_len;
+ ino32_t hlfid_ino;
+ int32_t hlfid_gen;
+} hlfid_t;
+
+/*
+ * File system independent to hyprlofs conversion macros
+ */
+#define VFSTOHLM(vfsp) ((hlfsmount_t *)(vfsp)->vfs_data)
+#define VTOHLM(vp) ((hlfsmount_t *)(vp)->v_vfsp->vfs_data)
+#define VTOHLN(vp) ((hlnode_t *)(vp)->v_data)
+#define HLNTOV(tp) ((tp)->hln_vnode)
+#define REALVP(vp) ((vnode_t *)VTOHLN(vp)->hln_realvp)
+#define hlnode_hold(tp) VN_HOLD(HLNTOV(tp))
+#define hlnode_rele(tp) VN_RELE(HLNTOV(tp))
+
+#define hln_dir un_hlnode.un_dirstruct.un_dirlist
+#define hln_dirents un_hlnode.un_dirstruct.un_dirents
+#define hln_realvp un_hlnode.un_realvp
+
+/*
+ * Attributes
+ */
+#define hln_mask hln_attr.va_mask
+#define hln_type hln_attr.va_type
+#define hln_mode hln_attr.va_mode
+#define hln_uid hln_attr.va_uid
+#define hln_gid hln_attr.va_gid
+#define hln_fsid hln_attr.va_fsid
+#define hln_nodeid hln_attr.va_nodeid
+#define hln_nlink hln_attr.va_nlink
+#define hln_size hln_attr.va_size
+#define hln_atime hln_attr.va_atime
+#define hln_mtime hln_attr.va_mtime
+#define hln_ctime hln_attr.va_ctime
+#define hln_rdev hln_attr.va_rdev
+#define hln_blksize hln_attr.va_blksize
+#define hln_nblocks hln_attr.va_nblocks
+#define hln_seq hln_attr.va_seq
+
+/*
+ * enums
+ */
+enum de_op { DE_CREATE, DE_MKDIR }; /* direnter ops */
+enum dr_op { DR_REMOVE, DR_RMDIR }; /* dirremove ops */
+
+/*
+ * hyprlofs_minfree is the amount (in pages) of anonymous memory that hyprlofs
+ * leaves free for the rest of the system. The default value for
+ * hyprlofs_minfree is btopr(HYPRLOFSMINFREE) but it can be patched to a
+ * different number of pages. Since hyprlofs doesn't actually use much
+ * memory, its unlikely this ever needs to be patched.
+ */
+#define HYPRLOFSMINFREE 8 * 1024 * 1024 /* 8 Megabytes */
+
+extern size_t hyprlofs_minfree; /* Anonymous memory in pages */
+
+extern void hyprlofs_node_init(hlfsmount_t *, hlnode_t *, vattr_t *,
+ cred_t *);
+extern int hyprlofs_dirlookup(hlnode_t *, char *, hlnode_t **, cred_t *);
+extern int hyprlofs_dirdelete(hlnode_t *, hlnode_t *, char *, enum dr_op,
+ cred_t *);
+extern void hyprlofs_dirinit(hlnode_t *, hlnode_t *);
+extern void hyprlofs_dirtrunc(hlnode_t *);
+extern int hyprlofs_taccess(void *, int, cred_t *);
+extern int hyprlofs_direnter(hlfsmount_t *, hlnode_t *, char *, enum de_op,
+ vnode_t *, vattr_t *, hlnode_t **, cred_t *);
+
+extern struct vnodeops *hyprlofs_vnodeops;
+extern const struct fs_operation_def hyprlofs_vnodeops_template[];
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_FS_HYPRLOFS_INFO_H */
diff --git a/usr/src/uts/common/sys/fs/sdev_impl.h b/usr/src/uts/common/sys/fs/sdev_impl.h
index 19a147d88a..d5bd0162c7 100644
--- a/usr/src/uts/common/sys/fs/sdev_impl.h
+++ b/usr/src/uts/common/sys/fs/sdev_impl.h
@@ -35,6 +35,7 @@ extern "C" {
#include <sys/vfs_opreg.h>
#include <sys/list.h>
#include <sys/nvpair.h>
+#include <sys/fs/sdev_plugin.h>
/*
* sdev_nodes are the file-system specific part of the
@@ -126,6 +127,21 @@ typedef struct sdev_local_data {
struct sdev_dprof sdev_lprof; /* profile for multi-inst */
} sdev_local_data_t;
+/* sdev_flags */
+typedef enum sdev_flags {
+ SDEV_BUILD = 0x0001, /* directory cache out-of-date */
+ SDEV_GLOBAL = 0x0002, /* global /dev nodes */
+ SDEV_PERSIST = 0x0004, /* backing store persisted node */
+ SDEV_NO_NCACHE = 0x0008, /* do not include in neg. cache */
+ SDEV_DYNAMIC = 0x0010, /* special-purpose vnode ops */
+ /* (ex: pts) */
+ SDEV_VTOR = 0x0020, /* validate sdev_nodes during search */
+ SDEV_ATTR_INVALID = 0x0040, /* invalid node attributes, */
+ /* need update */
+ SDEV_SUBDIR = 0x0080, /* match all subdirs under here */
+ SDEV_ZONED = 0x0100 /* zoned subdir */
+} sdev_flags_t;
+
/*
* /dev filesystem sdev_node defines
*/
@@ -148,7 +164,7 @@ typedef struct sdev_node {
ino64_t sdev_ino; /* inode */
uint_t sdev_nlink; /* link count */
int sdev_state; /* state of this node */
- int sdev_flags; /* flags bit */
+ sdev_flags_t sdev_flags; /* flags bit */
kmutex_t sdev_lookup_lock; /* node creation synch lock */
kcondvar_t sdev_lookup_cv; /* node creation sync cv */
@@ -159,7 +175,7 @@ typedef struct sdev_node {
struct sdev_global_data sdev_globaldata;
struct sdev_local_data sdev_localdata;
} sdev_instance_data;
-
+ list_node_t sdev_plist; /* link on plugin list */
void *sdev_private;
} sdev_node_t;
@@ -190,29 +206,11 @@ typedef enum {
SDEV_READY
} sdev_node_state_t;
-/* sdev_flags */
-#define SDEV_BUILD 0x0001 /* directory cache out-of-date */
-#define SDEV_GLOBAL 0x0002 /* global /dev nodes */
-#define SDEV_PERSIST 0x0004 /* backing store persisted node */
-#define SDEV_NO_NCACHE 0x0008 /* do not include in neg. cache */
-#define SDEV_DYNAMIC 0x0010 /* special-purpose vnode ops */
- /* (ex: pts) */
-#define SDEV_VTOR 0x0020 /* validate sdev_nodes during search */
-#define SDEV_ATTR_INVALID 0x0040 /* invalid node attributes, */
- /* need update */
-#define SDEV_SUBDIR 0x0080 /* match all subdirs under here */
-#define SDEV_ZONED 0x0100 /* zoned subdir */
-
/* sdev_lookup_flags */
#define SDEV_LOOKUP 0x0001 /* node creation in progress */
#define SDEV_READDIR 0x0002 /* VDIR readdir in progress */
#define SDEV_LGWAITING 0x0004 /* waiting for devfsadm completion */
-#define SDEV_VTOR_INVALID -1
-#define SDEV_VTOR_SKIP 0
-#define SDEV_VTOR_VALID 1
-#define SDEV_VTOR_STALE 2
-
/* convenient macros */
#define SDEV_IS_GLOBAL(dv) \
(dv->sdev_flags & SDEV_GLOBAL)
@@ -364,8 +362,13 @@ extern void sdev_devfsadmd_thread(struct sdev_node *, struct sdev_node *,
extern int devname_profile_update(char *, size_t);
extern struct sdev_data *sdev_find_mntinfo(char *);
void sdev_mntinfo_rele(struct sdev_data *);
+typedef void (*sdev_mnt_walk_f)(struct sdev_node *, void *);
+void sdev_mnt_walk(sdev_mnt_walk_f, void *);
extern struct vnodeops *devpts_getvnodeops(void);
extern struct vnodeops *devvt_getvnodeops(void);
+extern void sdev_plugin_nodeready(struct sdev_node *);
+extern int sdev_plugin_init(void);
+extern int sdev_plugin_fini(void);
/*
* boot states - warning, the ordering here is significant
@@ -510,6 +513,23 @@ extern void sdev_nc_path_exists(sdev_nc_list_t *, char *);
extern void sdev_modctl_dump_files(void);
/*
+ * plugin and legacy vtab stuff
+ */
+/* directory dependent vop table */
+typedef struct sdev_vop_table {
+ char *vt_name; /* subdirectory name */
+ const fs_operation_def_t *vt_service; /* vnodeops table */
+ struct vnodeops **vt_global_vops; /* global container for vop */
+ int (*vt_vtor)(struct sdev_node *); /* validate sdev_node */
+ int vt_flags;
+} sdev_vop_table_t;
+
+extern struct sdev_vop_table vtab[];
+extern struct vnodeops *sdev_get_vop(struct sdev_node *);
+extern void sdev_set_no_negcache(struct sdev_node *);
+extern void *sdev_get_vtor(struct sdev_node *dv);
+
+/*
* globals
*/
extern kmutex_t sdev_lock;
@@ -522,6 +542,7 @@ extern struct vnodeops *devipnet_vnodeops;
extern struct vnodeops *devvt_vnodeops;
extern struct sdev_data *sdev_origins; /* mount info for global /dev instance */
extern struct vnodeops *devzvol_vnodeops;
+extern int sdev_vnodeops_tbl_size;
extern const fs_operation_def_t sdev_vnodeops_tbl[];
extern const fs_operation_def_t devpts_vnodeops_tbl[];
diff --git a/usr/src/uts/common/sys/fs/sdev_plugin.h b/usr/src/uts/common/sys/fs/sdev_plugin.h
new file mode 100644
index 0000000000..8783df58e6
--- /dev/null
+++ b/usr/src/uts/common/sys/fs/sdev_plugin.h
@@ -0,0 +1,106 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_SDEV_PLUGIN_H
+#define _SYS_SDEV_PLUGIN_H
+
+/*
+ * Kernel sdev plugin interface
+ */
+
+#ifdef _KERNEL
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/vnode.h>
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+typedef uintptr_t sdev_plugin_hdl_t;
+typedef uintptr_t sdev_ctx_t;
+
+/*
+ * Valid return values for sdev_plugin_validate_t.
+ */
+typedef enum sdev_plugin_validate {
+ SDEV_VTOR_INVALID = -1,
+ SDEV_VTOR_SKIP = 0,
+ SDEV_VTOR_VALID = 1,
+ SDEV_VTOR_STALE = 2
+} sdev_plugin_validate_t;
+
+/*
+ * Valid flags
+ */
+typedef enum sdev_plugin_flags {
+ SDEV_PLUGIN_NO_NCACHE = 0x1,
+ SDEV_PLUGIN_SUBDIR = 0x2
+} sdev_plugin_flags_t;
+
+#define SDEV_PLUGIN_FLAGS_MASK 0x3
+
+/*
+ * Functions a module must implement
+ */
+typedef sdev_plugin_validate_t (*sp_valid_f)(sdev_ctx_t);
+typedef int (*sp_filldir_f)(sdev_ctx_t);
+typedef void (*sp_inactive_f)(sdev_ctx_t);
+
+#define SDEV_PLUGIN_VERSION 1
+
+typedef struct sdev_plugin_ops {
+ int spo_version;
+ sdev_plugin_flags_t spo_flags;
+ sp_valid_f spo_validate;
+ sp_filldir_f spo_filldir;
+ sp_inactive_f spo_inactive;
+} sdev_plugin_ops_t;
+
+extern sdev_plugin_hdl_t sdev_plugin_register(const char *, sdev_plugin_ops_t *,
+ int *);
+extern int sdev_plugin_unregister(sdev_plugin_hdl_t);
+
+typedef enum sdev_ctx_flags {
+ SDEV_CTX_GLOBAL = 0x2 /* node belongs to the GZ */
+} sdev_ctx_flags_t;
+
+/*
+ * Context helper functions
+ */
+extern sdev_ctx_flags_t sdev_ctx_flags(sdev_ctx_t);
+extern const char *sdev_ctx_name(sdev_ctx_t);
+extern const char *sdev_ctx_path(sdev_ctx_t);
+extern enum vtype sdev_ctx_vtype(sdev_ctx_t);
+extern const void *sdev_ctx_vtype_data(sdev_ctx_t);
+
+/*
+ * Callbacks to manipulate nodes
+ */
+extern int sdev_plugin_mkdir(sdev_ctx_t, char *);
+extern int sdev_plugin_mknod(sdev_ctx_t, char *, mode_t, dev_t);
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_SDEV_PLUGIN_H */
diff --git a/usr/src/uts/common/sys/fs/tmp.h b/usr/src/uts/common/sys/fs/tmp.h
index 68dd67c61e..4a6e86bdd9 100644
--- a/usr/src/uts/common/sys/fs/tmp.h
+++ b/usr/src/uts/common/sys/fs/tmp.h
@@ -68,29 +68,28 @@ enum dr_op { DR_REMOVE, DR_RMDIR, DR_RENAME }; /* dirremove ops */
/*
* tmpfs_minfree is the amount (in pages) of anonymous memory that tmpfs
- * leaves free for the rest of the system. E.g. in a system with 32MB of
- * configured swap space, if 16MB were reserved (leaving 16MB free),
- * tmpfs could allocate up to 16MB - tmpfs_minfree. The default value
- * for tmpfs_minfree is btopr(TMPMINFREE) but it can cautiously patched
- * to a different number of pages.
- * NB: If tmpfs allocates too much swap space, other processes will be
- * unable to execute.
+ * leaves free for the rest of the system. In antiquity, this number could be
+ * relevant on a system-wide basis, as physical DRAM was routinely exhausted;
+ * however, in more modern times, the relative growth of DRAM with respect to
+ * application footprint means that this number is only likely to become
+ * factor in a virtualized OS environment (e.g., a zone) -- and even then only
+ * when DRAM and swap have both been capped low to allow for maximum tenancy.
+ * TMPMINFREE -- the value from which tmpfs_minfree is derived -- should
+ * therefore be configured to a value that is roughly the smallest practical
+ * value for memory + swap minus the largest reasonable size for tmpfs in such
+ * a configuration. As of this writing, the smallest practical memory + swap
+ * configuration is 128MB, and it seems reasonable to allow tmpfs to consume
+ * no more than seven-eighths of this, yielding a TMPMINFREE of 16MB. Care
+ * should be exercised in changing this: tuning this value too high will
+ * result in spurious ENOSPC errors in tmpfs in small zones (a problem that
+ * can induce cascading failure surprisingly often); tuning this value too low
+ * will result in tmpfs consumption alone to alone induce application-level
+ * memory allocation failure.
*/
-#define TMPMINFREE 2 * 1024 * 1024 /* 2 Megabytes */
+#define TMPMINFREE 16 * 1024 * 1024 /* 16 Megabytes */
extern size_t tmpfs_minfree; /* Anonymous memory in pages */
-/*
- * tmpfs can allocate only a certain percentage of kernel memory,
- * which is used for tmpnodes, directories, file names, etc.
- * This is statically set as TMPMAXFRACKMEM of physical memory.
- * The actual number of allocatable bytes can be patched in tmpfs_maxkmem.
- */
-#define TMPMAXFRACKMEM 25 /* 1/25 of physical memory */
-
-extern size_t tmp_kmemspace;
-extern size_t tmpfs_maxkmem; /* Allocatable kernel memory in bytes */
-
extern void tmpnode_init(struct tmount *, struct tmpnode *,
struct vattr *, struct cred *);
extern int tmpnode_trunc(struct tmount *, struct tmpnode *, ulong_t);
@@ -101,8 +100,6 @@ extern int tdirdelete(struct tmpnode *, struct tmpnode *, char *,
enum dr_op, struct cred *);
extern void tdirinit(struct tmpnode *, struct tmpnode *);
extern void tdirtrunc(struct tmpnode *);
-extern void *tmp_memalloc(size_t, int);
-extern void tmp_memfree(void *, size_t);
extern int tmp_resv(struct tmount *, struct tmpnode *, size_t, int);
extern int tmp_taccess(void *, int, struct cred *);
extern int tmp_sticky_remove_access(struct tmpnode *, struct tmpnode *,
diff --git a/usr/src/uts/common/sys/gsqueue.h b/usr/src/uts/common/sys/gsqueue.h
new file mode 100644
index 0000000000..40ef4ce982
--- /dev/null
+++ b/usr/src/uts/common/sys/gsqueue.h
@@ -0,0 +1,65 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_GSQUEUE_H
+#define _SYS_GSQUEUE_H
+
+/*
+ * Standard interfaces to serializaion queues for everyone (except IP).
+ */
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+typedef struct gsqueue gsqueue_t;
+typedef struct gsqueue_set gsqueue_set_t;
+
+typedef void (*gsqueue_cb_f)(gsqueue_set_t *, gsqueue_t *, void *, boolean_t);
+typedef void (*gsqueue_proc_f)(void *, mblk_t *, gsqueue_t *, void *);
+
+extern gsqueue_set_t *gsqueue_set_create(uint_t, pri_t);
+extern void gsqueue_set_destroy(gsqueue_set_t *);
+extern gsqueue_t *gsqueue_set_get(gsqueue_set_t *, uint_t);
+
+extern uintptr_t gsqueue_set_cb_add(gsqueue_set_t *, gsqueue_cb_f, void *);
+extern int gsqueue_set_cb_remove(gsqueue_set_t *, uintptr_t);
+
+#define GSQUEUE_FILL 0x0001
+#define GSQUEUE_NODRAIN 0x0002
+#define GSQUEUE_PROCESS 0x0004
+
+extern void gsqueue_enter_one(gsqueue_t *, mblk_t *, gsqueue_proc_f, void *,
+ int, uint8_t);
+
+/*
+ * The default wait is inherited from IP. This determines the amount of time
+ * that must pass after queuing work, before we wake up the worker thread. This
+ * value is in milliseconds.
+ */
+#define GSQUEUE_DEFAULT_WAIT 10
+#define GSQUEUE_DEFAULT_PRIORITY MAXCLSYSPRI
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_GSQUEUE_H */
diff --git a/usr/src/uts/common/sys/inotify.h b/usr/src/uts/common/sys/inotify.h
new file mode 100644
index 0000000000..8acc1a7280
--- /dev/null
+++ b/usr/src/uts/common/sys/inotify.h
@@ -0,0 +1,153 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Header file to support for the inotify facility. Note that this facility
+ * is designed to be binary compatible with the Linux inotify facility; values
+ * for constants here should therefore exactly match those found in Linux, and
+ * this facility shouldn't be extended independently of Linux.
+ */
+
+#ifndef _SYS_INOTIFY_H
+#define _SYS_INOTIFY_H
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Events that can be explicitly requested on any inotify watch.
+ */
+#define IN_ACCESS 0x00000001
+#define IN_MODIFY 0x00000002
+#define IN_ATTRIB 0x00000004
+#define IN_CLOSE_WRITE 0x00000008
+#define IN_CLOSE_NOWRITE 0x00000010
+#define IN_OPEN 0x00000020
+#define IN_MOVED_FROM 0x00000040
+#define IN_MOVED_TO 0x00000080
+#define IN_CREATE 0x00000100
+#define IN_DELETE 0x00000200
+#define IN_DELETE_SELF 0x00000400
+#define IN_MOVE_SELF 0x00000800
+
+/*
+ * Events that can be sent to an inotify watch -- requested or not.
+ */
+#define IN_UNMOUNT 0x00002000
+#define IN_Q_OVERFLOW 0x00004000
+#define IN_IGNORED 0x00008000
+
+/*
+ * Flags that can modify an inotify event.
+ */
+#define IN_ONLYDIR 0x01000000
+#define IN_DONT_FOLLOW 0x02000000
+#define IN_EXCL_UNLINK 0x04000000
+#define IN_MASK_ADD 0x20000000
+#define IN_ISDIR 0x40000000
+#define IN_ONESHOT 0x80000000
+
+/*
+ * Helpful constants.
+ */
+#define IN_CLOSE (IN_CLOSE_WRITE | IN_CLOSE_NOWRITE)
+#define IN_MOVE (IN_MOVED_FROM | IN_MOVED_TO)
+#define IN_ALL_EVENTS \
+ (IN_ACCESS | IN_MODIFY | IN_ATTRIB | IN_CLOSE_WRITE | \
+ IN_CLOSE_NOWRITE | IN_OPEN | IN_MOVED_FROM | IN_MOVED_TO | \
+ IN_DELETE | IN_CREATE | IN_DELETE_SELF | IN_MOVE_SELF)
+
+#define IN_CHILD_EVENTS \
+ (IN_ACCESS | IN_MODIFY | IN_ATTRIB | IN_CLOSE_WRITE | \
+ IN_CLOSE_NOWRITE | IN_MODIFY | IN_OPEN)
+
+/*
+ * To assure binary compatibility with Linux, these values are fixed at their
+ * Linux equivalents, not their native ones.
+ */
+#define IN_CLOEXEC 02000000 /* LX_O_CLOEXEC */
+#define IN_NONBLOCK 04000 /* LX_O_NONBLOCK */
+
+struct inotify_event {
+ int32_t wd; /* watch descriptor */
+ uint32_t mask; /* mask of events */
+ uint32_t cookie; /* event association cookie, if any */
+ uint32_t len; /* size of name field */
+ char name[]; /* optional NUL-terminated name */
+};
+
+/*
+ * These ioctl values are specific to the native implementation; applications
+ * shouldn't be using them directly, and they should therefore be safe to
+ * change without breaking apps.
+ */
+#define INOTIFYIOC (('i' << 24) | ('n' << 16) | ('y' << 8))
+#define INOTIFYIOC_ADD_WATCH (INOTIFYIOC | 1) /* add watch */
+#define INOTIFYIOC_RM_WATCH (INOTIFYIOC | 2) /* remove watch */
+#define INOTIFYIOC_ADD_CHILD (INOTIFYIOC | 3) /* add child watch */
+#define INOTIFYIOC_ACTIVATE (INOTIFYIOC | 4) /* activate watch */
+
+#ifndef _LP64
+#ifndef _LITTLE_ENDIAN
+#define INOTIFY_PTR(type, name) uint32_t name##pad; type *name
+#else
+#define INOTIFY_PTR(type, name) type *name; uint32_t name##pad
+#endif
+#else
+#define INOTIFY_PTR(type, name) type *name
+#endif
+
+typedef struct inotify_addwatch {
+ int inaw_fd; /* open fd for object */
+ uint32_t inaw_mask; /* desired mask */
+} inotify_addwatch_t;
+
+typedef struct inotify_addchild {
+ INOTIFY_PTR(char, inac_name); /* pointer to name */
+ int inac_fd; /* open fd for parent */
+} inotify_addchild_t;
+
+#ifndef _KERNEL
+
+extern int inotify_init(void);
+extern int inotify_init1(int);
+extern int inotify_add_watch(int, const char *, uint32_t);
+extern int inotify_rm_watch(int, int);
+
+#else
+
+#define IN_UNMASKABLE \
+ (IN_UNMOUNT | IN_Q_OVERFLOW | IN_IGNORED | IN_ISDIR)
+
+#define IN_MODIFIERS \
+ (IN_EXCL_UNLINK | IN_ONESHOT)
+
+#define IN_FLAGS \
+ (IN_ONLYDIR | IN_DONT_FOLLOW | IN_MASK_ADD)
+
+#define IN_REMOVAL (1ULL << 32)
+#define INOTIFYMNRN_INOTIFY 0
+#define INOTIFYMNRN_CLONE 1
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_INOTIFY_H */
diff --git a/usr/src/uts/common/sys/mac.h b/usr/src/uts/common/sys/mac.h
index bd09077f1b..5803ad58d4 100644
--- a/usr/src/uts/common/sys/mac.h
+++ b/usr/src/uts/common/sys/mac.h
@@ -213,6 +213,7 @@ typedef enum {
MAC_PROP_MAX_RXHWCLNT_AVAIL,
MAC_PROP_MAX_TXHWCLNT_AVAIL,
MAC_PROP_IB_LINKMODE,
+ MAC_PROP_VN_PROMISC_FILTERED,
MAC_PROP_SECONDARY_ADDRS,
MAC_PROP_PRIVATE = -1
} mac_prop_id_t;
diff --git a/usr/src/uts/common/sys/mac_client_impl.h b/usr/src/uts/common/sys/mac_client_impl.h
index ae25df6a0d..ce1a6f1d7c 100644
--- a/usr/src/uts/common/sys/mac_client_impl.h
+++ b/usr/src/uts/common/sys/mac_client_impl.h
@@ -22,6 +22,9 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ */
#ifndef _SYS_MAC_CLIENT_IMPL_H
#define _SYS_MAC_CLIENT_IMPL_H
@@ -137,6 +140,7 @@ struct mac_client_impl_s { /* Protected by */
flow_entry_t *mci_flent_list; /* mci_rw_lock */
uint_t mci_nflents; /* mci_rw_lock */
uint_t mci_nvids; /* mci_rw_lock */
+ volatile uint32_t mci_vidcache; /* VID cache */
/* Resource Management Functions */
mac_resource_add_t mci_resource_add; /* SL */
@@ -280,6 +284,28 @@ extern int mac_tx_percpu_cnt;
} \
}
+/*
+ * To allow the hot path to not grab any additional locks, we keep a single
+ * entry VLAD ID cache that caches whether or not a given VID belongs to a
+ * MAC client.
+ */
+#define MCIP_VIDCACHE_VALIDSHIFT 31
+#define MCIP_VIDCACHE_VIDSHIFT 1
+#define MCIP_VIDCACHE_VIDMASK (UINT16_MAX << MCIP_VIDCACHE_VIDSHIFT)
+#define MCIP_VIDCACHE_BOOLSHIFT 0
+
+#define MCIP_VIDCACHE_INVALID 0
+
+#define MCIP_VIDCACHE_CACHE(vid, bool) \
+ ((1U << MCIP_VIDCACHE_VALIDSHIFT) | \
+ ((vid) << MCIP_VIDCACHE_VIDSHIFT) | \
+ ((bool) ? (1U << MCIP_VIDCACHE_BOOLSHIFT) : 0))
+
+#define MCIP_VIDCACHE_ISVALID(v) ((v) & (1U << MCIP_VIDCACHE_VALIDSHIFT))
+#define MCIP_VIDCACHE_VID(v) \
+ (((v) & MCIP_VIDCACHE_VIDMASK) >> MCIP_VIDCACHE_VIDSHIFT)
+#define MCIP_VIDCACHE_BOOL(v) ((v) & (1U << MCIP_VIDCACHE_BOOLSHIFT))
+
#define MAC_TAG_NEEDED(mcip) \
(((mcip)->mci_state_flags & MCIS_TAG_DISABLE) == 0 && \
(mcip)->mci_nvids == 1) \
@@ -302,6 +328,7 @@ extern int mac_tx_percpu_cnt;
/* Mac protection flags */
#define MPT_FLAG_V6_LOCAL_ADDR_SET 0x0001
+#define MPT_FLAG_PROMISC_FILTERED 0x0002
/* in mac_client.c */
extern void mac_promisc_client_dispatch(mac_client_impl_t *, mblk_t *);
diff --git a/usr/src/uts/common/sys/mac_impl.h b/usr/src/uts/common/sys/mac_impl.h
index 3c9c6b77a4..d19c19e43d 100644
--- a/usr/src/uts/common/sys/mac_impl.h
+++ b/usr/src/uts/common/sys/mac_impl.h
@@ -894,6 +894,8 @@ extern void mac_protect_fini(mac_client_impl_t *);
extern int mac_set_resources(mac_handle_t, mac_resource_props_t *);
extern void mac_get_resources(mac_handle_t, mac_resource_props_t *);
extern void mac_get_effective_resources(mac_handle_t, mac_resource_props_t *);
+extern void mac_set_promisc_filtered(mac_client_handle_t, boolean_t);
+extern boolean_t mac_get_promisc_filtered(mac_client_handle_t);
extern cpupart_t *mac_pset_find(mac_resource_props_t *, boolean_t *);
extern void mac_set_pool_effective(boolean_t, cpupart_t *,
diff --git a/usr/src/uts/common/sys/mman.h b/usr/src/uts/common/sys/mman.h
index a4cca68098..8f4cd1639f 100644
--- a/usr/src/uts/common/sys/mman.h
+++ b/usr/src/uts/common/sys/mman.h
@@ -25,6 +25,7 @@
*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
@@ -330,6 +331,7 @@ struct memcntl_mha32 {
#define MS_SYNC 0x4 /* wait for msync */
#define MS_ASYNC 0x1 /* return immediately */
#define MS_INVALIDATE 0x2 /* invalidate caches */
+#define MS_INVALCURPROC 0x8 /* invalidate cache for curproc only */
#if (_POSIX_C_SOURCE <= 2) && !defined(_XPG4_2) || defined(__EXTENSIONS__)
/* functions to mctl */
diff --git a/usr/src/uts/common/sys/mntent.h b/usr/src/uts/common/sys/mntent.h
index e95ef3fccc..d215d88790 100644
--- a/usr/src/uts/common/sys/mntent.h
+++ b/usr/src/uts/common/sys/mntent.h
@@ -21,6 +21,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012, Joyent, Inc. All rights reserved.
*
* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
* All Rights Reserved
@@ -47,6 +48,7 @@ extern "C" {
#define MNTTYPE_PCFS "pcfs" /* PC (MSDOS) file system */
#define MNTTYPE_PC MNTTYPE_PCFS /* Deprecated name; use MNTTYPE_PCFS */
#define MNTTYPE_LOFS "lofs" /* Loop back file system */
+#define MNTTYPE_HYPRLOFS "hyprlofs" /* Hyperlofs file system */
#define MNTTYPE_LO MNTTYPE_LOFS /* Deprecated name; use MNTTYPE_LOFS */
#define MNTTYPE_HSFS "hsfs" /* High Sierra (9660) file system */
#define MNTTYPE_SWAP "swap" /* Swap file system */
diff --git a/usr/src/uts/common/sys/neti.h b/usr/src/uts/common/sys/neti.h
index 93b5fc3e01..ea85c78f6b 100644
--- a/usr/src/uts/common/sys/neti.h
+++ b/usr/src/uts/common/sys/neti.h
@@ -44,6 +44,8 @@ extern "C" {
#define NHF_INET "NHF_INET"
#define NHF_INET6 "NHF_INET6"
#define NHF_ARP "NHF_ARP"
+#define NHF_VND_INET "NHF_VND_INET"
+#define NHF_VND_INET6 "NHF_VND_INET6"
/*
* Event identification
diff --git a/usr/src/uts/common/sys/netstack.h b/usr/src/uts/common/sys/netstack.h
index 2c77e1be96..73f29d1e63 100644
--- a/usr/src/uts/common/sys/netstack.h
+++ b/usr/src/uts/common/sys/netstack.h
@@ -81,7 +81,8 @@ typedef id_t netstackid_t;
#define NS_IPSECESP 16
#define NS_IPNET 17
#define NS_ILB 18
-#define NS_MAX (NS_ILB+1)
+#define NS_VND 19
+#define NS_MAX (NS_VND+1)
/*
* State maintained for each module which tracks the state of
diff --git a/usr/src/uts/common/sys/param.h b/usr/src/uts/common/sys/param.h
index 01c25610dd..12bdc29079 100644
--- a/usr/src/uts/common/sys/param.h
+++ b/usr/src/uts/common/sys/param.h
@@ -103,7 +103,7 @@ extern "C" {
#define DEFAULT_MAXPID 999999
#define DEFAULT_JUMPPID 100000
#else
-#define DEFAULT_MAXPID 30000
+#define DEFAULT_MAXPID 99999
#define DEFAULT_JUMPPID 0
#endif
diff --git a/usr/src/uts/common/sys/policy.h b/usr/src/uts/common/sys/policy.h
index 276c4d386e..597c166b31 100644
--- a/usr/src/uts/common/sys/policy.h
+++ b/usr/src/uts/common/sys/policy.h
@@ -173,6 +173,7 @@ int secpolicy_setid_setsticky_clear(vnode_t *, vattr_t *,
const vattr_t *, cred_t *);
int secpolicy_xvattr(xvattr_t *, uid_t, cred_t *, vtype_t);
int secpolicy_xvm_control(const cred_t *);
+int secpolicy_hyprlofs_control(const cred_t *);
int secpolicy_basic_exec(const cred_t *, vnode_t *);
int secpolicy_basic_fork(const cred_t *);
diff --git a/usr/src/uts/common/sys/poll.h b/usr/src/uts/common/sys/poll.h
index 9fff78a966..efc8457a6a 100644
--- a/usr/src/uts/common/sys/poll.h
+++ b/usr/src/uts/common/sys/poll.h
@@ -30,6 +30,10 @@
* All rights reserved.
*/
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
#ifndef _SYS_POLL_H
#define _SYS_POLL_H
@@ -59,6 +63,7 @@ typedef unsigned long nfds_t;
#define POLLWRNORM POLLOUT
#define POLLRDBAND 0x0080 /* out-of-band data is readable */
#define POLLWRBAND 0x0100 /* out-of-band data is writeable */
+#define POLLRDHUP 0x4000 /* read-side hangup */
#define POLLNORM POLLRDNORM
@@ -70,7 +75,13 @@ typedef unsigned long nfds_t;
#define POLLHUP 0x0010 /* fd has been hung up on */
#define POLLNVAL 0x0020 /* invalid pollfd entry */
-#define POLLREMOVE 0x0800 /* remove a cached poll fd from /dev/poll */
+/*
+ * These events will never be specified in revents, but may be specified in
+ * events to control /dev/poll behavior.
+ */
+#define POLLREMOVE 0x0800 /* remove cached /dev/poll fd */
+#define POLLONESHOT 0x1000 /* /dev/poll should one-shot this fd */
+#define POLLET 0x2000 /* edge-triggered /dev/poll fd */
#ifdef _KERNEL
diff --git a/usr/src/uts/common/sys/poll_impl.h b/usr/src/uts/common/sys/poll_impl.h
index ede99d0df2..2e866ec4d4 100644
--- a/usr/src/uts/common/sys/poll_impl.h
+++ b/usr/src/uts/common/sys/poll_impl.h
@@ -24,11 +24,13 @@
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
#ifndef _SYS_POLL_IMPL_H
#define _SYS_POLL_IMPL_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Caching Poll Subsystem:
*
@@ -160,6 +162,7 @@ typedef struct polldat {
int pd_nsets; /* num of xref sets, used by poll(2) */
xref_t *pd_ref; /* ptr to xref info, 1 for each set */
struct port_kevent *pd_portev; /* associated port event struct */
+ uint64_t pd_epolldata; /* epoll data, if any */
} polldat_t;
/*
@@ -187,7 +190,8 @@ typedef struct pollcache {
} pollcache_t;
/* pc_flag */
-#define T_POLLWAKE 0x02 /* pollwakeup() occurred */
+#define PC_POLLWAKE 0x02 /* pollwakeup() occurred */
+#define PC_WRITEWANTED 0x04 /* writer wishes to modify the pollcache_t */
#if defined(_KERNEL)
/*
diff --git a/usr/src/uts/common/sys/proc.h b/usr/src/uts/common/sys/proc.h
index f1a2fc5485..ee5892066b 100644
--- a/usr/src/uts/common/sys/proc.h
+++ b/usr/src/uts/common/sys/proc.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -346,7 +347,12 @@ typedef struct proc {
struct zone *p_zone; /* zone in which process lives */
struct vnode *p_execdir; /* directory that p_exec came from */
struct brand *p_brand; /* process's brand */
- void *p_brand_data; /* per-process brand state */
+
+ /* per-process brand state */
+ union {
+ void *__brand_data;
+ int __exit_data;
+ } __p_brand_data;
/* additional lock to protect p_sessp (but not its contents) */
kmutex_t p_splock;
@@ -361,7 +367,8 @@ typedef struct proc {
*/
struct user p_user; /* (see sys/user.h) */
} proc_t;
-
+#define p_brand_data __p_brand_data.__brand_data
+#define p_exit_data __p_brand_data.__exit_data
#define PROC_T /* headers relying on proc_t are OK */
#ifdef _KERNEL
diff --git a/usr/src/uts/common/sys/ptms.h b/usr/src/uts/common/sys/ptms.h
index 6c79ee266d..ba8b2b1210 100644
--- a/usr/src/uts/common/sys/ptms.h
+++ b/usr/src/uts/common/sys/ptms.h
@@ -126,6 +126,12 @@ extern void ptms_logp(char *, uintptr_t);
#define DDBGP(a, b)
#endif
+typedef struct __ptmptsopencb_arg *ptmptsopencb_arg_t;
+typedef struct ptmptsopencb {
+ boolean_t (*ppocb_func)(ptmptsopencb_arg_t);
+ ptmptsopencb_arg_t ppocb_arg;
+} ptmptsopencb_t;
+
#endif /* _KERNEL */
typedef struct pt_own {
@@ -157,6 +163,19 @@ typedef struct pt_own {
#define ZONEPT (('P'<<8)|4) /* set zone of master/slave pair */
#define OWNERPT (('P'<<8)|5) /* set owner/group for slave device */
+#ifdef _KERNEL
+/*
+ * kernel ioctl commands
+ *
+ * PTMPTSOPENCB: Returns a callback function pointer and opaque argument.
+ * The return value of the callback function when it's invoked
+ * with the opaque argument passed to it will indicate if the
+ * pts slave device is currently open.
+ */
+#define PTMPTSOPENCB (('P'<<8)|6) /* check if the slave is open */
+
+#endif /* _KERNEL */
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/sys/resource.h b/usr/src/uts/common/sys/resource.h
index d69b23aade..666fbcd8a4 100644
--- a/usr/src/uts/common/sys/resource.h
+++ b/usr/src/uts/common/sys/resource.h
@@ -23,6 +23,7 @@
*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -191,6 +192,7 @@ struct rusage {
#define _RUSAGESYS_GETRUSAGE_CHLD 1 /* rusage child process */
#define _RUSAGESYS_GETRUSAGE_LWP 2 /* rusage lwp */
#define _RUSAGESYS_GETVMUSAGE 3 /* getvmusage */
+#define _RUSAGESYS_INVALMAP 4 /* vm_map_inval */
#if defined(_SYSCALL32)
diff --git a/usr/src/uts/common/sys/socket.h b/usr/src/uts/common/sys/socket.h
index 590551f6f2..bbe2121d0e 100644
--- a/usr/src/uts/common/sys/socket.h
+++ b/usr/src/uts/common/sys/socket.h
@@ -39,6 +39,9 @@
/* Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved. */
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
#ifndef _SYS_SOCKET_H
#define _SYS_SOCKET_H
@@ -293,8 +296,9 @@ struct linger {
#define AF_INET_OFFLOAD 30 /* Sun private; do not use */
#define AF_TRILL 31 /* TRILL interface */
#define AF_PACKET 32 /* PF_PACKET Linux socket interface */
+#define AF_LX_NETLINK 33 /* Linux-compatible netlink */
-#define AF_MAX 32
+#define AF_MAX 33
/*
* Protocol families, same as address families for now.
@@ -334,6 +338,7 @@ struct linger {
#define PF_INET_OFFLOAD AF_INET_OFFLOAD /* Sun private; do not use */
#define PF_TRILL AF_TRILL
#define PF_PACKET AF_PACKET
+#define PF_LX_NETLINK AF_LX_NETLINK
#define PF_MAX AF_MAX
diff --git a/usr/src/uts/common/sys/squeue.h b/usr/src/uts/common/sys/squeue.h
index f1bd429815..35e1cf64c7 100644
--- a/usr/src/uts/common/sys/squeue.h
+++ b/usr/src/uts/common/sys/squeue.h
@@ -29,6 +29,17 @@
extern "C" {
#endif
+/*
+ * Originally in illumos, we had an IP-centric view of the serialization queue
+ * abstraction. While that has useful properties, the implementation of squeues
+ * hardcodes various parts of the implementation of IP into it which makes it
+ * unsuitable for other consumers. To enable them, we created another interface,
+ * but opted not to port all of the functionality that IP uses in the form of
+ * ip_squeue.c As other consumers need the functionality that IP has in squeues,
+ * then we'll come up with more genericized methods and add that functionality
+ * to <sys/gsqueue.h>. Please do not continue to use this header.
+ */
+
#include <sys/types.h>
#include <sys/processor.h>
#include <sys/stream.h>
@@ -76,12 +87,13 @@ typedef enum {
struct ip_recv_attr_s;
extern void squeue_init(void);
-extern squeue_t *squeue_create(clock_t, pri_t);
+extern squeue_t *squeue_create(clock_t, pri_t, boolean_t);
extern void squeue_bind(squeue_t *, processorid_t);
extern void squeue_unbind(squeue_t *);
extern void squeue_enter(squeue_t *, mblk_t *, mblk_t *,
uint32_t, struct ip_recv_attr_s *, int, uint8_t);
extern uintptr_t *squeue_getprivate(squeue_t *, sqprivate_t);
+extern void squeue_destroy(squeue_t *);
struct conn_s;
extern int squeue_synch_enter(struct conn_s *, mblk_t *);
diff --git a/usr/src/uts/common/sys/squeue_impl.h b/usr/src/uts/common/sys/squeue_impl.h
index 22550886eb..d2418bbc15 100644
--- a/usr/src/uts/common/sys/squeue_impl.h
+++ b/usr/src/uts/common/sys/squeue_impl.h
@@ -117,6 +117,7 @@ struct squeue_s {
squeue_set_t *sq_set; /* managed by squeue creator */
pri_t sq_priority; /* squeue thread priority */
+ boolean_t sq_isip; /* use IP-centric features */
/* Keep the debug-only fields at the end of the structure */
#ifdef DEBUG
@@ -165,6 +166,7 @@ struct squeue_s {
#define SQS_POLL_RESTART_DONE 0x01000000
#define SQS_POLL_THR_QUIESCE 0x02000000
#define SQS_PAUSE 0x04000000 /* The squeue has been paused */
+#define SQS_EXIT 0x08000000 /* squeue is being torn down */
#define SQS_WORKER_THR_CONTROL \
(SQS_POLL_QUIESCE | SQS_POLL_RESTART | SQS_POLL_CLEANUP)
diff --git a/usr/src/uts/common/sys/sunddi.h b/usr/src/uts/common/sys/sunddi.h
index 8d1fd5e730..18d2284b4a 100644
--- a/usr/src/uts/common/sys/sunddi.h
+++ b/usr/src/uts/common/sys/sunddi.h
@@ -462,6 +462,7 @@ extern size_t strlcat(char *, const char *, size_t);
extern size_t strlcpy(char *, const char *, size_t);
extern size_t strspn(const char *, const char *);
extern size_t strcspn(const char *, const char *);
+extern char *strsep(char **, const char *);
extern int bcmp(const void *, const void *, size_t) __PURE;
extern int stoi(char **);
extern void numtos(ulong_t, char *);
diff --git a/usr/src/uts/common/sys/systrace.h b/usr/src/uts/common/sys/systrace.h
index d43974451e..17e509d4d8 100644
--- a/usr/src/uts/common/sys/systrace.h
+++ b/usr/src/uts/common/sys/systrace.h
@@ -22,13 +22,12 @@
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_SYSTRACE_H
#define _SYS_SYSTRACE_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/dtrace.h>
#ifdef __cplusplus
@@ -47,16 +46,18 @@ extern systrace_sysent_t *systrace_sysent;
extern systrace_sysent_t *systrace_sysent32;
extern void (*systrace_probe)(dtrace_id_t, uintptr_t, uintptr_t,
- uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+ uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
extern void systrace_stub(dtrace_id_t, uintptr_t, uintptr_t,
- uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+ uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
extern int64_t dtrace_systrace_syscall(uintptr_t arg0, uintptr_t arg1,
- uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5);
+ uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5,
+ uintptr_t arg6, uintptr_t arg7);
#ifdef _SYSCALL32_IMPL
extern int64_t dtrace_systrace_syscall32(uintptr_t arg0, uintptr_t arg1,
- uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5);
+ uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5,
+ uintptr_t arg6, uintptr_t arg7);
#endif
#endif
diff --git a/usr/src/uts/common/sys/termios.h b/usr/src/uts/common/sys/termios.h
index 09be20858d..889a7096cd 100644
--- a/usr/src/uts/common/sys/termios.h
+++ b/usr/src/uts/common/sys/termios.h
@@ -361,6 +361,24 @@ extern pid_t tcgetsid(int);
#define TCSETSF (_TIOC|16)
/*
+ * linux terminal ioctls we need to be aware of
+ */
+#define TIOCSETLD (_TIOC|123) /* set line discipline parms */
+#define TIOCGETLD (_TIOC|124) /* get line discipline parms */
+
+/*
+ * The VMIN and VTIME and solaris overlap with VEOF and VEOL - This is
+ * perfectly legal except, linux expects them to be separate. So we keep
+ * them separately.
+ */
+struct lx_cc {
+ unsigned char veof; /* veof value */
+ unsigned char veol; /* veol value */
+ unsigned char vmin; /* vmin value */
+ unsigned char vtime; /* vtime value */
+};
+
+/*
* NTP PPS ioctls
*/
#define TIOCGPPS (_TIOC|125)
diff --git a/usr/src/uts/common/sys/thread.h b/usr/src/uts/common/sys/thread.h
index 188230d61e..c7f460e7c7 100644
--- a/usr/src/uts/common/sys/thread.h
+++ b/usr/src/uts/common/sys/thread.h
@@ -68,6 +68,8 @@ typedef struct ctxop {
void (*free_op)(void *, int); /* function which frees the context */
void *arg; /* argument to above functions, ctx pointer */
struct ctxop *next; /* next context ops */
+ hrtime_t save_ts; /* timestamp of last save */
+ hrtime_t restore_ts; /* timestamp of last restore */
} ctxop_t;
/*
diff --git a/usr/src/uts/common/sys/uadmin.h b/usr/src/uts/common/sys/uadmin.h
index d5168c9b2c..c14a3bf11e 100644
--- a/usr/src/uts/common/sys/uadmin.h
+++ b/usr/src/uts/common/sys/uadmin.h
@@ -23,6 +23,7 @@
*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -159,7 +160,7 @@ extern kmutex_t ualock;
extern void mdboot(int, int, char *, boolean_t);
extern void mdpreboot(int, int, char *);
extern int kadmin(int, int, void *, cred_t *);
-extern void killall(zoneid_t);
+extern void killall(zoneid_t, boolean_t);
#endif
extern int uadmin(int, int, uintptr_t);
diff --git a/usr/src/uts/common/sys/vm_usage.h b/usr/src/uts/common/sys/vm_usage.h
index 1aa4a8ee6d..c2954cbc29 100644
--- a/usr/src/uts/common/sys/vm_usage.h
+++ b/usr/src/uts/common/sys/vm_usage.h
@@ -21,6 +21,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_VM_USAGE_H
@@ -79,8 +80,9 @@ extern "C" {
/* zoneid */
#define VMUSAGE_COL_EUSERS 0x2000 /* same as VMUSAGE_COL_RUSERS, but by */
/* euser */
+#define VMUSAGE_A_ZONE 0x4000 /* rss/swap for a specified zone */
-#define VMUSAGE_MASK 0x3fff /* all valid flags for getvmusage() */
+#define VMUSAGE_MASK 0x7fff /* all valid flags for getvmusage() */
typedef struct vmusage {
id_t vmu_zoneid; /* zoneid, or ALL_ZONES for */
@@ -108,6 +110,7 @@ extern int getvmusage(uint_t flags, time_t age, vmusage_t *buf, size_t *nres);
int vm_getusage(uint_t, time_t, vmusage_t *, size_t *, int);
void vm_usage_init();
+int vm_map_inval(pid_t, caddr_t, size_t);
#endif /* _KERNEL */
diff --git a/usr/src/uts/common/sys/vmsystm.h b/usr/src/uts/common/sys/vmsystm.h
index 6122b6cd2f..c7b41730b6 100644
--- a/usr/src/uts/common/sys/vmsystm.h
+++ b/usr/src/uts/common/sys/vmsystm.h
@@ -19,6 +19,9 @@
* CDDL HEADER END
*/
/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -159,6 +162,8 @@ extern void *boot_virt_alloc(void *addr, size_t size);
extern size_t exec_get_spslew(void);
+extern caddr_t map_userlimit(proc_t *pp, struct as *as, int flags);
+
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/usr/src/uts/common/sys/vnd.h b/usr/src/uts/common/sys/vnd.h
new file mode 100644
index 0000000000..bc7c9c3122
--- /dev/null
+++ b/usr/src/uts/common/sys/vnd.h
@@ -0,0 +1,141 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_VND_H
+#define _SYS_VND_H
+
+#include <sys/types.h>
+#include <sys/vnd_errno.h>
+#include <sys/frameio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * We distinguish between normal ioctls and private ioctls we issues to out
+ * streams version. Streams ioctls have the upper bit set in the lowest byte.
+ * Note that there are no STREAMs ioctls for userland and all definitions
+ * related to them are not present in this file.
+ */
+#define VND_IOC (('v' << 24) | ('n' << 16) | ('d' << 8))
+
+/*
+ * Attach the current minor instance to a given dlpi datalink identified by a
+ * vnd_ioc_name_t argument. This fails if it's already been attached. Note that
+ * unlike the other ioctls, this is passed directly as opposed to every other
+ * function which is passed as a pointer to the value.
+ */
+#define VND_IOC_ATTACH (VND_IOC | 0x1)
+
+#define VND_NAMELEN 32
+
+typedef struct vnd_ioc_attach {
+ char via_name[VND_NAMELEN];
+ zoneid_t via_zoneid;
+ uint32_t via_errno;
+} vnd_ioc_attach_t;
+
+/*
+ * Link the current minor instance into the /devices name space.
+ *
+ * This ioctl adds entries into /devices with a name of the form z%d:%s vil_zid,
+ * vil_name. The device will be namespaced to the zone. The global zone will be
+ * able to see all minor nodes. In the zone, only the /dev entries will exist.
+ * At this time, a given device can only have one link at a time. Note that a
+ * user cannot specify the zone to pass in, rather it is the zone that the
+ * device was attached in.
+ */
+#define VND_IOC_LINK (VND_IOC | 0x2)
+
+typedef struct vnd_ioc_link {
+ char vil_name[VND_NAMELEN];
+ uint32_t vil_errno;
+} vnd_ioc_link_t;
+
+/*
+ * Unlink the opened minor instance from the /devices name space. A zone may use
+ * this to unlink an extent entry in /dev; however, they will not be able to
+ * link it in again.
+ */
+#define VND_IOC_UNLINK (VND_IOC | 0x3)
+typedef struct vnd_ioc_unlink {
+ uint32_t viu_errno;
+} vnd_ioc_unlink_t;
+
+/*
+ * Controls to get and set the current buffer recieve buffer size.
+ */
+typedef struct vnd_ioc_buf {
+ uint64_t vib_size;
+ uint32_t vib_filler;
+ uint32_t vib_errno;
+} vnd_ioc_buf_t;
+
+#define VND_IOC_GETRXBUF (VND_IOC | 0x04)
+#define VND_IOC_SETRXBUF (VND_IOC | 0x05)
+#define VND_IOC_GETMAXBUF (VND_IOC | 0x06)
+#define VND_IOC_GETTXBUF (VND_IOC | 0x07)
+#define VND_IOC_SETTXBUF (VND_IOC | 0x08)
+#define VND_IOC_GETMINTU (VND_IOC | 0x09)
+#define VND_IOC_GETMAXTU (VND_IOC | 0x0a)
+
+/*
+ * Information and listing ioctls
+ *
+ * This gets information about all of the active vnd instances. vl_actents is
+ * always updated to the number around and vl_nents is the number of
+ * vnd_ioc_info_t elements are allocated in vl_ents.
+ */
+typedef struct vnd_ioc_info {
+ uint32_t vii_version;
+ zoneid_t vii_zone;
+ char vii_name[VND_NAMELEN];
+ char vii_datalink[VND_NAMELEN];
+} vnd_ioc_info_t;
+
+typedef struct vnd_ioc_list {
+ uint_t vl_nents;
+ uint_t vl_actents;
+ vnd_ioc_info_t *vl_ents;
+} vnd_ioc_list_t;
+
+#ifdef _KERNEL
+
+typedef struct vnd_ioc_list32 {
+ uint_t vl_nents;
+ uint_t vl_actents;
+ caddr32_t vl_ents;
+} vnd_ioc_list32_t;
+
+#endif /* _KERNEL */
+
+#define VND_IOC_LIST (VND_IOC | 0x20)
+
+/*
+ * Framed I/O ioctls
+ *
+ * Users should use the standard frameio_t as opposed to a vnd specific type.
+ * This is a consolidation private ioctl pending futher stability in the form of
+ * specific system work.
+ */
+#define VND_IOC_FRAMEIO_READ (VND_IOC | 0x30)
+#define VND_IOC_FRAMEIO_WRITE (VND_IOC | 0x31)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_VND_H */
diff --git a/usr/src/uts/common/sys/vnd_errno.h b/usr/src/uts/common/sys/vnd_errno.h
new file mode 100644
index 0000000000..89e5fc2543
--- /dev/null
+++ b/usr/src/uts/common/sys/vnd_errno.h
@@ -0,0 +1,72 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_VND_ERRNO_H
+#define _SYS_VND_ERRNO_H
+
+/*
+ * This header contains all of the available vnd errors.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum vnd_errno {
+ VND_E_SUCCESS = 0, /* no error */
+ VND_E_NOMEM, /* no memory */
+ VND_E_NODATALINK, /* no such datalink */
+ VND_E_NOTETHER, /* not DL_ETHER */
+ VND_E_DLPIINVAL, /* Unknown DLPI failures */
+ VND_E_ATTACHFAIL, /* DL_ATTACH_REQ failed */
+ VND_E_BINDFAIL, /* DL_BIND_REQ failed */
+ VND_E_PROMISCFAIL, /* DL_PROMISCON_REQ failed */
+ VND_E_DIRECTFAIL, /* DLD_CAPAB_DIRECT enable failed */
+ VND_E_CAPACKINVAL, /* bad dl_capability_ack_t */
+ VND_E_SUBCAPINVAL, /* bad dl_capability_sub_t */
+ VND_E_DLDBADVERS, /* bad dld version */
+ VND_E_KSTATCREATE, /* failed to create kstats */
+ VND_E_NODEV, /* no such vnd link */
+ VND_E_NONETSTACK, /* netstack doesn't exist */
+ VND_E_ASSOCIATED, /* device already associated */
+ VND_E_ATTACHED, /* device already attached */
+ VND_E_LINKED, /* device already linked */
+ VND_E_BADNAME, /* invalid name */
+ VND_E_PERM, /* can't touch this */
+ VND_E_NOZONE, /* no such zone */
+ VND_E_STRINIT, /* failed to initialize vnd stream module */
+ VND_E_NOTATTACHED, /* device not attached */
+ VND_E_NOTLINKED, /* device not linked */
+ VND_E_LINKEXISTS, /* another device has the same link name */
+ VND_E_MINORNODE, /* failed to create minor node */
+ VND_E_BUFTOOBIG, /* requested buffer size is too large */
+ VND_E_BUFTOOSMALL, /* requested buffer size is too small */
+ VND_E_DLEXCL, /* unable to get dlpi excl access */
+ VND_E_DIRECTNOTSUP,
+ /* DLD direct capability not suported over data link */
+ VND_E_BADPROPSIZE, /* invalid property size */
+ VND_E_BADPROP, /* invalid property */
+ VND_E_PROPRDONLY, /* property is read only */
+ VND_E_SYS, /* unexpected system error */
+ VND_E_CAPABPASS,
+ /* capabilities invalid, pass-through module detected */
+ VND_E_UNKNOWN /* unknown error */
+} vnd_errno_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_VND_ERRNO_H */
diff --git a/usr/src/uts/common/sys/vnode.h b/usr/src/uts/common/sys/vnode.h
index af9516fe52..c1c12a084e 100644
--- a/usr/src/uts/common/sys/vnode.h
+++ b/usr/src/uts/common/sys/vnode.h
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
@@ -735,7 +735,8 @@ typedef enum vnevent {
VE_LINK = 6, /* Link with vnode's name as source */
VE_RENAME_DEST_DIR = 7, /* Rename with vnode as target dir */
VE_MOUNTEDOVER = 8, /* File or Filesystem got mounted over vnode */
- VE_TRUNCATE = 9 /* Truncate */
+ VE_TRUNCATE = 9, /* Truncate */
+ VE_RENAME_SRC_DIR = 10 /* Rename with vnode as source dir */
} vnevent_t;
/*
@@ -1290,7 +1291,8 @@ void vnevent_remove(vnode_t *, vnode_t *, char *, caller_context_t *);
void vnevent_rmdir(vnode_t *, vnode_t *, char *, caller_context_t *);
void vnevent_create(vnode_t *, caller_context_t *);
void vnevent_link(vnode_t *, caller_context_t *);
-void vnevent_rename_dest_dir(vnode_t *, caller_context_t *ct);
+void vnevent_rename_dest_dir(vnode_t *, vnode_t *, char *,
+ caller_context_t *ct);
void vnevent_mountedover(vnode_t *, caller_context_t *);
void vnevent_truncate(vnode_t *, caller_context_t *);
int vnevent_support(vnode_t *, caller_context_t *);
diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h
index 7e567c0cdb..ce7f2cc93d 100644
--- a/usr/src/uts/common/sys/zone.h
+++ b/usr/src/uts/common/sys/zone.h
@@ -23,6 +23,7 @@
* Copyright 2013, Joyent, Inc. All rights reserved.
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
* Copyright 2014 Igor Kozhukhov <ikozhukhov@gmail.com>.
+ * Copyright 2014, Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_ZONE_H
@@ -97,13 +98,17 @@ extern "C" {
#define ZONE_ATTR_INITNAME 9
#define ZONE_ATTR_BOOTARGS 10
#define ZONE_ATTR_BRAND 11
-#define ZONE_ATTR_PHYS_MCAP 12
+#define ZONE_ATTR_PMCAP_NOVER 12
#define ZONE_ATTR_SCHED_CLASS 13
#define ZONE_ATTR_FLAGS 14
#define ZONE_ATTR_HOSTID 15
#define ZONE_ATTR_FS_ALLOWED 16
#define ZONE_ATTR_NETWORK 17
+#define ZONE_ATTR_DID 18
+#define ZONE_ATTR_PMCAP_PAGEOUT 19
#define ZONE_ATTR_INITNORESTART 20
+#define ZONE_ATTR_PG_FLT_DELAY 21
+#define ZONE_ATTR_RSS 22
/* Start of the brand-specific attribute namespace */
#define ZONE_ATTR_BRAND_ATTRS 32768
@@ -184,6 +189,7 @@ typedef struct {
uint32_t doi; /* DOI for label */
caddr32_t label; /* label associated with zone */
int flags;
+ zoneid_t zoneid; /* requested zoneid */
} zone_def32;
#endif
typedef struct {
@@ -200,6 +206,7 @@ typedef struct {
uint32_t doi; /* DOI for label */
const bslabel_t *label; /* label associated with zone */
int flags;
+ zoneid_t zoneid; /* requested zoneid */
} zone_def;
/* extended error information */
@@ -244,9 +251,12 @@ typedef enum zone_cmd {
typedef struct zone_cmd_arg {
uint64_t uniqid; /* unique "generation number" */
zone_cmd_t cmd; /* requested action */
- uint32_t _pad; /* need consistent 32/64 bit alignmt */
+ int status; /* init status on shutdown */
+ uint32_t debug; /* enable brand hook debug */
char locale[MAXPATHLEN]; /* locale in which to render messages */
char bootbuf[BOOTARGS_MAX]; /* arguments passed to zone_boot() */
+ /* Needed for 32/64 zoneadm -> zoneadmd door arg size check. */
+ int pad;
} zone_cmd_arg_t;
/*
@@ -372,7 +382,7 @@ typedef struct zone_dataset {
} zone_dataset_t;
/*
- * structure for zone kstats
+ * structure for rctl zone kstats
*/
typedef struct zone_kstat {
kstat_named_t zk_zonename;
@@ -383,6 +393,58 @@ typedef struct zone_kstat {
struct cpucap;
typedef struct {
+ hrtime_t cycle_start;
+ uint_t cycle_cnt;
+ hrtime_t zone_avg_cnt;
+} sys_zio_cntr_t;
+
+typedef struct {
+ kstat_named_t zv_zonename;
+ kstat_named_t zv_nread;
+ kstat_named_t zv_reads;
+ kstat_named_t zv_rtime;
+ kstat_named_t zv_rlentime;
+ kstat_named_t zv_nwritten;
+ kstat_named_t zv_writes;
+ kstat_named_t zv_wtime;
+ kstat_named_t zv_wlentime;
+ kstat_named_t zv_10ms_ops;
+ kstat_named_t zv_100ms_ops;
+ kstat_named_t zv_1s_ops;
+ kstat_named_t zv_10s_ops;
+ kstat_named_t zv_delay_cnt;
+ kstat_named_t zv_delay_time;
+} zone_vfs_kstat_t;
+
+typedef struct {
+ kstat_named_t zz_zonename;
+ kstat_named_t zz_nread;
+ kstat_named_t zz_reads;
+ kstat_named_t zz_rtime;
+ kstat_named_t zz_rlentime;
+ kstat_named_t zz_nwritten;
+ kstat_named_t zz_writes;
+ kstat_named_t zz_waittime;
+} zone_zfs_kstat_t;
+
+typedef struct {
+ kstat_named_t zm_zonename;
+ kstat_named_t zm_rss;
+ kstat_named_t zm_phys_cap;
+ kstat_named_t zm_swap;
+ kstat_named_t zm_swap_cap;
+ kstat_named_t zm_nover;
+ kstat_named_t zm_pagedout;
+ kstat_named_t zm_pgpgin;
+ kstat_named_t zm_anonpgin;
+ kstat_named_t zm_execpgin;
+ kstat_named_t zm_fspgin;
+ kstat_named_t zm_anon_alloc_fail;
+ kstat_named_t zm_pf_throttle;
+ kstat_named_t zm_pf_throttle_usec;
+} zone_mcap_kstat_t;
+
+typedef struct {
kstat_named_t zm_zonename; /* full name, kstat truncates name */
kstat_named_t zm_utime;
kstat_named_t zm_stime;
@@ -440,6 +502,7 @@ typedef struct zone {
*/
list_node_t zone_linkage;
zoneid_t zone_id; /* ID of zone */
+ zoneid_t zone_did; /* persistent debug ID of zone */
uint_t zone_ref; /* count of zone_hold()s on zone */
uint_t zone_cred_ref; /* count of zone_hold_cred()s on zone */
/*
@@ -492,10 +555,11 @@ typedef struct zone {
kcondvar_t zone_cv; /* used to signal state changes */
struct proc *zone_zsched; /* Dummy kernel "zsched" process */
pid_t zone_proc_initpid; /* pid of "init" for this zone */
- char *zone_initname; /* fs path to 'init' */
+ char *zone_initname; /* fs path to 'init' */
+ int zone_init_status; /* init's exit status */
int zone_boot_err; /* for zone_boot() if boot fails */
char *zone_bootargs; /* arguments passed via zone_boot() */
- uint64_t zone_phys_mcap; /* physical memory cap */
+ rctl_qty_t zone_phys_mem_ctl; /* current phys. memory limit */
/*
* zone_kthreads is protected by zone_status_lock.
*/
@@ -544,6 +608,37 @@ typedef struct zone {
list_t zone_dl_list;
netstack_t *zone_netstack;
struct cpucap *zone_cpucap; /* CPU caps data */
+
+ /*
+ * Data and counters used for ZFS fair-share disk IO.
+ */
+ rctl_qty_t zone_zfs_io_pri; /* ZFS IO priority */
+ uint_t zone_zfs_queued[2]; /* sync I/O enqueued count */
+ uint64_t zone_zfs_weight; /* used to prevent starvation */
+ uint64_t zone_io_util; /* IO utilization metric */
+ boolean_t zone_io_util_above_avg; /* IO util percent > avg. */
+ uint16_t zone_io_delay; /* IO delay on logical r/w */
+ kmutex_t zone_stg_io_lock; /* protects IO window data */
+ sys_zio_cntr_t zone_rd_ops; /* Counters for ZFS reads, */
+ sys_zio_cntr_t zone_wr_ops; /* writes and */
+ sys_zio_cntr_t zone_lwr_ops; /* logical writes. */
+
+ /*
+ * kstats and counters for VFS ops and bytes.
+ */
+ kmutex_t zone_vfs_lock; /* protects VFS statistics */
+ kstat_t *zone_vfs_ksp;
+ kstat_io_t zone_vfs_rwstats;
+ zone_vfs_kstat_t *zone_vfs_stats;
+
+ /*
+ * kstats for ZFS I/O ops and bytes.
+ */
+ kmutex_t zone_zfs_lock; /* protects ZFS statistics */
+ kstat_t *zone_zfs_ksp;
+ kstat_io_t zone_zfs_rwstats;
+ zone_zfs_kstat_t *zone_zfs_stats;
+
/*
* Solaris Auditing per-zone audit context
*/
@@ -563,6 +658,27 @@ typedef struct zone {
kstat_t *zone_nprocs_kstat;
/*
+ * kstats and counters for physical memory capping.
+ */
+ rctl_qty_t zone_phys_mem; /* current bytes of phys. mem. (RSS) */
+ kstat_t *zone_physmem_kstat;
+ uint64_t zone_mcap_nover; /* # of times over phys. cap */
+ uint64_t zone_mcap_pagedout; /* bytes of mem. paged out */
+ kmutex_t zone_mcap_lock; /* protects mcap statistics */
+ kstat_t *zone_mcap_ksp;
+ zone_mcap_kstat_t *zone_mcap_stats;
+ uint64_t zone_pgpgin; /* pages paged in */
+ uint64_t zone_anonpgin; /* anon pages paged in */
+ uint64_t zone_execpgin; /* exec pages paged in */
+ uint64_t zone_fspgin; /* fs pages paged in */
+ uint64_t zone_anon_alloc_fail; /* cnt of anon alloc fails */
+ uint64_t zone_pf_throttle; /* cnt of page flt throttles */
+ uint64_t zone_pf_throttle_usec; /* time of page flt throttles */
+
+ /* Num usecs to throttle page fault when zone is over phys. mem cap */
+ uint32_t zone_pg_flt_delay;
+
+ /*
* Misc. kstats and counters for zone cpu-usage aggregation.
* The zone_Xtime values are the sum of the micro-state accounting
* values for all threads that are running or have run in the zone.
@@ -600,6 +716,14 @@ typedef struct zone {
* DTrace-private per-zone state
*/
int zone_dtrace_getf; /* # of unprivileged getf()s */
+
+ /*
+ * Synchronization primitives used to synchronize between mounts and
+ * zone creation/destruction.
+ */
+ int zone_mounts_in_progress;
+ kcondvar_t zone_mount_cv;
+ kmutex_t zone_mount_lock;
} zone_t;
/*
@@ -632,6 +756,7 @@ extern zone_t *zone_find_by_name(char *);
extern zone_t *zone_find_by_any_path(const char *, boolean_t);
extern zone_t *zone_find_by_path(const char *);
extern zoneid_t getzoneid(void);
+extern zoneid_t getzonedid(void);
extern zone_t *zone_find_by_id_nolock(zoneid_t);
extern int zone_datalink_walk(zoneid_t, int (*)(datalink_id_t, void *), void *);
extern int zone_check_datalink(zoneid_t *, datalink_id_t);
@@ -819,13 +944,14 @@ extern int zone_dataset_visible(const char *, int *);
extern int zone_kadmin(int, int, const char *, cred_t *);
extern void zone_shutdown_global(void);
-extern void mount_in_progress(void);
-extern void mount_completed(void);
+extern void mount_in_progress(zone_t *);
+extern void mount_completed(zone_t *);
extern int zone_walk(int (*)(zone_t *, void *), void *);
extern rctl_hndl_t rc_zone_locked_mem;
extern rctl_hndl_t rc_zone_max_swap;
+extern rctl_hndl_t rc_zone_phys_mem;
extern rctl_hndl_t rc_zone_max_lofi;
#endif /* _KERNEL */
diff --git a/usr/src/uts/common/syscall/brandsys.c b/usr/src/uts/common/syscall/brandsys.c
index 9b4bd38baa..8ee5511fd0 100644
--- a/usr/src/uts/common/syscall/brandsys.c
+++ b/usr/src/uts/common/syscall/brandsys.c
@@ -23,7 +23,9 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
#include <sys/brand.h>
#include <sys/systm.h>
@@ -35,7 +37,7 @@
*/
int64_t
brandsys(int cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
- uintptr_t arg4, uintptr_t arg5, uintptr_t arg6)
+ uintptr_t arg4, uintptr_t arg5)
{
struct proc *p = curthread->t_procp;
int64_t rval = 0;
@@ -49,7 +51,7 @@ brandsys(int cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
return (set_errno(ENOSYS));
if ((err = ZBROP(p->p_zone)->b_brandsys(cmd, &rval, arg1, arg2, arg3,
- arg4, arg5, arg6)) != 0)
+ arg4, arg5)) != 0)
return (set_errno(err));
return (rval);
diff --git a/usr/src/uts/common/syscall/memcntl.c b/usr/src/uts/common/syscall/memcntl.c
index 1ab3a8b65e..63c8b64ad0 100644
--- a/usr/src/uts/common/syscall/memcntl.c
+++ b/usr/src/uts/common/syscall/memcntl.c
@@ -21,6 +21,7 @@
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -116,13 +117,17 @@ memcntl(caddr_t addr, size_t len, int cmd, caddr_t arg, int attr, int mask)
* MS_SYNC used to be defined to be zero but is now non-zero.
* For binary compatibility we still accept zero
* (the absence of MS_ASYNC) to mean the same thing.
+ * Binary compatibility is not an issue for MS_INVALCURPROC.
*/
iarg = (uintptr_t)arg;
if ((iarg & ~MS_INVALIDATE) == 0)
iarg |= MS_SYNC;
- if (((iarg & ~(MS_SYNC|MS_ASYNC|MS_INVALIDATE)) != 0) ||
- ((iarg & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC))) {
+ if (((iarg &
+ ~(MS_SYNC|MS_ASYNC|MS_INVALIDATE|MS_INVALCURPROC)) != 0) ||
+ ((iarg & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC)) ||
+ ((iarg & (MS_INVALIDATE|MS_INVALCURPROC)) ==
+ (MS_INVALIDATE|MS_INVALCURPROC))) {
error = set_errno(EINVAL);
} else {
error = as_ctl(as, addr, len, cmd, attr, iarg, NULL, 0);
diff --git a/usr/src/uts/common/syscall/poll.c b/usr/src/uts/common/syscall/poll.c
index 7f37529941..c33156a4fc 100644
--- a/usr/src/uts/common/syscall/poll.c
+++ b/usr/src/uts/common/syscall/poll.c
@@ -29,6 +29,7 @@
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
/*
@@ -525,13 +526,13 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
}
/*
- * If T_POLLWAKE is set, a pollwakeup() was performed on
+ * If PC_POLLWAKE is set, a pollwakeup() was performed on
* one of the file descriptors. This can happen only if
* one of the VOP_POLL() functions dropped pcp->pc_lock.
* The only current cases of this is in procfs (prpoll())
* and STREAMS (strpoll()).
*/
- if (pcp->pc_flag & T_POLLWAKE)
+ if (pcp->pc_flag & PC_POLLWAKE)
continue;
/*
@@ -886,9 +887,9 @@ retry:
}
/*
- * This function is called to inform a thread that
- * an event being polled for has occurred.
- * The pollstate lock on the thread should be held on entry.
+ * This function is called to inform a thread (or threads) that an event being
+ * polled on has occurred. The pollstate lock on the thread should be held
+ * on entry.
*/
void
pollnotify(pollcache_t *pcp, int fd)
@@ -896,8 +897,8 @@ pollnotify(pollcache_t *pcp, int fd)
ASSERT(fd < pcp->pc_mapsize);
ASSERT(MUTEX_HELD(&pcp->pc_lock));
BT_SET(pcp->pc_bitmap, fd);
- pcp->pc_flag |= T_POLLWAKE;
- cv_signal(&pcp->pc_cv);
+ pcp->pc_flag |= PC_POLLWAKE;
+ cv_broadcast(&pcp->pc_cv);
}
/*
@@ -2024,7 +2025,7 @@ retry:
*/
if ((pdp->pd_php != NULL) &&
(pollfdp[entry].events == pdp->pd_events) &&
- ((pcp->pc_flag & T_POLLWAKE) == 0)) {
+ ((pcp->pc_flag & PC_POLLWAKE) == 0)) {
BT_CLEAR(pcp->pc_bitmap, fd);
}
/*
@@ -2251,7 +2252,7 @@ pollstate_destroy(pollstate_t *ps)
pcacheset_destroy(ps->ps_pcacheset, ps->ps_nsets);
ps->ps_pcacheset = NULL;
if (ps->ps_dpbuf != NULL) {
- kmem_free(ps->ps_dpbuf, ps->ps_dpbufsize * sizeof (pollfd_t));
+ kmem_free(ps->ps_dpbuf, ps->ps_dpbufsize);
ps->ps_dpbuf = NULL;
}
mutex_destroy(&ps->ps_lock);
diff --git a/usr/src/uts/common/syscall/rusagesys.c b/usr/src/uts/common/syscall/rusagesys.c
index 3e0e63f4c0..417c629168 100644
--- a/usr/src/uts/common/syscall/rusagesys.c
+++ b/usr/src/uts/common/syscall/rusagesys.c
@@ -21,6 +21,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
*/
/*
@@ -257,6 +258,19 @@ rusagesys(int code, void *arg1, void *arg2, void *arg3, void *arg4)
case _RUSAGESYS_GETVMUSAGE:
return (vm_getusage((uint_t)(uintptr_t)arg1, (time_t)arg2,
(vmusage_t *)arg3, (size_t *)arg4, 0));
+ case _RUSAGESYS_INVALMAP:
+ /*
+ * SPARC sfmmu hat does not support HAT_CURPROC_PGUNLOAD
+ * handling so callers on SPARC should get simple sync
+ * handling with invalidation to all processes.
+ */
+#if defined(__sparc)
+ return (memcntl((caddr_t)arg2, (size_t)arg3, MC_SYNC,
+ (caddr_t)(MS_ASYNC | MS_INVALIDATE), 0, 0));
+#else
+ return (vm_map_inval((pid_t)(uintptr_t)arg1, (caddr_t)arg2,
+ (size_t)arg3));
+#endif
default:
return (set_errno(EINVAL));
}
diff --git a/usr/src/uts/common/syscall/sysconfig.c b/usr/src/uts/common/syscall/sysconfig.c
index 03f2fabe13..26ea859224 100644
--- a/usr/src/uts/common/syscall/sysconfig.c
+++ b/usr/src/uts/common/syscall/sysconfig.c
@@ -22,6 +22,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -170,8 +171,8 @@ sysconfig(int which)
* even though rcapd can be used on the global zone too.
*/
if (!INGLOBALZONE(curproc) &&
- curproc->p_zone->zone_phys_mcap != 0)
- return (MIN(btop(curproc->p_zone->zone_phys_mcap),
+ curproc->p_zone->zone_phys_mem_ctl != UINT64_MAX)
+ return (MIN(btop(curproc->p_zone->zone_phys_mem_ctl),
physinstalled));
return (physinstalled);
@@ -179,26 +180,23 @@ sysconfig(int which)
case _CONFIG_AVPHYS_PAGES:
/*
* If the non-global zone has a phys. memory cap, use
- * the phys. memory cap - zone's current rss. We always
+ * the phys. memory cap - zone's rss. We always
* report the system-wide value for the global zone, even
- * though rcapd can be used on the global zone too.
+ * though memory capping can be used on the global zone too.
+ * We use the cached value for the RSS since vm_getusage()
+ * is so expensive and we don't need this value to be exact.
*/
if (!INGLOBALZONE(curproc) &&
- curproc->p_zone->zone_phys_mcap != 0) {
+ curproc->p_zone->zone_phys_mem_ctl != UINT64_MAX) {
pgcnt_t cap, rss, free;
- vmusage_t in_use;
- size_t cnt = 1;
- cap = btop(curproc->p_zone->zone_phys_mcap);
+ cap = btop(curproc->p_zone->zone_phys_mem_ctl);
if (cap > physinstalled)
return (freemem);
- if (vm_getusage(VMUSAGE_ZONE, 1, &in_use, &cnt,
- FKIOCTL) != 0)
- in_use.vmu_rss_all = 0;
- rss = btop(in_use.vmu_rss_all);
+ rss = btop(curproc->p_zone->zone_phys_mem);
/*
- * Because rcapd implements a soft cap, it is possible
+ * Because this is a soft cap, it is possible
* for rss to be temporarily over the cap.
*/
if (cap > rss)
diff --git a/usr/src/uts/common/syscall/uadmin.c b/usr/src/uts/common/syscall/uadmin.c
index 2dda4001bf..68aa1a95f5 100644
--- a/usr/src/uts/common/syscall/uadmin.c
+++ b/usr/src/uts/common/syscall/uadmin.c
@@ -78,7 +78,7 @@ volatile int fastreboot_dryrun = 0;
* system with many zones.
*/
void
-killall(zoneid_t zoneid)
+killall(zoneid_t zoneid, boolean_t force)
{
proc_t *p;
@@ -108,7 +108,7 @@ killall(zoneid_t zoneid)
p->p_stat != SIDL &&
p->p_stat != SZOMB) {
mutex_enter(&p->p_lock);
- if (sigismember(&p->p_sig, SIGKILL)) {
+ if (!force && sigismember(&p->p_sig, SIGKILL)) {
mutex_exit(&p->p_lock);
p = p->p_next;
} else {
@@ -245,7 +245,7 @@ kadmin(int cmd, int fcn, void *mdep, cred_t *credp)
*/
zone_shutdown_global();
- killall(ALL_ZONES);
+ killall(ALL_ZONES, B_FALSE);
/*
* If we are calling kadmin() from a kernel context then we
* do not release these resources.
diff --git a/usr/src/uts/common/vm/hat.h b/usr/src/uts/common/vm/hat.h
index 1d91475e38..c908a9e16c 100644
--- a/usr/src/uts/common/vm/hat.h
+++ b/usr/src/uts/common/vm/hat.h
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -269,7 +270,12 @@ void hat_kpm_walk(void (*)(void *, void *, size_t), void *);
* call.
*
* int hat_pageunload(pp, forceflag)
- * unload all translations attached to pp.
+ * Unload all translations attached to pp. On x86 the bulk of the work is
+ * done by hat_page_inval.
+ *
+ * void hat_page_inval(pp, pgsz, curhat)
+ * Unload translations attached to pp. If curhat is provided, only the
+ * translation for that process is unloaded, otherwise all are unloaded.
*
* uint_t hat_pagesync(pp, flags)
* get hw stats from hardware into page struct and reset hw stats
@@ -291,6 +297,7 @@ void hat_page_setattr(struct page *, uint_t);
void hat_page_clrattr(struct page *, uint_t);
uint_t hat_page_getattr(struct page *, uint_t);
int hat_pageunload(struct page *, uint_t);
+void hat_page_inval(struct page *, uint_t, struct hat *);
uint_t hat_pagesync(struct page *, uint_t);
ulong_t hat_page_getshare(struct page *);
int hat_page_checkshare(struct page *, ulong_t);
@@ -460,6 +467,7 @@ void hat_setstat(struct as *, caddr_t, size_t, uint_t);
*/
#define HAT_ADV_PGUNLOAD 0x00
#define HAT_FORCE_PGUNLOAD 0x01
+#define HAT_CURPROC_PGUNLOAD 0x02
/*
* Attributes for hat_page_*attr, hat_setstats and
diff --git a/usr/src/uts/common/vm/seg_vn.c b/usr/src/uts/common/vm/seg_vn.c
index 9426a125dc..d4c912e180 100644
--- a/usr/src/uts/common/vm/seg_vn.c
+++ b/usr/src/uts/common/vm/seg_vn.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -7254,7 +7255,8 @@ segvn_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
vpp = svd->vpage;
offset = svd->offset + (uintptr_t)(addr - seg->s_base);
bflags = ((flags & MS_ASYNC) ? B_ASYNC : 0) |
- ((flags & MS_INVALIDATE) ? B_INVAL : 0);
+ ((flags & MS_INVALIDATE) ? B_INVAL : 0) |
+ ((flags & MS_INVALCURPROC) ? (B_INVALCURONLY | B_INVAL) : 0);
if (attr) {
pageprot = attr & ~(SHARED|PRIVATE);
@@ -7279,11 +7281,11 @@ segvn_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
vpp = &svd->vpage[seg_page(seg, addr)];
} else if (svd->vp && svd->amp == NULL &&
- (flags & MS_INVALIDATE) == 0) {
+ (flags & (MS_INVALIDATE | MS_INVALCURPROC)) == 0) {
/*
- * No attributes, no anonymous pages and MS_INVALIDATE flag
- * is not on, just use one big request.
+ * No attributes, no anonymous pages and MS_INVAL* flags
+ * are not on, just use one big request.
*/
err = VOP_PUTPAGE(svd->vp, (offset_t)offset, len,
bflags, svd->cred, NULL);
@@ -7335,7 +7337,7 @@ segvn_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
* might race in and lock the page after we unlock and before
* we do the PUTPAGE, then PUTPAGE simply does nothing.
*/
- if (flags & MS_INVALIDATE) {
+ if (flags & (MS_INVALIDATE | MS_INVALCURPROC)) {
if ((pp = page_lookup(vp, off, SE_SHARED)) != NULL) {
if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) {
page_unlock(pp);
diff --git a/usr/src/uts/common/vm/vm_anon.c b/usr/src/uts/common/vm/vm_anon.c
index fdf9f7790c..21ab9b237c 100644
--- a/usr/src/uts/common/vm/vm_anon.c
+++ b/usr/src/uts/common/vm/vm_anon.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -787,13 +788,21 @@ anon_resvmem(size_t size, boolean_t takemem, zone_t *zone, int tryhard)
pgcnt_t pswap_pages = 0;
proc_t *p = curproc;
- if (zone != NULL && takemem) {
+ if (zone != NULL) {
/* test zone.max-swap resource control */
mutex_enter(&p->p_lock);
if (rctl_incr_swap(p, zone, ptob(npages)) != 0) {
mutex_exit(&p->p_lock);
+
+ if (takemem)
+ atomic_add_64(&zone->zone_anon_alloc_fail, 1);
+
return (0);
}
+
+ if (!takemem)
+ rctl_decr_swap(zone, ptob(npages));
+
mutex_exit(&p->p_lock);
}
mutex_enter(&anoninfo_lock);
diff --git a/usr/src/uts/common/vm/vm_as.c b/usr/src/uts/common/vm/vm_as.c
index 01ad32e0b1..8caa257486 100644
--- a/usr/src/uts/common/vm/vm_as.c
+++ b/usr/src/uts/common/vm/vm_as.c
@@ -21,6 +21,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -56,6 +57,7 @@
#include <sys/debug.h>
#include <sys/tnf_probe.h>
#include <sys/vtrace.h>
+#include <sys/ddi.h>
#include <vm/hat.h>
#include <vm/xhat.h>
@@ -879,6 +881,7 @@ as_fault(struct hat *hat, struct as *as, caddr_t addr, size_t size,
struct seg *segsav;
int as_lock_held;
klwp_t *lwp = ttolwp(curthread);
+ zone_t *zonep = curzone;
int is_xhat = 0;
int holding_wpage = 0;
extern struct seg_ops segdev_ops;
@@ -928,6 +931,23 @@ retry:
if (as == &kas)
CPU_STATS_ADDQ(CPU, vm, kernel_asflt, 1);
CPU_STATS_EXIT_K();
+ if (zonep->zone_pg_flt_delay != 0) {
+ /*
+ * The zone in which this process is running
+ * is currently over it's physical memory cap.
+ * Throttle page faults to help the user-land
+ * memory capper catch up. Note that
+ * drv_usectohz() rounds up.
+ */
+ atomic_add_64(&zonep->zone_pf_throttle, 1);
+ atomic_add_64(&zonep->zone_pf_throttle_usec,
+ zonep->zone_pg_flt_delay);
+ if (zonep->zone_pg_flt_delay < TICK_TO_USEC(1))
+ drv_usecwait(zonep->zone_pg_flt_delay);
+ else
+ delay(drv_usectohz(
+ zonep->zone_pg_flt_delay));
+ }
break;
}
}
diff --git a/usr/src/uts/common/vm/vm_pvn.c b/usr/src/uts/common/vm/vm_pvn.c
index 7233581227..39ace0b3c2 100644
--- a/usr/src/uts/common/vm/vm_pvn.c
+++ b/usr/src/uts/common/vm/vm_pvn.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -431,7 +432,14 @@ pvn_write_done(page_t *plist, int flags)
page_io_unlock(pp);
page_unlock(pp);
}
- } else if (flags & B_INVAL) {
+ } else if ((flags & (B_INVAL | B_INVALCURONLY)) == B_INVAL) {
+ /*
+ * If B_INVALCURONLY is set, then we handle that case
+ * in the next conditional if hat_page_is_mapped()
+ * indicates that there are no additional mappings
+ * to the page.
+ */
+
/*
* XXX - Failed writes with B_INVAL set are
* not handled appropriately.
@@ -572,8 +580,9 @@ pvn_write_done(page_t *plist, int flags)
}
/*
- * Flags are composed of {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED, B_DELWRI,
- * B_TRUNC, B_FORCE}. B_DELWRI indicates that this page is part of a kluster
+ * Flags are composed of {B_ASYNC, B_INVAL, B_INVALCURONLY, B_FREE,
+ * B_DONTNEED, B_DELWRI, B_TRUNC, B_FORCE}.
+ * B_DELWRI indicates that this page is part of a kluster
* operation and is only to be considered if it doesn't involve any
* waiting here. B_TRUNC indicates that the file is being truncated
* and so no i/o needs to be done. B_FORCE indicates that the page
@@ -627,13 +636,17 @@ pvn_getdirty(page_t *pp, int flags)
* If we want to free or invalidate the page then
* we need to unload it so that anyone who wants
* it will have to take a minor fault to get it.
+ * If we are only invalidating the page for the
+ * current process, then pass in a different flag.
* Otherwise, we're just writing the page back so we
* need to sync up the hardwre and software mod bit to
* detect any future modifications. We clear the
* software mod bit when we put the page on the dirty
* list.
*/
- if (flags & (B_INVAL | B_FREE)) {
+ if (flags & B_INVALCURONLY) {
+ (void) hat_pageunload(pp, HAT_CURPROC_PGUNLOAD);
+ } else if (flags & (B_INVAL | B_FREE)) {
(void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
} else {
(void) hat_pagesync(pp, HAT_SYNC_ZERORM);
@@ -645,7 +658,7 @@ pvn_getdirty(page_t *pp, int flags)
* list after all.
*/
page_io_unlock(pp);
- if (flags & B_INVAL) {
+ if ((flags & (B_INVAL | B_INVALCURONLY)) == B_INVAL) {
/*LINTED: constant in conditional context*/
VN_DISPOSE(pp, B_INVAL, 0, kcred);
} else if (flags & B_FREE) {
@@ -657,6 +670,9 @@ pvn_getdirty(page_t *pp, int flags)
* of VOP_PUTPAGE() who prefer freeing the
* page _only_ if no one else is accessing it.
* E.g. segmap_release()
+ * We also take this path for B_INVALCURONLY and
+ * let page_release call VN_DISPOSE if no one else is
+ * using the page.
*
* The above hat_ismod() check is useless because:
* (1) we may not be holding SE_EXCL lock;
@@ -681,7 +697,7 @@ pvn_getdirty(page_t *pp, int flags)
* We'll detect the fact that they used it when the
* i/o is done and avoid freeing the page.
*/
- if (flags & B_FREE)
+ if (flags & (B_FREE | B_INVALCURONLY))
page_downgrade(pp);
diff --git a/usr/src/uts/common/vm/vm_usage.c b/usr/src/uts/common/vm/vm_usage.c
index d422f8d0e8..33c5383f68 100644
--- a/usr/src/uts/common/vm/vm_usage.c
+++ b/usr/src/uts/common/vm/vm_usage.c
@@ -25,6 +25,10 @@
*/
/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+/*
* vm_usage
*
* This file implements the getvmusage() private system call.
@@ -114,7 +118,7 @@
* For accurate counting of map-shared and COW-shared pages.
*
* - visited private anons (refcnt > 1) for each collective.
- * (entity->vme_anon_hash)
+ * (entity->vme_anon)
* For accurate counting of COW-shared pages.
*
* The common accounting structure is the vmu_entity_t, which represents
@@ -152,6 +156,7 @@
#include <sys/vm_usage.h>
#include <sys/zone.h>
#include <sys/sunddi.h>
+#include <sys/sysmacros.h>
#include <sys/avl.h>
#include <vm/anon.h>
#include <vm/as.h>
@@ -199,6 +204,14 @@ typedef struct vmu_object {
} vmu_object_t;
/*
+ * Node for tree of visited COW anons.
+ */
+typedef struct vmu_anon {
+ avl_node_t vma_node;
+ uintptr_t vma_addr;
+} vmu_anon_t;
+
+/*
* Entity by which to count results.
*
* The entity structure keeps the current rss/swap counts for each entity
@@ -221,7 +234,7 @@ typedef struct vmu_entity {
struct vmu_entity *vme_next_calc;
mod_hash_t *vme_vnode_hash; /* vnodes visited for entity */
mod_hash_t *vme_amp_hash; /* shared amps visited for entity */
- mod_hash_t *vme_anon_hash; /* COW anons visited for entity */
+ avl_tree_t vme_anon; /* COW anons visited for entity */
vmusage_t vme_result; /* identifies entity and results */
} vmu_entity_t;
@@ -324,6 +337,23 @@ bounds_cmp(const void *bnd1, const void *bnd2)
}
/*
+ * Comparison routine for our AVL tree of anon structures.
+ */
+static int
+vmu_anon_cmp(const void *lhs, const void *rhs)
+{
+ const vmu_anon_t *l = lhs, *r = rhs;
+
+ if (l->vma_addr == r->vma_addr)
+ return (0);
+
+ if (l->vma_addr < r->vma_addr)
+ return (-1);
+
+ return (1);
+}
+
+/*
* Save a bound on the free list.
*/
static void
@@ -363,13 +393,18 @@ static void
vmu_free_entity(mod_hash_val_t val)
{
vmu_entity_t *entity = (vmu_entity_t *)val;
+ vmu_anon_t *anon;
+ void *cookie = NULL;
if (entity->vme_vnode_hash != NULL)
i_mod_hash_clear_nosync(entity->vme_vnode_hash);
if (entity->vme_amp_hash != NULL)
i_mod_hash_clear_nosync(entity->vme_amp_hash);
- if (entity->vme_anon_hash != NULL)
- i_mod_hash_clear_nosync(entity->vme_anon_hash);
+
+ while ((anon = avl_destroy_nodes(&entity->vme_anon, &cookie)) != NULL)
+ kmem_free(anon, sizeof (vmu_anon_t));
+
+ avl_destroy(&entity->vme_anon);
entity->vme_next = vmu_data.vmu_free_entities;
vmu_data.vmu_free_entities = entity;
@@ -485,10 +520,10 @@ vmu_alloc_entity(id_t id, int type, id_t zoneid)
"vmusage amp hash", VMUSAGE_HASH_SIZE, vmu_free_object,
sizeof (struct anon_map));
- if (entity->vme_anon_hash == NULL)
- entity->vme_anon_hash = mod_hash_create_ptrhash(
- "vmusage anon hash", VMUSAGE_HASH_SIZE,
- mod_hash_null_valdtor, sizeof (struct anon));
+ VERIFY(avl_first(&entity->vme_anon) == NULL);
+
+ avl_create(&entity->vme_anon, vmu_anon_cmp, sizeof (struct vmu_anon),
+ offsetof(struct vmu_anon, vma_node));
entity->vme_next = vmu_data.vmu_entities;
vmu_data.vmu_entities = entity;
@@ -518,7 +553,8 @@ vmu_alloc_zone(id_t id)
zone->vmz_id = id;
- if ((vmu_data.vmu_calc_flags & (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES)) != 0)
+ if ((vmu_data.vmu_calc_flags &
+ (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES | VMUSAGE_A_ZONE)) != 0)
zone->vmz_zone = vmu_alloc_entity(id, VMUSAGE_ZONE, id);
if ((vmu_data.vmu_calc_flags & (VMUSAGE_PROJECTS |
@@ -613,21 +649,19 @@ vmu_find_insert_object(mod_hash_t *hash, caddr_t key, uint_t type)
}
static int
-vmu_find_insert_anon(mod_hash_t *hash, caddr_t key)
+vmu_find_insert_anon(vmu_entity_t *entity, void *key)
{
- int ret;
- caddr_t val;
+ vmu_anon_t anon, *ap;
- ret = i_mod_hash_find_nosync(hash, (mod_hash_key_t)key,
- (mod_hash_val_t *)&val);
+ anon.vma_addr = (uintptr_t)key;
- if (ret == 0)
+ if (avl_find(&entity->vme_anon, &anon, NULL) != NULL)
return (0);
- ret = i_mod_hash_insert_nosync(hash, (mod_hash_key_t)key,
- (mod_hash_val_t)key, (mod_hash_hndl_t)0);
+ ap = kmem_alloc(sizeof (vmu_anon_t), KM_SLEEP);
+ ap->vma_addr = (uintptr_t)key;
- ASSERT(ret == 0);
+ avl_add(&entity->vme_anon, ap);
return (1);
}
@@ -918,6 +952,8 @@ vmu_amp_update_incore_bounds(avl_tree_t *tree, struct anon_map *amp,
next = AVL_NEXT(tree, next);
continue;
}
+
+ ASSERT(next->vmb_type == VMUSAGE_BOUND_UNKNOWN);
bound_type = next->vmb_type;
index = next->vmb_start;
while (index <= next->vmb_end) {
@@ -937,7 +973,10 @@ vmu_amp_update_incore_bounds(avl_tree_t *tree, struct anon_map *amp,
if (ap != NULL && vn != NULL && vn->v_pages != NULL &&
(page = page_exists(vn, off)) != NULL) {
- page_type = VMUSAGE_BOUND_INCORE;
+ if (PP_ISFREE(page))
+ page_type = VMUSAGE_BOUND_NOT_INCORE;
+ else
+ page_type = VMUSAGE_BOUND_INCORE;
if (page->p_szc > 0) {
pgcnt = page_get_pagecnt(page->p_szc);
pgshft = page_get_shift(page->p_szc);
@@ -947,8 +986,10 @@ vmu_amp_update_incore_bounds(avl_tree_t *tree, struct anon_map *amp,
} else {
page_type = VMUSAGE_BOUND_NOT_INCORE;
}
+
if (bound_type == VMUSAGE_BOUND_UNKNOWN) {
next->vmb_type = page_type;
+ bound_type = page_type;
} else if (next->vmb_type != page_type) {
/*
* If current bound type does not match page
@@ -1009,6 +1050,7 @@ vmu_vnode_update_incore_bounds(avl_tree_t *tree, vnode_t *vnode,
continue;
}
+ ASSERT(next->vmb_type == VMUSAGE_BOUND_UNKNOWN);
bound_type = next->vmb_type;
index = next->vmb_start;
while (index <= next->vmb_end) {
@@ -1024,7 +1066,10 @@ vmu_vnode_update_incore_bounds(avl_tree_t *tree, vnode_t *vnode,
if (vnode->v_pages != NULL &&
(page = page_exists(vnode, ptob(index))) != NULL) {
- page_type = VMUSAGE_BOUND_INCORE;
+ if (PP_ISFREE(page))
+ page_type = VMUSAGE_BOUND_NOT_INCORE;
+ else
+ page_type = VMUSAGE_BOUND_INCORE;
if (page->p_szc > 0) {
pgcnt = page_get_pagecnt(page->p_szc);
pgshft = page_get_shift(page->p_szc);
@@ -1034,8 +1079,10 @@ vmu_vnode_update_incore_bounds(avl_tree_t *tree, vnode_t *vnode,
} else {
page_type = VMUSAGE_BOUND_NOT_INCORE;
}
+
if (bound_type == VMUSAGE_BOUND_UNKNOWN) {
next->vmb_type = page_type;
+ bound_type = page_type;
} else if (next->vmb_type != page_type) {
/*
* If current bound type does not match page
@@ -1304,6 +1351,12 @@ vmu_calculate_seg(vmu_entity_t *vmu_entities, struct seg *seg)
}
/*
+ * Pages on the free list aren't counted for the rss.
+ */
+ if (PP_ISFREE(page))
+ continue;
+
+ /*
* Assume anon structs with a refcnt
* of 1 are not COW shared, so there
* is no reason to track them per entity.
@@ -1320,8 +1373,7 @@ vmu_calculate_seg(vmu_entity_t *vmu_entities, struct seg *seg)
* Track COW anons per entity so
* they are not double counted.
*/
- if (vmu_find_insert_anon(entity->vme_anon_hash,
- (caddr_t)ap) == 0)
+ if (vmu_find_insert_anon(entity, ap) == 0)
continue;
result->vmu_rss_all += (pgcnt << PAGESHIFT);
@@ -1461,8 +1513,9 @@ vmu_calculate_proc(proc_t *p)
entities = tmp;
}
if (vmu_data.vmu_calc_flags &
- (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES | VMUSAGE_PROJECTS |
- VMUSAGE_ALL_PROJECTS | VMUSAGE_TASKS | VMUSAGE_ALL_TASKS |
+ (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES | VMUSAGE_A_ZONE |
+ VMUSAGE_PROJECTS | VMUSAGE_ALL_PROJECTS |
+ VMUSAGE_TASKS | VMUSAGE_ALL_TASKS |
VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS | VMUSAGE_EUSERS |
VMUSAGE_ALL_EUSERS)) {
ret = i_mod_hash_find_nosync(vmu_data.vmu_zones_hash,
@@ -1594,8 +1647,7 @@ vmu_free_extra()
mod_hash_destroy_hash(te->vme_vnode_hash);
if (te->vme_amp_hash != NULL)
mod_hash_destroy_hash(te->vme_amp_hash);
- if (te->vme_anon_hash != NULL)
- mod_hash_destroy_hash(te->vme_anon_hash);
+ VERIFY(avl_first(&te->vme_anon) == NULL);
kmem_free(te, sizeof (vmu_entity_t));
}
while (vmu_data.vmu_free_zones != NULL) {
@@ -1739,12 +1791,34 @@ vmu_cache_rele(vmu_cache_t *cache)
}
/*
+ * When new data is calculated, update the phys_mem rctl usage value in the
+ * zones.
+ */
+static void
+vmu_update_zone_rctls(vmu_cache_t *cache)
+{
+ vmusage_t *rp;
+ size_t i = 0;
+ zone_t *zp;
+
+ for (rp = cache->vmc_results; i < cache->vmc_nresults; rp++, i++) {
+ if (rp->vmu_type == VMUSAGE_ZONE &&
+ rp->vmu_zoneid != ALL_ZONES) {
+ if ((zp = zone_find_by_id(rp->vmu_zoneid)) != NULL) {
+ zp->zone_phys_mem = rp->vmu_rss_all;
+ zone_rele(zp);
+ }
+ }
+ }
+}
+
+/*
* Copy out the cached results to a caller. Inspect the callers flags
* and zone to determine which cached results should be copied.
*/
static int
vmu_copyout_results(vmu_cache_t *cache, vmusage_t *buf, size_t *nres,
- uint_t flags, int cpflg)
+ uint_t flags, id_t req_zone_id, int cpflg)
{
vmusage_t *result, *out_result;
vmusage_t dummy;
@@ -1763,7 +1837,7 @@ vmu_copyout_results(vmu_cache_t *cache, vmusage_t *buf, size_t *nres,
/* figure out what results the caller is interested in. */
if ((flags & VMUSAGE_SYSTEM) && curproc->p_zone == global_zone)
types |= VMUSAGE_SYSTEM;
- if (flags & (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES))
+ if (flags & (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES | VMUSAGE_A_ZONE))
types |= VMUSAGE_ZONE;
if (flags & (VMUSAGE_PROJECTS | VMUSAGE_ALL_PROJECTS |
VMUSAGE_COL_PROJECTS))
@@ -1826,26 +1900,33 @@ vmu_copyout_results(vmu_cache_t *cache, vmusage_t *buf, size_t *nres,
continue;
}
- /* Skip "other zone" results if not requested */
- if (result->vmu_zoneid != curproc->p_zone->zone_id) {
- if (result->vmu_type == VMUSAGE_ZONE &&
- (flags & VMUSAGE_ALL_ZONES) == 0)
- continue;
- if (result->vmu_type == VMUSAGE_PROJECTS &&
- (flags & (VMUSAGE_ALL_PROJECTS |
- VMUSAGE_COL_PROJECTS)) == 0)
- continue;
- if (result->vmu_type == VMUSAGE_TASKS &&
- (flags & VMUSAGE_ALL_TASKS) == 0)
- continue;
- if (result->vmu_type == VMUSAGE_RUSERS &&
- (flags & (VMUSAGE_ALL_RUSERS |
- VMUSAGE_COL_RUSERS)) == 0)
- continue;
- if (result->vmu_type == VMUSAGE_EUSERS &&
- (flags & (VMUSAGE_ALL_EUSERS |
- VMUSAGE_COL_EUSERS)) == 0)
+ if (result->vmu_type == VMUSAGE_ZONE &&
+ flags & VMUSAGE_A_ZONE) {
+ /* Skip non-requested zone results */
+ if (result->vmu_zoneid != req_zone_id)
continue;
+ } else {
+ /* Skip "other zone" results if not requested */
+ if (result->vmu_zoneid != curproc->p_zone->zone_id) {
+ if (result->vmu_type == VMUSAGE_ZONE &&
+ (flags & VMUSAGE_ALL_ZONES) == 0)
+ continue;
+ if (result->vmu_type == VMUSAGE_PROJECTS &&
+ (flags & (VMUSAGE_ALL_PROJECTS |
+ VMUSAGE_COL_PROJECTS)) == 0)
+ continue;
+ if (result->vmu_type == VMUSAGE_TASKS &&
+ (flags & VMUSAGE_ALL_TASKS) == 0)
+ continue;
+ if (result->vmu_type == VMUSAGE_RUSERS &&
+ (flags & (VMUSAGE_ALL_RUSERS |
+ VMUSAGE_COL_RUSERS)) == 0)
+ continue;
+ if (result->vmu_type == VMUSAGE_EUSERS &&
+ (flags & (VMUSAGE_ALL_EUSERS |
+ VMUSAGE_COL_EUSERS)) == 0)
+ continue;
+ }
}
count++;
if (out_result != NULL) {
@@ -1901,10 +1982,12 @@ vm_getusage(uint_t flags, time_t age, vmusage_t *buf, size_t *nres, int cpflg)
int cacherecent = 0;
hrtime_t now;
uint_t flags_orig;
+ id_t req_zone_id;
/*
* Non-global zones cannot request system wide and/or collated
- * results, or the system result, so munge the flags accordingly.
+ * results, or the system result, or usage of another zone, so munge
+ * the flags accordingly.
*/
flags_orig = flags;
if (curproc->p_zone != global_zone) {
@@ -1924,6 +2007,10 @@ vm_getusage(uint_t flags, time_t age, vmusage_t *buf, size_t *nres, int cpflg)
flags &= ~VMUSAGE_SYSTEM;
flags |= VMUSAGE_ZONE;
}
+ if (flags & VMUSAGE_A_ZONE) {
+ flags &= ~VMUSAGE_A_ZONE;
+ flags |= VMUSAGE_ZONE;
+ }
}
/* Check for unknown flags */
@@ -1934,6 +2021,21 @@ vm_getusage(uint_t flags, time_t age, vmusage_t *buf, size_t *nres, int cpflg)
if ((flags & VMUSAGE_MASK) == 0)
return (set_errno(EINVAL));
+ /* If requesting results for a specific zone, get the zone ID */
+ if (flags & VMUSAGE_A_ZONE) {
+ size_t bufsize;
+ vmusage_t zreq;
+
+ if (ddi_copyin((caddr_t)nres, &bufsize, sizeof (size_t), cpflg))
+ return (set_errno(EFAULT));
+ /* Requested zone ID is passed in buf, so 0 len not allowed */
+ if (bufsize == 0)
+ return (set_errno(EINVAL));
+ if (ddi_copyin((caddr_t)buf, &zreq, sizeof (vmusage_t), cpflg))
+ return (set_errno(EFAULT));
+ req_zone_id = zreq.vmu_id;
+ }
+
mutex_enter(&vmu_data.vmu_lock);
now = gethrtime();
@@ -1953,7 +2055,7 @@ start:
mutex_exit(&vmu_data.vmu_lock);
ret = vmu_copyout_results(cache, buf, nres, flags_orig,
- cpflg);
+ req_zone_id, cpflg);
mutex_enter(&vmu_data.vmu_lock);
vmu_cache_rele(cache);
if (vmu_data.vmu_pending_waiters > 0)
@@ -2009,8 +2111,11 @@ start:
mutex_exit(&vmu_data.vmu_lock);
+ /* update zone's phys. mem. rctl usage */
+ vmu_update_zone_rctls(cache);
/* copy cache */
- ret = vmu_copyout_results(cache, buf, nres, flags_orig, cpflg);
+ ret = vmu_copyout_results(cache, buf, nres, flags_orig,
+ req_zone_id, cpflg);
mutex_enter(&vmu_data.vmu_lock);
vmu_cache_rele(cache);
mutex_exit(&vmu_data.vmu_lock);
@@ -2030,3 +2135,185 @@ start:
vmu_data.vmu_pending_waiters--;
goto start;
}
+
+#if defined(__x86)
+/*
+ * Attempt to invalidate all of the pages in the mapping for the given process.
+ */
+static void
+map_inval(proc_t *p, struct seg *seg, caddr_t addr, size_t size)
+{
+ page_t *pp;
+ size_t psize;
+ u_offset_t off;
+ caddr_t eaddr;
+ struct vnode *vp;
+ struct segvn_data *svd;
+ struct hat *victim_hat;
+
+ ASSERT((addr + size) <= (seg->s_base + seg->s_size));
+
+ victim_hat = p->p_as->a_hat;
+ svd = (struct segvn_data *)seg->s_data;
+ vp = svd->vp;
+ psize = page_get_pagesize(seg->s_szc);
+
+ off = svd->offset + (uintptr_t)(addr - seg->s_base);
+
+ for (eaddr = addr + size; addr < eaddr; addr += psize, off += psize) {
+ pp = page_lookup_nowait(vp, off, SE_SHARED);
+
+ if (pp != NULL) {
+ /* following logic based on pvn_getdirty() */
+
+ if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) {
+ page_unlock(pp);
+ continue;
+ }
+
+ page_io_lock(pp);
+ hat_page_inval(pp, 0, victim_hat);
+ page_io_unlock(pp);
+
+ /*
+ * For B_INVALCURONLY-style handling we let
+ * page_release call VN_DISPOSE if no one else is using
+ * the page.
+ *
+ * A hat_ismod() check would be useless because:
+ * (1) we are not be holding SE_EXCL lock
+ * (2) we've not unloaded _all_ translations
+ *
+ * Let page_release() do the heavy-lifting.
+ */
+ (void) page_release(pp, 1);
+ }
+ }
+}
+
+/*
+ * vm_map_inval()
+ *
+ * Invalidate as many pages as possible within the given mapping for the given
+ * process. addr is expected to be the base address of the mapping and size is
+ * the length of the mapping. In some cases a mapping will encompass an
+ * entire segment, but at least for anon or stack mappings, these will be
+ * regions within a single large segment. Thus, the invalidation is oriented
+ * around a single mapping and not an entire segment.
+ *
+ * SPARC sfmmu hat does not support HAT_CURPROC_PGUNLOAD-style handling so
+ * this code is only applicable to x86.
+ */
+int
+vm_map_inval(pid_t pid, caddr_t addr, size_t size)
+{
+ int ret;
+ int error = 0;
+ proc_t *p; /* target proc */
+ struct as *as; /* target proc's address space */
+ struct seg *seg; /* working segment */
+
+ if (curproc->p_zone != global_zone || crgetruid(curproc->p_cred) != 0)
+ return (set_errno(EPERM));
+
+ /* If not a valid mapping address, return an error */
+ if ((caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK) != addr)
+ return (set_errno(EINVAL));
+
+again:
+ mutex_enter(&pidlock);
+ p = prfind(pid);
+ if (p == NULL) {
+ mutex_exit(&pidlock);
+ return (set_errno(ESRCH));
+ }
+
+ mutex_enter(&p->p_lock);
+ mutex_exit(&pidlock);
+
+ if (panicstr != NULL) {
+ mutex_exit(&p->p_lock);
+ return (0);
+ }
+
+ as = p->p_as;
+
+ /*
+ * Try to set P_PR_LOCK - prevents process "changing shape"
+ * - blocks fork
+ * - blocks sigkill
+ * - cannot be a system proc
+ * - must be fully created proc
+ */
+ ret = sprtrylock_proc(p);
+ if (ret == -1) {
+ /* Process in invalid state */
+ mutex_exit(&p->p_lock);
+ return (set_errno(ESRCH));
+ }
+
+ if (ret == 1) {
+ /*
+ * P_PR_LOCK is already set. Wait and try again. This also
+ * drops p_lock so p may no longer be valid since the proc may
+ * have exited.
+ */
+ sprwaitlock_proc(p);
+ goto again;
+ }
+
+ /* P_PR_LOCK is now set */
+ mutex_exit(&p->p_lock);
+
+ AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
+ if ((seg = as_segat(as, addr)) == NULL) {
+ AS_LOCK_EXIT(as, &as->a_lock);
+ mutex_enter(&p->p_lock);
+ sprunlock(p);
+ return (set_errno(ENOMEM));
+ }
+
+ /*
+ * The invalidation behavior only makes sense for vnode-backed segments.
+ */
+ if (seg->s_ops != &segvn_ops) {
+ AS_LOCK_EXIT(as, &as->a_lock);
+ mutex_enter(&p->p_lock);
+ sprunlock(p);
+ return (0);
+ }
+
+ /*
+ * If the mapping is out of bounds of the segement return an error.
+ */
+ if ((addr + size) > (seg->s_base + seg->s_size)) {
+ AS_LOCK_EXIT(as, &as->a_lock);
+ mutex_enter(&p->p_lock);
+ sprunlock(p);
+ return (set_errno(EINVAL));
+ }
+
+ /*
+ * Don't use MS_INVALCURPROC flag here since that would eventually
+ * initiate hat invalidation based on curthread. Since we're doing this
+ * on behalf of a different process, that would erroneously invalidate
+ * our own process mappings.
+ */
+ error = SEGOP_SYNC(seg, addr, size, 0, (uint_t)MS_ASYNC);
+ if (error == 0) {
+ /*
+ * Since we didn't invalidate during the sync above, we now
+ * try to invalidate all of the pages in the mapping.
+ */
+ map_inval(p, seg, addr, size);
+ }
+ AS_LOCK_EXIT(as, &as->a_lock);
+
+ mutex_enter(&p->p_lock);
+ sprunlock(p);
+
+ if (error)
+ (void) set_errno(error);
+ return (error);
+}
+#endif
diff --git a/usr/src/uts/i86pc/Makefile.rules b/usr/src/uts/i86pc/Makefile.rules
index 84158a06bd..a0207714f7 100644
--- a/usr/src/uts/i86pc/Makefile.rules
+++ b/usr/src/uts/i86pc/Makefile.rules
@@ -220,7 +220,7 @@ DBOOT_DEFS = -D_BOOT $(DBOOT_MACH_$(CLASS))
DBOOT_DEFS += -D_MACHDEP -D_KMEMUSER -U_KERNEL -D_I32LPx
DBOOT_FLAGS = $(CCVERBOSE) $(CERRWARN) $(CCNOAUTOINLINE)
-DBOOT_CC_INCL = -I$(SRC)/common $(INCLUDE_PATH)
+DBOOT_CC_INCL = -I$(SRC)/common -I$(SRC)/common/util $(INCLUDE_PATH)
DBOOT_AS_INCL = $(AS_INC_PATH)
DBOOT_AS = $(ONBLD_TOOLS)/bin/$(MACH)/aw
diff --git a/usr/src/uts/i86pc/dboot/dboot_startkern.c b/usr/src/uts/i86pc/dboot/dboot_startkern.c
index f5f8f95682..6abb7c6349 100644
--- a/usr/src/uts/i86pc/dboot/dboot_startkern.c
+++ b/usr/src/uts/i86pc/dboot/dboot_startkern.c
@@ -23,7 +23,7 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
- * Copyright 2012 Joyent, Inc. All rights reserved.
+ * Copyright 2013 Joyent, Inc. All rights reserved.
*/
@@ -34,6 +34,8 @@
#include <sys/mach_mmu.h>
#include <sys/multiboot.h>
#include <sys/sha1.h>
+#include <util/string.h>
+#include <util/strtolctype.h>
#if defined(__xpv)
@@ -61,6 +63,15 @@ extern int have_cpuid(void);
#define SHA1_ASCII_LENGTH (SHA1_DIGEST_LENGTH * 2)
/*
+ * Region of memory that may be corrupted by external actors. This can go away
+ * once the firmware bug RICHMOND-16 is fixed and all systems with the bug are
+ * upgraded.
+ */
+#define CORRUPT_REGION_START 0xc700000
+#define CORRUPT_REGION_SIZE 0x100000
+#define CORRUPT_REGION_END (CORRUPT_REGION_START + CORRUPT_REGION_SIZE)
+
+/*
* This file contains code that runs to transition us from either a multiboot
* compliant loader (32 bit non-paging) or a XPV domain loader to
* regular kernel execution. Its task is to setup the kernel memory image
@@ -162,8 +173,12 @@ uint_t pcimemlists_used = 0;
struct boot_memlist rsvdmemlists[MAX_MEMLIST];
uint_t rsvdmemlists_used = 0;
-#define MAX_MODULES (10)
-struct boot_modules modules[MAX_MODULES];
+/*
+ * This should match what's in the bootloader. It's arbitrary, but GRUB
+ * in particular has limitations on how much space it can use before it
+ * stops working properly. This should be enough.
+ */
+struct boot_modules modules[MAX_BOOT_MODULES];
uint_t modules_used = 0;
/*
@@ -172,6 +187,8 @@ uint_t modules_used = 0;
uint_t prom_debug = 0;
uint_t map_debug = 0;
+static char noname[2] = "-";
+
/*
* Either hypervisor-specific or grub-specific code builds the initial
* memlists. This code does the sort/merge/link for final use.
@@ -805,13 +822,20 @@ digest_a2h(const char *ascii, uint8_t *digest)
* 4 GB, which should not be a problem.
*/
static int
-check_image_hash(const char *ascii, const void *image, size_t len)
+check_image_hash(uint_t midx)
{
+ const char *ascii;
+ const void *image;
+ size_t len;
SHA1_CTX ctx;
uint8_t digest[SHA1_DIGEST_LENGTH];
uint8_t baseline[SHA1_DIGEST_LENGTH];
unsigned int i;
+ ascii = (const char *)(uintptr_t)modules[midx].bm_hash;
+ image = (const void *)(uintptr_t)modules[midx].bm_addr;
+ len = (size_t)modules[midx].bm_size;
+
digest_a2h(ascii, baseline);
SHA1Init(&ctx);
@@ -826,16 +850,80 @@ check_image_hash(const char *ascii, const void *image, size_t len)
return (0);
}
+static const char *
+type_to_str(boot_module_type_t type)
+{
+ switch (type) {
+ case BMT_ROOTFS:
+ return ("rootfs");
+ case BMT_FILE:
+ return ("file");
+ case BMT_HASH:
+ return ("hash");
+ default:
+ return ("unknown");
+ }
+}
+
static void
check_images(void)
{
- int i;
- char *hashes;
- mb_module_t *mod, *hashmod;
- char *hash;
+ uint_t i;
char displayhash[SHA1_ASCII_LENGTH + 1];
- size_t hashlen;
- size_t len;
+
+ for (i = 0; i < modules_used; i++) {
+ if (prom_debug) {
+ dboot_printf("module #%d: name %s type %s "
+ "addr %lx size %lx\n",
+ i, (char *)(uintptr_t)modules[i].bm_name,
+ type_to_str(modules[i].bm_type),
+ (ulong_t)modules[i].bm_addr,
+ (ulong_t)modules[i].bm_size);
+ }
+
+ if (modules[i].bm_type == BMT_HASH ||
+ modules[i].bm_hash == NULL) {
+ DBG_MSG("module has no hash; skipping check\n");
+ continue;
+ }
+ (void) memcpy(displayhash,
+ (void *)(uintptr_t)modules[i].bm_hash,
+ SHA1_ASCII_LENGTH);
+ displayhash[SHA1_ASCII_LENGTH] = '\0';
+ if (prom_debug) {
+ dboot_printf("checking expected hash [%s]: ",
+ displayhash);
+ }
+
+ if (check_image_hash(i) != 0)
+ dboot_panic("hash mismatch!\n");
+ else
+ DBG_MSG("OK\n");
+ }
+}
+
+/*
+ * Determine the module's starting address, size, name, and type, and fill the
+ * boot_modules structure. This structure is used by the bop code, except for
+ * hashes which are checked prior to transferring control to the kernel.
+ */
+static void
+process_module(mb_module_t *mod)
+{
+ int midx = modules_used++;
+ char *p, *q;
+
+ if (prom_debug) {
+ dboot_printf("\tmodule #%d: '%s' at 0x%lx, end 0x%lx\n",
+ midx, (char *)(mod->mod_name),
+ (ulong_t)mod->mod_start, (ulong_t)mod->mod_end);
+ }
+
+ if (mod->mod_start > mod->mod_end) {
+ dboot_panic("module #%d: module start address 0x%lx greater "
+ "than end address 0x%lx", midx,
+ (ulong_t)mod->mod_start, (ulong_t)mod->mod_end);
+ }
/*
* A brief note on lengths and sizes: GRUB, for reasons unknown, passes
@@ -851,47 +939,128 @@ check_images(void)
* we'll just cope with the bug. That means we won't actually hash the
* byte at mod_end, and we will expect that mod_end for the hash file
* itself is one greater than some multiple of 41 (40 bytes of ASCII
- * hash plus a newline for each module).
+ * hash plus a newline for each module). We set bm_size to the true
+ * correct number of bytes in each module, achieving exactly this.
*/
- if (mb_info->mods_count > 1) {
- mod = (mb_module_t *)mb_info->mods_addr;
- hashmod = mod + (mb_info->mods_count - 1);
- hashes = (char *)hashmod->mod_start;
- hashlen = (size_t)(hashmod->mod_end - hashmod->mod_start);
- hash = hashes;
- if (prom_debug) {
- dboot_printf("Hash module found at %lx size %lx\n",
- (ulong_t)hashes, (ulong_t)hashlen);
- }
- } else {
- DBG_MSG("Skipping hash check; no hash module found.\n");
+ modules[midx].bm_addr = mod->mod_start;
+ modules[midx].bm_size = mod->mod_end - mod->mod_start;
+ modules[midx].bm_name = mod->mod_name;
+ modules[midx].bm_hash = NULL;
+ modules[midx].bm_type = BMT_FILE;
+
+ if (mod->mod_name == NULL) {
+ modules[midx].bm_name = (native_ptr_t)(uintptr_t)noname;
return;
}
- for (mod = (mb_module_t *)(mb_info->mods_addr), i = 0;
- i < mb_info->mods_count - 1; ++mod, ++i) {
- if ((hash - hashes) + SHA1_ASCII_LENGTH + 1 > hashlen) {
- dboot_printf("Short hash module of length 0x%lx bytes; "
- "skipping hash checks\n", (ulong_t)hashlen);
- break;
+ p = (char *)(uintptr_t)mod->mod_name;
+ modules[midx].bm_name =
+ (native_ptr_t)(uintptr_t)strsep(&p, " \t\f\n\r");
+
+ while (p != NULL) {
+ q = strsep(&p, " \t\f\n\r");
+ if (strncmp(q, "name=", 5) == 0) {
+ if (q[5] != '\0' && !isspace(q[5])) {
+ modules[midx].bm_name =
+ (native_ptr_t)(uintptr_t)(q + 5);
+ }
+ continue;
}
- (void) memcpy(displayhash, hash, SHA1_ASCII_LENGTH);
- displayhash[SHA1_ASCII_LENGTH] = '\0';
- if (prom_debug) {
- dboot_printf("Checking hash for module %d [%s]: ",
- i, displayhash);
+ if (strncmp(q, "type=", 5) == 0) {
+ if (q[5] == '\0' || isspace(q[5]))
+ continue;
+ q += 5;
+ if (strcmp(q, "rootfs") == 0) {
+ modules[midx].bm_type = BMT_ROOTFS;
+ } else if (strcmp(q, "hash") == 0) {
+ modules[midx].bm_type = BMT_HASH;
+ } else if (strcmp(q, "file") != 0) {
+ dboot_printf("\tmodule #%d: unknown module "
+ "type '%s'; defaulting to 'file'",
+ midx, q);
+ }
+ continue;
}
- len = mod->mod_end - mod->mod_start; /* see above */
- if (check_image_hash(hash, (void *)mod->mod_start, len) != 0) {
- dboot_panic("SHA-1 hash mismatch on %s; expected %s\n",
- (char *)mod->mod_name, displayhash);
- } else {
- DBG_MSG("OK\n");
+ if (strncmp(q, "hash=", 5) == 0) {
+ if (q[5] != '\0' && !isspace(q[5])) {
+ modules[midx].bm_hash =
+ (native_ptr_t)(uintptr_t)(q + 5);
+ }
+ continue;
+ }
+
+ dboot_printf("ignoring unknown option '%s'\n", q);
+ }
+}
+
+/*
+ * Backward compatibility: if there are exactly one or two modules, both
+ * of type 'file' and neither with an embedded hash value, we have been
+ * given the legacy style modules. In this case we need to treat the first
+ * module as a rootfs and the second as a hash referencing that module.
+ * Otherwise, even if the configuration is invalid, we assume that the
+ * operator knows what he's doing or at least isn't being bitten by this
+ * interface change.
+ */
+static void
+fixup_modules(void)
+{
+ if (modules_used == 0 || modules_used > 2)
+ return;
+
+ if (modules[0].bm_type != BMT_FILE ||
+ modules_used > 1 && modules[1].bm_type != BMT_FILE) {
+ return;
+ }
+
+ if (modules[0].bm_hash != NULL ||
+ modules_used > 1 && modules[1].bm_hash != NULL) {
+ return;
+ }
+
+ modules[0].bm_type = BMT_ROOTFS;
+ if (modules_used > 1) {
+ modules[1].bm_type = BMT_HASH;
+ modules[1].bm_name = modules[0].bm_name;
+ }
+}
+
+/*
+ * For modules that do not have assigned hashes but have a separate hash module,
+ * find the assigned hash module and set the primary module's bm_hash to point
+ * to the hash data from that module. We will then ignore modules of type
+ * BMT_HASH from this point forward.
+ */
+static void
+assign_module_hashes(void)
+{
+ uint_t i, j;
+
+ for (i = 0; i < modules_used; i++) {
+ if (modules[i].bm_type == BMT_HASH ||
+ modules[i].bm_hash != NULL) {
+ continue;
+ }
+
+ for (j = 0; j < modules_used; j++) {
+ if (modules[j].bm_type != BMT_HASH ||
+ strcmp((char *)(uintptr_t)modules[j].bm_name,
+ (char *)(uintptr_t)modules[i].bm_name) != 0) {
+ continue;
+ }
+
+ if (modules[j].bm_size < SHA1_ASCII_LENGTH) {
+ dboot_printf("Short hash module of length "
+ "0x%lx bytes; ignoring\n",
+ (ulong_t)modules[j].bm_size);
+ } else {
+ modules[i].bm_hash = modules[j].bm_addr;
+ }
+ break;
}
- hash += SHA1_ASCII_LENGTH + 1;
}
}
@@ -927,9 +1096,9 @@ init_mem_alloc(void)
DBG_MSG("Entered init_mem_alloc()\n");
DBG((uintptr_t)mb_info);
- if (mb_info->mods_count > MAX_MODULES) {
+ if (mb_info->mods_count > MAX_BOOT_MODULES) {
dboot_panic("Too many modules (%d) -- the maximum is %d.",
- mb_info->mods_count, MAX_MODULES);
+ mb_info->mods_count, MAX_BOOT_MODULES);
}
/*
* search the modules to find the last used address
@@ -940,18 +1109,7 @@ init_mem_alloc(void)
for (mod = (mb_module_t *)(mb_info->mods_addr), i = 0;
i < mb_info->mods_count;
++mod, ++i) {
- if (prom_debug) {
- dboot_printf("\tmodule #%d: %s at: 0x%lx, end 0x%lx\n",
- i, (char *)(mod->mod_name),
- (ulong_t)mod->mod_start, (ulong_t)mod->mod_end);
- }
- modules[i].bm_addr = mod->mod_start;
- if (mod->mod_start > mod->mod_end) {
- dboot_panic("module[%d]: Invalid module start address "
- "(0x%llx)", i, (uint64_t)mod->mod_start);
- }
- modules[i].bm_size = mod->mod_end - mod->mod_start;
-
+ process_module(mod);
check_higher(mod->mod_end);
}
bi->bi_modules = (native_ptr_t)(uintptr_t)modules;
@@ -959,6 +1117,8 @@ init_mem_alloc(void)
bi->bi_module_cnt = mb_info->mods_count;
DBG(bi->bi_module_cnt);
+ fixup_modules();
+ assign_module_hashes();
check_images();
/*
@@ -1004,6 +1164,38 @@ init_mem_alloc(void)
case 1:
if (end > max_mem)
max_mem = end;
+
+ /*
+ * Well, this is sad. One some systems, there
+ * is a region of memory that can be corrupted
+ * until some number of seconds after we have
+ * booted. And the BIOS doesn't tell us that
+ * this memory is unsafe to use. And we don't
+ * know how long it's dangerous. So we'll
+ * chop out this range from any memory list
+ * that would otherwise be usable. Note that
+ * any system of this type will give us the
+ * new-style (0x40) memlist, so we need not
+ * fix up the other path below.
+ */
+ if (start < CORRUPT_REGION_START &&
+ end > CORRUPT_REGION_START) {
+ memlists[memlists_used].addr = start;
+ memlists[memlists_used].size =
+ CORRUPT_REGION_START - start;
+ ++memlists_used;
+ if (end > CORRUPT_REGION_END)
+ start = CORRUPT_REGION_END;
+ else
+ continue;
+ }
+ if (start >= CORRUPT_REGION_START &&
+ start < CORRUPT_REGION_END) {
+ if (end <= CORRUPT_REGION_END)
+ continue;
+ start = CORRUPT_REGION_END;
+ }
+
memlists[memlists_used].addr = start;
memlists[memlists_used].size = end - start;
++memlists_used;
diff --git a/usr/src/uts/i86pc/io/ppm/acpisleep.c b/usr/src/uts/i86pc/io/ppm/acpisleep.c
index 78328170e6..6b94e50909 100644
--- a/usr/src/uts/i86pc/io/ppm/acpisleep.c
+++ b/usr/src/uts/i86pc/io/ppm/acpisleep.c
@@ -22,6 +22,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -56,6 +57,19 @@
int acpi_rtc_wake = 0x0; /* wake in N seconds */
+/*
+ * Execute optional ACPI methods for suspend/resume.
+ * The value can be ACPI_EXECUTE_GTS and/or ACPI_EXECUTE_BFS.
+ * Global so it can be set in /etc/system.
+ * From usr/src/uts/intel/io/acpica/changes.txt:
+ * It has been seen on some systems where the execution of these
+ * methods causes errors and also prevents the machine from entering S5.
+ * It is therefore suggested that host operating systems do not execute
+ * these methods by default. In the future, perhaps these methods can be
+ * optionally executed based on the age of the system...
+ */
+int acpi_sleep_flags = ACPI_NO_OPTIONAL_METHODS;
+
#if 0 /* debug */
static uint8_t branchbuf[64 * 1024]; /* for the HDT branch trace stuff */
#endif /* debug */
@@ -142,8 +156,9 @@ acpi_enter_sleepstate(s3a_t *s3ap)
* Tell the hardware to sleep.
*/
PT(PT_SXE);
- PMD(PMD_SX, ("Calling AcpiEnterSleepState(%d) ...\n", Sx))
- if (AcpiEnterSleepState(Sx) != AE_OK) {
+ PMD(PMD_SX, ("Calling AcpiEnterSleepState(%d, %d) ...\n", Sx,
+ acpi_sleep_flags))
+ if (AcpiEnterSleepState(Sx, acpi_sleep_flags) != AE_OK) {
PT(PT_SXE_FAIL);
PMD(PMD_SX, ("... failed!\n"))
}
@@ -163,6 +178,11 @@ acpi_exit_sleepstate(s3a_t *s3ap)
PMD(PMD_SX, ("!We woke up!\n"))
PT(PT_LSS);
+ if (AcpiLeaveSleepStatePrep(Sx, acpi_sleep_flags) != AE_OK) {
+ PT(PT_LSS_FAIL);
+ PMD(PMD_SX, ("Problem with LeaveSleepState!\n"))
+ }
+
if (AcpiLeaveSleepState(Sx) != AE_OK) {
PT(PT_LSS_FAIL);
PMD(PMD_SX, ("Problem with LeaveSleepState!\n"))
diff --git a/usr/src/uts/i86pc/io/psm/psm_common.c b/usr/src/uts/i86pc/io/psm/psm_common.c
index 7a3dd8a733..9c17d85228 100644
--- a/usr/src/uts/i86pc/io/psm/psm_common.c
+++ b/usr/src/uts/i86pc/io/psm/psm_common.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -979,7 +980,7 @@ acpi_poweroff(void)
return (1);
}
ACPI_DISABLE_IRQS();
- status = AcpiEnterSleepState(5);
+ status = AcpiEnterSleepState(5, ACPI_NO_OPTIONAL_METHODS);
ACPI_ENABLE_IRQS();
/* we should be off; if we get here it's an error */
diff --git a/usr/src/uts/i86pc/ml/offsets.in b/usr/src/uts/i86pc/ml/offsets.in
index 50a27b3d30..9922891e56 100644
--- a/usr/src/uts/i86pc/ml/offsets.in
+++ b/usr/src/uts/i86pc/ml/offsets.in
@@ -80,7 +80,7 @@ proc PROCSIZE
p_agenttp
p_zone
p_brand
- p_brand_data
+ __p_brand_data
_kthread THREAD_SIZE
t_pcb T_LABEL
diff --git a/usr/src/uts/i86pc/ml/syscall_asm.s b/usr/src/uts/i86pc/ml/syscall_asm.s
index 61ef4ac6c3..68181be28a 100644
--- a/usr/src/uts/i86pc/ml/syscall_asm.s
+++ b/usr/src/uts/i86pc/ml/syscall_asm.s
@@ -631,6 +631,36 @@ _sysenter_done:
sysexit
SET_SIZE(sys_sysenter)
SET_SIZE(brand_sys_sysenter)
+#endif /* __lint */
+
+#if defined(__lint)
+/*
+ * System call via an int80. This entry point is only used by the Linux
+ * application environment. Unlike the sysenter path, there is no default
+ * action to take if no callback is registered for this process.
+ */
+void
+sys_int80()
+{}
+
+#else /* __lint */
+
+ ENTRY_NP(brand_sys_int80)
+ BRAND_CALLBACK(BRAND_CB_INT80)
+
+ ALTENTRY(sys_int80)
+ /*
+ * We hit an int80, but this process isn't of a brand with an int80
+ * handler. Bad process! Make it look as if the INT failed.
+ * Modify %eip to point before the INT, push the expected error
+ * code and fake a GP fault.
+ *
+ */
+ subl $2, (%esp) /* int insn 2-bytes */
+ pushl $_CONST(_MUL(T_INT80, GATE_DESC_SIZE) + 2)
+ jmp gptrap / GP fault
+ SET_SIZE(sys_int80)
+ SET_SIZE(brand_sys_int80)
/*
* Declare a uintptr_t which covers the entire pc range of syscall
diff --git a/usr/src/uts/i86pc/ml/syscall_asm_amd64.s b/usr/src/uts/i86pc/ml/syscall_asm_amd64.s
index 443689cec3..823404b485 100644
--- a/usr/src/uts/i86pc/ml/syscall_asm_amd64.s
+++ b/usr/src/uts/i86pc/ml/syscall_asm_amd64.s
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#include <sys/asm_linkage.h>
@@ -800,6 +800,13 @@ _syscall32_save:
* more succinctly:
*
* SA(MAXSYSARGS * sizeof (long)) == 64
+ *
+ * Note, this space is used both to copy in the arguments from user
+ * land, but also to as part of the old UNIX style syscall_ap() method.
+ * syscall_entry expects that we do not change the values of this space
+ * that we give it. However, this means that when we end up in the more
+ * recent model of passing the arguments based on the calling
+ * conventions, we'll need to save an additional 16 bytes of stack.
*/
#define SYS_DROP 64 /* drop for args */
subq $SYS_DROP, %rsp
@@ -827,12 +834,16 @@ _syscall32_save:
*/
movq %rax, %rbx
- movl 0(%rsp), %edi
- movl 8(%rsp), %esi
- movl 0x10(%rsp), %edx
- movl 0x18(%rsp), %ecx
- movl 0x20(%rsp), %r8d
- movl 0x28(%rsp), %r9d
+ movl 0x0(%rsp), %edi /* arg0 */
+ movl 0x8(%rsp), %esi /* arg1 */
+ movl 0x10(%rsp), %edx /* arg2 */
+ movl 0x38(%rsp), %eax /* arg7 load */
+ movl 0x18(%rsp), %ecx /* arg3 */
+ pushq %rax /* arg7 saved to stack */
+ movl 0x28(%rsp), %r8d /* arg4 */
+ movl 0x38(%rsp), %eax /* arg6 load */
+ movl 0x30(%rsp), %r9d /* arg5 */
+ pushq %rax /* arg6 saved to stack */
call *SY_CALLC(%rbx)
@@ -1079,15 +1090,20 @@ sys_sysenter()
/*
* Fetch the arguments copied onto the kernel stack and put
* them in the right registers to invoke a C-style syscall handler.
- * %rax contains the handler address.
+ * %rax contains the handler address. For the last two arguments, we
+ * push them onto the stack -- we can't clobber the old arguments.
*/
movq %rax, %rbx
- movl 0(%rsp), %edi
- movl 8(%rsp), %esi
- movl 0x10(%rsp), %edx
- movl 0x18(%rsp), %ecx
- movl 0x20(%rsp), %r8d
- movl 0x28(%rsp), %r9d
+ movl 0x0(%rsp), %edi /* arg0 */
+ movl 0x8(%rsp), %esi /* arg1 */
+ movl 0x10(%rsp), %edx /* arg2 */
+ movl 0x38(%rsp), %eax /* arg7 load */
+ movl 0x18(%rsp), %ecx /* arg3 */
+ pushq %rax /* arg7 saved to stack */
+ movl 0x28(%rsp), %r8d /* arg4 */
+ movl 0x38(%rsp), %eax /* arg6 load */
+ movl 0x30(%rsp), %r9d /* arg5 */
+ pushq %rax /* arg6 saved to stack */
call *SY_CALLC(%rbx)
@@ -1159,6 +1175,48 @@ sys_sysenter()
SET_SIZE(brand_sys_sysenter)
#endif /* __lint */
+
+#if defined(__lint)
+/*
+ * System call via an int80. This entry point is only used by the Linux
+ * application environment. Unlike the other entry points, there is no
+ * default action to take if no callback is registered for this process.
+ */
+void
+sys_int80()
+{}
+
+#else /* __lint */
+
+ ENTRY_NP(brand_sys_int80)
+ SWAPGS /* kernel gsbase */
+ XPV_TRAP_POP
+ BRAND_CALLBACK(BRAND_CB_INT80, BRAND_URET_FROM_INTR_STACK())
+ SWAPGS /* user gsbase */
+ jmp nopop_int80
+
+ ENTRY_NP(sys_int80)
+ /*
+ * We hit an int80, but this process isn't of a brand with an int80
+ * handler. Bad process! Make it look as if the INT failed.
+ * Modify %rip to point before the INT, push the expected error
+ * code and fake a GP fault. Note on 64-bit hypervisor we need
+ * to undo the XPV_TRAP_POP and push rcx and r11 back on the stack
+ * because gptrap will pop them again with its own XPV_TRAP_POP.
+ */
+ XPV_TRAP_POP
+nopop_int80:
+ subq $2, (%rsp) /* int insn 2-bytes */
+ pushq $_CONST(_MUL(T_INT80, GATE_DESC_SIZE) + 2)
+#if defined(__xpv)
+ push %r11
+ push %rcx
+#endif
+ jmp gptrap / GP fault
+ SET_SIZE(sys_int80)
+ SET_SIZE(brand_sys_int80)
+#endif /* __lint */
+
/*
* This is the destination of the "int $T_SYSCALLINT" interrupt gate, used by
diff --git a/usr/src/uts/i86pc/os/cpr_impl.c b/usr/src/uts/i86pc/os/cpr_impl.c
index 0583ac6ed6..25d899c63e 100644
--- a/usr/src/uts/i86pc/os/cpr_impl.c
+++ b/usr/src/uts/i86pc/os/cpr_impl.c
@@ -753,6 +753,20 @@ i_cpr_is_supported(int sleeptype)
if (sleeptype != CPR_TORAM)
return (0);
+ /*
+ * Unfortunately, the x86 resume code was never implemented for GAS.
+ * The only obvious problem is that a trick necessary to appease Sun
+ * Studio does the wrong thing for GAS. Doubley unfortunate is that
+ * the condition used to detect GAS is incorrect, so we do in fact
+ * compile the Studio path, it just immediately fails in resume.
+ *
+ * Given that, if we were built using GCC, never allow CPR to be
+ * attempted.
+ */
+#ifdef __GNUC__
+ return (0);
+#endif
+
/*
* The next statement tests if a specific platform has turned off
* cpr support.
diff --git a/usr/src/uts/i86pc/os/fakebop.c b/usr/src/uts/i86pc/os/fakebop.c
index d38bcb046f..8e17a3fcab 100644
--- a/usr/src/uts/i86pc/os/fakebop.c
+++ b/usr/src/uts/i86pc/os/fakebop.c
@@ -22,10 +22,11 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- */
-/*
+ *
* Copyright (c) 2010, Intel Corporation.
* All rights reserved.
+ *
+ * Copyright 2013 Joyent, Inc. All rights reserved.
*/
/*
@@ -1164,10 +1165,12 @@ build_boot_properties(void)
int name_len;
char *value;
int value_len;
- struct boot_modules *bm;
+ struct boot_modules *bm, *rdbm;
char *propbuf;
int quoted = 0;
int boot_arg_len;
+ uint_t i, midx;
+ char modid[32];
#ifndef __xpv
static int stdout_val = 0;
uchar_t boot_device;
@@ -1185,9 +1188,32 @@ build_boot_properties(void)
DBG((uintptr_t)propbuf);
if (xbootp->bi_module_cnt > 0) {
bm = xbootp->bi_modules;
- bsetprop64("ramdisk_start", (uint64_t)(uintptr_t)bm->bm_addr);
- bsetprop64("ramdisk_end", (uint64_t)(uintptr_t)bm->bm_addr +
- bm->bm_size);
+ rdbm = NULL;
+ for (midx = i = 0; i < xbootp->bi_module_cnt; i++) {
+ if (bm[i].bm_type == BMT_ROOTFS) {
+ rdbm = &bm[i];
+ continue;
+ }
+ if (bm[i].bm_type == BMT_HASH || bm[i].bm_name == NULL)
+ continue;
+
+ (void) snprintf(modid, sizeof (modid),
+ "module-name-%u", midx);
+ bsetprops(modid, (char *)bm[i].bm_name);
+ (void) snprintf(modid, sizeof (modid),
+ "module-addr-%u", midx);
+ bsetprop64(modid, (uint64_t)(uintptr_t)bm[i].bm_addr);
+ (void) snprintf(modid, sizeof (modid),
+ "module-size-%u", midx);
+ bsetprop64(modid, (uint64_t)bm[i].bm_size);
+ ++midx;
+ }
+ if (rdbm != NULL) {
+ bsetprop64("ramdisk_start",
+ (uint64_t)(uintptr_t)rdbm->bm_addr);
+ bsetprop64("ramdisk_end",
+ (uint64_t)(uintptr_t)rdbm->bm_addr + rdbm->bm_size);
+ }
}
DBG_MSG("Parsing command line for boot properties\n");
diff --git a/usr/src/uts/i86pc/os/ibft.c b/usr/src/uts/i86pc/os/ibft.c
index 43ffad5e81..bcb8c06d58 100644
--- a/usr/src/uts/i86pc/os/ibft.c
+++ b/usr/src/uts/i86pc/os/ibft.c
@@ -38,6 +38,7 @@
#include <sys/kmem.h>
#include <sys/psm.h>
#include <sys/bootconf.h>
+#include <sys/reboot.h>
#ifndef NULL
#define NULL 0
@@ -209,6 +210,7 @@ static ibft_status_t iscsi_parse_ibft_NIC(iscsi_ibft_nic_t *nicp);
static ibft_status_t iscsi_parse_ibft_target(char *begin_of_ibft,
iscsi_ibft_tgt_t *tgtp);
+extern int boothowto;
/*
* Return value:
@@ -762,7 +764,9 @@ ld_ib_prop()
* 1) pass "-B ibft-noprobe=1" on kernel command line
* 2) add line "set ibft_noprobe=1" in /etc/system
*/
- cmn_err(CE_NOTE, IBFT_NOPROBE_MSG);
+ if (boothowto & RB_VERBOSE) {
+ cmn_err(CE_NOTE, IBFT_NOPROBE_MSG);
+ }
return;
}
diff --git a/usr/src/uts/i86pc/os/lgrpplat.c b/usr/src/uts/i86pc/os/lgrpplat.c
index ac647bea16..2d7c253204 100644
--- a/usr/src/uts/i86pc/os/lgrpplat.c
+++ b/usr/src/uts/i86pc/os/lgrpplat.c
@@ -2792,7 +2792,11 @@ lgrp_plat_process_sli(uint32_t domain_id, uchar_t *sli_info,
/*
* Read ACPI System Resource Affinity Table (SRAT) to determine which CPUs
* and memory are local to each other in the same NUMA node and return number
- * of nodes
+ * of nodes.
+ *
+ * The SRAT table pointer is populated during bootup by
+ * build_firmware_properties() in fakebop.c. Several motherboard and BIOS
+ * manufacturers are guilty of not having a SRAT table.
*/
static int
lgrp_plat_process_srat(struct srat *tp, struct msct *mp,
@@ -2810,9 +2814,15 @@ lgrp_plat_process_srat(struct srat *tp, struct msct *mp,
/*
* Nothing to do when no SRAT or disabled
*/
- if (tp == NULL || !lgrp_plat_srat_enable)
+ if (!lgrp_plat_srat_enable)
return (-1);
+ if (tp == NULL) {
+ cmn_err(CE_WARN, "Couldn't read ACPI SRAT table from BIOS. "
+ "lgrp support will be limited to one group.\n");
+ return (-1);
+ }
+
/*
* Try to get domain information from MSCT table.
* ACPI4.0: OSPM will use information provided by the MSCT only
diff --git a/usr/src/uts/i86pc/os/mlsetup.c b/usr/src/uts/i86pc/os/mlsetup.c
index 105c5c3363..7062c93f65 100644
--- a/usr/src/uts/i86pc/os/mlsetup.c
+++ b/usr/src/uts/i86pc/os/mlsetup.c
@@ -60,6 +60,7 @@
#include <sys/archsystm.h>
#include <sys/promif.h>
#include <sys/pci_cfgspace.h>
+#include <sys/bootvfs.h>
#ifdef __xpv
#include <sys/hypervisor.h>
#else
@@ -473,6 +474,10 @@ mach_modpath(char *path, const char *filename)
const char isastr[] = "/amd64";
size_t isalen = strlen(isastr);
+ len = strlen(SYSTEM_BOOT_PATH "/kernel");
+ (void) strcpy(path, SYSTEM_BOOT_PATH "/kernel ");
+ path += len + 1;
+
if ((p = strrchr(filename, '/')) == NULL)
return;
diff --git a/usr/src/uts/i86pc/os/startup.c b/usr/src/uts/i86pc/os/startup.c
index ac3538f646..82a0a5bc82 100644
--- a/usr/src/uts/i86pc/os/startup.c
+++ b/usr/src/uts/i86pc/os/startup.c
@@ -22,6 +22,7 @@
* Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012 DEY Storage Systems, Inc. All rights reserved.
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2013 Joyent, Inc. All rights reserved.
*/
/*
* Copyright (c) 2010, Intel Corporation.
@@ -121,6 +122,7 @@
#include <sys/ddi_periodic.h>
#include <sys/systeminfo.h>
#include <sys/multiboot.h>
+#include <sys/ramdisk.h>
#ifdef __xpv
@@ -2346,6 +2348,20 @@ pp_in_range(page_t *pp, uint64_t low_addr, uint64_t high_addr)
(pp->p_pagenum < btopr(high_addr)));
}
+static int
+pp_in_module(page_t *pp, const rd_existing_t *modranges)
+{
+ uint_t i;
+
+ for (i = 0; modranges[i].phys != 0; i++) {
+ if (pp_in_range(pp, modranges[i].phys,
+ modranges[i].phys + modranges[i].size))
+ return (1);
+ }
+
+ return (0);
+}
+
void
release_bootstrap(void)
{
@@ -2353,10 +2369,40 @@ release_bootstrap(void)
page_t *pp;
extern void kobj_boot_unmountroot(void);
extern dev_t rootdev;
+ uint_t i;
+ char propname[32];
+ rd_existing_t *modranges;
#if !defined(__xpv)
pfn_t pfn;
#endif
+ /*
+ * Save the bootfs module ranges so that we can reserve them below
+ * for the real bootfs.
+ */
+ modranges = kmem_alloc(sizeof (rd_existing_t) * MAX_BOOT_MODULES,
+ KM_SLEEP);
+ for (i = 0; ; i++) {
+ uint64_t start, size;
+
+ modranges[i].phys = 0;
+
+ (void) snprintf(propname, sizeof (propname),
+ "module-addr-%u", i);
+ if (do_bsys_getproplen(NULL, propname) <= 0)
+ break;
+ (void) do_bsys_getprop(NULL, propname, &start);
+
+ (void) snprintf(propname, sizeof (propname),
+ "module-size-%u", i);
+ if (do_bsys_getproplen(NULL, propname) <= 0)
+ break;
+ (void) do_bsys_getprop(NULL, propname, &size);
+
+ modranges[i].phys = start;
+ modranges[i].size = size;
+ }
+
/* unmount boot ramdisk and release kmem usage */
kobj_boot_unmountroot();
@@ -2397,9 +2443,8 @@ release_bootstrap(void)
continue;
}
-
if (root_is_ramdisk && pp_in_range(pp, ramdisk_start,
- ramdisk_end)) {
+ ramdisk_end) || pp_in_module(pp, modranges)) {
pp->p_next = rd_pages;
rd_pages = pp;
continue;
@@ -2411,6 +2456,8 @@ release_bootstrap(void)
}
PRM_POINT("Boot pages released");
+ kmem_free(modranges, sizeof (rd_existing_t) * 99);
+
#if !defined(__xpv)
/* XXPV -- note this following bunch of code needs to be revisited in Xen 3.0 */
/*
diff --git a/usr/src/uts/i86pc/sys/acpidev.h b/usr/src/uts/i86pc/sys/acpidev.h
index 6d11277aaf..a3bd54d4e3 100644
--- a/usr/src/uts/i86pc/sys/acpidev.h
+++ b/usr/src/uts/i86pc/sys/acpidev.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_ACPIDEV_H
@@ -128,7 +129,7 @@ typedef enum acpidev_class_id {
#ifdef _KERNEL
/* Common ACPI object names. */
-#define ACPIDEV_OBJECT_NAME_SB ACPI_NS_SYSTEM_BUS
+#define ACPIDEV_OBJECT_NAME_SB METHOD_NAME__SB_
#define ACPIDEV_OBJECT_NAME_PR "_PR_"
/* Common ACPI method names. */
diff --git a/usr/src/uts/i86pc/sys/apic.h b/usr/src/uts/i86pc/sys/apic.h
index 8a87760456..11ae48340a 100644
--- a/usr/src/uts/i86pc/sys/apic.h
+++ b/usr/src/uts/i86pc/sys/apic.h
@@ -382,7 +382,7 @@ struct apic_io_intr {
/* special or reserve vectors */
#define APIC_CHECK_RESERVE_VECTORS(v) \
(((v) == T_FASTTRAP) || ((v) == APIC_SPUR_INTR) || \
- ((v) == T_SYSCALLINT) || ((v) == T_DTRACE_RET))
+ ((v) == T_SYSCALLINT) || ((v) == T_DTRACE_RET) || ((v) == 0x80))
/* cmos shutdown code for BIOS */
#define BIOS_SHUTDOWN 0x0a
diff --git a/usr/src/uts/i86pc/vm/hat_i86.c b/usr/src/uts/i86pc/vm/hat_i86.c
index 64e1c2fb46..1fb0b793a0 100644
--- a/usr/src/uts/i86pc/vm/hat_i86.c
+++ b/usr/src/uts/i86pc/vm/hat_i86.c
@@ -27,6 +27,7 @@
*/
/*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
*/
/*
@@ -3294,7 +3295,7 @@ hat_page_getattr(struct page *pp, uint_t flag)
/*
- * common code used by hat_pageunload() and hment_steal()
+ * common code used by hat_page_inval() and hment_steal()
*/
hment_t *
hati_page_unmap(page_t *pp, htable_t *ht, uint_t entry)
@@ -3350,15 +3351,13 @@ hati_page_unmap(page_t *pp, htable_t *ht, uint_t entry)
extern int vpm_enable;
/*
- * Unload all translations to a page. If the page is a subpage of a large
+ * Unload translations to a page. If the page is a subpage of a large
* page, the large page mappings are also removed.
- *
- * The forceflags are unused.
+ * If curhat is not NULL, then we only unload the translation
+ * for the given process, otherwise all translations are unloaded.
*/
-
-/*ARGSUSED*/
-static int
-hati_pageunload(struct page *pp, uint_t pg_szcd, uint_t forceflag)
+void
+hat_page_inval(struct page *pp, uint_t pg_szcd, struct hat *curhat)
{
page_t *cur_pp = pp;
hment_t *hm;
@@ -3366,15 +3365,10 @@ hati_pageunload(struct page *pp, uint_t pg_szcd, uint_t forceflag)
htable_t *ht;
uint_t entry;
level_t level;
+ ulong_t cnt;
XPV_DISALLOW_MIGRATE();
- /*
- * prevent recursion due to kmem_free()
- */
- ++curthread->t_hatdepth;
- ASSERT(curthread->t_hatdepth < 16);
-
#if defined(__amd64)
/*
* clear the vpm ref.
@@ -3387,6 +3381,8 @@ hati_pageunload(struct page *pp, uint_t pg_szcd, uint_t forceflag)
* The loop with next_size handles pages with multiple pagesize mappings
*/
next_size:
+ if (curhat != NULL)
+ cnt = hat_page_getshare(cur_pp);
for (;;) {
/*
@@ -3398,14 +3394,13 @@ next_size:
if (hm == NULL) {
x86_hm_exit(cur_pp);
+curproc_done:
/*
* If not part of a larger page, we're done.
*/
if (cur_pp->p_szc <= pg_szcd) {
- ASSERT(curthread->t_hatdepth > 0);
- --curthread->t_hatdepth;
XPV_ALLOW_MIGRATE();
- return (0);
+ return;
}
/*
@@ -3424,8 +3419,20 @@ next_size:
* If this mapping size matches, remove it.
*/
level = ht->ht_level;
- if (level == pg_szcd)
- break;
+ if (level == pg_szcd) {
+ if (curhat == NULL || ht->ht_hat == curhat)
+ break;
+ /*
+ * Unloading only the given process but it's
+ * not the hat for the current process. Leave
+ * entry in place. Also do a safety check to
+ * ensure we don't get in an infinite loop
+ */
+ if (cnt-- == 0) {
+ x86_hm_exit(cur_pp);
+ goto curproc_done;
+ }
+ }
}
/*
@@ -3435,14 +3442,44 @@ next_size:
hm = hati_page_unmap(cur_pp, ht, entry);
if (hm != NULL)
hment_free(hm);
+
+ /* Perform check above for being part of a larger page. */
+ if (curhat != NULL)
+ goto curproc_done;
}
}
+/*
+ * Unload translations to a page. If unloadflag is HAT_CURPROC_PGUNLOAD, then
+ * we only unload the translation for the current process, otherwise all
+ * translations are unloaded.
+ */
+static int
+hati_pageunload(struct page *pp, uint_t pg_szcd, uint_t unloadflag)
+{
+ struct hat *curhat = NULL;
+
+ /*
+ * prevent recursion due to kmem_free()
+ */
+ ++curthread->t_hatdepth;
+ ASSERT(curthread->t_hatdepth < 16);
+
+ if (unloadflag == HAT_CURPROC_PGUNLOAD)
+ curhat = curthread->t_procp->p_as->a_hat;
+
+ hat_page_inval(pp, pg_szcd, curhat);
+
+ ASSERT(curthread->t_hatdepth > 0);
+ --curthread->t_hatdepth;
+ return (0);
+}
+
int
-hat_pageunload(struct page *pp, uint_t forceflag)
+hat_pageunload(struct page *pp, uint_t unloadflag)
{
ASSERT(PAGE_EXCL(pp));
- return (hati_pageunload(pp, 0, forceflag));
+ return (hati_pageunload(pp, 0, unloadflag));
}
/*
diff --git a/usr/src/uts/i86pc/vm/vm_machdep.c b/usr/src/uts/i86pc/vm/vm_machdep.c
index 2212202a01..1c2bd3e0ec 100644
--- a/usr/src/uts/i86pc/vm/vm_machdep.c
+++ b/usr/src/uts/i86pc/vm/vm_machdep.c
@@ -24,6 +24,7 @@
/*
* Copyright (c) 2010, Intel Corporation.
* All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -623,10 +624,8 @@ void
map_addr(caddr_t *addrp, size_t len, offset_t off, int vacalign, uint_t flags)
{
struct proc *p = curproc;
- caddr_t userlimit = (flags & _MAP_LOW32) ?
- (caddr_t)_userlimit32 : p->p_as->a_userlimit;
-
- map_addr_proc(addrp, len, off, vacalign, userlimit, curproc, flags);
+ map_addr_proc(addrp, len, off, vacalign,
+ map_userlimit(p, p->p_as, flags), curproc, flags);
}
/*ARGSUSED*/
diff --git a/usr/src/uts/intel/Makefile b/usr/src/uts/intel/Makefile
index 72b2d89989..37e10e011e 100644
--- a/usr/src/uts/intel/Makefile
+++ b/usr/src/uts/intel/Makefile
@@ -64,7 +64,7 @@ install_h.prereq := TARGET= install_h
.PARALLEL: $(PARALLEL_KMODS) $(XMODS) config $(LINT_DEPS)
-def all install clean clobber modlist: $(KMODS) $(XMODS) config
+def all install clean clobber modlist: genassym $(KMODS) $(XMODS) config
clobber: clobber.targ
@@ -106,7 +106,7 @@ CLOBBERFILES += $(PRIVS_C)
# intel/dtrace depends on i86pc/genassym, so we need to build both
# i86pc/genassym and intel/genassym.
#
-all.prereq install.prereq def.prereq: genunix FRC
+all.prereq install.prereq def.prereq: genassym genunix FRC
@cd ../i86pc/genassym; pwd; $(MAKE) $(@:%.prereq=%)
#
@@ -124,7 +124,7 @@ genunix: $(PRIVS_C)
modlintlib clean.lint: $(LINT_KMODS) $(XMODS)
-$(KMODS) $(SUBDIRS) config: FRC
+genassym $(KMODS) $(SUBDIRS) config: FRC
@cd $@; pwd; $(MAKE) $(NO_STATE) $(TARGET)
$(XMODS): FRC
diff --git a/usr/src/uts/intel/Makefile.files b/usr/src/uts/intel/Makefile.files
index 01147a5314..a4d6b7e309 100644
--- a/usr/src/uts/intel/Makefile.files
+++ b/usr/src/uts/intel/Makefile.files
@@ -21,7 +21,7 @@
#
# Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
-# Copyright (c) 2012, Joyent, Inc. All rights reserved.
+# Copyright (c) 2014, Joyent, Inc. All rights reserved.
#
#
@@ -99,6 +99,14 @@ GENUNIX_OBJS += \
#
CORE_OBJS += \
prmachdep.o
+
+LX_PROC_OBJS += \
+ lx_prsubr.o \
+ lx_prvfsops.o \
+ lx_prvnops.o
+
+LX_AUTOFS_OBJS += \
+ lx_autofs.o
#
# ZFS file system module
@@ -167,6 +175,7 @@ VGATEXT_OBJS += vgatext.o vgasubr.o
# Kernel linker
#
KRTLD_OBJS += \
+ bootfsops.o \
bootrd.o \
ufsops.o \
hsfs.o \
@@ -184,8 +193,9 @@ ACPICA_OBJS += dbcmds.o dbdisply.o \
dbexec.o dbfileio.o dbhistry.o dbinput.o dbstats.o \
dbutils.o dbxface.o evevent.o evgpe.o evgpeblk.o \
evmisc.o evregion.o evrgnini.o evsci.o evxface.o \
- evxfevnt.o evxfregn.o hwacpi.o hwgpe.o hwregs.o \
- hwsleep.o hwtimer.o dsfield.o dsinit.o dsmethod.o \
+ evxfevnt.o evxfregn.o hwacpi.o hwesleep.o hwgpe.o \
+ hwregs.o hwsleep.o hwtimer.o hwxfsleep.o dsfield.o \
+ dsinit.o dsmethod.o \
dsmthdat.o dsobject.o dsopcode.o dsutils.o dswexec.o \
dswload.o dswscope.o dswstate.o exconfig.o exconvrt.o \
excreate.o exdump.o exfield.o exfldio.o exmisc.o \
@@ -197,16 +207,18 @@ ACPICA_OBJS += dbcmds.o dbdisply.o \
nsalloc.o nsdump.o nsdumpdv.o nseval.o nsinit.o \
nsload.o nsnames.o nsobject.o nsparse.o nssearch.o \
nsutils.o nswalk.o nsxfeval.o nsxfname.o nsxfobj.o \
- rsaddr.o rscalc.o rscreate.o rsdump.o \
+ rsaddr.o rscalc.o rscreate.o rsdump.o rsserial.o \
rsinfo.o rsio.o rsirq.o rslist.o rsmemory.o rsmisc.o \
rsutils.o rsxface.o tbfadt.o tbfind.o tbinstal.o \
- tbutils.o tbxface.o tbxfroot.o \
+ tbutils.o tbxface.o tbxfroot.o utaddress.o \
utalloc.o utclib.o utcopy.o utdebug.o utdelete.o \
uteval.o utglobal.o utinit.o utmath.o utmisc.o \
- utobject.o utresrc.o utxface.o acpica.o acpi_enum.o \
+ utobject.o utresrc.o utxface.o utxfmutex.o acpica.o \
+ acpi_enum.o \
master_ops.o osl.o osl_ml.o acpica_ec.o utcache.o \
utmutex.o utstate.o dmbuffer.o dmnames.o dmobject.o \
- dmopcode.o dmresrc.o dmresrcl.o dmresrcs.o dmutils.o \
+ dmopcode.o dmresrc.o dmresrcl.o dmresrcl2.o dmresrcs.o \
+ dmutils.o \
dmwalk.o psloop.o nspredef.o hwxface.o hwvalid.o \
utlock.o utids.o nsrepair.o nsrepair2.o \
dbmethod.o dbnames.o dsargs.o dscontrol.o dswload2.o \
@@ -261,7 +273,28 @@ IOMMULIB_OBJS = iommulib.o
# Brand modules
#
SN1_BRAND_OBJS = sn1_brand.o sn1_brand_asm.o
+SNGL_BRAND_OBJS = sngl_brand.o sngl_brand_asm.o
S10_BRAND_OBJS = s10_brand.o s10_brand_asm.o
+LX_BRAND_OBJS = \
+ lx_brand.o \
+ lx_brand_asm.o \
+ lx_brk.o \
+ lx_clone.o \
+ lx_futex.o \
+ lx_getpid.o \
+ lx_id.o \
+ lx_ioctl.o \
+ lx_kill.o \
+ lx_misc.o \
+ lx_modify_ldt.o \
+ lx_pid.o \
+ lx_pipe.o \
+ lx_rw.o \
+ lx_sched.o \
+ lx_signum.o \
+ lx_syscall.o \
+ lx_sysinfo.o \
+ lx_thread_area.o
#
# special files
diff --git a/usr/src/uts/intel/Makefile.intel b/usr/src/uts/intel/Makefile.intel
index 5ccee35ceb..4c0194240b 100644
--- a/usr/src/uts/intel/Makefile.intel
+++ b/usr/src/uts/intel/Makefile.intel
@@ -21,6 +21,7 @@
# Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2014 Nexenta Systems, Inc. All rights reserved.
# Copyright (c) 2013 Andrew Stormont. All rights reserved.
+# Copyright (c) 2014, Joyent, Inc. All rights reserved.
#
# This makefile contains the common definitions for all intel
@@ -42,6 +43,7 @@ PLATFORM = i86pc
#
UNIX_DIR = $(UTSBASE)/i86pc/unix
GENLIB_DIR = $(UTSBASE)/intel/genunix
+GENASSYM_DIR = $(UTSBASE)/intel/genassym
IPDRV_DIR = $(UTSBASE)/intel/ip
MODSTUBS_DIR = $(UNIX_DIR)
DSF_DIR = $(UTSBASE)/$(PLATFORM)/genassym
@@ -134,6 +136,7 @@ ASFLAGS_XARCH_64 = $(amd64_ASFLAGS)
ASFLAGS_XARCH = $(ASFLAGS_XARCH_$(CLASS))
ASFLAGS += $(ASFLAGS_XARCH)
+AS_INC_PATH += -I$(GENASSYM_DIR)/$(OBJS_DIR)
#
# Define the base directory for installation.
@@ -235,6 +238,7 @@ DRV_KMODS += devinfo
DRV_KMODS += dld
DRV_KMODS += dlpistub
DRV_KMODS_32 += dnet
+DRV_KMODS += dr_sas
DRV_KMODS += dump
DRV_KMODS += ecpp
DRV_KMODS += emlxs
@@ -247,6 +251,7 @@ DRV_KMODS += i8042
DRV_KMODS += i915
DRV_KMODS += icmp
DRV_KMODS += icmp6
+DRV_KMODS += inotify
DRV_KMODS += intel_nb5000
DRV_KMODS += intel_nhm
DRV_KMODS += ip
@@ -281,6 +286,7 @@ DRV_KMODS += mpt_sas
DRV_KMODS += mr_sas
DRV_KMODS += mwl
DRV_KMODS += nca
+DRV_KMODS += nfp
DRV_KMODS += nsmb
DRV_KMODS += nulldriver
DRV_KMODS += nv_sata
@@ -347,6 +353,8 @@ DRV_KMODS += uath
DRV_KMODS += urtw
DRV_KMODS += vgatext
DRV_KMODS += heci
+DRV_KMODS += vmxnet
+DRV_KMODS += vnd
DRV_KMODS += vnic
DRV_KMODS += vscan
DRV_KMODS += wc
@@ -479,6 +487,15 @@ DRV_KMODS += wusb_ca
DRV_KMODS += usbecm
#
+# USBGEM modules
+#
+DRV_KMODS += usbgem
+DRV_KMODS += axf
+DRV_KMODS += udmf
+DRV_KMODS += upf
+DRV_KMODS += urf
+
+#
# 1394 modules
#
MISC_KMODS += s1394 sbp2
@@ -502,7 +519,8 @@ MISC_KMODS += md_sp
#
# Brand modules
#
-BRAND_KMODS += sn1_brand s10_brand
+BRAND_KMODS += sn1_brand sngl_brand s10_brand lx_brand
+DRV_KMODS += lx_systrace lx_ptm lx_audio lx_netlink
#
# Exec Class Modules (/kernel/exec):
@@ -517,10 +535,10 @@ SCHED_KMODS += IA RT TS RT_DPTBL TS_DPTBL FSS FX FX_DPTBL SDC
#
# File System Modules (/kernel/fs):
#
-FS_KMODS += autofs cachefs ctfs dcfs dev devfs fdfs fifofs hsfs lofs
-FS_KMODS += mntfs namefs nfs objfs zfs zut
+FS_KMODS += autofs cachefs ctfs dcfs dev devfs fdfs fifofs hsfs hyprlofs
+FS_KMODS += lofs lx_afs lx_proc lxprocfs mntfs namefs nfs objfs zfs zut
FS_KMODS += pcfs procfs sockfs specfs tmpfs udfs ufs sharefs
-FS_KMODS += smbfs
+FS_KMODS += smbfs bootfs
#
# Streams Modules (/kernel/strmod):
@@ -580,6 +598,7 @@ MISC_KMODS += drm
MISC_KMODS += fssnap_if
MISC_KMODS += gda
MISC_KMODS += gld
+MISC_KMODS += gsqueue
MISC_KMODS += hidparser
MISC_KMODS += hook
MISC_KMODS += hpcsvc
diff --git a/usr/src/uts/intel/Makefile.rules b/usr/src/uts/intel/Makefile.rules
index b246e8fcca..65c31ba168 100644
--- a/usr/src/uts/intel/Makefile.rules
+++ b/usr/src/uts/intel/Makefile.rules
@@ -61,6 +61,9 @@ $(OBJS_DIR)/%.o: $(SRC)/common/util/i386/%.s
$(OBJS_DIR)/%.o: $(UTSBASE)/intel/brand/sn1/%.s
$(COMPILE.s) -o $@ $<
+$(OBJS_DIR)/%.o: $(UTSBASE)/intel/brand/sngl/%.s
+ $(COMPILE.s) -o $@ $<
+
$(OBJS_DIR)/%.o: $(UTSBASE)/intel/brand/solaris10/%.s
$(COMPILE.s) -o $@ $<
@@ -177,6 +180,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/intel/io/intel_nb5000/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/intel/io/vmxnet/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/intel/io/intel_nhm/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -319,6 +326,9 @@ $(LINTS_DIR)/%.ln: $(SRC)/common/util/i386/%.s
$(LINTS_DIR)/%.ln: $(UTSBASE)/intel/brand/sn1/%.s
@($(LHEAD) $(LINT.s) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/intel/brand/sngl/%.s
+ @($(LHEAD) $(LINT.s) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/intel/brand/solaris10/%.s
@($(LHEAD) $(LINT.s) $< $(LTAIL))
@@ -457,6 +467,9 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/intel/io/scsi/targets/%.c
$(LINTS_DIR)/%.ln: $(UTSBASE)/intel/io/vgatext/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/intel/io/vmxnet/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/intel/io/heci/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/intel/axf/Makefile b/usr/src/uts/intel/axf/Makefile
new file mode 100644
index 0000000000..e4665c0186
--- /dev/null
+++ b/usr/src/uts/intel/axf/Makefile
@@ -0,0 +1,73 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = axf
+OBJECTS = $(AXF_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(AXF_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+CPPFLAGS += -I$(UTSBASE)/common/io/usbgem
+CPPFLAGS += -DVERSION=\"2.0.2\"
+CPPFLAGS += -DUSBGEM_CONFIG_GLDv3
+LDFLAGS += -dy -N misc/mac -N drv/ip -N misc/usba -N misc/usbgem
+
+CERRWARN += -_gcc=-Wno-unused-function
+CERRWARN += -_gcc=-Wno-unused-variable
+CERRWARN += -_gcc=-Wno-unused-label
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/bootfs/Makefile b/usr/src/uts/intel/bootfs/Makefile
new file mode 100644
index 0000000000..ca412de439
--- /dev/null
+++ b/usr/src/uts/intel/bootfs/Makefile
@@ -0,0 +1,72 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# This makefile drives the production of the bootfs file system
+# kernel module.
+#
+# intel architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = bootfs
+OBJECTS = $(BOOTFS_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(BOOTFS_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_FS_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/brand/common/brand_asm.h b/usr/src/uts/intel/brand/common/brand_asm.h
index 1d540db2a9..c820d8e187 100644
--- a/usr/src/uts/intel/brand/common/brand_asm.h
+++ b/usr/src/uts/intel/brand/common/brand_asm.h
@@ -161,7 +161,7 @@ extern "C" {
#define GET_P_BRAND_DATA(sp, pcnt, reg) \
GET_PROCP(sp, pcnt, reg); \
- mov P_BRAND_DATA(reg), reg /* get p_brand_data */
+ mov __P_BRAND_DATA(reg), reg /* get p_brand_data */
/*
* Each of the following macros returns to the standard syscall codepath if
diff --git a/usr/src/uts/intel/brand/lx/lx_brand_asm.s b/usr/src/uts/intel/brand/lx/lx_brand_asm.s
new file mode 100644
index 0000000000..568d462c2c
--- /dev/null
+++ b/usr/src/uts/intel/brand/lx/lx_brand_asm.s
@@ -0,0 +1,359 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#if defined(__lint)
+
+#include <sys/systm.h>
+
+#else /* __lint */
+
+#include <sys/controlregs.h>
+#include "genassym.h"
+#include "../common/brand_asm.h"
+
+#endif /* __lint */
+
+#ifdef __lint
+
+void
+lx_brand_int80_callback(void)
+{
+}
+
+void
+lx_brand_syscall_callback(void)
+{
+}
+
+#else /* __lint */
+
+#if defined(__amd64)
+
+/*
+ * syscall handler for 32-bit Linux user processes:
+ * See "64-BIT INTERPOSITION STACK" in brand_asm.h.
+ */
+ENTRY(lx_brand_int80_callback)
+ GET_PROCP(SP_REG, 0, %r15)
+ movq P_ZONE(%r15), %r15 /* grab the zone pointer */
+ /* grab the 'max syscall num' for this process from 'zone brand data' */
+ movq ZONE_BRAND_DATA(%r15), %r15 /* grab the zone brand ptr */
+ movl LXZD_MAX_SYSCALL(%r15), %r15d /* get the 'max sysnum' word */
+ cmpq %r15, %rax /* is 0 <= syscall <= MAX? */
+ jbe 0f /* yes, syscall is OK */
+ xorl %eax, %eax /* no, zero syscall number */
+0:
+
+.lx_brand_int80_patch_point:
+ jmp .lx_brand_int80_notrace
+
+.lx_brand_int80_notrace:
+ CALC_TABLE_ADDR(%r15, L_HANDLER)
+1:
+ movq %r15, %rax
+ GET_V(%rsp, 0, V_SSP, %rsp) /* restore intr. stack pointer */
+ xchgq (%rsp), %rax /* swap %rax and return addr */
+ jmp sys_sysint_swapgs_iret
+
+.lx_brand_int80_trace:
+ /*
+ * If tracing is active, we vector to an alternate trace-enabling
+ * handler table instead.
+ */
+ CALC_TABLE_ADDR(%r15, L_TRACEHANDLER)
+ jmp 1b
+SET_SIZE(lx_brand_int80_callback)
+
+#define PATCH_POINT80 _CONST(.lx_brand_int80_patch_point + 1)
+#define PATCH_VAL80 _CONST(.lx_brand_int80_trace - .lx_brand_int80_notrace)
+
+ENTRY(lx_brand_int80_enable)
+ movl $1, lx_systrace_brand_enabled(%rip)
+ movq $PATCH_POINT80, %r8
+ movb $PATCH_VAL80, (%r8)
+ ret
+SET_SIZE(lx_brand_int80_enable)
+
+ENTRY(lx_brand_int80_disable)
+ movq $PATCH_POINT80, %r8
+ movb $0, (%r8)
+ movl $0, lx_systrace_brand_enabled(%rip)
+ ret
+SET_SIZE(lx_brand_int80_disable)
+
+
+/*
+ * syscall handler for 64-bit user processes:
+ *
+ * We're running on the kernel's %gs.
+ *
+ * We return directly to userland, bypassing the update_sregs() logic, so
+ * this routine must NOT do anything that could cause a context switch.
+ *
+ * %rax - syscall number
+ *
+ * See uts/i86pc/ml/syscall_asm_amd64.s for what happens before we get into
+ * the following lx brand-specific codepath.
+ *
+ * As the comment on the BRAND_CALLBACK macro describes, when we're called, all
+ * general registers, except for %r15, are as they were when the user process
+ * made the system call. %r15 is available to the callback as a scratch
+ * register. If the callback returns to the kernel path, %r15 does not have to
+ * be restored to the user value since BRAND_CALLBACK does that. If we jump
+ * out to the emulation we need to restore %r15 here.
+ *
+ * To 'return' to our user-space handler, we just need to place its address
+ * into %rcx. The original return address is passed back in %rax.
+ *
+ * Since this is the common syscall path for all 64-bit code (both Linux and
+ * native libc) in the branded zone (unlike the int80 path), we have to do a
+ * bit more checking to see if interpositioning is in effect (i.e. syscalls
+ * from the native ld.so.1 are not interposed since the emulation has not yet
+ * been installed, or the emulation is in native syscall mode).
+ */
+ENTRY(lx_brand_syscall_callback)
+ /* callback prologue */
+ GET_PROCP(SP_REG, 0, %r15)
+ mov __P_BRAND_DATA(%r15), %r15 /* get p_brand_data */
+ cmp $0, %r15 /* null ptr? */
+ je 2f /* yes, take normal ret path */
+ cmp $0, L_HANDLER(%r15) /* handler installed? */
+ je 2f /* no, take normal ret path */
+
+ /* check for native vs. Linux syscall */
+ GET_V(SP_REG, 0, V_LWP, %r15); /* get lwp pointer */
+ movq LWP_BRAND(%r15), %r15 /* grab lx lwp data pointer */
+ movl BR_NTV_SYSCALL(%r15), %r15d /* grab syscall src flag */
+ cmp $1, %r15 /* check for native syscall */
+ je 2f /* is native, stay in kernel */
+
+ /* Linux syscall - subsequent emul. syscalls will use native mode */
+ GET_V(SP_REG, 0, V_LWP, %r15); /* get lwp pointer */
+ movq LWP_BRAND(%r15), %r15 /* grab lx lwp data pointer */
+ movl $1, BR_NTV_SYSCALL(%r15) /* set native syscall flag */
+
+ /* check if we have to restore native fsbase */
+ GET_V(SP_REG, 0, V_LWP, %r15); /* get lwp pointer */
+ movq LWP_BRAND(%r15), %r15 /* grab lx lwp data pointer */
+ movq BR_NTV_FSBASE(%r15), %r15 /* grab native fsbase */
+ cmp $0, %r15 /* native fsbase not saved? */
+ je 3f /* yes, skip loading */
+
+#ifdef DEBUG
+ /*
+ * This block is basically similar to a large assert.
+ *
+ * In debug code we do some extra validation of the %fsbase register to
+ * validate that we always have the expected Linux thread pointer and
+ * not the native value. At this point we know that the lwp brand data
+ * should contain the Linux %fsbase (from a Linux arch_prctl syscall)
+ * since the native %fsbase check above is non-null. We also know that
+ * we are making a Linux syscall from the other check above. We read
+ * the %fsbase and compare to the saved Linux %fsbase in the lwp_brand
+ * data. If we don't have the expected value, we save the incorrect
+ * %fsbase value into the br_lx_fsbase member for later inspection and
+ * change the syscall we are making into the Linux pivot_root syscall
+ * (an obscure syscall which we don't support and which an app in the
+ * zone cannot use). This allows us to see this error downstream via
+ * DTrace and see the incorrect %fsbase value we had.
+ */
+ GET_V(SP_REG, 0, V_LWP, %r15); /* get lwp pointer */
+ movq LWP_BRAND(%r15), %r15 /* grab lx lwp data pointer */
+ movq BR_LX_FSBASE(%r15), %r15 /* grab Linux fsbase */
+
+ subq $24, %rsp /* make room for 3 regs */
+ movq %rax, 0x0(%rsp) /* save regs used by rdmsr */
+ movq %rcx, 0x8(%rsp)
+ movq %rdx, 0x10(%rsp)
+
+ movl $MSR_AMD_FSBASE, %ecx /* fsbase msr */
+ rdmsr /* get fsbase to edx:eax */
+
+ /* fix %edx; %eax lo already ok */
+ shlq $32, %rdx
+ or %rdx, %rax /* full value in %rax */
+ cmp %rax, %r15 /* check if is lx fsbase */
+ je 4f /* match, ok */
+
+ movq %rax, %rdi /* pass bad fsbase as arg0 */
+ movq $155, %rax /* fail! use pivot_root */
+ jmp 5f
+
+4:
+ movq 0x0(%rsp), %rax /* restore %rax */
+5:
+ movq 0x8(%rsp), %rcx /* restore other regs */
+ movq 0x10(%rsp), %rdx
+ addq $24, %rsp
+
+ /* reload r15 with the native value */
+ GET_V(SP_REG, 0, V_LWP, %r15); /* get lwp pointer */
+ movq LWP_BRAND(%r15), %r15 /* grab lx lwp data pointer */
+ movq BR_NTV_FSBASE(%r15), %r15 /* grab native fsbase */
+#endif
+
+ /*
+ * Switch fsbase from Linux value back to native value. Also update pcb
+ * so that if we service an interrupt we will restore the correct fsbase
+ * in update_sregs().
+ */
+ subq $24, %rsp /* make room for 3 regs */
+ movq %rax, 0x0(%rsp) /* save regs used by wrmsr */
+ movq %rcx, 0x8(%rsp)
+ movq %rdx, 0x10(%rsp)
+ movq %r15, %rax /* native fsbase to %rax */
+ movq %rax, %rdx /* setup regs for wrmsr */
+ shrq $32, %rdx /* fix %edx; %eax already ok */
+ movl $MSR_AMD_FSBASE, %ecx /* fsbase msr */
+ wrmsr /* set fsbase from edx:eax */
+ movq %rsp, %rdx /* use rdx as temp sp */
+ addq $24, %rdx /* fix it back up */
+ GET_V(%rdx, 0, V_LWP, %r15); /* get lwp pointer */
+ movq %rax, LWP_PCB_FSBASE(%r15) /* save native fsbase in pcb */
+ movq 0x0(%rsp), %rax /* restore regs */
+ movq 0x8(%rsp), %rcx
+ movq 0x10(%rsp), %rdx
+ addq $24, %rsp
+
+3:
+ /*
+ * Linux syscall - validate syscall number.
+ * If necessary, the Linux %fsbase has already been loaded above.
+ */
+ GET_PROCP(SP_REG, 0, %r15)
+ movq P_ZONE(%r15), %r15 /* grab the zone pointer */
+ /* grab the 'max syscall num' for this process from 'zone brand data' */
+ movq ZONE_BRAND_DATA(%r15), %r15 /* grab the zone brand ptr */
+ movl LXZD_MAX_SYSCALL(%r15), %r15d /* get the 'max sysnum' word */
+ cmp %r15, %rax /* is 0 <= syscall <= MAX? */
+ ja 2f /* no, take normal ret path */
+
+.lx_brand_syscall_patch_point:
+ jmp .lx_brand_syscall_notrace
+.lx_brand_syscall_notrace:
+
+ CALC_TABLE_ADDR(%r15, L_HANDLER)
+1:
+ mov %rcx, %rax; /* save orig return addr in syscall_reg */
+ mov %r15, %rcx; /* place new return addr in %rcx */
+ mov %gs:CPU_RTMP_R15, %r15; /* restore scratch register */
+ mov V_SSP(SP_REG), SP_REG /* restore user stack pointer */
+ jmp nopop_sys_syscall_swapgs_sysretq
+
+2: /* no emulation, continue normal system call flow */
+ retq
+
+.lx_brand_syscall_trace:
+ /*
+ * If tracing is active, we vector to an alternate trace-enabling
+ * handler table instead.
+ */
+ CALC_TABLE_ADDR(%r15, L_TRACEHANDLER)
+ jmp 1b
+SET_SIZE(lx_brand_syscall_callback)
+
+#define PATCH_POINT_SC _CONST(.lx_brand_syscall_patch_point + 1)
+#define PATCH_VAL_SC \
+ _CONST(.lx_brand_syscall_trace - .lx_brand_syscall_notrace)
+
+ENTRY(lx_brand_syscall_enable)
+ movl $1, lx_systrace_brand_enabled(%rip)
+ movq $PATCH_POINT_SC, %r8
+ movb $PATCH_VAL_SC, (%r8)
+ ret
+SET_SIZE(lx_brand_syscall_enable)
+
+ENTRY(lx_brand_syscall_disable)
+ movq $PATCH_POINT_SC, %r8
+ movb $0, (%r8)
+ movl $0, lx_systrace_brand_enabled(%rip)
+ ret
+SET_SIZE(lx_brand_syscall_disable)
+
+
+#elif defined(__i386)
+
+/*
+ * See "32-BIT INTERPOSITION STACK" in brand_asm.h.
+ */
+ENTRY(lx_brand_int80_callback)
+ GET_PROCP(SP_REG, 0, %ebx)
+ movl P_ZONE(%ebx), %ebx /* grab the zone pointer */
+ /* grab the 'max syscall num' for this process from 'zone brand data' */
+ movl ZONE_BRAND_DATA(%ebx), %ebx /* grab the zone brand data */
+ movl LXZD_MAX_SYSCALL(%ebx), %ebx /* get the max sysnum */
+
+ cmpl %ebx, %eax /* is 0 <= syscall <= MAX? */
+ jbe 0f /* yes, syscall is OK */
+ xorl %eax, %eax /* no, zero syscall number */
+0:
+
+.lx_brand_int80_patch_point:
+ jmp .lx_brand_int80_notrace
+
+.lx_brand_int80_notrace:
+ CALC_TABLE_ADDR(%ebx, L_HANDLER)
+
+1:
+ movl %ebx, %eax
+ GET_V(%esp, 0, V_U_EBX, %ebx) /* restore scratch register */
+ addl $V_END, %esp /* restore intr. stack ptr */
+ xchgl (%esp), %eax /* swap new and orig. return addrs */
+ jmp nopop_sys_rtt_syscall
+
+.lx_brand_int80_trace:
+ CALC_TABLE_ADDR(%ebx, L_TRACEHANDLER)
+ jmp 1b
+SET_SIZE(lx_brand_int80_callback)
+
+
+#define PATCH_POINT _CONST(.lx_brand_int80_patch_point + 1)
+#define PATCH_VAL _CONST(.lx_brand_int80_trace - .lx_brand_int80_notrace)
+
+ENTRY(lx_brand_int80_enable)
+ pushl %ebx
+ pushl %eax
+ movl $1, lx_systrace_brand_enabled
+ movl $PATCH_POINT, %ebx
+ movl $PATCH_VAL, %eax
+ movb %al, (%ebx)
+ popl %eax
+ popl %ebx
+ ret
+SET_SIZE(lx_brand_int80_enable)
+
+ENTRY(lx_brand_int80_disable)
+ pushl %ebx
+ movl $PATCH_POINT, %ebx
+ movb $0, (%ebx)
+ movl $0, lx_systrace_brand_enabled
+ popl %ebx
+ ret
+SET_SIZE(lx_brand_int80_disable)
+
+#endif /* __i386 */
+#endif /* __lint */
diff --git a/usr/src/uts/intel/brand/sngl/sngl_brand_asm.s b/usr/src/uts/intel/brand/sngl/sngl_brand_asm.s
new file mode 100644
index 0000000000..90932a9d6d
--- /dev/null
+++ b/usr/src/uts/intel/brand/sngl/sngl_brand_asm.s
@@ -0,0 +1,31 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2012, Joyent, Inc. All rights reserved.
+ */
+
+#define XXX_emulation_table sngl_emulation_table
+#define XXX_brand_syscall32_callback sngl_brand_syscall32_callback
+#define XXX_brand_syscall_callback sngl_brand_syscall_callback
+#define XXX_brand_sysenter_callback sngl_brand_sysenter_callback
+#define XXX_brand_int91_callback sngl_brand_int91_callback
+
+#include "../common/brand_solaris.s"
diff --git a/usr/src/uts/intel/core_pcbe/Makefile b/usr/src/uts/intel/core_pcbe/Makefile
index d9d9b02de0..7f2d1ad8e5 100644
--- a/usr/src/uts/intel/core_pcbe/Makefile
+++ b/usr/src/uts/intel/core_pcbe/Makefile
@@ -34,7 +34,7 @@ UTSBASE = ../..
MODULE = pcbe.GenuineIntel.6.15
OBJECTS = $(CORE_PCBE_OBJS:%=$(OBJS_DIR)/%)
LINTS = $(CORE_PCBE_OBJS:%.o=$(LINTS_DIR)/%.ln)
-ROOTMODULE = $(USR_PCBE_DIR)/$(MODULE)
+ROOTMODULE = $(ROOT_PSM_PCBE_DIR)/$(MODULE)
SOFTLINKS = pcbe.GenuineIntel.6.23 \
pcbe.GenuineIntel.6.26 \
pcbe.GenuineIntel.6.28 \
@@ -45,7 +45,7 @@ SOFTLINKS = pcbe.GenuineIntel.6.23 \
pcbe.GenuineIntel.6.44 \
pcbe.GenuineIntel.6.46 \
pcbe.GenuineIntel.6.47
-ROOTSOFTLINKS = $(SOFTLINKS:%=$(USR_PCBE_DIR)/%)
+ROOTSOFTLINKS = $(SOFTLINKS:%=$(ROOT_PSM_PCBE_DIR)/%)
#
# Include common rules.
diff --git a/usr/src/uts/intel/dev/Makefile b/usr/src/uts/intel/dev/Makefile
index b5c7c1a9c8..e7ae468c05 100644
--- a/usr/src/uts/intel/dev/Makefile
+++ b/usr/src/uts/intel/dev/Makefile
@@ -66,6 +66,7 @@ INC_PATH += -I$(UTSBASE)/common/io/bpf
CERRWARN += -_gcc=-Wno-parentheses
CERRWARN += -_gcc=-Wno-unused-label
CERRWARN += -_gcc=-Wno-uninitialized
+CERRWARN += -_gcc=-Wno-unused-function
#
# Default build targets.
diff --git a/usr/src/uts/intel/dr_sas/Makefile b/usr/src/uts/intel/dr_sas/Makefile
new file mode 100644
index 0000000000..f4871b694a
--- /dev/null
+++ b/usr/src/uts/intel/dr_sas/Makefile
@@ -0,0 +1,90 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# uts/intel/dr_sas/Makefile
+#
+# This makefile drives the production of the dr_sas driver kernel module.
+#
+# intel implementation architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = dr_sas
+OBJECTS = $(DR_SAS_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(DR_SAS_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/io/dr_sas
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY) $(CONFMOD)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+# Kernel Module Dependencies
+#
+LDFLAGS += -dy -Nmisc/scsi
+
+CERRWARN += -_gcc=-Wno-unused-label
+CERRWARN += -_gcc=-Wno-switch
+CERRWARN += -_gcc=-Wno-uninitialized
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/elfexec/Makefile b/usr/src/uts/intel/elfexec/Makefile
index 9751e04ba7..ce0433391c 100644
--- a/usr/src/uts/intel/elfexec/Makefile
+++ b/usr/src/uts/intel/elfexec/Makefile
@@ -102,12 +102,15 @@ include $(UTSBASE)/intel/Makefile.targ
$(OBJS_DIR)/elf32.o: $(UTSBASE)/common/exec/elf/elf.c
$(COMPILE.c) -o $@ -D_ELF32_COMPAT $(UTSBASE)/common/exec/elf/elf.c
+ $(CTFCONVERT_O)
$(OBJS_DIR)/elf32_notes.o: $(UTSBASE)/common/exec/elf/elf_notes.c
$(COMPILE.c) -o $@ -D_ELF32_COMPAT $(UTSBASE)/common/exec/elf/elf_notes.c
+ $(CTFCONVERT_O)
$(OBJS_DIR)/old32_notes.o: $(UTSBASE)/common/exec/elf/old_notes.c
$(COMPILE.c) -o $@ -D_ELF32_COMPAT $(UTSBASE)/common/exec/elf/old_notes.c
+ $(CTFCONVERT_O)
$(LINTS_DIR)/elf32.ln: $(UTSBASE)/common/exec/elf/elf.c
@($(LHEAD) $(LINT.c) -Celf32 -D_ELF32_COMPAT $(UTSBASE)/common/exec/elf/elf.c $(LTAIL))
diff --git a/usr/src/uts/intel/genassym/Makefile b/usr/src/uts/intel/genassym/Makefile
new file mode 100644
index 0000000000..ce01dc8610
--- /dev/null
+++ b/usr/src/uts/intel/genassym/Makefile
@@ -0,0 +1,85 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# This makefile drives the production of genassym.h through
+# compile time intialized data.
+#
+# intel architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+GENASSYM_H = $(GENASSYM_DIR)/$(OBJS_DIR)/genassym.h
+OFFSETS_SRC = $(GENASSYM_DIR)/offsets.in
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(GENASSYM_H)
+
+INC_PATH += -I$(UTSBASE)/common/brand/lx
+
+#
+# Overrides
+#
+CLEANFILES = Nothing_to_remove
+CLOBBERFILES = $(GENASSYM_H) Nothing_to_remove
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+clean.lint:
+
+install: def
+
+#
+# Create genassym.h
+#
+$(GENASSYM_H): $(OFFSETS_SRC)
+ $(OFFSETS_CREATE) <$(OFFSETS_SRC) >$@
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/genassym/offsets.in b/usr/src/uts/intel/genassym/offsets.in
new file mode 100644
index 0000000000..59763c1b4b
--- /dev/null
+++ b/usr/src/uts/intel/genassym/offsets.in
@@ -0,0 +1,49 @@
+\
+\ CDDL HEADER START
+\
+\ The contents of this file are subject to the terms of the
+\ Common Development and Distribution License (the "License").
+\ You may not use this file except in compliance with the License.
+\
+\ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+\ or http://www.opensolaris.org/os/licensing.
+\ See the License for the specific language governing permissions
+\ and limitations under the License.
+\
+\ When distributing Covered Code, include this CDDL HEADER in each
+\ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+\ If applicable, add the following below this CDDL HEADER, with the
+\ fields enclosed by brackets "[]" replaced with your own identifying
+\ information: Portions Copyright [yyyy] [name of copyright owner]
+\
+\ CDDL HEADER END
+\
+\
+\ Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+\ Use is subject to license terms.
+\ Copyright 2014 Joyent, Inc. All rights reserved.
+\
+
+\
+\ offsets.in: input file to produce the architecture-dependent genassym.h
+\ using the ctfstabs program
+\
+
+#ifndef _GENASSYM
+#define _GENASSYM
+#endif
+
+#include <sys/lx_brand.h>
+
+lx_proc_data
+ l_handler
+ l_tracehandler
+ l_traceflag
+
+lx_zone_data
+ lxzd_max_syscall
+
+lx_lwp_data
+ br_ntv_syscall
+ br_lx_fsbase
+ br_ntv_fsbase
diff --git a/usr/src/uts/intel/gsqueue/Makefile b/usr/src/uts/intel/gsqueue/Makefile
new file mode 100644
index 0000000000..411e384309
--- /dev/null
+++ b/usr/src/uts/intel/gsqueue/Makefile
@@ -0,0 +1,49 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+UTSBASE = ../..
+
+MODULE = gsqueue
+OBJECTS = $(GSQUEUE_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(GSQUEUE_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE)
+
+include $(UTSBASE)/intel/Makefile.intel
+
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+LDFLAGS += -dy -Ndrv/ip
+
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/hyprlofs/Makefile b/usr/src/uts/intel/hyprlofs/Makefile
new file mode 100644
index 0000000000..919b045617
--- /dev/null
+++ b/usr/src/uts/intel/hyprlofs/Makefile
@@ -0,0 +1,83 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# uts/intel/hyprlofs/Makefile
+#
+# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# This makefile drives the production of the hyprlofs file system
+# kernel module.
+#
+# intel architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = hyprlofs
+OBJECTS = $(HYPRLOFS_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(HYPRLOFS_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_FS_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/ia32/os/archdep.c b/usr/src/uts/intel/ia32/os/archdep.c
index d83b16d673..42cc0d4d10 100644
--- a/usr/src/uts/intel/ia32/os/archdep.c
+++ b/usr/src/uts/intel/ia32/os/archdep.c
@@ -632,6 +632,8 @@ getuserpc()
static greg_t
fix_segreg(greg_t sr, int iscs, model_t datamodel)
{
+ kthread_t *t = curthread;
+
switch (sr &= 0xffff) {
case 0:
@@ -667,6 +669,19 @@ fix_segreg(greg_t sr, int iscs, model_t datamodel)
break;
}
+ /*
+ * Allow this process's brand to do any necessary segment register
+ * manipulation.
+ */
+ if (PROC_IS_BRANDED(t->t_procp) && BRMOP(t->t_procp)->b_fixsegreg) {
+ greg_t bsr = BRMOP(t->t_procp)->b_fixsegreg(sr, datamodel);
+
+ if (bsr == 0 && iscs == IS_CS)
+ return (0 | SEL_UPL);
+ else
+ return (bsr);
+ }
+
/*
* Force it into the LDT in ring 3 for 32-bit processes, which by
* default do not have an LDT, so that any attempt to use an invalid
diff --git a/usr/src/uts/intel/ia32/os/desctbls.c b/usr/src/uts/intel/ia32/os/desctbls.c
index a05137eee6..97024b7b59 100644
--- a/usr/src/uts/intel/ia32/os/desctbls.c
+++ b/usr/src/uts/intel/ia32/os/desctbls.c
@@ -161,7 +161,7 @@ struct interposing_handler {
* The brand infrastructure interposes on two handlers, and we use one as a
* NULL signpost.
*/
-static struct interposing_handler brand_tbl[2];
+static struct interposing_handler brand_tbl[3];
/*
* software prototypes for default local descriptor table
@@ -976,6 +976,12 @@ init_idt_common(gate_desc_t *idt)
set_gatesegd(&idt[T_MCE], &mcetrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
set_gatesegd(&idt[T_SIMDFPE], &xmtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
+ /*
+ * install "int80" handler at, well, 0x80.
+ */
+ set_gatesegd(&idt0[T_INT80], &sys_int80, KCS_SEL, SDT_SYSIGT, TRP_UPL,
+ 0);
+
/*
* install fast trap handler at 210.
*/
@@ -1001,21 +1007,27 @@ init_idt_common(gate_desc_t *idt)
SDT_SYSIGT, TRP_UPL, 0);
/*
- * Prepare interposing descriptor for the syscall handler
- * and cache copy of the default descriptor.
+- * Prepare interposing descriptors for the branded "int80"
+- * and syscall handlers and cache copies of the default
+- * descriptors.
*/
- brand_tbl[0].ih_inum = T_SYSCALLINT;
- brand_tbl[0].ih_default_desc = idt0[T_SYSCALLINT];
+ brand_tbl[0].ih_inum = T_INT80;
+ brand_tbl[0].ih_default_desc = idt0[T_INT80];
+ set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_int80, KCS_SEL,
+ SDT_SYSIGT, TRP_UPL, 0);
+
+ brand_tbl[1].ih_inum = T_SYSCALLINT;
+ brand_tbl[1].ih_default_desc = idt0[T_SYSCALLINT];
#if defined(__amd64)
- set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_syscall_int,
+ set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_syscall_int,
KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
#elif defined(__i386)
- set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_call,
+ set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_call,
KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
#endif /* __i386 */
- brand_tbl[1].ih_inum = 0;
+ brand_tbl[2].ih_inum = 0;
}
#if defined(__xpv)
diff --git a/usr/src/uts/intel/inotify/Makefile b/usr/src/uts/intel/inotify/Makefile
new file mode 100644
index 0000000000..80e7a80404
--- /dev/null
+++ b/usr/src/uts/intel/inotify/Makefile
@@ -0,0 +1,70 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = inotify
+OBJECTS = $(INOTIFY_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(INOTIFY_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/io
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+LINTTAGS += -erroff=E_STRUCT_DERIVED_FROM_FLEX_MBR
+CERRWARN += -_gcc=-Wno-parentheses
+LDFLAGS += -dy -Nfs/specfs
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY) $(SRC_CONFILE)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/io/acpica/changes.txt b/usr/src/uts/intel/io/acpica/changes.txt
index f53fd426da..69dcdf708c 100644
--- a/usr/src/uts/intel/io/acpica/changes.txt
+++ b/usr/src/uts/intel/io/acpica/changes.txt
@@ -1,7 +1,588 @@
----------------------------------------
-27 May 2011. Summary of changes for version 20110527:
+20 March 2012. Summary of changes for version 20120320:
+
+This release is available at www.acpica.org/downloads.
+The ACPI 5.0 specification is available at www.acpi.info.
+
+1) ACPICA Core Subsystem:
+
+Enhanced the sleep/wake interfaces to optionally execute the _GTS method
+(Going To Sleep) and the _BFS method (Back From Sleep). Windows apparently
+does not execute these methods, and therefore these methods are often
+untested. It has been seen on some systems where the execution of these
+methods causes errors and also prevents the machine from entering S5. It is
+therefore suggested that host operating systems do not execute these methods
+by default. In the future, perhaps these methods can be optionally executed
+based on the age of the system and/or what is the newest version of Windows
+that the BIOS asks for via _OSI. Changed interfaces: AcpiEnterSleepState and
+AcpileaveSleepStatePrep. See the ACPICA reference and Linux BZ 13041. Lin
+Ming.
+
+Fixed a problem where the length of the local/common FADT was set too early.
+The local FADT table length cannot be set to the common length until the
+original length has been examined. There is code that checks the table length
+and sets various fields appropriately. This can affect older machines with
+early FADT versions. For example, this can cause inadvertent writes to the
+CST_CNT register. Julian Anastasov.
+
+Fixed a mapping issue related to a physical table override. Use the deferred
+mapping mechanism for tables loaded via the physical override OSL interface.
+This allows for early mapping before the virtual memory manager is available.
+Thomas Renninger, Bob Moore.
+
+Enhanced the automatic return-object repair code: Repair a common problem with
+predefined methods that are defined to return a variable-length Package of
+sub-objects. If there is only one sub-object, some BIOS ASL code mistakenly
+simply returns the single object instead of a Package with one sub-object.
+This new support will repair this error by wrapping a Package object around
+the original object, creating the correct and expected Package with one sub-
+object. Names that can be repaired in this manner include: _ALR, _CSD, _HPX,
+_MLS, _PLD, _PRT, _PSS, _TRT, _TSS, _BCL, _DOD, _FIX, and _Sx. ACPICA BZ 939.
+
+Changed the exception code returned for invalid ACPI paths passed as
+parameters to external interfaces such as AcpiEvaluateObject. Was
+AE_BAD_PARAMETER, now is the more sensible AE_BAD_PATHNAME.
+
+Example Code and Data Size: These are the sizes for the OS-independent
+acpica.lib produced by the Microsoft Visual C++ 9.0 32-bit compiler. The debug
+version of the code includes the debug output trace mechanism and has a much
+larger code and data size.
+
+ Previous Release:
+ Non-Debug Version: 93.0K Code, 25.0K Data, 118.0K Total
+ Debug Version: 172.5K Code, 73.2K Data, 245.7K Total
+ Current Release:
+ Non-Debug Version: 92.9K Code, 25.0K Data, 117.9K Total
+ Debug Version: 172.5K Code, 73.2K Data, 245.7K Total
+
+
+2) iASL Compiler/Disassembler and Tools:
+
+iASL: Added the infrastructure and initial implementation of a integrated C-
+like preprocessor. This will simplify BIOS development process by eliminating
+the need for a separate preprocessing step during builds. On Windows, it also
+eliminates the need to install a separate C compiler. ACPICA BZ 761. Some
+features including full #define() macro support are still under development.
+These preprocessor directives are supported:
+ #define
+ #elif
+ #else
+ #endif
+ #error
+ #if
+ #ifdef
+ #ifndef
+ #include
+ #pragma message
+ #undef
+ #warning
+In addition, these new command line options are supported:
+ -D <symbol> Define symbol for preprocessor use
+ -li Create preprocessed output file (*.i)
+ -P Preprocess only and create preprocessor output file (*.i)
+
+Table Compiler: Fixed a problem where the equals operator within an expression
+did not work properly.
+
+Updated iASL to use the current versions of Bison/Flex. Updated the Windows
+project file to invoke these tools from the standard location. ACPICA BZ 904.
+Versions supported:
+ Flex for Windows: V2.5.4
+ Bison for Windows: V2.4.1
+
+----------------------------------------
+15 February 2012. Summary of changes for version 20120215:
+
+This release is available at www.acpica.org/downloads.
+The ACPI 5.0 specification is available at www.acpi.info.
+
+1) ACPICA Core Subsystem:
+
+There have been some major changes to the sleep/wake support code, as
+described below (a - e).
+
+a) The AcpiLeaveSleepState has been split into two interfaces, similar to
+AcpiEnterSleepStatePrep and AcpiEnterSleepState. The new interface is
+AcpiLeaveSleepStatePrep. This allows the host to perform actions between the
+time the _BFS method is called and the _WAK method is called. NOTE: all hosts
+must update their wake/resume code or else sleep/wake will not work properly.
+Rafael Wysocki.
+
+b) In AcpiLeaveSleepState, now enable all runtime GPEs before calling the _WAK
+method. Some machines require that the GPEs are enabled before the _WAK method
+is executed. Thomas Renninger.
+
+c) In AcpiLeaveSleepState, now always clear the WAK_STS (wake status) bit.
+Some BIOS code assumes that WAK_STS will be cleared on resume and use it to
+determine whether the system is rebooting or resuming. Matthew Garrett.
+
+d) Move the invocations of _GTS (Going To Sleep) and _BFS (Back From Sleep) to
+match the ACPI specification requirement. Rafael Wysocki.
+
+e) Implemented full support for the ACPI 5.0 SleepStatus and SleepControl
+registers within the V5 FADT. This support adds two new files:
+hardware/hwesleep.c implements the support for the new registers. Moved all
+sleep/wake external interfaces to hardware/hwxfsleep.c.
+
+
+Added a new OSL interface for ACPI table overrides,
+AcpiOsPhysicalTableOverride. This interface allows the host to override a
+table via a physical address, instead of the logical address required by
+AcpiOsTableOverride. This simplifies the host implementation. Initial
+implementation by Thomas Renninger. The ACPICA implementation creates a single
+shared function for table overrides that attempts both a logical and a
+physical override.
+
+Expanded the OSL memory read/write interfaces to 64-bit data
+(AcpiOsReadMemory, AcpiOsWriteMemory.) This enables full 64-bit memory
+transfer support for GAS register structures passed to AcpiRead and AcpiWrite.
+
+Implemented the ACPI_REDUCED_HARDWARE option to allow the creation of a custom
+build of ACPICA that supports only the ACPI 5.0 reduced hardware (SoC) model.
+See the ACPICA reference for details. ACPICA BZ 942. This option removes about
+10% of the code and 5% of the static data, and the following hardware ACPI
+features become unavailable:
+ PM Event and Control registers
+ SCI interrupt (and handler)
+ Fixed Events
+ General Purpose Events (GPEs)
+ Global Lock
+ ACPI PM timer
+ FACS table (Waking vectors and Global Lock)
+
+Updated the unix tarball directory structure to match the ACPICA git source
+tree. This ensures that the generic unix makefiles work properly (in
+generate/unix). Also updated the Linux makefiles to match. ACPICA BZ 867.
+
+Updated the return value of the _REV predefined method to integer value 5 to
+reflect ACPI 5.0 support.
+
+Moved the external ACPI PM timer interface prototypes to the public acpixf.h
+file where they belong.
+
+Example Code and Data Size: These are the sizes for the OS-independent
+acpica.lib produced by the Microsoft Visual C++ 9.0 32-bit compiler. The debug
+version of the code includes the debug output trace mechanism and has a much
+larger code and data size.
+
+ Previous Release:
+ Non-Debug Version: 92.8K Code, 24.9K Data, 117.7K Total
+ Debug Version: 171.7K Code, 72.9K Data, 244.5K Total
+ Current Release:
+ Non-Debug Version: 93.0K Code, 25.0K Data, 118.0K Total
+ Debug Version: 172.5K Code, 73.2K Data, 245.7K Total
+
+
+2) iASL Compiler/Disassembler and Tools:
+
+Disassembler: Fixed a problem with the new ACPI 5.0 serial resource
+descriptors (I2C, SPI, UART) where the resource produce/consumer bit was
+incorrectly displayed.
+
+AcpiHelp: Add display of ACPI/PNP device IDs that are defined in the ACPI
+specification.
+
+----------------------------------------
+11 January 2012. Summary of changes for version 20120111:
+
+This release is available at www.acpica.org/downloads.
+The ACPI 5.0 specification is available at www.acpi.info.
+
+1) ACPICA Core Subsystem:
+
+Implemented a new mechanism to allow host device drivers to check for address
+range conflicts with ACPI Operation Regions. Both SystemMemory and SystemIO
+address spaces are supported. A new external interface, AcpiCheckAddressRange,
+allows drivers to check an address range against the ACPI namespace. See the
+ACPICA reference for additional details. Adds one new file,
+utilities/utaddress.c. Lin Ming, Bob Moore.
+
+Fixed several issues with the ACPI 5.0 FADT support: Add the sleep Control and
+Status registers, update the ACPI 5.0 flags, and update internal data
+structures to handle an FADT larger than 256 bytes. The size of the ACPI 5.0
+FADT is 268 bytes.
+
+Updated all ACPICA copyrights and signons to 2012. Added the 2012 copyright to
+all module headers and signons, including the standard Linux header. This
+affects virtually every file in the ACPICA core subsystem, iASL compiler, and
+all ACPICA utilities.
+
+Example Code and Data Size: These are the sizes for the OS-independent
+acpica.lib produced by the Microsoft Visual C++ 9.0 32-bit compiler. The debug
+version of the code includes the debug output trace mechanism and has a much
+larger code and data size.
+
+ Previous Release:
+ Non-Debug Version: 92.3K Code, 24.9K Data, 117.2K Total
+ Debug Version: 170.8K Code, 72.6K Data, 243.4K Total
+ Current Release:
+ Non-Debug Version: 92.8K Code, 24.9K Data, 117.7K Total
+ Debug Version: 171.7K Code, 72.9K Data, 244.5K Total
+
+
+2) iASL Compiler/Disassembler and Tools:
+
+Disassembler: fixed a problem with the automatic resource tag generation
+support. Fixes a problem where the resource tags are inadvertently not
+constructed if the table being disassembled contains external references to
+control methods. Moved the actual construction of the tags to after the final
+namespace is constructed (after 2nd parse is invoked due to external control
+method references.) ACPICA BZ 941.
+
+Table Compiler: Make all "generic" operators caseless. These are the operators
+like UINT8, String, etc. Making these caseless improves ease-of-use. ACPICA BZ
+934.
+
+----------------------------------------
+23 November 2011. Summary of changes for version 20111123:
+
+0) ACPI 5.0 Support:
+
+This release contains full support for the ACPI 5.0 specification, as
+summarized below.
+
+Reduced Hardware Support:
+-------------------------
+
+This support allows for ACPI systems without the usual ACPI hardware. This
+support is enabled by a flag in the revision 5 FADT. If it is set, ACPICA will
+not attempt to initialize or use any of the usual ACPI hardware. Note, when
+this flag is set, all of the following ACPI hardware is assumed to be not
+present and is not initialized or accessed:
+
+ General Purpose Events (GPEs)
+ Fixed Events (PM1a/PM1b and PM Control)
+ Power Management Timer and Console Buttons (power/sleep)
+ Real-time Clock Alarm
+ Global Lock
+ System Control Interrupt (SCI)
+ The FACS is assumed to be non-existent
+
+ACPI Tables:
+------------
+
+All new tables and updates to existing tables are fully supported in the
+ACPICA headers (for use by device drivers), the disassembler, and the iASL
+Data Table Compiler. ACPI 5.0 defines these new tables:
+
+ BGRT /* Boot Graphics Resource Table */
+ DRTM /* Dynamic Root of Trust for Measurement table */
+ FPDT /* Firmware Performance Data Table */
+ GTDT /* Generic Timer Description Table */
+ MPST /* Memory Power State Table */
+ PCCT /* Platform Communications Channel Table */
+ PMTT /* Platform Memory Topology Table */
+ RASF /* RAS Feature table */
+
+Operation Regions/SpaceIDs:
+---------------------------
+
+All new operation regions are fully supported by the iASL compiler, the
+disassembler, and the ACPICA runtime code (for dispatch to region handlers.)
+The new operation region Space IDs are:
+
+ GeneralPurposeIo
+ GenericSerialBus
+
+Resource Descriptors:
+---------------------
+
+All new ASL resource descriptors are fully supported by the iASL compiler, the
+ASL/AML disassembler, and the ACPICA runtime Resource Manager code (including
+all new predefined resource tags). New descriptors are:
+
+ FixedDma
+ GpioIo
+ GpioInt
+ I2cSerialBus
+ SpiSerialBus
+ UartSerialBus
+
+ASL/AML Operators, New and Modified:
+------------------------------------
+
+One new operator is added, the Connection operator, which is used to associate
+a GeneralPurposeIo or GenericSerialBus resource descriptor with individual
+field objects within an operation region. Several new protocols are associated
+with the AccessAs operator. All are fully supported by the iASL compiler,
+disassembler, and runtime ACPICA AML interpreter:
+
+ Connection // Declare Field Connection attributes
+ AccessAs: AttribBytes (n) // Read/Write N-Bytes Protocol
+ AccessAs: AttribRawBytes (n) // Raw Read/Write N-Bytes Protocol
+ AccessAs: AttribRawProcessBytes (n) // Raw Process Call Protocol
+ RawDataBuffer // Data type for Vendor Data fields
+
+Predefined ASL/AML Objects:
+---------------------------
+
+All new predefined objects/control-methods are supported by the iASL compiler
+and the ACPICA runtime validation/repair (arguments and return values.) New
+predefined names include the following:
+
+Standard Predefined Names (Objects or Control Methods):
+ _AEI, _CLS, _CPC, _CWS, _DEP,
+ _DLM, _EVT, _GCP, _CRT, _GWS,
+ _HRV, _PRE, _PSE, _SRT, _SUB.
+
+Resource Tags (Names used to access individual fields within resource
+descriptors):
+ _DBT, _DPL, _DRS, _END, _FLC,
+ _IOR, _LIN, _MOD, _PAR, _PHA,
+ _PIN, _PPI, _POL, _RXL, _SLV,
+ _SPE, _STB, _TXL, _VEN.
+
+ACPICA External Interfaces:
+---------------------------
+
+Several new interfaces have been defined for use by ACPI-related device
+drivers and other host OS services:
+
+AcpiAcquireMutex and AcpiReleaseMutex: These interfaces allow the host OS to
+acquire and release AML mutexes that are defined in the DSDT/SSDT tables
+provided by the BIOS. They are intended to be used in conjunction with the
+ACPI 5.0 _DLM (Device Lock Method) in order to provide transaction-level
+mutual exclusion with the AML code/interpreter.
+
+AcpiGetEventResources: Returns the (formatted) resource descriptors as defined
+by the ACPI 5.0 _AEI object (ACPI Event Information). This object provides
+resource descriptors associated with hardware-reduced platform events, similar
+to the AcpiGetCurrentResources interface.
+
+Operation Region Handlers: For General Purpose IO and Generic Serial Bus
+operation regions, information about the Connection() object and any optional
+length information is passed to the region handler within the Context
+parameter.
+
+AcpiBufferToResource: This interface converts a raw AML buffer containing a
+resource template or resource descriptor to the ACPI_RESOURCE internal format
+suitable for use by device drivers. Can be used by an operation region handler
+to convert the Connection() buffer object into a ACPI_RESOURCE.
+
+Miscellaneous/Tools/TestSuites:
+-------------------------------
+
+Support for extended _HID names (Four alpha characters instead of three).
+Support for ACPI 5.0 features in the AcpiExec and AcpiHelp utilities.
+Support for ACPI 5.0 features in the ASLTS test suite.
+Fully updated documentation (ACPICA and iASL reference documents.)
+
+ACPI Table Definition Language:
+-------------------------------
+
+Support for this language was implemented and released as a subsystem of the
+iASL compiler in 2010. (See the iASL compiler User Guide.)
+
+
+Non-ACPI 5.0 changes for this release:
+--------------------------------------
+
+1) ACPICA Core Subsystem:
+
+Fix a problem with operation region declarations where a failure can occur if
+the region name and an argument that evaluates to an object (such as the
+region address) are in different namespace scopes. Lin Ming, ACPICA BZ 937.
+
+Do not abort an ACPI table load if an invalid space ID is found within. This
+will be caught later if the offending method is executed. ACPICA BZ 925.
+
+Fixed an issue with the FFixedHW space ID where the ID was not always
+recognized properly (Both ACPICA and iASL). ACPICA BZ 926.
+
+Fixed a problem with the 32-bit generation of the unix-specific OSL
+(osunixxf.c). Lin Ming, ACPICA BZ 936.
+
+Several changes made to enable generation with the GCC 4.6 compiler. ACPICA BZ
+935.
+
+New error messages: Unsupported I/O requests (not 8/16/32 bit), and Index/Bank
+field registers out-of-range.
+
+2) iASL Compiler/Disassembler and Tools:
+
+iASL: Implemented the __PATH__ operator, which returns the full pathname of
+the current source file.
-This release is available at www.acpica.org/downloads
+AcpiHelp: Automatically display expanded keyword information for all ASL
+operators.
+
+Debugger: Add "Template" command to disassemble/dump resource template
+buffers.
+
+Added a new master script to generate and execute the ASLTS test suite.
+Automatically handles 32- and 64-bit generation. See tests/aslts.sh
+
+iASL: Fix problem with listing generation during processing of the Switch()
+operator where AML listing was disabled until the entire Switch block was
+completed.
+
+iASL: Improve support for semicolon statement terminators. Fix "invalid
+character" message for some cases when the semicolon is used. Semicolons are
+now allowed after every <Term> grammar element. ACPICA BZ 927.
+
+iASL: Fixed some possible aliasing warnings during generation. ACPICA BZ 923.
+
+Disassembler: Fix problem with disassembly of the DataTableRegion operator
+where an inadvertent "Unhandled deferred opcode" message could be generated.
+
+3) Example Code and Data Size
+
+These are the sizes for the OS-independent acpica.lib produced by the
+Microsoft Visual C++ 9.0 32-bit compiler. The debug version of the code
+includes the debug output trace mechanism and has a much larger code and data
+size.
+
+ Previous Release:
+ Non-Debug Version: 90.2K Code, 23.9K Data, 114.1K Total
+ Debug Version: 165.6K Code, 68.4K Data, 234.0K Total
+ Current Release:
+ Non-Debug Version: 92.3K Code, 24.9K Data, 117.2K Total
+ Debug Version: 170.8K Code, 72.6K Data, 243.4K Total
+
+----------------------------------------
+22 September 2011. Summary of changes for version 20110922:
+
+0) ACPI 5.0 News:
+
+Support for ACPI 5.0 in ACPICA has been underway for several months and will
+be released at the same time that ACPI 5.0 is officially released.
+
+The ACPI 5.0 specification is on track for release in the next few months.
+
+1) ACPICA Core Subsystem:
+
+Fixed a problem where the maximum sleep time for the Sleep() operator was
+intended to be limited to two seconds, but was inadvertently limited to 20
+seconds instead.
+
+Linux and Unix makefiles: Added header file dependencies to ensure correct
+generation of ACPICA core code and utilities. Also simplified the makefiles
+considerably through the use of the vpath variable to specify search paths.
+ACPICA BZ 924.
+
+2) iASL Compiler/Disassembler and Tools:
+
+iASL: Implemented support to check the access length for all fields created to
+access named Resource Descriptor fields. For example, if a resource field is
+defined to be two bits, a warning is issued if a CreateXxxxField() is used
+with an incorrect bit length. This is implemented for all current resource
+descriptor names. ACPICA BZ 930.
+
+Disassembler: Fixed a byte ordering problem with the output of 24-bit and 56-
+bit integers.
+
+iASL: Fixed a couple of issues associated with variable-length package
+objects. 1) properly handle constants like One, Ones, Zero -- do not make a
+VAR_PACKAGE when these are used as a package length. 2) Allow the VAR_PACKAGE
+opcode (in addition to PACKAGE) when validating object types for predefined
+names.
+
+iASL: Emit statistics for all output files (instead of just the ASL input and
+AML output). Includes listings, hex files, etc.
+
+iASL: Added -G option to the table compiler to allow the compilation of custom
+ACPI tables. The only part of a table that is required is the standard 36-byte
+ACPI header.
+
+AcpiXtract: Ported to the standard ACPICA environment (with ACPICA headers),
+which also adds correct 64-bit support. Also, now all output filenames are
+completely lower case.
+
+AcpiExec: Ignore any non-AML tables (tables other than DSDT or SSDT) when
+loading table files. A warning is issued for any such tables. The only
+exception is an FADT. This also fixes a possible fault when attempting to load
+non-AML tables. ACPICA BZ 932.
+
+AcpiHelp: Added the AccessAs and Offset operators. Fixed a problem where a
+missing table terminator could cause a fault when using the -p option.
+
+AcpiSrc: Fixed a possible divide-by-zero fault when generating file
+statistics.
+
+3) Example Code and Data Size
+
+These are the sizes for the OS-independent acpica.lib produced by the
+Microsoft Visual C++ 9.0 32-bit compiler. The debug version of the code
+includes the debug output trace mechanism and has a much larger code and data
+size.
+
+ Previous Release (VC 9.0):
+ Non-Debug Version: 90.2K Code, 23.9K Data, 114.1K Total
+ Debug Version: 165.6K Code, 68.4K Data, 234.0K Total
+ Current Release (VC 9.0):
+ Non-Debug Version: 90.2K Code, 23.9K Data, 114.1K Total
+ Debug Version: 165.6K Code, 68.4K Data, 234.0K Total
+
+
+----------------------------------------
+23 June 2011. Summary of changes for version 20110623:
+
+1) ACPI CA Core Subsystem:
+
+Updated the predefined name repair mechanism to not attempt repair of a _TSS
+return object if a _PSS object is present. We can only sort the _TSS return
+package if there is no _PSS within the same scope. This is because if _PSS is
+present, the ACPI specification dictates that the _TSS Power Dissipation field
+is to be ignored, and therefore some BIOSs leave garbage values in the _TSS
+Power field(s). In this case, it is best to just return the _TSS package as-
+is. Reported by, and fixed with assistance from Fenghua Yu.
+
+Added an option to globally disable the control method return value validation
+and repair. This runtime option can be used to disable return value repair if
+this is causing a problem on a particular machine. Also added an option to
+AcpiExec (-dr) to set this disable flag.
+
+All makefiles and project files: Major changes to improve generation of ACPICA
+tools. ACPICA BZ 912:
+ Reduce default optimization levels to improve compatibility
+ For Linux, add strict-aliasing=0 for gcc 4
+ Cleanup and simplify use of command line defines
+ Cleanup multithread library support
+ Improve usage messages
+
+Linux-specific header: update handling of THREAD_ID and pthread. For the 32-
+bit case, improve casting to eliminate possible warnings, especially with the
+acpica tools.
+
+Example Code and Data Size: These are the sizes for the OS-independent
+acpica.lib produced by the Microsoft Visual C++ 9.0 32-bit compiler. The debug
+version of the code includes the debug output trace mechanism and has a much
+larger code and data size.
+
+ Previous Release (VC 9.0):
+ Non-Debug Version: 90.1K Code, 23.9K Data, 114.0K Total
+ Debug Version: 165.6K Code, 68.4K Data, 234.0K Total
+ Current Release (VC 9.0):
+ Non-Debug Version: 90.2K Code, 23.9K Data, 114.1K Total
+ Debug Version: 165.6K Code, 68.4K Data, 234.0K Total
+
+2) iASL Compiler/Disassembler and Tools:
+
+With this release, a new utility named "acpihelp" has been added to the ACPICA
+package. This utility summarizes the ACPI specification chapters for the ASL
+and AML languages. It generates under Linux/Unix as well as Windows, and
+provides the following functionality:
+ Find/display ASL operator(s) -- with description and syntax.
+ Find/display ASL keyword(s) -- with exact spelling and descriptions.
+ Find/display ACPI predefined name(s) -- with description, number
+ of arguments, and the return value data type.
+ Find/display AML opcode name(s) -- with opcode, arguments, and grammar.
+ Decode/display AML opcode -- with opcode name, arguments, and grammar.
+
+Service Layers: Make multi-thread support configurable. Conditionally compile
+the multi-thread support so that threading libraries will not be linked if not
+necessary. The only tool that requires multi-thread support is AcpiExec.
+
+iASL: Update yyerrror/AslCompilerError for "const" errors. Newer versions of
+Bison appear to want the interface to yyerror to be a const char * (or at
+least this is a problem when generating iASL on some systems.) ACPICA BZ 923
+Pierre Lejeune.
+
+Tools: Fix for systems where O_BINARY is not defined. Only used for Windows
+versions of the tools.
+
+----------------------------------------
+27 May 2011. Summary of changes for version 20110527:
1) ACPI CA Core Subsystem:
diff --git a/usr/src/uts/intel/io/acpica/debugger/dbcmds.c b/usr/src/uts/intel/io/acpica/debugger/dbcmds.c
index 73387b872c..459cd916ee 100644
--- a/usr/src/uts/intel/io/acpica/debugger/dbcmds.c
+++ b/usr/src/uts/intel/io/acpica/debugger/dbcmds.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -46,6 +46,7 @@
#include "accommon.h"
#include "acevents.h"
#include "acdebug.h"
+#include "acnamesp.h"
#include "acresrc.h"
#include "actables.h"
@@ -69,6 +70,18 @@ AcpiDmTestResourceConversion (
ACPI_NAMESPACE_NODE *Node,
char *Name);
+static ACPI_STATUS
+AcpiDbResourceCallback (
+ ACPI_RESOURCE *Resource,
+ void *Context);
+
+static ACPI_STATUS
+AcpiDbDeviceResources (
+ ACPI_HANDLE ObjHandle,
+ UINT32 NestingLevel,
+ void *Context,
+ void **ReturnValue);
+
/*******************************************************************************
*
@@ -148,28 +161,49 @@ AcpiDbSleep (
UINT8 SleepState;
+ ACPI_FUNCTION_TRACE (AcpiDbSleep);
+
+
SleepState = (UINT8) ACPI_STRTOUL (ObjectArg, NULL, 0);
AcpiOsPrintf ("**** Prepare to sleep ****\n");
Status = AcpiEnterSleepStatePrep (SleepState);
if (ACPI_FAILURE (Status))
{
- return (Status);
+ goto ErrorExit;
}
AcpiOsPrintf ("**** Going to sleep ****\n");
- Status = AcpiEnterSleepState (SleepState);
+ Status = AcpiEnterSleepState (SleepState, ACPI_NO_OPTIONAL_METHODS);
if (ACPI_FAILURE (Status))
{
- return (Status);
+ goto ErrorExit;
}
- AcpiOsPrintf ("**** returning from sleep ****\n");
+ AcpiOsPrintf ("**** Prepare to return from sleep ****\n");
+ Status = AcpiLeaveSleepStatePrep (SleepState, ACPI_NO_OPTIONAL_METHODS);
+ if (ACPI_FAILURE (Status))
+ {
+ goto ErrorExit;
+ }
+
+ AcpiOsPrintf ("**** Returning from sleep ****\n");
Status = AcpiLeaveSleepState (SleepState);
+ if (ACPI_FAILURE (Status))
+ {
+ goto ErrorExit;
+ }
return (Status);
+
+
+ErrorExit:
+
+ ACPI_EXCEPTION ((AE_INFO, Status, "During sleep test"));
+ return (Status);
}
+
/*******************************************************************************
*
* FUNCTION: AcpiDbDisplayLocks
@@ -339,25 +373,20 @@ AcpiDbSendNotify (
return;
}
- /* Decode Named object type */
+ /* Dispatch the notify if legal */
- switch (Node->Type)
+ if (AcpiEvIsNotifyObject (Node))
{
- case ACPI_TYPE_DEVICE:
- case ACPI_TYPE_THERMAL:
-
- /* Send the notify */
-
Status = AcpiEvQueueNotifyRequest (Node, Value);
if (ACPI_FAILURE (Status))
{
AcpiOsPrintf ("Could not queue notify\n");
}
- break;
-
- default:
- AcpiOsPrintf ("Named object is not a device or a thermal object\n");
- break;
+ }
+ else
+ {
+ AcpiOsPrintf ("Named object [%4.4s] Type %s, must be Device/Thermal/Processor type\n",
+ AcpiUtGetNodeName (Node), AcpiUtGetTypeName (Node->Type));
}
}
@@ -456,6 +485,78 @@ AcpiDbDisplayInterfaces (
/*******************************************************************************
*
+ * FUNCTION: AcpiDbDisplayTemplate
+ *
+ * PARAMETERS: BufferArg - Buffer name or addrss
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Dump a buffer that contains a resource template
+ *
+ ******************************************************************************/
+
+void
+AcpiDbDisplayTemplate (
+ char *BufferArg)
+{
+ ACPI_NAMESPACE_NODE *Node;
+ ACPI_STATUS Status;
+ ACPI_BUFFER ReturnObj;
+
+
+ /* Translate BufferArg to an Named object */
+
+ Node = AcpiDbConvertToNode (BufferArg);
+ if (!Node || (Node == AcpiGbl_RootNode))
+ {
+ AcpiOsPrintf ("Invalid argument: %s\n", BufferArg);
+ return;
+ }
+
+ /* We must have a buffer object */
+
+ if (Node->Type != ACPI_TYPE_BUFFER)
+ {
+ AcpiOsPrintf ("Not a Buffer object, cannot be a template: %s\n",
+ BufferArg);
+ return;
+ }
+
+ ReturnObj.Length = ACPI_DEBUG_BUFFER_SIZE;
+ ReturnObj.Pointer = AcpiGbl_DbBuffer;
+
+ /* Attempt to convert the raw buffer to a resource list */
+
+ Status = AcpiRsCreateResourceList (Node->Object, &ReturnObj);
+
+ AcpiDbSetOutputDestination (ACPI_DB_REDIRECTABLE_OUTPUT);
+ AcpiDbgLevel |= ACPI_LV_RESOURCES;
+
+ if (ACPI_FAILURE (Status))
+ {
+ AcpiOsPrintf ("Could not convert Buffer to a resource list: %s, %s\n",
+ BufferArg, AcpiFormatException (Status));
+ goto DumpBuffer;
+ }
+
+ /* Now we can dump the resource list */
+
+ AcpiRsDumpResourceList (ACPI_CAST_PTR (ACPI_RESOURCE,
+ ReturnObj.Pointer));
+
+DumpBuffer:
+ AcpiOsPrintf ("\nRaw data buffer:\n");
+ AcpiUtDumpBuffer ((UINT8 *) Node->Object->Buffer.Pointer,
+ Node->Object->Buffer.Length,
+ DB_BYTE_DISPLAY, ACPI_UINT32_MAX);
+
+ AcpiDbSetOutputDestination (ACPI_DB_CONSOLE_OUTPUT);
+ return;
+}
+
+
+/*******************************************************************************
+ *
* FUNCTION: AcpiDmCompareAmlResources
*
* PARAMETERS: Aml1Buffer - Contains first resource list
@@ -479,11 +580,14 @@ AcpiDmCompareAmlResources (
{
UINT8 *Aml1;
UINT8 *Aml2;
+ UINT8 *Aml1End;
+ UINT8 *Aml2End;
ACPI_RSDESC_SIZE Aml1Length;
ACPI_RSDESC_SIZE Aml2Length;
ACPI_RSDESC_SIZE Offset = 0;
UINT8 ResourceType;
UINT32 Count = 0;
+ UINT32 i;
/* Compare overall buffer sizes (may be different due to size rounding) */
@@ -491,16 +595,18 @@ AcpiDmCompareAmlResources (
if (Aml1BufferLength != Aml2BufferLength)
{
AcpiOsPrintf (
- "**** Buffer length mismatch in converted AML: original %X new %X ****\n",
+ "**** Buffer length mismatch in converted AML: Original %X, New %X ****\n",
Aml1BufferLength, Aml2BufferLength);
}
Aml1 = Aml1Buffer;
Aml2 = Aml2Buffer;
+ Aml1End = Aml1Buffer + Aml1BufferLength;
+ Aml2End = Aml2Buffer + Aml2BufferLength;
/* Walk the descriptor lists, comparing each descriptor */
- while (Aml1 < (Aml1Buffer + Aml1BufferLength))
+ while ((Aml1 < Aml1End) && (Aml2 < Aml2End))
{
/* Get the lengths of each descriptor */
@@ -513,7 +619,7 @@ AcpiDmCompareAmlResources (
if (Aml1Length != Aml2Length)
{
AcpiOsPrintf (
- "**** Length mismatch in descriptor [%.2X] type %2.2X, Offset %8.8X L1 %X L2 %X ****\n",
+ "**** Length mismatch in descriptor [%.2X] type %2.2X, Offset %8.8X Len1 %X, Len2 %X ****\n",
Count, ResourceType, Offset, Aml1Length, Aml2Length);
}
@@ -524,6 +630,15 @@ AcpiDmCompareAmlResources (
AcpiOsPrintf (
"**** Data mismatch in descriptor [%.2X] type %2.2X, Offset %8.8X ****\n",
Count, ResourceType, Offset);
+
+ for (i = 0; i < Aml1Length; i++)
+ {
+ if (Aml1[i] != Aml2[i])
+ {
+ AcpiOsPrintf ("Mismatch at byte offset %.2X: is %2.2X, should be %2.2X\n",
+ i, Aml2[i], Aml1[i]);
+ }
+ }
}
/* Exit on EndTag descriptor */
@@ -626,160 +741,301 @@ Exit1:
/*******************************************************************************
*
- * FUNCTION: AcpiDbDisplayResources
+ * FUNCTION: AcpiDbResourceCallback
*
- * PARAMETERS: ObjectArg - String with hex value of the object
+ * PARAMETERS: ACPI_WALK_RESOURCE_CALLBACK
*
- * RETURN: None
+ * RETURN: Status
*
- * DESCRIPTION: Display the resource objects associated with a device.
+ * DESCRIPTION: Simple callback to exercise AcpiWalkResources
*
******************************************************************************/
-void
-AcpiDbDisplayResources (
- char *ObjectArg)
+static ACPI_STATUS
+AcpiDbResourceCallback (
+ ACPI_RESOURCE *Resource,
+ void *Context)
+{
+
+ return (AE_OK);
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiDbDeviceResources
+ *
+ * PARAMETERS: ACPI_WALK_CALLBACK
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Display the _PRT/_CRS/_PRS resources for a device object.
+ *
+ ******************************************************************************/
+
+static ACPI_STATUS
+AcpiDbDeviceResources (
+ ACPI_HANDLE ObjHandle,
+ UINT32 NestingLevel,
+ void *Context,
+ void **ReturnValue)
{
ACPI_NAMESPACE_NODE *Node;
- ACPI_STATUS Status;
+ ACPI_NAMESPACE_NODE *PrtNode = NULL;
+ ACPI_NAMESPACE_NODE *CrsNode = NULL;
+ ACPI_NAMESPACE_NODE *PrsNode = NULL;
+ ACPI_NAMESPACE_NODE *AeiNode = NULL;
+ char *ParentPath;
ACPI_BUFFER ReturnObj;
+ ACPI_STATUS Status;
- AcpiDbSetOutputDestination (ACPI_DB_REDIRECTABLE_OUTPUT);
- AcpiDbgLevel |= ACPI_LV_RESOURCES;
+ Node = ACPI_CAST_PTR (ACPI_NAMESPACE_NODE, ObjHandle);
+ ParentPath = AcpiNsGetExternalPathname (Node);
+ if (!ParentPath)
+ {
+ return (AE_NO_MEMORY);
+ }
- /* Convert string to object pointer */
+ /* Get handles to the resource methods for this device */
- Node = AcpiDbConvertToNode (ObjectArg);
- if (!Node)
+ (void) AcpiGetHandle (Node, METHOD_NAME__PRT, ACPI_CAST_PTR (ACPI_HANDLE, &PrtNode));
+ (void) AcpiGetHandle (Node, METHOD_NAME__CRS, ACPI_CAST_PTR (ACPI_HANDLE, &CrsNode));
+ (void) AcpiGetHandle (Node, METHOD_NAME__PRS, ACPI_CAST_PTR (ACPI_HANDLE, &PrsNode));
+ (void) AcpiGetHandle (Node, METHOD_NAME__AEI, ACPI_CAST_PTR (ACPI_HANDLE, &AeiNode));
+ if (!PrtNode && !CrsNode && !PrsNode && !AeiNode)
{
- return;
+ goto Cleanup; /* Nothing to do */
}
+ AcpiOsPrintf ("\nDevice: %s\n", ParentPath);
+
/* Prepare for a return object of arbitrary size */
ReturnObj.Pointer = AcpiGbl_DbBuffer;
ReturnObj.Length = ACPI_DEBUG_BUFFER_SIZE;
+
/* _PRT */
- AcpiOsPrintf ("Evaluating _PRT\n");
+ if (PrtNode)
+ {
+ AcpiOsPrintf ("Evaluating _PRT\n");
- /* Check if _PRT exists */
+ Status = AcpiEvaluateObject (PrtNode, NULL, NULL, &ReturnObj);
+ if (ACPI_FAILURE (Status))
+ {
+ AcpiOsPrintf ("Could not evaluate _PRT: %s\n",
+ AcpiFormatException (Status));
+ goto GetCrs;
+ }
- Status = AcpiEvaluateObject (Node, METHOD_NAME__PRT, NULL, &ReturnObj);
- if (ACPI_FAILURE (Status))
- {
- AcpiOsPrintf ("Could not obtain _PRT: %s\n",
- AcpiFormatException (Status));
- goto GetCrs;
- }
+ ReturnObj.Pointer = AcpiGbl_DbBuffer;
+ ReturnObj.Length = ACPI_DEBUG_BUFFER_SIZE;
- ReturnObj.Pointer = AcpiGbl_DbBuffer;
- ReturnObj.Length = ACPI_DEBUG_BUFFER_SIZE;
+ Status = AcpiGetIrqRoutingTable (Node, &ReturnObj);
+ if (ACPI_FAILURE (Status))
+ {
+ AcpiOsPrintf ("GetIrqRoutingTable failed: %s\n",
+ AcpiFormatException (Status));
+ goto GetCrs;
+ }
- Status = AcpiGetIrqRoutingTable (Node, &ReturnObj);
- if (ACPI_FAILURE (Status))
- {
- AcpiOsPrintf ("GetIrqRoutingTable failed: %s\n",
- AcpiFormatException (Status));
- goto GetCrs;
+ AcpiRsDumpIrqList (ACPI_CAST_PTR (UINT8, AcpiGbl_DbBuffer));
}
- AcpiRsDumpIrqList (ACPI_CAST_PTR (UINT8, AcpiGbl_DbBuffer));
-
/* _CRS */
GetCrs:
- AcpiOsPrintf ("Evaluating _CRS\n");
+ if (CrsNode)
+ {
+ AcpiOsPrintf ("Evaluating _CRS\n");
- ReturnObj.Pointer = AcpiGbl_DbBuffer;
- ReturnObj.Length = ACPI_DEBUG_BUFFER_SIZE;
+ ReturnObj.Pointer = AcpiGbl_DbBuffer;
+ ReturnObj.Length = ACPI_DEBUG_BUFFER_SIZE;
- /* Check if _CRS exists */
+ Status = AcpiEvaluateObject (CrsNode, NULL, NULL, &ReturnObj);
+ if (ACPI_FAILURE (Status))
+ {
+ AcpiOsPrintf ("Could not evaluate _CRS: %s\n",
+ AcpiFormatException (Status));
+ goto GetPrs;
+ }
- Status = AcpiEvaluateObject (Node, METHOD_NAME__CRS, NULL, &ReturnObj);
- if (ACPI_FAILURE (Status))
- {
- AcpiOsPrintf ("Could not obtain _CRS: %s\n",
- AcpiFormatException (Status));
- goto GetPrs;
- }
+ /* This code is here to exercise the AcpiWalkResources interface */
- /* Get the _CRS resource list */
+ Status = AcpiWalkResources (Node, METHOD_NAME__CRS,
+ AcpiDbResourceCallback, NULL);
+ if (ACPI_FAILURE (Status))
+ {
+ AcpiOsPrintf ("AcpiWalkResources failed: %s\n",
+ AcpiFormatException (Status));
+ goto GetPrs;
+ }
- ReturnObj.Pointer = AcpiGbl_DbBuffer;
- ReturnObj.Length = ACPI_DEBUG_BUFFER_SIZE;
+ /* Get the _CRS resource list */
- Status = AcpiGetCurrentResources (Node, &ReturnObj);
- if (ACPI_FAILURE (Status))
- {
- AcpiOsPrintf ("AcpiGetCurrentResources failed: %s\n",
- AcpiFormatException (Status));
- goto GetPrs;
- }
+ ReturnObj.Pointer = AcpiGbl_DbBuffer;
+ ReturnObj.Length = ACPI_DEBUG_BUFFER_SIZE;
- /* Dump the _CRS resource list */
+ Status = AcpiGetCurrentResources (Node, &ReturnObj);
+ if (ACPI_FAILURE (Status))
+ {
+ AcpiOsPrintf ("AcpiGetCurrentResources failed: %s\n",
+ AcpiFormatException (Status));
+ goto GetPrs;
+ }
- AcpiRsDumpResourceList (ACPI_CAST_PTR (ACPI_RESOURCE,
- ReturnObj.Pointer));
+ /* Dump the _CRS resource list */
- /*
- * Perform comparison of original AML to newly created AML. This tests both
- * the AML->Resource conversion and the Resource->Aml conversion.
- */
- Status = AcpiDmTestResourceConversion (Node, METHOD_NAME__CRS);
+ AcpiRsDumpResourceList (ACPI_CAST_PTR (ACPI_RESOURCE,
+ ReturnObj.Pointer));
- /* Execute _SRS with the resource list */
+ /*
+ * Perform comparison of original AML to newly created AML. This tests both
+ * the AML->Resource conversion and the Resource->Aml conversion.
+ */
+ Status = AcpiDmTestResourceConversion (Node, METHOD_NAME__CRS);
- Status = AcpiSetCurrentResources (Node, &ReturnObj);
- if (ACPI_FAILURE (Status))
- {
- AcpiOsPrintf ("AcpiSetCurrentResources failed: %s\n",
- AcpiFormatException (Status));
- goto GetPrs;
+ /* Execute _SRS with the resource list */
+
+ Status = AcpiSetCurrentResources (Node, &ReturnObj);
+ if (ACPI_FAILURE (Status))
+ {
+ AcpiOsPrintf ("AcpiSetCurrentResources failed: %s\n",
+ AcpiFormatException (Status));
+ goto GetPrs;
+ }
}
/* _PRS */
GetPrs:
- AcpiOsPrintf ("Evaluating _PRS\n");
+ if (PrsNode)
+ {
+ AcpiOsPrintf ("Evaluating _PRS\n");
- ReturnObj.Pointer = AcpiGbl_DbBuffer;
- ReturnObj.Length = ACPI_DEBUG_BUFFER_SIZE;
+ ReturnObj.Pointer = AcpiGbl_DbBuffer;
+ ReturnObj.Length = ACPI_DEBUG_BUFFER_SIZE;
+
+ Status = AcpiEvaluateObject (PrsNode, NULL, NULL, &ReturnObj);
+ if (ACPI_FAILURE (Status))
+ {
+ AcpiOsPrintf ("Could not evaluate _PRS: %s\n",
+ AcpiFormatException (Status));
+ goto GetAei;
+ }
- /* Check if _PRS exists */
+ ReturnObj.Pointer = AcpiGbl_DbBuffer;
+ ReturnObj.Length = ACPI_DEBUG_BUFFER_SIZE;
- Status = AcpiEvaluateObject (Node, METHOD_NAME__PRS, NULL, &ReturnObj);
- if (ACPI_FAILURE (Status))
- {
- AcpiOsPrintf ("Could not obtain _PRS: %s\n",
- AcpiFormatException (Status));
- goto Cleanup;
+ Status = AcpiGetPossibleResources (Node, &ReturnObj);
+ if (ACPI_FAILURE (Status))
+ {
+ AcpiOsPrintf ("AcpiGetPossibleResources failed: %s\n",
+ AcpiFormatException (Status));
+ goto GetAei;
+ }
+
+ AcpiRsDumpResourceList (ACPI_CAST_PTR (ACPI_RESOURCE, AcpiGbl_DbBuffer));
}
- ReturnObj.Pointer = AcpiGbl_DbBuffer;
- ReturnObj.Length = ACPI_DEBUG_BUFFER_SIZE;
- Status = AcpiGetPossibleResources (Node, &ReturnObj);
- if (ACPI_FAILURE (Status))
+ /* _AEI */
+
+GetAei:
+ if (AeiNode)
{
- AcpiOsPrintf ("AcpiGetPossibleResources failed: %s\n",
- AcpiFormatException (Status));
- goto Cleanup;
+ AcpiOsPrintf ("Evaluating _AEI\n");
+
+ ReturnObj.Pointer = AcpiGbl_DbBuffer;
+ ReturnObj.Length = ACPI_DEBUG_BUFFER_SIZE;
+
+ Status = AcpiEvaluateObject (AeiNode, NULL, NULL, &ReturnObj);
+ if (ACPI_FAILURE (Status))
+ {
+ AcpiOsPrintf ("Could not evaluate _AEI: %s\n",
+ AcpiFormatException (Status));
+ goto Cleanup;
+ }
+
+ ReturnObj.Pointer = AcpiGbl_DbBuffer;
+ ReturnObj.Length = ACPI_DEBUG_BUFFER_SIZE;
+
+ Status = AcpiGetEventResources (Node, &ReturnObj);
+ if (ACPI_FAILURE (Status))
+ {
+ AcpiOsPrintf ("AcpiGetEventResources failed: %s\n",
+ AcpiFormatException (Status));
+ goto Cleanup;
+ }
+
+ AcpiRsDumpResourceList (ACPI_CAST_PTR (ACPI_RESOURCE, AcpiGbl_DbBuffer));
}
- AcpiRsDumpResourceList (ACPI_CAST_PTR (ACPI_RESOURCE, AcpiGbl_DbBuffer));
Cleanup:
+ ACPI_FREE (ParentPath);
+ return (AE_OK);
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiDbDisplayResources
+ *
+ * PARAMETERS: ObjectArg - String object name or object pointer.
+ * "*" means "display resources for all devices"
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Display the resource objects associated with a device.
+ *
+ ******************************************************************************/
+
+void
+AcpiDbDisplayResources (
+ char *ObjectArg)
+{
+ ACPI_NAMESPACE_NODE *Node;
+
+
+ AcpiDbSetOutputDestination (ACPI_DB_REDIRECTABLE_OUTPUT);
+ AcpiDbgLevel |= ACPI_LV_RESOURCES;
+
+ /* Asterisk means "display resources for all devices" */
+
+ if (!ACPI_STRCMP (ObjectArg, "*"))
+ {
+ (void) AcpiWalkNamespace (ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
+ ACPI_UINT32_MAX, AcpiDbDeviceResources, NULL, NULL, NULL);
+ }
+ else
+ {
+ /* Convert string to object pointer */
+
+ Node = AcpiDbConvertToNode (ObjectArg);
+ if (Node)
+ {
+ if (Node->Type != ACPI_TYPE_DEVICE)
+ {
+ AcpiOsPrintf ("%4.4s: Name is not a device object (%s)\n",
+ Node->Name.Ascii, AcpiUtGetTypeName (Node->Type));
+ }
+ else
+ {
+ (void) AcpiDbDeviceResources (Node, 0, NULL, NULL);
+ }
+ }
+ }
AcpiDbSetOutputDestination (ACPI_DB_CONSOLE_OUTPUT);
- return;
}
+#if (!ACPI_REDUCED_HARDWARE)
/*******************************************************************************
*
* FUNCTION: AcpiDbGenerateGpe
@@ -818,5 +1074,6 @@ AcpiDbGenerateGpe (
(void) AcpiEvGpeDispatch (NULL, GpeEventInfo, GpeNumber);
}
+#endif /* !ACPI_REDUCED_HARDWARE */
#endif /* ACPI_DEBUGGER */
diff --git a/usr/src/uts/intel/io/acpica/debugger/dbdisply.c b/usr/src/uts/intel/io/acpica/debugger/dbdisply.c
index 61e837e751..7facc8dd08 100644
--- a/usr/src/uts/intel/io/acpica/debugger/dbdisply.c
+++ b/usr/src/uts/intel/io/acpica/debugger/dbdisply.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -90,6 +90,8 @@ static ACPI_ADR_SPACE_TYPE AcpiGbl_SpaceIdList[] =
ACPI_ADR_SPACE_CMOS,
ACPI_ADR_SPACE_PCI_BAR_TARGET,
ACPI_ADR_SPACE_IPMI,
+ ACPI_ADR_SPACE_GPIO,
+ ACPI_ADR_SPACE_GSBUS,
ACPI_ADR_SPACE_DATA_TABLE,
ACPI_ADR_SPACE_FIXED_HARDWARE
};
@@ -105,8 +107,8 @@ typedef struct acpi_handler_info
static ACPI_HANDLER_INFO AcpiGbl_HandlerList[] =
{
- {&AcpiGbl_SystemNotify.Handler, "System Notifications"},
- {&AcpiGbl_DeviceNotify.Handler, "Device Notifications"},
+ {&AcpiGbl_GlobalNotify[0].Handler, "System Notifications"},
+ {&AcpiGbl_GlobalNotify[1].Handler, "Device Notifications"},
{&AcpiGbl_TableHandler, "ACPI Table Events"},
{&AcpiGbl_ExceptionHandler, "Control Method Exceptions"},
{&AcpiGbl_InterfaceHandler, "OSI Invocations"}
@@ -768,6 +770,7 @@ AcpiDbDisplayArgumentObject (
}
+#if (!ACPI_REDUCED_HARDWARE)
/*******************************************************************************
*
* FUNCTION: AcpiDbDisplayGpes
@@ -930,6 +933,7 @@ AcpiDbDisplayGpes (
GpeXruptInfo = GpeXruptInfo->Next;
}
}
+#endif /* !ACPI_REDUCED_HARDWARE */
/*******************************************************************************
@@ -971,7 +975,7 @@ AcpiDbDisplayHandlers (
while (HandlerObj)
{
- if (i == HandlerObj->AddressSpace.SpaceId)
+ if (AcpiGbl_SpaceIdList[i] == HandlerObj->AddressSpace.SpaceId)
{
AcpiOsPrintf (ACPI_HANDLER_PRESENT_STRING,
(HandlerObj->AddressSpace.HandlerFlags &
@@ -989,8 +993,28 @@ AcpiDbDisplayHandlers (
FoundHandler:;
}
+
+ /* Find all handlers for user-defined SpaceIDs */
+
+ HandlerObj = ObjDesc->Device.Handler;
+ while (HandlerObj)
+ {
+ if (HandlerObj->AddressSpace.SpaceId >= ACPI_USER_REGION_BEGIN)
+ {
+ AcpiOsPrintf (ACPI_PREDEFINED_PREFIX,
+ "User-defined ID", HandlerObj->AddressSpace.SpaceId);
+ AcpiOsPrintf (ACPI_HANDLER_PRESENT_STRING,
+ (HandlerObj->AddressSpace.HandlerFlags &
+ ACPI_ADDR_HANDLER_DEFAULT_INSTALLED) ? "Default" : "User",
+ HandlerObj->AddressSpace.Handler);
+ }
+
+ HandlerObj = HandlerObj->AddressSpace.Next;
+ }
}
+#if (!ACPI_REDUCED_HARDWARE)
+
/* Fixed event handlers */
AcpiOsPrintf ("\nFixed Event Handlers:\n");
@@ -1009,6 +1033,8 @@ AcpiDbDisplayHandlers (
}
}
+#endif /* !ACPI_REDUCED_HARDWARE */
+
/* Miscellaneous global handlers */
AcpiOsPrintf ("\nMiscellaneous Global Handlers:\n");
diff --git a/usr/src/uts/intel/io/acpica/debugger/dbexec.c b/usr/src/uts/intel/io/acpica/debugger/dbexec.c
index bcbc7a8daf..7818c9ddbe 100644
--- a/usr/src/uts/intel/io/acpica/debugger/dbexec.c
+++ b/usr/src/uts/intel/io/acpica/debugger/dbexec.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/debugger/dbfileio.c b/usr/src/uts/intel/io/acpica/debugger/dbfileio.c
index 8d0ef0aaa8..ef9b7fef53 100644
--- a/usr/src/uts/intel/io/acpica/debugger/dbfileio.c
+++ b/usr/src/uts/intel/io/acpica/debugger/dbfileio.c
@@ -6,7 +6,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -271,10 +271,11 @@ AcpiDbReadTable (
fseek (fp, 0, SEEK_SET);
- /* The RSDT and FACS tables do not have standard ACPI headers */
+ /* The RSDT, FACS and S3PT tables do not have standard ACPI headers */
if (ACPI_COMPARE_NAME (TableHeader.Signature, "RSD ") ||
- ACPI_COMPARE_NAME (TableHeader.Signature, "FACS"))
+ ACPI_COMPARE_NAME (TableHeader.Signature, "FACS") ||
+ ACPI_COMPARE_NAME (TableHeader.Signature, "S3PT"))
{
*TableLength = FileSize;
StandardHeader = FALSE;
diff --git a/usr/src/uts/intel/io/acpica/debugger/dbhistry.c b/usr/src/uts/intel/io/acpica/debugger/dbhistry.c
index 855d6b956f..536b69cdfa 100644
--- a/usr/src/uts/intel/io/acpica/debugger/dbhistry.c
+++ b/usr/src/uts/intel/io/acpica/debugger/dbhistry.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/debugger/dbinput.c b/usr/src/uts/intel/io/acpica/debugger/dbinput.c
index 1d716cfb1b..0ae313934b 100644
--- a/usr/src/uts/intel/io/acpica/debugger/dbinput.c
+++ b/usr/src/uts/intel/io/acpica/debugger/dbinput.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -131,6 +131,7 @@ enum AcpiExDebuggerCommands
CMD_STATS,
CMD_STOP,
CMD_TABLES,
+ CMD_TEMPLATE,
CMD_TERMINATE,
CMD_THREADS,
CMD_TRACE,
@@ -199,6 +200,7 @@ static const COMMAND_INFO AcpiGbl_DbCommands[] =
{"STATS", 0},
{"STOP", 0},
{"TABLES", 0},
+ {"TEMPLATE", 1},
{"TERMINATE", 0},
{"THREADS", 3},
{"TRACE", 1},
@@ -268,9 +270,10 @@ AcpiDbDisplayHelp (
AcpiOsPrintf (" Predefined Check all predefined names\n");
AcpiOsPrintf (" Prefix [<NamePath>] Set or Get current execution prefix\n");
AcpiOsPrintf (" References <Addr> Find all references to object at addr\n");
- AcpiOsPrintf (" Resources <Device> Get and display Device resources\n");
+ AcpiOsPrintf (" Resources <DeviceName | *> Display Device resources (* = all devices)\n");
AcpiOsPrintf (" Set N <NamedObject> <Value> Set value for named integer\n");
AcpiOsPrintf (" Sleep <SleepState> Simulate sleep/wake sequence\n");
+ AcpiOsPrintf (" Template <Object> Format/dump a Buffer/ResourceTemplate\n");
AcpiOsPrintf (" Terminate Delete namespace and all internal objects\n");
AcpiOsPrintf (" Type <Object> Display object type\n");
@@ -659,12 +662,15 @@ AcpiDbCommandDispatch (
break;
case CMD_ENABLEACPI:
+#if (!ACPI_REDUCED_HARDWARE)
+
Status = AcpiEnable();
if (ACPI_FAILURE(Status))
{
AcpiOsPrintf("AcpiEnable failed (Status=%X)\n", Status);
return (Status);
}
+#endif /* !ACPI_REDUCED_HARDWARE */
break;
case CMD_EVENT:
@@ -854,6 +860,10 @@ AcpiDbCommandDispatch (
AcpiDbDisplayTableInfo (AcpiGbl_DbArgs[1]);
break;
+ case CMD_TEMPLATE:
+ AcpiDbDisplayTemplate (AcpiGbl_DbArgs[1]);
+ break;
+
case CMD_TERMINATE:
AcpiDbSetOutputDestination (ACPI_DB_REDIRECTABLE_OUTPUT);
AcpiUtSubsystemShutdown ();
diff --git a/usr/src/uts/intel/io/acpica/debugger/dbmethod.c b/usr/src/uts/intel/io/acpica/debugger/dbmethod.c
index f0b88ebdb3..0d45d68150 100644
--- a/usr/src/uts/intel/io/acpica/debugger/dbmethod.c
+++ b/usr/src/uts/intel/io/acpica/debugger/dbmethod.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/debugger/dbnames.c b/usr/src/uts/intel/io/acpica/debugger/dbnames.c
index e4e4280f29..be326c5390 100644
--- a/usr/src/uts/intel/io/acpica/debugger/dbnames.c
+++ b/usr/src/uts/intel/io/acpica/debugger/dbnames.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/debugger/dbstats.c b/usr/src/uts/intel/io/acpica/debugger/dbstats.c
index bf17c63932..fcc4998ede 100644
--- a/usr/src/uts/intel/io/acpica/debugger/dbstats.c
+++ b/usr/src/uts/intel/io/acpica/debugger/dbstats.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -227,8 +227,8 @@ AcpiDbEnumerateObject (
case ACPI_TYPE_DEVICE:
- AcpiDbEnumerateObject (ObjDesc->Device.SystemNotify);
- AcpiDbEnumerateObject (ObjDesc->Device.DeviceNotify);
+ AcpiDbEnumerateObject (ObjDesc->Device.NotifyList[0]);
+ AcpiDbEnumerateObject (ObjDesc->Device.NotifyList[1]);
AcpiDbEnumerateObject (ObjDesc->Device.Handler);
break;
@@ -248,21 +248,21 @@ AcpiDbEnumerateObject (
case ACPI_TYPE_POWER:
- AcpiDbEnumerateObject (ObjDesc->PowerResource.SystemNotify);
- AcpiDbEnumerateObject (ObjDesc->PowerResource.DeviceNotify);
+ AcpiDbEnumerateObject (ObjDesc->PowerResource.NotifyList[0]);
+ AcpiDbEnumerateObject (ObjDesc->PowerResource.NotifyList[1]);
break;
case ACPI_TYPE_PROCESSOR:
- AcpiDbEnumerateObject (ObjDesc->Processor.SystemNotify);
- AcpiDbEnumerateObject (ObjDesc->Processor.DeviceNotify);
+ AcpiDbEnumerateObject (ObjDesc->Processor.NotifyList[0]);
+ AcpiDbEnumerateObject (ObjDesc->Processor.NotifyList[1]);
AcpiDbEnumerateObject (ObjDesc->Processor.Handler);
break;
case ACPI_TYPE_THERMAL:
- AcpiDbEnumerateObject (ObjDesc->ThermalZone.SystemNotify);
- AcpiDbEnumerateObject (ObjDesc->ThermalZone.DeviceNotify);
+ AcpiDbEnumerateObject (ObjDesc->ThermalZone.NotifyList[0]);
+ AcpiDbEnumerateObject (ObjDesc->ThermalZone.NotifyList[1]);
AcpiDbEnumerateObject (ObjDesc->ThermalZone.Handler);
break;
@@ -522,6 +522,18 @@ AcpiDbDisplayStatistics (
AcpiOsPrintf ("NamespaceNode %3d\n", sizeof (ACPI_NAMESPACE_NODE));
AcpiOsPrintf ("AcpiObject %3d\n", sizeof (ACPI_OBJECT));
+ AcpiOsPrintf ("\n");
+
+ AcpiOsPrintf ("Generic State %3d\n", sizeof (ACPI_GENERIC_STATE));
+ AcpiOsPrintf ("Common State %3d\n", sizeof (ACPI_COMMON_STATE));
+ AcpiOsPrintf ("Control State %3d\n", sizeof (ACPI_CONTROL_STATE));
+ AcpiOsPrintf ("Update State %3d\n", sizeof (ACPI_UPDATE_STATE));
+ AcpiOsPrintf ("Scope State %3d\n", sizeof (ACPI_SCOPE_STATE));
+ AcpiOsPrintf ("Parse Scope %3d\n", sizeof (ACPI_PSCOPE_STATE));
+ AcpiOsPrintf ("Package State %3d\n", sizeof (ACPI_PKG_STATE));
+ AcpiOsPrintf ("Thread State %3d\n", sizeof (ACPI_THREAD_STATE));
+ AcpiOsPrintf ("Result Values %3d\n", sizeof (ACPI_RESULT_VALUES));
+ AcpiOsPrintf ("Notify Info %3d\n", sizeof (ACPI_NOTIFY_INFO));
break;
diff --git a/usr/src/uts/intel/io/acpica/debugger/dbutils.c b/usr/src/uts/intel/io/acpica/debugger/dbutils.c
index 5b071e87b6..cf29b225de 100644
--- a/usr/src/uts/intel/io/acpica/debugger/dbutils.c
+++ b/usr/src/uts/intel/io/acpica/debugger/dbutils.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -190,10 +190,7 @@ AcpiDbDumpExternalObject (
case ACPI_TYPE_STRING:
AcpiOsPrintf ("[String] Length %.2X = ", ObjDesc->String.Length);
- for (i = 0; i < ObjDesc->String.Length; i++)
- {
- AcpiOsPrintf ("%c", ObjDesc->String.Pointer[i]);
- }
+ AcpiUtPrintString (ObjDesc->String.Pointer, ACPI_UINT8_MAX);
AcpiOsPrintf ("\n");
break;
@@ -208,7 +205,7 @@ AcpiDbDumpExternalObject (
AcpiOsPrintf ("\n");
}
AcpiUtDumpBuffer (ACPI_CAST_PTR (UINT8, ObjDesc->Buffer.Pointer),
- ObjDesc->Buffer.Length, DB_DWORD_DISPLAY, _COMPONENT);
+ ObjDesc->Buffer.Length, DB_BYTE_DISPLAY, _COMPONENT);
}
else
{
diff --git a/usr/src/uts/intel/io/acpica/debugger/dbxface.c b/usr/src/uts/intel/io/acpica/debugger/dbxface.c
index e638733c22..cd30273434 100644
--- a/usr/src/uts/intel/io/acpica/debugger/dbxface.c
+++ b/usr/src/uts/intel/io/acpica/debugger/dbxface.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/disassembler/dmbuffer.c b/usr/src/uts/intel/io/acpica/disassembler/dmbuffer.c
index f223f89f43..9f49f92e47 100644
--- a/usr/src/uts/intel/io/acpica/disassembler/dmbuffer.c
+++ b/usr/src/uts/intel/io/acpica/disassembler/dmbuffer.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -108,19 +108,19 @@ AcpiDmDisasmByteList (
}
AcpiDmIndent (Level);
- if (ByteCount > 7)
+ if (ByteCount > 8)
{
- AcpiOsPrintf ("/* %04X */ ", i);
+ AcpiOsPrintf ("/* %04X */ ", i);
}
}
- AcpiOsPrintf ("0x%2.2X", (UINT32) ByteData[i]);
+ AcpiOsPrintf (" 0x%2.2X", (UINT32) ByteData[i]);
/* Add comma if there are more bytes to display */
if (i < (ByteCount -1))
{
- AcpiOsPrintf (", ");
+ AcpiOsPrintf (",");
}
}
diff --git a/usr/src/uts/intel/io/acpica/disassembler/dmnames.c b/usr/src/uts/intel/io/acpica/disassembler/dmnames.c
index b00bca8e86..92e67c2e0a 100644
--- a/usr/src/uts/intel/io/acpica/disassembler/dmnames.c
+++ b/usr/src/uts/intel/io/acpica/disassembler/dmnames.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/disassembler/dmobject.c b/usr/src/uts/intel/io/acpica/disassembler/dmobject.c
index 9d5d7dd92c..cb515e9427 100644
--- a/usr/src/uts/intel/io/acpica/disassembler/dmobject.c
+++ b/usr/src/uts/intel/io/acpica/disassembler/dmobject.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/disassembler/dmopcode.c b/usr/src/uts/intel/io/acpica/disassembler/dmopcode.c
index 5da3847714..6d2c3e1ecf 100644
--- a/usr/src/uts/intel/io/acpica/disassembler/dmopcode.c
+++ b/usr/src/uts/intel/io/acpica/disassembler/dmopcode.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -303,6 +303,7 @@ AcpiDmDisassembleOneOp (
UINT32 Length;
ACPI_PARSE_OBJECT *Child;
ACPI_STATUS Status;
+ UINT8 *Aml;
if (!Op)
@@ -426,16 +427,19 @@ AcpiDmDisassembleOneOp (
* types of buffers, we have to closely look at the data in the
* buffer to determine the type.
*/
- Status = AcpiDmIsResourceTemplate (Op);
- if (ACPI_SUCCESS (Status))
+ if (!AcpiGbl_NoResourceDisassembly)
{
- Op->Common.DisasmOpcode = ACPI_DASM_RESOURCE;
- AcpiOsPrintf ("ResourceTemplate");
- break;
- }
- else if (Status == AE_AML_NO_RESOURCE_END_TAG)
- {
- AcpiOsPrintf ("/**** Is ResourceTemplate, but EndTag not at buffer end ****/ ");
+ Status = AcpiDmIsResourceTemplate (Op);
+ if (ACPI_SUCCESS (Status))
+ {
+ Op->Common.DisasmOpcode = ACPI_DASM_RESOURCE;
+ AcpiOsPrintf ("ResourceTemplate");
+ break;
+ }
+ else if (Status == AE_AML_NO_RESOURCE_END_TAG)
+ {
+ AcpiOsPrintf ("/**** Is ResourceTemplate, but EndTag not at buffer end ****/ ");
+ }
}
if (AcpiDmIsUnicodeBuffer (Op))
@@ -495,7 +499,7 @@ AcpiDmDisassembleOneOp (
if (Info->BitOffset % 8 == 0)
{
- AcpiOsPrintf (" Offset (0x%.2X)", ACPI_DIV_8 (Info->BitOffset));
+ AcpiOsPrintf ("Offset (0x%.2X)", ACPI_DIV_8 (Info->BitOffset));
}
else
{
@@ -507,16 +511,59 @@ AcpiDmDisassembleOneOp (
case AML_INT_ACCESSFIELD_OP:
+ case AML_INT_EXTACCESSFIELD_OP:
- AcpiOsPrintf (" AccessAs (%s, ",
- AcpiGbl_AccessTypes [(UINT32) (Op->Common.Value.Integer >> 8) & 0x7]);
+ AcpiOsPrintf ("AccessAs (%s, ",
+ AcpiGbl_AccessTypes [(UINT32) (Op->Common.Value.Integer & 0x7)]);
+
+ AcpiDmDecodeAttribute ((UINT8) (Op->Common.Value.Integer >> 8));
+
+ if (Op->Common.AmlOpcode == AML_INT_EXTACCESSFIELD_OP)
+ {
+ AcpiOsPrintf (" (0x%2.2X)", (unsigned) ((Op->Common.Value.Integer >> 16) & 0xFF));
+ }
- AcpiDmDecodeAttribute ((UINT8) Op->Common.Value.Integer);
AcpiOsPrintf (")");
AcpiDmCommaIfFieldMember (Op);
break;
+ case AML_INT_CONNECTION_OP:
+
+ /*
+ * Two types of Connection() - one with a buffer object, the
+ * other with a namestring that points to a buffer object.
+ */
+ AcpiOsPrintf ("Connection (");
+ Child = Op->Common.Value.Arg;
+
+ if (Child->Common.AmlOpcode == AML_INT_BYTELIST_OP)
+ {
+ AcpiOsPrintf ("\n");
+
+ Aml = Child->Named.Data;
+ Length = (UINT32) Child->Common.Value.Integer;
+
+ Info->Level += 1;
+ Op->Common.DisasmOpcode = ACPI_DASM_RESOURCE;
+ AcpiDmResourceTemplate (Info, Op->Common.Parent, Aml, Length);
+
+ Info->Level -= 1;
+ AcpiDmIndent (Info->Level);
+ }
+ else
+ {
+ AcpiDmNamestring (Child->Common.Value.Name);
+ }
+
+ AcpiOsPrintf (")");
+ AcpiDmCommaIfFieldMember (Op);
+ AcpiOsPrintf ("\n");
+
+ Op->Common.DisasmFlags |= ACPI_PARSEOP_IGNORE; /* for now, ignore in AcpiDmAscendingOp */
+ Child->Common.DisasmFlags |= ACPI_PARSEOP_IGNORE;
+ break;
+
case AML_INT_BYTELIST_OP:
AcpiDmByteList (Info, Op);
diff --git a/usr/src/uts/intel/io/acpica/disassembler/dmresrc.c b/usr/src/uts/intel/io/acpica/disassembler/dmresrc.c
index fe208d5260..9c07bf7e94 100644
--- a/usr/src/uts/intel/io/acpica/disassembler/dmresrc.c
+++ b/usr/src/uts/intel/io/acpica/disassembler/dmresrc.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -55,12 +55,6 @@
/* Dispatch tables for Resource disassembly functions */
-typedef
-void (*ACPI_RESOURCE_HANDLER) (
- AML_RESOURCE *Resource,
- UINT32 Length,
- UINT32 Level);
-
static ACPI_RESOURCE_HANDLER AcpiGbl_DmResourceDispatch [] =
{
/* Small descriptors */
@@ -75,7 +69,7 @@ static ACPI_RESOURCE_HANDLER AcpiGbl_DmResourceDispatch [] =
AcpiDmEndDependentDescriptor, /* 0x07, ACPI_RESOURCE_NAME_END_DEPENDENT */
AcpiDmIoDescriptor, /* 0x08, ACPI_RESOURCE_NAME_IO_PORT */
AcpiDmFixedIoDescriptor, /* 0x09, ACPI_RESOURCE_NAME_FIXED_IO_PORT */
- NULL, /* 0x0A, Reserved */
+ AcpiDmFixedDmaDescriptor, /* 0x0A, ACPI_RESOURCE_NAME_FIXED_DMA */
NULL, /* 0x0B, Reserved */
NULL, /* 0x0C, Reserved */
NULL, /* 0x0D, Reserved */
@@ -95,7 +89,10 @@ static ACPI_RESOURCE_HANDLER AcpiGbl_DmResourceDispatch [] =
AcpiDmWordDescriptor, /* 0x08, ACPI_RESOURCE_NAME_WORD_ADDRESS_SPACE */
AcpiDmInterruptDescriptor, /* 0x09, ACPI_RESOURCE_NAME_EXTENDED_XRUPT */
AcpiDmQwordDescriptor, /* 0x0A, ACPI_RESOURCE_NAME_QWORD_ADDRESS_SPACE */
- AcpiDmExtendedDescriptor /* 0x0B, ACPI_RESOURCE_NAME_EXTENDED_ADDRESS_SPACE */
+ AcpiDmExtendedDescriptor, /* 0x0B, ACPI_RESOURCE_NAME_EXTENDED_ADDRESS_SPACE */
+ AcpiDmGpioDescriptor, /* 0x0C, ACPI_RESOURCE_NAME_GPIO */
+ NULL, /* 0x0D, Reserved */
+ AcpiDmSerialBusDescriptor /* 0x0E, ACPI_RESOURCE_NAME_SERIAL_BUS */
};
diff --git a/usr/src/uts/intel/io/acpica/disassembler/dmresrcl.c b/usr/src/uts/intel/io/acpica/disassembler/dmresrcl.c
index 2284928752..ed78b31c70 100644
--- a/usr/src/uts/intel/io/acpica/disassembler/dmresrcl.c
+++ b/usr/src/uts/intel/io/acpica/disassembler/dmresrcl.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -1050,4 +1050,3 @@ AcpiDmVendorLargeDescriptor (
}
#endif
-
diff --git a/usr/src/uts/intel/io/acpica/disassembler/dmresrcl2.c b/usr/src/uts/intel/io/acpica/disassembler/dmresrcl2.c
new file mode 100644
index 0000000000..e79307ee72
--- /dev/null
+++ b/usr/src/uts/intel/io/acpica/disassembler/dmresrcl2.c
@@ -0,0 +1,700 @@
+/*******************************************************************************
+ *
+ * Module Name: dmresrcl2.c - "Large" Resource Descriptor disassembly (#2)
+ *
+ ******************************************************************************/
+
+/*
+ * Copyright (C) 2000 - 2012, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ * of any contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+
+#include "acpi.h"
+#include "accommon.h"
+#include "acdisasm.h"
+
+
+#ifdef ACPI_DISASSEMBLER
+
+#define _COMPONENT ACPI_CA_DEBUGGER
+ ACPI_MODULE_NAME ("dbresrcl2")
+
+/* Local prototypes */
+
+static void
+AcpiDmI2cSerialBusDescriptor (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level);
+
+static void
+AcpiDmSpiSerialBusDescriptor (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level);
+
+static void
+AcpiDmUartSerialBusDescriptor (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level);
+
+static void
+AcpiDmGpioCommon (
+ AML_RESOURCE *Resource,
+ UINT32 Level);
+
+static void
+AcpiDmDumpRawDataBuffer (
+ UINT8 *Buffer,
+ UINT32 Length,
+ UINT32 Level);
+
+
+/* Dispatch table for the serial bus descriptors */
+
+static ACPI_RESOURCE_HANDLER SerialBusResourceDispatch [] =
+{
+ NULL,
+ AcpiDmI2cSerialBusDescriptor,
+ AcpiDmSpiSerialBusDescriptor,
+ AcpiDmUartSerialBusDescriptor
+};
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiDmDumpRawDataBuffer
+ *
+ * PARAMETERS: Buffer - Pointer to the data bytes
+ * Length - Length of the descriptor in bytes
+ * Level - Current source code indentation level
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Dump a data buffer as a RawDataBuffer() object. Used for
+ * vendor data bytes.
+ *
+ ******************************************************************************/
+
+static void
+AcpiDmDumpRawDataBuffer (
+ UINT8 *Buffer,
+ UINT32 Length,
+ UINT32 Level)
+{
+ UINT32 Index;
+ UINT32 i;
+ UINT32 j;
+
+
+ if (!Length)
+ {
+ return;
+ }
+
+ AcpiOsPrintf ("RawDataBuffer (0x%.2X) // Vendor Data", Length);
+
+ AcpiOsPrintf ("\n");
+ AcpiDmIndent (Level + 1);
+ AcpiOsPrintf ("{\n");
+ AcpiDmIndent (Level + 2);
+
+ for (i = 0; i < Length;)
+ {
+ for (j = 0; j < 8; j++)
+ {
+ Index = i + j;
+ if (Index >= Length)
+ {
+ goto Finish;
+ }
+
+ AcpiOsPrintf ("0x%2.2X", Buffer[Index]);
+ if ((Index + 1) >= Length)
+ {
+ goto Finish;
+ }
+
+ AcpiOsPrintf (", ");
+ }
+ AcpiOsPrintf ("\n");
+ AcpiDmIndent (Level + 2);
+
+ i += 8;
+ }
+
+Finish:
+ AcpiOsPrintf ("\n");
+ AcpiDmIndent (Level + 1);
+ AcpiOsPrintf ("}");
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiDmGpioCommon
+ *
+ * PARAMETERS: Resource - Pointer to the resource descriptor
+ * Level - Current source code indentation level
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Decode common parts of a GPIO Interrupt descriptor
+ *
+ ******************************************************************************/
+
+static void
+AcpiDmGpioCommon (
+ AML_RESOURCE *Resource,
+ UINT32 Level)
+{
+ UINT32 PinCount;
+ UINT16 *PinList;
+ UINT8 *VendorData;
+ UINT32 i;
+
+
+ /* ResourceSource, ResourceSourceIndex, ResourceType */
+
+ AcpiDmIndent (Level + 1);
+ if (Resource->Gpio.ResSourceOffset)
+ {
+ AcpiUtPrintString (
+ ACPI_ADD_PTR (char, Resource, Resource->Gpio.ResSourceOffset),
+ ACPI_UINT8_MAX);
+ }
+
+ AcpiOsPrintf (", ");
+ AcpiOsPrintf ("0x%2.2X, ", Resource->Gpio.ResSourceIndex);
+ AcpiOsPrintf ("%s, ",
+ AcpiGbl_ConsumeDecode [(Resource->Gpio.Flags & 1)]);
+
+ /* Insert a descriptor name */
+
+ AcpiDmDescriptorName ();
+ AcpiOsPrintf (",");
+
+ /* Dump the vendor data */
+
+ if (Resource->Gpio.VendorOffset)
+ {
+ AcpiOsPrintf ("\n");
+ AcpiDmIndent (Level + 1);
+ VendorData = ACPI_ADD_PTR (UINT8, Resource,
+ Resource->Gpio.VendorOffset);
+
+ AcpiDmDumpRawDataBuffer (VendorData,
+ Resource->Gpio.VendorLength, Level);
+ }
+
+ AcpiOsPrintf (")\n");
+
+ /* Dump the interrupt list */
+
+ AcpiDmIndent (Level + 1);
+ AcpiOsPrintf ("{ // Pin list\n");
+
+ PinCount = ((UINT32) (Resource->Gpio.ResSourceOffset -
+ Resource->Gpio.PinTableOffset)) /
+ sizeof (UINT16);
+
+ PinList = (UINT16 *) ACPI_ADD_PTR (char, Resource,
+ Resource->Gpio.PinTableOffset);
+
+ for (i = 0; i < PinCount; i++)
+ {
+ AcpiDmIndent (Level + 2);
+ AcpiOsPrintf ("0x%4.4X%s\n", PinList[i], ((i + 1) < PinCount) ? "," : "");
+ }
+
+ AcpiDmIndent (Level + 1);
+ AcpiOsPrintf ("}\n");
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiDmGpioIntDescriptor
+ *
+ * PARAMETERS: Resource - Pointer to the resource descriptor
+ * Length - Length of the descriptor in bytes
+ * Level - Current source code indentation level
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Decode a GPIO Interrupt descriptor
+ *
+ ******************************************************************************/
+
+static void
+AcpiDmGpioIntDescriptor (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level)
+{
+
+ /* Dump the GpioInt-specific portion of the descriptor */
+
+ /* EdgeLevel, ActiveLevel, Shared */
+
+ AcpiDmIndent (Level);
+ AcpiOsPrintf ("GpioInt (%s, %s, %s, ",
+ AcpiGbl_HeDecode [(Resource->Gpio.IntFlags & 1)],
+ AcpiGbl_LlDecode [(Resource->Gpio.IntFlags >> 1) & 1],
+ AcpiGbl_ShrDecode [(Resource->Gpio.IntFlags >> 3) & 1]);
+
+ /* PinConfig, DebounceTimeout */
+
+ if (Resource->Gpio.PinConfig <= 3)
+ {
+ AcpiOsPrintf ("%s, ",
+ AcpiGbl_PpcDecode[Resource->Gpio.PinConfig]);
+ }
+ else
+ {
+ AcpiOsPrintf ("0x%2.2X, ", Resource->Gpio.PinConfig);
+ }
+ AcpiOsPrintf ("0x%4.4X,\n", Resource->Gpio.DebounceTimeout);
+
+ /* Dump the GpioInt/GpioIo common portion of the descriptor */
+
+ AcpiDmGpioCommon (Resource, Level);
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiDmGpioIoDescriptor
+ *
+ * PARAMETERS: Resource - Pointer to the resource descriptor
+ * Length - Length of the descriptor in bytes
+ * Level - Current source code indentation level
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Decode a GPIO Interrupt descriptor
+ *
+ ******************************************************************************/
+
+static void
+AcpiDmGpioIoDescriptor (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level)
+{
+
+ /* Dump the GpioIo-specific portion of the descriptor */
+
+ /* Shared, PinConfig */
+
+ AcpiDmIndent (Level);
+ AcpiOsPrintf ("GpioIo (%s, ",
+ AcpiGbl_ShrDecode [(Resource->Gpio.IntFlags >> 3) & 1]);
+
+ if (Resource->Gpio.PinConfig <= 3)
+ {
+ AcpiOsPrintf ("%s, ",
+ AcpiGbl_PpcDecode[Resource->Gpio.PinConfig]);
+ }
+ else
+ {
+ AcpiOsPrintf ("0x%2.2X, ", Resource->Gpio.PinConfig);
+ }
+
+ /* DebounceTimeout, DriveStrength, IoRestriction */
+
+ AcpiOsPrintf ("0x%4.4X, ", Resource->Gpio.DebounceTimeout);
+ AcpiOsPrintf ("0x%4.4X, ", Resource->Gpio.DriveStrength);
+ AcpiOsPrintf ("%s,\n",
+ AcpiGbl_IorDecode [Resource->Gpio.IntFlags & 3]);
+
+ /* Dump the GpioInt/GpioIo common portion of the descriptor */
+
+ AcpiDmGpioCommon (Resource, Level);
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiDmGpioDescriptor
+ *
+ * PARAMETERS: Resource - Pointer to the resource descriptor
+ * Length - Length of the descriptor in bytes
+ * Level - Current source code indentation level
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Decode a GpioInt/GpioIo GPIO Interrupt/IO descriptor
+ *
+ ******************************************************************************/
+
+void
+AcpiDmGpioDescriptor (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level)
+{
+ UINT8 ConnectionType;
+
+
+ ConnectionType = Resource->Gpio.ConnectionType;
+
+ switch (ConnectionType)
+ {
+ case AML_RESOURCE_GPIO_TYPE_INT:
+ AcpiDmGpioIntDescriptor (Resource, Length, Level);
+ break;
+
+ case AML_RESOURCE_GPIO_TYPE_IO:
+ AcpiDmGpioIoDescriptor (Resource, Length, Level);
+ break;
+
+ default:
+ AcpiOsPrintf ("Unknown GPIO type\n");
+ break;
+ }
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiDmDumpSerialBusVendorData
+ *
+ * PARAMETERS: Resource - Pointer to the resource descriptor
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Dump optional serial bus vendor data
+ *
+ ******************************************************************************/
+
+static void
+AcpiDmDumpSerialBusVendorData (
+ AML_RESOURCE *Resource,
+ UINT32 Level)
+{
+ UINT8 *VendorData;
+ UINT32 VendorLength;
+
+
+ /* Get the (optional) vendor data and length */
+
+ switch (Resource->CommonSerialBus.Type)
+ {
+ case AML_RESOURCE_I2C_SERIALBUSTYPE:
+
+ VendorLength = Resource->CommonSerialBus.TypeDataLength -
+ AML_RESOURCE_I2C_MIN_DATA_LEN;
+
+ VendorData = ACPI_ADD_PTR (UINT8, Resource,
+ sizeof (AML_RESOURCE_I2C_SERIALBUS));
+ break;
+
+ case AML_RESOURCE_SPI_SERIALBUSTYPE:
+
+ VendorLength = Resource->CommonSerialBus.TypeDataLength -
+ AML_RESOURCE_SPI_MIN_DATA_LEN;
+
+ VendorData = ACPI_ADD_PTR (UINT8, Resource,
+ sizeof (AML_RESOURCE_SPI_SERIALBUS));
+ break;
+
+ case AML_RESOURCE_UART_SERIALBUSTYPE:
+
+ VendorLength = Resource->CommonSerialBus.TypeDataLength -
+ AML_RESOURCE_UART_MIN_DATA_LEN;
+
+ VendorData = ACPI_ADD_PTR (UINT8, Resource,
+ sizeof (AML_RESOURCE_UART_SERIALBUS));
+ break;
+
+ default:
+ return;
+ }
+
+ /* Dump the vendor bytes as a RawDataBuffer object */
+
+ AcpiDmDumpRawDataBuffer (VendorData, VendorLength, Level);
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiDmI2cSerialBusDescriptor
+ *
+ * PARAMETERS: Resource - Pointer to the resource descriptor
+ * Length - Length of the descriptor in bytes
+ * Level - Current source code indentation level
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Decode a I2C serial bus descriptor
+ *
+ ******************************************************************************/
+
+static void
+AcpiDmI2cSerialBusDescriptor (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level)
+{
+ UINT32 ResourceSourceOffset;
+
+
+ /* SlaveAddress, SlaveMode, ConnectionSpeed, AddressingMode */
+
+ AcpiDmIndent (Level);
+ AcpiOsPrintf ("I2cSerialBus (0x%4.4X, %s, 0x%8.8X,\n",
+ Resource->I2cSerialBus.SlaveAddress,
+ AcpiGbl_SmDecode [(Resource->I2cSerialBus.Flags & 1)],
+ Resource->I2cSerialBus.ConnectionSpeed);
+
+ AcpiDmIndent (Level + 1);
+ AcpiOsPrintf ("%s, ",
+ AcpiGbl_AmDecode [(Resource->I2cSerialBus.TypeSpecificFlags & 1)]);
+
+ /* ResourceSource is a required field */
+
+ ResourceSourceOffset = sizeof (AML_RESOURCE_COMMON_SERIALBUS) +
+ Resource->CommonSerialBus.TypeDataLength;
+
+ AcpiUtPrintString (
+ ACPI_ADD_PTR (char, Resource, ResourceSourceOffset),
+ ACPI_UINT8_MAX);
+
+ /* ResourceSourceIndex, ResourceUsage */
+
+ AcpiOsPrintf (",\n");
+ AcpiDmIndent (Level + 1);
+ AcpiOsPrintf ("0x%2.2X, ", Resource->I2cSerialBus.ResSourceIndex);
+
+ AcpiOsPrintf ("%s, ",
+ AcpiGbl_ConsumeDecode [(Resource->I2cSerialBus.Flags >> 1) & 1]);
+
+ /* Insert a descriptor name */
+
+ AcpiDmDescriptorName ();
+ AcpiOsPrintf (",\n");
+
+ /* Dump the vendor data */
+
+ AcpiDmIndent (Level + 1);
+ AcpiDmDumpSerialBusVendorData (Resource, Level);
+ AcpiOsPrintf (")\n");
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiDmSpiSerialBusDescriptor
+ *
+ * PARAMETERS: Resource - Pointer to the resource descriptor
+ * Length - Length of the descriptor in bytes
+ * Level - Current source code indentation level
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Decode a SPI serial bus descriptor
+ *
+ ******************************************************************************/
+
+static void
+AcpiDmSpiSerialBusDescriptor (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level)
+{
+ UINT32 ResourceSourceOffset;
+
+
+ /* DeviceSelection, DeviceSelectionPolarity, WireMode, DataBitLength */
+
+ AcpiDmIndent (Level);
+ AcpiOsPrintf ("SpiSerialBus (0x%4.4X, %s, %s, 0x%2.2X,\n",
+ Resource->SpiSerialBus.DeviceSelection,
+ AcpiGbl_DpDecode [(Resource->SpiSerialBus.TypeSpecificFlags >> 1) & 1],
+ AcpiGbl_WmDecode [(Resource->SpiSerialBus.TypeSpecificFlags & 1)],
+ Resource->SpiSerialBus.DataBitLength);
+
+ /* SlaveMode, ConnectionSpeed, ClockPolarity, ClockPhase */
+
+ AcpiDmIndent (Level + 1);
+ AcpiOsPrintf ("%s, 0x%8.8X, %s,\n",
+ AcpiGbl_SmDecode [(Resource->SpiSerialBus.Flags & 1)],
+ Resource->SpiSerialBus.ConnectionSpeed,
+ AcpiGbl_CpoDecode [(Resource->SpiSerialBus.ClockPolarity & 1)]);
+
+ AcpiDmIndent (Level + 1);
+ AcpiOsPrintf ("%s, ",
+ AcpiGbl_CphDecode [(Resource->SpiSerialBus.ClockPhase & 1)]);
+
+ /* ResourceSource is a required field */
+
+ ResourceSourceOffset = sizeof (AML_RESOURCE_COMMON_SERIALBUS) +
+ Resource->CommonSerialBus.TypeDataLength;
+
+ AcpiUtPrintString (
+ ACPI_ADD_PTR (char, Resource, ResourceSourceOffset),
+ ACPI_UINT8_MAX);
+
+ /* ResourceSourceIndex, ResourceUsage */
+
+ AcpiOsPrintf (",\n");
+ AcpiDmIndent (Level + 1);
+ AcpiOsPrintf ("0x%2.2X, ", Resource->SpiSerialBus.ResSourceIndex);
+
+ AcpiOsPrintf ("%s, ",
+ AcpiGbl_ConsumeDecode [(Resource->SpiSerialBus.Flags >> 1) & 1]);
+
+ /* Insert a descriptor name */
+
+ AcpiDmDescriptorName ();
+ AcpiOsPrintf (",\n");
+
+ /* Dump the vendor data */
+
+ AcpiDmIndent (Level + 1);
+ AcpiDmDumpSerialBusVendorData (Resource, Level);
+ AcpiOsPrintf (")\n");
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiDmUartSerialBusDescriptor
+ *
+ * PARAMETERS: Resource - Pointer to the resource descriptor
+ * Length - Length of the descriptor in bytes
+ * Level - Current source code indentation level
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Decode a UART serial bus descriptor
+ *
+ ******************************************************************************/
+
+static void
+AcpiDmUartSerialBusDescriptor (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level)
+{
+ UINT32 ResourceSourceOffset;
+
+
+ /* ConnectionSpeed, BitsPerByte, StopBits */
+
+ AcpiDmIndent (Level);
+ AcpiOsPrintf ("UartSerialBus (0x%8.8X, %s, %s,\n",
+ Resource->UartSerialBus.DefaultBaudRate,
+ AcpiGbl_BpbDecode [(Resource->UartSerialBus.TypeSpecificFlags >> 4) & 3],
+ AcpiGbl_SbDecode [(Resource->UartSerialBus.TypeSpecificFlags >> 2) & 3]);
+
+ /* LinesInUse, IsBigEndian, Parity, FlowControl */
+
+ AcpiDmIndent (Level + 1);
+ AcpiOsPrintf ("0x%2.2X, %s, %s, %s,\n",
+ Resource->UartSerialBus.LinesEnabled,
+ AcpiGbl_EdDecode [(Resource->UartSerialBus.TypeSpecificFlags >> 7) & 1],
+ AcpiGbl_PtDecode [Resource->UartSerialBus.Parity & 7],
+ AcpiGbl_FcDecode [Resource->UartSerialBus.TypeSpecificFlags & 3]);
+
+ /* ReceiveBufferSize, TransmitBufferSize */
+
+ AcpiDmIndent (Level + 1);
+ AcpiOsPrintf ("0x%4.4X, 0x%4.4X, ",
+ Resource->UartSerialBus.RxFifoSize,
+ Resource->UartSerialBus.TxFifoSize);
+
+ /* ResourceSource is a required field */
+
+ ResourceSourceOffset = sizeof (AML_RESOURCE_COMMON_SERIALBUS) +
+ Resource->CommonSerialBus.TypeDataLength;
+
+ AcpiUtPrintString (
+ ACPI_ADD_PTR (char, Resource, ResourceSourceOffset),
+ ACPI_UINT8_MAX);
+
+ /* ResourceSourceIndex, ResourceUsage */
+
+ AcpiOsPrintf (",\n");
+ AcpiDmIndent (Level + 1);
+ AcpiOsPrintf ("0x%2.2X, ", Resource->UartSerialBus.ResSourceIndex);
+
+ AcpiOsPrintf ("%s, ",
+ AcpiGbl_ConsumeDecode [(Resource->UartSerialBus.Flags >> 1) & 1]);
+
+ /* Insert a descriptor name */
+
+ AcpiDmDescriptorName ();
+ AcpiOsPrintf (",\n");
+
+ /* Dump the vendor data */
+
+ AcpiDmIndent (Level + 1);
+ AcpiDmDumpSerialBusVendorData (Resource, Level);
+ AcpiOsPrintf (")\n");
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiDmSerialBusDescriptor
+ *
+ * PARAMETERS: Resource - Pointer to the resource descriptor
+ * Length - Length of the descriptor in bytes
+ * Level - Current source code indentation level
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Decode a I2C/SPI/UART serial bus descriptor
+ *
+ ******************************************************************************/
+
+void
+AcpiDmSerialBusDescriptor (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level)
+{
+
+ SerialBusResourceDispatch [Resource->CommonSerialBus.Type] (
+ Resource, Length, Level);
+}
+
+#endif
+
diff --git a/usr/src/uts/intel/io/acpica/disassembler/dmresrcs.c b/usr/src/uts/intel/io/acpica/disassembler/dmresrcs.c
index 52b0fa3ea3..32759d991b 100644
--- a/usr/src/uts/intel/io/acpica/disassembler/dmresrcs.c
+++ b/usr/src/uts/intel/io/acpica/disassembler/dmresrcs.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -121,9 +121,9 @@ AcpiDmDmaDescriptor (
AcpiDmIndent (Level);
AcpiOsPrintf ("DMA (%s, %s, %s, ",
- AcpiGbl_TypDecode [(Resource->Dma.Flags >> 5) & 3],
- AcpiGbl_BmDecode [(Resource->Dma.Flags >> 2) & 1],
- AcpiGbl_SizDecode [(Resource->Dma.Flags >> 0) & 3]);
+ AcpiGbl_TypDecode [(Resource->Dma.Flags >> 5) & 3],
+ AcpiGbl_BmDecode [(Resource->Dma.Flags >> 2) & 1],
+ AcpiGbl_SizDecode [(Resource->Dma.Flags >> 0) & 3]);
/* Insert a descriptor name */
@@ -137,6 +137,49 @@ AcpiDmDmaDescriptor (
/*******************************************************************************
*
+ * FUNCTION: AcpiDmFixedDmaDescriptor
+ *
+ * PARAMETERS: Resource - Pointer to the resource descriptor
+ * Length - Length of the descriptor in bytes
+ * Level - Current source code indentation level
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Decode a FixedDMA descriptor
+ *
+ ******************************************************************************/
+
+void
+AcpiDmFixedDmaDescriptor (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level)
+{
+
+ AcpiDmIndent (Level);
+ AcpiOsPrintf ("FixedDMA (0x%4.4X, 0x%4.4X, ",
+ Resource->FixedDma.RequestLines,
+ Resource->FixedDma.Channels);
+
+ if (Resource->FixedDma.Width <= 5)
+ {
+ AcpiOsPrintf ("%s, ",
+ AcpiGbl_DtsDecode [Resource->FixedDma.Width]);
+ }
+ else
+ {
+ AcpiOsPrintf ("%X /* INVALID DMA WIDTH */, ", Resource->FixedDma.Width);
+ }
+
+ /* Insert a descriptor name */
+
+ AcpiDmDescriptorName ();
+ AcpiOsPrintf (")\n");
+}
+
+
+/*******************************************************************************
+ *
* FUNCTION: AcpiDmIoDescriptor
*
* PARAMETERS: Resource - Pointer to the resource descriptor
diff --git a/usr/src/uts/intel/io/acpica/disassembler/dmutils.c b/usr/src/uts/intel/io/acpica/disassembler/dmutils.c
index b3db9312d5..ce3443ec3f 100644
--- a/usr/src/uts/intel/io/acpica/disassembler/dmutils.c
+++ b/usr/src/uts/intel/io/acpica/disassembler/dmutils.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -135,7 +135,8 @@ const char *AcpiGbl_IrqDecode[] =
*
* RETURN: None
*
- * DESCRIPTION: Decode the AccessAs attribute byte. (Mostly SMBus stuff)
+ * DESCRIPTION: Decode the AccessAs attribute byte. (Mostly SMBus and
+ * GenericSerialBus stuff.)
*
******************************************************************************/
@@ -146,44 +147,61 @@ AcpiDmDecodeAttribute (
switch (Attribute)
{
- case AML_FIELD_ATTRIB_SMB_QUICK:
+ case AML_FIELD_ATTRIB_QUICK:
- AcpiOsPrintf ("SMBQuick");
+ AcpiOsPrintf ("AttribQuick");
break;
- case AML_FIELD_ATTRIB_SMB_SEND_RCV:
+ case AML_FIELD_ATTRIB_SEND_RCV:
- AcpiOsPrintf ("SMBSendReceive");
+ AcpiOsPrintf ("AttribSendReceive");
break;
- case AML_FIELD_ATTRIB_SMB_BYTE:
+ case AML_FIELD_ATTRIB_BYTE:
- AcpiOsPrintf ("SMBByte");
+ AcpiOsPrintf ("AttribByte");
break;
- case AML_FIELD_ATTRIB_SMB_WORD:
+ case AML_FIELD_ATTRIB_WORD:
- AcpiOsPrintf ("SMBWord");
+ AcpiOsPrintf ("AttribWord");
break;
- case AML_FIELD_ATTRIB_SMB_WORD_CALL:
+ case AML_FIELD_ATTRIB_BLOCK:
- AcpiOsPrintf ("SMBProcessCall");
+ AcpiOsPrintf ("AttribBlock");
break;
- case AML_FIELD_ATTRIB_SMB_BLOCK:
+ case AML_FIELD_ATTRIB_MULTIBYTE:
- AcpiOsPrintf ("SMBBlock");
+ AcpiOsPrintf ("AttribBytes");
break;
- case AML_FIELD_ATTRIB_SMB_BLOCK_CALL:
+ case AML_FIELD_ATTRIB_WORD_CALL:
- AcpiOsPrintf ("SMBBlockProcessCall");
+ AcpiOsPrintf ("AttribProcessCall");
+ break;
+
+ case AML_FIELD_ATTRIB_BLOCK_CALL:
+
+ AcpiOsPrintf ("AttribBlockProcessCall");
+ break;
+
+ case AML_FIELD_ATTRIB_RAW_BYTES:
+
+ AcpiOsPrintf ("AttribRawBytes");
+ break;
+
+ case AML_FIELD_ATTRIB_RAW_PROCESS:
+
+ AcpiOsPrintf ("AttribRawProcessBytes");
break;
default:
- AcpiOsPrintf ("0x%.2X", Attribute);
+ /* A ByteConst is allowed by the grammar */
+
+ AcpiOsPrintf ("0x%2.2X", Attribute);
break;
}
}
diff --git a/usr/src/uts/intel/io/acpica/disassembler/dmwalk.c b/usr/src/uts/intel/io/acpica/disassembler/dmwalk.c
index 2a4b5e545f..dd3ee00255 100644
--- a/usr/src/uts/intel/io/acpica/disassembler/dmwalk.c
+++ b/usr/src/uts/intel/io/acpica/disassembler/dmwalk.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -464,7 +464,8 @@ AcpiDmDescendingOp (
AcpiDmDisassembleOneOp (NULL, Info, Op);
- if (Op->Common.DisasmOpcode == ACPI_DASM_LNOT_PREFIX)
+ if ((Op->Common.DisasmOpcode == ACPI_DASM_LNOT_PREFIX) ||
+ (Op->Common.AmlOpcode == AML_INT_CONNECTION_OP))
{
return (AE_OK);
}
diff --git a/usr/src/uts/intel/io/acpica/dispatcher/dsargs.c b/usr/src/uts/intel/io/acpica/dispatcher/dsargs.c
index 87def86051..ea0d7bee88 100644
--- a/usr/src/uts/intel/io/acpica/dispatcher/dsargs.c
+++ b/usr/src/uts/intel/io/acpica/dispatcher/dsargs.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -424,7 +424,15 @@ AcpiDsGetRegionArguments (
/* Execute the argument AML */
- Status = AcpiDsExecuteArguments (Node, Node->Parent,
+ Status = AcpiDsExecuteArguments (Node, ExtraDesc->Extra.ScopeNode,
ExtraDesc->Extra.AmlLength, ExtraDesc->Extra.AmlStart);
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+
+ Status = AcpiUtAddAddressRange (ObjDesc->Region.SpaceId,
+ ObjDesc->Region.Address, ObjDesc->Region.Length,
+ Node);
return_ACPI_STATUS (Status);
}
diff --git a/usr/src/uts/intel/io/acpica/dispatcher/dscontrol.c b/usr/src/uts/intel/io/acpica/dispatcher/dscontrol.c
index 41a44951c2..5c5c51a573 100644
--- a/usr/src/uts/intel/io/acpica/dispatcher/dscontrol.c
+++ b/usr/src/uts/intel/io/acpica/dispatcher/dscontrol.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/dispatcher/dsfield.c b/usr/src/uts/intel/io/acpica/dispatcher/dsfield.c
index 886b6c24c5..1c3c523720 100644
--- a/usr/src/uts/intel/io/acpica/dispatcher/dsfield.c
+++ b/usr/src/uts/intel/io/acpica/dispatcher/dsfield.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -243,6 +243,7 @@ AcpiDsGetFieldNames (
{
ACPI_STATUS Status;
UINT64 Position;
+ ACPI_PARSE_OBJECT *Child;
ACPI_FUNCTION_TRACE_PTR (DsGetFieldNames, Info);
@@ -257,10 +258,11 @@ AcpiDsGetFieldNames (
while (Arg)
{
/*
- * Three types of field elements are handled:
- * 1) Offset - specifies a bit offset
- * 2) AccessAs - changes the access mode
- * 3) Name - Enters a new named field into the namespace
+ * Four types of field elements are handled:
+ * 1) Name - Enters a new named field into the namespace
+ * 2) Offset - specifies a bit offset
+ * 3) AccessAs - changes the access mode/attributes
+ * 4) Connection - Associate a resource template with the field
*/
switch (Arg->Common.AmlOpcode)
{
@@ -279,24 +281,67 @@ AcpiDsGetFieldNames (
Info->FieldBitPosition = (UINT32) Position;
break;
-
case AML_INT_ACCESSFIELD_OP:
-
+ case AML_INT_EXTACCESSFIELD_OP:
/*
- * Get a new AccessType and AccessAttribute -- to be used for all
- * field units that follow, until field end or another AccessAs
- * keyword.
+ * Get new AccessType, AccessAttribute, and AccessLength fields
+ * -- to be used for all field units that follow, until the
+ * end-of-field or another AccessAs keyword is encountered.
+ * NOTE. These three bytes are encoded in the integer value
+ * of the parseop for convenience.
*
* In FieldFlags, preserve the flag bits other than the
- * ACCESS_TYPE bits
+ * ACCESS_TYPE bits.
*/
+
+ /* AccessType (ByteAcc, WordAcc, etc.) */
+
Info->FieldFlags = (UINT8)
((Info->FieldFlags & ~(AML_FIELD_ACCESS_TYPE_MASK)) |
- ((UINT8) ((UINT32) Arg->Common.Value.Integer >> 8)));
+ ((UINT8) ((UINT32) (Arg->Common.Value.Integer & 0x07))));
+
+ /* AccessAttribute (AttribQuick, AttribByte, etc.) */
+
+ Info->Attribute = (UINT8) ((Arg->Common.Value.Integer >> 8) & 0xFF);
+
+ /* AccessLength (for serial/buffer protocols) */
- Info->Attribute = (UINT8) (Arg->Common.Value.Integer);
+ Info->AccessLength = (UINT8) ((Arg->Common.Value.Integer >> 16) & 0xFF);
break;
+ case AML_INT_CONNECTION_OP:
+ /*
+ * Clear any previous connection. New connection is used for all
+ * fields that follow, similar to AccessAs
+ */
+ Info->ResourceBuffer = NULL;
+ Info->ConnectionNode = NULL;
+
+ /*
+ * A Connection() is either an actual resource descriptor (buffer)
+ * or a named reference to a resource template
+ */
+ Child = Arg->Common.Value.Arg;
+ if (Child->Common.AmlOpcode == AML_INT_BYTELIST_OP)
+ {
+ Info->ResourceBuffer = Child->Named.Data;
+ Info->ResourceLength = (UINT16) Child->Named.Value.Integer;
+ }
+ else
+ {
+ /* Lookup the Connection() namepath, it should already exist */
+
+ Status = AcpiNsLookup (WalkState->ScopeInfo,
+ Child->Common.Value.Name, ACPI_TYPE_ANY,
+ ACPI_IMODE_EXECUTE, ACPI_NS_DONT_OPEN_SCOPE,
+ WalkState, &Info->ConnectionNode);
+ if (ACPI_FAILURE (Status))
+ {
+ ACPI_ERROR_NAMESPACE (Child->Common.Value.Name, Status);
+ return_ACPI_STATUS (Status);
+ }
+ }
+ break;
case AML_INT_NAMEDFIELD_OP:
@@ -348,7 +393,6 @@ AcpiDsGetFieldNames (
Info->FieldBitPosition += Info->FieldBitLength;
break;
-
default:
ACPI_ERROR ((AE_INFO,
@@ -406,6 +450,8 @@ AcpiDsCreateField (
}
}
+ ACPI_MEMSET (&Info, 0, sizeof (ACPI_CREATE_FIELD_INFO));
+
/* Second arg is the field flags */
Arg = Arg->Common.Next;
@@ -418,7 +464,6 @@ AcpiDsCreateField (
Info.RegionNode = RegionNode;
Status = AcpiDsGetFieldNames (&Info, WalkState, Arg->Common.Next);
-
return_ACPI_STATUS (Status);
}
@@ -514,8 +559,8 @@ AcpiDsInitFieldObjects (
while (Arg)
{
/*
- * Ignore OFFSET and ACCESSAS terms here; we are only interested in the
- * field names in order to enter them into the namespace.
+ * Ignore OFFSET/ACCESSAS/CONNECTION terms here; we are only interested
+ * in the field names in order to enter them into the namespace.
*/
if (Arg->Common.AmlOpcode == AML_INT_NAMEDFIELD_OP)
{
@@ -697,7 +742,6 @@ AcpiDsCreateIndexField (
Info.RegionNode = RegionNode;
Status = AcpiDsGetFieldNames (&Info, WalkState, Arg->Common.Next);
-
return_ACPI_STATUS (Status);
}
diff --git a/usr/src/uts/intel/io/acpica/dispatcher/dsinit.c b/usr/src/uts/intel/io/acpica/dispatcher/dsinit.c
index 154e5de34f..1c7ad88492 100644
--- a/usr/src/uts/intel/io/acpica/dispatcher/dsinit.c
+++ b/usr/src/uts/intel/io/acpica/dispatcher/dsinit.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/dispatcher/dsmethod.c b/usr/src/uts/intel/io/acpica/dispatcher/dsmethod.c
index 1cfa1c6853..670382ac9b 100644
--- a/usr/src/uts/intel/io/acpica/dispatcher/dsmethod.c
+++ b/usr/src/uts/intel/io/acpica/dispatcher/dsmethod.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/dispatcher/dsmthdat.c b/usr/src/uts/intel/io/acpica/dispatcher/dsmthdat.c
index 74ee4bdf31..b4509e7710 100644
--- a/usr/src/uts/intel/io/acpica/dispatcher/dsmthdat.c
+++ b/usr/src/uts/intel/io/acpica/dispatcher/dsmthdat.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/dispatcher/dsobject.c b/usr/src/uts/intel/io/acpica/dispatcher/dsobject.c
index 69888505c6..b3567766a2 100644
--- a/usr/src/uts/intel/io/acpica/dispatcher/dsobject.c
+++ b/usr/src/uts/intel/io/acpica/dispatcher/dsobject.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/dispatcher/dsopcode.c b/usr/src/uts/intel/io/acpica/dispatcher/dsopcode.c
index 45fbed1e62..7569236f2e 100644
--- a/usr/src/uts/intel/io/acpica/dispatcher/dsopcode.c
+++ b/usr/src/uts/intel/io/acpica/dispatcher/dsopcode.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/dispatcher/dsutils.c b/usr/src/uts/intel/io/acpica/dispatcher/dsutils.c
index 278009795c..66431aad53 100644
--- a/usr/src/uts/intel/io/acpica/dispatcher/dsutils.c
+++ b/usr/src/uts/intel/io/acpica/dispatcher/dsutils.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/dispatcher/dswexec.c b/usr/src/uts/intel/io/acpica/dispatcher/dswexec.c
index 7983787958..5732855bbe 100644
--- a/usr/src/uts/intel/io/acpica/dispatcher/dswexec.c
+++ b/usr/src/uts/intel/io/acpica/dispatcher/dswexec.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/dispatcher/dswload.c b/usr/src/uts/intel/io/acpica/dispatcher/dswload.c
index d8e77a6538..d40f911f41 100644
--- a/usr/src/uts/intel/io/acpica/dispatcher/dswload.c
+++ b/usr/src/uts/intel/io/acpica/dispatcher/dswload.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/dispatcher/dswload2.c b/usr/src/uts/intel/io/acpica/dispatcher/dswload2.c
index 608d525b36..27e3176209 100644
--- a/usr/src/uts/intel/io/acpica/dispatcher/dswload2.c
+++ b/usr/src/uts/intel/io/acpica/dispatcher/dswload2.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/dispatcher/dswscope.c b/usr/src/uts/intel/io/acpica/dispatcher/dswscope.c
index 9842b85b21..a59fdc9dca 100644
--- a/usr/src/uts/intel/io/acpica/dispatcher/dswscope.c
+++ b/usr/src/uts/intel/io/acpica/dispatcher/dswscope.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/dispatcher/dswstate.c b/usr/src/uts/intel/io/acpica/dispatcher/dswstate.c
index f97ccadb60..dd11a18130 100644
--- a/usr/src/uts/intel/io/acpica/dispatcher/dswstate.c
+++ b/usr/src/uts/intel/io/acpica/dispatcher/dswstate.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/events/evevent.c b/usr/src/uts/intel/io/acpica/events/evevent.c
index 0f5785a634..b5a7acde72 100644
--- a/usr/src/uts/intel/io/acpica/events/evevent.c
+++ b/usr/src/uts/intel/io/acpica/events/evevent.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -48,6 +48,8 @@
#define _COMPONENT ACPI_EVENTS
ACPI_MODULE_NAME ("evevent")
+#if (!ACPI_REDUCED_HARDWARE) /* Entire module */
+
/* Local prototypes */
static ACPI_STATUS
@@ -81,6 +83,13 @@ AcpiEvInitializeEvents (
ACPI_FUNCTION_TRACE (EvInitializeEvents);
+ /* If Hardware Reduced flag is set, there are no fixed events */
+
+ if (AcpiGbl_ReducedHardware)
+ {
+ return_ACPI_STATUS (AE_OK);
+ }
+
/*
* Initialize the Fixed and General Purpose Events. This is done prior to
* enabling SCIs to prevent interrupts from occurring before the handlers
@@ -128,6 +137,13 @@ AcpiEvInstallXruptHandlers (
ACPI_FUNCTION_TRACE (EvInstallXruptHandlers);
+ /* If Hardware Reduced flag is set, there is no ACPI h/w */
+
+ if (AcpiGbl_ReducedHardware)
+ {
+ return_ACPI_STATUS (AE_OK);
+ }
+
/* Install the SCI handler */
Status = AcpiEvInstallSciHandler ();
@@ -315,4 +331,6 @@ AcpiEvFixedEventDispatch (
AcpiGbl_FixedEventHandlers[Event].Context));
}
+#endif /* !ACPI_REDUCED_HARDWARE */
+
diff --git a/usr/src/uts/intel/io/acpica/events/evglock.c b/usr/src/uts/intel/io/acpica/events/evglock.c
index cab444d06d..f40d129508 100644
--- a/usr/src/uts/intel/io/acpica/events/evglock.c
+++ b/usr/src/uts/intel/io/acpica/events/evglock.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -49,6 +49,7 @@
#define _COMPONENT ACPI_EVENTS
ACPI_MODULE_NAME ("evglock")
+#if (!ACPI_REDUCED_HARDWARE) /* Entire module */
/* Local prototypes */
@@ -79,6 +80,13 @@ AcpiEvInitGlobalLockHandler (
ACPI_FUNCTION_TRACE (EvInitGlobalLockHandler);
+ /* If Hardware Reduced flag is set, there is no global lock */
+
+ if (AcpiGbl_ReducedHardware)
+ {
+ return_ACPI_STATUS (AE_OK);
+ }
+
/* Attempt installation of the global lock handler */
Status = AcpiInstallFixedEventHandler (ACPI_EVENT_GLOBAL,
@@ -365,3 +373,5 @@ AcpiEvReleaseGlobalLock (
AcpiOsReleaseMutex (AcpiGbl_GlobalLockMutex->Mutex.OsMutex);
return_ACPI_STATUS (Status);
}
+
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/events/evgpe.c b/usr/src/uts/intel/io/acpica/events/evgpe.c
index c4326c721b..a3781a55c6 100644
--- a/usr/src/uts/intel/io/acpica/events/evgpe.c
+++ b/usr/src/uts/intel/io/acpica/events/evgpe.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -49,6 +49,8 @@
#define _COMPONENT ACPI_EVENTS
ACPI_MODULE_NAME ("evgpe")
+#if (!ACPI_REDUCED_HARDWARE) /* Entire module */
+
/* Local prototypes */
static void ACPI_SYSTEM_XFACE
@@ -826,3 +828,4 @@ AcpiEvGpeDispatch (
return_UINT32 (ACPI_INTERRUPT_HANDLED);
}
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/events/evgpeblk.c b/usr/src/uts/intel/io/acpica/events/evgpeblk.c
index b8b7af254e..c281855c2c 100644
--- a/usr/src/uts/intel/io/acpica/events/evgpeblk.c
+++ b/usr/src/uts/intel/io/acpica/events/evgpeblk.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -49,6 +49,8 @@
#define _COMPONENT ACPI_EVENTS
ACPI_MODULE_NAME ("evgpeblk")
+#if (!ACPI_REDUCED_HARDWARE) /* Entire module */
+
/* Local prototypes */
static ACPI_STATUS
@@ -545,3 +547,4 @@ AcpiEvInitializeGpeBlock (
return_ACPI_STATUS (AE_OK);
}
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/events/evgpeinit.c b/usr/src/uts/intel/io/acpica/events/evgpeinit.c
index f1d38ed853..f8962e947f 100644
--- a/usr/src/uts/intel/io/acpica/events/evgpeinit.c
+++ b/usr/src/uts/intel/io/acpica/events/evgpeinit.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -50,6 +50,7 @@
#define _COMPONENT ACPI_EVENTS
ACPI_MODULE_NAME ("evgpeinit")
+#if (!ACPI_REDUCED_HARDWARE) /* Entire module */
/*
* Note: History of _PRW support in ACPICA
@@ -457,3 +458,5 @@ AcpiEvMatchGpeMethod (
Name, GpeNumber));
return_ACPI_STATUS (AE_OK);
}
+
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/events/evgpeutil.c b/usr/src/uts/intel/io/acpica/events/evgpeutil.c
index e706fbb7c7..70580cd853 100644
--- a/usr/src/uts/intel/io/acpica/events/evgpeutil.c
+++ b/usr/src/uts/intel/io/acpica/events/evgpeutil.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -41,7 +41,6 @@
* POSSIBILITY OF SUCH DAMAGES.
*/
-
#include "acpi.h"
#include "accommon.h"
#include "acevents.h"
@@ -50,6 +49,7 @@
ACPI_MODULE_NAME ("evgpeutil")
+#if (!ACPI_REDUCED_HARDWARE) /* Entire module */
/*******************************************************************************
*
* FUNCTION: AcpiEvWalkGpeList
@@ -422,3 +422,4 @@ AcpiEvDeleteGpeHandlers (
return_ACPI_STATUS (AE_OK);
}
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/events/evmisc.c b/usr/src/uts/intel/io/acpica/events/evmisc.c
index 1667f075db..68f751aca3 100644
--- a/usr/src/uts/intel/io/acpica/events/evmisc.c
+++ b/usr/src/uts/intel/io/acpica/events/evmisc.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -111,105 +111,82 @@ AcpiEvQueueNotifyRequest (
UINT32 NotifyValue)
{
ACPI_OPERAND_OBJECT *ObjDesc;
- ACPI_OPERAND_OBJECT *HandlerObj = NULL;
- ACPI_GENERIC_STATE *NotifyInfo;
+ ACPI_OPERAND_OBJECT *HandlerListHead = NULL;
+ ACPI_GENERIC_STATE *Info;
+ UINT8 HandlerListId = 0;
ACPI_STATUS Status = AE_OK;
ACPI_FUNCTION_NAME (EvQueueNotifyRequest);
- /*
- * For value 3 (Ejection Request), some device method may need to be run.
- * For value 2 (Device Wake) if _PRW exists, the _PS0 method may need
- * to be run.
- * For value 0x80 (Status Change) on the power button or sleep button,
- * initiate soft-off or sleep operation?
- */
- ACPI_DEBUG_PRINT ((ACPI_DB_INFO,
- "Dispatching Notify on [%4.4s] Node %p Value 0x%2.2X (%s)\n",
- AcpiUtGetNodeName (Node), Node, NotifyValue,
- AcpiUtGetNotifyName (NotifyValue)));
-
- /* Get the notify object attached to the NS Node */
+ /* Are Notifies allowed on this object? */
- ObjDesc = AcpiNsGetAttachedObject (Node);
- if (ObjDesc)
+ if (!AcpiEvIsNotifyObject (Node))
{
- /* We have the notify object, Get the right handler */
-
- switch (Node->Type)
- {
- /* Notify allowed only on these types */
+ return (AE_TYPE);
+ }
- case ACPI_TYPE_DEVICE:
- case ACPI_TYPE_THERMAL:
- case ACPI_TYPE_PROCESSOR:
+ /* Get the correct notify list type (System or Device) */
- if (NotifyValue <= ACPI_MAX_SYS_NOTIFY)
- {
- HandlerObj = ObjDesc->CommonNotify.SystemNotify;
- }
- else
- {
- HandlerObj = ObjDesc->CommonNotify.DeviceNotify;
- }
- break;
+ if (NotifyValue <= ACPI_MAX_SYS_NOTIFY)
+ {
+ HandlerListId = ACPI_SYSTEM_HANDLER_LIST;
+ }
+ else
+ {
+ HandlerListId = ACPI_DEVICE_HANDLER_LIST;
+ }
- default:
+ /* Get the notify object attached to the namespace Node */
- /* All other types are not supported */
+ ObjDesc = AcpiNsGetAttachedObject (Node);
+ if (ObjDesc)
+ {
+ /* We have an attached object, Get the correct handler list */
- return (AE_TYPE);
- }
+ HandlerListHead = ObjDesc->CommonNotify.NotifyList[HandlerListId];
}
/*
- * If there is any handler to run, schedule the dispatcher.
- * Check for:
- * 1) Global system notify handler
- * 2) Global device notify handler
- * 3) Per-device notify handler
+ * If there is no notify handler (Global or Local)
+ * for this object, just ignore the notify
*/
- if ((AcpiGbl_SystemNotify.Handler &&
- (NotifyValue <= ACPI_MAX_SYS_NOTIFY)) ||
- (AcpiGbl_DeviceNotify.Handler &&
- (NotifyValue > ACPI_MAX_SYS_NOTIFY)) ||
- HandlerObj)
+ if (!AcpiGbl_GlobalNotify[HandlerListId].Handler && !HandlerListHead)
{
- NotifyInfo = AcpiUtCreateGenericState ();
- if (!NotifyInfo)
- {
- return (AE_NO_MEMORY);
- }
+ ACPI_DEBUG_PRINT ((ACPI_DB_INFO,
+ "No notify handler for Notify, ignoring (%4.4s, %X) node %p\n",
+ AcpiUtGetNodeName (Node), NotifyValue, Node));
- if (!HandlerObj)
- {
- ACPI_DEBUG_PRINT ((ACPI_DB_INFO,
- "Executing system notify handler for Notify (%4.4s, %X) "
- "node %p\n",
- AcpiUtGetNodeName (Node), NotifyValue, Node));
- }
+ return (AE_OK);
+ }
- NotifyInfo->Common.DescriptorType = ACPI_DESC_TYPE_STATE_NOTIFY;
- NotifyInfo->Notify.Node = Node;
- NotifyInfo->Notify.Value = (UINT16) NotifyValue;
- NotifyInfo->Notify.HandlerObj = HandlerObj;
+ /* Setup notify info and schedule the notify dispatcher */
- Status = AcpiOsExecute (
- OSL_NOTIFY_HANDLER, AcpiEvNotifyDispatch, NotifyInfo);
- if (ACPI_FAILURE (Status))
- {
- AcpiUtDeleteGenericState (NotifyInfo);
- }
- }
- else
+ Info = AcpiUtCreateGenericState ();
+ if (!Info)
{
- /* There is no notify handler (per-device or system) for this device */
+ return (AE_NO_MEMORY);
+ }
- ACPI_DEBUG_PRINT ((ACPI_DB_INFO,
- "No notify handler for Notify (%4.4s, %X) node %p\n",
- AcpiUtGetNodeName (Node), NotifyValue, Node));
+ Info->Common.DescriptorType = ACPI_DESC_TYPE_STATE_NOTIFY;
+
+ Info->Notify.Node = Node;
+ Info->Notify.Value = (UINT16) NotifyValue;
+ Info->Notify.HandlerListId = HandlerListId;
+ Info->Notify.HandlerListHead = HandlerListHead;
+ Info->Notify.Global = &AcpiGbl_GlobalNotify[HandlerListId];
+
+ ACPI_DEBUG_PRINT ((ACPI_DB_INFO,
+ "Dispatching Notify on [%4.4s] (%s) Value 0x%2.2X (%s) Node %p\n",
+ AcpiUtGetNodeName (Node), AcpiUtGetTypeName (Node->Type),
+ NotifyValue, AcpiUtGetNotifyName (NotifyValue), Node));
+
+ Status = AcpiOsExecute (OSL_NOTIFY_HANDLER, AcpiEvNotifyDispatch,
+ Info);
+ if (ACPI_FAILURE (Status))
+ {
+ AcpiUtDeleteGenericState (Info);
}
return (Status);
@@ -233,64 +210,41 @@ static void ACPI_SYSTEM_XFACE
AcpiEvNotifyDispatch (
void *Context)
{
- ACPI_GENERIC_STATE *NotifyInfo = (ACPI_GENERIC_STATE *) Context;
- ACPI_NOTIFY_HANDLER GlobalHandler = NULL;
- void *GlobalContext = NULL;
+ ACPI_GENERIC_STATE *Info = (ACPI_GENERIC_STATE *) Context;
ACPI_OPERAND_OBJECT *HandlerObj;
ACPI_FUNCTION_ENTRY ();
- /*
- * We will invoke a global notify handler if installed. This is done
- * _before_ we invoke the per-device handler attached to the device.
- */
- if (NotifyInfo->Notify.Value <= ACPI_MAX_SYS_NOTIFY)
- {
- /* Global system notification handler */
-
- if (AcpiGbl_SystemNotify.Handler)
- {
- GlobalHandler = AcpiGbl_SystemNotify.Handler;
- GlobalContext = AcpiGbl_SystemNotify.Context;
- }
- }
- else
- {
- /* Global driver notification handler */
+ /* Invoke a global notify handler if installed */
- if (AcpiGbl_DeviceNotify.Handler)
- {
- GlobalHandler = AcpiGbl_DeviceNotify.Handler;
- GlobalContext = AcpiGbl_DeviceNotify.Context;
- }
- }
-
- /* Invoke the system handler first, if present */
-
- if (GlobalHandler)
+ if (Info->Notify.Global->Handler)
{
- GlobalHandler (NotifyInfo->Notify.Node, NotifyInfo->Notify.Value,
- GlobalContext);
+ Info->Notify.Global->Handler (Info->Notify.Node,
+ Info->Notify.Value,
+ Info->Notify.Global->Context);
}
- /* Now invoke the per-device handler, if present */
+ /* Now invoke the local notify handler(s) if any are installed */
- HandlerObj = NotifyInfo->Notify.HandlerObj;
- if (HandlerObj)
+ HandlerObj = Info->Notify.HandlerListHead;
+ while (HandlerObj)
{
- HandlerObj->Notify.Handler (NotifyInfo->Notify.Node,
- NotifyInfo->Notify.Value,
+ HandlerObj->Notify.Handler (Info->Notify.Node,
+ Info->Notify.Value,
HandlerObj->Notify.Context);
+
+ HandlerObj = HandlerObj->Notify.Next[Info->Notify.HandlerListId];
}
/* All done with the info object */
- AcpiUtDeleteGenericState (NotifyInfo);
+ AcpiUtDeleteGenericState (Info);
}
+#if (!ACPI_REDUCED_HARDWARE)
/******************************************************************************
*
* FUNCTION: AcpiEvTerminate
@@ -370,3 +324,5 @@ AcpiEvTerminate (
}
return_VOID;
}
+
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/events/evregion.c b/usr/src/uts/intel/io/acpica/events/evregion.c
index 193eb07b92..8c1a8ebbae 100644
--- a/usr/src/uts/intel/io/acpica/events/evregion.c
+++ b/usr/src/uts/intel/io/acpica/events/evregion.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -372,6 +372,7 @@ Cleanup1:
* FUNCTION: AcpiEvAddressSpaceDispatch
*
* PARAMETERS: RegionObj - Internal region object
+ * FieldObj - Corresponding field. Can be NULL.
* Function - Read or Write operation
* RegionOffset - Where in the region to read or write
* BitWidth - Field width in bits (8, 16, 32, or 64)
@@ -388,6 +389,7 @@ Cleanup1:
ACPI_STATUS
AcpiEvAddressSpaceDispatch (
ACPI_OPERAND_OBJECT *RegionObj,
+ ACPI_OPERAND_OBJECT *FieldObj,
UINT32 Function,
UINT32 RegionOffset,
UINT32 BitWidth,
@@ -399,6 +401,7 @@ AcpiEvAddressSpaceDispatch (
ACPI_OPERAND_OBJECT *HandlerDesc;
ACPI_OPERAND_OBJECT *RegionObj2;
void *RegionContext = NULL;
+ ACPI_CONNECTION_INFO *Context;
ACPI_FUNCTION_TRACE (EvAddressSpaceDispatch);
@@ -423,6 +426,8 @@ AcpiEvAddressSpaceDispatch (
return_ACPI_STATUS (AE_NOT_EXIST);
}
+ Context = HandlerDesc->AddressSpace.Context;
+
/*
* It may be the case that the region has never been initialized.
* Some types of regions require special init code
@@ -450,7 +455,7 @@ AcpiEvAddressSpaceDispatch (
AcpiExExitInterpreter ();
Status = RegionSetup (RegionObj, ACPI_REGION_ACTIVATE,
- HandlerDesc->AddressSpace.Context, &RegionContext);
+ Context, &RegionContext);
/* Re-enter the interpreter */
@@ -499,6 +504,27 @@ AcpiEvAddressSpaceDispatch (
ACPI_FORMAT_NATIVE_UINT (RegionObj->Region.Address + RegionOffset),
AcpiUtGetRegionName (RegionObj->Region.SpaceId)));
+
+ /*
+ * Special handling for GenericSerialBus and GeneralPurposeIo:
+ * There are three extra parameters that must be passed to the
+ * handler via the context:
+ * 1) Connection buffer, a resource template from Connection() op.
+ * 2) Length of the above buffer.
+ * 3) Actual access length from the AccessAs() op.
+ */
+ if (((RegionObj->Region.SpaceId == ACPI_ADR_SPACE_GSBUS) ||
+ (RegionObj->Region.SpaceId == ACPI_ADR_SPACE_GPIO)) &&
+ Context &&
+ FieldObj)
+ {
+ /* Get the Connection (ResourceTemplate) buffer */
+
+ Context->Connection = FieldObj->Field.ResourceBuffer;
+ Context->Length = FieldObj->Field.ResourceLength;
+ Context->AccessLength = FieldObj->Field.AccessLength;
+ }
+
if (!(HandlerDesc->AddressSpace.HandlerFlags &
ACPI_ADDR_HANDLER_DEFAULT_INSTALLED))
{
@@ -514,7 +540,7 @@ AcpiEvAddressSpaceDispatch (
Status = Handler (Function,
(RegionObj->Region.Address + RegionOffset), BitWidth, Value,
- HandlerDesc->AddressSpace.Context, RegionObj2->Extra.RegionContext);
+ Context, RegionObj2->Extra.RegionContext);
if (ACPI_FAILURE (Status))
{
diff --git a/usr/src/uts/intel/io/acpica/events/evrgnini.c b/usr/src/uts/intel/io/acpica/events/evrgnini.c
index 496e40e1aa..92946b5aaa 100644
--- a/usr/src/uts/intel/io/acpica/events/evrgnini.c
+++ b/usr/src/uts/intel/io/acpica/events/evrgnini.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/events/evsci.c b/usr/src/uts/intel/io/acpica/events/evsci.c
index 157c184f2b..c47cccc8db 100644
--- a/usr/src/uts/intel/io/acpica/events/evsci.c
+++ b/usr/src/uts/intel/io/acpica/events/evsci.c
@@ -6,7 +6,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -50,6 +50,8 @@
#define _COMPONENT ACPI_EVENTS
ACPI_MODULE_NAME ("evsci")
+#if (!ACPI_REDUCED_HARDWARE) /* Entire module */
+
/* Local prototypes */
static UINT32 ACPI_SYSTEM_XFACE
@@ -204,4 +206,4 @@ AcpiEvRemoveSciHandler (
return_ACPI_STATUS (Status);
}
-
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/events/evxface.c b/usr/src/uts/intel/io/acpica/events/evxface.c
index 8a903a6b47..1b709a5787 100644
--- a/usr/src/uts/intel/io/acpica/events/evxface.c
+++ b/usr/src/uts/intel/io/acpica/events/evxface.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -56,6 +56,337 @@
/*******************************************************************************
*
+ * FUNCTION: AcpiInstallNotifyHandler
+ *
+ * PARAMETERS: Device - The device for which notifies will be handled
+ * HandlerType - The type of handler:
+ * ACPI_SYSTEM_NOTIFY: System Handler (00-7F)
+ * ACPI_DEVICE_NOTIFY: Device Handler (80-FF)
+ * ACPI_ALL_NOTIFY: Both System and Device
+ * Handler - Address of the handler
+ * Context - Value passed to the handler on each GPE
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Install a handler for notifications on an ACPI Device,
+ * ThermalZone, or Processor object.
+ *
+ * NOTES: The Root namespace object may have only one handler for each
+ * type of notify (System/Device). Device/Thermal/Processor objects
+ * may have one device notify handler, and multiple system notify
+ * handlers.
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiInstallNotifyHandler (
+ ACPI_HANDLE Device,
+ UINT32 HandlerType,
+ ACPI_NOTIFY_HANDLER Handler,
+ void *Context)
+{
+ ACPI_NAMESPACE_NODE *Node = ACPI_CAST_PTR (ACPI_NAMESPACE_NODE, Device);
+ ACPI_OPERAND_OBJECT *ObjDesc;
+ ACPI_OPERAND_OBJECT *HandlerObj;
+ ACPI_STATUS Status;
+ UINT32 i;
+
+
+ ACPI_FUNCTION_TRACE (AcpiInstallNotifyHandler);
+
+
+ /* Parameter validation */
+
+ if ((!Device) || (!Handler) || (!HandlerType) ||
+ (HandlerType > ACPI_MAX_NOTIFY_HANDLER_TYPE))
+ {
+ return_ACPI_STATUS (AE_BAD_PARAMETER);
+ }
+
+ Status = AcpiUtAcquireMutex (ACPI_MTX_NAMESPACE);
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+
+ /*
+ * Root Object:
+ * Registering a notify handler on the root object indicates that the
+ * caller wishes to receive notifications for all objects. Note that
+ * only one global handler can be registered per notify type.
+ * Ensure that a handler is not already installed.
+ */
+ if (Device == ACPI_ROOT_OBJECT)
+ {
+ for (i = 0; i < ACPI_NUM_NOTIFY_TYPES; i++)
+ {
+ if (HandlerType & (i+1))
+ {
+ if (AcpiGbl_GlobalNotify[i].Handler)
+ {
+ Status = AE_ALREADY_EXISTS;
+ goto UnlockAndExit;
+ }
+
+ AcpiGbl_GlobalNotify[i].Handler = Handler;
+ AcpiGbl_GlobalNotify[i].Context = Context;
+ }
+ }
+
+ goto UnlockAndExit; /* Global notify handler installed, all done */
+ }
+
+ /*
+ * All Other Objects:
+ * Caller will only receive notifications specific to the target
+ * object. Note that only certain object types are allowed to
+ * receive notifications.
+ */
+
+ /* Are Notifies allowed on this object? */
+
+ if (!AcpiEvIsNotifyObject (Node))
+ {
+ Status = AE_TYPE;
+ goto UnlockAndExit;
+ }
+
+ /* Check for an existing internal object, might not exist */
+
+ ObjDesc = AcpiNsGetAttachedObject (Node);
+ if (!ObjDesc)
+ {
+ /* Create a new object */
+
+ ObjDesc = AcpiUtCreateInternalObject (Node->Type);
+ if (!ObjDesc)
+ {
+ Status = AE_NO_MEMORY;
+ goto UnlockAndExit;
+ }
+
+ /* Attach new object to the Node, remove local reference */
+
+ Status = AcpiNsAttachObject (Device, ObjDesc, Node->Type);
+ AcpiUtRemoveReference (ObjDesc);
+ if (ACPI_FAILURE (Status))
+ {
+ goto UnlockAndExit;
+ }
+ }
+
+ /* Ensure that the handler is not already installed in the lists */
+
+ for (i = 0; i < ACPI_NUM_NOTIFY_TYPES; i++)
+ {
+ if (HandlerType & (i+1))
+ {
+ HandlerObj = ObjDesc->CommonNotify.NotifyList[i];
+ while (HandlerObj)
+ {
+ if (HandlerObj->Notify.Handler == Handler)
+ {
+ Status = AE_ALREADY_EXISTS;
+ goto UnlockAndExit;
+ }
+
+ HandlerObj = HandlerObj->Notify.Next[i];
+ }
+ }
+ }
+
+ /* Create and populate a new notify handler object */
+
+ HandlerObj = AcpiUtCreateInternalObject (ACPI_TYPE_LOCAL_NOTIFY);
+ if (!HandlerObj)
+ {
+ Status = AE_NO_MEMORY;
+ goto UnlockAndExit;
+ }
+
+ HandlerObj->Notify.Node = Node;
+ HandlerObj->Notify.HandlerType = HandlerType;
+ HandlerObj->Notify.Handler = Handler;
+ HandlerObj->Notify.Context = Context;
+
+ /* Install the handler at the list head(s) */
+
+ for (i = 0; i < ACPI_NUM_NOTIFY_TYPES; i++)
+ {
+ if (HandlerType & (i+1))
+ {
+ HandlerObj->Notify.Next[i] =
+ ObjDesc->CommonNotify.NotifyList[i];
+
+ ObjDesc->CommonNotify.NotifyList[i] = HandlerObj;
+ }
+ }
+
+ /* Add an extra reference if handler was installed in both lists */
+
+ if (HandlerType == ACPI_ALL_NOTIFY)
+ {
+ AcpiUtAddReference (HandlerObj);
+ }
+
+
+UnlockAndExit:
+ (void) AcpiUtReleaseMutex (ACPI_MTX_NAMESPACE);
+ return_ACPI_STATUS (Status);
+}
+
+ACPI_EXPORT_SYMBOL (AcpiInstallNotifyHandler)
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiRemoveNotifyHandler
+ *
+ * PARAMETERS: Device - The device for which the handler is installed
+ * HandlerType - The type of handler:
+ * ACPI_SYSTEM_NOTIFY: System Handler (00-7F)
+ * ACPI_DEVICE_NOTIFY: Device Handler (80-FF)
+ * ACPI_ALL_NOTIFY: Both System and Device
+ * Handler - Address of the handler
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Remove a handler for notifies on an ACPI device
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiRemoveNotifyHandler (
+ ACPI_HANDLE Device,
+ UINT32 HandlerType,
+ ACPI_NOTIFY_HANDLER Handler)
+{
+ ACPI_NAMESPACE_NODE *Node = ACPI_CAST_PTR (ACPI_NAMESPACE_NODE, Device);
+ ACPI_OPERAND_OBJECT *ObjDesc;
+ ACPI_OPERAND_OBJECT *HandlerObj;
+ ACPI_OPERAND_OBJECT *PreviousHandlerObj;
+ ACPI_STATUS Status;
+ UINT32 i;
+
+
+ ACPI_FUNCTION_TRACE (AcpiRemoveNotifyHandler);
+
+
+ /* Parameter validation */
+
+ if ((!Device) || (!Handler) || (!HandlerType) ||
+ (HandlerType > ACPI_MAX_NOTIFY_HANDLER_TYPE))
+ {
+ return_ACPI_STATUS (AE_BAD_PARAMETER);
+ }
+
+#ifdef _UNDER_DEVELOPMENT
+ /* Make sure all deferred tasks are completed */
+
+ AcpiOsWaitEventsComplete (NULL);
+#endif
+
+ Status = AcpiUtAcquireMutex (ACPI_MTX_NAMESPACE);
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+
+ /* Root Object. Global handlers are removed here */
+
+ if (Device == ACPI_ROOT_OBJECT)
+ {
+ for (i = 0; i < ACPI_NUM_NOTIFY_TYPES; i++)
+ {
+ if (HandlerType & (i+1))
+ {
+ if (!AcpiGbl_GlobalNotify[i].Handler ||
+ (AcpiGbl_GlobalNotify[i].Handler != Handler))
+ {
+ Status = AE_NOT_EXIST;
+ goto UnlockAndExit;
+ }
+
+ ACPI_DEBUG_PRINT ((ACPI_DB_INFO,
+ "Removing global notify handler\n"));
+
+ AcpiGbl_GlobalNotify[i].Handler = NULL;
+ AcpiGbl_GlobalNotify[i].Context = NULL;
+ }
+ }
+
+ goto UnlockAndExit;
+ }
+
+ /* All other objects: Are Notifies allowed on this object? */
+
+ if (!AcpiEvIsNotifyObject (Node))
+ {
+ Status = AE_TYPE;
+ goto UnlockAndExit;
+ }
+
+ /* Must have an existing internal object */
+
+ ObjDesc = AcpiNsGetAttachedObject (Node);
+ if (!ObjDesc)
+ {
+ Status = AE_NOT_EXIST;
+ goto UnlockAndExit;
+ }
+
+ /* Internal object exists. Find the handler and remove it */
+
+ for (i = 0; i < ACPI_NUM_NOTIFY_TYPES; i++)
+ {
+ if (HandlerType & (i+1))
+ {
+ HandlerObj = ObjDesc->CommonNotify.NotifyList[i];
+ PreviousHandlerObj = NULL;
+
+ /* Attempt to find the handler in the handler list */
+
+ while (HandlerObj &&
+ (HandlerObj->Notify.Handler != Handler))
+ {
+ PreviousHandlerObj = HandlerObj;
+ HandlerObj = HandlerObj->Notify.Next[i];
+ }
+
+ if (!HandlerObj)
+ {
+ Status = AE_NOT_EXIST;
+ goto UnlockAndExit;
+ }
+
+ /* Remove the handler object from the list */
+
+ if (PreviousHandlerObj) /* Handler is not at the list head */
+ {
+ PreviousHandlerObj->Notify.Next[i] =
+ HandlerObj->Notify.Next[i];
+ }
+ else /* Handler is at the list head */
+ {
+ ObjDesc->CommonNotify.NotifyList[i] =
+ HandlerObj->Notify.Next[i];
+ }
+
+ AcpiUtRemoveReference (HandlerObj);
+ }
+ }
+
+
+UnlockAndExit:
+ (void) AcpiUtReleaseMutex (ACPI_MTX_NAMESPACE);
+ return_ACPI_STATUS (Status);
+}
+
+ACPI_EXPORT_SYMBOL (AcpiRemoveNotifyHandler)
+
+
+/*******************************************************************************
+ *
* FUNCTION: AcpiInstallExceptionHandler
*
* PARAMETERS: Handler - Pointer to the handler function for the
@@ -103,6 +434,7 @@ Cleanup:
ACPI_EXPORT_SYMBOL (AcpiInstallExceptionHandler)
+#if (!ACPI_REDUCED_HARDWARE)
/*******************************************************************************
*
* FUNCTION: AcpiInstallGlobalEventHandler
@@ -307,357 +639,6 @@ ACPI_EXPORT_SYMBOL (AcpiRemoveFixedEventHandler)
/*******************************************************************************
*
- * FUNCTION: AcpiInstallNotifyHandler
- *
- * PARAMETERS: Device - The device for which notifies will be handled
- * HandlerType - The type of handler:
- * ACPI_SYSTEM_NOTIFY: SystemHandler (00-7f)
- * ACPI_DEVICE_NOTIFY: DriverHandler (80-ff)
- * ACPI_ALL_NOTIFY: both system and device
- * Handler - Address of the handler
- * Context - Value passed to the handler on each GPE
- *
- * RETURN: Status
- *
- * DESCRIPTION: Install a handler for notifies on an ACPI device
- *
- ******************************************************************************/
-
-ACPI_STATUS
-AcpiInstallNotifyHandler (
- ACPI_HANDLE Device,
- UINT32 HandlerType,
- ACPI_NOTIFY_HANDLER Handler,
- void *Context)
-{
- ACPI_OPERAND_OBJECT *ObjDesc;
- ACPI_OPERAND_OBJECT *NotifyObj;
- ACPI_NAMESPACE_NODE *Node;
- ACPI_STATUS Status;
-
-
- ACPI_FUNCTION_TRACE (AcpiInstallNotifyHandler);
-
-
- /* Parameter validation */
-
- if ((!Device) ||
- (!Handler) ||
- (HandlerType > ACPI_MAX_NOTIFY_HANDLER_TYPE))
- {
- return_ACPI_STATUS (AE_BAD_PARAMETER);
- }
-
- Status = AcpiUtAcquireMutex (ACPI_MTX_NAMESPACE);
- if (ACPI_FAILURE (Status))
- {
- return_ACPI_STATUS (Status);
- }
-
- /* Convert and validate the device handle */
-
- Node = AcpiNsValidateHandle (Device);
- if (!Node)
- {
- Status = AE_BAD_PARAMETER;
- goto UnlockAndExit;
- }
-
- /*
- * Root Object:
- * Registering a notify handler on the root object indicates that the
- * caller wishes to receive notifications for all objects. Note that
- * only one <external> global handler can be regsitered (per notify type).
- */
- if (Device == ACPI_ROOT_OBJECT)
- {
- /* Make sure the handler is not already installed */
-
- if (((HandlerType & ACPI_SYSTEM_NOTIFY) &&
- AcpiGbl_SystemNotify.Handler) ||
- ((HandlerType & ACPI_DEVICE_NOTIFY) &&
- AcpiGbl_DeviceNotify.Handler))
- {
- Status = AE_ALREADY_EXISTS;
- goto UnlockAndExit;
- }
-
- if (HandlerType & ACPI_SYSTEM_NOTIFY)
- {
- AcpiGbl_SystemNotify.Node = Node;
- AcpiGbl_SystemNotify.Handler = Handler;
- AcpiGbl_SystemNotify.Context = Context;
- }
-
- if (HandlerType & ACPI_DEVICE_NOTIFY)
- {
- AcpiGbl_DeviceNotify.Node = Node;
- AcpiGbl_DeviceNotify.Handler = Handler;
- AcpiGbl_DeviceNotify.Context = Context;
- }
-
- /* Global notify handler installed */
- }
-
- /*
- * All Other Objects:
- * Caller will only receive notifications specific to the target object.
- * Note that only certain object types can receive notifications.
- */
- else
- {
- /* Notifies allowed on this object? */
-
- if (!AcpiEvIsNotifyObject (Node))
- {
- Status = AE_TYPE;
- goto UnlockAndExit;
- }
-
- /* Check for an existing internal object */
-
- ObjDesc = AcpiNsGetAttachedObject (Node);
- if (ObjDesc)
- {
- /* Object exists - make sure there's no handler */
-
- if (((HandlerType & ACPI_SYSTEM_NOTIFY) &&
- ObjDesc->CommonNotify.SystemNotify) ||
- ((HandlerType & ACPI_DEVICE_NOTIFY) &&
- ObjDesc->CommonNotify.DeviceNotify))
- {
- Status = AE_ALREADY_EXISTS;
- goto UnlockAndExit;
- }
- }
- else
- {
- /* Create a new object */
-
- ObjDesc = AcpiUtCreateInternalObject (Node->Type);
- if (!ObjDesc)
- {
- Status = AE_NO_MEMORY;
- goto UnlockAndExit;
- }
-
- /* Attach new object to the Node */
-
- Status = AcpiNsAttachObject (Device, ObjDesc, Node->Type);
-
- /* Remove local reference to the object */
-
- AcpiUtRemoveReference (ObjDesc);
- if (ACPI_FAILURE (Status))
- {
- goto UnlockAndExit;
- }
- }
-
- /* Install the handler */
-
- NotifyObj = AcpiUtCreateInternalObject (ACPI_TYPE_LOCAL_NOTIFY);
- if (!NotifyObj)
- {
- Status = AE_NO_MEMORY;
- goto UnlockAndExit;
- }
-
- NotifyObj->Notify.Node = Node;
- NotifyObj->Notify.Handler = Handler;
- NotifyObj->Notify.Context = Context;
-
- if (HandlerType & ACPI_SYSTEM_NOTIFY)
- {
- ObjDesc->CommonNotify.SystemNotify = NotifyObj;
- }
-
- if (HandlerType & ACPI_DEVICE_NOTIFY)
- {
- ObjDesc->CommonNotify.DeviceNotify = NotifyObj;
- }
-
- if (HandlerType == ACPI_ALL_NOTIFY)
- {
- /* Extra ref if installed in both */
-
- AcpiUtAddReference (NotifyObj);
- }
- }
-
-
-UnlockAndExit:
- (void) AcpiUtReleaseMutex (ACPI_MTX_NAMESPACE);
- return_ACPI_STATUS (Status);
-}
-
-ACPI_EXPORT_SYMBOL (AcpiInstallNotifyHandler)
-
-
-/*******************************************************************************
- *
- * FUNCTION: AcpiRemoveNotifyHandler
- *
- * PARAMETERS: Device - The device for which notifies will be handled
- * HandlerType - The type of handler:
- * ACPI_SYSTEM_NOTIFY: SystemHandler (00-7f)
- * ACPI_DEVICE_NOTIFY: DriverHandler (80-ff)
- * ACPI_ALL_NOTIFY: both system and device
- * Handler - Address of the handler
- *
- * RETURN: Status
- *
- * DESCRIPTION: Remove a handler for notifies on an ACPI device
- *
- ******************************************************************************/
-
-ACPI_STATUS
-AcpiRemoveNotifyHandler (
- ACPI_HANDLE Device,
- UINT32 HandlerType,
- ACPI_NOTIFY_HANDLER Handler)
-{
- ACPI_OPERAND_OBJECT *NotifyObj;
- ACPI_OPERAND_OBJECT *ObjDesc;
- ACPI_NAMESPACE_NODE *Node;
- ACPI_STATUS Status;
-
-
- ACPI_FUNCTION_TRACE (AcpiRemoveNotifyHandler);
-
-
- /* Parameter validation */
-
- if ((!Device) ||
- (!Handler) ||
- (HandlerType > ACPI_MAX_NOTIFY_HANDLER_TYPE))
- {
- return_ACPI_STATUS (AE_BAD_PARAMETER);
- }
-
- Status = AcpiUtAcquireMutex (ACPI_MTX_NAMESPACE);
- if (ACPI_FAILURE (Status))
- {
- return_ACPI_STATUS (Status);
- }
-
- /* Convert and validate the device handle */
-
- Node = AcpiNsValidateHandle (Device);
- if (!Node)
- {
- Status = AE_BAD_PARAMETER;
- goto UnlockAndExit;
- }
-
- /* Root Object */
-
- if (Device == ACPI_ROOT_OBJECT)
- {
- ACPI_DEBUG_PRINT ((ACPI_DB_INFO,
- "Removing notify handler for namespace root object\n"));
-
- if (((HandlerType & ACPI_SYSTEM_NOTIFY) &&
- !AcpiGbl_SystemNotify.Handler) ||
- ((HandlerType & ACPI_DEVICE_NOTIFY) &&
- !AcpiGbl_DeviceNotify.Handler))
- {
- Status = AE_NOT_EXIST;
- goto UnlockAndExit;
- }
-
- if (HandlerType & ACPI_SYSTEM_NOTIFY)
- {
- AcpiGbl_SystemNotify.Node = NULL;
- AcpiGbl_SystemNotify.Handler = NULL;
- AcpiGbl_SystemNotify.Context = NULL;
- }
-
- if (HandlerType & ACPI_DEVICE_NOTIFY)
- {
- AcpiGbl_DeviceNotify.Node = NULL;
- AcpiGbl_DeviceNotify.Handler = NULL;
- AcpiGbl_DeviceNotify.Context = NULL;
- }
- }
-
- /* All Other Objects */
-
- else
- {
- /* Notifies allowed on this object? */
-
- if (!AcpiEvIsNotifyObject (Node))
- {
- Status = AE_TYPE;
- goto UnlockAndExit;
- }
-
- /* Check for an existing internal object */
-
- ObjDesc = AcpiNsGetAttachedObject (Node);
- if (!ObjDesc)
- {
- Status = AE_NOT_EXIST;
- goto UnlockAndExit;
- }
-
- /* Object exists - make sure there's an existing handler */
-
- if (HandlerType & ACPI_SYSTEM_NOTIFY)
- {
- NotifyObj = ObjDesc->CommonNotify.SystemNotify;
- if (!NotifyObj)
- {
- Status = AE_NOT_EXIST;
- goto UnlockAndExit;
- }
-
- if (NotifyObj->Notify.Handler != Handler)
- {
- Status = AE_BAD_PARAMETER;
- goto UnlockAndExit;
- }
-
- /* Remove the handler */
-
- ObjDesc->CommonNotify.SystemNotify = NULL;
- AcpiUtRemoveReference (NotifyObj);
- }
-
- if (HandlerType & ACPI_DEVICE_NOTIFY)
- {
- NotifyObj = ObjDesc->CommonNotify.DeviceNotify;
- if (!NotifyObj)
- {
- Status = AE_NOT_EXIST;
- goto UnlockAndExit;
- }
-
- if (NotifyObj->Notify.Handler != Handler)
- {
- Status = AE_BAD_PARAMETER;
- goto UnlockAndExit;
- }
-
- /* Remove the handler */
-
- ObjDesc->CommonNotify.DeviceNotify = NULL;
- AcpiUtRemoveReference (NotifyObj);
- }
- }
-
-
-UnlockAndExit:
- (void) AcpiUtReleaseMutex (ACPI_MTX_NAMESPACE);
- return_ACPI_STATUS (Status);
-}
-
-ACPI_EXPORT_SYMBOL (AcpiRemoveNotifyHandler)
-
-
-/*******************************************************************************
- *
* FUNCTION: AcpiInstallGpeHandler
*
* PARAMETERS: GpeDevice - Namespace node for the GPE (NULL for FADT
@@ -975,3 +956,4 @@ AcpiReleaseGlobalLock (
ACPI_EXPORT_SYMBOL (AcpiReleaseGlobalLock)
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/events/evxfevnt.c b/usr/src/uts/intel/io/acpica/events/evxfevnt.c
index a813dd3c96..254214af19 100644
--- a/usr/src/uts/intel/io/acpica/events/evxfevnt.c
+++ b/usr/src/uts/intel/io/acpica/events/evxfevnt.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -52,6 +52,7 @@
ACPI_MODULE_NAME ("evxfevnt")
+#if (!ACPI_REDUCED_HARDWARE) /* Entire module */
/*******************************************************************************
*
* FUNCTION: AcpiEnable
@@ -377,4 +378,4 @@ AcpiGetEventStatus (
ACPI_EXPORT_SYMBOL (AcpiGetEventStatus)
-
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/events/evxfgpe.c b/usr/src/uts/intel/io/acpica/events/evxfgpe.c
index 76019969ec..c0dd88c777 100644
--- a/usr/src/uts/intel/io/acpica/events/evxfgpe.c
+++ b/usr/src/uts/intel/io/acpica/events/evxfgpe.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -53,6 +53,7 @@
ACPI_MODULE_NAME ("evxfgpe")
+#if (!ACPI_REDUCED_HARDWARE) /* Entire module */
/*******************************************************************************
*
* FUNCTION: AcpiUpdateAllGpes
@@ -898,3 +899,5 @@ AcpiGetGpeDevice (
}
ACPI_EXPORT_SYMBOL (AcpiGetGpeDevice)
+
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/events/evxfregn.c b/usr/src/uts/intel/io/acpica/events/evxfregn.c
index 4fd637e31f..493df9217d 100644
--- a/usr/src/uts/intel/io/acpica/events/evxfregn.c
+++ b/usr/src/uts/intel/io/acpica/events/evxfregn.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exconfig.c b/usr/src/uts/intel/io/acpica/executer/exconfig.c
index d7677e99ce..9e05497b9f 100644
--- a/usr/src/uts/intel/io/acpica/executer/exconfig.c
+++ b/usr/src/uts/intel/io/acpica/executer/exconfig.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -334,7 +334,7 @@ AcpiExRegionRead (
for (i = 0; i < Length; i++)
{
- Status = AcpiEvAddressSpaceDispatch (ObjDesc, ACPI_READ,
+ Status = AcpiEvAddressSpaceDispatch (ObjDesc, NULL, ACPI_READ,
RegionOffset, 8, &Value);
if (ACPI_FAILURE (Status))
{
diff --git a/usr/src/uts/intel/io/acpica/executer/exconvrt.c b/usr/src/uts/intel/io/acpica/executer/exconvrt.c
index 6894acdec7..e79cbc96e9 100644
--- a/usr/src/uts/intel/io/acpica/executer/exconvrt.c
+++ b/usr/src/uts/intel/io/acpica/executer/exconvrt.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/excreate.c b/usr/src/uts/intel/io/acpica/executer/excreate.c
index fe93e0e940..888ff7d7c4 100644
--- a/usr/src/uts/intel/io/acpica/executer/excreate.c
+++ b/usr/src/uts/intel/io/acpica/executer/excreate.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -284,7 +284,7 @@ Cleanup:
*
* PARAMETERS: AmlStart - Pointer to the region declaration AML
* AmlLength - Max length of the declaration AML
- * RegionSpace - SpaceID for the region
+ * SpaceId - Address space ID for the region
* WalkState - Current state
*
* RETURN: Status
@@ -297,7 +297,7 @@ ACPI_STATUS
AcpiExCreateRegion (
UINT8 *AmlStart,
UINT32 AmlLength,
- UINT8 RegionSpace,
+ UINT8 SpaceId,
ACPI_WALK_STATE *WalkState)
{
ACPI_STATUS Status;
@@ -326,16 +326,18 @@ AcpiExCreateRegion (
* Space ID must be one of the predefined IDs, or in the user-defined
* range
*/
- if ((RegionSpace >= ACPI_NUM_PREDEFINED_REGIONS) &&
- (RegionSpace < ACPI_USER_REGION_BEGIN) &&
- (RegionSpace != ACPI_ADR_SPACE_DATA_TABLE))
+ if (!AcpiIsValidSpaceId (SpaceId))
{
- ACPI_ERROR ((AE_INFO, "Invalid AddressSpace type 0x%X", RegionSpace));
- return_ACPI_STATUS (AE_AML_INVALID_SPACE_ID);
+ /*
+ * Print an error message, but continue. We don't want to abort
+ * a table load for this exception. Instead, if the region is
+ * actually used at runtime, abort the executing method.
+ */
+ ACPI_ERROR ((AE_INFO, "Invalid/unknown Address Space ID: 0x%2.2X", SpaceId));
}
ACPI_DEBUG_PRINT ((ACPI_DB_LOAD, "Region Type - %s (0x%X)\n",
- AcpiUtGetRegionName (RegionSpace), RegionSpace));
+ AcpiUtGetRegionName (SpaceId), SpaceId));
/* Create the region descriptor */
@@ -353,10 +355,18 @@ AcpiExCreateRegion (
RegionObj2 = ObjDesc->Common.NextObject;
RegionObj2->Extra.AmlStart = AmlStart;
RegionObj2->Extra.AmlLength = AmlLength;
+ if (WalkState->ScopeInfo)
+ {
+ RegionObj2->Extra.ScopeNode = WalkState->ScopeInfo->Scope.Node;
+ }
+ else
+ {
+ RegionObj2->Extra.ScopeNode = Node;
+ }
/* Init the region from the operands */
- ObjDesc->Region.SpaceId = RegionSpace;
+ ObjDesc->Region.SpaceId = SpaceId;
ObjDesc->Region.Address = 0;
ObjDesc->Region.Length = 0;
ObjDesc->Region.Node = Node;
diff --git a/usr/src/uts/intel/io/acpica/executer/exdebug.c b/usr/src/uts/intel/io/acpica/executer/exdebug.c
index 62bf810c79..166b2af170 100644
--- a/usr/src/uts/intel/io/acpica/executer/exdebug.c
+++ b/usr/src/uts/intel/io/acpica/executer/exdebug.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exdump.c b/usr/src/uts/intel/io/acpica/executer/exdump.c
index 60003e6154..0943e668c1 100644
--- a/usr/src/uts/intel/io/acpica/executer/exdump.c
+++ b/usr/src/uts/intel/io/acpica/executer/exdump.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -131,8 +131,8 @@ static ACPI_EXDUMP_INFO AcpiExDumpDevice[4] =
{
{ACPI_EXD_INIT, ACPI_EXD_TABLE_SIZE (AcpiExDumpDevice), NULL},
{ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Device.Handler), "Handler"},
- {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Device.SystemNotify), "System Notify"},
- {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Device.DeviceNotify), "Device Notify"}
+ {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Device.NotifyList[0]), "System Notify"},
+ {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Device.NotifyList[1]), "Device Notify"}
};
static ACPI_EXDUMP_INFO AcpiExDumpEvent[2] =
@@ -179,8 +179,8 @@ static ACPI_EXDUMP_INFO AcpiExDumpPower[5] =
{ACPI_EXD_INIT, ACPI_EXD_TABLE_SIZE (AcpiExDumpPower), NULL},
{ACPI_EXD_UINT32, ACPI_EXD_OFFSET (PowerResource.SystemLevel), "System Level"},
{ACPI_EXD_UINT32, ACPI_EXD_OFFSET (PowerResource.ResourceOrder), "Resource Order"},
- {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (PowerResource.SystemNotify), "System Notify"},
- {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (PowerResource.DeviceNotify), "Device Notify"}
+ {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (PowerResource.NotifyList[0]), "System Notify"},
+ {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (PowerResource.NotifyList[1]), "Device Notify"}
};
static ACPI_EXDUMP_INFO AcpiExDumpProcessor[7] =
@@ -189,16 +189,16 @@ static ACPI_EXDUMP_INFO AcpiExDumpProcessor[7] =
{ACPI_EXD_UINT8, ACPI_EXD_OFFSET (Processor.ProcId), "Processor ID"},
{ACPI_EXD_UINT8 , ACPI_EXD_OFFSET (Processor.Length), "Length"},
{ACPI_EXD_ADDRESS, ACPI_EXD_OFFSET (Processor.Address), "Address"},
- {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Processor.SystemNotify), "System Notify"},
- {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Processor.DeviceNotify), "Device Notify"},
+ {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Processor.NotifyList[0]), "System Notify"},
+ {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Processor.NotifyList[1]), "Device Notify"},
{ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Processor.Handler), "Handler"}
};
static ACPI_EXDUMP_INFO AcpiExDumpThermal[4] =
{
{ACPI_EXD_INIT, ACPI_EXD_TABLE_SIZE (AcpiExDumpThermal), NULL},
- {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (ThermalZone.SystemNotify), "System Notify"},
- {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (ThermalZone.DeviceNotify), "Device Notify"},
+ {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (ThermalZone.NotifyList[0]), "System Notify"},
+ {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (ThermalZone.NotifyList[1]), "Device Notify"},
{ACPI_EXD_POINTER, ACPI_EXD_OFFSET (ThermalZone.Handler), "Handler"}
};
@@ -209,11 +209,13 @@ static ACPI_EXDUMP_INFO AcpiExDumpBufferField[3] =
{ACPI_EXD_POINTER, ACPI_EXD_OFFSET (BufferField.BufferObj), "Buffer Object"}
};
-static ACPI_EXDUMP_INFO AcpiExDumpRegionField[3] =
+static ACPI_EXDUMP_INFO AcpiExDumpRegionField[5] =
{
{ACPI_EXD_INIT, ACPI_EXD_TABLE_SIZE (AcpiExDumpRegionField), NULL},
{ACPI_EXD_FIELD, 0, NULL},
- {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Field.RegionObj), "Region Object"}
+ {ACPI_EXD_UINT8, ACPI_EXD_OFFSET (Field.AccessLength), "AccessLength"},
+ {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Field.RegionObj), "Region Object"},
+ {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Field.ResourceBuffer), "ResourceBuffer"}
};
static ACPI_EXDUMP_INFO AcpiExDumpBankField[5] =
@@ -256,11 +258,15 @@ static ACPI_EXDUMP_INFO AcpiExDumpAddressHandler[6] =
{ACPI_EXD_POINTER, ACPI_EXD_OFFSET (AddressSpace.Context), "Context"}
};
-static ACPI_EXDUMP_INFO AcpiExDumpNotify[3] =
+static ACPI_EXDUMP_INFO AcpiExDumpNotify[7] =
{
{ACPI_EXD_INIT, ACPI_EXD_TABLE_SIZE (AcpiExDumpNotify), NULL},
{ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Notify.Node), "Node"},
- {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Notify.Context), "Context"}
+ {ACPI_EXD_UINT32, ACPI_EXD_OFFSET (Notify.HandlerType), "Handler Type"},
+ {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Notify.Handler), "Handler"},
+ {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Notify.Context), "Context"},
+ {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Notify.Next[0]), "Next System Notify"},
+ {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Notify.Next[1]), "Next Device Notify"}
};
@@ -999,10 +1005,7 @@ AcpiExDumpPackageObj (
case ACPI_TYPE_STRING:
AcpiOsPrintf ("[String] Value: ");
- for (i = 0; i < ObjDesc->String.Length; i++)
- {
- AcpiOsPrintf ("%c", ObjDesc->String.Pointer[i]);
- }
+ AcpiUtPrintString (ObjDesc->String.Pointer, ACPI_UINT8_MAX);
AcpiOsPrintf ("\n");
break;
diff --git a/usr/src/uts/intel/io/acpica/executer/exfield.c b/usr/src/uts/intel/io/acpica/executer/exfield.c
index 098a18650e..a682406826 100644
--- a/usr/src/uts/intel/io/acpica/executer/exfield.c
+++ b/usr/src/uts/intel/io/acpica/executer/exfield.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -113,19 +113,25 @@ AcpiExReadDataFromField (
}
else if ((ObjDesc->Common.Type == ACPI_TYPE_LOCAL_REGION_FIELD) &&
(ObjDesc->Field.RegionObj->Region.SpaceId == ACPI_ADR_SPACE_SMBUS ||
+ ObjDesc->Field.RegionObj->Region.SpaceId == ACPI_ADR_SPACE_GSBUS ||
ObjDesc->Field.RegionObj->Region.SpaceId == ACPI_ADR_SPACE_IPMI))
{
/*
- * This is an SMBus or IPMI read. We must create a buffer to hold
+ * This is an SMBus, GSBus or IPMI read. We must create a buffer to hold
* the data and then directly access the region handler.
*
- * Note: Smbus protocol value is passed in upper 16-bits of Function
+ * Note: SMBus and GSBus protocol value is passed in upper 16-bits of Function
*/
if (ObjDesc->Field.RegionObj->Region.SpaceId == ACPI_ADR_SPACE_SMBUS)
{
Length = ACPI_SMBUS_BUFFER_SIZE;
Function = ACPI_READ | (ObjDesc->Field.Attribute << 16);
}
+ else if (ObjDesc->Field.RegionObj->Region.SpaceId == ACPI_ADR_SPACE_GSBUS)
+ {
+ Length = ACPI_GSBUS_BUFFER_SIZE;
+ Function = ACPI_READ | (ObjDesc->Field.Attribute << 16);
+ }
else /* IPMI */
{
Length = ACPI_IPMI_BUFFER_SIZE;
@@ -274,23 +280,24 @@ AcpiExWriteDataToField (
}
else if ((ObjDesc->Common.Type == ACPI_TYPE_LOCAL_REGION_FIELD) &&
(ObjDesc->Field.RegionObj->Region.SpaceId == ACPI_ADR_SPACE_SMBUS ||
+ ObjDesc->Field.RegionObj->Region.SpaceId == ACPI_ADR_SPACE_GSBUS ||
ObjDesc->Field.RegionObj->Region.SpaceId == ACPI_ADR_SPACE_IPMI))
{
/*
- * This is an SMBus or IPMI write. We will bypass the entire field
+ * This is an SMBus, GSBus or IPMI write. We will bypass the entire field
* mechanism and handoff the buffer directly to the handler. For
* these address spaces, the buffer is bi-directional; on a write,
* return data is returned in the same buffer.
*
* Source must be a buffer of sufficient size:
- * ACPI_SMBUS_BUFFER_SIZE or ACPI_IPMI_BUFFER_SIZE.
+ * ACPI_SMBUS_BUFFER_SIZE, ACPI_GSBUS_BUFFER_SIZE, or ACPI_IPMI_BUFFER_SIZE.
*
- * Note: SMBus protocol type is passed in upper 16-bits of Function
+ * Note: SMBus and GSBus protocol type is passed in upper 16-bits of Function
*/
if (SourceDesc->Common.Type != ACPI_TYPE_BUFFER)
{
ACPI_ERROR ((AE_INFO,
- "SMBus or IPMI write requires Buffer, found type %s",
+ "SMBus/IPMI/GenericSerialBus write requires Buffer, found type %s",
AcpiUtGetObjectTypeName (SourceDesc)));
return_ACPI_STATUS (AE_AML_OPERAND_TYPE);
@@ -301,6 +308,11 @@ AcpiExWriteDataToField (
Length = ACPI_SMBUS_BUFFER_SIZE;
Function = ACPI_WRITE | (ObjDesc->Field.Attribute << 16);
}
+ else if (ObjDesc->Field.RegionObj->Region.SpaceId == ACPI_ADR_SPACE_GSBUS)
+ {
+ Length = ACPI_GSBUS_BUFFER_SIZE;
+ Function = ACPI_WRITE | (ObjDesc->Field.Attribute << 16);
+ }
else /* IPMI */
{
Length = ACPI_IPMI_BUFFER_SIZE;
@@ -310,7 +322,7 @@ AcpiExWriteDataToField (
if (SourceDesc->Buffer.Length < Length)
{
ACPI_ERROR ((AE_INFO,
- "SMBus or IPMI write requires Buffer of length %u, found length %u",
+ "SMBus/IPMI/GenericSerialBus write requires Buffer of length %u, found length %u",
Length, SourceDesc->Buffer.Length));
return_ACPI_STATUS (AE_AML_BUFFER_LIMIT);
diff --git a/usr/src/uts/intel/io/acpica/executer/exfldio.c b/usr/src/uts/intel/io/acpica/executer/exfldio.c
index bf663a2f5e..e138e9a13c 100644
--- a/usr/src/uts/intel/io/acpica/executer/exfldio.c
+++ b/usr/src/uts/intel/io/acpica/executer/exfldio.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -98,6 +98,7 @@ AcpiExSetupRegion (
{
ACPI_STATUS Status = AE_OK;
ACPI_OPERAND_OBJECT *RgnDesc;
+ UINT8 SpaceId;
ACPI_FUNCTION_TRACE_U32 (ExSetupRegion, FieldDatumByteOffset);
@@ -116,6 +117,16 @@ AcpiExSetupRegion (
return_ACPI_STATUS (AE_AML_OPERAND_TYPE);
}
+ SpaceId = RgnDesc->Region.SpaceId;
+
+ /* Validate the Space ID */
+
+ if (!AcpiIsValidSpaceId (SpaceId))
+ {
+ ACPI_ERROR ((AE_INFO, "Invalid/unknown Address Space ID: 0x%2.2X", SpaceId));
+ return_ACPI_STATUS (AE_AML_INVALID_SPACE_ID);
+ }
+
/*
* If the Region Address and Length have not been previously evaluated,
* evaluate them now and save the results.
@@ -130,11 +141,12 @@ AcpiExSetupRegion (
}
/*
- * Exit now for SMBus or IPMI address space, it has a non-linear
+ * Exit now for SMBus, GSBus or IPMI address space, it has a non-linear
* address space and the request cannot be directly validated
*/
- if (RgnDesc->Region.SpaceId == ACPI_ADR_SPACE_SMBUS ||
- RgnDesc->Region.SpaceId == ACPI_ADR_SPACE_IPMI)
+ if (SpaceId == ACPI_ADR_SPACE_SMBUS ||
+ SpaceId == ACPI_ADR_SPACE_GSBUS ||
+ SpaceId == ACPI_ADR_SPACE_IPMI)
{
/* SMBus or IPMI has a non-linear address space */
@@ -290,7 +302,8 @@ AcpiExAccessRegion (
/* Invoke the appropriate AddressSpace/OpRegion handler */
- Status = AcpiEvAddressSpaceDispatch (RgnDesc, Function, RegionOffset,
+ Status = AcpiEvAddressSpaceDispatch (RgnDesc, ObjDesc,
+ Function, RegionOffset,
ACPI_MUL_8 (ObjDesc->CommonField.AccessByteWidth), Value);
if (ACPI_FAILURE (Status))
@@ -353,6 +366,11 @@ AcpiExRegisterOverflow (
* The Value is larger than the maximum value that can fit into
* the register.
*/
+ ACPI_ERROR ((AE_INFO,
+ "Index value 0x%8.8X%8.8X overflows field width 0x%X",
+ ACPI_FORMAT_UINT64 (Value),
+ ObjDesc->CommonField.BitLength));
+
return (TRUE);
}
diff --git a/usr/src/uts/intel/io/acpica/executer/exmisc.c b/usr/src/uts/intel/io/acpica/executer/exmisc.c
index c5dd280fe4..63114f5584 100644
--- a/usr/src/uts/intel/io/acpica/executer/exmisc.c
+++ b/usr/src/uts/intel/io/acpica/executer/exmisc.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exmutex.c b/usr/src/uts/intel/io/acpica/executer/exmutex.c
index 6a893f0e90..4c702b8baa 100644
--- a/usr/src/uts/intel/io/acpica/executer/exmutex.c
+++ b/usr/src/uts/intel/io/acpica/executer/exmutex.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exnames.c b/usr/src/uts/intel/io/acpica/executer/exnames.c
index 2188ec0a2f..c3cb6b0b4c 100644
--- a/usr/src/uts/intel/io/acpica/executer/exnames.c
+++ b/usr/src/uts/intel/io/acpica/executer/exnames.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exoparg1.c b/usr/src/uts/intel/io/acpica/executer/exoparg1.c
index 6ac685c82b..9a6cd9fcab 100644
--- a/usr/src/uts/intel/io/acpica/executer/exoparg1.c
+++ b/usr/src/uts/intel/io/acpica/executer/exoparg1.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exoparg2.c b/usr/src/uts/intel/io/acpica/executer/exoparg2.c
index d131277aae..b76da75c49 100644
--- a/usr/src/uts/intel/io/acpica/executer/exoparg2.c
+++ b/usr/src/uts/intel/io/acpica/executer/exoparg2.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exoparg3.c b/usr/src/uts/intel/io/acpica/executer/exoparg3.c
index 6adb1afac2..0edac68e60 100644
--- a/usr/src/uts/intel/io/acpica/executer/exoparg3.c
+++ b/usr/src/uts/intel/io/acpica/executer/exoparg3.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exoparg6.c b/usr/src/uts/intel/io/acpica/executer/exoparg6.c
index fd6e039c01..bfc6585301 100644
--- a/usr/src/uts/intel/io/acpica/executer/exoparg6.c
+++ b/usr/src/uts/intel/io/acpica/executer/exoparg6.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exprep.c b/usr/src/uts/intel/io/acpica/executer/exprep.c
index ee96a6a090..9ec78275bd 100644
--- a/usr/src/uts/intel/io/acpica/executer/exprep.c
+++ b/usr/src/uts/intel/io/acpica/executer/exprep.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -49,6 +49,7 @@
#include "acinterp.h"
#include "amlcode.h"
#include "acnamesp.h"
+#include "acdispat.h"
#define _COMPONENT ACPI_EXECUTER
@@ -484,6 +485,32 @@ AcpiExPrepFieldValue (
ObjDesc->Field.RegionObj = AcpiNsGetAttachedObject (Info->RegionNode);
+ /* Fields specific to GenericSerialBus fields */
+
+ ObjDesc->Field.AccessLength = Info->AccessLength;
+
+ if (Info->ConnectionNode)
+ {
+ SecondDesc = Info->ConnectionNode->Object;
+ if (!(SecondDesc->Common.Flags & AOPOBJ_DATA_VALID))
+ {
+ Status = AcpiDsGetBufferArguments (SecondDesc);
+ if (ACPI_FAILURE (Status))
+ {
+ AcpiUtDeleteObjectDesc (ObjDesc);
+ return_ACPI_STATUS (Status);
+ }
+ }
+
+ ObjDesc->Field.ResourceBuffer = SecondDesc->Buffer.Pointer;
+ ObjDesc->Field.ResourceLength = (UINT16) SecondDesc->Buffer.Length;
+ }
+ else if (Info->ResourceBuffer)
+ {
+ ObjDesc->Field.ResourceBuffer = Info->ResourceBuffer;
+ ObjDesc->Field.ResourceLength = Info->ResourceLength;
+ }
+
/* Allow full data read from EC address space */
if ((ObjDesc->Field.RegionObj->Region.SpaceId == ACPI_ADR_SPACE_EC) &&
diff --git a/usr/src/uts/intel/io/acpica/executer/exregion.c b/usr/src/uts/intel/io/acpica/executer/exregion.c
index b3b9aa6e66..8a40250794 100644
--- a/usr/src/uts/intel/io/acpica/executer/exregion.c
+++ b/usr/src/uts/intel/io/acpica/executer/exregion.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exresnte.c b/usr/src/uts/intel/io/acpica/executer/exresnte.c
index cf8a55531f..03962edf97 100644
--- a/usr/src/uts/intel/io/acpica/executer/exresnte.c
+++ b/usr/src/uts/intel/io/acpica/executer/exresnte.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exresolv.c b/usr/src/uts/intel/io/acpica/executer/exresolv.c
index 652d95dd54..e92fb2468e 100644
--- a/usr/src/uts/intel/io/acpica/executer/exresolv.c
+++ b/usr/src/uts/intel/io/acpica/executer/exresolv.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exresop.c b/usr/src/uts/intel/io/acpica/executer/exresop.c
index 12c872f99a..1bcba69033 100644
--- a/usr/src/uts/intel/io/acpica/executer/exresop.c
+++ b/usr/src/uts/intel/io/acpica/executer/exresop.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exstore.c b/usr/src/uts/intel/io/acpica/executer/exstore.c
index 8679e790b8..656b649478 100644
--- a/usr/src/uts/intel/io/acpica/executer/exstore.c
+++ b/usr/src/uts/intel/io/acpica/executer/exstore.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exstoren.c b/usr/src/uts/intel/io/acpica/executer/exstoren.c
index 71d8809a2d..464d6b1065 100644
--- a/usr/src/uts/intel/io/acpica/executer/exstoren.c
+++ b/usr/src/uts/intel/io/acpica/executer/exstoren.c
@@ -7,7 +7,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exstorob.c b/usr/src/uts/intel/io/acpica/executer/exstorob.c
index 4dfcf15fe3..e74d5b3485 100644
--- a/usr/src/uts/intel/io/acpica/executer/exstorob.c
+++ b/usr/src/uts/intel/io/acpica/executer/exstorob.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exsystem.c b/usr/src/uts/intel/io/acpica/executer/exsystem.c
index 10d6edc951..37f5b234a3 100644
--- a/usr/src/uts/intel/io/acpica/executer/exsystem.c
+++ b/usr/src/uts/intel/io/acpica/executer/exsystem.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exutils.c b/usr/src/uts/intel/io/acpica/executer/exutils.c
index 4daa44f527..b5eca064a2 100644
--- a/usr/src/uts/intel/io/acpica/executer/exutils.c
+++ b/usr/src/uts/intel/io/acpica/executer/exutils.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -499,4 +499,34 @@ AcpiExIntegerToString (
}
}
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiIsValidSpaceId
+ *
+ * PARAMETERS: SpaceId - ID to be validated
+ *
+ * RETURN: TRUE if valid/supported ID.
+ *
+ * DESCRIPTION: Validate an operation region SpaceID.
+ *
+ ******************************************************************************/
+
+BOOLEAN
+AcpiIsValidSpaceId (
+ UINT8 SpaceId)
+{
+
+ if ((SpaceId >= ACPI_NUM_PREDEFINED_REGIONS) &&
+ (SpaceId < ACPI_USER_REGION_BEGIN) &&
+ (SpaceId != ACPI_ADR_SPACE_DATA_TABLE) &&
+ (SpaceId != ACPI_ADR_SPACE_FIXED_HARDWARE))
+ {
+ return (FALSE);
+ }
+
+ return (TRUE);
+}
+
+
#endif
diff --git a/usr/src/uts/intel/io/acpica/hardware/hwacpi.c b/usr/src/uts/intel/io/acpica/hardware/hwacpi.c
index 577666100e..d9f16ef9d7 100644
--- a/usr/src/uts/intel/io/acpica/hardware/hwacpi.c
+++ b/usr/src/uts/intel/io/acpica/hardware/hwacpi.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -52,6 +52,7 @@
ACPI_MODULE_NAME ("hwacpi")
+#if (!ACPI_REDUCED_HARDWARE) /* Entire module */
/******************************************************************************
*
* FUNCTION: AcpiHwSetMode
@@ -204,3 +205,5 @@ AcpiHwGetMode (
return_UINT32 (ACPI_SYS_MODE_LEGACY);
}
}
+
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/hardware/hwesleep.c b/usr/src/uts/intel/io/acpica/hardware/hwesleep.c
new file mode 100644
index 0000000000..1587af019c
--- /dev/null
+++ b/usr/src/uts/intel/io/acpica/hardware/hwesleep.c
@@ -0,0 +1,277 @@
+/******************************************************************************
+ *
+ * Name: hwesleep.c - ACPI Hardware Sleep/Wake Support functions for the
+ * extended FADT-V5 sleep registers.
+ *
+ *****************************************************************************/
+
+/*
+ * Copyright (C) 2000 - 2012, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ * of any contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#include "acpi.h"
+#include "accommon.h"
+
+#define _COMPONENT ACPI_HARDWARE
+ ACPI_MODULE_NAME ("hwesleep")
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiHwExecuteSleepMethod
+ *
+ * PARAMETERS: MethodPathname - Pathname of method to execute
+ * IntegerArgument - Argument to pass to the method
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Execute a sleep/wake related method with one integer argument
+ * and no return value.
+ *
+ ******************************************************************************/
+
+void
+AcpiHwExecuteSleepMethod (
+ char *MethodPathname,
+ UINT32 IntegerArgument)
+{
+ ACPI_OBJECT_LIST ArgList;
+ ACPI_OBJECT Arg;
+ ACPI_STATUS Status;
+
+
+ ACPI_FUNCTION_TRACE (HwExecuteSleepMethod);
+
+
+ /* One argument, IntegerArgument; No return value expected */
+
+ ArgList.Count = 1;
+ ArgList.Pointer = &Arg;
+ Arg.Type = ACPI_TYPE_INTEGER;
+ Arg.Integer.Value = (UINT64) IntegerArgument;
+
+ Status = AcpiEvaluateObject (NULL, MethodPathname, &ArgList, NULL);
+ if (ACPI_FAILURE (Status) && Status != AE_NOT_FOUND)
+ {
+ ACPI_EXCEPTION ((AE_INFO, Status, "While executing method %s",
+ MethodPathname));
+ }
+
+ return_VOID;
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiHwExtendedSleep
+ *
+ * PARAMETERS: SleepState - Which sleep state to enter
+ * Flags - ACPI_EXECUTE_GTS to run optional method
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Enter a system sleep state via the extended FADT sleep
+ * registers (V5 FADT).
+ * THIS FUNCTION MUST BE CALLED WITH INTERRUPTS DISABLED
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiHwExtendedSleep (
+ UINT8 SleepState,
+ UINT8 Flags)
+{
+ ACPI_STATUS Status;
+ UINT8 SleepTypeValue;
+ UINT64 SleepStatus;
+
+
+ ACPI_FUNCTION_TRACE (HwExtendedSleep);
+
+
+ /* Extended sleep registers must be valid */
+
+ if (!AcpiGbl_FADT.SleepControl.Address ||
+ !AcpiGbl_FADT.SleepStatus.Address)
+ {
+ return_ACPI_STATUS (AE_NOT_EXIST);
+ }
+
+ /* Clear wake status (WAK_STS) */
+
+ Status = AcpiWrite ((UINT64) ACPI_X_WAKE_STATUS, &AcpiGbl_FADT.SleepStatus);
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+
+ AcpiGbl_SystemAwakeAndRunning = FALSE;
+
+ /* Optionally execute _GTS (Going To Sleep) */
+
+ if (Flags & ACPI_EXECUTE_GTS)
+ {
+ AcpiHwExecuteSleepMethod (METHOD_PATHNAME__GTS, SleepState);
+ }
+
+ /* Flush caches, as per ACPI specification */
+
+ ACPI_FLUSH_CPU_CACHE ();
+
+ /*
+ * Set the SLP_TYP and SLP_EN bits.
+ *
+ * Note: We only use the first value returned by the \_Sx method
+ * (AcpiGbl_SleepTypeA) - As per ACPI specification.
+ */
+ ACPI_DEBUG_PRINT ((ACPI_DB_INIT,
+ "Entering sleep state [S%u]\n", SleepState));
+
+ SleepTypeValue = ((AcpiGbl_SleepTypeA << ACPI_X_SLEEP_TYPE_POSITION) &
+ ACPI_X_SLEEP_TYPE_MASK);
+
+ Status = AcpiWrite ((UINT64) (SleepTypeValue | ACPI_X_SLEEP_ENABLE),
+ &AcpiGbl_FADT.SleepControl);
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+
+ /* Wait for transition back to Working State */
+
+ do
+ {
+ Status = AcpiRead (&SleepStatus, &AcpiGbl_FADT.SleepStatus);
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+
+ } while (!(((UINT8) SleepStatus) & ACPI_X_WAKE_STATUS));
+
+ return_ACPI_STATUS (AE_OK);
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiHwExtendedWakePrep
+ *
+ * PARAMETERS: SleepState - Which sleep state we just exited
+ * Flags - ACPI_EXECUTE_BFS to run optional method
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Perform first part of OS-independent ACPI cleanup after
+ * a sleep. Called with interrupts ENABLED.
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiHwExtendedWakePrep (
+ UINT8 SleepState,
+ UINT8 Flags)
+{
+ ACPI_STATUS Status;
+ UINT8 SleepTypeValue;
+
+
+ ACPI_FUNCTION_TRACE (HwExtendedWakePrep);
+
+
+ Status = AcpiGetSleepTypeData (ACPI_STATE_S0,
+ &AcpiGbl_SleepTypeA, &AcpiGbl_SleepTypeB);
+ if (ACPI_SUCCESS (Status))
+ {
+ SleepTypeValue = ((AcpiGbl_SleepTypeA << ACPI_X_SLEEP_TYPE_POSITION) &
+ ACPI_X_SLEEP_TYPE_MASK);
+
+ (void) AcpiWrite ((UINT64) (SleepTypeValue | ACPI_X_SLEEP_ENABLE),
+ &AcpiGbl_FADT.SleepControl);
+ }
+
+ /* Optionally execute _BFS (Back From Sleep) */
+
+ if (Flags & ACPI_EXECUTE_BFS)
+ {
+ AcpiHwExecuteSleepMethod (METHOD_PATHNAME__BFS, SleepState);
+ }
+ return_ACPI_STATUS (AE_OK);
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiHwExtendedWake
+ *
+ * PARAMETERS: SleepState - Which sleep state we just exited
+ * Flags - Reserved, set to zero
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Perform OS-independent ACPI cleanup after a sleep
+ * Called with interrupts ENABLED.
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiHwExtendedWake (
+ UINT8 SleepState,
+ UINT8 Flags)
+{
+ ACPI_FUNCTION_TRACE (HwExtendedWake);
+
+
+ /* Ensure EnterSleepStatePrep -> EnterSleepState ordering */
+
+ AcpiGbl_SleepTypeA = ACPI_SLEEP_TYPE_INVALID;
+
+ /* Execute the wake methods */
+
+ AcpiHwExecuteSleepMethod (METHOD_PATHNAME__SST, ACPI_SST_WAKING);
+ AcpiHwExecuteSleepMethod (METHOD_PATHNAME__WAK, SleepState);
+
+ /*
+ * Some BIOS code assumes that WAK_STS will be cleared on resume
+ * and use it to determine whether the system is rebooting or
+ * resuming. Clear WAK_STS for compatibility.
+ */
+ (void) AcpiWrite ((UINT64) ACPI_X_WAKE_STATUS, &AcpiGbl_FADT.SleepStatus);
+ AcpiGbl_SystemAwakeAndRunning = TRUE;
+
+ AcpiHwExecuteSleepMethod (METHOD_PATHNAME__SST, ACPI_SST_WORKING);
+ return_ACPI_STATUS (AE_OK);
+}
diff --git a/usr/src/uts/intel/io/acpica/hardware/hwgpe.c b/usr/src/uts/intel/io/acpica/hardware/hwgpe.c
index 335814f942..c94f17be26 100644
--- a/usr/src/uts/intel/io/acpica/hardware/hwgpe.c
+++ b/usr/src/uts/intel/io/acpica/hardware/hwgpe.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -49,6 +49,8 @@
#define _COMPONENT ACPI_HARDWARE
ACPI_MODULE_NAME ("hwgpe")
+#if (!ACPI_REDUCED_HARDWARE) /* Entire module */
+
/* Local prototypes */
static ACPI_STATUS
@@ -538,3 +540,4 @@ AcpiHwEnableAllWakeupGpes (
return_ACPI_STATUS (Status);
}
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/hardware/hwpci.c b/usr/src/uts/intel/io/acpica/hardware/hwpci.c
index 96a2dfcdbe..065e42d95e 100644
--- a/usr/src/uts/intel/io/acpica/hardware/hwpci.c
+++ b/usr/src/uts/intel/io/acpica/hardware/hwpci.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/hardware/hwregs.c b/usr/src/uts/intel/io/acpica/hardware/hwregs.c
index 61e61a1e6b..6d89883179 100644
--- a/usr/src/uts/intel/io/acpica/hardware/hwregs.c
+++ b/usr/src/uts/intel/io/acpica/hardware/hwregs.c
@@ -7,7 +7,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -53,6 +53,8 @@
ACPI_MODULE_NAME ("hwregs")
+#if (!ACPI_REDUCED_HARDWARE)
+
/* Local Prototypes */
static ACPI_STATUS
@@ -67,6 +69,7 @@ AcpiHwWriteMultiple (
ACPI_GENERIC_ADDRESS *RegisterA,
ACPI_GENERIC_ADDRESS *RegisterB);
+#endif /* !ACPI_REDUCED_HARDWARE */
/******************************************************************************
*
@@ -170,6 +173,7 @@ AcpiHwRead (
ACPI_GENERIC_ADDRESS *Reg)
{
UINT64 Address;
+ UINT64 Value64;
ACPI_STATUS Status;
@@ -195,7 +199,9 @@ AcpiHwRead (
if (Reg->SpaceId == ACPI_ADR_SPACE_SYSTEM_MEMORY)
{
Status = AcpiOsReadMemory ((ACPI_PHYSICAL_ADDRESS)
- Address, Value, Reg->BitWidth);
+ Address, &Value64, Reg->BitWidth);
+
+ *Value = (UINT32) Value64;
}
else /* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
{
@@ -254,7 +260,7 @@ AcpiHwWrite (
if (Reg->SpaceId == ACPI_ADR_SPACE_SYSTEM_MEMORY)
{
Status = AcpiOsWriteMemory ((ACPI_PHYSICAL_ADDRESS)
- Address, Value, Reg->BitWidth);
+ Address, (UINT64) Value, Reg->BitWidth);
}
else /* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
{
@@ -271,6 +277,7 @@ AcpiHwWrite (
}
+#if (!ACPI_REDUCED_HARDWARE)
/*******************************************************************************
*
* FUNCTION: AcpiHwClearAcpiStatus
@@ -321,7 +328,7 @@ UnlockAndExit:
/*******************************************************************************
*
- * FUNCTION: AcpiHwGetRegisterBitMask
+ * FUNCTION: AcpiHwGetBitRegisterInfo
*
* PARAMETERS: RegisterId - Index of ACPI Register to access
*
@@ -731,3 +738,4 @@ AcpiHwWriteMultiple (
return (Status);
}
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/hardware/hwsleep.c b/usr/src/uts/intel/io/acpica/hardware/hwsleep.c
index 1c8ca2440f..1953ff846e 100644
--- a/usr/src/uts/intel/io/acpica/hardware/hwsleep.c
+++ b/usr/src/uts/intel/io/acpica/hardware/hwsleep.c
@@ -1,12 +1,12 @@
-
/******************************************************************************
*
- * Name: hwsleep.c - ACPI Hardware Sleep/Wake Interface
+ * Name: hwsleep.c - ACPI Hardware Sleep/Wake Support functions for the
+ * original/legacy sleep/PM registers.
*
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -49,210 +49,38 @@
ACPI_MODULE_NAME ("hwsleep")
+#if (!ACPI_REDUCED_HARDWARE) /* Entire module */
/*******************************************************************************
*
- * FUNCTION: AcpiSetFirmwareWakingVector
- *
- * PARAMETERS: PhysicalAddress - 32-bit physical address of ACPI real mode
- * entry point.
- *
- * RETURN: Status
- *
- * DESCRIPTION: Sets the 32-bit FirmwareWakingVector field of the FACS
- *
- ******************************************************************************/
-
-ACPI_STATUS
-AcpiSetFirmwareWakingVector (
- UINT32 PhysicalAddress)
-{
- ACPI_FUNCTION_TRACE (AcpiSetFirmwareWakingVector);
-
-
- /* Set the 32-bit vector */
-
- AcpiGbl_FACS->FirmwareWakingVector = PhysicalAddress;
-
- /* Clear the 64-bit vector if it exists */
-
- if ((AcpiGbl_FACS->Length > 32) && (AcpiGbl_FACS->Version >= 1))
- {
- AcpiGbl_FACS->XFirmwareWakingVector = 0;
- }
-
- return_ACPI_STATUS (AE_OK);
-}
-
-ACPI_EXPORT_SYMBOL (AcpiSetFirmwareWakingVector)
-
-
-#if ACPI_MACHINE_WIDTH == 64
-/*******************************************************************************
- *
- * FUNCTION: AcpiSetFirmwareWakingVector64
- *
- * PARAMETERS: PhysicalAddress - 64-bit physical address of ACPI protected
- * mode entry point.
- *
- * RETURN: Status
- *
- * DESCRIPTION: Sets the 64-bit X_FirmwareWakingVector field of the FACS, if
- * it exists in the table. This function is intended for use with
- * 64-bit host operating systems.
- *
- ******************************************************************************/
-
-ACPI_STATUS
-AcpiSetFirmwareWakingVector64 (
- UINT64 PhysicalAddress)
-{
- ACPI_FUNCTION_TRACE (AcpiSetFirmwareWakingVector64);
-
-
- /* Determine if the 64-bit vector actually exists */
-
- if ((AcpiGbl_FACS->Length <= 32) || (AcpiGbl_FACS->Version < 1))
- {
- return_ACPI_STATUS (AE_NOT_EXIST);
- }
-
- /* Clear 32-bit vector, set the 64-bit X_ vector */
-
- AcpiGbl_FACS->FirmwareWakingVector = 0;
- AcpiGbl_FACS->XFirmwareWakingVector = PhysicalAddress;
- return_ACPI_STATUS (AE_OK);
-}
-
-ACPI_EXPORT_SYMBOL (AcpiSetFirmwareWakingVector64)
-#endif
-
-/*******************************************************************************
- *
- * FUNCTION: AcpiEnterSleepStatePrep
+ * FUNCTION: AcpiHwLegacySleep
*
* PARAMETERS: SleepState - Which sleep state to enter
+ * Flags - ACPI_EXECUTE_GTS to run optional method
*
* RETURN: Status
*
- * DESCRIPTION: Prepare to enter a system sleep state (see ACPI 2.0 spec p 231)
- * This function must execute with interrupts enabled.
- * We break sleeping into 2 stages so that OSPM can handle
- * various OS-specific tasks between the two steps.
- *
- ******************************************************************************/
-
-ACPI_STATUS
-AcpiEnterSleepStatePrep (
- UINT8 SleepState)
-{
- ACPI_STATUS Status;
- ACPI_OBJECT_LIST ArgList;
- ACPI_OBJECT Arg;
-
-
- ACPI_FUNCTION_TRACE (AcpiEnterSleepStatePrep);
-
-
- /* _PSW methods could be run here to enable wake-on keyboard, LAN, etc. */
-
- Status = AcpiGetSleepTypeData (SleepState,
- &AcpiGbl_SleepTypeA, &AcpiGbl_SleepTypeB);
- if (ACPI_FAILURE (Status))
- {
- return_ACPI_STATUS (Status);
- }
-
- /* Execute the _PTS method (Prepare To Sleep) */
-
- ArgList.Count = 1;
- ArgList.Pointer = &Arg;
- Arg.Type = ACPI_TYPE_INTEGER;
- Arg.Integer.Value = SleepState;
-
- Status = AcpiEvaluateObject (NULL, METHOD_NAME__PTS, &ArgList, NULL);
- if (ACPI_FAILURE (Status) && Status != AE_NOT_FOUND)
- {
- return_ACPI_STATUS (Status);
- }
-
- /* Setup the argument to the _SST method (System STatus) */
-
- switch (SleepState)
- {
- case ACPI_STATE_S0:
- Arg.Integer.Value = ACPI_SST_WORKING;
- break;
-
- case ACPI_STATE_S1:
- case ACPI_STATE_S2:
- case ACPI_STATE_S3:
- Arg.Integer.Value = ACPI_SST_SLEEPING;
- break;
-
- case ACPI_STATE_S4:
- Arg.Integer.Value = ACPI_SST_SLEEP_CONTEXT;
- break;
-
- default:
- Arg.Integer.Value = ACPI_SST_INDICATOR_OFF; /* Default is off */
- break;
- }
-
- /*
- * Set the system indicators to show the desired sleep state.
- * _SST is an optional method (return no error if not found)
- */
- Status = AcpiEvaluateObject (NULL, METHOD_NAME__SST, &ArgList, NULL);
- if (ACPI_FAILURE (Status) && Status != AE_NOT_FOUND)
- {
- ACPI_EXCEPTION ((AE_INFO, Status, "While executing method _SST"));
- }
-
- return_ACPI_STATUS (AE_OK);
-}
-
-ACPI_EXPORT_SYMBOL (AcpiEnterSleepStatePrep)
-
-
-/*******************************************************************************
- *
- * FUNCTION: AcpiEnterSleepState
- *
- * PARAMETERS: SleepState - Which sleep state to enter
- *
- * RETURN: Status
- *
- * DESCRIPTION: Enter a system sleep state
+ * DESCRIPTION: Enter a system sleep state via the legacy FADT PM registers
* THIS FUNCTION MUST BE CALLED WITH INTERRUPTS DISABLED
*
******************************************************************************/
ACPI_STATUS
-AcpiEnterSleepState (
- UINT8 SleepState)
+AcpiHwLegacySleep (
+ UINT8 SleepState,
+ UINT8 Flags)
{
- UINT32 Pm1aControl;
- UINT32 Pm1bControl;
ACPI_BIT_REGISTER_INFO *SleepTypeRegInfo;
ACPI_BIT_REGISTER_INFO *SleepEnableRegInfo;
+ UINT32 Pm1aControl;
+ UINT32 Pm1bControl;
UINT32 InValue;
- ACPI_OBJECT_LIST ArgList;
- ACPI_OBJECT Arg;
ACPI_STATUS Status;
- ACPI_FUNCTION_TRACE (AcpiEnterSleepState);
+ ACPI_FUNCTION_TRACE (HwLegacySleep);
- if ((AcpiGbl_SleepTypeA > ACPI_SLEEP_TYPE_MAX) ||
- (AcpiGbl_SleepTypeB > ACPI_SLEEP_TYPE_MAX))
- {
- ACPI_ERROR ((AE_INFO, "Sleep values out of range: A=0x%X B=0x%X",
- AcpiGbl_SleepTypeA, AcpiGbl_SleepTypeB));
- return_ACPI_STATUS (AE_AML_OPERAND_VALUE);
- }
-
- SleepTypeRegInfo = AcpiHwGetBitRegisterInfo (ACPI_BITREG_SLEEP_TYPE);
+ SleepTypeRegInfo = AcpiHwGetBitRegisterInfo (ACPI_BITREG_SLEEP_TYPE);
SleepEnableRegInfo = AcpiHwGetBitRegisterInfo (ACPI_BITREG_SLEEP_ENABLE);
/* Clear wake status */
@@ -302,17 +130,11 @@ AcpiEnterSleepState (
return_ACPI_STATUS (Status);
}
- /* Execute the _GTS method (Going To Sleep) */
-
- ArgList.Count = 1;
- ArgList.Pointer = &Arg;
- Arg.Type = ACPI_TYPE_INTEGER;
- Arg.Integer.Value = SleepState;
+ /* Optionally execute _GTS (Going To Sleep) */
- Status = AcpiEvaluateObject (NULL, METHOD_NAME__GTS, &ArgList, NULL);
- if (ACPI_FAILURE (Status) && Status != AE_NOT_FOUND)
+ if (Flags & ACPI_EXECUTE_GTS)
{
- return_ACPI_STATUS (Status);
+ AcpiHwExecuteSleepMethod (METHOD_PATHNAME__GTS, SleepState);
}
/* Get current value of PM1A control */
@@ -390,7 +212,7 @@ AcpiEnterSleepState (
}
}
- /* Wait until we enter sleep state */
+ /* Wait for transition back to Working State */
do
{
@@ -400,110 +222,32 @@ AcpiEnterSleepState (
return_ACPI_STATUS (Status);
}
- /* Spin until we wake */
-
} while (!InValue);
return_ACPI_STATUS (AE_OK);
}
-ACPI_EXPORT_SYMBOL (AcpiEnterSleepState)
-
/*******************************************************************************
*
- * FUNCTION: AcpiEnterSleepStateS4bios
- *
- * PARAMETERS: None
- *
- * RETURN: Status
- *
- * DESCRIPTION: Perform a S4 bios request.
- * THIS FUNCTION MUST BE CALLED WITH INTERRUPTS DISABLED
- *
- ******************************************************************************/
-
-ACPI_STATUS
-AcpiEnterSleepStateS4bios (
- void)
-{
- UINT32 InValue;
- ACPI_STATUS Status;
-
-
- ACPI_FUNCTION_TRACE (AcpiEnterSleepStateS4bios);
-
-
- /* Clear the wake status bit (PM1) */
-
- Status = AcpiWriteBitRegister (ACPI_BITREG_WAKE_STATUS, ACPI_CLEAR_STATUS);
- if (ACPI_FAILURE (Status))
- {
- return_ACPI_STATUS (Status);
- }
-
- Status = AcpiHwClearAcpiStatus ();
- if (ACPI_FAILURE (Status))
- {
- return_ACPI_STATUS (Status);
- }
-
- /*
- * 1) Disable/Clear all GPEs
- * 2) Enable all wakeup GPEs
- */
- Status = AcpiHwDisableAllGpes ();
- if (ACPI_FAILURE (Status))
- {
- return_ACPI_STATUS (Status);
- }
- AcpiGbl_SystemAwakeAndRunning = FALSE;
-
- Status = AcpiHwEnableAllWakeupGpes ();
- if (ACPI_FAILURE (Status))
- {
- return_ACPI_STATUS (Status);
- }
-
- ACPI_FLUSH_CPU_CACHE ();
-
- Status = AcpiHwWritePort (AcpiGbl_FADT.SmiCommand,
- (UINT32) AcpiGbl_FADT.S4BiosRequest, 8);
-
- do {
- AcpiOsStall(1000);
- Status = AcpiReadBitRegister (ACPI_BITREG_WAKE_STATUS, &InValue);
- if (ACPI_FAILURE (Status))
- {
- return_ACPI_STATUS (Status);
- }
- } while (!InValue);
-
- return_ACPI_STATUS (AE_OK);
-}
-
-ACPI_EXPORT_SYMBOL (AcpiEnterSleepStateS4bios)
-
-
-/*******************************************************************************
- *
- * FUNCTION: AcpiLeaveSleepState
+ * FUNCTION: AcpiHwLegacyWakePrep
*
* PARAMETERS: SleepState - Which sleep state we just exited
+ * Flags - ACPI_EXECUTE_BFS to run optional method
*
* RETURN: Status
*
- * DESCRIPTION: Perform OS-independent ACPI cleanup after a sleep
+ * DESCRIPTION: Perform the first state of OS-independent ACPI cleanup after a
+ * sleep.
* Called with interrupts ENABLED.
*
******************************************************************************/
ACPI_STATUS
-AcpiLeaveSleepState (
- UINT8 SleepState)
+AcpiHwLegacyWakePrep (
+ UINT8 SleepState,
+ UINT8 Flags)
{
- ACPI_OBJECT_LIST ArgList;
- ACPI_OBJECT Arg;
ACPI_STATUS Status;
ACPI_BIT_REGISTER_INFO *SleepTypeRegInfo;
ACPI_BIT_REGISTER_INFO *SleepEnableRegInfo;
@@ -511,8 +255,7 @@ AcpiLeaveSleepState (
UINT32 Pm1bControl;
- ACPI_FUNCTION_TRACE (AcpiLeaveSleepState);
-
+ ACPI_FUNCTION_TRACE (HwLegacyWakePrep);
/*
* Set SLP_TYPE and SLP_EN to state S0.
@@ -553,40 +296,50 @@ AcpiLeaveSleepState (
}
}
- /* Ensure EnterSleepStatePrep -> EnterSleepState ordering */
+ /* Optionally execute _BFS (Back From Sleep) */
- AcpiGbl_SleepTypeA = ACPI_SLEEP_TYPE_INVALID;
+ if (Flags & ACPI_EXECUTE_BFS)
+ {
+ AcpiHwExecuteSleepMethod (METHOD_PATHNAME__BFS, SleepState);
+ }
+ return_ACPI_STATUS (Status);
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiHwLegacyWake
+ *
+ * PARAMETERS: SleepState - Which sleep state we just exited
+ * Flags - Reserved, set to zero
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Perform OS-independent ACPI cleanup after a sleep
+ * Called with interrupts ENABLED.
+ *
+ ******************************************************************************/
- /* Setup parameter object */
+ACPI_STATUS
+AcpiHwLegacyWake (
+ UINT8 SleepState,
+ UINT8 Flags)
+{
+ ACPI_STATUS Status;
- ArgList.Count = 1;
- ArgList.Pointer = &Arg;
- Arg.Type = ACPI_TYPE_INTEGER;
- /* Ignore any errors from these methods */
+ ACPI_FUNCTION_TRACE (HwLegacyWake);
- Arg.Integer.Value = ACPI_SST_WAKING;
- Status = AcpiEvaluateObject (NULL, METHOD_NAME__SST, &ArgList, NULL);
- if (ACPI_FAILURE (Status) && Status != AE_NOT_FOUND)
- {
- ACPI_EXCEPTION ((AE_INFO, Status, "During Method _SST"));
- }
- Arg.Integer.Value = SleepState;
- Status = AcpiEvaluateObject (NULL, METHOD_NAME__BFS, &ArgList, NULL);
- if (ACPI_FAILURE (Status) && Status != AE_NOT_FOUND)
- {
- ACPI_EXCEPTION ((AE_INFO, Status, "During Method _BFS"));
- }
+ /* Ensure EnterSleepStatePrep -> EnterSleepState ordering */
- Status = AcpiEvaluateObject (NULL, METHOD_NAME__WAK, &ArgList, NULL);
- if (ACPI_FAILURE (Status) && Status != AE_NOT_FOUND)
- {
- ACPI_EXCEPTION ((AE_INFO, Status, "During Method _WAK"));
- }
- /* TBD: _WAK "sometimes" returns stuff - do we want to look at it? */
+ AcpiGbl_SleepTypeA = ACPI_SLEEP_TYPE_INVALID;
+ AcpiHwExecuteSleepMethod (METHOD_PATHNAME__SST, ACPI_SST_WAKING);
/*
+ * GPEs must be enabled before _WAK is called as GPEs
+ * might get fired there
+ *
* Restore the GPEs:
* 1) Disable/Clear all GPEs
* 2) Enable all runtime GPEs
@@ -596,7 +349,6 @@ AcpiLeaveSleepState (
{
return_ACPI_STATUS (Status);
}
- AcpiGbl_SystemAwakeAndRunning = TRUE;
Status = AcpiHwEnableAllRuntimeGpes ();
if (ACPI_FAILURE (Status))
@@ -604,6 +356,20 @@ AcpiLeaveSleepState (
return_ACPI_STATUS (Status);
}
+ /*
+ * Now we can execute _WAK, etc. Some machines require that the GPEs
+ * are enabled before the wake methods are executed.
+ */
+ AcpiHwExecuteSleepMethod (METHOD_PATHNAME__WAK, SleepState);
+
+ /*
+ * Some BIOS code assumes that WAK_STS will be cleared on resume
+ * and use it to determine whether the system is rebooting or
+ * resuming. Clear WAK_STS for compatibility.
+ */
+ (void) AcpiWriteBitRegister (ACPI_BITREG_WAKE_STATUS, ACPI_CLEAR_STATUS);
+ AcpiGbl_SystemAwakeAndRunning = TRUE;
+
/* Enable power button */
(void) AcpiWriteBitRegister(
@@ -625,15 +391,8 @@ AcpiLeaveSleepState (
return_ACPI_STATUS (Status);
}
- Arg.Integer.Value = ACPI_SST_WORKING;
- Status = AcpiEvaluateObject (NULL, METHOD_NAME__SST, &ArgList, NULL);
- if (ACPI_FAILURE (Status) && Status != AE_NOT_FOUND)
- {
- ACPI_EXCEPTION ((AE_INFO, Status, "During Method _SST"));
- }
-
+ AcpiHwExecuteSleepMethod (METHOD_PATHNAME__SST, ACPI_SST_WORKING);
return_ACPI_STATUS (Status);
}
-ACPI_EXPORT_SYMBOL (AcpiLeaveSleepState)
-
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/hardware/hwtimer.c b/usr/src/uts/intel/io/acpica/hardware/hwtimer.c
index faf6770283..15dc58f0bf 100644
--- a/usr/src/uts/intel/io/acpica/hardware/hwtimer.c
+++ b/usr/src/uts/intel/io/acpica/hardware/hwtimer.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -49,6 +49,7 @@
ACPI_MODULE_NAME ("hwtimer")
+#if (!ACPI_REDUCED_HARDWARE) /* Entire module */
/******************************************************************************
*
* FUNCTION: AcpiGetTimerResolution
@@ -214,3 +215,4 @@ AcpiGetTimerDuration (
ACPI_EXPORT_SYMBOL (AcpiGetTimerDuration)
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/hardware/hwvalid.c b/usr/src/uts/intel/io/acpica/hardware/hwvalid.c
index 869c9f18fd..4462434bbf 100644
--- a/usr/src/uts/intel/io/acpica/hardware/hwvalid.c
+++ b/usr/src/uts/intel/io/acpica/hardware/hwvalid.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -149,6 +149,8 @@ AcpiHwValidateIoRequest (
(BitWidth != 16) &&
(BitWidth != 32))
{
+ ACPI_ERROR ((AE_INFO,
+ "Bad BitWidth parameter: %8.8X", BitWidth));
return (AE_BAD_PARAMETER);
}
diff --git a/usr/src/uts/intel/io/acpica/hardware/hwxface.c b/usr/src/uts/intel/io/acpica/hardware/hwxface.c
index f26c511ec8..3fec9b566f 100644
--- a/usr/src/uts/intel/io/acpica/hardware/hwxface.c
+++ b/usr/src/uts/intel/io/acpica/hardware/hwxface.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -154,12 +154,6 @@ AcpiRead (
return (Status);
}
- Width = Reg->BitWidth;
- if (Width == 64)
- {
- Width = 32; /* Break into two 32-bit transfers */
- }
-
/* Initialize entire 64-bit return value to zero */
*ReturnValue = 0;
@@ -172,28 +166,20 @@ AcpiRead (
if (Reg->SpaceId == ACPI_ADR_SPACE_SYSTEM_MEMORY)
{
Status = AcpiOsReadMemory ((ACPI_PHYSICAL_ADDRESS)
- Address, &Value, Width);
+ Address, ReturnValue, Reg->BitWidth);
if (ACPI_FAILURE (Status))
{
return (Status);
}
- *ReturnValue = Value;
-
- if (Reg->BitWidth == 64)
- {
- /* Read the top 32 bits */
-
- Status = AcpiOsReadMemory ((ACPI_PHYSICAL_ADDRESS)
- (Address + 4), &Value, 32);
- if (ACPI_FAILURE (Status))
- {
- return (Status);
- }
- *ReturnValue |= ((UINT64) Value << 32);
- }
}
else /* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
{
+ Width = Reg->BitWidth;
+ if (Width == 64)
+ {
+ Width = 32; /* Break into two 32-bit transfers */
+ }
+
Status = AcpiHwReadPort ((ACPI_IO_ADDRESS)
Address, &Value, Width);
if (ACPI_FAILURE (Status))
@@ -262,12 +248,6 @@ AcpiWrite (
return (Status);
}
- Width = Reg->BitWidth;
- if (Width == 64)
- {
- Width = 32; /* Break into two 32-bit transfers */
- }
-
/*
* Two address spaces supported: Memory or IO. PCI_Config is
* not supported here because the GAS structure is insufficient
@@ -275,24 +255,20 @@ AcpiWrite (
if (Reg->SpaceId == ACPI_ADR_SPACE_SYSTEM_MEMORY)
{
Status = AcpiOsWriteMemory ((ACPI_PHYSICAL_ADDRESS)
- Address, ACPI_LODWORD (Value), Width);
+ Address, Value, Reg->BitWidth);
if (ACPI_FAILURE (Status))
{
return (Status);
}
-
- if (Reg->BitWidth == 64)
- {
- Status = AcpiOsWriteMemory ((ACPI_PHYSICAL_ADDRESS)
- (Address + 4), ACPI_HIDWORD (Value), 32);
- if (ACPI_FAILURE (Status))
- {
- return (Status);
- }
- }
}
else /* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
{
+ Width = Reg->BitWidth;
+ if (Width == 64)
+ {
+ Width = 32; /* Break into two 32-bit transfers */
+ }
+
Status = AcpiHwWritePort ((ACPI_IO_ADDRESS)
Address, ACPI_LODWORD (Value), Width);
if (ACPI_FAILURE (Status))
@@ -323,6 +299,7 @@ AcpiWrite (
ACPI_EXPORT_SYMBOL (AcpiWrite)
+#if (!ACPI_REDUCED_HARDWARE)
/*******************************************************************************
*
* FUNCTION: AcpiReadBitRegister
@@ -505,6 +482,8 @@ UnlockAndExit:
ACPI_EXPORT_SYMBOL (AcpiWriteBitRegister)
+#endif /* !ACPI_REDUCED_HARDWARE */
+
/*******************************************************************************
*
diff --git a/usr/src/uts/intel/io/acpica/hardware/hwxfsleep.c b/usr/src/uts/intel/io/acpica/hardware/hwxfsleep.c
new file mode 100644
index 0000000000..b7cbbc813e
--- /dev/null
+++ b/usr/src/uts/intel/io/acpica/hardware/hwxfsleep.c
@@ -0,0 +1,478 @@
+/******************************************************************************
+ *
+ * Name: hwxfsleep.c - ACPI Hardware Sleep/Wake External Interfaces
+ *
+ *****************************************************************************/
+
+/*
+ * Copyright (C) 2000 - 2012, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ * of any contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#include "acpi.h"
+#include "accommon.h"
+
+#define _COMPONENT ACPI_HARDWARE
+ ACPI_MODULE_NAME ("hwxfsleep")
+
+/* Local prototypes */
+
+static ACPI_STATUS
+AcpiHwSleepDispatch (
+ UINT8 SleepState,
+ UINT8 Flags,
+ UINT32 FunctionId);
+
+/*
+ * Dispatch table used to efficiently branch to the various sleep
+ * functions.
+ */
+#define ACPI_SLEEP_FUNCTION_ID 0
+#define ACPI_WAKE_PREP_FUNCTION_ID 1
+#define ACPI_WAKE_FUNCTION_ID 2
+
+/* Legacy functions are optional, based upon ACPI_REDUCED_HARDWARE */
+
+static ACPI_SLEEP_FUNCTIONS AcpiSleepDispatch[] =
+{
+ {ACPI_HW_OPTIONAL_FUNCTION (AcpiHwLegacySleep), AcpiHwExtendedSleep},
+ {ACPI_HW_OPTIONAL_FUNCTION (AcpiHwLegacyWakePrep), AcpiHwExtendedWakePrep},
+ {ACPI_HW_OPTIONAL_FUNCTION (AcpiHwLegacyWake), AcpiHwExtendedWake}
+};
+
+
+/*
+ * These functions are removed for the ACPI_REDUCED_HARDWARE case:
+ * AcpiSetFirmwareWakingVector
+ * AcpiSetFirmwareWakingVector64
+ * AcpiEnterSleepStateS4bios
+ */
+
+#if (!ACPI_REDUCED_HARDWARE)
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiSetFirmwareWakingVector
+ *
+ * PARAMETERS: PhysicalAddress - 32-bit physical address of ACPI real mode
+ * entry point.
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Sets the 32-bit FirmwareWakingVector field of the FACS
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiSetFirmwareWakingVector (
+ UINT32 PhysicalAddress)
+{
+ ACPI_FUNCTION_TRACE (AcpiSetFirmwareWakingVector);
+
+
+ /* Set the 32-bit vector */
+
+ AcpiGbl_FACS->FirmwareWakingVector = PhysicalAddress;
+
+ /* Clear the 64-bit vector if it exists */
+
+ if ((AcpiGbl_FACS->Length > 32) && (AcpiGbl_FACS->Version >= 1))
+ {
+ AcpiGbl_FACS->XFirmwareWakingVector = 0;
+ }
+
+ return_ACPI_STATUS (AE_OK);
+}
+
+ACPI_EXPORT_SYMBOL (AcpiSetFirmwareWakingVector)
+
+
+#if ACPI_MACHINE_WIDTH == 64
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiSetFirmwareWakingVector64
+ *
+ * PARAMETERS: PhysicalAddress - 64-bit physical address of ACPI protected
+ * mode entry point.
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Sets the 64-bit X_FirmwareWakingVector field of the FACS, if
+ * it exists in the table. This function is intended for use with
+ * 64-bit host operating systems.
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiSetFirmwareWakingVector64 (
+ UINT64 PhysicalAddress)
+{
+ ACPI_FUNCTION_TRACE (AcpiSetFirmwareWakingVector64);
+
+
+ /* Determine if the 64-bit vector actually exists */
+
+ if ((AcpiGbl_FACS->Length <= 32) || (AcpiGbl_FACS->Version < 1))
+ {
+ return_ACPI_STATUS (AE_NOT_EXIST);
+ }
+
+ /* Clear 32-bit vector, set the 64-bit X_ vector */
+
+ AcpiGbl_FACS->FirmwareWakingVector = 0;
+ AcpiGbl_FACS->XFirmwareWakingVector = PhysicalAddress;
+ return_ACPI_STATUS (AE_OK);
+}
+
+ACPI_EXPORT_SYMBOL (AcpiSetFirmwareWakingVector64)
+#endif
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiEnterSleepStateS4bios
+ *
+ * PARAMETERS: None
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Perform a S4 bios request.
+ * THIS FUNCTION MUST BE CALLED WITH INTERRUPTS DISABLED
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiEnterSleepStateS4bios (
+ void)
+{
+ UINT32 InValue;
+ ACPI_STATUS Status;
+
+
+ ACPI_FUNCTION_TRACE (AcpiEnterSleepStateS4bios);
+
+
+ /* Clear the wake status bit (PM1) */
+
+ Status = AcpiWriteBitRegister (ACPI_BITREG_WAKE_STATUS, ACPI_CLEAR_STATUS);
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+
+ Status = AcpiHwClearAcpiStatus ();
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+
+ /*
+ * 1) Disable/Clear all GPEs
+ * 2) Enable all wakeup GPEs
+ */
+ Status = AcpiHwDisableAllGpes ();
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+ AcpiGbl_SystemAwakeAndRunning = FALSE;
+
+ Status = AcpiHwEnableAllWakeupGpes ();
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+
+ ACPI_FLUSH_CPU_CACHE ();
+
+ Status = AcpiHwWritePort (AcpiGbl_FADT.SmiCommand,
+ (UINT32) AcpiGbl_FADT.S4BiosRequest, 8);
+
+ do {
+ AcpiOsStall(1000);
+ Status = AcpiReadBitRegister (ACPI_BITREG_WAKE_STATUS, &InValue);
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+ } while (!InValue);
+
+ return_ACPI_STATUS (AE_OK);
+}
+
+ACPI_EXPORT_SYMBOL (AcpiEnterSleepStateS4bios)
+
+#endif /* !ACPI_REDUCED_HARDWARE */
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiHwSleepDispatch
+ *
+ * PARAMETERS: SleepState - Which sleep state to enter/exit
+ * FunctionId - Sleep, WakePrep, or Wake
+ *
+ * RETURN: Status from the invoked sleep handling function.
+ *
+ * DESCRIPTION: Dispatch a sleep/wake request to the appropriate handling
+ * function.
+ *
+ ******************************************************************************/
+
+static ACPI_STATUS
+AcpiHwSleepDispatch (
+ UINT8 SleepState,
+ UINT8 Flags,
+ UINT32 FunctionId)
+{
+ ACPI_STATUS Status;
+ ACPI_SLEEP_FUNCTIONS *SleepFunctions = &AcpiSleepDispatch[FunctionId];
+
+
+#if (!ACPI_REDUCED_HARDWARE)
+
+ /*
+ * If the Hardware Reduced flag is set (from the FADT), we must
+ * use the extended sleep registers
+ */
+ if (AcpiGbl_ReducedHardware ||
+ AcpiGbl_FADT.SleepControl.Address)
+ {
+ Status = SleepFunctions->ExtendedFunction (SleepState, Flags);
+ }
+ else
+ {
+ /* Legacy sleep */
+
+ Status = SleepFunctions->LegacyFunction (SleepState, Flags);
+ }
+
+ return (Status);
+
+#else
+ /*
+ * For the case where reduced-hardware-only code is being generated,
+ * we know that only the extended sleep registers are available
+ */
+ Status = SleepFunctions->ExtendedFunction (SleepState, Flags);
+ return (Status);
+
+#endif /* !ACPI_REDUCED_HARDWARE */
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiEnterSleepStatePrep
+ *
+ * PARAMETERS: SleepState - Which sleep state to enter
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Prepare to enter a system sleep state.
+ * This function must execute with interrupts enabled.
+ * We break sleeping into 2 stages so that OSPM can handle
+ * various OS-specific tasks between the two steps.
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiEnterSleepStatePrep (
+ UINT8 SleepState)
+{
+ ACPI_STATUS Status;
+ ACPI_OBJECT_LIST ArgList;
+ ACPI_OBJECT Arg;
+ UINT32 SstValue;
+
+
+ ACPI_FUNCTION_TRACE (AcpiEnterSleepStatePrep);
+
+
+ Status = AcpiGetSleepTypeData (SleepState,
+ &AcpiGbl_SleepTypeA, &AcpiGbl_SleepTypeB);
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+
+ /* Execute the _PTS method (Prepare To Sleep) */
+
+ ArgList.Count = 1;
+ ArgList.Pointer = &Arg;
+ Arg.Type = ACPI_TYPE_INTEGER;
+ Arg.Integer.Value = SleepState;
+
+ Status = AcpiEvaluateObject (NULL, METHOD_PATHNAME__PTS, &ArgList, NULL);
+ if (ACPI_FAILURE (Status) && Status != AE_NOT_FOUND)
+ {
+ return_ACPI_STATUS (Status);
+ }
+
+ /* Setup the argument to the _SST method (System STatus) */
+
+ switch (SleepState)
+ {
+ case ACPI_STATE_S0:
+ SstValue = ACPI_SST_WORKING;
+ break;
+
+ case ACPI_STATE_S1:
+ case ACPI_STATE_S2:
+ case ACPI_STATE_S3:
+ SstValue = ACPI_SST_SLEEPING;
+ break;
+
+ case ACPI_STATE_S4:
+ SstValue = ACPI_SST_SLEEP_CONTEXT;
+ break;
+
+ default:
+ SstValue = ACPI_SST_INDICATOR_OFF; /* Default is off */
+ break;
+ }
+
+ /*
+ * Set the system indicators to show the desired sleep state.
+ * _SST is an optional method (return no error if not found)
+ */
+ AcpiHwExecuteSleepMethod (METHOD_PATHNAME__SST, SstValue);
+ return_ACPI_STATUS (AE_OK);
+}
+
+ACPI_EXPORT_SYMBOL (AcpiEnterSleepStatePrep)
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiEnterSleepState
+ *
+ * PARAMETERS: SleepState - Which sleep state to enter
+ * Flags - ACPI_EXECUTE_GTS to run optional method
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Enter a system sleep state
+ * THIS FUNCTION MUST BE CALLED WITH INTERRUPTS DISABLED
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiEnterSleepState (
+ UINT8 SleepState,
+ UINT8 Flags)
+{
+ ACPI_STATUS Status;
+
+
+ ACPI_FUNCTION_TRACE (AcpiEnterSleepState);
+
+
+ if ((AcpiGbl_SleepTypeA > ACPI_SLEEP_TYPE_MAX) ||
+ (AcpiGbl_SleepTypeB > ACPI_SLEEP_TYPE_MAX))
+ {
+ ACPI_ERROR ((AE_INFO, "Sleep values out of range: A=0x%X B=0x%X",
+ AcpiGbl_SleepTypeA, AcpiGbl_SleepTypeB));
+ return_ACPI_STATUS (AE_AML_OPERAND_VALUE);
+ }
+
+ Status = AcpiHwSleepDispatch (SleepState, Flags, ACPI_SLEEP_FUNCTION_ID);
+ return_ACPI_STATUS (Status);
+}
+
+ACPI_EXPORT_SYMBOL (AcpiEnterSleepState)
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiLeaveSleepStatePrep
+ *
+ * PARAMETERS: SleepState - Which sleep state we are exiting
+ * Flags - ACPI_EXECUTE_BFS to run optional method
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Perform the first state of OS-independent ACPI cleanup after a
+ * sleep. Called with interrupts DISABLED.
+ * We break wake/resume into 2 stages so that OSPM can handle
+ * various OS-specific tasks between the two steps.
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiLeaveSleepStatePrep (
+ UINT8 SleepState,
+ UINT8 Flags)
+{
+ ACPI_STATUS Status;
+
+
+ ACPI_FUNCTION_TRACE (AcpiLeaveSleepStatePrep);
+
+
+ Status = AcpiHwSleepDispatch (SleepState, Flags, ACPI_WAKE_PREP_FUNCTION_ID);
+ return_ACPI_STATUS (Status);
+}
+
+ACPI_EXPORT_SYMBOL (AcpiLeaveSleepStatePrep)
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiLeaveSleepState
+ *
+ * PARAMETERS: SleepState - Which sleep state we are exiting
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Perform OS-independent ACPI cleanup after a sleep
+ * Called with interrupts ENABLED.
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiLeaveSleepState (
+ UINT8 SleepState)
+{
+ ACPI_STATUS Status;
+
+
+ ACPI_FUNCTION_TRACE (AcpiLeaveSleepState);
+
+
+ Status = AcpiHwSleepDispatch (SleepState, 0, ACPI_WAKE_FUNCTION_ID);
+ return_ACPI_STATUS (Status);
+}
+
+ACPI_EXPORT_SYMBOL (AcpiLeaveSleepState)
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsaccess.c b/usr/src/uts/intel/io/acpica/namespace/nsaccess.c
index 1bb2b4b6e6..9dfa76fb08 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsaccess.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsaccess.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsalloc.c b/usr/src/uts/intel/io/acpica/namespace/nsalloc.c
index ac12ba4faf..c7e83e0eb4 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsalloc.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsalloc.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsdump.c b/usr/src/uts/intel/io/acpica/namespace/nsdump.c
index 58137abbc5..6b7c505779 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsdump.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsdump.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -270,7 +270,21 @@ AcpiNsDumpOneObject (
if (!ObjDesc)
{
- /* No attached object, we are done */
+ /* No attached object. Some types should always have an object */
+
+ switch (Type)
+ {
+ case ACPI_TYPE_INTEGER:
+ case ACPI_TYPE_PACKAGE:
+ case ACPI_TYPE_BUFFER:
+ case ACPI_TYPE_STRING:
+ case ACPI_TYPE_METHOD:
+ AcpiOsPrintf ("<No attached object>");
+ break;
+
+ default:
+ break;
+ }
AcpiOsPrintf ("\n");
return (AE_OK);
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsdumpdv.c b/usr/src/uts/intel/io/acpica/namespace/nsdumpdv.c
index 11aab7a3a9..9d30886608 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsdumpdv.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsdumpdv.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -141,7 +141,7 @@ AcpiNsDumpRootDevices (
return;
}
- Status = AcpiGetHandle (NULL, ACPI_NS_SYSTEM_BUS, &SysBusHandle);
+ Status = AcpiGetHandle (NULL, METHOD_NAME__SB_, &SysBusHandle);
if (ACPI_FAILURE (Status))
{
return;
diff --git a/usr/src/uts/intel/io/acpica/namespace/nseval.c b/usr/src/uts/intel/io/acpica/namespace/nseval.c
index c376ac7213..0183f7e774 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nseval.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nseval.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsinit.c b/usr/src/uts/intel/io/acpica/namespace/nsinit.c
index 1822bf519f..f3907aafb3 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsinit.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsinit.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsload.c b/usr/src/uts/intel/io/acpica/namespace/nsload.c
index 5825f6bc9d..3ef33f9ce9 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsload.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsload.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsnames.c b/usr/src/uts/intel/io/acpica/namespace/nsnames.c
index e59b2e9569..f70900da1d 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsnames.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsnames.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsobject.c b/usr/src/uts/intel/io/acpica/namespace/nsobject.c
index dadd2e8d98..9b219a5624 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsobject.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsobject.c
@@ -6,7 +6,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsparse.c b/usr/src/uts/intel/io/acpica/namespace/nsparse.c
index 983697eaaf..2c768c0b69 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsparse.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsparse.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/namespace/nspredef.c b/usr/src/uts/intel/io/acpica/namespace/nspredef.c
index 95c3edf04d..3a61aa3666 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nspredef.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nspredef.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -216,14 +216,20 @@ AcpiNsCheckPredefinedNames (
}
/*
- * 1) We have a return value, but if one wasn't expected, just exit, this is
- * not a problem. For example, if the "Implicit Return" feature is
- * enabled, methods will always return a value.
+ * Return value validation and possible repair.
*
- * 2) If the return value can be of any type, then we cannot perform any
- * validation, exit.
+ * 1) Don't perform return value validation/repair if this feature
+ * has been disabled via a global option.
+ *
+ * 2) We have a return value, but if one wasn't expected, just exit,
+ * this is not a problem. For example, if the "Implicit Return"
+ * feature is enabled, methods will always return a value.
+ *
+ * 3) If the return value can be of any type, then we cannot perform
+ * any validation, just exit.
*/
- if ((!Predefined->Info.ExpectedBtypes) ||
+ if (AcpiGbl_DisableAutoRepair ||
+ (!Predefined->Info.ExpectedBtypes) ||
(Predefined->Info.ExpectedBtypes == ACPI_RTYPE_ALL))
{
goto Cleanup;
@@ -237,6 +243,7 @@ AcpiNsCheckPredefinedNames (
goto Cleanup;
}
Data->Predefined = Predefined;
+ Data->Node = Node;
Data->NodeFlags = Node->Flags;
Data->Pathname = Pathname;
@@ -658,6 +665,7 @@ AcpiNsCheckPackage (
case ACPI_PTYPE2_FIXED:
case ACPI_PTYPE2_MIN:
case ACPI_PTYPE2_COUNT:
+ case ACPI_PTYPE2_FIX_VAR:
/*
* These types all return a single Package that consists of a
@@ -673,7 +681,7 @@ AcpiNsCheckPackage (
{
/* Create the new outer package and populate it */
- Status = AcpiNsRepairPackageList (Data, ReturnObjectPtr);
+ Status = AcpiNsWrapWithPackage (Data, *Elements, ReturnObjectPtr);
if (ACPI_FAILURE (Status))
{
return (Status);
@@ -800,6 +808,29 @@ AcpiNsCheckPackageList (
break;
+ case ACPI_PTYPE2_FIX_VAR:
+ /*
+ * Each subpackage has a fixed number of elements and an
+ * optional element
+ */
+ ExpectedCount = Package->RetInfo.Count1 + Package->RetInfo.Count2;
+ if (SubPackage->Package.Count < ExpectedCount)
+ {
+ goto PackageTooSmall;
+ }
+
+ Status = AcpiNsCheckPackageElements (Data, SubElements,
+ Package->RetInfo.ObjectType1,
+ Package->RetInfo.Count1,
+ Package->RetInfo.ObjectType2,
+ SubPackage->Package.Count - Package->RetInfo.Count1, 0);
+ if (ACPI_FAILURE (Status))
+ {
+ return (Status);
+ }
+ break;
+
+
case ACPI_PTYPE2_FIXED:
/* Each sub-package has a fixed length */
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsrepair.c b/usr/src/uts/intel/io/acpica/namespace/nsrepair.c
index be73953ded..be9d3e889d 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsrepair.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsrepair.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -74,11 +74,10 @@
* Buffer -> String
* Buffer -> Package of Integers
* Package -> Package of one Package
+ * An incorrect standalone object is wrapped with required outer package
*
* Additional possible repairs:
- *
* Required package elements that are NULL replaced by Integer/String/Buffer
- * Incorrect standalone package wrapped with required outer package
*
******************************************************************************/
@@ -100,11 +99,6 @@ AcpiNsConvertToBuffer (
ACPI_OPERAND_OBJECT *OriginalObject,
ACPI_OPERAND_OBJECT **ReturnObject);
-static ACPI_STATUS
-AcpiNsConvertToPackage (
- ACPI_OPERAND_OBJECT *OriginalObject,
- ACPI_OPERAND_OBJECT **ReturnObject);
-
/*******************************************************************************
*
@@ -172,10 +166,24 @@ AcpiNsRepairObject (
}
if (ExpectedBtypes & ACPI_RTYPE_PACKAGE)
{
- Status = AcpiNsConvertToPackage (ReturnObject, &NewObject);
+ /*
+ * A package is expected. We will wrap the existing object with a
+ * new package object. It is often the case that if a variable-length
+ * package is required, but there is only a single object needed, the
+ * BIOS will return that object instead of wrapping it with a Package
+ * object. Note: after the wrapping, the package will be validated
+ * for correct contents (expected object type or types).
+ */
+ Status = AcpiNsWrapWithPackage (Data, ReturnObject, &NewObject);
if (ACPI_SUCCESS (Status))
{
- goto ObjectRepaired;
+ /*
+ * The original object just had its reference count
+ * incremented for being inserted into the new package.
+ */
+ *ReturnObjectPtr = NewObject; /* New Package object */
+ Data->Flags |= ACPI_OBJECT_REPAIRED;
+ return (AE_OK);
}
}
@@ -188,24 +196,30 @@ ObjectRepaired:
/* Object was successfully repaired */
- /*
- * If the original object is a package element, we need to:
- * 1. Set the reference count of the new object to match the
- * reference count of the old object.
- * 2. Decrement the reference count of the original object.
- */
if (PackageIndex != ACPI_NOT_PACKAGE_ELEMENT)
{
- NewObject->Common.ReferenceCount =
- ReturnObject->Common.ReferenceCount;
-
- if (ReturnObject->Common.ReferenceCount > 1)
+ /*
+ * The original object is a package element. We need to
+ * decrement the reference count of the original object,
+ * for removing it from the package.
+ *
+ * However, if the original object was just wrapped with a
+ * package object as part of the repair, we don't need to
+ * change the reference count.
+ */
+ if (!(Data->Flags & ACPI_OBJECT_WRAPPED))
{
- ReturnObject->Common.ReferenceCount--;
+ NewObject->Common.ReferenceCount =
+ ReturnObject->Common.ReferenceCount;
+
+ if (ReturnObject->Common.ReferenceCount > 1)
+ {
+ ReturnObject->Common.ReferenceCount--;
+ }
}
ACPI_DEBUG_PRINT ((ACPI_DB_REPAIR,
- "%s: Converted %s to expected %s at index %u\n",
+ "%s: Converted %s to expected %s at Package index %u\n",
Data->Pathname, AcpiUtGetObjectTypeName (ReturnObject),
AcpiUtGetObjectTypeName (NewObject), PackageIndex));
}
@@ -498,71 +512,6 @@ AcpiNsConvertToBuffer (
/*******************************************************************************
*
- * FUNCTION: AcpiNsConvertToPackage
- *
- * PARAMETERS: OriginalObject - Object to be converted
- * ReturnObject - Where the new converted object is returned
- *
- * RETURN: Status. AE_OK if conversion was successful.
- *
- * DESCRIPTION: Attempt to convert a Buffer object to a Package. Each byte of
- * the buffer is converted to a single integer package element.
- *
- ******************************************************************************/
-
-static ACPI_STATUS
-AcpiNsConvertToPackage (
- ACPI_OPERAND_OBJECT *OriginalObject,
- ACPI_OPERAND_OBJECT **ReturnObject)
-{
- ACPI_OPERAND_OBJECT *NewObject;
- ACPI_OPERAND_OBJECT **Elements;
- UINT32 Length;
- UINT8 *Buffer;
-
-
- switch (OriginalObject->Common.Type)
- {
- case ACPI_TYPE_BUFFER:
-
- /* Buffer-to-Package conversion */
-
- Length = OriginalObject->Buffer.Length;
- NewObject = AcpiUtCreatePackageObject (Length);
- if (!NewObject)
- {
- return (AE_NO_MEMORY);
- }
-
- /* Convert each buffer byte to an integer package element */
-
- Elements = NewObject->Package.Elements;
- Buffer = OriginalObject->Buffer.Pointer;
-
- while (Length--)
- {
- *Elements = AcpiUtCreateIntegerObject ((UINT64) *Buffer);
- if (!*Elements)
- {
- AcpiUtRemoveReference (NewObject);
- return (AE_NO_MEMORY);
- }
- Elements++;
- Buffer++;
- }
- break;
-
- default:
- return (AE_AML_OPERAND_TYPE);
- }
-
- *ReturnObject = NewObject;
- return (AE_OK);
-}
-
-
-/*******************************************************************************
- *
* FUNCTION: AcpiNsRepairNullElement
*
* PARAMETERS: Data - Pointer to validation data structure
@@ -696,6 +645,7 @@ AcpiNsRemoveNullElements (
case ACPI_PTYPE2_FIXED:
case ACPI_PTYPE2_MIN:
case ACPI_PTYPE2_REV_FIXED:
+ case ACPI_PTYPE2_FIX_VAR:
break;
default:
@@ -744,42 +694,43 @@ AcpiNsRemoveNullElements (
/*******************************************************************************
*
- * FUNCTION: AcpiNsRepairPackageList
+ * FUNCTION: AcpiNsWrapWithPackage
*
* PARAMETERS: Data - Pointer to validation data structure
- * ObjDescPtr - Pointer to the object to repair. The new
- * package object is returned here,
- * overwriting the old object.
+ * OriginalObject - Pointer to the object to repair.
+ * ObjDescPtr - The new package object is returned here
*
* RETURN: Status, new object in *ObjDescPtr
*
- * DESCRIPTION: Repair a common problem with objects that are defined to return
- * a variable-length Package of Packages. If the variable-length
- * is one, some BIOS code mistakenly simply declares a single
- * Package instead of a Package with one sub-Package. This
- * function attempts to repair this error by wrapping a Package
- * object around the original Package, creating the correct
- * Package with one sub-Package.
+ * DESCRIPTION: Repair a common problem with objects that are defined to
+ * return a variable-length Package of sub-objects. If there is
+ * only one sub-object, some BIOS code mistakenly simply declares
+ * the single object instead of a Package with one sub-object.
+ * This function attempts to repair this error by wrapping a
+ * Package object around the original object, creating the
+ * correct and expected Package with one sub-object.
*
* Names that can be repaired in this manner include:
- * _ALR, _CSD, _HPX, _MLS, _PRT, _PSS, _TRT, TSS
+ * _ALR, _CSD, _HPX, _MLS, _PLD, _PRT, _PSS, _TRT, _TSS,
+ * _BCL, _DOD, _FIX, _Sx
*
******************************************************************************/
ACPI_STATUS
-AcpiNsRepairPackageList (
+AcpiNsWrapWithPackage (
ACPI_PREDEFINED_DATA *Data,
+ ACPI_OPERAND_OBJECT *OriginalObject,
ACPI_OPERAND_OBJECT **ObjDescPtr)
{
ACPI_OPERAND_OBJECT *PkgObjDesc;
- ACPI_FUNCTION_NAME (NsRepairPackageList);
+ ACPI_FUNCTION_NAME (NsWrapWithPackage);
/*
* Create the new outer package and populate it. The new package will
- * have a single element, the lone subpackage.
+ * have a single element, the lone sub-object.
*/
PkgObjDesc = AcpiUtCreatePackageObject (1);
if (!PkgObjDesc)
@@ -787,15 +738,15 @@ AcpiNsRepairPackageList (
return (AE_NO_MEMORY);
}
- PkgObjDesc->Package.Elements[0] = *ObjDescPtr;
+ PkgObjDesc->Package.Elements[0] = OriginalObject;
+
+ ACPI_DEBUG_PRINT ((ACPI_DB_REPAIR,
+ "%s: Wrapped %s with expected Package object\n",
+ Data->Pathname, AcpiUtGetObjectTypeName (OriginalObject)));
/* Return the new object in the object pointer */
*ObjDescPtr = PkgObjDesc;
- Data->Flags |= ACPI_OBJECT_REPAIRED;
-
- ACPI_DEBUG_PRINT ((ACPI_DB_REPAIR,
- "%s: Repaired incorrectly formed Package\n", Data->Pathname));
-
+ Data->Flags |= ACPI_OBJECT_REPAIRED | ACPI_OBJECT_WRAPPED;
return (AE_OK);
}
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsrepair2.c b/usr/src/uts/intel/io/acpica/namespace/nsrepair2.c
index 4c9c6d7825..a6a4d6874d 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsrepair2.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsrepair2.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -516,11 +516,12 @@ AcpiNsRepair_HID (
}
/*
- * Copy and uppercase the string. From the ACPI specification:
+ * Copy and uppercase the string. From the ACPI 5.0 specification:
*
* A valid PNP ID must be of the form "AAA####" where A is an uppercase
* letter and # is a hex digit. A valid ACPI ID must be of the form
- * "ACPI####" where # is a hex digit.
+ * "NNNN####" where N is an uppercase letter or decimal digit, and
+ * # is a hex digit.
*/
for (Dest = NewString->String.Pointer; *Source; Dest++, Source++)
{
@@ -555,8 +556,23 @@ AcpiNsRepair_TSS (
{
ACPI_OPERAND_OBJECT *ReturnObject = *ReturnObjectPtr;
ACPI_STATUS Status;
+ ACPI_NAMESPACE_NODE *Node;
+ /*
+ * We can only sort the _TSS return package if there is no _PSS in the
+ * same scope. This is because if _PSS is present, the ACPI specification
+ * dictates that the _TSS Power Dissipation field is to be ignored, and
+ * therefore some BIOSs leave garbage values in the _TSS Power field(s).
+ * In this case, it is best to just return the _TSS package as-is.
+ * (May, 2011)
+ */
+ Status = AcpiNsGetNode (Data->Node, "^_PSS", ACPI_NS_NO_UPSEARCH, &Node);
+ if (ACPI_SUCCESS (Status))
+ {
+ return (AE_OK);
+ }
+
Status = AcpiNsCheckSortedList (Data, ReturnObject, 5, 1,
ACPI_SORT_DESCENDING, "PowerDissipation");
diff --git a/usr/src/uts/intel/io/acpica/namespace/nssearch.c b/usr/src/uts/intel/io/acpica/namespace/nssearch.c
index 1951c2548b..d6f59ccbd1 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nssearch.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nssearch.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsutils.c b/usr/src/uts/intel/io/acpica/namespace/nsutils.c
index fad2a57c5e..43f734e819 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsutils.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsutils.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -405,7 +405,7 @@ AcpiNsBuildInternalName (
if (!AcpiNsValidPathSeparator (*ExternalName) &&
(*ExternalName != 0))
{
- return_ACPI_STATUS (AE_BAD_PARAMETER);
+ return_ACPI_STATUS (AE_BAD_PATHNAME);
}
/* Move on the next segment */
diff --git a/usr/src/uts/intel/io/acpica/namespace/nswalk.c b/usr/src/uts/intel/io/acpica/namespace/nswalk.c
index d44775212a..35c3e17896 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nswalk.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nswalk.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsxfeval.c b/usr/src/uts/intel/io/acpica/namespace/nsxfeval.c
index 0854cac9e9..5006588e85 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsxfeval.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsxfeval.c
@@ -6,7 +6,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsxfname.c b/usr/src/uts/intel/io/acpica/namespace/nsxfname.c
index b0ccff5bdc..4690d09e90 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsxfname.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsxfname.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsxfobj.c b/usr/src/uts/intel/io/acpica/namespace/nsxfobj.c
index 9910248220..02f7777a39 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsxfobj.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsxfobj.c
@@ -6,7 +6,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/osl.c b/usr/src/uts/intel/io/acpica/osl.c
index d5bfab754f..44f8d2f48f 100644
--- a/usr/src/uts/intel/io/acpica/osl.c
+++ b/usr/src/uts/intel/io/acpica/osl.c
@@ -22,7 +22,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2011 Joyent, Inc. All rights reserved.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
/*
* Copyright (c) 2009-2010, Intel Corporation.
@@ -315,6 +315,12 @@ AcpiOsTableOverride(ACPI_TABLE_HEADER *ExistingTable,
return (AE_OK);
}
+ACPI_STATUS
+AcpiOsPhysicalTableOverride(ACPI_TABLE_HEADER *ExistingTable,
+ ACPI_PHYSICAL_ADDRESS *NewAddress, UINT32 *NewTableLength)
+{
+ return (AE_SUPPORT);
+}
/*
* ACPI semaphore implementation
@@ -883,7 +889,7 @@ AcpiOsWritePort(ACPI_IO_ADDRESS Address, UINT32 Value, UINT32 Width)
static void
-osl_rw_memory(ACPI_PHYSICAL_ADDRESS Address, UINT32 *Value,
+osl_rw_memory(ACPI_PHYSICAL_ADDRESS Address, UINT64 *Value,
UINT32 Width, int write)
{
size_t maplen = Width / 8;
@@ -902,6 +908,9 @@ osl_rw_memory(ACPI_PHYSICAL_ADDRESS Address, UINT32 *Value,
case 4:
OSL_RW(ptr, Value, uint32_t, write);
break;
+ case 8:
+ OSL_RW(ptr, Value, uint64_t, write);
+ break;
default:
cmn_err(CE_WARN, "!osl_rw_memory: invalid size %d",
Width);
@@ -913,7 +922,7 @@ osl_rw_memory(ACPI_PHYSICAL_ADDRESS Address, UINT32 *Value,
ACPI_STATUS
AcpiOsReadMemory(ACPI_PHYSICAL_ADDRESS Address,
- UINT32 *Value, UINT32 Width)
+ UINT64 *Value, UINT32 Width)
{
osl_rw_memory(Address, Value, Width, 0);
return (AE_OK);
@@ -921,7 +930,7 @@ AcpiOsReadMemory(ACPI_PHYSICAL_ADDRESS Address,
ACPI_STATUS
AcpiOsWriteMemory(ACPI_PHYSICAL_ADDRESS Address,
- UINT32 Value, UINT32 Width)
+ UINT64 Value, UINT32 Width)
{
osl_rw_memory(Address, &Value, Width, 1);
return (AE_OK);
diff --git a/usr/src/uts/intel/io/acpica/parser/psargs.c b/usr/src/uts/intel/io/acpica/parser/psargs.c
index e383def11e..8140fdbb9d 100644
--- a/usr/src/uts/intel/io/acpica/parser/psargs.c
+++ b/usr/src/uts/intel/io/acpica/parser/psargs.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -531,37 +531,57 @@ static ACPI_PARSE_OBJECT *
AcpiPsGetNextField (
ACPI_PARSE_STATE *ParserState)
{
- UINT32 AmlOffset = (UINT32)
- ACPI_PTR_DIFF (ParserState->Aml,
- ParserState->AmlStart);
+ UINT32 AmlOffset;
ACPI_PARSE_OBJECT *Field;
+ ACPI_PARSE_OBJECT *Arg = NULL;
UINT16 Opcode;
UINT32 Name;
+ UINT8 AccessType;
+ UINT8 AccessAttribute;
+ UINT8 AccessLength;
+ UINT32 PkgLength;
+ UINT8 *PkgEnd;
+ UINT32 BufferLength;
ACPI_FUNCTION_TRACE (PsGetNextField);
+ AmlOffset = (UINT32) ACPI_PTR_DIFF (
+ ParserState->Aml, ParserState->AmlStart);
+
/* Determine field type */
switch (ACPI_GET8 (ParserState->Aml))
{
- default:
+ case AML_FIELD_OFFSET_OP:
- Opcode = AML_INT_NAMEDFIELD_OP;
+ Opcode = AML_INT_RESERVEDFIELD_OP;
+ ParserState->Aml++;
break;
- case 0x00:
+ case AML_FIELD_ACCESS_OP:
- Opcode = AML_INT_RESERVEDFIELD_OP;
+ Opcode = AML_INT_ACCESSFIELD_OP;
ParserState->Aml++;
break;
- case 0x01:
+ case AML_FIELD_CONNECTION_OP:
- Opcode = AML_INT_ACCESSFIELD_OP;
+ Opcode = AML_INT_CONNECTION_OP;
+ ParserState->Aml++;
+ break;
+
+ case AML_FIELD_EXT_ACCESS_OP:
+
+ Opcode = AML_INT_EXTACCESSFIELD_OP;
ParserState->Aml++;
break;
+
+ default:
+
+ Opcode = AML_INT_NAMEDFIELD_OP;
+ break;
}
/* Allocate a new field op */
@@ -601,17 +621,118 @@ AcpiPsGetNextField (
case AML_INT_ACCESSFIELD_OP:
+ case AML_INT_EXTACCESSFIELD_OP:
/*
* Get AccessType and AccessAttrib and merge into the field Op
- * AccessType is first operand, AccessAttribute is second
+ * AccessType is first operand, AccessAttribute is second. stuff
+ * these bytes into the node integer value for convenience.
*/
- Field->Common.Value.Integer = (((UINT32) ACPI_GET8 (ParserState->Aml) << 8));
+
+ /* Get the two bytes (Type/Attribute) */
+
+ AccessType = ACPI_GET8 (ParserState->Aml);
ParserState->Aml++;
- Field->Common.Value.Integer |= ACPI_GET8 (ParserState->Aml);
+ AccessAttribute = ACPI_GET8 (ParserState->Aml);
ParserState->Aml++;
+
+ Field->Common.Value.Integer = (UINT8) AccessType;
+ Field->Common.Value.Integer |= (UINT16) (AccessAttribute << 8);
+
+ /* This opcode has a third byte, AccessLength */
+
+ if (Opcode == AML_INT_EXTACCESSFIELD_OP)
+ {
+ AccessLength = ACPI_GET8 (ParserState->Aml);
+ ParserState->Aml++;
+
+ Field->Common.Value.Integer |= (UINT32) (AccessLength << 16);
+ }
break;
+
+ case AML_INT_CONNECTION_OP:
+
+ /*
+ * Argument for Connection operator can be either a Buffer
+ * (resource descriptor), or a NameString.
+ */
+ if (ACPI_GET8 (ParserState->Aml) == AML_BUFFER_OP)
+ {
+ ParserState->Aml++;
+
+ PkgEnd = ParserState->Aml;
+ PkgLength = AcpiPsGetNextPackageLength (ParserState);
+ PkgEnd += PkgLength;
+
+ if (ParserState->Aml < PkgEnd)
+ {
+ /* Non-empty list */
+
+ Arg = AcpiPsAllocOp (AML_INT_BYTELIST_OP);
+ if (!Arg)
+ {
+ AcpiPsFreeOp (Field);
+ return_PTR (NULL);
+ }
+
+ /* Get the actual buffer length argument */
+
+ Opcode = ACPI_GET8 (ParserState->Aml);
+ ParserState->Aml++;
+
+ switch (Opcode)
+ {
+ case AML_BYTE_OP: /* AML_BYTEDATA_ARG */
+ BufferLength = ACPI_GET8 (ParserState->Aml);
+ ParserState->Aml += 1;
+ break;
+
+ case AML_WORD_OP: /* AML_WORDDATA_ARG */
+ BufferLength = ACPI_GET16 (ParserState->Aml);
+ ParserState->Aml += 2;
+ break;
+
+ case AML_DWORD_OP: /* AML_DWORDATA_ARG */
+ BufferLength = ACPI_GET32 (ParserState->Aml);
+ ParserState->Aml += 4;
+ break;
+
+ default:
+ BufferLength = 0;
+ break;
+ }
+
+ /* Fill in bytelist data */
+
+ Arg->Named.Value.Size = BufferLength;
+ Arg->Named.Data = ParserState->Aml;
+ }
+
+ /* Skip to End of byte data */
+
+ ParserState->Aml = PkgEnd;
+ }
+ else
+ {
+ Arg = AcpiPsAllocOp (AML_INT_NAMEPATH_OP);
+ if (!Arg)
+ {
+ AcpiPsFreeOp (Field);
+ return_PTR (NULL);
+ }
+
+ /* Get the Namestring argument */
+
+ Arg->Common.Value.Name = AcpiPsGetNextNamestring (ParserState);
+ }
+
+ /* Link the buffer/namestring to parent (CONNECTION_OP) */
+
+ AcpiPsAppendArg (Field, Arg);
+ break;
+
+
default:
/* Opcode was set in previous switch */
diff --git a/usr/src/uts/intel/io/acpica/parser/psloop.c b/usr/src/uts/intel/io/acpica/parser/psloop.c
index a98989e099..d578c7e36b 100644
--- a/usr/src/uts/intel/io/acpica/parser/psloop.c
+++ b/usr/src/uts/intel/io/acpica/parser/psloop.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/parser/psopcode.c b/usr/src/uts/intel/io/acpica/parser/psopcode.c
index 0d82976d7f..ef2257e86f 100644
--- a/usr/src/uts/intel/io/acpica/parser/psopcode.c
+++ b/usr/src/uts/intel/io/acpica/parser/psopcode.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -328,12 +328,17 @@ const ACPI_OPCODE_INFO AcpiGbl_AmlOpInfo[AML_NUM_OPCODES] =
/* 79 */ ACPI_OP ("Mid", ARGP_MID_OP, ARGI_MID_OP, ACPI_TYPE_ANY, AML_CLASS_EXECUTE, AML_TYPE_EXEC_3A_1T_1R, AML_FLAGS_EXEC_3A_1T_1R | AML_CONSTANT),
/* 7A */ ACPI_OP ("Continue", ARGP_CONTINUE_OP, ARGI_CONTINUE_OP, ACPI_TYPE_ANY, AML_CLASS_CONTROL, AML_TYPE_CONTROL, 0),
/* 7B */ ACPI_OP ("LoadTable", ARGP_LOAD_TABLE_OP, ARGI_LOAD_TABLE_OP, ACPI_TYPE_ANY, AML_CLASS_EXECUTE, AML_TYPE_EXEC_6A_0T_1R, AML_FLAGS_EXEC_6A_0T_1R),
-/* 7C */ ACPI_OP ("DataTableRegion", ARGP_DATA_REGION_OP, ARGI_DATA_REGION_OP, ACPI_TYPE_REGION, AML_CLASS_NAMED_OBJECT, AML_TYPE_NAMED_COMPLEX, AML_HAS_ARGS | AML_NSOBJECT | AML_NSOPCODE | AML_NSNODE | AML_NAMED | AML_DEFER),
+/* 7C */ ACPI_OP ("DataTableRegion", ARGP_DATA_REGION_OP, ARGI_DATA_REGION_OP, ACPI_TYPE_REGION, AML_CLASS_NAMED_OBJECT, AML_TYPE_NAMED_COMPLEX, AML_HAS_ARGS | AML_NSOBJECT | AML_NSOPCODE | AML_NSNODE | AML_NAMED | AML_DEFER),
/* 7D */ ACPI_OP ("[EvalSubTree]", ARGP_SCOPE_OP, ARGI_SCOPE_OP, ACPI_TYPE_ANY, AML_CLASS_NAMED_OBJECT, AML_TYPE_NAMED_NO_OBJ, AML_HAS_ARGS | AML_NSOBJECT | AML_NSOPCODE | AML_NSNODE),
/* ACPI 3.0 opcodes */
-/* 7E */ ACPI_OP ("Timer", ARGP_TIMER_OP, ARGI_TIMER_OP, ACPI_TYPE_ANY, AML_CLASS_EXECUTE, AML_TYPE_EXEC_0A_0T_1R, AML_FLAGS_EXEC_0A_0T_1R)
+/* 7E */ ACPI_OP ("Timer", ARGP_TIMER_OP, ARGI_TIMER_OP, ACPI_TYPE_ANY, AML_CLASS_EXECUTE, AML_TYPE_EXEC_0A_0T_1R, AML_FLAGS_EXEC_0A_0T_1R),
+
+/* ACPI 5.0 opcodes */
+
+/* 7F */ ACPI_OP ("-ConnectField-", ARGP_CONNECTFIELD_OP, ARGI_CONNECTFIELD_OP, ACPI_TYPE_ANY, AML_CLASS_INTERNAL, AML_TYPE_BOGUS, AML_HAS_ARGS),
+/* 80 */ ACPI_OP ("-ExtAccessField-", ARGP_CONNECTFIELD_OP, ARGI_CONNECTFIELD_OP, ACPI_TYPE_ANY, AML_CLASS_INTERNAL, AML_TYPE_BOGUS, 0)
/*! [End] no source code translation !*/
};
@@ -353,7 +358,7 @@ static const UINT8 AcpiGbl_ShortOpIndex[256] =
/* 0x20 */ _UNK, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK,
/* 0x28 */ _UNK, _UNK, _UNK, _UNK, _UNK, 0x63, _PFX, _PFX,
/* 0x30 */ 0x67, 0x66, 0x68, 0x65, 0x69, 0x64, 0x6A, 0x7D,
-/* 0x38 */ _UNK, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK,
+/* 0x38 */ 0x7F, 0x80, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK,
/* 0x40 */ _UNK, _ASC, _ASC, _ASC, _ASC, _ASC, _ASC, _ASC,
/* 0x48 */ _ASC, _ASC, _ASC, _ASC, _ASC, _ASC, _ASC, _ASC,
/* 0x50 */ _ASC, _ASC, _ASC, _ASC, _ASC, _ASC, _ASC, _ASC,
diff --git a/usr/src/uts/intel/io/acpica/parser/psparse.c b/usr/src/uts/intel/io/acpica/parser/psparse.c
index f817749b75..bcf46adfe5 100644
--- a/usr/src/uts/intel/io/acpica/parser/psparse.c
+++ b/usr/src/uts/intel/io/acpica/parser/psparse.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/parser/psscope.c b/usr/src/uts/intel/io/acpica/parser/psscope.c
index 827531d460..1df5a33af3 100644
--- a/usr/src/uts/intel/io/acpica/parser/psscope.c
+++ b/usr/src/uts/intel/io/acpica/parser/psscope.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/parser/pstree.c b/usr/src/uts/intel/io/acpica/parser/pstree.c
index d62bc7b9db..8271314e16 100644
--- a/usr/src/uts/intel/io/acpica/parser/pstree.c
+++ b/usr/src/uts/intel/io/acpica/parser/pstree.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -85,7 +85,12 @@ AcpiPsGetArg (
ACPI_FUNCTION_ENTRY ();
-
+/*
+ if (Op->Common.AmlOpcode == AML_INT_CONNECTION_OP)
+ {
+ return (Op->Common.Value.Arg);
+ }
+*/
/* Get the info structure for this opcode */
OpInfo = AcpiPsGetOpcodeInfo (Op->Common.AmlOpcode);
diff --git a/usr/src/uts/intel/io/acpica/parser/psutils.c b/usr/src/uts/intel/io/acpica/parser/psutils.c
index 426d2ccdd7..e28b75cc8f 100644
--- a/usr/src/uts/intel/io/acpica/parser/psutils.c
+++ b/usr/src/uts/intel/io/acpica/parser/psutils.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/parser/pswalk.c b/usr/src/uts/intel/io/acpica/parser/pswalk.c
index dbb4a17fe9..9734c4d177 100644
--- a/usr/src/uts/intel/io/acpica/parser/pswalk.c
+++ b/usr/src/uts/intel/io/acpica/parser/pswalk.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/parser/psxface.c b/usr/src/uts/intel/io/acpica/parser/psxface.c
index e7a5d9fe9a..2a012c2915 100644
--- a/usr/src/uts/intel/io/acpica/parser/psxface.c
+++ b/usr/src/uts/intel/io/acpica/parser/psxface.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/resources/rsaddr.c b/usr/src/uts/intel/io/acpica/resources/rsaddr.c
index c11e8ca128..90e7d43b6e 100644
--- a/usr/src/uts/intel/io/acpica/resources/rsaddr.c
+++ b/usr/src/uts/intel/io/acpica/resources/rsaddr.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/resources/rscalc.c b/usr/src/uts/intel/io/acpica/resources/rscalc.c
index 15cf685857..845ed69c7d 100644
--- a/usr/src/uts/intel/io/acpica/resources/rscalc.c
+++ b/usr/src/uts/intel/io/acpica/resources/rscalc.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -345,6 +345,26 @@ AcpiRsGetAmlLength (
break;
+ case ACPI_RESOURCE_TYPE_GPIO:
+
+ TotalSize = (ACPI_RS_LENGTH) (TotalSize + (Resource->Data.Gpio.PinTableLength * 2) +
+ Resource->Data.Gpio.ResourceSource.StringLength +
+ Resource->Data.Gpio.VendorLength);
+
+ break;
+
+
+ case ACPI_RESOURCE_TYPE_SERIAL_BUS:
+
+ TotalSize = AcpiGbl_AmlResourceSerialBusSizes [Resource->Data.CommonSerialBus.Type];
+
+ TotalSize = (ACPI_RS_LENGTH) (TotalSize +
+ Resource->Data.I2cSerialBus.ResourceSource.StringLength +
+ Resource->Data.I2cSerialBus.VendorLength);
+
+ break;
+
+
default:
break;
}
@@ -395,12 +415,13 @@ AcpiRsGetListLength (
UINT32 ExtraStructBytes;
UINT8 ResourceIndex;
UINT8 MinimumAmlResourceLength;
+ AML_RESOURCE *AmlResource;
ACPI_FUNCTION_TRACE (RsGetListLength);
- *SizeNeeded = 0;
+ *SizeNeeded = ACPI_RS_SIZE_MIN; /* Minimum size is one EndTag */
EndAml = AmlBuffer + AmlBufferLength;
/* Walk the list of AML resource descriptors */
@@ -412,9 +433,15 @@ AcpiRsGetListLength (
Status = AcpiUtValidateResource (AmlBuffer, &ResourceIndex);
if (ACPI_FAILURE (Status))
{
+ /*
+ * Exit on failure. Cannot continue because the descriptor length
+ * may be bogus also.
+ */
return_ACPI_STATUS (Status);
}
+ AmlResource = (void *) AmlBuffer;
+
/* Get the resource length and base (minimum) AML size */
ResourceLength = AcpiUtGetResourceLength (AmlBuffer);
@@ -460,10 +487,8 @@ AcpiRsGetListLength (
case ACPI_RESOURCE_NAME_END_TAG:
/*
- * End Tag:
- * This is the normal exit, add size of EndTag
+ * End Tag: This is the normal exit
*/
- *SizeNeeded += ACPI_RS_SIZE_MIN;
return_ACPI_STATUS (AE_OK);
@@ -494,6 +519,30 @@ AcpiRsGetListLength (
ResourceLength - ExtraStructBytes, MinimumAmlResourceLength);
break;
+ case ACPI_RESOURCE_NAME_GPIO:
+
+ /* Vendor data is optional */
+
+ if (AmlResource->Gpio.VendorLength)
+ {
+ ExtraStructBytes += AmlResource->Gpio.VendorOffset -
+ AmlResource->Gpio.PinTableOffset + AmlResource->Gpio.VendorLength;
+ }
+ else
+ {
+ ExtraStructBytes += AmlResource->LargeHeader.ResourceLength +
+ sizeof (AML_RESOURCE_LARGE_HEADER) -
+ AmlResource->Gpio.PinTableOffset;
+ }
+ break;
+
+ case ACPI_RESOURCE_NAME_SERIAL_BUS:
+
+ MinimumAmlResourceLength = AcpiGbl_ResourceAmlSerialBusSizes[
+ AmlResource->CommonSerialBus.Type];
+ ExtraStructBytes += AmlResource->CommonSerialBus.ResourceLength -
+ MinimumAmlResourceLength;
+ break;
default:
break;
@@ -505,8 +554,16 @@ AcpiRsGetListLength (
* Important: Round the size up for the appropriate alignment. This
* is a requirement on IA64.
*/
- BufferSize = AcpiGbl_ResourceStructSizes[ResourceIndex] +
+ if (AcpiUtGetResourceType (AmlBuffer) == ACPI_RESOURCE_NAME_SERIAL_BUS)
+ {
+ BufferSize = AcpiGbl_ResourceStructSerialBusSizes[
+ AmlResource->CommonSerialBus.Type] + ExtraStructBytes;
+ }
+ else
+ {
+ BufferSize = AcpiGbl_ResourceStructSizes[ResourceIndex] +
ExtraStructBytes;
+ }
BufferSize = (UINT32) ACPI_ROUND_UP_TO_NATIVE_WORD (BufferSize);
*SizeNeeded += BufferSize;
diff --git a/usr/src/uts/intel/io/acpica/resources/rscreate.c b/usr/src/uts/intel/io/acpica/resources/rscreate.c
index 40cf447c29..f06ea0cb4d 100644
--- a/usr/src/uts/intel/io/acpica/resources/rscreate.c
+++ b/usr/src/uts/intel/io/acpica/resources/rscreate.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -54,6 +54,79 @@
/*******************************************************************************
*
+ * FUNCTION: AcpiBufferToResource
+ *
+ * PARAMETERS: AmlBuffer - Pointer to the resource byte stream
+ * AmlBufferLength - Length of the AmlBuffer
+ * ResourcePtr - Where the converted resource is returned
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Convert a raw AML buffer to a resource list
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiBufferToResource (
+ UINT8 *AmlBuffer,
+ UINT16 AmlBufferLength,
+ ACPI_RESOURCE **ResourcePtr)
+{
+ ACPI_STATUS Status;
+ ACPI_SIZE ListSizeNeeded;
+ void *Resource;
+ void *CurrentResourcePtr;
+
+ /*
+ * Note: we allow AE_AML_NO_RESOURCE_END_TAG, since an end tag
+ * is not required here.
+ */
+
+ /* Get the required length for the converted resource */
+
+ Status = AcpiRsGetListLength (AmlBuffer, AmlBufferLength,
+ &ListSizeNeeded);
+ if (Status == AE_AML_NO_RESOURCE_END_TAG)
+ {
+ Status = AE_OK;
+ }
+ if (ACPI_FAILURE (Status))
+ {
+ return (Status);
+ }
+
+ /* Allocate a buffer for the converted resource */
+
+ Resource = ACPI_ALLOCATE_ZEROED (ListSizeNeeded);
+ CurrentResourcePtr = Resource;
+ if (!Resource)
+ {
+ return (AE_NO_MEMORY);
+ }
+
+ /* Perform the AML-to-Resource conversion */
+
+ Status = AcpiUtWalkAmlResources (AmlBuffer, AmlBufferLength,
+ AcpiRsConvertAmlToResources, &CurrentResourcePtr);
+ if (Status == AE_AML_NO_RESOURCE_END_TAG)
+ {
+ Status = AE_OK;
+ }
+ if (ACPI_FAILURE (Status))
+ {
+ ACPI_FREE (Resource);
+ }
+ else
+ {
+ *ResourcePtr = Resource;
+ }
+
+ return (Status);
+}
+
+
+/*******************************************************************************
+ *
* FUNCTION: AcpiRsCreateResourceList
*
* PARAMETERS: AmlBuffer - Pointer to the resource byte stream
diff --git a/usr/src/uts/intel/io/acpica/resources/rsdump.c b/usr/src/uts/intel/io/acpica/resources/rsdump.c
index 5470677447..68564739eb 100644
--- a/usr/src/uts/intel/io/acpica/resources/rsdump.c
+++ b/usr/src/uts/intel/io/acpica/resources/rsdump.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -91,6 +91,11 @@ AcpiRsDumpByteList (
UINT8 *Data);
static void
+AcpiRsDumpWordList (
+ UINT16 Length,
+ UINT16 *Data);
+
+static void
AcpiRsDumpDwordList (
UINT8 Length,
UINT32 *Data);
@@ -289,6 +294,87 @@ ACPI_RSDUMP_INFO AcpiRsDumpGenericReg[6] =
{ACPI_RSD_UINT64, ACPI_RSD_OFFSET (GenericReg.Address), "Address", NULL}
};
+ACPI_RSDUMP_INFO AcpiRsDumpGpio[16] =
+{
+ {ACPI_RSD_TITLE, ACPI_RSD_TABLE_SIZE (AcpiRsDumpGpio), "GPIO", NULL},
+ {ACPI_RSD_UINT8, ACPI_RSD_OFFSET (Gpio.RevisionId), "RevisionId", NULL},
+ {ACPI_RSD_UINT8, ACPI_RSD_OFFSET (Gpio.ConnectionType), "ConnectionType", AcpiGbl_CtDecode},
+ {ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET (Gpio.ProducerConsumer), "ProducerConsumer", AcpiGbl_ConsumeDecode},
+ {ACPI_RSD_UINT8, ACPI_RSD_OFFSET (Gpio.PinConfig), "PinConfig", AcpiGbl_PpcDecode},
+ {ACPI_RSD_2BITFLAG, ACPI_RSD_OFFSET (Gpio.Sharable), "Sharable", AcpiGbl_ShrDecode},
+ {ACPI_RSD_2BITFLAG, ACPI_RSD_OFFSET (Gpio.IoRestriction), "IoRestriction", AcpiGbl_IorDecode},
+ {ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET (Gpio.Triggering), "Triggering", AcpiGbl_HeDecode},
+ {ACPI_RSD_2BITFLAG, ACPI_RSD_OFFSET (Gpio.Polarity), "Polarity", AcpiGbl_LlDecode},
+ {ACPI_RSD_UINT16, ACPI_RSD_OFFSET (Gpio.DriveStrength), "DriveStrength", NULL},
+ {ACPI_RSD_UINT16, ACPI_RSD_OFFSET (Gpio.DebounceTimeout), "DebounceTimeout", NULL},
+ {ACPI_RSD_SOURCE, ACPI_RSD_OFFSET (Gpio.ResourceSource), "ResourceSource", NULL},
+ {ACPI_RSD_UINT16, ACPI_RSD_OFFSET (Gpio.PinTableLength), "PinTableLength", NULL},
+ {ACPI_RSD_WORDLIST, ACPI_RSD_OFFSET (Gpio.PinTable), "PinTable", NULL},
+ {ACPI_RSD_UINT16, ACPI_RSD_OFFSET (Gpio.VendorLength), "VendorLength", NULL},
+ {ACPI_RSD_SHORTLISTX,ACPI_RSD_OFFSET (Gpio.VendorData), "VendorData", NULL},
+};
+
+ACPI_RSDUMP_INFO AcpiRsDumpFixedDma[4] =
+{
+ {ACPI_RSD_TITLE, ACPI_RSD_TABLE_SIZE (AcpiRsDumpFixedDma), "FixedDma", NULL},
+ {ACPI_RSD_UINT16, ACPI_RSD_OFFSET (FixedDma.RequestLines), "RequestLines", NULL},
+ {ACPI_RSD_UINT16, ACPI_RSD_OFFSET (FixedDma.Channels), "Channels", NULL},
+ {ACPI_RSD_UINT8, ACPI_RSD_OFFSET (FixedDma.Width), "TransferWidth", AcpiGbl_DtsDecode},
+};
+
+#define ACPI_RS_DUMP_COMMON_SERIAL_BUS \
+ {ACPI_RSD_UINT8, ACPI_RSD_OFFSET (CommonSerialBus.RevisionId), "RevisionId", NULL}, \
+ {ACPI_RSD_UINT8, ACPI_RSD_OFFSET (CommonSerialBus.Type), "Type", AcpiGbl_SbtDecode}, \
+ {ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET (CommonSerialBus.ProducerConsumer), "ProducerConsumer", AcpiGbl_ConsumeDecode}, \
+ {ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET (CommonSerialBus.SlaveMode), "SlaveMode", AcpiGbl_SmDecode}, \
+ {ACPI_RSD_UINT8, ACPI_RSD_OFFSET (CommonSerialBus.TypeRevisionId), "TypeRevisionId", NULL}, \
+ {ACPI_RSD_UINT16, ACPI_RSD_OFFSET (CommonSerialBus.TypeDataLength), "TypeDataLength", NULL}, \
+ {ACPI_RSD_SOURCE, ACPI_RSD_OFFSET (CommonSerialBus.ResourceSource), "ResourceSource", NULL}, \
+ {ACPI_RSD_UINT16, ACPI_RSD_OFFSET (CommonSerialBus.VendorLength), "VendorLength", NULL}, \
+ {ACPI_RSD_SHORTLISTX,ACPI_RSD_OFFSET (CommonSerialBus.VendorData), "VendorData", NULL},
+
+ACPI_RSDUMP_INFO AcpiRsDumpCommonSerialBus[10] =
+{
+ {ACPI_RSD_TITLE, ACPI_RSD_TABLE_SIZE (AcpiRsDumpCommonSerialBus), "Common Serial Bus", NULL},
+ ACPI_RS_DUMP_COMMON_SERIAL_BUS
+};
+
+ACPI_RSDUMP_INFO AcpiRsDumpI2cSerialBus[13] =
+{
+ {ACPI_RSD_TITLE, ACPI_RSD_TABLE_SIZE (AcpiRsDumpI2cSerialBus), "I2C Serial Bus", NULL},
+ ACPI_RS_DUMP_COMMON_SERIAL_BUS
+ {ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET (I2cSerialBus.AccessMode), "AccessMode", AcpiGbl_AmDecode},
+ {ACPI_RSD_UINT32, ACPI_RSD_OFFSET (I2cSerialBus.ConnectionSpeed), "ConnectionSpeed", NULL},
+ {ACPI_RSD_UINT16, ACPI_RSD_OFFSET (I2cSerialBus.SlaveAddress), "SlaveAddress", NULL},
+};
+
+ACPI_RSDUMP_INFO AcpiRsDumpSpiSerialBus[17] =
+{
+ {ACPI_RSD_TITLE, ACPI_RSD_TABLE_SIZE (AcpiRsDumpSpiSerialBus), "Spi Serial Bus", NULL},
+ ACPI_RS_DUMP_COMMON_SERIAL_BUS
+ {ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET (SpiSerialBus.WireMode), "WireMode", AcpiGbl_WmDecode},
+ {ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET (SpiSerialBus.DevicePolarity), "DevicePolarity", AcpiGbl_DpDecode},
+ {ACPI_RSD_UINT8, ACPI_RSD_OFFSET (SpiSerialBus.DataBitLength), "DataBitLength", NULL},
+ {ACPI_RSD_UINT8, ACPI_RSD_OFFSET (SpiSerialBus.ClockPhase), "ClockPhase", AcpiGbl_CphDecode},
+ {ACPI_RSD_UINT8, ACPI_RSD_OFFSET (SpiSerialBus.ClockPolarity), "ClockPolarity", AcpiGbl_CpoDecode},
+ {ACPI_RSD_UINT16, ACPI_RSD_OFFSET (SpiSerialBus.DeviceSelection), "DeviceSelection", NULL},
+ {ACPI_RSD_UINT32, ACPI_RSD_OFFSET (SpiSerialBus.ConnectionSpeed), "ConnectionSpeed", NULL},
+};
+
+ACPI_RSDUMP_INFO AcpiRsDumpUartSerialBus[19] =
+{
+ {ACPI_RSD_TITLE, ACPI_RSD_TABLE_SIZE (AcpiRsDumpUartSerialBus), "Uart Serial Bus", NULL},
+ ACPI_RS_DUMP_COMMON_SERIAL_BUS
+ {ACPI_RSD_2BITFLAG, ACPI_RSD_OFFSET (UartSerialBus.FlowControl), "FlowControl", AcpiGbl_FcDecode},
+ {ACPI_RSD_2BITFLAG, ACPI_RSD_OFFSET (UartSerialBus.StopBits), "StopBits", AcpiGbl_SbDecode},
+ {ACPI_RSD_3BITFLAG, ACPI_RSD_OFFSET (UartSerialBus.DataBits), "DataBits", AcpiGbl_BpbDecode},
+ {ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET (UartSerialBus.Endian), "Endian", AcpiGbl_EdDecode},
+ {ACPI_RSD_UINT8, ACPI_RSD_OFFSET (UartSerialBus.Parity), "Parity", AcpiGbl_PtDecode},
+ {ACPI_RSD_UINT8, ACPI_RSD_OFFSET (UartSerialBus.LinesEnabled), "LinesEnabled", NULL},
+ {ACPI_RSD_UINT16, ACPI_RSD_OFFSET (UartSerialBus.RxFifoSize), "RxFifoSize", NULL},
+ {ACPI_RSD_UINT16, ACPI_RSD_OFFSET (UartSerialBus.TxFifoSize), "TxFifoSize", NULL},
+ {ACPI_RSD_UINT32, ACPI_RSD_OFFSET (UartSerialBus.DefaultBaudRate), "ConnectionSpeed", NULL},
+};
/*
* Tables used for common address descriptor flag fields
@@ -391,7 +477,15 @@ AcpiRsDumpDescriptor (
/* Data items, 8/16/32/64 bit */
case ACPI_RSD_UINT8:
- AcpiRsOutInteger8 (Name, ACPI_GET8 (Target));
+ if (Table->Pointer)
+ {
+ AcpiRsOutString (Name, ACPI_CAST_PTR (char,
+ Table->Pointer [*Target]));
+ }
+ else
+ {
+ AcpiRsOutInteger8 (Name, ACPI_GET8 (Target));
+ }
break;
case ACPI_RSD_UINT16:
@@ -418,6 +512,11 @@ AcpiRsDumpDescriptor (
Table->Pointer [*Target & 0x03]));
break;
+ case ACPI_RSD_3BITFLAG:
+ AcpiRsOutString (Name, ACPI_CAST_PTR (char,
+ Table->Pointer [*Target & 0x07]));
+ break;
+
case ACPI_RSD_SHORTLIST:
/*
* Short byte list (single line output) for DMA and IRQ resources
@@ -430,6 +529,19 @@ AcpiRsDumpDescriptor (
}
break;
+ case ACPI_RSD_SHORTLISTX:
+ /*
+ * Short byte list (single line output) for GPIO vendor data
+ * Note: The list length is obtained from the previous table entry
+ */
+ if (PreviousTarget)
+ {
+ AcpiRsOutTitle (Name);
+ AcpiRsDumpShortByteList (*PreviousTarget,
+ *(ACPI_CAST_INDIRECT_PTR (UINT8, Target)));
+ }
+ break;
+
case ACPI_RSD_LONGLIST:
/*
* Long byte list for Vendor resource data
@@ -453,6 +565,18 @@ AcpiRsDumpDescriptor (
}
break;
+ case ACPI_RSD_WORDLIST:
+ /*
+ * Word list for GPIO Pin Table
+ * Note: The list length is obtained from the previous table entry
+ */
+ if (PreviousTarget)
+ {
+ AcpiRsDumpWordList (*PreviousTarget,
+ *(ACPI_CAST_INDIRECT_PTR (UINT16, Target)));
+ }
+ break;
+
case ACPI_RSD_ADDRESS:
/*
* Common flags for all Address resources
@@ -613,13 +737,20 @@ AcpiRsDumpResourceList (
/* Dump the resource descriptor */
- AcpiRsDumpDescriptor (&ResourceList->Data,
- AcpiGbl_DumpResourceDispatch[Type]);
+ if (Type == ACPI_RESOURCE_TYPE_SERIAL_BUS)
+ {
+ AcpiRsDumpDescriptor (&ResourceList->Data,
+ AcpiGbl_DumpSerialBusDispatch[ResourceList->Data.CommonSerialBus.Type]);
+ }
+ else
+ {
+ AcpiRsDumpDescriptor (&ResourceList->Data,
+ AcpiGbl_DumpResourceDispatch[Type]);
+ }
/* Point to the next resource structure */
- ResourceList = ACPI_ADD_PTR (ACPI_RESOURCE, ResourceList,
- ResourceList->Length);
+ ResourceList = ACPI_NEXT_RESOURCE (ResourceList);
/* Exit when END_TAG descriptor is reached */
@@ -796,5 +927,20 @@ AcpiRsDumpDwordList (
}
}
+static void
+AcpiRsDumpWordList (
+ UINT16 Length,
+ UINT16 *Data)
+{
+ UINT16 i;
+
+
+ for (i = 0; i < Length; i++)
+ {
+ AcpiOsPrintf ("%25s%2.2X : %4.4X\n",
+ "Word", i, Data[i]);
+ }
+}
+
#endif
diff --git a/usr/src/uts/intel/io/acpica/resources/rsinfo.c b/usr/src/uts/intel/io/acpica/resources/rsinfo.c
index 5753667806..e7287d5455 100644
--- a/usr/src/uts/intel/io/acpica/resources/rsinfo.c
+++ b/usr/src/uts/intel/io/acpica/resources/rsinfo.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -82,7 +82,10 @@ ACPI_RSCONVERT_INFO *AcpiGbl_SetResourceDispatch[] =
AcpiRsConvertAddress64, /* 0x0D, ACPI_RESOURCE_TYPE_ADDRESS64 */
AcpiRsConvertExtAddress64, /* 0x0E, ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64 */
AcpiRsConvertExtIrq, /* 0x0F, ACPI_RESOURCE_TYPE_EXTENDED_IRQ */
- AcpiRsConvertGenericReg /* 0x10, ACPI_RESOURCE_TYPE_GENERIC_REGISTER */
+ AcpiRsConvertGenericReg, /* 0x10, ACPI_RESOURCE_TYPE_GENERIC_REGISTER */
+ AcpiRsConvertGpio, /* 0x11, ACPI_RESOURCE_TYPE_GPIO */
+ AcpiRsConvertFixedDma, /* 0x12, ACPI_RESOURCE_TYPE_FIXED_DMA */
+ NULL, /* 0x13, ACPI_RESOURCE_TYPE_SERIAL_BUS - Use subtype table below */
};
/* Dispatch tables for AML-to-resource (Get Resource) conversion functions */
@@ -101,7 +104,7 @@ ACPI_RSCONVERT_INFO *AcpiGbl_GetResourceDispatch[] =
AcpiRsConvertEndDpf, /* 0x07, ACPI_RESOURCE_NAME_END_DEPENDENT */
AcpiRsConvertIo, /* 0x08, ACPI_RESOURCE_NAME_IO */
AcpiRsConvertFixedIo, /* 0x09, ACPI_RESOURCE_NAME_FIXED_IO */
- NULL, /* 0x0A, Reserved */
+ AcpiRsConvertFixedDma, /* 0x0A, ACPI_RESOURCE_NAME_FIXED_DMA */
NULL, /* 0x0B, Reserved */
NULL, /* 0x0C, Reserved */
NULL, /* 0x0D, Reserved */
@@ -121,7 +124,20 @@ ACPI_RSCONVERT_INFO *AcpiGbl_GetResourceDispatch[] =
AcpiRsConvertAddress16, /* 0x08, ACPI_RESOURCE_NAME_ADDRESS16 */
AcpiRsConvertExtIrq, /* 0x09, ACPI_RESOURCE_NAME_EXTENDED_IRQ */
AcpiRsConvertAddress64, /* 0x0A, ACPI_RESOURCE_NAME_ADDRESS64 */
- AcpiRsConvertExtAddress64 /* 0x0B, ACPI_RESOURCE_NAME_EXTENDED_ADDRESS64 */
+ AcpiRsConvertExtAddress64, /* 0x0B, ACPI_RESOURCE_NAME_EXTENDED_ADDRESS64 */
+ AcpiRsConvertGpio, /* 0x0C, ACPI_RESOURCE_NAME_GPIO */
+ NULL, /* 0x0D, Reserved */
+ NULL, /* 0x0E, ACPI_RESOURCE_NAME_SERIAL_BUS - Use subtype table below */
+};
+
+/* Subtype table for SerialBus -- I2C, SPI, and UART */
+
+ACPI_RSCONVERT_INFO *AcpiGbl_ConvertResourceSerialBusDispatch[] =
+{
+ NULL,
+ AcpiRsConvertI2cSerialBus,
+ AcpiRsConvertSpiSerialBus,
+ AcpiRsConvertUartSerialBus,
};
@@ -148,6 +164,17 @@ ACPI_RSDUMP_INFO *AcpiGbl_DumpResourceDispatch[] =
AcpiRsDumpExtAddress64, /* ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64 */
AcpiRsDumpExtIrq, /* ACPI_RESOURCE_TYPE_EXTENDED_IRQ */
AcpiRsDumpGenericReg, /* ACPI_RESOURCE_TYPE_GENERIC_REGISTER */
+ AcpiRsDumpGpio, /* ACPI_RESOURCE_TYPE_GPIO */
+ AcpiRsDumpFixedDma, /* ACPI_RESOURCE_TYPE_FIXED_DMA */
+ NULL, /* ACPI_RESOURCE_TYPE_SERIAL_BUS */
+};
+
+ACPI_RSDUMP_INFO *AcpiGbl_DumpSerialBusDispatch[] =
+{
+ NULL,
+ AcpiRsDumpI2cSerialBus, /* AML_RESOURCE_I2C_BUS_TYPE */
+ AcpiRsDumpSpiSerialBus, /* AML_RESOURCE_SPI_BUS_TYPE */
+ AcpiRsDumpUartSerialBus, /* AML_RESOURCE_UART_BUS_TYPE */
};
#endif
@@ -175,7 +202,10 @@ const UINT8 AcpiGbl_AmlResourceSizes[] =
sizeof (AML_RESOURCE_ADDRESS64), /* ACPI_RESOURCE_TYPE_ADDRESS64 */
sizeof (AML_RESOURCE_EXTENDED_ADDRESS64),/*ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64 */
sizeof (AML_RESOURCE_EXTENDED_IRQ), /* ACPI_RESOURCE_TYPE_EXTENDED_IRQ */
- sizeof (AML_RESOURCE_GENERIC_REGISTER) /* ACPI_RESOURCE_TYPE_GENERIC_REGISTER */
+ sizeof (AML_RESOURCE_GENERIC_REGISTER), /* ACPI_RESOURCE_TYPE_GENERIC_REGISTER */
+ sizeof (AML_RESOURCE_GPIO), /* ACPI_RESOURCE_TYPE_GPIO */
+ sizeof (AML_RESOURCE_FIXED_DMA), /* ACPI_RESOURCE_TYPE_FIXED_DMA */
+ sizeof (AML_RESOURCE_COMMON_SERIALBUS), /* ACPI_RESOURCE_TYPE_SERIAL_BUS */
};
@@ -193,7 +223,7 @@ const UINT8 AcpiGbl_ResourceStructSizes[] =
ACPI_RS_SIZE_MIN,
ACPI_RS_SIZE (ACPI_RESOURCE_IO),
ACPI_RS_SIZE (ACPI_RESOURCE_FIXED_IO),
- 0,
+ ACPI_RS_SIZE (ACPI_RESOURCE_FIXED_DMA),
0,
0,
0,
@@ -213,6 +243,23 @@ const UINT8 AcpiGbl_ResourceStructSizes[] =
ACPI_RS_SIZE (ACPI_RESOURCE_ADDRESS16),
ACPI_RS_SIZE (ACPI_RESOURCE_EXTENDED_IRQ),
ACPI_RS_SIZE (ACPI_RESOURCE_ADDRESS64),
- ACPI_RS_SIZE (ACPI_RESOURCE_EXTENDED_ADDRESS64)
+ ACPI_RS_SIZE (ACPI_RESOURCE_EXTENDED_ADDRESS64),
+ ACPI_RS_SIZE (ACPI_RESOURCE_GPIO),
+ ACPI_RS_SIZE (ACPI_RESOURCE_COMMON_SERIALBUS)
+};
+
+const UINT8 AcpiGbl_AmlResourceSerialBusSizes[] =
+{
+ 0,
+ sizeof (AML_RESOURCE_I2C_SERIALBUS),
+ sizeof (AML_RESOURCE_SPI_SERIALBUS),
+ sizeof (AML_RESOURCE_UART_SERIALBUS),
};
+const UINT8 AcpiGbl_ResourceStructSerialBusSizes[] =
+{
+ 0,
+ ACPI_RS_SIZE (ACPI_RESOURCE_I2C_SERIALBUS),
+ ACPI_RS_SIZE (ACPI_RESOURCE_SPI_SERIALBUS),
+ ACPI_RS_SIZE (ACPI_RESOURCE_UART_SERIALBUS),
+};
diff --git a/usr/src/uts/intel/io/acpica/resources/rsio.c b/usr/src/uts/intel/io/acpica/resources/rsio.c
index 2d0f9f7baa..065108cfa1 100644
--- a/usr/src/uts/intel/io/acpica/resources/rsio.c
+++ b/usr/src/uts/intel/io/acpica/resources/rsio.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/resources/rsirq.c b/usr/src/uts/intel/io/acpica/resources/rsirq.c
index ff952f96ff..c2a88ee89a 100644
--- a/usr/src/uts/intel/io/acpica/resources/rsirq.c
+++ b/usr/src/uts/intel/io/acpica/resources/rsirq.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -274,3 +274,35 @@ ACPI_RSCONVERT_INFO AcpiRsConvertDma[6] =
ACPI_RS_OFFSET (Data.Dma.ChannelCount)}
};
+
+/*******************************************************************************
+ *
+ * AcpiRsConvertFixedDma
+ *
+ ******************************************************************************/
+
+ACPI_RSCONVERT_INFO AcpiRsConvertFixedDma[4] =
+{
+ {ACPI_RSC_INITGET, ACPI_RESOURCE_TYPE_FIXED_DMA,
+ ACPI_RS_SIZE (ACPI_RESOURCE_FIXED_DMA),
+ ACPI_RSC_TABLE_SIZE (AcpiRsConvertFixedDma)},
+
+ {ACPI_RSC_INITSET, ACPI_RESOURCE_NAME_FIXED_DMA,
+ sizeof (AML_RESOURCE_FIXED_DMA),
+ 0},
+
+ /*
+ * These fields are contiguous in both the source and destination:
+ * RequestLines
+ * Channels
+ */
+
+ {ACPI_RSC_MOVE16, ACPI_RS_OFFSET (Data.FixedDma.RequestLines),
+ AML_OFFSET (FixedDma.RequestLines),
+ 2},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.FixedDma.Width),
+ AML_OFFSET (FixedDma.Width),
+ 1},
+
+};
diff --git a/usr/src/uts/intel/io/acpica/resources/rslist.c b/usr/src/uts/intel/io/acpica/resources/rslist.c
index c64065f2ca..187d56cd42 100644
--- a/usr/src/uts/intel/io/acpica/resources/rslist.c
+++ b/usr/src/uts/intel/io/acpica/resources/rslist.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -77,6 +77,8 @@ AcpiRsConvertAmlToResources (
ACPI_RESOURCE **ResourcePtr = ACPI_CAST_INDIRECT_PTR (
ACPI_RESOURCE, Context);
ACPI_RESOURCE *Resource;
+ AML_RESOURCE *AmlResource;
+ ACPI_RSCONVERT_INFO *ConversionTable;
ACPI_STATUS Status;
@@ -94,11 +96,42 @@ AcpiRsConvertAmlToResources (
"Misaligned resource pointer %p", Resource));
}
- /* Convert the AML byte stream resource to a local resource struct */
+ /* Get the appropriate conversion info table */
+
+ AmlResource = ACPI_CAST_PTR (AML_RESOURCE, Aml);
+ if (AcpiUtGetResourceType (Aml) == ACPI_RESOURCE_NAME_SERIAL_BUS)
+ {
+ if (AmlResource->CommonSerialBus.Type > AML_RESOURCE_MAX_SERIALBUSTYPE)
+ {
+ ConversionTable = NULL;
+ }
+ else
+ {
+ /* This is an I2C, SPI, or UART SerialBus descriptor */
+
+ ConversionTable =
+ AcpiGbl_ConvertResourceSerialBusDispatch[
+ AmlResource->CommonSerialBus.Type];
+ }
+ }
+ else
+ {
+ ConversionTable =
+ AcpiGbl_GetResourceDispatch[ResourceIndex];
+ }
+
+ if (!ConversionTable)
+ {
+ ACPI_ERROR ((AE_INFO,
+ "Invalid/unsupported resource descriptor: Type 0x%2.2X",
+ ResourceIndex));
+ return (AE_AML_INVALID_RESOURCE_TYPE);
+ }
+
+ /* Convert the AML byte stream resource to a local resource struct */
Status = AcpiRsConvertAmlToResource (
- Resource, ACPI_CAST_PTR (AML_RESOURCE, Aml),
- AcpiGbl_GetResourceDispatch[ResourceIndex]);
+ Resource, AmlResource, ConversionTable);
if (ACPI_FAILURE (Status))
{
ACPI_EXCEPTION ((AE_INFO, Status,
@@ -113,7 +146,7 @@ AcpiRsConvertAmlToResources (
/* Point to the next structure in the output buffer */
- *ResourcePtr = ACPI_ADD_PTR (void, Resource, Resource->Length);
+ *ResourcePtr = ACPI_NEXT_RESOURCE (Resource);
return_ACPI_STATUS (AE_OK);
}
@@ -145,6 +178,7 @@ AcpiRsConvertResourcesToAml (
{
UINT8 *Aml = OutputBuffer;
UINT8 *EndAml = OutputBuffer + AmlSizeNeeded;
+ ACPI_RSCONVERT_INFO *ConversionTable;
ACPI_STATUS Status;
@@ -167,9 +201,36 @@ AcpiRsConvertResourcesToAml (
/* Perform the conversion */
+ if (Resource->Type == ACPI_RESOURCE_TYPE_SERIAL_BUS)
+ {
+ if (Resource->Data.CommonSerialBus.Type > AML_RESOURCE_MAX_SERIALBUSTYPE)
+ {
+ ConversionTable = NULL;
+ }
+ else
+ {
+ /* This is an I2C, SPI, or UART SerialBus descriptor */
+
+ ConversionTable = AcpiGbl_ConvertResourceSerialBusDispatch[
+ Resource->Data.CommonSerialBus.Type];
+ }
+ }
+ else
+ {
+ ConversionTable = AcpiGbl_SetResourceDispatch[Resource->Type];
+ }
+
+ if (!ConversionTable)
+ {
+ ACPI_ERROR ((AE_INFO,
+ "Invalid/unsupported resource descriptor: Type 0x%2.2X",
+ Resource->Type));
+ return (AE_AML_INVALID_RESOURCE_TYPE);
+ }
+
Status = AcpiRsConvertResourceToAml (Resource,
- ACPI_CAST_PTR (AML_RESOURCE, Aml),
- AcpiGbl_SetResourceDispatch[Resource->Type]);
+ ACPI_CAST_PTR (AML_RESOURCE, Aml),
+ ConversionTable);
if (ACPI_FAILURE (Status))
{
ACPI_EXCEPTION ((AE_INFO, Status,
@@ -204,7 +265,7 @@ AcpiRsConvertResourcesToAml (
/* Point to the next input resource descriptor */
- Resource = ACPI_ADD_PTR (ACPI_RESOURCE, Resource, Resource->Length);
+ Resource = ACPI_NEXT_RESOURCE (Resource);
}
/* Completed buffer, but did not find an EndTag resource descriptor */
diff --git a/usr/src/uts/intel/io/acpica/resources/rsmemory.c b/usr/src/uts/intel/io/acpica/resources/rsmemory.c
index 06262fa64b..83d131956f 100644
--- a/usr/src/uts/intel/io/acpica/resources/rsmemory.c
+++ b/usr/src/uts/intel/io/acpica/resources/rsmemory.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/resources/rsmisc.c b/usr/src/uts/intel/io/acpica/resources/rsmisc.c
index 45415faacc..dfad696ca1 100644
--- a/usr/src/uts/intel/io/acpica/resources/rsmisc.c
+++ b/usr/src/uts/intel/io/acpica/resources/rsmisc.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -94,6 +94,11 @@ AcpiRsConvertAmlToResource (
ACPI_FUNCTION_TRACE (RsConvertAmlToResource);
+ if (!Info)
+ {
+ return_ACPI_STATUS (AE_BAD_PARAMETER);
+ }
+
if (((ACPI_SIZE) Resource) & 0x3)
{
/* Each internal resource struct is expected to be 32-bit aligned */
@@ -112,7 +117,6 @@ AcpiRsConvertAmlToResource (
* table length (# of table entries)
*/
Count = INIT_TABLE_LENGTH (Info);
-
while (Count)
{
/*
@@ -162,6 +166,15 @@ AcpiRsConvertAmlToResource (
break;
+ case ACPI_RSC_3BITFLAG:
+ /*
+ * Mask and shift the flag bits
+ */
+ ACPI_SET8 (Destination) = (UINT8)
+ ((ACPI_GET8 (Source) >> Info->Value) & 0x07);
+ break;
+
+
case ACPI_RSC_COUNT:
ItemCount = ACPI_GET8 (Source);
@@ -182,6 +195,75 @@ AcpiRsConvertAmlToResource (
break;
+ case ACPI_RSC_COUNT_GPIO_PIN:
+
+ Target = ACPI_ADD_PTR (void, Aml, Info->Value);
+ ItemCount = ACPI_GET16 (Target) - ACPI_GET16 (Source);
+
+ Resource->Length = Resource->Length + ItemCount;
+ ItemCount = ItemCount / 2;
+ ACPI_SET16 (Destination) = ItemCount;
+ break;
+
+
+ case ACPI_RSC_COUNT_GPIO_VEN:
+
+ ItemCount = ACPI_GET8 (Source);
+ ACPI_SET8 (Destination) = (UINT8) ItemCount;
+
+ Resource->Length = Resource->Length +
+ (Info->Value * ItemCount);
+ break;
+
+
+ case ACPI_RSC_COUNT_GPIO_RES:
+
+ /*
+ * Vendor data is optional (length/offset may both be zero)
+ * Examine vendor data length field first
+ */
+ Target = ACPI_ADD_PTR (void, Aml, (Info->Value + 2));
+ if (ACPI_GET16 (Target))
+ {
+ /* Use vendor offset to get resource source length */
+
+ Target = ACPI_ADD_PTR (void, Aml, Info->Value);
+ ItemCount = ACPI_GET16 (Target) - ACPI_GET16 (Source);
+ }
+ else
+ {
+ /* No vendor data to worry about */
+
+ ItemCount = Aml->LargeHeader.ResourceLength +
+ sizeof (AML_RESOURCE_LARGE_HEADER) -
+ ACPI_GET16 (Source);
+ }
+
+ Resource->Length = Resource->Length + ItemCount;
+ ACPI_SET16 (Destination) = ItemCount;
+ break;
+
+
+ case ACPI_RSC_COUNT_SERIAL_VEN:
+
+ ItemCount = ACPI_GET16 (Source) - Info->Value;
+
+ Resource->Length = Resource->Length + ItemCount;
+ ACPI_SET16 (Destination) = ItemCount;
+ break;
+
+
+ case ACPI_RSC_COUNT_SERIAL_RES:
+
+ ItemCount = (AmlResourceLength +
+ sizeof (AML_RESOURCE_LARGE_HEADER)) -
+ ACPI_GET16 (Source) - Info->Value;
+
+ Resource->Length = Resource->Length + ItemCount;
+ ACPI_SET16 (Destination) = ItemCount;
+ break;
+
+
case ACPI_RSC_LENGTH:
Resource->Length = Resource->Length + Info->Value;
@@ -204,6 +286,66 @@ AcpiRsConvertAmlToResource (
break;
+ case ACPI_RSC_MOVE_GPIO_PIN:
+
+ /* Generate and set the PIN data pointer */
+
+ Target = (char *) ACPI_ADD_PTR (void, Resource,
+ (Resource->Length - ItemCount * 2));
+ *(UINT16 **) Destination = ACPI_CAST_PTR (UINT16, Target);
+
+ /* Copy the PIN data */
+
+ Source = ACPI_ADD_PTR (void, Aml, ACPI_GET16 (Source));
+ AcpiRsMoveData (Target, Source, ItemCount, Info->Opcode);
+ break;
+
+
+ case ACPI_RSC_MOVE_GPIO_RES:
+
+ /* Generate and set the ResourceSource string pointer */
+
+ Target = (char *) ACPI_ADD_PTR (void, Resource,
+ (Resource->Length - ItemCount));
+ *(UINT8 **) Destination = ACPI_CAST_PTR (UINT8, Target);
+
+ /* Copy the ResourceSource string */
+
+ Source = ACPI_ADD_PTR (void, Aml, ACPI_GET16 (Source));
+ AcpiRsMoveData (Target, Source, ItemCount, Info->Opcode);
+ break;
+
+
+ case ACPI_RSC_MOVE_SERIAL_VEN:
+
+ /* Generate and set the Vendor Data pointer */
+
+ Target = (char *) ACPI_ADD_PTR (void, Resource,
+ (Resource->Length - ItemCount));
+ *(UINT8 **) Destination = ACPI_CAST_PTR (UINT8, Target);
+
+ /* Copy the Vendor Data */
+
+ Source = ACPI_ADD_PTR (void, Aml, Info->Value);
+ AcpiRsMoveData (Target, Source, ItemCount, Info->Opcode);
+ break;
+
+
+ case ACPI_RSC_MOVE_SERIAL_RES:
+
+ /* Generate and set the ResourceSource string pointer */
+
+ Target = (char *) ACPI_ADD_PTR (void, Resource,
+ (Resource->Length - ItemCount));
+ *(UINT8 **) Destination = ACPI_CAST_PTR (UINT8, Target);
+
+ /* Copy the ResourceSource string */
+
+ Source = ACPI_ADD_PTR (void, Aml, (ACPI_GET16 (Source) + Info->Value));
+ AcpiRsMoveData (Target, Source, ItemCount, Info->Opcode);
+ break;
+
+
case ACPI_RSC_SET8:
ACPI_MEMSET (Destination, Info->AmlOffset, Info->Value);
@@ -243,11 +385,12 @@ AcpiRsConvertAmlToResource (
* Optional ResourceSource (Index and String). This is the more
* complicated case used by the Interrupt() macro
*/
- Target = ACPI_ADD_PTR (char, Resource, Info->AmlOffset + (ItemCount * 4));
+ Target = ACPI_ADD_PTR (char, Resource,
+ Info->AmlOffset + (ItemCount * 4));
Resource->Length +=
- AcpiRsGetResourceSource (AmlResourceLength,
- (ACPI_RS_LENGTH) (((ItemCount - 1) * sizeof (UINT32)) + Info->Value),
+ AcpiRsGetResourceSource (AmlResourceLength, (ACPI_RS_LENGTH)
+ (((ItemCount - 1) * sizeof (UINT32)) + Info->Value),
Destination, Aml, Target);
break;
@@ -356,6 +499,7 @@ AcpiRsConvertResourceToAml (
{
void *Source = NULL;
void *Destination;
+ char *Target;
ACPI_RSDESC_SIZE AmlLength = 0;
UINT8 Count;
UINT16 Temp16 = 0;
@@ -365,6 +509,11 @@ AcpiRsConvertResourceToAml (
ACPI_FUNCTION_TRACE (RsConvertResourceToAml);
+ if (!Info)
+ {
+ return_ACPI_STATUS (AE_BAD_PARAMETER);
+ }
+
/*
* First table entry must be ACPI_RSC_INITxxx and must contain the
* table length (# of table entries)
@@ -420,6 +569,15 @@ AcpiRsConvertResourceToAml (
break;
+ case ACPI_RSC_3BITFLAG:
+ /*
+ * Mask and shift the flag bits
+ */
+ ACPI_SET8 (Destination) |= (UINT8)
+ ((ACPI_GET8 (Source) & 0x07) << Info->Value);
+ break;
+
+
case ACPI_RSC_COUNT:
ItemCount = ACPI_GET8 (Source);
@@ -437,6 +595,68 @@ AcpiRsConvertResourceToAml (
break;
+ case ACPI_RSC_COUNT_GPIO_PIN:
+
+ ItemCount = ACPI_GET16 (Source);
+ ACPI_SET16 (Destination) = (UINT16) AmlLength;
+
+ AmlLength = (UINT16) (AmlLength + ItemCount * 2);
+ Target = ACPI_ADD_PTR (void, Aml, Info->Value);
+ ACPI_SET16 (Target) = (UINT16) AmlLength;
+ AcpiRsSetResourceLength (AmlLength, Aml);
+ break;
+
+
+ case ACPI_RSC_COUNT_GPIO_VEN:
+
+ ItemCount = ACPI_GET16 (Source);
+ ACPI_SET16 (Destination) = (UINT16) ItemCount;
+
+ AmlLength = (UINT16) (AmlLength + (Info->Value * ItemCount));
+ AcpiRsSetResourceLength (AmlLength, Aml);
+ break;
+
+
+ case ACPI_RSC_COUNT_GPIO_RES:
+
+ /* Set resource source string length */
+
+ ItemCount = ACPI_GET16 (Source);
+ ACPI_SET16 (Destination) = (UINT16) AmlLength;
+
+ /* Compute offset for the Vendor Data */
+
+ AmlLength = (UINT16) (AmlLength + ItemCount);
+ Target = ACPI_ADD_PTR (void, Aml, Info->Value);
+
+ /* Set vendor offset only if there is vendor data */
+
+ if (Resource->Data.Gpio.VendorLength)
+ {
+ ACPI_SET16 (Target) = (UINT16) AmlLength;
+ }
+
+ AcpiRsSetResourceLength (AmlLength, Aml);
+ break;
+
+
+ case ACPI_RSC_COUNT_SERIAL_VEN:
+
+ ItemCount = ACPI_GET16 (Source);
+ ACPI_SET16 (Destination) = ItemCount + Info->Value;
+ AmlLength = (UINT16) (AmlLength + ItemCount);
+ AcpiRsSetResourceLength (AmlLength, Aml);
+ break;
+
+
+ case ACPI_RSC_COUNT_SERIAL_RES:
+
+ ItemCount = ACPI_GET16 (Source);
+ AmlLength = (UINT16) (AmlLength + ItemCount);
+ AcpiRsSetResourceLength (AmlLength, Aml);
+ break;
+
+
case ACPI_RSC_LENGTH:
AcpiRsSetResourceLength (Info->Value, Aml);
@@ -456,6 +676,44 @@ AcpiRsConvertResourceToAml (
break;
+ case ACPI_RSC_MOVE_GPIO_PIN:
+
+ Destination = (char *) ACPI_ADD_PTR (void, Aml,
+ ACPI_GET16 (Destination));
+ Source = * (UINT16 **) Source;
+ AcpiRsMoveData (Destination, Source, ItemCount, Info->Opcode);
+ break;
+
+
+ case ACPI_RSC_MOVE_GPIO_RES:
+
+ /* Used for both ResourceSource string and VendorData */
+
+ Destination = (char *) ACPI_ADD_PTR (void, Aml,
+ ACPI_GET16 (Destination));
+ Source = * (UINT8 **) Source;
+ AcpiRsMoveData (Destination, Source, ItemCount, Info->Opcode);
+ break;
+
+
+ case ACPI_RSC_MOVE_SERIAL_VEN:
+
+ Destination = (char *) ACPI_ADD_PTR (void, Aml,
+ (AmlLength - ItemCount));
+ Source = * (UINT8 **) Source;
+ AcpiRsMoveData (Destination, Source, ItemCount, Info->Opcode);
+ break;
+
+
+ case ACPI_RSC_MOVE_SERIAL_RES:
+
+ Destination = (char *) ACPI_ADD_PTR (void, Aml,
+ (AmlLength - ItemCount));
+ Source = * (UINT8 **) Source;
+ AcpiRsMoveData (Destination, Source, ItemCount, Info->Opcode);
+ break;
+
+
case ACPI_RSC_ADDRESS:
/* Set the Resource Type, General Flags, and Type-Specific Flags */
diff --git a/usr/src/uts/intel/io/acpica/resources/rsserial.c b/usr/src/uts/intel/io/acpica/resources/rsserial.c
new file mode 100644
index 0000000000..3a7784e96c
--- /dev/null
+++ b/usr/src/uts/intel/io/acpica/resources/rsserial.c
@@ -0,0 +1,425 @@
+/*******************************************************************************
+ *
+ * Module Name: rsserial - GPIO/SerialBus resource descriptors
+ *
+ ******************************************************************************/
+
+/*
+ * Copyright (C) 2000 - 2012, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ * of any contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#define __RSIRQ_C__
+
+#include "acpi.h"
+#include "accommon.h"
+#include "acresrc.h"
+
+#define _COMPONENT ACPI_RESOURCES
+ ACPI_MODULE_NAME ("rsserial")
+
+
+/*******************************************************************************
+ *
+ * AcpiRsConvertGpio
+ *
+ ******************************************************************************/
+
+ACPI_RSCONVERT_INFO AcpiRsConvertGpio[17] =
+{
+ {ACPI_RSC_INITGET, ACPI_RESOURCE_TYPE_GPIO,
+ ACPI_RS_SIZE (ACPI_RESOURCE_GPIO),
+ ACPI_RSC_TABLE_SIZE (AcpiRsConvertGpio)},
+
+ {ACPI_RSC_INITSET, ACPI_RESOURCE_NAME_GPIO,
+ sizeof (AML_RESOURCE_GPIO),
+ 0},
+
+ /*
+ * These fields are contiguous in both the source and destination:
+ * RevisionId
+ * ConnectionType
+ */
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.Gpio.RevisionId),
+ AML_OFFSET (Gpio.RevisionId),
+ 2},
+
+ {ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET (Data.Gpio.ProducerConsumer),
+ AML_OFFSET (Gpio.Flags),
+ 0},
+
+ {ACPI_RSC_2BITFLAG, ACPI_RS_OFFSET (Data.Gpio.Sharable),
+ AML_OFFSET (Gpio.IntFlags),
+ 3},
+
+ {ACPI_RSC_2BITFLAG, ACPI_RS_OFFSET (Data.Gpio.IoRestriction),
+ AML_OFFSET (Gpio.IntFlags),
+ 0},
+
+ {ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET (Data.Gpio.Triggering),
+ AML_OFFSET (Gpio.IntFlags),
+ 0},
+
+ {ACPI_RSC_2BITFLAG, ACPI_RS_OFFSET (Data.Gpio.Polarity),
+ AML_OFFSET (Gpio.IntFlags),
+ 1},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.Gpio.PinConfig),
+ AML_OFFSET (Gpio.PinConfig),
+ 1},
+
+ /*
+ * These fields are contiguous in both the source and destination:
+ * DriveStrength
+ * DebounceTimeout
+ */
+ {ACPI_RSC_MOVE16, ACPI_RS_OFFSET (Data.Gpio.DriveStrength),
+ AML_OFFSET (Gpio.DriveStrength),
+ 2},
+
+ /* Pin Table */
+
+ {ACPI_RSC_COUNT_GPIO_PIN, ACPI_RS_OFFSET (Data.Gpio.PinTableLength),
+ AML_OFFSET (Gpio.PinTableOffset),
+ AML_OFFSET (Gpio.ResSourceOffset)},
+
+ {ACPI_RSC_MOVE_GPIO_PIN, ACPI_RS_OFFSET (Data.Gpio.PinTable),
+ AML_OFFSET (Gpio.PinTableOffset),
+ 0},
+
+ /* Resource Source */
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.Gpio.ResourceSource.Index),
+ AML_OFFSET (Gpio.ResSourceIndex),
+ 1},
+
+ {ACPI_RSC_COUNT_GPIO_RES, ACPI_RS_OFFSET (Data.Gpio.ResourceSource.StringLength),
+ AML_OFFSET (Gpio.ResSourceOffset),
+ AML_OFFSET (Gpio.VendorOffset)},
+
+ {ACPI_RSC_MOVE_GPIO_RES, ACPI_RS_OFFSET (Data.Gpio.ResourceSource.StringPtr),
+ AML_OFFSET (Gpio.ResSourceOffset),
+ 0},
+
+ /* Vendor Data */
+
+ {ACPI_RSC_COUNT_GPIO_VEN, ACPI_RS_OFFSET (Data.Gpio.VendorLength),
+ AML_OFFSET (Gpio.VendorLength),
+ 1},
+
+ {ACPI_RSC_MOVE_GPIO_RES, ACPI_RS_OFFSET (Data.Gpio.VendorData),
+ AML_OFFSET (Gpio.VendorOffset),
+ 0},
+};
+
+
+/*******************************************************************************
+ *
+ * AcpiRsConvertI2cSerialBus
+ *
+ ******************************************************************************/
+
+ACPI_RSCONVERT_INFO AcpiRsConvertI2cSerialBus[16] =
+{
+ {ACPI_RSC_INITGET, ACPI_RESOURCE_TYPE_SERIAL_BUS,
+ ACPI_RS_SIZE (ACPI_RESOURCE_I2C_SERIALBUS),
+ ACPI_RSC_TABLE_SIZE (AcpiRsConvertI2cSerialBus)},
+
+ {ACPI_RSC_INITSET, ACPI_RESOURCE_NAME_SERIAL_BUS,
+ sizeof (AML_RESOURCE_I2C_SERIALBUS),
+ 0},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.CommonSerialBus.RevisionId),
+ AML_OFFSET (CommonSerialBus.RevisionId),
+ 1},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.CommonSerialBus.Type),
+ AML_OFFSET (CommonSerialBus.Type),
+ 1},
+
+ {ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET (Data.CommonSerialBus.SlaveMode),
+ AML_OFFSET (CommonSerialBus.Flags),
+ 0},
+
+ {ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET (Data.CommonSerialBus.ProducerConsumer),
+ AML_OFFSET (CommonSerialBus.Flags),
+ 1},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.CommonSerialBus.TypeRevisionId),
+ AML_OFFSET (CommonSerialBus.TypeRevisionId),
+ 1},
+
+ {ACPI_RSC_MOVE16, ACPI_RS_OFFSET (Data.CommonSerialBus.TypeDataLength),
+ AML_OFFSET (CommonSerialBus.TypeDataLength),
+ 1},
+
+ /* Vendor data */
+
+ {ACPI_RSC_COUNT_SERIAL_VEN, ACPI_RS_OFFSET (Data.CommonSerialBus.VendorLength),
+ AML_OFFSET (CommonSerialBus.TypeDataLength),
+ AML_RESOURCE_I2C_MIN_DATA_LEN},
+
+ {ACPI_RSC_MOVE_SERIAL_VEN, ACPI_RS_OFFSET (Data.CommonSerialBus.VendorData),
+ 0,
+ sizeof (AML_RESOURCE_I2C_SERIALBUS)},
+
+ /* Resource Source */
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.CommonSerialBus.ResourceSource.Index),
+ AML_OFFSET (CommonSerialBus.ResSourceIndex),
+ 1},
+
+ {ACPI_RSC_COUNT_SERIAL_RES, ACPI_RS_OFFSET (Data.CommonSerialBus.ResourceSource.StringLength),
+ AML_OFFSET (CommonSerialBus.TypeDataLength),
+ sizeof (AML_RESOURCE_COMMON_SERIALBUS)},
+
+ {ACPI_RSC_MOVE_SERIAL_RES, ACPI_RS_OFFSET (Data.CommonSerialBus.ResourceSource.StringPtr),
+ AML_OFFSET (CommonSerialBus.TypeDataLength),
+ sizeof (AML_RESOURCE_COMMON_SERIALBUS)},
+
+ /* I2C bus type specific */
+
+ {ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET (Data.I2cSerialBus.AccessMode),
+ AML_OFFSET (I2cSerialBus.TypeSpecificFlags),
+ 0},
+
+ {ACPI_RSC_MOVE32, ACPI_RS_OFFSET (Data.I2cSerialBus.ConnectionSpeed),
+ AML_OFFSET (I2cSerialBus.ConnectionSpeed),
+ 1},
+
+ {ACPI_RSC_MOVE16, ACPI_RS_OFFSET (Data.I2cSerialBus.SlaveAddress),
+ AML_OFFSET (I2cSerialBus.SlaveAddress),
+ 1},
+};
+
+
+/*******************************************************************************
+ *
+ * AcpiRsConvertSpiSerialBus
+ *
+ ******************************************************************************/
+
+ACPI_RSCONVERT_INFO AcpiRsConvertSpiSerialBus[20] =
+{
+ {ACPI_RSC_INITGET, ACPI_RESOURCE_TYPE_SERIAL_BUS,
+ ACPI_RS_SIZE (ACPI_RESOURCE_SPI_SERIALBUS),
+ ACPI_RSC_TABLE_SIZE (AcpiRsConvertSpiSerialBus)},
+
+ {ACPI_RSC_INITSET, ACPI_RESOURCE_NAME_SERIAL_BUS,
+ sizeof (AML_RESOURCE_SPI_SERIALBUS),
+ 0},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.CommonSerialBus.RevisionId),
+ AML_OFFSET (CommonSerialBus.RevisionId),
+ 1},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.CommonSerialBus.Type),
+ AML_OFFSET (CommonSerialBus.Type),
+ 1},
+
+ {ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET (Data.CommonSerialBus.SlaveMode),
+ AML_OFFSET (CommonSerialBus.Flags),
+ 0},
+
+ {ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET (Data.CommonSerialBus.ProducerConsumer),
+ AML_OFFSET (CommonSerialBus.Flags),
+ 1},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.CommonSerialBus.TypeRevisionId),
+ AML_OFFSET (CommonSerialBus.TypeRevisionId),
+ 1},
+
+ {ACPI_RSC_MOVE16, ACPI_RS_OFFSET (Data.CommonSerialBus.TypeDataLength),
+ AML_OFFSET (CommonSerialBus.TypeDataLength),
+ 1},
+
+ /* Vendor data */
+
+ {ACPI_RSC_COUNT_SERIAL_VEN, ACPI_RS_OFFSET (Data.CommonSerialBus.VendorLength),
+ AML_OFFSET (CommonSerialBus.TypeDataLength),
+ AML_RESOURCE_SPI_MIN_DATA_LEN},
+
+ {ACPI_RSC_MOVE_SERIAL_VEN, ACPI_RS_OFFSET (Data.CommonSerialBus.VendorData),
+ 0,
+ sizeof (AML_RESOURCE_SPI_SERIALBUS)},
+
+ /* Resource Source */
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.CommonSerialBus.ResourceSource.Index),
+ AML_OFFSET (CommonSerialBus.ResSourceIndex),
+ 1},
+
+ {ACPI_RSC_COUNT_SERIAL_RES, ACPI_RS_OFFSET (Data.CommonSerialBus.ResourceSource.StringLength),
+ AML_OFFSET (CommonSerialBus.TypeDataLength),
+ sizeof (AML_RESOURCE_COMMON_SERIALBUS)},
+
+ {ACPI_RSC_MOVE_SERIAL_RES, ACPI_RS_OFFSET (Data.CommonSerialBus.ResourceSource.StringPtr),
+ AML_OFFSET (CommonSerialBus.TypeDataLength),
+ sizeof (AML_RESOURCE_COMMON_SERIALBUS)},
+
+ /* Spi bus type specific */
+
+ {ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET (Data.SpiSerialBus.WireMode),
+ AML_OFFSET (SpiSerialBus.TypeSpecificFlags),
+ 0},
+
+ {ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET (Data.SpiSerialBus.DevicePolarity),
+ AML_OFFSET (SpiSerialBus.TypeSpecificFlags),
+ 1},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.SpiSerialBus.DataBitLength),
+ AML_OFFSET (SpiSerialBus.DataBitLength),
+ 1},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.SpiSerialBus.ClockPhase),
+ AML_OFFSET (SpiSerialBus.ClockPhase),
+ 1},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.SpiSerialBus.ClockPolarity),
+ AML_OFFSET (SpiSerialBus.ClockPolarity),
+ 1},
+
+ {ACPI_RSC_MOVE16, ACPI_RS_OFFSET (Data.SpiSerialBus.DeviceSelection),
+ AML_OFFSET (SpiSerialBus.DeviceSelection),
+ 1},
+
+ {ACPI_RSC_MOVE32, ACPI_RS_OFFSET (Data.SpiSerialBus.ConnectionSpeed),
+ AML_OFFSET (SpiSerialBus.ConnectionSpeed),
+ 1},
+};
+
+
+/*******************************************************************************
+ *
+ * AcpiRsConvertUartSerialBus
+ *
+ ******************************************************************************/
+
+ACPI_RSCONVERT_INFO AcpiRsConvertUartSerialBus[22] =
+{
+ {ACPI_RSC_INITGET, ACPI_RESOURCE_TYPE_SERIAL_BUS,
+ ACPI_RS_SIZE (ACPI_RESOURCE_UART_SERIALBUS),
+ ACPI_RSC_TABLE_SIZE (AcpiRsConvertUartSerialBus)},
+
+ {ACPI_RSC_INITSET, ACPI_RESOURCE_NAME_SERIAL_BUS,
+ sizeof (AML_RESOURCE_UART_SERIALBUS),
+ 0},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.CommonSerialBus.RevisionId),
+ AML_OFFSET (CommonSerialBus.RevisionId),
+ 1},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.CommonSerialBus.Type),
+ AML_OFFSET (CommonSerialBus.Type),
+ 1},
+
+ {ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET (Data.CommonSerialBus.SlaveMode),
+ AML_OFFSET (CommonSerialBus.Flags),
+ 0},
+
+ {ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET (Data.CommonSerialBus.ProducerConsumer),
+ AML_OFFSET (CommonSerialBus.Flags),
+ 1},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.CommonSerialBus.TypeRevisionId),
+ AML_OFFSET (CommonSerialBus.TypeRevisionId),
+ 1},
+
+ {ACPI_RSC_MOVE16, ACPI_RS_OFFSET (Data.CommonSerialBus.TypeDataLength),
+ AML_OFFSET (CommonSerialBus.TypeDataLength),
+ 1},
+
+ /* Vendor data */
+
+ {ACPI_RSC_COUNT_SERIAL_VEN, ACPI_RS_OFFSET (Data.CommonSerialBus.VendorLength),
+ AML_OFFSET (CommonSerialBus.TypeDataLength),
+ AML_RESOURCE_UART_MIN_DATA_LEN},
+
+ {ACPI_RSC_MOVE_SERIAL_VEN, ACPI_RS_OFFSET (Data.CommonSerialBus.VendorData),
+ 0,
+ sizeof (AML_RESOURCE_UART_SERIALBUS)},
+
+ /* Resource Source */
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.CommonSerialBus.ResourceSource.Index),
+ AML_OFFSET (CommonSerialBus.ResSourceIndex),
+ 1},
+
+ {ACPI_RSC_COUNT_SERIAL_RES, ACPI_RS_OFFSET (Data.CommonSerialBus.ResourceSource.StringLength),
+ AML_OFFSET (CommonSerialBus.TypeDataLength),
+ sizeof (AML_RESOURCE_COMMON_SERIALBUS)},
+
+ {ACPI_RSC_MOVE_SERIAL_RES, ACPI_RS_OFFSET (Data.CommonSerialBus.ResourceSource.StringPtr),
+ AML_OFFSET (CommonSerialBus.TypeDataLength),
+ sizeof (AML_RESOURCE_COMMON_SERIALBUS)},
+
+ /* Uart bus type specific */
+
+ {ACPI_RSC_2BITFLAG, ACPI_RS_OFFSET (Data.UartSerialBus.FlowControl),
+ AML_OFFSET (UartSerialBus.TypeSpecificFlags),
+ 0},
+
+ {ACPI_RSC_2BITFLAG, ACPI_RS_OFFSET (Data.UartSerialBus.StopBits),
+ AML_OFFSET (UartSerialBus.TypeSpecificFlags),
+ 2},
+
+ {ACPI_RSC_3BITFLAG, ACPI_RS_OFFSET (Data.UartSerialBus.DataBits),
+ AML_OFFSET (UartSerialBus.TypeSpecificFlags),
+ 4},
+
+ {ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET (Data.UartSerialBus.Endian),
+ AML_OFFSET (UartSerialBus.TypeSpecificFlags),
+ 7},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.UartSerialBus.Parity),
+ AML_OFFSET (UartSerialBus.Parity),
+ 1},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.UartSerialBus.LinesEnabled),
+ AML_OFFSET (UartSerialBus.LinesEnabled),
+ 1},
+
+ {ACPI_RSC_MOVE16, ACPI_RS_OFFSET (Data.UartSerialBus.RxFifoSize),
+ AML_OFFSET (UartSerialBus.RxFifoSize),
+ 1},
+
+ {ACPI_RSC_MOVE16, ACPI_RS_OFFSET (Data.UartSerialBus.TxFifoSize),
+ AML_OFFSET (UartSerialBus.TxFifoSize),
+ 1},
+
+ {ACPI_RSC_MOVE32, ACPI_RS_OFFSET (Data.UartSerialBus.DefaultBaudRate),
+ AML_OFFSET (UartSerialBus.DefaultBaudRate),
+ 1},
+};
diff --git a/usr/src/uts/intel/io/acpica/resources/rsutils.c b/usr/src/uts/intel/io/acpica/resources/rsutils.c
index 400bbcd91f..062678651e 100644
--- a/usr/src/uts/intel/io/acpica/resources/rsutils.c
+++ b/usr/src/uts/intel/io/acpica/resources/rsutils.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -173,6 +173,9 @@ AcpiRsMoveData (
* since there are no alignment or endian issues
*/
case ACPI_RSC_MOVE8:
+ case ACPI_RSC_MOVE_GPIO_RES:
+ case ACPI_RSC_MOVE_SERIAL_VEN:
+ case ACPI_RSC_MOVE_SERIAL_RES:
ACPI_MEMCPY (Destination, Source, ItemCount);
return;
@@ -182,6 +185,7 @@ AcpiRsMoveData (
* misaligned memory transfers
*/
case ACPI_RSC_MOVE16:
+ case ACPI_RSC_MOVE_GPIO_PIN:
ACPI_MOVE_16_TO_16 (&ACPI_CAST_PTR (UINT16, Destination)[i],
&ACPI_CAST_PTR (UINT16, Source)[i]);
break;
@@ -653,6 +657,61 @@ AcpiRsGetPrsMethodData (
/*******************************************************************************
*
+ * FUNCTION: AcpiRsGetAeiMethodData
+ *
+ * PARAMETERS: Node - Device node
+ * RetBuffer - Pointer to a buffer structure for the
+ * results
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: This function is called to get the _AEI value of an object
+ * contained in an object specified by the handle passed in
+ *
+ * If the function fails an appropriate status will be returned
+ * and the contents of the callers buffer is undefined.
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiRsGetAeiMethodData (
+ ACPI_NAMESPACE_NODE *Node,
+ ACPI_BUFFER *RetBuffer)
+{
+ ACPI_OPERAND_OBJECT *ObjDesc;
+ ACPI_STATUS Status;
+
+
+ ACPI_FUNCTION_TRACE (RsGetAeiMethodData);
+
+
+ /* Parameters guaranteed valid by caller */
+
+ /* Execute the method, no parameters */
+
+ Status = AcpiUtEvaluateObject (Node, METHOD_NAME__AEI,
+ ACPI_BTYPE_BUFFER, &ObjDesc);
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+
+ /*
+ * Make the call to create a resource linked list from the
+ * byte stream buffer that comes back from the _CRS method
+ * execution.
+ */
+ Status = AcpiRsCreateResourceList (ObjDesc, RetBuffer);
+
+ /* On exit, we must delete the object returned by evaluateObject */
+
+ AcpiUtRemoveReference (ObjDesc);
+ return_ACPI_STATUS (Status);
+}
+
+
+/*******************************************************************************
+ *
* FUNCTION: AcpiRsGetMethodData
*
* PARAMETERS: Handle - Handle to the containing object
diff --git a/usr/src/uts/intel/io/acpica/resources/rsxface.c b/usr/src/uts/intel/io/acpica/resources/rsxface.c
index 0aea206b73..d8c3938b31 100644
--- a/usr/src/uts/intel/io/acpica/resources/rsxface.c
+++ b/usr/src/uts/intel/io/acpica/resources/rsxface.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -351,6 +351,52 @@ AcpiSetCurrentResources (
ACPI_EXPORT_SYMBOL (AcpiSetCurrentResources)
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiGetEventResources
+ *
+ * PARAMETERS: DeviceHandle - Handle to the device object for the
+ * device we are getting resources
+ * InBuffer - Pointer to a buffer containing the
+ * resources to be set for the device
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: This function is called to get the event resources for a
+ * specific device. The caller must first acquire a handle for
+ * the desired device. The resource data is passed to the routine
+ * the buffer pointed to by the InBuffer variable. Uses the
+ * _AEI method.
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiGetEventResources (
+ ACPI_HANDLE DeviceHandle,
+ ACPI_BUFFER *RetBuffer)
+{
+ ACPI_STATUS Status;
+ ACPI_NAMESPACE_NODE *Node;
+
+
+ ACPI_FUNCTION_TRACE (AcpiGetEventResources);
+
+
+ /* Validate parameters then dispatch to internal routine */
+
+ Status = AcpiRsValidateParameters (DeviceHandle, RetBuffer, &Node);
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+
+ Status = AcpiRsGetAeiMethodData (Node, RetBuffer);
+ return_ACPI_STATUS (Status);
+}
+
+ACPI_EXPORT_SYMBOL (AcpiGetEventResources)
+
+
/******************************************************************************
*
* FUNCTION: AcpiResourceToAddress64
@@ -544,8 +590,9 @@ AcpiRsMatchVendorResource (
*
* PARAMETERS: DeviceHandle - Handle to the device object for the
* device we are querying
- * Name - Method name of the resources we want
- * (METHOD_NAME__CRS or METHOD_NAME__PRS)
+ * Name - Method name of the resources we want.
+ * (METHOD_NAME__CRS, METHOD_NAME__PRS, or
+ * METHOD_NAME__AEI)
* UserFunction - Called for each resource
* Context - Passed to UserFunction
*
@@ -577,12 +624,13 @@ AcpiWalkResources (
if (!DeviceHandle || !UserFunction || !Name ||
(!ACPI_COMPARE_NAME (Name, METHOD_NAME__CRS) &&
- !ACPI_COMPARE_NAME (Name, METHOD_NAME__PRS)))
+ !ACPI_COMPARE_NAME (Name, METHOD_NAME__PRS) &&
+ !ACPI_COMPARE_NAME (Name, METHOD_NAME__AEI)))
{
return_ACPI_STATUS (AE_BAD_PARAMETER);
}
- /* Get the _CRS or _PRS resource list */
+ /* Get the _CRS/_PRS/_AEI resource list */
Buffer.Length = ACPI_ALLOCATE_LOCAL_BUFFER;
Status = AcpiRsGetMethodData (DeviceHandle, Name, &Buffer);
diff --git a/usr/src/uts/intel/io/acpica/tables/tbfadt.c b/usr/src/uts/intel/io/acpica/tables/tbfadt.c
index f8c63408b9..e877320447 100644
--- a/usr/src/uts/intel/io/acpica/tables/tbfadt.c
+++ b/usr/src/uts/intel/io/acpica/tables/tbfadt.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -77,14 +77,15 @@ AcpiTbSetupFadtRegisters (
typedef struct acpi_fadt_info
{
char *Name;
- UINT8 Address64;
- UINT8 Address32;
- UINT8 Length;
+ UINT16 Address64;
+ UINT16 Address32;
+ UINT16 Length;
UINT8 DefaultLength;
UINT8 Type;
} ACPI_FADT_INFO;
+#define ACPI_FADT_OPTIONAL 0
#define ACPI_FADT_REQUIRED 1
#define ACPI_FADT_SEPARATE_LENGTH 2
@@ -102,7 +103,7 @@ static ACPI_FADT_INFO FadtInfoTable[] =
ACPI_FADT_OFFSET (Pm1bEventBlock),
ACPI_FADT_OFFSET (Pm1EventLength),
ACPI_PM1_REGISTER_WIDTH * 2, /* Enable + Status register */
- 0},
+ ACPI_FADT_OPTIONAL},
{"Pm1aControlBlock",
ACPI_FADT_OFFSET (XPm1aControlBlock),
@@ -116,7 +117,7 @@ static ACPI_FADT_INFO FadtInfoTable[] =
ACPI_FADT_OFFSET (Pm1bControlBlock),
ACPI_FADT_OFFSET (Pm1ControlLength),
ACPI_PM1_REGISTER_WIDTH,
- 0},
+ ACPI_FADT_OPTIONAL},
{"Pm2ControlBlock",
ACPI_FADT_OFFSET (XPm2ControlBlock),
@@ -156,7 +157,7 @@ static ACPI_FADT_INFO FadtInfoTable[] =
typedef struct acpi_fadt_pm_info
{
ACPI_GENERIC_ADDRESS *Target;
- UINT8 Source;
+ UINT16 Source;
UINT8 RegisterNum;
} ACPI_FADT_PM_INFO;
@@ -280,8 +281,13 @@ AcpiTbParseFadt (
AcpiTbInstallTable ((ACPI_PHYSICAL_ADDRESS) AcpiGbl_FADT.XDsdt,
ACPI_SIG_DSDT, ACPI_TABLE_INDEX_DSDT);
- AcpiTbInstallTable ((ACPI_PHYSICAL_ADDRESS) AcpiGbl_FADT.XFacs,
- ACPI_SIG_FACS, ACPI_TABLE_INDEX_FACS);
+ /* If Hardware Reduced flag is set, there is no FACS */
+
+ if (!AcpiGbl_ReducedHardware)
+ {
+ AcpiTbInstallTable ((ACPI_PHYSICAL_ADDRESS) AcpiGbl_FADT.XFacs,
+ ACPI_SIG_FACS, ACPI_TABLE_INDEX_FACS);
+ }
}
@@ -309,13 +315,13 @@ AcpiTbCreateLocalFadt (
/*
* Check if the FADT is larger than the largest table that we expect
- * (the ACPI 2.0/3.0 version). If so, truncate the table, and issue
+ * (the ACPI 5.0 version). If so, truncate the table, and issue
* a warning.
*/
if (Length > sizeof (ACPI_TABLE_FADT))
{
ACPI_WARNING ((AE_INFO,
- "FADT (revision %u) is longer than ACPI 2.0 version, "
+ "FADT (revision %u) is longer than ACPI 5.0 version, "
"truncating length %u to %u",
Table->Revision, Length, (UINT32) sizeof (ACPI_TABLE_FADT)));
}
@@ -329,6 +335,14 @@ AcpiTbCreateLocalFadt (
ACPI_MEMCPY (&AcpiGbl_FADT, Table,
ACPI_MIN (Length, sizeof (ACPI_TABLE_FADT)));
+ /* Take a copy of the Hardware Reduced flag */
+
+ AcpiGbl_ReducedHardware = FALSE;
+ if (AcpiGbl_FADT.Flags & ACPI_FADT_HW_REDUCED)
+ {
+ AcpiGbl_ReducedHardware = TRUE;
+ }
+
/* Convert the local copy of the FADT to the common internal format */
AcpiTbConvertFadt ();
@@ -386,10 +400,6 @@ AcpiTbConvertFadt (
UINT32 i;
- /* Update the local FADT table header length */
-
- AcpiGbl_FADT.Header.Length = sizeof (ACPI_TABLE_FADT);
-
/*
* Expand the 32-bit FACS and DSDT addresses to 64-bit as necessary.
* Later code will always use the X 64-bit field.
@@ -423,6 +433,13 @@ AcpiTbConvertFadt (
}
/*
+ * Now we can update the local FADT length to the length of the
+ * current FADT version as defined by the ACPI specification.
+ * Thus, we will have a common FADT internally.
+ */
+ AcpiGbl_FADT.Header.Length = sizeof (ACPI_TABLE_FADT);
+
+ /*
* Expand the ACPI 1.0 32-bit addresses to the ACPI 2.0 64-bit "X"
* generic address structures as necessary. Later code will always use
* the 64-bit address structures.
@@ -530,6 +547,13 @@ AcpiTbValidateFadt (
AcpiGbl_FADT.XDsdt = (UINT64) AcpiGbl_FADT.Dsdt;
}
+ /* If Hardware Reduced flag is set, we are all done */
+
+ if (AcpiGbl_ReducedHardware)
+ {
+ return;
+ }
+
/* Examine all of the 64-bit extended address fields (X fields) */
for (i = 0; i < ACPI_FADT_INFO_ENTRIES; i++)
diff --git a/usr/src/uts/intel/io/acpica/tables/tbfind.c b/usr/src/uts/intel/io/acpica/tables/tbfind.c
index 3b7c420f01..d7e2440b32 100644
--- a/usr/src/uts/intel/io/acpica/tables/tbfind.c
+++ b/usr/src/uts/intel/io/acpica/tables/tbfind.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/tables/tbinstal.c b/usr/src/uts/intel/io/acpica/tables/tbinstal.c
index ab69691a1e..f100ab43f7 100644
--- a/usr/src/uts/intel/io/acpica/tables/tbinstal.c
+++ b/usr/src/uts/intel/io/acpica/tables/tbinstal.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -128,7 +128,6 @@ AcpiTbAddTable (
{
UINT32 i;
ACPI_STATUS Status = AE_OK;
- ACPI_TABLE_HEADER *OverrideTable = NULL;
ACPI_FUNCTION_TRACE (TbAddTable);
@@ -242,26 +241,10 @@ AcpiTbAddTable (
/*
* ACPI Table Override:
* Allow the host to override dynamically loaded tables.
+ * NOTE: the table is fully mapped at this point, and the mapping will
+ * be deleted by TbTableOverride if the table is actually overridden.
*/
- Status = AcpiOsTableOverride (TableDesc->Pointer, &OverrideTable);
- if (ACPI_SUCCESS (Status) && OverrideTable)
- {
- ACPI_INFO ((AE_INFO,
- "%4.4s @ 0x%p Table override, replaced with:",
- TableDesc->Pointer->Signature,
- ACPI_CAST_PTR (void, TableDesc->Address)));
-
- /* We can delete the table that was passed as a parameter */
-
- AcpiTbDeleteTable (TableDesc);
-
- /* Setup descriptor for the new table */
-
- TableDesc->Address = ACPI_PTR_TO_PHYSADDR (OverrideTable);
- TableDesc->Pointer = OverrideTable;
- TableDesc->Length = OverrideTable->Length;
- TableDesc->Flags = ACPI_TABLE_ORIGIN_OVERRIDE;
- }
+ (void) AcpiTbTableOverride (TableDesc->Pointer, TableDesc);
/* Add the table to the global root table list */
@@ -283,6 +266,98 @@ Release:
/*******************************************************************************
*
+ * FUNCTION: AcpiTbTableOverride
+ *
+ * PARAMETERS: TableHeader - Header for the original table
+ * TableDesc - Table descriptor initialized for the
+ * original table. May or may not be mapped.
+ *
+ * RETURN: Pointer to the entire new table. NULL if table not overridden.
+ * If overridden, installs the new table within the input table
+ * descriptor.
+ *
+ * DESCRIPTION: Attempt table override by calling the OSL override functions.
+ * Note: If the table is overridden, then the entire new table
+ * is mapped and returned by this function.
+ *
+ ******************************************************************************/
+
+ACPI_TABLE_HEADER *
+AcpiTbTableOverride (
+ ACPI_TABLE_HEADER *TableHeader,
+ ACPI_TABLE_DESC *TableDesc)
+{
+ ACPI_STATUS Status;
+ ACPI_TABLE_HEADER *NewTable = NULL;
+ ACPI_PHYSICAL_ADDRESS NewAddress = 0;
+ UINT32 NewTableLength = 0;
+ UINT8 NewFlags;
+ char *OverrideType;
+
+
+ /* (1) Attempt logical override (returns a logical address) */
+
+ Status = AcpiOsTableOverride (TableHeader, &NewTable);
+ if (ACPI_SUCCESS (Status) && NewTable)
+ {
+ NewAddress = ACPI_PTR_TO_PHYSADDR (NewTable);
+ NewTableLength = NewTable->Length;
+ NewFlags = ACPI_TABLE_ORIGIN_OVERRIDE;
+ OverrideType = "Logical";
+ goto FinishOverride;
+ }
+
+ /* (2) Attempt physical override (returns a physical address) */
+
+ Status = AcpiOsPhysicalTableOverride (TableHeader,
+ &NewAddress, &NewTableLength);
+ if (ACPI_SUCCESS (Status) && NewAddress && NewTableLength)
+ {
+ /* Map the entire new table */
+
+ NewTable = AcpiOsMapMemory (NewAddress, NewTableLength);
+ if (!NewTable)
+ {
+ ACPI_EXCEPTION ((AE_INFO, AE_NO_MEMORY,
+ "%4.4s %p Attempted physical table override failed",
+ TableHeader->Signature,
+ ACPI_CAST_PTR (void, TableDesc->Address)));
+ return (NULL);
+ }
+
+ OverrideType = "Physical";
+ NewFlags = ACPI_TABLE_ORIGIN_MAPPED;
+ goto FinishOverride;
+ }
+
+ return (NULL); /* There was no override */
+
+
+FinishOverride:
+
+ ACPI_INFO ((AE_INFO,
+ "%4.4s %p %s table override, new table: %p",
+ TableHeader->Signature,
+ ACPI_CAST_PTR (void, TableDesc->Address),
+ OverrideType, NewTable));
+
+ /* We can now unmap/delete the original table (if fully mapped) */
+
+ AcpiTbDeleteTable (TableDesc);
+
+ /* Setup descriptor for the new table */
+
+ TableDesc->Address = NewAddress;
+ TableDesc->Pointer = NewTable;
+ TableDesc->Length = NewTableLength;
+ TableDesc->Flags = NewFlags;
+
+ return (NewTable);
+}
+
+
+/*******************************************************************************
+ *
* FUNCTION: AcpiTbResizeRootTableList
*
* PARAMETERS: None
@@ -435,8 +510,10 @@ AcpiTbDeleteTable (
ACPI_FREE (TableDesc->Pointer);
break;
+ /* Not mapped or allocated, there is nothing we can do */
+
default:
- break;
+ return;
}
TableDesc->Pointer = NULL;
diff --git a/usr/src/uts/intel/io/acpica/tables/tbutils.c b/usr/src/uts/intel/io/acpica/tables/tbutils.c
index bff6540e07..e8b5705e96 100644
--- a/usr/src/uts/intel/io/acpica/tables/tbutils.c
+++ b/usr/src/uts/intel/io/acpica/tables/tbutils.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -50,6 +50,7 @@
#define _COMPONENT ACPI_TABLES
ACPI_MODULE_NAME ("tbutils")
+
/* Local prototypes */
static void
@@ -68,6 +69,7 @@ AcpiTbGetRootTableEntry (
UINT32 TableEntrySize);
+#if (!ACPI_REDUCED_HARDWARE)
/*******************************************************************************
*
* FUNCTION: AcpiTbInitializeFacs
@@ -88,10 +90,19 @@ AcpiTbInitializeFacs (
ACPI_STATUS Status;
+ /* If Hardware Reduced flag is set, there is no FACS */
+
+ if (AcpiGbl_ReducedHardware)
+ {
+ AcpiGbl_FACS = NULL;
+ return (AE_OK);
+ }
+
Status = AcpiGetTableByIndex (ACPI_TABLE_INDEX_FACS,
ACPI_CAST_INDIRECT_PTR (ACPI_TABLE_HEADER, &AcpiGbl_FACS));
return (Status);
}
+#endif /* !ACPI_REDUCED_HARDWARE */
/*******************************************************************************
@@ -415,7 +426,7 @@ AcpiTbCopyDsdt (
* RETURN: None
*
* DESCRIPTION: Install an ACPI table into the global data structure. The
- * table override mechanism is implemented here to allow the host
+ * table override mechanism is called to allow the host
* OS to replace any table before it is installed in the root
* table array.
*
@@ -427,11 +438,9 @@ AcpiTbInstallTable (
char *Signature,
UINT32 TableIndex)
{
- UINT8 Flags;
- ACPI_STATUS Status;
- ACPI_TABLE_HEADER *TableToInstall;
- ACPI_TABLE_HEADER *MappedTable;
- ACPI_TABLE_HEADER *OverrideTable = NULL;
+ ACPI_TABLE_HEADER *Table;
+ ACPI_TABLE_HEADER *FinalTable;
+ ACPI_TABLE_DESC *TableDesc;
if (!Address)
@@ -443,70 +452,84 @@ AcpiTbInstallTable (
/* Map just the table header */
- MappedTable = AcpiOsMapMemory (Address, sizeof (ACPI_TABLE_HEADER));
- if (!MappedTable)
+ Table = AcpiOsMapMemory (Address, sizeof (ACPI_TABLE_HEADER));
+ if (!Table)
{
+ ACPI_ERROR ((AE_INFO, "Could not map memory for table [%s] at %p",
+ Signature, ACPI_CAST_PTR (void, Address)));
return;
}
/* If a particular signature is expected (DSDT/FACS), it must match */
if (Signature &&
- !ACPI_COMPARE_NAME (MappedTable->Signature, Signature))
+ !ACPI_COMPARE_NAME (Table->Signature, Signature))
{
ACPI_ERROR ((AE_INFO,
"Invalid signature 0x%X for ACPI table, expected [%s]",
- *ACPI_CAST_PTR (UINT32, MappedTable->Signature), Signature));
+ *ACPI_CAST_PTR (UINT32, Table->Signature), Signature));
goto UnmapAndExit;
}
/*
+ * Initialize the table entry. Set the pointer to NULL, since the
+ * table is not fully mapped at this time.
+ */
+ TableDesc = &AcpiGbl_RootTableList.Tables[TableIndex];
+
+ TableDesc->Address = Address;
+ TableDesc->Pointer = NULL;
+ TableDesc->Length = Table->Length;
+ TableDesc->Flags = ACPI_TABLE_ORIGIN_MAPPED;
+ ACPI_MOVE_32_TO_32 (TableDesc->Signature.Ascii, Table->Signature);
+
+ /*
* ACPI Table Override:
*
* Before we install the table, let the host OS override it with a new
* one if desired. Any table within the RSDT/XSDT can be replaced,
* including the DSDT which is pointed to by the FADT.
+ *
+ * NOTE: If the table is overridden, then FinalTable will contain a
+ * mapped pointer to the full new table. If the table is not overridden,
+ * or if there has been a physical override, then the table will be
+ * fully mapped later (in verify table). In any case, we must
+ * unmap the header that was mapped above.
*/
- Status = AcpiOsTableOverride (MappedTable, &OverrideTable);
- if (ACPI_SUCCESS (Status) && OverrideTable)
+ FinalTable = AcpiTbTableOverride (Table, TableDesc);
+ if (!FinalTable)
{
- ACPI_INFO ((AE_INFO,
- "%4.4s @ 0x%p Table override, replaced with:",
- MappedTable->Signature, ACPI_CAST_PTR (void, Address)));
-
- AcpiGbl_RootTableList.Tables[TableIndex].Pointer = OverrideTable;
- Address = ACPI_PTR_TO_PHYSADDR (OverrideTable);
-
- TableToInstall = OverrideTable;
- Flags = ACPI_TABLE_ORIGIN_OVERRIDE;
- }
- else
- {
- TableToInstall = MappedTable;
- Flags = ACPI_TABLE_ORIGIN_MAPPED;
+ FinalTable = Table; /* There was no override */
}
- /* Initialize the table entry */
-
- AcpiGbl_RootTableList.Tables[TableIndex].Address = Address;
- AcpiGbl_RootTableList.Tables[TableIndex].Length = TableToInstall->Length;
- AcpiGbl_RootTableList.Tables[TableIndex].Flags = Flags;
-
- ACPI_MOVE_32_TO_32 (
- &(AcpiGbl_RootTableList.Tables[TableIndex].Signature),
- TableToInstall->Signature);
+ AcpiTbPrintTableHeader (TableDesc->Address, FinalTable);
- AcpiTbPrintTableHeader (Address, TableToInstall);
+ /* Set the global integer width (based upon revision of the DSDT) */
if (TableIndex == ACPI_TABLE_INDEX_DSDT)
{
- /* Global integer width is based upon revision of the DSDT */
+ AcpiUtSetIntegerWidth (FinalTable->Revision);
+ }
- AcpiUtSetIntegerWidth (TableToInstall->Revision);
+ /*
+ * If we have a physical override during this early loading of the ACPI
+ * tables, unmap the table for now. It will be mapped again later when
+ * it is actually used. This supports very early loading of ACPI tables,
+ * before virtual memory is fully initialized and running within the
+ * host OS. Note: A logical override has the ACPI_TABLE_ORIGIN_OVERRIDE
+ * flag set and will not be deleted below.
+ */
+ if (FinalTable != Table)
+ {
+ AcpiTbDeleteTable (TableDesc);
}
+
UnmapAndExit:
- AcpiOsUnmapMemory (MappedTable, sizeof (ACPI_TABLE_HEADER));
+
+ /* Always unmap the table header that we mapped above */
+
+ AcpiOsUnmapMemory (Table, sizeof (ACPI_TABLE_HEADER));
}
diff --git a/usr/src/uts/intel/io/acpica/tables/tbxface.c b/usr/src/uts/intel/io/acpica/tables/tbxface.c
index 465bf95c64..58dae6935d 100644
--- a/usr/src/uts/intel/io/acpica/tables/tbxface.c
+++ b/usr/src/uts/intel/io/acpica/tables/tbxface.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/tables/tbxfroot.c b/usr/src/uts/intel/io/acpica/tables/tbxfroot.c
index e447108ebe..17e52ef7f4 100644
--- a/usr/src/uts/intel/io/acpica/tables/tbxfroot.c
+++ b/usr/src/uts/intel/io/acpica/tables/tbxfroot.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/utilities/utaddress.c b/usr/src/uts/intel/io/acpica/utilities/utaddress.c
new file mode 100644
index 0000000000..0b357b681b
--- /dev/null
+++ b/usr/src/uts/intel/io/acpica/utilities/utaddress.c
@@ -0,0 +1,322 @@
+/******************************************************************************
+ *
+ * Module Name: utaddress - OpRegion address range check
+ *
+ *****************************************************************************/
+
+/*
+ * Copyright (C) 2000 - 2012, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ * of any contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#define __UTADDRESS_C__
+
+#include "acpi.h"
+#include "accommon.h"
+#include "acnamesp.h"
+
+
+#define _COMPONENT ACPI_UTILITIES
+ ACPI_MODULE_NAME ("utaddress")
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiUtAddAddressRange
+ *
+ * PARAMETERS: SpaceId - Address space ID
+ * Address - OpRegion start address
+ * Length - OpRegion length
+ * RegionNode - OpRegion namespace node
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Add the Operation Region address range to the global list.
+ * The only supported Space IDs are Memory and I/O. Called when
+ * the OpRegion address/length operands are fully evaluated.
+ *
+ * MUTEX: Locks the namespace
+ *
+ * NOTE: Because this interface is only called when an OpRegion argument
+ * list is evaluated, there cannot be any duplicate RegionNodes.
+ * Duplicate Address/Length values are allowed, however, so that multiple
+ * address conflicts can be detected.
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiUtAddAddressRange (
+ ACPI_ADR_SPACE_TYPE SpaceId,
+ ACPI_PHYSICAL_ADDRESS Address,
+ UINT32 Length,
+ ACPI_NAMESPACE_NODE *RegionNode)
+{
+ ACPI_ADDRESS_RANGE *RangeInfo;
+ ACPI_STATUS Status;
+
+
+ ACPI_FUNCTION_TRACE (UtAddAddressRange);
+
+
+ if ((SpaceId != ACPI_ADR_SPACE_SYSTEM_MEMORY) &&
+ (SpaceId != ACPI_ADR_SPACE_SYSTEM_IO))
+ {
+ return_ACPI_STATUS (AE_OK);
+ }
+
+ /* Allocate/init a new info block, add it to the appropriate list */
+
+ RangeInfo = ACPI_ALLOCATE (sizeof (ACPI_ADDRESS_RANGE));
+ if (!RangeInfo)
+ {
+ return_ACPI_STATUS (AE_NO_MEMORY);
+ }
+
+ RangeInfo->StartAddress = Address;
+ RangeInfo->EndAddress = (Address + Length - 1);
+ RangeInfo->RegionNode = RegionNode;
+
+ Status = AcpiUtAcquireMutex (ACPI_MTX_NAMESPACE);
+ if (ACPI_FAILURE (Status))
+ {
+ ACPI_FREE (RangeInfo);
+ return_ACPI_STATUS (Status);
+ }
+
+ RangeInfo->Next = AcpiGbl_AddressRangeList[SpaceId];
+ AcpiGbl_AddressRangeList[SpaceId] = RangeInfo;
+
+ ACPI_DEBUG_PRINT ((ACPI_DB_NAMES,
+ "\nAdded [%4.4s] address range: 0x%p-0x%p\n",
+ AcpiUtGetNodeName (RangeInfo->RegionNode),
+ ACPI_CAST_PTR (void, Address),
+ ACPI_CAST_PTR (void, RangeInfo->EndAddress)));
+
+ (void) AcpiUtReleaseMutex (ACPI_MTX_NAMESPACE);
+ return_ACPI_STATUS (AE_OK);
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiUtRemoveAddressRange
+ *
+ * PARAMETERS: SpaceId - Address space ID
+ * RegionNode - OpRegion namespace node
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Remove the Operation Region from the global list. The only
+ * supported Space IDs are Memory and I/O. Called when an
+ * OpRegion is deleted.
+ *
+ * MUTEX: Assumes the namespace is locked
+ *
+ ******************************************************************************/
+
+void
+AcpiUtRemoveAddressRange (
+ ACPI_ADR_SPACE_TYPE SpaceId,
+ ACPI_NAMESPACE_NODE *RegionNode)
+{
+ ACPI_ADDRESS_RANGE *RangeInfo;
+ ACPI_ADDRESS_RANGE *Prev;
+
+
+ ACPI_FUNCTION_TRACE (UtRemoveAddressRange);
+
+
+ if ((SpaceId != ACPI_ADR_SPACE_SYSTEM_MEMORY) &&
+ (SpaceId != ACPI_ADR_SPACE_SYSTEM_IO))
+ {
+ return_VOID;
+ }
+
+ /* Get the appropriate list head and check the list */
+
+ RangeInfo = Prev = AcpiGbl_AddressRangeList[SpaceId];
+ while (RangeInfo)
+ {
+ if (RangeInfo->RegionNode == RegionNode)
+ {
+ if (RangeInfo == Prev) /* Found at list head */
+ {
+ AcpiGbl_AddressRangeList[SpaceId] = RangeInfo->Next;
+ }
+ else
+ {
+ Prev->Next = RangeInfo->Next;
+ }
+
+ ACPI_DEBUG_PRINT ((ACPI_DB_NAMES,
+ "\nRemoved [%4.4s] address range: 0x%p-0x%p\n",
+ AcpiUtGetNodeName (RangeInfo->RegionNode),
+ ACPI_CAST_PTR (void, RangeInfo->StartAddress),
+ ACPI_CAST_PTR (void, RangeInfo->EndAddress)));
+
+ ACPI_FREE (RangeInfo);
+ return_VOID;
+ }
+
+ Prev = RangeInfo;
+ RangeInfo = RangeInfo->Next;
+ }
+
+ return_VOID;
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiUtCheckAddressRange
+ *
+ * PARAMETERS: SpaceId - Address space ID
+ * Address - Start address
+ * Length - Length of address range
+ * Warn - TRUE if warning on overlap desired
+ *
+ * RETURN: Count of the number of conflicts detected. Zero is always
+ * returned for Space IDs other than Memory or I/O.
+ *
+ * DESCRIPTION: Check if the input address range overlaps any of the
+ * ASL operation region address ranges. The only supported
+ * Space IDs are Memory and I/O.
+ *
+ * MUTEX: Assumes the namespace is locked.
+ *
+ ******************************************************************************/
+
+UINT32
+AcpiUtCheckAddressRange (
+ ACPI_ADR_SPACE_TYPE SpaceId,
+ ACPI_PHYSICAL_ADDRESS Address,
+ UINT32 Length,
+ BOOLEAN Warn)
+{
+ ACPI_ADDRESS_RANGE *RangeInfo;
+ ACPI_PHYSICAL_ADDRESS EndAddress;
+ char *Pathname;
+ UINT32 OverlapCount = 0;
+
+
+ ACPI_FUNCTION_TRACE (UtCheckAddressRange);
+
+
+ if ((SpaceId != ACPI_ADR_SPACE_SYSTEM_MEMORY) &&
+ (SpaceId != ACPI_ADR_SPACE_SYSTEM_IO))
+ {
+ return_UINT32 (0);
+ }
+
+ RangeInfo = AcpiGbl_AddressRangeList[SpaceId];
+ EndAddress = Address + Length - 1;
+
+ /* Check entire list for all possible conflicts */
+
+ while (RangeInfo)
+ {
+ /*
+ * Check if the requested Address/Length overlaps this AddressRange.
+ * Four cases to consider:
+ *
+ * 1) Input address/length is contained completely in the address range
+ * 2) Input address/length overlaps range at the range start
+ * 3) Input address/length overlaps range at the range end
+ * 4) Input address/length completely encompasses the range
+ */
+ if ((Address <= RangeInfo->EndAddress) &&
+ (EndAddress >= RangeInfo->StartAddress))
+ {
+ /* Found an address range overlap */
+
+ OverlapCount++;
+ if (Warn) /* Optional warning message */
+ {
+ Pathname = AcpiNsGetExternalPathname (RangeInfo->RegionNode);
+
+ ACPI_WARNING ((AE_INFO,
+ "0x%p-0x%p %s conflicts with Region %s %d",
+ ACPI_CAST_PTR (void, Address),
+ ACPI_CAST_PTR (void, EndAddress),
+ AcpiUtGetRegionName (SpaceId), Pathname, OverlapCount));
+ ACPI_FREE (Pathname);
+ }
+ }
+
+ RangeInfo = RangeInfo->Next;
+ }
+
+ return_UINT32 (OverlapCount);
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiUtDeleteAddressLists
+ *
+ * PARAMETERS: None
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Delete all global address range lists (called during
+ * subsystem shutdown).
+ *
+ ******************************************************************************/
+
+void
+AcpiUtDeleteAddressLists (
+ void)
+{
+ ACPI_ADDRESS_RANGE *Next;
+ ACPI_ADDRESS_RANGE *RangeInfo;
+ int i;
+
+
+ /* Delete all elements in all address range lists */
+
+ for (i = 0; i < ACPI_ADDRESS_RANGE_MAX; i++)
+ {
+ Next = AcpiGbl_AddressRangeList[i];
+
+ while (Next)
+ {
+ RangeInfo = Next;
+ Next = RangeInfo->Next;
+ ACPI_FREE (RangeInfo);
+ }
+
+ AcpiGbl_AddressRangeList[i] = NULL;
+ }
+}
diff --git a/usr/src/uts/intel/io/acpica/utilities/utalloc.c b/usr/src/uts/intel/io/acpica/utilities/utalloc.c
index 4c024f1a30..d2eb11edcb 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utalloc.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utalloc.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/utilities/utcache.c b/usr/src/uts/intel/io/acpica/utilities/utcache.c
index 96d64b05a5..7b3abe2894 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utcache.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utcache.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/utilities/utclib.c b/usr/src/uts/intel/io/acpica/utilities/utclib.c
index 70d17bb1be..d0153ec5d2 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utclib.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utclib.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/utilities/utcopy.c b/usr/src/uts/intel/io/acpica/utilities/utcopy.c
index 907f272f7e..ab04a7a7aa 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utcopy.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utcopy.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/utilities/utdebug.c b/usr/src/uts/intel/io/acpica/utilities/utdebug.c
index 24d2eb5cfa..051d47b443 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utdebug.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utdebug.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/utilities/utdecode.c b/usr/src/uts/intel/io/acpica/utilities/utdecode.c
index feca6c725c..718d73db4e 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utdecode.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utdecode.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -187,7 +187,9 @@ const char *AcpiGbl_RegionTypes[ACPI_NUM_PREDEFINED_REGIONS] =
"SMBus",
"SystemCMOS",
"PCIBARTarget",
- "IPMI"
+ "IPMI",
+ "GeneralPurposeIo",
+ "GenericSerialBus"
};
@@ -565,20 +567,21 @@ AcpiUtGetMutexName (
/* Names for Notify() values, used for debug output */
-static const char *AcpiGbl_NotifyValueNames[] =
+static const char *AcpiGbl_NotifyValueNames[ACPI_NOTIFY_MAX + 1] =
{
- "Bus Check",
- "Device Check",
- "Device Wake",
- "Eject Request",
- "Device Check Light",
- "Frequency Mismatch",
- "Bus Mode Mismatch",
- "Power Fault",
- "Capabilities Check",
- "Device PLD Check",
- "Reserved",
- "System Locality Update"
+ /* 00 */ "Bus Check",
+ /* 01 */ "Device Check",
+ /* 02 */ "Device Wake",
+ /* 03 */ "Eject Request",
+ /* 04 */ "Device Check Light",
+ /* 05 */ "Frequency Mismatch",
+ /* 06 */ "Bus Mode Mismatch",
+ /* 07 */ "Power Fault",
+ /* 08 */ "Capabilities Check",
+ /* 09 */ "Device PLD Check",
+ /* 10 */ "Reserved",
+ /* 11 */ "System Locality Update",
+ /* 12 */ "Shutdown Request"
};
const char *
@@ -594,9 +597,13 @@ AcpiUtGetNotifyName (
{
return ("Reserved");
}
- else /* Greater or equal to 0x80 */
+ else if (NotifyValue <= ACPI_MAX_DEVICE_SPECIFIC_NOTIFY)
{
- return ("**Device Specific**");
+ return ("Device Specific");
+ }
+ else
+ {
+ return ("Hardware Specific");
}
}
#endif
diff --git a/usr/src/uts/intel/io/acpica/utilities/utdelete.c b/usr/src/uts/intel/io/acpica/utilities/utdelete.c
index b87d2f0d59..fb32611121 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utdelete.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utdelete.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -167,7 +167,7 @@ AcpiUtDeleteInternalObj (
case ACPI_TYPE_PROCESSOR:
case ACPI_TYPE_THERMAL:
- /* Walk the notify handler list for this object */
+ /* Walk the address handler list for this object */
HandlerDesc = Object->CommonNotify.Handler;
while (HandlerDesc)
@@ -235,6 +235,16 @@ AcpiUtDeleteInternalObj (
ACPI_DEBUG_PRINT ((ACPI_DB_ALLOCATIONS,
"***** Region %p\n", Object));
+ /*
+ * Update AddressRange list. However, only permanent regions
+ * are installed in this list. (Not created within a method)
+ */
+ if (!(Object->Region.Node->Flags & ANOBJ_TEMPORARY))
+ {
+ AcpiUtRemoveAddressRange (Object->Region.SpaceId,
+ Object->Region.Node);
+ }
+
SecondDesc = AcpiNsGetSecondaryObject (Object);
if (SecondDesc)
{
@@ -513,6 +523,7 @@ AcpiUtUpdateObjectReference (
ACPI_STATUS Status = AE_OK;
ACPI_GENERIC_STATE *StateList = NULL;
ACPI_OPERAND_OBJECT *NextObject = NULL;
+ ACPI_OPERAND_OBJECT *PrevObject;
ACPI_GENERIC_STATE *State;
UINT32 i;
@@ -542,10 +553,20 @@ AcpiUtUpdateObjectReference (
case ACPI_TYPE_POWER:
case ACPI_TYPE_THERMAL:
- /* Update the notify objects for these types (if present) */
-
- AcpiUtUpdateRefCount (Object->CommonNotify.SystemNotify, Action);
- AcpiUtUpdateRefCount (Object->CommonNotify.DeviceNotify, Action);
+ /*
+ * Update the notify objects for these types (if present)
+ * Two lists, system and device notify handlers.
+ */
+ for (i = 0; i < ACPI_NUM_NOTIFY_TYPES; i++)
+ {
+ PrevObject = Object->CommonNotify.NotifyList[i];
+ while (PrevObject)
+ {
+ NextObject = PrevObject->Notify.Next[i];
+ AcpiUtUpdateRefCount (PrevObject, Action);
+ PrevObject = NextObject;
+ }
+ }
break;
case ACPI_TYPE_PACKAGE:
diff --git a/usr/src/uts/intel/io/acpica/utilities/uteval.c b/usr/src/uts/intel/io/acpica/utilities/uteval.c
index 0042f411ff..0bcb894b58 100644
--- a/usr/src/uts/intel/io/acpica/utilities/uteval.c
+++ b/usr/src/uts/intel/io/acpica/utilities/uteval.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/utilities/utglobal.c b/usr/src/uts/intel/io/acpica/utilities/utglobal.c
index 336f6706c1..f913e32228 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utglobal.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utglobal.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -155,6 +155,7 @@ const ACPI_PREDEFINED_NAMES AcpiGbl_PreDefinedNames[] =
};
+#if (!ACPI_REDUCED_HARDWARE)
/******************************************************************************
*
* Event and Hardware globals
@@ -199,6 +200,7 @@ ACPI_FIXED_EVENT_INFO AcpiGbl_FixedEventInfo[ACPI_NUM_FIXED_EVENTS] =
/* ACPI_EVENT_SLEEP_BUTTON */ {ACPI_BITREG_SLEEP_BUTTON_STATUS, ACPI_BITREG_SLEEP_BUTTON_ENABLE, ACPI_BITMASK_SLEEP_BUTTON_STATUS, ACPI_BITMASK_SLEEP_BUTTON_ENABLE},
/* ACPI_EVENT_RTC */ {ACPI_BITREG_RT_CLOCK_STATUS, ACPI_BITREG_RT_CLOCK_ENABLE, ACPI_BITMASK_RT_CLOCK_STATUS, ACPI_BITMASK_RT_CLOCK_ENABLE},
};
+#endif /* !ACPI_REDUCED_HARDWARE */
/*******************************************************************************
@@ -233,6 +235,13 @@ AcpiUtInitGlobals (
return_ACPI_STATUS (Status);
}
+ /* Address Range lists */
+
+ for (i = 0; i < ACPI_ADDRESS_RANGE_MAX; i++)
+ {
+ AcpiGbl_AddressRangeList[i] = NULL;
+ }
+
/* Mutex locked flags */
for (i = 0; i < ACPI_NUM_MUTEX; i++)
@@ -262,6 +271,8 @@ AcpiUtInitGlobals (
AcpiFixedEventCount[i] = 0;
}
+#if (!ACPI_REDUCED_HARDWARE)
+
/* GPE support */
AcpiGbl_AllGpesInitialized = FALSE;
@@ -270,15 +281,18 @@ AcpiUtInitGlobals (
AcpiGbl_GpeFadtBlocks[1] = NULL;
AcpiCurrentGpeCount = 0;
+ AcpiGbl_GlobalEventHandler = NULL;
+
+#endif /* !ACPI_REDUCED_HARDWARE */
+
/* Global handlers */
- AcpiGbl_SystemNotify.Handler = NULL;
- AcpiGbl_DeviceNotify.Handler = NULL;
+ AcpiGbl_GlobalNotify[0].Handler = NULL;
+ AcpiGbl_GlobalNotify[1].Handler = NULL;
AcpiGbl_ExceptionHandler = NULL;
AcpiGbl_InitHandler = NULL;
AcpiGbl_TableHandler = NULL;
AcpiGbl_InterfaceHandler = NULL;
- AcpiGbl_GlobalEventHandler = NULL;
/* Global Lock support */
diff --git a/usr/src/uts/intel/io/acpica/utilities/utids.c b/usr/src/uts/intel/io/acpica/utilities/utids.c
index 36dc3afce6..226d8c1631 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utids.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utids.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/utilities/utinit.c b/usr/src/uts/intel/io/acpica/utilities/utinit.c
index 9e253f01d3..6831595e68 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utinit.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utinit.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -58,21 +58,33 @@
static void AcpiUtTerminate (
void);
+#if (!ACPI_REDUCED_HARDWARE)
+static void
+AcpiUtFreeGpeLists (
+ void);
+
+#else
+
+#define AcpiUtFreeGpeLists()
+#endif /* !ACPI_REDUCED_HARDWARE */
+
+
+#if (!ACPI_REDUCED_HARDWARE)
/******************************************************************************
*
- * FUNCTION: AcpiUtTerminate
+ * FUNCTION: AcpiUtFreeGpeLists
*
* PARAMETERS: none
*
* RETURN: none
*
- * DESCRIPTION: Free global memory
+ * DESCRIPTION: Free global GPE lists
*
******************************************************************************/
static void
-AcpiUtTerminate (
+AcpiUtFreeGpeLists (
void)
{
ACPI_GPE_BLOCK_INFO *GpeBlock;
@@ -81,9 +93,6 @@ AcpiUtTerminate (
ACPI_GPE_XRUPT_INFO *NextGpeXruptInfo;
- ACPI_FUNCTION_TRACE (UtTerminate);
-
-
/* Free global GPE blocks and related info structures */
GpeXruptInfo = AcpiGbl_GpeXruptListHead;
@@ -103,7 +112,30 @@ AcpiUtTerminate (
ACPI_FREE (GpeXruptInfo);
GpeXruptInfo = NextGpeXruptInfo;
}
+}
+#endif /* !ACPI_REDUCED_HARDWARE */
+
+
+/******************************************************************************
+ *
+ * FUNCTION: AcpiUtTerminate
+ *
+ * PARAMETERS: none
+ *
+ * RETURN: none
+ *
+ * DESCRIPTION: Free global memory
+ *
+ ******************************************************************************/
+
+static void
+AcpiUtTerminate (
+ void)
+{
+ ACPI_FUNCTION_TRACE (UtTerminate);
+ AcpiUtFreeGpeLists ();
+ AcpiUtDeleteAddressLists ();
return_VOID;
}
diff --git a/usr/src/uts/intel/io/acpica/utilities/utlock.c b/usr/src/uts/intel/io/acpica/utilities/utlock.c
index fd2fb2bb1d..61585ad499 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utlock.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utlock.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/utilities/utmath.c b/usr/src/uts/intel/io/acpica/utilities/utmath.c
index 032b72d074..d0fad7cb6c 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utmath.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utmath.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/utilities/utmisc.c b/usr/src/uts/intel/io/acpica/utilities/utmisc.c
index d20ad7e3a1..062c768e72 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utmisc.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utmisc.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -466,6 +466,44 @@ AcpiUtStrlwr (
return;
}
+
+
+/******************************************************************************
+ *
+ * FUNCTION: AcpiUtStricmp
+ *
+ * PARAMETERS: String1 - first string to compare
+ * String2 - second string to compare
+ *
+ * RETURN: int that signifies string relationship. Zero means strings
+ * are equal.
+ *
+ * DESCRIPTION: Implementation of the non-ANSI stricmp function (compare
+ * strings with no case sensitivity)
+ *
+ ******************************************************************************/
+
+int
+AcpiUtStricmp (
+ char *String1,
+ char *String2)
+{
+ int c1;
+ int c2;
+
+
+ do
+ {
+ c1 = tolower ((int) *String1);
+ c2 = tolower ((int) *String2);
+
+ String1++;
+ String2++;
+ }
+ while ((c1 == c2) && (c1));
+
+ return (c1 - c2);
+}
#endif
diff --git a/usr/src/uts/intel/io/acpica/utilities/utmutex.c b/usr/src/uts/intel/io/acpica/utilities/utmutex.c
index f7af803de1..f1cdb01804 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utmutex.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utmutex.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -335,15 +335,10 @@ ACPI_STATUS
AcpiUtReleaseMutex (
ACPI_MUTEX_HANDLE MutexId)
{
- ACPI_THREAD_ID ThisThreadId;
-
-
ACPI_FUNCTION_NAME (UtReleaseMutex);
-
- ThisThreadId = AcpiOsGetThreadId ();
ACPI_DEBUG_PRINT ((ACPI_DB_MUTEX, "Thread %u releasing Mutex [%s]\n",
- (UINT32) ThisThreadId, AcpiUtGetMutexName (MutexId)));
+ (UINT32) AcpiOsGetThreadId (), AcpiUtGetMutexName (MutexId)));
if (MutexId > ACPI_MAX_MUTEX)
{
@@ -374,7 +369,7 @@ AcpiUtReleaseMutex (
*/
for (i = MutexId; i < ACPI_NUM_MUTEX; i++)
{
- if (AcpiGbl_MutexInfo[i].ThreadId == ThisThreadId)
+ if (AcpiGbl_MutexInfo[i].ThreadId == AcpiOsGetThreadId ())
{
if (i == MutexId)
{
diff --git a/usr/src/uts/intel/io/acpica/utilities/utobject.c b/usr/src/uts/intel/io/acpica/utilities/utobject.c
index ea621c741e..fea140946c 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utobject.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utobject.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/utilities/utosi.c b/usr/src/uts/intel/io/acpica/utilities/utosi.c
index 053d6adc9b..dcce8a99e1 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utosi.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utosi.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/utilities/utresrc.c b/usr/src/uts/intel/io/acpica/utilities/utresrc.c
index 468ffd6a1a..c2c135b7d2 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utresrc.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utresrc.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -46,7 +46,7 @@
#include "acpi.h"
#include "accommon.h"
-#include "amlresrc.h"
+#include "acresrc.h"
#define _COMPONENT ACPI_UTILITIES
@@ -179,6 +179,154 @@ const char *AcpiGbl_TypDecode[] =
"TypeF"
};
+const char *AcpiGbl_PpcDecode[] =
+{
+ "PullDefault",
+ "PullUp",
+ "PullDown",
+ "PullNone"
+};
+
+const char *AcpiGbl_IorDecode[] =
+{
+ "IoRestrictionNone",
+ "IoRestrictionInputOnly",
+ "IoRestrictionOutputOnly",
+ "IoRestrictionNoneAndPreserve"
+};
+
+const char *AcpiGbl_DtsDecode[] =
+{
+ "Width8bit",
+ "Width16bit",
+ "Width32bit",
+ "Width64bit",
+ "Width128bit",
+ "Width256bit",
+};
+
+/* GPIO connection type */
+
+const char *AcpiGbl_CtDecode[] =
+{
+ "Interrupt",
+ "I/O"
+};
+
+/* Serial bus type */
+
+const char *AcpiGbl_SbtDecode[] =
+{
+ "/* UNKNOWN serial bus type */",
+ "I2C",
+ "SPI",
+ "UART"
+};
+
+/* I2C serial bus access mode */
+
+const char *AcpiGbl_AmDecode[] =
+{
+ "AddressingMode7Bit",
+ "AddressingMode10Bit"
+};
+
+/* I2C serial bus slave mode */
+
+const char *AcpiGbl_SmDecode[] =
+{
+ "ControllerInitiated",
+ "DeviceInitiated"
+};
+
+/* SPI serial bus wire mode */
+
+const char *AcpiGbl_WmDecode[] =
+{
+ "FourWireMode",
+ "ThreeWireMode"
+};
+
+/* SPI serial clock phase */
+
+const char *AcpiGbl_CphDecode[] =
+{
+ "ClockPhaseFirst",
+ "ClockPhaseSecond"
+};
+
+/* SPI serial bus clock polarity */
+
+const char *AcpiGbl_CpoDecode[] =
+{
+ "ClockPolarityLow",
+ "ClockPolarityHigh"
+};
+
+/* SPI serial bus device polarity */
+
+const char *AcpiGbl_DpDecode[] =
+{
+ "PolarityLow",
+ "PolarityHigh"
+};
+
+/* UART serial bus endian */
+
+const char *AcpiGbl_EdDecode[] =
+{
+ "LittleEndian",
+ "BigEndian"
+};
+
+/* UART serial bus bits per byte */
+
+const char *AcpiGbl_BpbDecode[] =
+{
+ "DataBitsFive",
+ "DataBitsSix",
+ "DataBitsSeven",
+ "DataBitsEight",
+ "DataBitsNine",
+ "/* UNKNOWN Bits per byte */",
+ "/* UNKNOWN Bits per byte */",
+ "/* UNKNOWN Bits per byte */"
+};
+
+/* UART serial bus stop bits */
+
+const char *AcpiGbl_SbDecode[] =
+{
+ "StopBitsNone",
+ "StopBitsOne",
+ "StopBitsOnePlusHalf",
+ "StopBitsTwo"
+};
+
+/* UART serial bus flow control */
+
+const char *AcpiGbl_FcDecode[] =
+{
+ "FlowControlNone",
+ "FlowControlHardware",
+ "FlowControlXON",
+ "/* UNKNOWN flow control keyword */"
+};
+
+/* UART serial bus parity type */
+
+const char *AcpiGbl_PtDecode[] =
+{
+ "ParityTypeNone",
+ "ParityTypeEven",
+ "ParityTypeOdd",
+ "ParityTypeMark",
+ "ParityTypeSpace",
+ "/* UNKNOWN parity keyword */",
+ "/* UNKNOWN parity keyword */",
+ "/* UNKNOWN parity keyword */"
+};
+
#endif
@@ -200,7 +348,7 @@ const UINT8 AcpiGbl_ResourceAmlSizes[] =
ACPI_AML_SIZE_SMALL (AML_RESOURCE_END_DEPENDENT),
ACPI_AML_SIZE_SMALL (AML_RESOURCE_IO),
ACPI_AML_SIZE_SMALL (AML_RESOURCE_FIXED_IO),
- 0,
+ ACPI_AML_SIZE_SMALL (AML_RESOURCE_FIXED_DMA),
0,
0,
0,
@@ -220,7 +368,18 @@ const UINT8 AcpiGbl_ResourceAmlSizes[] =
ACPI_AML_SIZE_LARGE (AML_RESOURCE_ADDRESS16),
ACPI_AML_SIZE_LARGE (AML_RESOURCE_EXTENDED_IRQ),
ACPI_AML_SIZE_LARGE (AML_RESOURCE_ADDRESS64),
- ACPI_AML_SIZE_LARGE (AML_RESOURCE_EXTENDED_ADDRESS64)
+ ACPI_AML_SIZE_LARGE (AML_RESOURCE_EXTENDED_ADDRESS64),
+ ACPI_AML_SIZE_LARGE (AML_RESOURCE_GPIO),
+ 0,
+ ACPI_AML_SIZE_LARGE (AML_RESOURCE_COMMON_SERIALBUS),
+};
+
+const UINT8 AcpiGbl_ResourceAmlSerialBusSizes[] =
+{
+ 0,
+ ACPI_AML_SIZE_LARGE (AML_RESOURCE_I2C_SERIALBUS),
+ ACPI_AML_SIZE_LARGE (AML_RESOURCE_SPI_SERIALBUS),
+ ACPI_AML_SIZE_LARGE (AML_RESOURCE_UART_SERIALBUS),
};
@@ -238,35 +397,49 @@ static const UINT8 AcpiGbl_ResourceTypes[] =
0,
0,
0,
- ACPI_SMALL_VARIABLE_LENGTH,
- ACPI_FIXED_LENGTH,
- ACPI_SMALL_VARIABLE_LENGTH,
- ACPI_FIXED_LENGTH,
- ACPI_FIXED_LENGTH,
- ACPI_FIXED_LENGTH,
- 0,
+ ACPI_SMALL_VARIABLE_LENGTH, /* 04 IRQ */
+ ACPI_FIXED_LENGTH, /* 05 DMA */
+ ACPI_SMALL_VARIABLE_LENGTH, /* 06 StartDependentFunctions */
+ ACPI_FIXED_LENGTH, /* 07 EndDependentFunctions */
+ ACPI_FIXED_LENGTH, /* 08 IO */
+ ACPI_FIXED_LENGTH, /* 09 FixedIO */
+ ACPI_FIXED_LENGTH, /* 0A FixedDMA */
0,
0,
0,
- ACPI_VARIABLE_LENGTH,
- ACPI_FIXED_LENGTH,
+ ACPI_VARIABLE_LENGTH, /* 0E VendorShort */
+ ACPI_FIXED_LENGTH, /* 0F EndTag */
/* Large descriptors */
0,
- ACPI_FIXED_LENGTH,
- ACPI_FIXED_LENGTH,
+ ACPI_FIXED_LENGTH, /* 01 Memory24 */
+ ACPI_FIXED_LENGTH, /* 02 GenericRegister */
+ 0,
+ ACPI_VARIABLE_LENGTH, /* 04 VendorLong */
+ ACPI_FIXED_LENGTH, /* 05 Memory32 */
+ ACPI_FIXED_LENGTH, /* 06 Memory32Fixed */
+ ACPI_VARIABLE_LENGTH, /* 07 Dword* address */
+ ACPI_VARIABLE_LENGTH, /* 08 Word* address */
+ ACPI_VARIABLE_LENGTH, /* 09 ExtendedIRQ */
+ ACPI_VARIABLE_LENGTH, /* 0A Qword* address */
+ ACPI_FIXED_LENGTH, /* 0B Extended* address */
+ ACPI_VARIABLE_LENGTH, /* 0C Gpio* */
0,
- ACPI_VARIABLE_LENGTH,
- ACPI_FIXED_LENGTH,
- ACPI_FIXED_LENGTH,
- ACPI_VARIABLE_LENGTH,
- ACPI_VARIABLE_LENGTH,
- ACPI_VARIABLE_LENGTH,
- ACPI_VARIABLE_LENGTH,
- ACPI_FIXED_LENGTH
+ ACPI_VARIABLE_LENGTH /* 0E *SerialBus */
};
+/*
+ * For the iASL compiler/disassembler, we don't want any error messages
+ * because the disassembler uses the resource validation code to determine
+ * if Buffer objects are actually Resource Templates.
+ */
+#ifdef ACPI_ASL_COMPILER
+#define ACPI_RESOURCE_ERROR(plist)
+#else
+#define ACPI_RESOURCE_ERROR(plist) ACPI_ERROR(plist)
+#endif
+
/*******************************************************************************
*
@@ -297,6 +470,7 @@ AcpiUtWalkAmlResources (
UINT8 ResourceIndex;
UINT32 Length;
UINT32 Offset = 0;
+ UINT8 EndTag[2] = {0x79, 0x00};
ACPI_FUNCTION_TRACE (UtWalkAmlResources);
@@ -322,6 +496,10 @@ AcpiUtWalkAmlResources (
Status = AcpiUtValidateResource (Aml, &ResourceIndex);
if (ACPI_FAILURE (Status))
{
+ /*
+ * Exit on failure. Cannot continue because the descriptor length
+ * may be bogus also.
+ */
return_ACPI_STATUS (Status);
}
@@ -336,7 +514,7 @@ AcpiUtWalkAmlResources (
Status = UserFunction (Aml, Length, Offset, ResourceIndex, Context);
if (ACPI_FAILURE (Status))
{
- return (Status);
+ return_ACPI_STATUS (Status);
}
}
@@ -371,7 +549,19 @@ AcpiUtWalkAmlResources (
/* Did not find an EndTag descriptor */
- return (AE_AML_NO_RESOURCE_END_TAG);
+ if (UserFunction)
+ {
+ /* Insert an EndTag anyway. AcpiRsGetListLength always leaves room */
+
+ (void) AcpiUtValidateResource (EndTag, &ResourceIndex);
+ Status = UserFunction (EndTag, 2, Offset, ResourceIndex, Context);
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+ }
+
+ return_ACPI_STATUS (AE_AML_NO_RESOURCE_END_TAG);
}
@@ -396,6 +586,7 @@ AcpiUtValidateResource (
void *Aml,
UINT8 *ReturnIndex)
{
+ AML_RESOURCE *AmlResource;
UINT8 ResourceType;
UINT8 ResourceIndex;
ACPI_RS_LENGTH ResourceLength;
@@ -420,7 +611,7 @@ AcpiUtValidateResource (
if (ResourceType > ACPI_RESOURCE_NAME_LARGE_MAX)
{
- return (AE_AML_INVALID_RESOURCE_TYPE);
+ goto InvalidResource;
}
/*
@@ -439,17 +630,18 @@ AcpiUtValidateResource (
((ResourceType & ACPI_RESOURCE_NAME_SMALL_MASK) >> 3);
}
- /* Check validity of the resource type, zero indicates name is invalid */
-
+ /*
+ * Check validity of the resource type, via AcpiGbl_ResourceTypes. Zero
+ * indicates an invalid resource.
+ */
if (!AcpiGbl_ResourceTypes[ResourceIndex])
{
- return (AE_AML_INVALID_RESOURCE_TYPE);
+ goto InvalidResource;
}
-
/*
- * 2) Validate the ResourceLength field. This ensures that the length
- * is at least reasonable, and guarantees that it is non-zero.
+ * Validate the ResourceLength field. This ensures that the length
+ * is at least reasonable, and guarantees that it is non-zero.
*/
ResourceLength = AcpiUtGetResourceLength (Aml);
MinimumResourceLength = AcpiGbl_ResourceAmlSizes[ResourceIndex];
@@ -464,7 +656,7 @@ AcpiUtValidateResource (
if (ResourceLength != MinimumResourceLength)
{
- return (AE_AML_BAD_RESOURCE_LENGTH);
+ goto BadResourceLength;
}
break;
@@ -474,7 +666,7 @@ AcpiUtValidateResource (
if (ResourceLength < MinimumResourceLength)
{
- return (AE_AML_BAD_RESOURCE_LENGTH);
+ goto BadResourceLength;
}
break;
@@ -485,7 +677,7 @@ AcpiUtValidateResource (
if ((ResourceLength > MinimumResourceLength) ||
(ResourceLength < (MinimumResourceLength - 1)))
{
- return (AE_AML_BAD_RESOURCE_LENGTH);
+ goto BadResourceLength;
}
break;
@@ -493,7 +685,22 @@ AcpiUtValidateResource (
/* Shouldn't happen (because of validation earlier), but be sure */
- return (AE_AML_INVALID_RESOURCE_TYPE);
+ goto InvalidResource;
+ }
+
+ AmlResource = ACPI_CAST_PTR (AML_RESOURCE, Aml);
+ if (ResourceType == ACPI_RESOURCE_NAME_SERIAL_BUS)
+ {
+ /* Validate the BusType field */
+
+ if ((AmlResource->CommonSerialBus.Type == 0) ||
+ (AmlResource->CommonSerialBus.Type > AML_RESOURCE_MAX_SERIALBUSTYPE))
+ {
+ ACPI_RESOURCE_ERROR ((AE_INFO,
+ "Invalid/unsupported SerialBus resource descriptor: BusType 0x%2.2X",
+ AmlResource->CommonSerialBus.Type));
+ return (AE_AML_INVALID_RESOURCE_TYPE);
+ }
}
/* Optionally return the resource table index */
@@ -504,6 +711,22 @@ AcpiUtValidateResource (
}
return (AE_OK);
+
+
+InvalidResource:
+
+ ACPI_RESOURCE_ERROR ((AE_INFO,
+ "Invalid/unsupported resource descriptor: Type 0x%2.2X",
+ ResourceType));
+ return (AE_AML_INVALID_RESOURCE_TYPE);
+
+BadResourceLength:
+
+ ACPI_RESOURCE_ERROR ((AE_INFO,
+ "Invalid resource descriptor length: Type "
+ "0x%2.2X, Length 0x%4.4X, MinLength 0x%4.4X",
+ ResourceType, ResourceLength, MinimumResourceLength));
+ return (AE_AML_BAD_RESOURCE_LENGTH);
}
diff --git a/usr/src/uts/intel/io/acpica/utilities/utstate.c b/usr/src/uts/intel/io/acpica/utilities/utstate.c
index bc84915ab0..62a1aefec7 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utstate.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utstate.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/utilities/uttrack.c b/usr/src/uts/intel/io/acpica/utilities/uttrack.c
index 6ecec63f31..7d58f5c8a8 100644
--- a/usr/src/uts/intel/io/acpica/utilities/uttrack.c
+++ b/usr/src/uts/intel/io/acpica/utilities/uttrack.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -633,21 +633,21 @@ AcpiUtDumpAllocations (
switch (ACPI_GET_DESCRIPTOR_TYPE (Descriptor))
{
case ACPI_DESC_TYPE_OPERAND:
- if (Element->Size == sizeof (ACPI_DESC_TYPE_OPERAND))
+ if (Element->Size == sizeof (ACPI_OPERAND_OBJECT))
{
DescriptorType = ACPI_DESC_TYPE_OPERAND;
}
break;
case ACPI_DESC_TYPE_PARSER:
- if (Element->Size == sizeof (ACPI_DESC_TYPE_PARSER))
+ if (Element->Size == sizeof (ACPI_PARSE_OBJECT))
{
DescriptorType = ACPI_DESC_TYPE_PARSER;
}
break;
case ACPI_DESC_TYPE_NAMED:
- if (Element->Size == sizeof (ACPI_DESC_TYPE_NAMED))
+ if (Element->Size == sizeof (ACPI_NAMESPACE_NODE))
{
DescriptorType = ACPI_DESC_TYPE_NAMED;
}
diff --git a/usr/src/uts/intel/io/acpica/utilities/utxface.c b/usr/src/uts/intel/io/acpica/utilities/utxface.c
index 63b081b758..3e858c9b0f 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utxface.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utxface.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -56,7 +56,6 @@
#ifndef ACPI_ASL_COMPILER
-
/*******************************************************************************
*
* FUNCTION: AcpiInitializeSubsystem
@@ -162,6 +161,8 @@ AcpiEnableSubsystem (
ACPI_FUNCTION_TRACE (AcpiEnableSubsystem);
+#if (!ACPI_REDUCED_HARDWARE)
+
/* Enable ACPI mode */
if (!(Flags & ACPI_NO_ACPI_ENABLE))
@@ -189,6 +190,8 @@ AcpiEnableSubsystem (
return_ACPI_STATUS (Status);
}
+#endif /* !ACPI_REDUCED_HARDWARE */
+
/*
* Install the default OpRegion handlers. These are installed unless
* other handlers have already been installed via the
@@ -206,6 +209,7 @@ AcpiEnableSubsystem (
}
}
+#if (!ACPI_REDUCED_HARDWARE)
/*
* Initialize ACPI Event handling (Fixed and General Purpose)
*
@@ -248,6 +252,8 @@ AcpiEnableSubsystem (
}
}
+#endif /* !ACPI_REDUCED_HARDWARE */
+
return_ACPI_STATUS (Status);
}
@@ -785,5 +791,47 @@ AcpiInstallInterfaceHandler (
ACPI_EXPORT_SYMBOL (AcpiInstallInterfaceHandler)
-#endif /* !ACPI_ASL_COMPILER */
+/*****************************************************************************
+ *
+ * FUNCTION: AcpiCheckAddressRange
+ *
+ * PARAMETERS: SpaceId - Address space ID
+ * Address - Start address
+ * Length - Length
+ * Warn - TRUE if warning on overlap desired
+ *
+ * RETURN: Count of the number of conflicts detected.
+ *
+ * DESCRIPTION: Check if the input address range overlaps any of the
+ * ASL operation region address ranges.
+ *
+ ****************************************************************************/
+
+UINT32
+AcpiCheckAddressRange (
+ ACPI_ADR_SPACE_TYPE SpaceId,
+ ACPI_PHYSICAL_ADDRESS Address,
+ ACPI_SIZE Length,
+ BOOLEAN Warn)
+{
+ UINT32 Overlaps;
+ ACPI_STATUS Status;
+
+
+ Status = AcpiUtAcquireMutex (ACPI_MTX_NAMESPACE);
+ if (ACPI_FAILURE (Status))
+ {
+ return (0);
+ }
+
+ Overlaps = AcpiUtCheckAddressRange (SpaceId, Address,
+ (UINT32) Length, Warn);
+
+ (void) AcpiUtReleaseMutex (ACPI_MTX_NAMESPACE);
+ return (Overlaps);
+}
+
+ACPI_EXPORT_SYMBOL (AcpiCheckAddressRange)
+
+#endif /* !ACPI_ASL_COMPILER */
diff --git a/usr/src/uts/intel/io/acpica/utilities/utxferror.c b/usr/src/uts/intel/io/acpica/utilities/utxferror.c
index a371308d6b..8ffb274894 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utxferror.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utxferror.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/utilities/utxfmutex.c b/usr/src/uts/intel/io/acpica/utilities/utxfmutex.c
new file mode 100644
index 0000000000..c4784310d3
--- /dev/null
+++ b/usr/src/uts/intel/io/acpica/utilities/utxfmutex.c
@@ -0,0 +1,213 @@
+/*******************************************************************************
+ *
+ * Module Name: utxfmutex - external AML mutex access functions
+ *
+ ******************************************************************************/
+
+/*
+ * Copyright (C) 2000 - 2012, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ * of any contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#define __UTXFMUTEX_C__
+
+#include "acpi.h"
+#include "accommon.h"
+#include "acnamesp.h"
+
+
+#define _COMPONENT ACPI_UTILITIES
+ ACPI_MODULE_NAME ("utxfmutex")
+
+
+/* Local prototypes */
+
+static ACPI_STATUS
+AcpiUtGetMutexObject (
+ ACPI_HANDLE Handle,
+ ACPI_STRING Pathname,
+ ACPI_OPERAND_OBJECT **RetObj);
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiUtGetMutexObject
+ *
+ * PARAMETERS: Handle - Mutex or prefix handle (optional)
+ * Pathname - Mutex pathname (optional)
+ * RetObj - Where the mutex object is returned
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Get an AML mutex object. The mutex node is pointed to by
+ * Handle:Pathname. Either Handle or Pathname can be NULL, but
+ * not both.
+ *
+ ******************************************************************************/
+
+static ACPI_STATUS
+AcpiUtGetMutexObject (
+ ACPI_HANDLE Handle,
+ ACPI_STRING Pathname,
+ ACPI_OPERAND_OBJECT **RetObj)
+{
+ ACPI_NAMESPACE_NODE *MutexNode;
+ ACPI_OPERAND_OBJECT *MutexObj;
+ ACPI_STATUS Status;
+
+
+ /* Parameter validation */
+
+ if (!RetObj || (!Handle && !Pathname))
+ {
+ return (AE_BAD_PARAMETER);
+ }
+
+ /* Get a the namespace node for the mutex */
+
+ MutexNode = Handle;
+ if (Pathname != NULL)
+ {
+ Status = AcpiGetHandle (Handle, Pathname,
+ ACPI_CAST_PTR (ACPI_HANDLE, &MutexNode));
+ if (ACPI_FAILURE (Status))
+ {
+ return (Status);
+ }
+ }
+
+ /* Ensure that we actually have a Mutex object */
+
+ if (!MutexNode ||
+ (MutexNode->Type != ACPI_TYPE_MUTEX))
+ {
+ return (AE_TYPE);
+ }
+
+ /* Get the low-level mutex object */
+
+ MutexObj = AcpiNsGetAttachedObject (MutexNode);
+ if (!MutexObj)
+ {
+ return (AE_NULL_OBJECT);
+ }
+
+ *RetObj = MutexObj;
+ return (AE_OK);
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiAcquireMutex
+ *
+ * PARAMETERS: Handle - Mutex or prefix handle (optional)
+ * Pathname - Mutex pathname (optional)
+ * Timeout - Max time to wait for the lock (millisec)
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Acquire an AML mutex. This is a device driver interface to
+ * AML mutex objects, and allows for transaction locking between
+ * drivers and AML code. The mutex node is pointed to by
+ * Handle:Pathname. Either Handle or Pathname can be NULL, but
+ * not both.
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiAcquireMutex (
+ ACPI_HANDLE Handle,
+ ACPI_STRING Pathname,
+ UINT16 Timeout)
+{
+ ACPI_STATUS Status;
+ ACPI_OPERAND_OBJECT *MutexObj;
+
+
+ /* Get the low-level mutex associated with Handle:Pathname */
+
+ Status = AcpiUtGetMutexObject (Handle, Pathname, &MutexObj);
+ if (ACPI_FAILURE (Status))
+ {
+ return (Status);
+ }
+
+ /* Acquire the OS mutex */
+
+ Status = AcpiOsAcquireMutex (MutexObj->Mutex.OsMutex, Timeout);
+ return (Status);
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiReleaseMutex
+ *
+ * PARAMETERS: Handle - Mutex or prefix handle (optional)
+ * Pathname - Mutex pathname (optional)
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Release an AML mutex. This is a device driver interface to
+ * AML mutex objects, and allows for transaction locking between
+ * drivers and AML code. The mutex node is pointed to by
+ * Handle:Pathname. Either Handle or Pathname can be NULL, but
+ * not both.
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiReleaseMutex (
+ ACPI_HANDLE Handle,
+ ACPI_STRING Pathname)
+{
+ ACPI_STATUS Status;
+ ACPI_OPERAND_OBJECT *MutexObj;
+
+
+ /* Get the low-level mutex associated with Handle:Pathname */
+
+ Status = AcpiUtGetMutexObject (Handle, Pathname, &MutexObj);
+ if (ACPI_FAILURE (Status))
+ {
+ return (Status);
+ }
+
+ /* Release the OS mutex */
+
+ AcpiOsReleaseMutex (MutexObj->Mutex.OsMutex);
+ return (AE_OK);
+}
diff --git a/usr/src/uts/intel/io/dktp/dcdev/dadk.c b/usr/src/uts/intel/io/dktp/dcdev/dadk.c
index 35f97482b8..f74a0d4137 100644
--- a/usr/src/uts/intel/io/dktp/dcdev/dadk.c
+++ b/usr/src/uts/intel/io/dktp/dcdev/dadk.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent, Inc. All rights reserved.
*/
/*
@@ -170,6 +171,8 @@ static int dadk_debug = DGEOM;
#endif /* DADK_DEBUG */
+#define ONE_MIN ((longlong_t)60 * NANOSEC)
+
static int dadk_check_media_time = 3000000; /* 3 Second State Check */
static int dadk_dk_maxphys = 0x80000;
@@ -1376,6 +1379,47 @@ static struct dadkio_derr dadk_errtab[] = {
{COMMAND_DONE_ERROR, GDA_FATAL}, /* 23 DERR_RESV */
};
+/*
+ * A bad disk can result in a large number of errors spewed to the log.
+ * This can in turn lead to /var/adm/messages filling up the file system on
+ * a machine with a small root or /var file system.
+ *
+ * Instead of logging every error, if we're seeing repeated errors on a disk
+ * only log them periodically.
+ */
+static void
+dadk_logerr(struct dadk *dadkp, struct cmpkt *pktp, char *label,
+ int severity, daddr_t blkno, daddr_t err_blkno,
+ char **cmdvec, char **senvec)
+{
+ hrtime_t now;
+
+ now = gethrtime();
+ if ((now - dadkp->dad_last_log) < ONE_MIN) {
+ atomic_add_32(&dadkp->dad_err_cnt, 1);
+ return;
+ }
+
+ if (dadkp->dad_err_cnt > 0) {
+ dev_info_t *dev = dadkp->dad_sd->sd_dev;
+ char name[256], buf[256];
+
+ if (dev)
+ (void) snprintf(name, sizeof (name), "%s (%s%d)",
+ ddi_pathname(dev, buf), label,
+ ddi_get_instance(dev));
+ else
+ (void) strlcpy(name, label, sizeof (name));
+ cmn_err(CE_WARN, "%s: %d additional unlogged errors\n",
+ name, dadkp->dad_err_cnt);
+ }
+
+ gda_errmsg(dadkp->dad_sd, pktp, label, severity, blkno, err_blkno,
+ cmdvec, senvec);
+ dadkp->dad_err_cnt = 0;
+ dadkp->dad_last_log = now;
+}
+
static int
dadk_chkerr(struct cmpkt *pktp)
{
@@ -1462,7 +1506,7 @@ dadk_chkerr(struct cmpkt *pktp)
return (COMMAND_DONE);
}
if (pktp->cp_passthru == NULL) {
- gda_errmsg(dadkp->dad_sd, pktp, dadk_name,
+ dadk_logerr(dadkp, pktp, dadk_name,
dadk_errtab[scb].d_severity, pktp->cp_srtsec,
err_blkno, dadk_cmds, dadk_sense);
}
@@ -1519,7 +1563,7 @@ dadk_recorderr(struct cmpkt *pktp, struct dadkio_rwcmd *rwcmdp)
if (rwcmdp->flags & DADKIO_FLAG_SILENT)
return;
- gda_errmsg(dadkp->dad_sd, pktp, dadk_name, dadk_errtab[scb].d_severity,
+ dadk_logerr(dadkp, pktp, dadk_name, dadk_errtab[scb].d_severity,
rwcmdp->blkaddr, rwcmdp->status.failed_blk,
dadk_cmds, dadk_sense);
}
diff --git a/usr/src/uts/intel/io/ipmi/ipmivars.h b/usr/src/uts/intel/io/ipmi/ipmivars.h
index 7fd819cd3d..f547d6f043 100644
--- a/usr/src/uts/intel/io/ipmi/ipmivars.h
+++ b/usr/src/uts/intel/io/ipmi/ipmivars.h
@@ -78,6 +78,7 @@ struct ipmi_request {
#define SMIC_CTL_STS 1
#define SMIC_FLAGS 2
+struct ipmi_softc;
#define IPMI_BUSY 0x1
#define IPMI_CLOSING 0x2
diff --git a/usr/src/uts/intel/io/vmxnet/buildNumber.h b/usr/src/uts/intel/io/vmxnet/buildNumber.h
new file mode 100644
index 0000000000..97f18a3cbc
--- /dev/null
+++ b/usr/src/uts/intel/io/vmxnet/buildNumber.h
@@ -0,0 +1,12 @@
+#define BUILD_NUMBER \
+ "build-425873"
+#define BUILD_NUMBER_NUMERIC \
+ 425873
+#define BUILD_NUMBER_NUMERIC_STRING \
+ "425873"
+#define PRODUCT_BUILD_NUMBER \
+ "product-build-6261"
+#define PRODUCT_BUILD_NUMBER_NUMERIC \
+ 6261
+#define PRODUCT_BUILD_NUMBER_NUMERIC_STRING \
+ "6261"
diff --git a/usr/src/uts/intel/io/vmxnet/includeCheck.h b/usr/src/uts/intel/io/vmxnet/includeCheck.h
new file mode 100644
index 0000000000..c414d6daf5
--- /dev/null
+++ b/usr/src/uts/intel/io/vmxnet/includeCheck.h
@@ -0,0 +1,159 @@
+/*********************************************************
+ * Copyright (C) 1998 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation version 2.1 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the Lesser GNU General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ *********************************************************/
+
+/*********************************************************
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of VMware Inc. nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission of VMware Inc.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *********************************************************/
+
+/*********************************************************
+ * The contents of this file are subject to the terms of the Common
+ * Development and Distribution License (the "License") version 1.0
+ * and no later version. You may not use this file except in
+ * compliance with the License.
+ *
+ * You can obtain a copy of the License at
+ * http://www.opensource.org/licenses/cddl1.php
+ *
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ *********************************************************/
+
+/*
+ * includeCheck.h --
+ *
+ * Restrict include file use.
+ *
+ * In every .h file, define one or more of these
+ *
+ * INCLUDE_ALLOW_VMX
+ * INCLUDE_ALLOW_USERLEVEL
+ * INCLUDE_ALLOW_VMCORE
+ * INCLUDE_ALLOW_MODULE
+ * INCLUDE_ALLOW_VMKERNEL
+ * INCLUDE_ALLOW_DISTRIBUTE
+ * INCLUDE_ALLOW_VMK_MODULE
+ * INCLUDE_ALLOW_VMKDRIVERS
+ * INCLUDE_ALLOW_VMIROM
+ *
+ * Then include this file.
+ *
+ * Any file that has INCLUDE_ALLOW_DISTRIBUTE defined will potentially
+ * be distributed in source form along with GPLed code. Ensure
+ * that this is acceptable.
+ */
+
+
+/*
+ * Declare a VMCORE-only variable to help classify object
+ * files. The variable goes in the common block and does
+ * not create multiple definition link-time conflicts.
+ */
+
+#if defined VMCORE && defined VMX86_DEVEL && defined VMX86_DEBUG && \
+ defined linux && !defined MODULE && \
+ !defined COMPILED_WITH_VMCORE
+#define COMPILED_WITH_VMCORE compiled_with_vmcore
+#ifdef ASM
+ .comm compiled_with_vmcore, 0
+#else
+ asm(".comm compiled_with_vmcore, 0");
+#endif /* ASM */
+#endif
+
+
+#if defined VMCORE && \
+ !(defined VMX86_VMX || defined VMM || \
+ defined MONITOR_APP || defined VMMON)
+#error "Makefile problem: VMCORE without VMX86_VMX or \
+ VMM or MONITOR_APP or MODULE."
+#endif
+
+#if defined VMCORE && !defined INCLUDE_ALLOW_VMCORE
+#error "The surrounding include file is not allowed in vmcore."
+#endif
+#undef INCLUDE_ALLOW_VMCORE
+
+#if defined VMX86_VMX && !defined VMCORE && \
+ !(defined INCLUDE_ALLOW_VMX || defined INCLUDE_ALLOW_USERLEVEL)
+#error "The surrounding include file is not allowed in the VMX."
+#endif
+#undef INCLUDE_ALLOW_VMX
+
+#if defined USERLEVEL && !defined VMX86_VMX && !defined VMCORE && \
+ !defined INCLUDE_ALLOW_USERLEVEL
+#error "The surrounding include file is not allowed at userlevel."
+#endif
+#undef INCLUDE_ALLOW_USERLEVEL
+
+#if defined MODULE && !defined VMKERNEL_MODULE && \
+ !defined VMMON && !defined INCLUDE_ALLOW_MODULE
+#error "The surrounding include file is not allowed in driver modules."
+#endif
+#undef INCLUDE_ALLOW_MODULE
+
+#if defined VMMON && !defined INCLUDE_ALLOW_VMMON
+#error "The surrounding include file is not allowed in vmmon."
+#endif
+#undef INCLUDE_ALLOW_VMMON
+
+#if defined VMKERNEL && !defined INCLUDE_ALLOW_VMKERNEL
+#error "The surrounding include file is not allowed in the vmkernel."
+#endif
+#undef INCLUDE_ALLOW_VMKERNEL
+
+#if defined GPLED_CODE && !defined INCLUDE_ALLOW_DISTRIBUTE
+#error "The surrounding include file is not allowed in GPL code."
+#endif
+#undef INCLUDE_ALLOW_DISTRIBUTE
+
+#if defined VMKERNEL_MODULE && !defined VMKERNEL && \
+ !defined INCLUDE_ALLOW_VMK_MODULE && !defined INCLUDE_ALLOW_VMKDRIVERS
+#error "The surrounding include file is not allowed in vmkernel modules."
+#endif
+#undef INCLUDE_ALLOW_VMK_MODULE
+#undef INCLUDE_ALLOW_VMKDRIVERS
+
+#if defined VMIROM && ! defined INCLUDE_ALLOW_VMIROM
+#error "The surrounding include file is not allowed in vmirom."
+#endif
+#undef INCLUDE_ALLOW_VMIROM
diff --git a/usr/src/uts/intel/io/vmxnet/net.h b/usr/src/uts/intel/io/vmxnet/net.h
new file mode 100644
index 0000000000..41b6eb1d14
--- /dev/null
+++ b/usr/src/uts/intel/io/vmxnet/net.h
@@ -0,0 +1,220 @@
+/*********************************************************
+ * Copyright (C) 1998 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ *********************************************************/
+
+/*********************************************************
+ * The contents of this file are subject to the terms of the Common
+ * Development and Distribution License (the "License") version 1.0
+ * and no later version. You may not use this file except in
+ * compliance with the License.
+ *
+ * You can obtain a copy of the License at
+ * http://www.opensource.org/licenses/cddl1.php
+ *
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ *********************************************************/
+
+/************************************************************
+ *
+ * net.h
+ *
+ * This file should contain all network global defines.
+ * No vlance/vmxnet/vnet/vmknet specific stuff should be
+ * put here only defines used/usable by all network code.
+ * --gustav
+ *
+ ************************************************************/
+
+#ifndef VMWARE_DEVICES_NET_H
+#define VMWARE_DEVICES_NET_H
+
+#define INCLUDE_ALLOW_USERLEVEL
+#define INCLUDE_ALLOW_MODULE
+#define INCLUDE_ALLOW_VMCORE
+
+#include "includeCheck.h"
+#include "vm_device_version.h"
+
+#ifdef VMCORE
+#include "config.h"
+#include "str.h"
+#include "strutil.h"
+#endif
+
+#define ETHERNET_MTU 1518
+#define ETH_MIN_FRAME_LEN 60
+
+#ifndef ETHER_ADDR_LEN
+#define ETHER_ADDR_LEN 6 /* length of MAC address */
+#endif
+#define ETH_HEADER_LEN 14 /* length of Ethernet header */
+#define IP_ADDR_LEN 4 /* length of IPv4 address */
+#define IP_HEADER_LEN 20 /* minimum length of IPv4 header */
+
+#define ETHER_MAX_QUEUED_PACKET 1600
+
+
+/*
+ * State's that a NIC can be in currently we only use this
+ * in VLance but if we implement/emulate new adapters that
+ * we also want to be able to morph a new corresponding
+ * state should be added.
+ */
+
+#define LANCE_CHIP 0x2934
+#define VMXNET_CHIP 0x4392
+
+/*
+ * Size of reserved IO space needed by the LANCE adapter and
+ * the VMXNET adapter. If you add more ports to Vmxnet than
+ * there is reserved space you must bump VMXNET_CHIP_IO_RESV_SIZE.
+ * The sizes must be powers of 2.
+ */
+
+#define LANCE_CHIP_IO_RESV_SIZE 0x20
+#define VMXNET_CHIP_IO_RESV_SIZE 0x40
+
+#define MORPH_PORT_SIZE 4
+
+#ifdef VMCORE
+typedef struct Net_AdapterCount {
+ uint8 vlance;
+ uint8 vmxnet2;
+ uint8 vmxnet3;
+ uint8 e1000;
+ uint8 e1000e;
+} Net_AdapterCount;
+#endif
+
+#ifdef USERLEVEL
+
+/*
+ *----------------------------------------------------------------------------
+ *
+ * Net_AddAddrToLADRF --
+ *
+ * Given a MAC address, sets the corresponding bit in the LANCE style
+ * Logical Address Filter 'ladrf'.
+ * The caller should have initialized the ladrf to all 0's, as this
+ * function only ORs on a bit in the array.
+ * 'addr' is presumed to be ETHER_ADDR_LEN in size;
+ * 'ladrf' is presumed to point to a 64-bit vector.
+ *
+ * Derived from a long history of derivations, originally inspired by
+ * sample code from the AMD "Network Products: Ethernet Controllers 1998
+ * Data Book, Book 2", pages 1-53..1-55.
+ *
+ * Returns:
+ * None.
+ *
+ * Side effects:
+ * Updates 'ladrf'.
+ *
+ *----------------------------------------------------------------------------
+ */
+
+static INLINE void
+Net_AddAddrToLadrf(const uint8 *addr, // IN: pointer to MAC address
+ uint8 *ladrf) // IN/OUT: pointer to ladrf
+{
+#define CRC_POLYNOMIAL_BE 0x04c11db7UL /* Ethernet CRC, big endian */
+
+ uint16 hashcode;
+ int32 crc = 0xffffffff; /* init CRC for each address */
+ int32 j;
+ int32 bit;
+ int32 byte;
+
+ ASSERT(addr);
+ ASSERT(ladrf);
+
+ for (byte = 0; byte < ETHER_ADDR_LEN; byte++) { /* for each address byte */
+ /* process each address bit */
+ for (bit = *addr++, j = 0;
+ j < 8;
+ j++, bit >>= 1) {
+ crc = (crc << 1) ^ ((((crc < 0 ? 1 : 0) ^ bit) & 0x01) ?
+ CRC_POLYNOMIAL_BE : 0);
+ }
+ }
+ hashcode = (crc & 1); /* hashcode is 6 LSb of CRC ... */
+ for (j = 0; j < 5; j++) { /* ... in reverse order. */
+ hashcode = (hashcode << 1) | ((crc>>=1) & 1);
+ }
+
+ ladrf[hashcode >> 3] |= 1 << (hashcode & 0x07);
+}
+#endif // USERLEVEL
+
+#ifdef VMCORE
+/*
+ *----------------------------------------------------------------------
+ *
+ * Net_GetNumAdapters --
+ *
+ * Returns the number of each type of network adapter configured in this
+ * VM.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static INLINE void
+Net_GetNumAdapters(Net_AdapterCount *counts)
+{
+ uint32 i;
+
+ counts->vlance = 0;
+ counts->vmxnet2 = 0;
+ counts->vmxnet3 = 0;
+ counts->e1000 = 0;
+ counts->e1000e = 0;
+
+ for (i = 0; i < MAX_ETHERNET_CARDS; i++) {
+ char* adapterStr;
+
+ if (!Config_GetBool(FALSE, "ethernet%d.present", i)) {
+ continue;
+ }
+ adapterStr = Config_GetString("vlance", "ethernet%d.virtualDev", i);
+ if (Str_Strcasecmp(adapterStr, "vmxnet3") == 0) {
+ counts->vmxnet3++;
+ } else if (Str_Strcasecmp(adapterStr, "vlance") == 0) {
+ counts->vlance++;
+ } else if (Str_Strcasecmp(adapterStr, "vmxnet") == 0) {
+ counts->vmxnet2++;
+ } else if (Str_Strcasecmp(adapterStr, "e1000") == 0) {
+ counts->e1000++;
+ } else if (Str_Strcasecmp(adapterStr, "e1000e") == 0) {
+ counts->e1000e++;
+ } else {
+ LOG_ONCE(("%s: unknown adapter: %s\n", __FUNCTION__, adapterStr));
+ }
+ free(adapterStr);
+ }
+}
+
+#endif // VMCORE
+
+#endif // VMWARE_DEVICES_NET_H
diff --git a/usr/src/uts/intel/io/vmxnet/net_sg.h b/usr/src/uts/intel/io/vmxnet/net_sg.h
new file mode 100644
index 0000000000..f6c30fb2b5
--- /dev/null
+++ b/usr/src/uts/intel/io/vmxnet/net_sg.h
@@ -0,0 +1,84 @@
+/*********************************************************
+ * Copyright (C) 2000 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ *********************************************************/
+
+/*********************************************************
+ * The contents of this file are subject to the terms of the Common
+ * Development and Distribution License (the "License") version 1.0
+ * and no later version. You may not use this file except in
+ * compliance with the License.
+ *
+ * You can obtain a copy of the License at
+ * http://www.opensource.org/licenses/cddl1.php
+ *
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ *********************************************************/
+
+/*
+ * net_sg.h --
+ *
+ * Network packet scatter gather structure.
+ */
+
+
+#ifndef _NET_SG_H
+#define _NET_SG_H
+
+#define INCLUDE_ALLOW_USERLEVEL
+
+#define INCLUDE_ALLOW_MODULE
+#define INCLUDE_ALLOW_VMK_MODULE
+#define INCLUDE_ALLOW_VMKERNEL
+#define INCLUDE_ALLOW_DISTRIBUTE
+#include "includeCheck.h"
+
+#define NET_SG_DEFAULT_LENGTH 16
+
+/*
+ * A single scatter-gather element for a network packet.
+ * The address is split into low and high to save space.
+ * If we make it 64 bits then Windows pads things out such that
+ * we lose a lot of space for each scatter gather array.
+ * This adds up when you have embedded scatter-gather
+ * arrays for transmit and receive ring buffers.
+ */
+typedef struct NetSG_Elem {
+ uint32 addrLow;
+ uint16 addrHi;
+ uint16 length;
+} NetSG_Elem;
+
+typedef enum NetSG_AddrType {
+ NET_SG_MACH_ADDR,
+ NET_SG_PHYS_ADDR,
+ NET_SG_VIRT_ADDR,
+} NetSG_AddrType;
+
+typedef struct NetSG_Array {
+ uint16 addrType;
+ uint16 length;
+ NetSG_Elem sg[NET_SG_DEFAULT_LENGTH];
+} NetSG_Array;
+
+#define NET_SG_SIZE(len) (sizeof(NetSG_Array) + (len - NET_SG_DEFAULT_LENGTH) * sizeof(NetSG_Elem))
+
+#define NET_SG_MAKE_PA(elem) (PA)QWORD(elem.addrHi, elem.addrLow)
+#define NET_SG_MAKE_PTR(elem) (char *)(uintptr_t)QWORD(elem.addrHi, elem.addrLow)
+
+#endif
diff --git a/usr/src/uts/intel/io/vmxnet/vm_basic_types.h b/usr/src/uts/intel/io/vmxnet/vm_basic_types.h
new file mode 100644
index 0000000000..adeac1b708
--- /dev/null
+++ b/usr/src/uts/intel/io/vmxnet/vm_basic_types.h
@@ -0,0 +1,1037 @@
+/*********************************************************
+ * Copyright (C) 1998-2009 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation version 2.1 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the Lesser GNU General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ *********************************************************/
+
+/*********************************************************
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of VMware Inc. nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission of VMware Inc.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *********************************************************/
+
+/*********************************************************
+ * The contents of this file are subject to the terms of the Common
+ * Development and Distribution License (the "License") version 1.0
+ * and no later version. You may not use this file except in
+ * compliance with the License.
+ *
+ * You can obtain a copy of the License at
+ * http://www.opensource.org/licenses/cddl1.php
+ *
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ *********************************************************/
+
+/*
+ *
+ * vm_basic_types.h --
+ *
+ * basic data types.
+ */
+
+
+#ifndef _VM_BASIC_TYPES_H_
+#define _VM_BASIC_TYPES_H_
+
+#define INCLUDE_ALLOW_USERLEVEL
+
+#define INCLUDE_ALLOW_MODULE
+#define INCLUDE_ALLOW_VMMON
+#define INCLUDE_ALLOW_VMKERNEL
+#define INCLUDE_ALLOW_VMKDRIVERS
+#define INCLUDE_ALLOW_VMK_MODULE
+#define INCLUDE_ALLOW_DISTRIBUTE
+#define INCLUDE_ALLOW_VMCORE
+#define INCLUDE_ALLOW_VMIROM
+#include "includeCheck.h"
+
+/* STRICT ANSI means the Xserver build and X defines Bool differently. */
+#if !defined(_XTYPEDEF_BOOL) && \
+ (!defined(__STRICT_ANSI__) || defined(__FreeBSD__) || defined(__MINGW32__))
+#define _XTYPEDEF_BOOL
+typedef char Bool;
+#endif
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+#define IsBool(x) (((x) & ~1) == 0)
+#define IsBool2(x, y) ((((x) | (y)) & ~1) == 0)
+
+/*
+ * Macros __i386__ and __ia64 are intrinsically defined by GCC
+ */
+#if defined _MSC_VER && defined _M_X64
+# define __x86_64__
+#elif defined _MSC_VER && defined _M_IX86
+# define __i386__
+#endif
+
+#ifdef __i386__
+#define VM_I386
+#endif
+
+#ifdef __x86_64__
+#define VM_X86_64
+#define VM_I386
+#define vm_x86_64 (1)
+#else
+#define vm_x86_64 (0)
+#endif
+
+
+#ifdef _MSC_VER
+
+#pragma warning (3 :4505) // unreferenced local function
+#pragma warning (disable :4018) // signed/unsigned mismatch
+#pragma warning (disable :4761) // integral size mismatch in argument; conversion supplied
+#pragma warning (disable :4305) // truncation from 'const int' to 'short'
+#pragma warning (disable :4244) // conversion from 'unsigned short' to 'unsigned char'
+#pragma warning (disable :4267) // truncation of 'size_t'
+#pragma warning (disable :4146) // unary minus operator applied to unsigned type, result still unsigned
+#pragma warning (disable :4142) // benign redefinition of type
+
+#endif
+
+#if defined(__APPLE__) || defined(HAVE_STDINT_H)
+
+/*
+ * TODO: This is a C99 standard header. We should be able to test for
+ * #if __STDC_VERSION__ >= 199901L, but that breaks the Netware build
+ * (which doesn't have stdint.h).
+ */
+
+#include <stdint.h>
+
+typedef uint64_t uint64;
+typedef int64_t int64;
+typedef uint32_t uint32;
+typedef int32_t int32;
+typedef uint16_t uint16;
+typedef int16_t int16;
+typedef uint8_t uint8;
+typedef int8_t int8;
+
+/*
+ * Note: C does not specify whether char is signed or unsigned, and
+ * both gcc and msvc implement processor-specific signedness. With
+ * three types:
+ * typeof(char) != typeof(signed char) != typeof(unsigned char)
+ *
+ * Be careful here, because gcc (4.0.1 and others) likes to warn about
+ * conversions between signed char * and char *.
+ */
+
+#else /* !HAVE_STDINT_H */
+
+#ifdef _MSC_VER
+
+typedef unsigned __int64 uint64;
+typedef signed __int64 int64;
+
+#elif defined(__GNUC__) || defined(__SUNPRO_C)
+/* The Xserver source compiles with -ansi -pendantic */
+# if !defined(__STRICT_ANSI__) || defined(__FreeBSD__)
+# if defined(VM_X86_64)
+typedef unsigned long uint64;
+typedef long int64;
+# else
+typedef unsigned long long uint64;
+typedef long long int64;
+# endif
+# endif
+#else
+# error - Need compiler define for int64/uint64
+#endif /* _MSC_VER */
+
+typedef unsigned int uint32;
+typedef unsigned short uint16;
+typedef unsigned char uint8;
+
+typedef int int32;
+typedef short int16;
+typedef signed char int8;
+
+#endif /* HAVE_STDINT_H */
+
+/*
+ * FreeBSD (for the tools build) unconditionally defines these in
+ * sys/inttypes.h so don't redefine them if this file has already
+ * been included. [greg]
+ *
+ * This applies to Solaris as well.
+ */
+
+/*
+ * Before trying to do the includes based on OS defines, see if we can use
+ * feature-based defines to get as much functionality as possible
+ */
+
+#ifdef HAVE_INTTYPES_H
+#include <inttypes.h>
+#endif
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_INTTYPES_H
+#include <sys/inttypes.h>
+#endif
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+
+#ifdef __FreeBSD__
+#include <sys/param.h> /* For __FreeBSD_version */
+#endif
+
+#if !defined(USING_AUTOCONF)
+# if defined(__FreeBSD__) || defined(sun)
+# ifdef KLD_MODULE
+# include <sys/types.h>
+# else
+# if __FreeBSD_version >= 500043
+# if !defined(VMKERNEL)
+# include <inttypes.h>
+# endif
+# include <sys/types.h>
+# else
+# include <sys/inttypes.h>
+# endif
+# endif
+# elif defined __APPLE__
+# if KERNEL
+# include <sys/unistd.h>
+# include <sys/types.h> /* mostly for size_t */
+# include <stdint.h>
+# else
+# include <unistd.h>
+# include <inttypes.h>
+# include <stdlib.h>
+# include <stdint.h>
+# endif
+# else
+# if !defined(__intptr_t_defined) && !defined(intptr_t)
+# ifdef VM_I386
+# define __intptr_t_defined
+# ifdef VM_X86_64
+typedef int64 intptr_t;
+# else
+typedef int32 intptr_t;
+# endif
+# elif defined(__arm__)
+typedef int32 intptr_t;
+# endif
+# endif
+
+# ifndef _STDINT_H
+# ifdef VM_I386
+# ifdef VM_X86_64
+typedef uint64 uintptr_t;
+# else
+typedef uint32 uintptr_t;
+# endif
+# elif defined(__arm__)
+typedef uint32 uintptr_t;
+# endif
+# endif
+# endif
+#endif
+
+
+/*
+ * Time
+ * XXX These should be cleaned up. -- edward
+ */
+
+typedef int64 VmTimeType; /* Time in microseconds */
+typedef int64 VmTimeRealClock; /* Real clock kept in microseconds */
+typedef int64 VmTimeVirtualClock; /* Virtual Clock kept in CPU cycles */
+
+/*
+ * Printf format specifiers for size_t and 64-bit number.
+ * Use them like this:
+ * printf("%"FMT64"d\n", big);
+ *
+ * FMTH is for handles/fds.
+ */
+
+#ifdef _MSC_VER
+ #define FMT64 "I64"
+ #ifdef VM_X86_64
+ #define FMTSZ "I64"
+ #define FMTPD "I64"
+ #define FMTH "I64"
+ #else
+ #define FMTSZ "I"
+ #define FMTPD "I"
+ #define FMTH "I"
+ #endif
+#elif defined __APPLE__
+ /* Mac OS hosts use the same formatters for 32- and 64-bit. */
+ #define FMT64 "ll"
+ #if KERNEL
+ #define FMTSZ "l"
+ #else
+ #define FMTSZ "z"
+ #endif
+ #define FMTPD "l"
+ #define FMTH ""
+#elif defined(__GNUC__) || defined(__SUNPRO_C)
+ #define FMTH ""
+ #if defined(N_PLAT_NLM) || defined(sun) || \
+ (defined(__FreeBSD__) && (__FreeBSD__ + 0) && ((__FreeBSD__ + 0) < 5))
+ /*
+ * Why (__FreeBSD__ + 0)? See bug 141008.
+ * Yes, we really need to test both (__FreeBSD__ + 0) and
+ * ((__FreeBSD__ + 0) < 5). No, we can't remove "+ 0" from
+ * ((__FreeBSD__ + 0) < 5).
+ */
+ #ifdef VM_X86_64
+ #define FMTSZ "l"
+ #define FMTPD "l"
+ #else
+ #define FMTSZ ""
+ #define FMTPD ""
+ #endif
+ #elif defined(__linux__) \
+ || (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L) \
+ || (defined(_POSIX_VERSION) && _POSIX_VERSION >= 200112L) \
+ || (defined(_POSIX2_VERSION) && _POSIX2_VERSION >= 200112L)
+ /* BSD, Linux */
+ #define FMTSZ "z"
+
+ #if defined(VM_X86_64)
+ #define FMTPD "l"
+ #else
+ #define FMTPD ""
+ #endif
+ #else
+ /* Systems with a pre-C99 libc */
+ #define FMTSZ "Z"
+ #ifdef VM_X86_64
+ #define FMTPD "l"
+ #else
+ #define FMTPD ""
+ #endif
+ #endif
+ #ifdef VM_X86_64
+ #define FMT64 "l"
+ #elif defined(sun) || defined(__FreeBSD__)
+ #define FMT64 "ll"
+ #else
+ #define FMT64 "L"
+ #endif
+#else
+ #error - Need compiler define for FMT64 and FMTSZ
+#endif
+
+/*
+ * Suffix for 64-bit constants. Use it like this:
+ * CONST64(0x7fffffffffffffff) for signed or
+ * CONST64U(0x7fffffffffffffff) for unsigned.
+ *
+ * 2004.08.30(thutt):
+ * The vmcore/asm64/gen* programs are compiled as 32-bit
+ * applications, but must handle 64 bit constants. If the
+ * 64-bit-constant defining macros are already defined, the
+ * definition will not be overwritten.
+ */
+
+#if !defined(CONST64) || !defined(CONST64U)
+#ifdef _MSC_VER
+#define CONST64(c) c##I64
+#define CONST64U(c) c##uI64
+#elif defined __APPLE__
+#define CONST64(c) c##LL
+#define CONST64U(c) c##uLL
+#elif defined(__GNUC__) || defined(__SUNPRO_C)
+#ifdef VM_X86_64
+#define CONST64(c) c##L
+#define CONST64U(c) c##uL
+#else
+#define CONST64(c) c##LL
+#define CONST64U(c) c##uLL
+#endif
+#else
+#error - Need compiler define for CONST64
+#endif
+#endif
+
+/*
+ * Use CONST3264/CONST3264U if you want a constant to be
+ * treated as a 32-bit number on 32-bit compiles and
+ * a 64-bit number on 64-bit compiles. Useful in the case
+ * of shifts, like (CONST3264U(1) << x), where x could be
+ * more than 31 on a 64-bit compile.
+ */
+
+#ifdef VM_X86_64
+ #define CONST3264(a) CONST64(a)
+ #define CONST3264U(a) CONST64U(a)
+#else
+ #define CONST3264(a) (a)
+ #define CONST3264U(a) (a)
+#endif
+
+#define MIN_INT8 ((int8)0x80)
+#define MAX_INT8 ((int8)0x7f)
+
+#define MIN_UINT8 ((uint8)0)
+#define MAX_UINT8 ((uint8)0xff)
+
+#define MIN_INT16 ((int16)0x8000)
+#define MAX_INT16 ((int16)0x7fff)
+
+#define MIN_UINT16 ((uint16)0)
+#define MAX_UINT16 ((uint16)0xffff)
+
+#define MIN_INT32 ((int32)0x80000000)
+#define MAX_INT32 ((int32)0x7fffffff)
+
+#define MIN_UINT32 ((uint32)0)
+#define MAX_UINT32 ((uint32)0xffffffff)
+
+#define MIN_INT64 (CONST64(0x8000000000000000))
+#define MAX_INT64 (CONST64(0x7fffffffffffffff))
+
+#define MIN_UINT64 (CONST64U(0))
+#define MAX_UINT64 (CONST64U(0xffffffffffffffff))
+
+typedef uint8 *TCA; /* Pointer into TC (usually). */
+
+/*
+ * Type big enough to hold an integer between 0..100
+ */
+typedef uint8 Percent;
+#define AsPercent(v) ((Percent)(v))
+#define CHOOSE_PERCENT AsPercent(101)
+
+
+typedef uintptr_t VA;
+typedef uintptr_t VPN;
+
+typedef uint64 PA;
+typedef uint32 PPN;
+
+typedef uint64 PhysMemOff;
+typedef uint64 PhysMemSize;
+
+/* The Xserver source compiles with -ansi -pendantic */
+#ifndef __STRICT_ANSI__
+typedef uint64 BA;
+#endif
+typedef uint32 BPN;
+typedef uint32 PageNum;
+typedef unsigned MemHandle;
+typedef int32 World_ID;
+
+/* !! do not alter the definition of INVALID_WORLD_ID without ensuring
+ * that the values defined in both bora/public/vm_basic_types.h and
+ * lib/vprobe/vm_basic_types.h are the same. Additionally, the definition
+ * of VMK_INVALID_WORLD_ID in vmkapi_world.h also must be defined with
+ * the same value
+ */
+
+#define INVALID_WORLD_ID ((World_ID)0)
+
+typedef World_ID User_CartelID;
+#define INVALID_CARTEL_ID INVALID_WORLD_ID
+
+typedef User_CartelID User_SessionID;
+#define INVALID_SESSION_ID INVALID_CARTEL_ID
+
+typedef User_CartelID User_CartelGroupID;
+#define INVALID_CARTELGROUP_ID INVALID_CARTEL_ID
+
+typedef uint32 Worldlet_ID;
+#define INVALID_WORLDLET_ID ((Worldlet_ID)-1)
+
+/* The Xserver source compiles with -ansi -pendantic */
+#ifndef __STRICT_ANSI__
+typedef uint64 MA;
+typedef uint32 MPN;
+#endif
+
+/*
+ * This type should be used for variables that contain sector
+ * position/quantity.
+ */
+typedef uint64 SectorType;
+
+/*
+ * Linear address
+ */
+
+typedef uintptr_t LA;
+typedef uintptr_t LPN;
+#define LA_2_LPN(_la) ((_la) >> PAGE_SHIFT)
+#define LPN_2_LA(_lpn) ((_lpn) << PAGE_SHIFT)
+
+#define LAST_LPN ((((LA) 1) << (8 * sizeof(LA) - PAGE_SHIFT)) - 1)
+#define LAST_LPN32 ((((LA32)1) << (8 * sizeof(LA32) - PAGE_SHIFT)) - 1)
+#define LAST_LPN64 ((((LA64)1) << (8 * sizeof(LA64) - PAGE_SHIFT)) - 1)
+
+/* Valid bits in a LPN. */
+#define LPN_MASK LAST_LPN
+#define LPN_MASK32 LAST_LPN32
+#define LPN_MASK64 LAST_LPN64
+
+/*
+ * On 64 bit platform, address and page number types default
+ * to 64 bit. When we need to represent a 32 bit address, we use
+ * types defined below.
+ *
+ * On 32 bit platform, the following types are the same as the
+ * default types.
+ */
+typedef uint32 VA32;
+typedef uint32 VPN32;
+typedef uint32 LA32;
+typedef uint32 LPN32;
+typedef uint32 PA32;
+typedef uint32 PPN32;
+typedef uint32 MA32;
+typedef uint32 MPN32;
+
+/*
+ * On 64 bit platform, the following types are the same as the
+ * default types.
+ */
+typedef uint64 VA64;
+typedef uint64 VPN64;
+typedef uint64 LA64;
+typedef uint64 LPN64;
+typedef uint64 PA64;
+typedef uint64 PPN64;
+typedef uint64 MA64;
+typedef uint64 MPN64;
+
+/*
+ * VA typedefs for user world apps.
+ */
+typedef VA32 UserVA32;
+typedef VA64 UserVA64;
+typedef UserVA64 UserVAConst; /* Userspace ptr to data that we may only read. */
+typedef UserVA32 UserVA32Const; /* Userspace ptr to data that we may only read. */
+typedef UserVA64 UserVA64Const; /* Used by 64-bit syscalls until conversion is finished. */
+#ifdef VMKERNEL
+typedef UserVA64 UserVA;
+#else
+typedef void * UserVA;
+#endif
+
+
+/*
+ * Maximal possible PPN value (errors too) that PhysMem can handle.
+ * Must be at least as large as MAX_PPN which is the maximum PPN
+ * for any region other than buserror.
+ */
+#define PHYSMEM_MAX_PPN ((PPN)0xffffffff)
+#define MAX_PPN ((PPN)0x1fffffff) /* Maximal observable PPN value. */
+#define INVALID_PPN ((PPN)0xffffffff)
+
+#define INVALID_BPN ((BPN)0x1fffffff)
+
+#define RESERVED_MPN ((MPN) 0)
+#define INVALID_MPN ((MPN)-1)
+#define MEMREF_MPN ((MPN)-2)
+#define RELEASED_MPN ((MPN)-3)
+#define MAX_MPN ((MPN)0x7fffffff) /* 43 bits of address space. */
+
+#define INVALID_LPN ((LPN)-1)
+#define INVALID_VPN ((VPN)-1)
+#define INVALID_LPN64 ((LPN64)-1)
+#define INVALID_PAGENUM ((PageNum)-1)
+
+
+/*
+ * Format modifier for printing VA, LA, and VPN.
+ * Use them like this: Log("%#"FMTLA"x\n", laddr)
+ */
+
+#if defined(VMM) || defined(FROBOS64) || vm_x86_64 || defined __APPLE__
+# define FMTLA "l"
+# define FMTVA "l"
+# define FMTVPN "l"
+#else
+# define FMTLA ""
+# define FMTVA ""
+# define FMTVPN ""
+#endif
+
+#ifndef EXTERN
+#define EXTERN extern
+#endif
+#define CONST const
+
+
+#ifndef INLINE
+# ifdef _MSC_VER
+# define INLINE __inline
+# else
+# define INLINE inline
+# endif
+#endif
+
+
+/*
+ * Annotation for data that may be exported into a DLL and used by other
+ * apps that load that DLL and import the data.
+ */
+#if defined(_WIN32) && defined(VMX86_IMPORT_DLLDATA)
+# define VMX86_EXTERN_DATA extern __declspec(dllimport)
+#else // !_WIN32
+# define VMX86_EXTERN_DATA extern
+#endif
+
+#if defined(_WIN32) && !defined(VMX86_NO_THREADS)
+#define THREADSPECIFIC __declspec(thread)
+#else
+#define THREADSPECIFIC
+#endif
+
+/*
+ * Due to the wonderful "registry redirection" feature introduced in
+ * 64-bit Windows, if you access any key under HKLM\Software in 64-bit
+ * code, you need to open/create/delete that key with
+ * VMKEY_WOW64_32KEY if you want a consistent view with 32-bit code.
+ */
+
+#ifdef _WIN32
+#ifdef _WIN64
+#define VMW_KEY_WOW64_32KEY KEY_WOW64_32KEY
+#else
+#define VMW_KEY_WOW64_32KEY 0x0
+#endif
+#endif
+
+
+/*
+ * Consider the following reasons functions are inlined:
+ *
+ * 1) inlined for performance reasons
+ * 2) inlined because it's a single-use function
+ *
+ * Functions which meet only condition 2 should be marked with this
+ * inline macro; It is not critical to be inlined (but there is a
+ * code-space & runtime savings by doing so), so when other callers
+ * are added the inline-ness should be removed.
+ */
+
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3)
+/*
+ * Starting at version 3.3, gcc does not always inline functions marked
+ * 'inline' (it depends on their size). To force gcc to do so, one must use the
+ * extra __always_inline__ attribute.
+ */
+# define INLINE_SINGLE_CALLER INLINE __attribute__((__always_inline__))
+#else
+# define INLINE_SINGLE_CALLER INLINE
+#endif
+
+/*
+ * Used when a hard guaranteed of no inlining is needed. Very few
+ * instances need this since the absence of INLINE is a good hint
+ * that gcc will not do inlining.
+ */
+
+#if defined(__GNUC__) && defined(VMM)
+#define ABSOLUTELY_NOINLINE __attribute__((__noinline__))
+#endif
+
+/*
+ * Attributes placed on function declarations to tell the compiler
+ * that the function never returns.
+ */
+
+#ifdef _MSC_VER
+#define NORETURN __declspec(noreturn)
+#elif __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 9)
+#define NORETURN __attribute__((__noreturn__))
+#else
+#define NORETURN
+#endif
+
+/*
+ * GCC 3.2 inline asm needs the + constraint for input/ouput memory operands.
+ * Older GCCs don't know about it --hpreg
+ */
+
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 2)
+# define VM_ASM_PLUS 1
+#else
+# define VM_ASM_PLUS 0
+#endif
+
+/*
+ * Branch prediction hints:
+ * LIKELY(exp) - Expression exp is likely TRUE.
+ * UNLIKELY(exp) - Expression exp is likely FALSE.
+ * Usage example:
+ * if (LIKELY(excCode == EXC_NONE)) {
+ * or
+ * if (UNLIKELY(REAL_MODE(vc))) {
+ *
+ * We know how to predict branches on gcc3 and later (hopefully),
+ * all others we don't so we do nothing.
+ */
+
+#if (__GNUC__ >= 3)
+/*
+ * gcc3 uses __builtin_expect() to inform the compiler of an expected value.
+ * We use this to inform the static branch predictor. The '!!' in LIKELY
+ * will convert any !=0 to a 1.
+ */
+#define LIKELY(_exp) __builtin_expect(!!(_exp), 1)
+#define UNLIKELY(_exp) __builtin_expect((_exp), 0)
+#else
+#define LIKELY(_exp) (_exp)
+#define UNLIKELY(_exp) (_exp)
+#endif
+
+/*
+ * GCC's argument checking for printf-like functions
+ * This is conditional until we have replaced all `"%x", void *'
+ * with `"0x%08x", (uint32) void *'. Note that %p prints different things
+ * on different platforms. Argument checking is enabled for the
+ * vmkernel, which has already been cleansed.
+ *
+ * fmtPos is the position of the format string argument, beginning at 1
+ * varPos is the position of the variable argument, beginning at 1
+ */
+
+#if defined(__GNUC__)
+# define PRINTF_DECL(fmtPos, varPos) __attribute__((__format__(__printf__, fmtPos, varPos)))
+#else
+# define PRINTF_DECL(fmtPos, varPos)
+#endif
+
+#if defined(__GNUC__)
+# define SCANF_DECL(fmtPos, varPos) __attribute__((__format__(__scanf__, fmtPos, varPos)))
+#else
+# define SCANF_DECL(fmtPos, varPos)
+#endif
+
+/*
+ * UNUSED_PARAM should surround the parameter name and type declaration,
+ * e.g. "int MyFunction(int var1, UNUSED_PARAM(int var2))"
+ *
+ */
+
+#ifndef UNUSED_PARAM
+# if defined(__GNUC__)
+# define UNUSED_PARAM(_parm) _parm __attribute__((__unused__))
+# else
+# define UNUSED_PARAM(_parm) _parm
+# endif
+#endif
+
+/*
+ * REGPARM defaults to REGPARM3; i.e., a request that gcc
+ * put the first three arguments in registers. (It is fine
+ * if the function has fewer than three arguments.) Gcc only.
+ * Syntactically, put REGPARM where you'd put INLINE or NORETURN.
+ *
+ * Note that 64-bit code already puts the first six arguments in
+ * registers, so these attributes are only useful for 32-bit code.
+ */
+
+#if defined(__GNUC__)
+# define REGPARM0 __attribute__((regparm(0)))
+# define REGPARM1 __attribute__((regparm(1)))
+# define REGPARM2 __attribute__((regparm(2)))
+# define REGPARM3 __attribute__((regparm(3)))
+# define REGPARM REGPARM3
+#else
+# define REGPARM0
+# define REGPARM1
+# define REGPARM2
+# define REGPARM3
+# define REGPARM
+#endif
+
+/*
+ * ALIGNED specifies minimum alignment in "n" bytes.
+ */
+
+#ifdef __GNUC__
+#define ALIGNED(n) __attribute__((__aligned__(n)))
+#else
+#define ALIGNED(n)
+#endif
+
+/*
+ * __func__ is a stringified function name that is part of the C99 standard. The block
+ * below defines __func__ on older systems where the compiler does not support that
+ * macro.
+ */
+#if defined(__GNUC__) \
+ && ((__GNUC__ == 2 && __GNUC_MINOR < 96) \
+ || (__GNUC__ < 2))
+# define __func__ __FUNCTION__
+#endif
+
+/*
+ * Once upon a time, this was used to silence compiler warnings that
+ * get generated when the compiler thinks that a function returns
+ * when it is marked noreturn. Don't do it. Use NOT_REACHED().
+ */
+
+#define INFINITE_LOOP() do { } while (1)
+
+/*
+ * On FreeBSD (for the tools build), size_t is typedef'd if _BSD_SIZE_T_
+ * is defined. Use the same logic here so we don't define it twice. [greg]
+ */
+#ifdef __FreeBSD__
+# ifdef _BSD_SIZE_T_
+# undef _BSD_SIZE_T_
+# ifdef VM_I386
+# ifdef VM_X86_64
+ typedef uint64 size_t;
+# else
+ typedef uint32 size_t;
+# endif
+# endif /* VM_I386 */
+# endif
+
+# ifdef _BSD_SSIZE_T_
+# undef _BSD_SSIZE_T_
+# ifdef VM_I386
+# ifdef VM_X86_64
+ typedef int64 ssize_t;
+# else
+ typedef int32 ssize_t;
+# endif
+# endif /* VM_I386 */
+# endif
+
+#else
+# ifndef _SIZE_T
+# ifdef VM_I386
+# define _SIZE_T
+# ifdef VM_X86_64
+ typedef uint64 size_t;
+# else
+ typedef uint32 size_t;
+# endif
+# elif defined(__arm__)
+# define _SIZE_T
+ typedef uint32 size_t;
+# endif
+# endif
+
+# if !defined(FROBOS) && !defined(_SSIZE_T) && !defined(_SSIZE_T_) && \
+ !defined(ssize_t) && !defined(__ssize_t_defined) && \
+ !defined(_SSIZE_T_DECLARED)
+# ifdef VM_I386
+# define _SSIZE_T
+# define __ssize_t_defined
+# define _SSIZE_T_DECLARED
+# ifdef VM_X86_64
+ typedef int64 ssize_t;
+# else
+ typedef int32 ssize_t;
+# endif
+# elif defined(__arm__)
+# define _SSIZE_T
+# define __ssize_t_defined
+# define _SSIZE_T_DECLARED
+ typedef int32 ssize_t;
+# endif
+# endif
+
+#endif
+
+/*
+ * Format modifier for printing pid_t. On sun the pid_t is a ulong, but on
+ * Linux it's an int.
+ * Use this like this: printf("The pid is %"FMTPID".\n", pid);
+ */
+#ifdef sun
+# ifdef VM_X86_64
+# define FMTPID "d"
+# else
+# define FMTPID "lu"
+# endif
+#else
+# define FMTPID "d"
+#endif
+
+/*
+ * Format modifier for printing uid_t. On Solaris 10 and earlier, uid_t
+ * is a ulong, but on other platforms it's an unsigned int.
+ * Use this like this: printf("The uid is %"FMTUID".\n", uid);
+ */
+#if defined(sun) && !defined(SOL11)
+# ifdef VM_X86_64
+# define FMTUID "u"
+# else
+# define FMTUID "lu"
+# endif
+#else
+# define FMTUID "u"
+#endif
+
+/*
+ * Format modifier for printing mode_t. On sun the mode_t is a ulong, but on
+ * Linux it's an int.
+ * Use this like this: printf("The mode is %"FMTMODE".\n", mode);
+ */
+#ifdef sun
+# ifdef VM_X86_64
+# define FMTMODE "o"
+# else
+# define FMTMODE "lo"
+# endif
+#else
+# define FMTMODE "o"
+#endif
+
+/*
+ * Format modifier for printing time_t. Most platforms define a time_t to be
+ * a long int, but on FreeBSD (as of 5.0, it seems), the time_t is a signed
+ * size quantity. Refer to the definition of FMTSZ to see why we need silly
+ * preprocessor arithmetic.
+ * Use this like this: printf("The mode is %"FMTTIME".\n", time);
+ */
+#if defined(__FreeBSD__) && (__FreeBSD__ + 0) && ((__FreeBSD__ + 0) >= 5)
+# define FMTTIME FMTSZ"d"
+#else
+# if defined(_MSC_VER)
+# ifndef _SAFETIME_H_
+# if (_MSC_VER < 1400) || defined(_USE_32BIT_TIME_T)
+# define FMTTIME "ld"
+# else
+# define FMTTIME FMT64"d"
+# endif
+# else
+# ifndef FMTTIME
+# error "safetime.h did not define FMTTIME"
+# endif
+# endif
+# else
+# define FMTTIME "ld"
+# endif
+#endif
+
+#ifdef __APPLE__
+/*
+ * Format specifier for all these annoying types such as {S,U}Int32
+ * which are 'long' in 32-bit builds
+ * and 'int' in 64-bit builds.
+ */
+# ifdef __LP64__
+# define FMTLI ""
+# else
+# define FMTLI "l"
+# endif
+
+/*
+ * Format specifier for all these annoying types such as NS[U]Integer
+ * which are 'int' in 32-bit builds
+ * and 'long' in 64-bit builds.
+ */
+# ifdef __LP64__
+# define FMTIL "l"
+# else
+# define FMTIL ""
+# endif
+#endif
+
+
+/*
+ * Define MXSemaHandle here so both vmmon and vmx see this definition.
+ */
+
+#ifdef _WIN32
+typedef uintptr_t MXSemaHandle;
+#else
+typedef int MXSemaHandle;
+#endif
+
+/*
+ * Define type for poll device handles.
+ */
+
+typedef int64 PollDevHandle;
+
+/*
+ * Define the utf16_t type.
+ */
+
+#if defined(_WIN32) && defined(_NATIVE_WCHAR_T_DEFINED)
+typedef wchar_t utf16_t;
+#else
+typedef uint16 utf16_t;
+#endif
+
+/*
+ * Define for point and rectangle types. Defined here so they
+ * can be used by other externally facing headers in bora/public.
+ */
+
+typedef struct VMPoint {
+ int x, y;
+} VMPoint;
+
+#if defined _WIN32 && defined USERLEVEL
+struct tagRECT;
+typedef struct tagRECT VMRect;
+#else
+typedef struct VMRect {
+ int left;
+ int top;
+ int right;
+ int bottom;
+} VMRect;
+#endif
+
+/*
+ * ranked locks "everywhere"
+ */
+
+typedef uint32 MX_Rank;
+
+#endif /* _VM_BASIC_TYPES_H_ */
diff --git a/usr/src/uts/intel/io/vmxnet/vm_device_version.h b/usr/src/uts/intel/io/vmxnet/vm_device_version.h
new file mode 100644
index 0000000000..7046594a6c
--- /dev/null
+++ b/usr/src/uts/intel/io/vmxnet/vm_device_version.h
@@ -0,0 +1,246 @@
+/*********************************************************
+ * Copyright (C) 1998 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation version 2.1 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the Lesser GNU General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ *********************************************************/
+
+#ifndef VM_DEVICE_VERSION_H
+#define VM_DEVICE_VERSION_H
+
+#define INCLUDE_ALLOW_USERLEVEL
+
+#define INCLUDE_ALLOW_MODULE
+#define INCLUDE_ALLOW_VMKERNEL
+#define INCLUDE_ALLOW_VMCORE
+#include "includeCheck.h"
+
+#ifdef _WIN32
+#ifdef __MINGW32__
+#include "initguid.h"
+#else
+#include "guiddef.h"
+#endif
+#endif
+
+/* LSILogic 53C1030 Parallel SCSI controller
+ * LSILogic SAS1068 SAS controller
+ */
+#define PCI_VENDOR_ID_LSILOGIC 0x1000
+#define PCI_DEVICE_ID_LSI53C1030 0x0030
+#define PCI_DEVICE_ID_LSISAS1068 0x0054
+
+/* Our own PCI IDs
+ * VMware SVGA II (Unified VGA)
+ * VMware SVGA (PCI Accelerator)
+ * VMware vmxnet (Idealized NIC)
+ * VMware vmxscsi (Abortive idealized SCSI controller)
+ * VMware chipset (Subsystem ID for our motherboards)
+ * VMware e1000 (Subsystem ID)
+ * VMware vmxnet3 (Uniform Pass Through NIC)
+ * VMware HD Audio codec
+ * VMware HD Audio controller
+ */
+#define PCI_VENDOR_ID_VMWARE 0x15AD
+#define PCI_DEVICE_ID_VMWARE_SVGA2 0x0405
+#define PCI_DEVICE_ID_VMWARE_SVGA 0x0710
+#define PCI_DEVICE_ID_VMWARE_NET 0x0720
+#define PCI_DEVICE_ID_VMWARE_SCSI 0x0730
+#define PCI_DEVICE_ID_VMWARE_VMCI 0x0740
+#define PCI_DEVICE_ID_VMWARE_CHIPSET 0x1976
+#define PCI_DEVICE_ID_VMWARE_82545EM 0x0750 /* single port */
+#define PCI_DEVICE_ID_VMWARE_82546EB 0x0760 /* dual port */
+#define PCI_DEVICE_ID_VMWARE_EHCI 0x0770
+#define PCI_DEVICE_ID_VMWARE_UHCI 0x0774
+#define PCI_DEVICE_ID_VMWARE_XHCI 0x0778
+#define PCI_DEVICE_ID_VMWARE_1394 0x0780
+#define PCI_DEVICE_ID_VMWARE_BRIDGE 0x0790
+#define PCI_DEVICE_ID_VMWARE_ROOTPORT 0x07A0
+#define PCI_DEVICE_ID_VMWARE_VMXNET3 0x07B0
+#define PCI_DEVICE_ID_VMWARE_VMXWIFI 0x07B8
+#define PCI_DEVICE_ID_VMWARE_PVSCSI 0x07C0
+#define PCI_DEVICE_ID_VMWARE_82574 0x07D0
+#define PCI_DEVICE_ID_VMWARE_HDAUDIO_CODEC 0x1975
+#define PCI_DEVICE_ID_VMWARE_HDAUDIO_CONTROLLER 0x1977
+
+/* The hypervisor device might grow. Please leave room
+ * for 7 more subfunctions.
+ */
+#define PCI_DEVICE_ID_VMWARE_HYPER 0x0800
+#define PCI_DEVICE_ID_VMWARE_VMI 0x0801
+
+#define PCI_DEVICE_VMI_CLASS 0x05
+#define PCI_DEVICE_VMI_SUBCLASS 0x80
+#define PCI_DEVICE_VMI_INTERFACE 0x00
+#define PCI_DEVICE_VMI_REVISION 0x01
+
+/* From linux/pci_ids.h:
+ * AMD Lance Ethernet controller
+ * BusLogic SCSI controller
+ * Ensoniq ES1371 sound controller
+ */
+#define PCI_VENDOR_ID_AMD 0x1022
+#define PCI_DEVICE_ID_AMD_VLANCE 0x2000
+#define PCI_VENDOR_ID_BUSLOGIC 0x104B
+#define PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER_NC 0x0140
+#define PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER 0x1040
+#define PCI_VENDOR_ID_ENSONIQ 0x1274
+#define PCI_DEVICE_ID_ENSONIQ_ES1371 0x1371
+
+/* From linux/pci_ids.h:
+ * Intel 82439TX (430 HX North Bridge)
+ * Intel 82371AB (PIIX4 South Bridge)
+ * Intel 82443BX (440 BX North Bridge and AGP Bridge)
+ * Intel 82545EM (e1000, server adapter, single port)
+ * Intel 82546EB (e1000, server adapter, dual port)
+ * Intel HECI (as embedded in ich9m)
+ */
+#define PCI_VENDOR_ID_INTEL 0x8086
+#define PCI_DEVICE_ID_INTEL_82439TX 0x7100
+#define PCI_DEVICE_ID_INTEL_82371AB_0 0x7110
+#define PCI_DEVICE_ID_INTEL_82371AB_2 0x7112
+#define PCI_DEVICE_ID_INTEL_82371AB_3 0x7113
+#define PCI_DEVICE_ID_INTEL_82371AB 0x7111
+#define PCI_DEVICE_ID_INTEL_82443BX 0x7190
+#define PCI_DEVICE_ID_INTEL_82443BX_1 0x7191
+#define PCI_DEVICE_ID_INTEL_82443BX_2 0x7192 /* Used when no AGP support */
+#define PCI_DEVICE_ID_INTEL_82545EM 0x100f
+#define PCI_DEVICE_ID_INTEL_82546EB 0x1010
+#define PCI_DEVICE_ID_INTEL_82574 0x10d3
+#define PCI_DEVICE_ID_INTEL_82574_APPLE 0x10f6
+#define PCI_DEVICE_ID_INTEL_HECI 0x2a74
+
+#define E1000E_PCI_DEVICE_ID_CONFIG_STR "e1000e.pci.deviceID"
+#define E1000E_PCI_SUB_VENDOR_ID_CONFIG_STR "e1000e.pci.subVendorID"
+#define E1000E_PCI_SUB_DEVICE_ID_CONFIG_STR "e1000e.pci.subDeviceID"
+
+/*
+ * Intel HD Audio controller and Realtek ALC885 codec.
+ */
+#define PCI_DEVICE_ID_INTEL_631XESB_632XESB 0x269a
+#define PCI_VENDOR_ID_REALTEK 0x10ec
+#define PCI_DEVICE_ID_REALTEK_ALC885 0x0885
+
+
+/*
+ * Fresco Logic xHCI (USB 3.0) Controller
+ */
+#define PCI_VENDOR_ID_FRESCO 0x1B73
+#define PCI_DEVICE_ID_FRESCO_FL1000 0x1000 // Original 1-port chip
+#define PCI_DEVICE_ID_FRESCO_FL1009 0x1009 // New 2-port chip (Driver 3.0.98+)
+#define PCI_DEVICE_ID_FRESCO_FL1400 0x1400 // Unknown (4-port? Dev hardware?)
+
+/*
+ * NEC/Renesas xHCI (USB 3.0) Controller
+ */
+#define PCI_VENDOR_ID_NEC 0x1033
+#define PCI_DEVICE_ID_NEC_UPD720200 0x0194
+#define PCI_REVISION_NEC_UPD720200 0x03
+#define PCI_FIRMWARE_NEC_UPD720200 0x3015
+
+
+/************* Strings for IDE Identity Fields **************************/
+#define VIDE_ID_SERIAL_STR "00000000000000000001" /* Must be 20 Bytes */
+#define VIDE_ID_FIRMWARE_STR "00000001" /* Must be 8 Bytes */
+
+/* No longer than 40 Bytes */
+#define VIDE_ATA_MODEL_STR PRODUCT_GENERIC_NAME " Virtual IDE Hard Drive"
+#define VIDE_ATAPI_MODEL_STR PRODUCT_GENERIC_NAME " Virtual IDE CDROM Drive"
+
+#define ATAPI_VENDOR_ID "NECVMWar" /* Must be 8 Bytes */
+#define ATAPI_PRODUCT_ID PRODUCT_GENERIC_NAME " IDE CDROM" /* Must be 16 Bytes */
+#define ATAPI_REV_LEVEL "1.00" /* Must be 4 Bytes */
+
+#define IDE_NUM_INTERFACES 2 /* support for two interfaces */
+#define IDE_DRIVES_PER_IF 2
+
+/************* Strings for SCSI Identity Fields **************************/
+#define SCSI_DISK_MODEL_STR PRODUCT_GENERIC_NAME " Virtual SCSI Hard Drive"
+#define SCSI_DISK_VENDOR_NAME COMPANY_NAME
+#define SCSI_DISK_REV_LEVEL "1.0"
+#define SCSI_CDROM_MODEL_STR PRODUCT_GENERIC_NAME " Virtual SCSI CDROM Drive"
+#define SCSI_CDROM_VENDOR_NAME COMPANY_NAME
+#define SCSI_CDROM_REV_LEVEL "1.0"
+
+/************* SCSI implementation limits ********************************/
+#define SCSI_MAX_CONTROLLERS 4 // Need more than 1 for MSCS clustering
+#define SCSI_MAX_DEVICES 16 // BT-958 emulates only 16
+#define PVSCSI_MAX_DEVICES 255 // 255 (including the controller)
+/*
+ * VSCSI_BV_INTS is the number of uint32's needed for a bit vector
+ * to cover all scsi devices per target.
+ */
+#define VSCSI_BV_INTS CEILING(PVSCSI_MAX_DEVICES, 8 * sizeof (uint32))
+#define SCSI_IDE_CHANNEL SCSI_MAX_CONTROLLERS
+#define SCSI_IDE_HOSTED_CHANNEL (SCSI_MAX_CONTROLLERS + 1)
+#define SCSI_MAX_CHANNELS (SCSI_MAX_CONTROLLERS + 2)
+
+/************* Strings for the VESA BIOS Identity Fields *****************/
+#define VBE_OEM_STRING COMPANY_NAME " SVGA"
+#define VBE_VENDOR_NAME COMPANY_NAME
+#define VBE_PRODUCT_NAME PRODUCT_GENERIC_NAME
+
+/************* PCI implementation limits ********************************/
+#define PCI_MAX_BRIDGES 15
+
+/************* Ethernet implementation limits ***************************/
+#define MAX_ETHERNET_CARDS 10
+
+/********************** Floppy limits ***********************************/
+#define MAX_FLOPPY_DRIVES 2
+
+/************* PCI Passthrough implementation limits ********************/
+#define MAX_PCI_PASSTHRU_DEVICES 6
+
+/************* USB implementation limits ********************************/
+#define MAX_USB_DEVICES_PER_HOST_CONTROLLER 127
+
+/************* Strings for Host USB Driver *******************************/
+
+#ifdef _WIN32
+
+/*
+ * Globally unique ID for the VMware device interface. Define INITGUID before including
+ * this header file to instantiate the variable.
+ */
+DEFINE_GUID(GUID_DEVICE_INTERFACE_VMWARE_USB_DEVICES,
+0x2da1fe75, 0xaab3, 0x4d2c, 0xac, 0xdf, 0x39, 0x8, 0x8c, 0xad, 0xa6, 0x65);
+
+/*
+ * Globally unique ID for the VMware device setup class.
+ */
+DEFINE_GUID(GUID_CLASS_VMWARE_USB_DEVICES,
+0x3b3e62a5, 0x3556, 0x4d7e, 0xad, 0xad, 0xf5, 0xfa, 0x3a, 0x71, 0x2b, 0x56);
+
+/*
+ * This string defines the device ID string of a VMware USB device.
+ * The format is USB\Vid_XXXX&Pid_YYYY, where XXXX and YYYY are the
+ * hexadecimal representations of the vendor and product ids, respectively.
+ *
+ * The official vendor ID for VMware, Inc. is 0x0E0F.
+ * The product id for USB generic devices is 0x0001.
+ */
+#define USB_VMWARE_DEVICE_ID_WIDE L"USB\\Vid_0E0F&Pid_0001"
+#define USB_DEVICE_ID_LENGTH (sizeof(USB_VMWARE_DEVICE_ID_WIDE) / sizeof(WCHAR))
+
+#ifdef UNICODE
+#define USB_PNP_SETUP_CLASS_NAME L"VMwareUSBDevices"
+#define USB_PNP_DRIVER_NAME L"vmusb"
+#else
+#define USB_PNP_SETUP_CLASS_NAME "VMwareUSBDevices"
+#define USB_PNP_DRIVER_NAME "vmusb"
+#endif
+#endif
+
+#endif /* VM_DEVICE_VERSION_H */
diff --git a/usr/src/uts/intel/io/vmxnet/vmnet_def.h b/usr/src/uts/intel/io/vmxnet/vmnet_def.h
new file mode 100644
index 0000000000..6e44aea2bb
--- /dev/null
+++ b/usr/src/uts/intel/io/vmxnet/vmnet_def.h
@@ -0,0 +1,91 @@
+/*********************************************************
+ * Copyright (C) 2004 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ *********************************************************/
+
+/*********************************************************
+ * The contents of this file are subject to the terms of the Common
+ * Development and Distribution License (the "License") version 1.0
+ * and no later version. You may not use this file except in
+ * compliance with the License.
+ *
+ * You can obtain a copy of the License at
+ * http://www.opensource.org/licenses/cddl1.php
+ *
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ *********************************************************/
+
+/*
+ * vmnet_def.h
+ *
+ * - definitions which are (mostly) not vmxnet or vlance specific
+ */
+
+#ifndef _VMNET_DEF_H_
+#define _VMNET_DEF_H_
+
+#define INCLUDE_ALLOW_USERLEVEL
+#define INCLUDE_ALLOW_VMCORE
+
+#define INCLUDE_ALLOW_MODULE
+#define INCLUDE_ALLOW_VMK_MODULE
+#define INCLUDE_ALLOW_VMKERNEL
+#define INCLUDE_ALLOW_DISTRIBUTE
+#include "includeCheck.h"
+
+#define VMNET_NAME_BUFFER_LEN 128 /* Increased for i18n. */
+#define VMNET_COAL_SCHEME_NAME_LEN 16
+
+
+/*
+ * capabilities - not all of these are implemented in the virtual HW
+ * (eg VLAN support is in the virtual switch) so even vlance
+ * can use them
+ */
+#define VMNET_CAP_SG 0x0001 /* Can do scatter-gather transmits. */
+#define VMNET_CAP_IP4_CSUM 0x0002 /* Can checksum only TCP/UDP over IPv4. */
+#define VMNET_CAP_HW_CSUM 0x0004 /* Can checksum all packets. */
+#define VMNET_CAP_HIGH_DMA 0x0008 /* Can DMA to high memory. */
+#define VMNET_CAP_TOE 0x0010 /* Supports TCP/IP offload. */
+#define VMNET_CAP_TSO 0x0020 /* Supports TCP Segmentation offload */
+#define VMNET_CAP_SW_TSO 0x0040 /* Supports SW TCP Segmentation */
+#define VMNET_CAP_VMXNET_APROM 0x0080 /* Vmxnet APROM support */
+#define VMNET_CAP_HW_TX_VLAN 0x0100 /* Can we do VLAN tagging in HW */
+#define VMNET_CAP_HW_RX_VLAN 0x0200 /* Can we do VLAN untagging in HW */
+#define VMNET_CAP_SW_VLAN 0x0400 /* Can we do VLAN tagging/untagging in SW */
+#define VMNET_CAP_WAKE_PCKT_RCV 0x0800 /* Can wake on network packet recv? */
+#define VMNET_CAP_ENABLE_INT_INLINE 0x1000 /* Enable Interrupt Inline */
+#define VMNET_CAP_ENABLE_HEADER_COPY 0x2000 /* copy header for vmkernel */
+#define VMNET_CAP_TX_CHAIN 0x4000 /* Guest can use multiple tx entries for a pkt */
+#define VMNET_CAP_RX_CHAIN 0x8000 /* a pkt can span multiple rx entries */
+#define VMNET_CAP_LPD 0x10000 /* large pkt delivery */
+#define VMNET_CAP_BPF 0x20000 /* BPF Support in VMXNET Virtual Hardware */
+#define VMNET_CAP_SG_SPAN_PAGES 0x40000 /* Can do scatter-gather span multiple pages transmits. */
+#define VMNET_CAP_IP6_CSUM 0x80000 /* Can do IPv6 csum offload. */
+#define VMNET_CAP_TSO6 0x100000 /* Can do TSO segmentation offload for IPv6 pkts. */
+#define VMNET_CAP_TSO256k 0x200000 /* Can do TSO segmentation offload for pkts up to 256kB. */
+#define VMNET_CAP_UPT 0x400000 /* Support UPT */
+#define VMNET_CAP_RDONLY_INETHDRS 0x800000 /* Modifies inet headers for TSO/CSUm */
+#define VMNET_CAP_NPA 0x1000000 /* Support NPA */
+#define VMNET_CAP_DCB 0x2000000 /* Support DCB */
+#define VMNET_CAP_OFFLOAD_8OFFSET 0x4000000 /* supports 8bit parameterized offsets */
+#define VMNET_CAP_OFFLOAD_16OFFSET 0x8000000 /* supports 16bit parameterized offsets */
+#define VMNET_CAP_IP6_CSUM_EXT_HDRS 0x10000000 /* support csum of ip6 ext hdrs */
+#define VMNET_CAP_TSO6_EXT_HDRS 0x20000000 /* support TSO for ip6 ext hdrs */
+#define VMNET_CAP_SCHED 0x40000000 /* compliant with network scheduling */
+#endif // _VMNET_DEF_H_
diff --git a/usr/src/uts/intel/io/vmxnet/vmxnet.c b/usr/src/uts/intel/io/vmxnet/vmxnet.c
new file mode 100644
index 0000000000..e0046d8deb
--- /dev/null
+++ b/usr/src/uts/intel/io/vmxnet/vmxnet.c
@@ -0,0 +1,2438 @@
+/*********************************************************
+ * Copyright (C) 2004 VMware, Inc. All rights reserved.
+ *
+ * The contents of this file are subject to the terms of the Common
+ * Development and Distribution License (the "License") version 1.0
+ * and no later version. You may not use this file except in
+ * compliance with the License.
+ *
+ * You can obtain a copy of the License at
+ * http://www.opensource.org/licenses/cddl1.php
+ *
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ *********************************************************/
+
+#include <sys/types.h>
+#include <sys/conf.h>
+#include <sys/debug.h>
+#include <sys/stropts.h>
+#include <sys/stream.h>
+#include <sys/strlog.h>
+#include <sys/kmem.h>
+#include <sys/stat.h>
+#include <sys/kstat.h>
+#include <sys/vtrace.h>
+#include <sys/dlpi.h>
+#include <sys/strsun.h>
+#include <sys/ethernet.h>
+#include <sys/modctl.h>
+#include <sys/errno.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/ddi_impldefs.h>
+#include <sys/gld.h>
+#include <sys/pci.h>
+#include <sys/strsubr.h>
+
+/*
+ * This used to be defined in sys/gld.h, but was flagged as private,
+ * and we used it anyway. Now it no longer exists, and we're stuck
+ * with it for the time being.
+ */
+#ifndef GLD_MAX_MULTICAST
+#define GLD_MAX_MULTICAST 64
+#endif
+
+#define __intptr_t_defined
+#define _STDINT_H
+#include "vm_basic_types.h"
+#include "vmxnet2_def.h"
+#include "vm_device_version.h"
+#include "net.h"
+#include "buildNumber.h"
+
+#define SOLVMXNET_SUCCESS 1
+#define SOLVMXNET_FAILURE 0
+
+#ifdef SOLVMXNET_DEBUG_LEVEL
+static int vxn_debug = SOLVMXNET_DEBUG_LEVEL;
+#define DPRINTF(n, args) if (vxn_debug>(n)) cmn_err args
+#else
+#define DPRINTF(n, args)
+#endif
+
+static char ident[] = "VMware Ethernet Adapter b" BUILD_NUMBER_NUMERIC_STRING;
+char _depends_on[] = {"misc/gld"};
+
+#define MAX_NUM_RECV_BUFFERS 128
+#define DEFAULT_NUM_RECV_BUFFERS 100
+#define MAX_NUM_XMIT_BUFFERS 128
+#define DEFAULT_NUM_XMIT_BUFFERS 100
+#define CRC_POLYNOMIAL_LE 0xedb88320UL
+#define SOLVMXNET_MAXNAME 20
+#define MAX_TX_WAIT_ON_STOP 2000
+
+#define ETHERALIGN 2
+#define SLACKBYTES 4
+#define MAXPKTBUF (14 + ETHERALIGN + ETHERMTU + SLACKBYTES)
+
+
+#define QHIWATER (MAX_NUM_RECV_BUFFERS*ETHERMTU)
+
+#define OUTB(dp, p, v) \
+ ddi_put8((dp)->vxnIOHdl, \
+ (uint8_t *)((caddr_t)((dp)->vxnIOp) + (p)), v)
+#define OUTW(dp, p, v) \
+ ddi_put16((dp)->vxnIOHdl, \
+ (uint16_t *)((caddr_t)((dp)->vxnIOp) + (p)), v)
+#define OUTL(dp, p, v) \
+ ddi_put32((dp)->vxnIOHdl, \
+ (uint32_t *)((caddr_t)((dp)->vxnIOp) + (p)), v)
+#define INB(dp, p) \
+ ddi_get8((dp)->vxnIOHdl, \
+ (uint8_t *)(((caddr_t)(dp)->vxnIOp) + (p)))
+#define INW(dp, p) \
+ ddi_get16((dp)->vxnIOHdl, \
+ (uint16_t *)(((caddr_t)(dp)->vxnIOp) + (p)))
+#define INL(dp, p) \
+ ddi_get32((dp)->vxnIOHdl, \
+ (uint32_t *)(((caddr_t)(dp)->vxnIOp) + (p)))
+
+#define VMXNET_INC(val, max) \
+ val++; \
+ if (UNLIKELY(val == max)) { \
+ val = 0; \
+ }
+
+#define TX_RINGBUF_MBLK(dp, idx) (dp->txRingBuf[idx].mblk)
+#define TX_RINGBUF_DMAMEM(dp, idx) (dp->txRingBuf[idx].dmaMem)
+
+typedef struct {
+ caddr_t buf; /* Virtual address */
+ uint32_t phyBuf; /* Physical address */
+ size_t bufLen; /* Buffer length */
+ ddi_dma_cookie_t cookie; /* Dma cookie */
+ uint_t cookieCount; /* Cookie count */
+ ddi_dma_handle_t dmaHdl; /* Dma handle */
+ ddi_acc_handle_t dataAccHdl; /* Dada access handle */
+} dma_buf_t;
+
+typedef struct rx_dma_buf {
+ dma_buf_t dmaDesc; /* Dma descriptor */
+ mblk_t *mblk; /* Streams message block */
+ frtn_t freeCB; /* Free callback */
+ struct vxn_softc *softc; /* Back pointer to softc */
+ struct rx_dma_buf *next; /* Next one in list */
+} rx_dma_buf_t;
+
+typedef struct vxn_stats {
+ uint32_t errxmt; /* Transmit errors */
+ uint32_t errrcv; /* Receive errors */
+ uint32_t runt; /* Runt packets */
+ uint32_t norcvbuf; /* Buffer alloc errors */
+ uint32_t interrupts; /* Interrupts */
+ uint32_t defer; /* Deferred transmits */
+} vxn_stats_t;
+
+typedef struct tx_ring_buf {
+ mblk_t *mblk;
+ dma_buf_t dmaMem;
+} tx_ring_buf_t;
+
+typedef struct vxn_softc {
+ char drvName[SOLVMXNET_MAXNAME]; /* Driver name string */
+ int unit; /* Driver instance */
+ vxn_stats_t stats; /* Stats */
+
+ dev_info_t *dip; /* Info pointer */
+ ddi_iblock_cookie_t iblockCookie; /* Interrupt block cookie */
+ gld_mac_info_t *macInfo; /* GLD mac info */
+ ddi_acc_handle_t confHdl; /* Configuration space handle */
+ ddi_acc_handle_t vxnIOHdl; /* I/O space handle */
+ caddr_t vxnIOp; /* I/O space pointer */
+ boolean_t morphed; /* Adapter morphed ? */
+
+ kmutex_t intrlock; /* Interrupt lock */
+ kmutex_t xmitlock; /* Transmit lock */
+ kmutex_t rxlistlock; /* Rx free pool lock */
+
+ boolean_t nicActive; /* NIC active flag */
+ boolean_t inIntr; /* Interrupt processing flag */
+
+ struct ether_addr devAddr; /* MAC address */
+
+ uint32_t vxnNumRxBufs; /* Number of reveice buffers */
+ uint32_t vxnNumTxBufs; /* Number of transmit buffers */
+
+ dma_buf_t driverDataDmaMem; /* Driver Data (dma handle) */
+ Vmxnet2_DriverData *driverData; /* Driver Data */
+ void *driverDataPhy; /* Driver Data busaddr pointer */
+ Vmxnet2_RxRingEntry *rxRing; /* Receive ring */
+ Vmxnet2_TxRingEntry *txRing; /* Transmit ring */
+ ddi_dma_handle_t txDmaHdl; /* Tx buffers dma handle */
+ rx_dma_buf_t *rxRingBuffPtr[MAX_NUM_RECV_BUFFERS];
+ /* DMA buffers associated with rxRing */
+ tx_ring_buf_t txRingBuf[MAX_NUM_XMIT_BUFFERS]; /* tx Ring buffers */
+
+ rx_dma_buf_t *rxFreeBufList;
+ uint32_t rxNumFreeBufs; /* current # of buffers in pool */
+ uint32_t rxMaxFreeBufs; /* max # of buffers in pool */
+
+ uint32_t txPending; /* Pending transmits */
+ uint32_t maxTxFrags; /* Max Tx fragments */
+
+ int multiCount; /* Multicast address count */
+ struct ether_addr multicastList[GLD_MAX_MULTICAST]; /* Multicast list */
+
+ struct vxn_softc *next; /* Circular list of instances */
+ struct vxn_softc *prev;
+} vxn_softc_t;
+
+/* used for rx buffers or buffers allocated by ddi_dma_mem_alloc() */
+static ddi_dma_attr_t vxn_dma_attrs = {
+ DMA_ATTR_V0, /* dma_attr version */
+ 0, /* dma_attr_addr_lo */
+ (uint64_t)0xFFFFFFFF, /* dma_attr_addr_hi */
+ 0x7FFFFFFF, /* dma_attr_count_max */
+ 4, /* dma_attr_align */
+ 0x3F, /* dma_attr_burstsizes */
+ 1, /* dma_attr_minxfer */
+ (uint64_t)0xFFFFFFFF, /* dma_attr_maxxfer */
+ (uint64_t)0xFFFFFFFF, /* dma_attr_seg */
+ 1, /* dma_attr_sgllen */
+ 1, /* dma_attr_granular */
+ 0, /* dma_attr_flags */
+};
+
+/* used for tx buffers */
+static ddi_dma_attr_t vxn_dma_attrs_tx = {
+ DMA_ATTR_V0, /* dma_attr version */
+ 0, /* dma_attr_addr_lo */
+ (uint64_t)0xFFFFFFFF, /* dma_attr_addr_hi */
+ 0x7FFFFFFF, /* dma_attr_count_max */
+ 1, /* dma_attr_align */
+ 0x3F, /* dma_attr_burstsizes */
+ 1, /* dma_attr_minxfer */
+ (uint64_t)0xFFFFFFFF, /* dma_attr_maxxfer */
+ (uint64_t)0xFFFFFFFF, /* dma_attr_seg */
+ 1, /* dma_attr_sgllen */
+ 1, /* dma_attr_granular */
+ 0, /* dma_attr_flags */
+};
+
+
+static struct ether_addr etherbroadcastaddr = {
+ {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}
+};
+
+static struct ddi_device_acc_attr vxn_buf_attrs = {
+ DDI_DEVICE_ATTR_V0,
+ DDI_STRUCTURE_LE_ACC,
+ DDI_STRICTORDER_ACC
+};
+
+static struct ddi_device_acc_attr dev_attr = {
+ DDI_DEVICE_ATTR_V0,
+ DDI_STRUCTURE_LE_ACC,
+ DDI_STRICTORDER_ACC
+};
+
+static vxn_softc_t vxnList; /* for debugging */
+static kmutex_t vxnListLock;
+
+static void *Vxn_Memset(void *s, int c, size_t n);
+static int Vxn_Reset(gld_mac_info_t *macInfo);
+static int Vxn_SetPromiscuous(gld_mac_info_t *macInfo, int flag);
+static int Vxn_GetStats(gld_mac_info_t *macInfo, struct gld_stats *gs);
+static void Vxn_ApplyAddressFilter(vxn_softc_t *dp);
+static int Vxn_SetMulticast(gld_mac_info_t *macinfo, uint8_t *ep, int flag);
+static int Vxn_SetMacAddress(gld_mac_info_t *macInfo, uint8_t *mac);
+static int Vxn_Start(gld_mac_info_t *macInfo);
+static int Vxn_Stop(gld_mac_info_t *macInfo);
+static void Vxn_FreeTxBuf(vxn_softc_t *dp, int idx);
+static int Vxn_EncapTxBuf(vxn_softc_t *dp, mblk_t *mp, Vmxnet2_TxRingEntry *xre,
+ tx_ring_buf_t *txBuf);
+static int Vxn_Send(gld_mac_info_t *macinfo, mblk_t *mp);
+static boolean_t Vxn_TxComplete(vxn_softc_t *dp, boolean_t *reschedp);
+static boolean_t Vxn_Receive(vxn_softc_t *dp);
+static u_int Vxn_Interrupt(gld_mac_info_t *macInfo);
+static void Vxn_ReclaimRxBuf(rx_dma_buf_t *rxDesc);
+static void Vxn_FreeRxBuf(rx_dma_buf_t *rxDesc);
+static rx_dma_buf_t *Vxn_AllocRxBuf(vxn_softc_t *dp, int cansleep);
+static void Vxn_FreeInitBuffers(vxn_softc_t *dp);
+static int Vxn_AllocInitBuffers(vxn_softc_t *dp);
+static void Vxn_FreeDmaMem(dma_buf_t *dma);
+static int Vxn_AllocDmaMem(vxn_softc_t *dp, int size, int cansleep, dma_buf_t *dma);
+static void Vxn_FreeDriverData(vxn_softc_t *dp);
+static int Vxn_AllocDriverData(vxn_softc_t *dp);
+static int Vxn_Attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
+static int Vxn_Detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
+static int Vxn_AllocRxBufPool(vxn_softc_t *dp);
+static void Vxn_FreeRxBufPool(vxn_softc_t *dp);
+static rx_dma_buf_t * Vxn_AllocRxBufFromPool(vxn_softc_t *dp);
+static void Vxn_FreeRxBufToPool(rx_dma_buf_t *rxDesc);
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_Memset --
+ * memset() (Because bzero does not get resolved by module loader)
+ *
+ * Results:
+ * pointer to the memory area s
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static void *
+Vxn_Memset(void *s, int c, size_t n)
+{
+ while (n--) {
+ ((uint8_t *)s)[n] = c;
+ }
+
+ return s;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_Reset --
+ * Stub routine to reset hardware. Presently does nothing. Start/Stop should
+ * take care of resets.
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_Reset(gld_mac_info_t *macInfo)
+{
+ return GLD_SUCCESS;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_SetPromiscuous --
+ * Set/Reset NIC to/from promiscuous mode
+ *
+ * Results:
+ * GLD_SUCCESS
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_SetPromiscuous(gld_mac_info_t *macInfo, int flag)
+{
+ vxn_softc_t *dp = (vxn_softc_t *)macInfo->gldm_private;
+ Vmxnet2_DriverData *dd = dp->driverData;
+
+ mutex_enter(&dp->intrlock);
+ if (flag == GLD_MAC_PROMISC_PHYS) {
+ dd->ifflags |= VMXNET_IFF_PROMISC;
+ } else if (flag == GLD_MAC_PROMISC_MULTI) {
+ /*
+ * This should really set VMXNET_IFF_ALLMULTI,
+ * but unfortunately it doesn't exist. The next
+ * best thing would be to set the LADRFs to all
+ * 0xFFs and set VMXNET_IFF_MULTICAST, but that
+ * opens up a whole new set of potential pitfalls,
+ * so this is a reasonable temporary solution.
+ */
+ dd->ifflags |= VMXNET_IFF_PROMISC;
+ } else if (flag == GLD_MAC_PROMISC_NONE) {
+ dd->ifflags &= ~VMXNET_IFF_PROMISC;
+ } else {
+ /* This could be GLD_MAC_PROMISC_NOOP? */
+ mutex_exit(&dp->intrlock);
+ cmn_err(CE_WARN, "%s%d: Vxn_SetPromiscuous: Unexpected mode flag: 0x%x",
+ dp->drvName, dp->unit, flag);
+
+ return GLD_FAILURE;
+ }
+
+ OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_UPDATE_IFF);
+ mutex_exit(&dp->intrlock);
+
+ return GLD_SUCCESS;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_GetStats --
+ * Get driver specific stats
+ *
+ * Results:
+ * GLD_SUCCESS
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_GetStats(gld_mac_info_t *macInfo, struct gld_stats *gs)
+{
+ vxn_softc_t *dp = (vxn_softc_t *)macInfo->gldm_private;
+
+ gs->glds_errxmt = dp->stats.errxmt;
+ gs->glds_errrcv = dp->stats.errrcv;
+ gs->glds_short = dp->stats.runt;
+ gs->glds_norcvbuf = dp->stats.norcvbuf;
+ gs->glds_intr = dp->stats.interrupts;
+ gs->glds_defer = dp->stats.defer;
+
+ return GLD_SUCCESS;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_ApplyAddressFilter --
+ * Go over multicast list and compute/apply address filter
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static void
+Vxn_ApplyAddressFilter(vxn_softc_t *dp)
+{
+ uint8_t *ep;
+ int i, j, bit, byte;
+ uint32_t crc, poly = CRC_POLYNOMIAL_LE;
+ Vmxnet2_DriverData *dd = dp->driverData;
+ volatile uint16_t *mcastTable = (uint16_t *)dd->LADRF;
+
+ ASSERT(MUTEX_HELD(&dp->intrlock));
+
+ /* clear the multicast filter */
+ dd->LADRF[0] = 0;
+ dd->LADRF[1] = 0;
+
+ for (i = 0; i < dp->multiCount; i++) {
+ crc = 0xffffffff;
+ ep = (uint8_t *)&dp->multicastList[i].ether_addr_octet;
+
+ for (byte = 0; byte < 6; byte++) {
+ for (bit = *ep++, j = 0; j < 8; j++, bit >>= 1) {
+ int test;
+
+ test = ((bit ^ crc) & 0x01);
+ crc >>= 1;
+
+ if (test) {
+ crc = crc ^ poly;
+ }
+ }
+ }
+
+ crc = crc >> 26;
+ mcastTable[crc >> 4] |= 1 << (crc & 0xf);
+ }
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_SetMulticast --
+ * Add delete entry from multicast list
+ *
+ * Results:
+ * GLD_FAILURE on failure
+ * GLD_SUCCESS on success
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_SetMulticast(gld_mac_info_t *macinfo, uint8_t *ep, int flag)
+{
+ int i;
+ int copyLen;
+ vxn_softc_t *dp = (vxn_softc_t *)macinfo->gldm_private;
+ Vmxnet2_DriverData *dd = dp->driverData;
+
+ if (flag == GLD_MULTI_ENABLE) {
+ /*
+ * Exceeded multicast address limit
+ */
+ if (dp->multiCount >= GLD_MAX_MULTICAST) {
+ return GLD_FAILURE;
+ }
+
+ /*
+ * Add mac address to multicast list
+ */
+ bcopy(ep, dp->multicastList[dp->multiCount].ether_addr_octet,
+ ETHERADDRL);
+ dp->multiCount++;
+ }
+ else {
+ for (i=0; i<dp->multiCount; i++) {
+ if (bcmp(ep, dp->multicastList[i].ether_addr_octet, ETHERADDRL) == 0) {
+ goto found;
+ }
+ }
+ return GLD_FAILURE;
+
+ found:
+ /*
+ * Delete mac address from multicast list
+ */
+ copyLen = (dp->multiCount - (i+1)) * sizeof(struct ether_addr);
+ if (copyLen > 0) {
+ bcopy(&dp->multicastList[i+1], &dp->multicastList[i], copyLen);
+ }
+ dp->multiCount--;
+ }
+
+ /*
+ * Compute address filter from list of addressed and apply it
+ */
+ mutex_enter(&dp->intrlock);
+ Vxn_ApplyAddressFilter(dp);
+
+ if (dp->multiCount) {
+ ASSERT(dd->LADRF[0] || dd->LADRF[1]);
+ dd->ifflags |= VMXNET_IFF_MULTICAST;
+ } else {
+ ASSERT(!(dd->LADRF[0] || dd->LADRF[1]));
+ dd->ifflags &= ~VMXNET_IFF_MULTICAST;
+ }
+
+ OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_UPDATE_IFF);
+ OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_UPDATE_LADRF);
+ mutex_exit(&dp->intrlock);
+
+ return GLD_SUCCESS;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_SetMacAddress --
+ * Change device MAC address
+ *
+ * Results:
+ * GLD_SUCCESS
+ * GLD_FAILURE
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_SetMacAddress(gld_mac_info_t *macInfo, uint8_t *mac)
+{
+ int i;
+ int err = GLD_SUCCESS;
+ vxn_softc_t * dp = (vxn_softc_t *)macInfo->gldm_private;
+
+ mutex_enter(&dp->intrlock);
+ mutex_enter(&dp->xmitlock);
+
+ /*
+ * Don't change MAC address on a running NIC
+ */
+ if (dp->nicActive) {
+ err = GLD_FAILURE;
+ goto out;
+ }
+
+ /*
+ * Save new MAC address
+ */
+ for (i = 0; i < 6; i++) {
+ dp->devAddr.ether_addr_octet[i] = mac[i];
+ }
+
+ /*
+ * Push new MAC address down into hardware
+ */
+ for (i = 0; i < 6; i++) {
+ OUTB(dp, VMXNET_MAC_ADDR + i, mac[i]);
+ }
+
+out:
+ mutex_exit(&dp->xmitlock);
+ mutex_exit(&dp->intrlock);
+ return err;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_Start --
+ * Device start routine. Called on "ifconfig plumb"
+ *
+ * Results:
+ * GLD_SUCCESS
+ * GLD_FAILURE
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_Start(gld_mac_info_t *macInfo)
+{
+ int err = GLD_SUCCESS;
+ uint32_t r, capabilities, features;
+ vxn_softc_t * dp = (vxn_softc_t *)macInfo->gldm_private;
+
+ mutex_enter(&dp->intrlock);
+ mutex_enter(&dp->xmitlock);
+
+ if (!dp->nicActive) {
+ /*
+ * Register ring structure with hardware
+ *
+ * This downcast is OK because we requested a 32-bit physical address
+ */
+ OUTL(dp, VMXNET_INIT_ADDR, (uint32_t)(uintptr_t)dp->driverDataPhy);
+ OUTL(dp, VMXNET_INIT_LENGTH, dp->driverData->length);
+
+ /*
+ * Make sure registeration succeded
+ */
+ r = INL(dp, VMXNET_INIT_LENGTH);
+ if (!r) {
+ cmn_err(CE_WARN, "%s%d: Vxn_Start: failed to register ring",
+ dp->drvName, dp->unit);
+ err = GLD_FAILURE;
+ goto out;
+ }
+
+ /*
+ * Get maximum tx fragments supported
+ */
+ OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_GET_CAPABILITIES);
+ capabilities = INL(dp, VMXNET_COMMAND_ADDR);
+
+ OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_GET_FEATURES);
+ features = INL(dp, VMXNET_COMMAND_ADDR);
+
+ DPRINTF(3, (CE_CONT, "%s%d: chip capabilities=0x%x features=0x%x\n",
+ dp->drvName, dp->unit, capabilities, features));
+
+ if ((capabilities & VMNET_CAP_SG) &&
+ (features & VMXNET_FEATURE_ZERO_COPY_TX)) {
+ dp->maxTxFrags = VMXNET2_SG_DEFAULT_LENGTH;
+ } else {
+ dp->maxTxFrags = 1;
+ }
+ ASSERT(dp->maxTxFrags >= 1);
+
+ /*
+ * Alloc Tx DMA handle
+ */
+ vxn_dma_attrs_tx.dma_attr_sgllen = dp->maxTxFrags;
+ if (ddi_dma_alloc_handle(dp->dip, &vxn_dma_attrs_tx, DDI_DMA_SLEEP,
+ NULL, &dp->txDmaHdl) != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "%s%d: Vxn_Start: failed to alloc tx dma handle",
+ dp->drvName, dp->unit);
+ err = GLD_FAILURE;
+ goto out;
+ }
+
+ /*
+ * Enable interrupts on the card
+ */
+ dp->driverData->ifflags |= VMXNET_IFF_BROADCAST | VMXNET_IFF_DIRECTED;
+
+ OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_INTR_ENABLE);
+ OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_UPDATE_IFF);
+ OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_UPDATE_LADRF);
+
+ dp->nicActive = TRUE;
+ }
+
+out:
+ mutex_exit(&dp->xmitlock);
+ mutex_exit(&dp->intrlock);
+ return err;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_Stop --
+ * Device stop routine. Called on "ifconfig unplumb"
+ *
+ * Results:
+ * GLD_SUCCESS
+ * GLD_FAILURE
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_Stop(gld_mac_info_t *macInfo)
+{
+ int i;
+ int err = GLD_SUCCESS;
+ vxn_softc_t * dp = (vxn_softc_t *)macInfo->gldm_private;
+ boolean_t resched;
+
+ mutex_enter(&dp->intrlock);
+ mutex_enter(&dp->xmitlock);
+
+ if (!dp->nicActive) {
+ goto out;
+ }
+
+ /*
+ * Disable interrupts
+ */
+ OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_INTR_DISABLE);
+
+ /*
+ * Wait for pending transmits
+ */
+ if (dp->txPending) {
+ for (i=0; i < MAX_TX_WAIT_ON_STOP && dp->txPending; i++) {
+ delay(drv_usectohz(1000));
+ OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_CHECK_TX_DONE);
+ (void) Vxn_TxComplete(dp, &resched);
+ /*
+ * Don't worry about rescheduling transmits - GLD handles
+ * this automatically.
+ */
+ }
+ }
+ if (dp->txPending) {
+ cmn_err(CE_WARN, "%s%d: Vxn_Stop: giving up on %d pending transmits",
+ dp->drvName, dp->unit, dp->txPending);
+ }
+
+ OUTL(dp, VMXNET_INIT_ADDR, 0);
+ dp->nicActive = FALSE;
+
+ /*
+ * Free Tx DMA handle
+ *
+ * The ddi_dma_free_handle() man page says that ddi_dma_unbind_handle() must be called
+ * prior to calling ddi_dma_free_handle().
+ * However, call to ddi_dma_unbind_handle() is not required here, because
+ * ddi_dma_addr_bind_handle() and matching ddi_dma_unbind_handle() are called from
+ * Vxn_EncapTxBuf().
+ * xmitlock is held in Vxn_EncapTxBuf() as well as acquired above in Vxn_Stop().
+ */
+ ddi_dma_free_handle(&dp->txDmaHdl);
+ dp->txDmaHdl = NULL;
+
+out:
+ mutex_exit(&dp->xmitlock);
+ mutex_exit(&dp->intrlock);
+ return err;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_FreeTxBuf --
+ * Free transmit buffer
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static void
+Vxn_FreeTxBuf(vxn_softc_t *dp, int idx)
+{
+ mblk_t **txMblkp = &TX_RINGBUF_MBLK(dp, idx);
+ dma_buf_t *dmaMem = &TX_RINGBUF_DMAMEM(dp, idx);
+
+ if (*txMblkp) {
+ freemsg(*txMblkp);
+ *txMblkp = NULL;
+ }
+
+ if (dmaMem->buf) {
+ Vxn_FreeDmaMem(dmaMem);
+ ASSERT(dmaMem->buf == NULL);
+ }
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_EncapTxBuf --
+ * Go over dma mappings of Tx buffers and drop buffer physical address
+ * into ring entry
+ *
+ * Results:
+ * SOLVMXNET_SUCCESS on success
+ * SOLVMXNET_FAILURE on failure
+ *
+ * Side effects:
+ * None
+ *---------------- -------------------------------------------------------------
+ */
+static int
+Vxn_EncapTxBuf(vxn_softc_t *dp,
+ mblk_t *mp,
+ Vmxnet2_TxRingEntry *xre,
+ tx_ring_buf_t *txBuf)
+{
+ int frag;
+ int fragcount;
+ int rval;
+ mblk_t *tp;
+ mblk_t *mblk;
+ boolean_t needPullup = FALSE;
+ boolean_t dmaMemAlloced = FALSE;
+
+ ASSERT(txBuf);
+ ASSERT(txBuf->mblk == NULL);
+ ASSERT(MUTEX_HELD(&dp->xmitlock));
+
+ xre->sg.length = 0;
+ xre->flags = 0;
+
+ fragcount = 0;
+ for (tp = mp; tp != NULL; tp = tp->b_cont) {
+ fragcount++;
+ }
+ if (fragcount > dp->maxTxFrags) {
+ needPullup = TRUE;
+ }
+
+pullup:
+ frag = 0;
+ if (needPullup) {
+ if (!(mblk = msgpullup(mp, -1))) {
+ cmn_err(CE_WARN, "%s%d: Vxn_EncapTxBuf: msgpullup failed",
+ dp->drvName, dp->unit);
+ goto err;
+ }
+ } else {
+ mblk = mp;
+ }
+
+ /*
+ * Go through message chain and drop packet pointers into ring
+ * scatter/gather array
+ */
+ for (tp = mblk; tp != NULL; tp = tp->b_cont) {
+
+ uint_t nCookies;
+ ddi_dma_cookie_t dmaCookie;
+ int len = tp->b_wptr - tp->b_rptr;
+
+ if (len) {
+ /*
+ * Associate tx buffer with dma handle
+ */
+ ASSERT(dp->txDmaHdl);
+ if ((rval = ddi_dma_addr_bind_handle(dp->txDmaHdl, NULL, (caddr_t)tp->b_rptr,
+ len, DDI_DMA_RDWR | DDI_DMA_STREAMING,
+ DDI_DMA_DONTWAIT, NULL,
+ &dmaCookie, &nCookies))
+ != DDI_DMA_MAPPED) {
+
+ /*
+ * Try to handle bind failure caused by a page boundary spill
+ * by allocating a private dma buffer and copying data into it
+ */
+ if ((rval == DDI_DMA_TOOBIG) && !dmaMemAlloced ) {
+ /*
+ * Force pullup
+ */
+ if (!needPullup && (dp->maxTxFrags > 1)) {
+ needPullup = TRUE;
+ goto pullup;
+ }
+
+ if (Vxn_AllocDmaMem(dp, len, FALSE, &txBuf->dmaMem)
+ != SOLVMXNET_SUCCESS) {
+ goto err;
+ }
+
+ dmaMemAlloced = TRUE;
+
+ /*
+ * Copy data into DMA capable buffer
+ */
+ bcopy(tp->b_rptr, txBuf->dmaMem.buf, len);
+
+ /*
+ * Stick buffer physical addr in the ring
+ */
+ xre->sg.sg[frag].addrLow = txBuf->dmaMem.phyBuf;
+ xre->sg.sg[frag].length = len;
+ frag++;
+
+ continue;
+
+ } else {
+ cmn_err(CE_WARN, "%s%d: Vxn_EncapTxBuf: failed (%d) to bind dma "
+ "handle for len %d. [dmaMemAlloced=%d]",
+ dp->drvName, dp->unit, rval, len, dmaMemAlloced);
+ goto err;
+ }
+ }
+
+ /*
+ * Extract tx buffer physical addresses from cookie
+ */
+ while (nCookies) {
+ if (UNLIKELY(frag == dp->maxTxFrags)) {
+ (void)ddi_dma_unbind_handle(dp->txDmaHdl);
+
+ if (!needPullup) {
+ ASSERT(!dmaMemAlloced);
+ needPullup = TRUE;
+ goto pullup;
+ } else {
+ cmn_err(CE_WARN, "%s%d: Vxn_EncapTxBuf: "
+ "exceeded max (%d) fragments in message",
+ dp->drvName, dp->unit, dp->maxTxFrags);
+ goto err;
+ }
+ }
+
+ /*
+ * Stick it in the ring
+ */
+ xre->sg.sg[frag].addrLow = dmaCookie.dmac_address;
+ xre->sg.sg[frag].length = dmaCookie.dmac_size;
+ frag++;
+
+ if (--nCookies) {
+ ddi_dma_nextcookie(dp->txDmaHdl, &dmaCookie);
+ }
+ }
+
+ (void)ddi_dma_unbind_handle(dp->txDmaHdl);
+ }
+ }
+
+ if (frag > 0) {
+ xre->sg.length = frag;
+
+ /* Give ownership to NIC */
+ xre->sg.addrType = NET_SG_PHYS_ADDR;
+ xre->ownership = VMXNET2_OWNERSHIP_NIC;
+ xre->flags |= VMXNET2_TX_CAN_KEEP;
+ txBuf->mblk = mblk;
+
+ /*
+ * If we called msgpullup to concatenate fragments, free
+ * original mblk now since we're going to return success.
+ */
+ if (mblk != mp) {
+ freemsg(mp);
+ }
+
+ return SOLVMXNET_SUCCESS;
+ }
+
+err:
+ if (mblk != NULL && mblk != mp) {
+ /*
+ * Free mblk allocated by msgpullup.
+ */
+ freemsg(mblk);
+ }
+
+ if (dmaMemAlloced) {
+ ASSERT(txBuf->dmaMem.buf);
+ Vxn_FreeDmaMem(&txBuf->dmaMem);
+ }
+
+ return SOLVMXNET_FAILURE;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_Send --
+ * GLD Transmit routine. Starts packet hard tx.
+ *
+ * Results:
+ * GLD_SUCCESS on success
+ * GLD_FAILURE on failure
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_Send(gld_mac_info_t *macinfo, mblk_t *mp)
+{
+ Vmxnet2_TxRingEntry *xre;
+ int err = GLD_SUCCESS;
+ vxn_softc_t *dp = (vxn_softc_t *)macinfo->gldm_private;
+ Vmxnet2_DriverData *dd = dp->driverData;
+ boolean_t resched = FALSE;
+
+ mutex_enter(&dp->xmitlock);
+
+ /*
+ * Check if ring entry at drop pointer is available
+ */
+ if (TX_RINGBUF_MBLK(dp, dd->txDriverNext) != NULL) {
+ DPRINTF(3, (CE_NOTE, "%s%d: Vxn_Send: tx ring full",
+ dp->drvName, dp->unit));
+ err = GLD_NORESOURCES;
+ dd->txStopped = TRUE;
+ dp->stats.defer++;
+ goto out;
+ }
+
+ xre = &dp->txRing[dd->txDriverNext];
+
+ /*
+ * Drop packet into ring entry
+ */
+ if (Vxn_EncapTxBuf(dp, mp, xre, &dp->txRingBuf[dd->txDriverNext])
+ != SOLVMXNET_SUCCESS) {
+ err = GLD_FAILURE;
+ dp->stats.errxmt++;
+ goto out;
+ }
+
+ /*
+ * Increment drop pointer
+ */
+ VMXNET_INC(dd->txDriverNext, dd->txRingLength);
+ dd->txNumDeferred++;
+ dp->txPending++;
+
+ /*
+ * Transmit, if number of pending packets > tx cluster length
+ */
+ if (dd->txNumDeferred >= dd->txClusterLength) {
+ dd->txNumDeferred = 0;
+
+ /*
+ * Call hardware transmit
+ */
+ INL(dp, VMXNET_TX_ADDR);
+ }
+
+ /*
+ * Clean up transmit ring. TX completion interrupts are not guaranteed
+ */
+ (void) Vxn_TxComplete(dp, &resched);
+
+out:
+ mutex_exit(&dp->xmitlock);
+ if (resched) {
+ /* Tell GLD to retry any deferred packets */
+ gld_sched(dp->macInfo);
+ }
+ return err;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_TxComplete --
+ * Scan Tx ring for completed transmits. Reclaim Tx buffers.
+ *
+ * Results:
+ * Returns TRUE if it found a completed transmit, FALSE otherwise.
+ * Also sets *reschedp to TRUE if the caller should call gld_sched
+ * to reschedule transmits (once all locks are dropped).
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static boolean_t
+Vxn_TxComplete(vxn_softc_t *dp, boolean_t *reschedp)
+{
+ Vmxnet2_DriverData *dd = dp->driverData;
+ boolean_t found = FALSE;
+ boolean_t needresched = FALSE;
+
+ ASSERT(MUTEX_HELD(&dp->xmitlock));
+
+ while (1) {
+ Vmxnet2_TxRingEntry *xre = &dp->txRing[dd->txDriverCur];
+
+ if (xre->ownership != VMXNET2_OWNERSHIP_DRIVER ||
+ (TX_RINGBUF_MBLK(dp, dd->txDriverCur) == NULL)) {
+ break;
+ }
+
+ found = TRUE;
+ Vxn_FreeTxBuf(dp, dd->txDriverCur);
+
+ dp->txPending--;
+ VMXNET_INC(dd->txDriverCur, dd->txRingLength);
+ if (dd->txStopped) {
+ needresched = TRUE;
+ dd->txStopped = FALSE;
+ }
+ }
+
+ *reschedp = needresched;
+ return found;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_Receive --
+ * Rx handler. First assembles the packets into a chain of mblks,
+ * then drops locks and passes them up the stack to GLD.
+ *
+ * Results:
+ * Returns TRUE if it find a packet ready for processing, FALSE
+ * otherwise.
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static boolean_t
+Vxn_Receive(vxn_softc_t *dp)
+{
+ int ringnext;
+ short pktlen;
+ Vmxnet2_DriverData *dd = dp->driverData;
+ rx_dma_buf_t *rxDesc;
+ rx_dma_buf_t *newRxDesc;
+ mblk_t *mblk;
+ mblk_t *head = NULL;
+ mblk_t **tail = &head;
+ mblk_t *next;
+ boolean_t found = FALSE; /* Did we find at least one packet? */
+
+ ASSERT(MUTEX_HELD(&dp->intrlock));
+
+ /*
+ * Walk receive ring looking for entries with ownership
+ * reverted back to driver
+ */
+ while (1) {
+ Vmxnet2_RxRingEntry *rre;
+ rx_dma_buf_t **rbuf;
+
+ ringnext = dd->rxDriverNext;
+ rre = &dp->rxRing[ringnext];
+ rbuf = &dp->rxRingBuffPtr[ringnext];
+
+ if (rre->ownership != VMXNET2_OWNERSHIP_DRIVER) {
+ break;
+ }
+
+ found = TRUE;
+
+ pktlen = rre->actualLength;
+
+ if (pktlen < (60 - 4)) {
+ /*
+ * Ethernet header vlan tags are 4 bytes. Some vendors generate
+ * 60byte frames including vlan tags. When vlan tag
+ * is stripped, such frames become 60 - 4. (PR106153)
+ */
+ dp->stats.errrcv++;
+ if (pktlen != 0) {
+ DPRINTF(3, (CE_CONT, "%s%d: runt packet\n", dp->drvName, dp->unit));
+ dp->stats.runt++;
+ }
+ } else {
+ /*
+ * Alloc new Rx buffer to replace current one
+ */
+ newRxDesc = Vxn_AllocRxBufFromPool(dp);
+
+ if (newRxDesc) {
+ rxDesc = *rbuf;
+ mblk = rxDesc->mblk;
+
+ *rbuf = newRxDesc;
+ rre->paddr = newRxDesc->dmaDesc.phyBuf + ETHERALIGN;
+ rre->bufferLength = MAXPKTBUF - ETHERALIGN;
+ rre->actualLength = 0;
+
+ /*
+ * Advance write pointer past packet length
+ */
+ mblk->b_wptr = mblk->b_rptr + pktlen;
+
+ /*
+ * Add to end of chain.
+ */
+ mblk->b_next = NULL;
+ *tail = mblk;
+ tail = &mblk->b_next;
+ } else {
+ dp->stats.errrcv++;
+ dp->stats.norcvbuf++;
+ }
+ }
+
+ /* Give the descriptor back to NIC */
+ rre->ownership = VMXNET2_OWNERSHIP_NIC;
+ VMXNET_INC(dd->rxDriverNext, dd->rxRingLength);
+ }
+
+ /*
+ * Walk chain and pass mblks up to gld_recv one by one.
+ */
+ mutex_exit(&dp->intrlock);
+ for (mblk = head; mblk != NULL; mblk = next) {
+ next = mblk->b_next;
+ mblk->b_next = NULL;
+ gld_recv(dp->macInfo, mblk);
+ }
+ mutex_enter(&dp->intrlock);
+
+ return (found);
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_Interrupt --
+ * GLD interrupt handler. Scan: Rx ring for received packets, Tx ring for
+ * completed transmits
+ *
+ * Results:
+ * - DDI_INTR_CLAIMED (if we found something to do)
+ * - DDI_INTR_UNCLAIMED (if not)
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static u_int
+Vxn_Interrupt(gld_mac_info_t *macInfo)
+{
+ u_int ret = DDI_INTR_UNCLAIMED;
+ vxn_softc_t *dp = (vxn_softc_t *)macInfo->gldm_private;
+ boolean_t foundRx, foundTx;
+ boolean_t resched = FALSE;
+
+ mutex_enter(&dp->intrlock);
+ dp->inIntr = TRUE;
+
+ if (!dp->nicActive) {
+ goto out;
+ }
+
+ /*
+ * Ack interrupt
+ */
+ OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_INTR_ACK);
+
+ foundRx = Vxn_Receive(dp);
+
+ mutex_enter(&dp->xmitlock);
+ foundTx = Vxn_TxComplete(dp, &resched);
+ mutex_exit(&dp->xmitlock);
+
+ if (foundRx || foundTx) {
+ ret = DDI_INTR_CLAIMED;
+ dp->stats.interrupts++;
+ }
+
+out:
+ dp->inIntr = FALSE;
+ mutex_exit(&dp->intrlock);
+
+ if (resched) {
+ gld_sched(dp->macInfo);
+ }
+
+ return ret;
+}
+
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_ReclaimRxBuf --
+ * Callback handler invoked by freemsg(). Frees Rx buffer memory and mappings
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static void
+Vxn_ReclaimRxBuf(rx_dma_buf_t *rxDesc)
+{
+ Vxn_FreeRxBufToPool(rxDesc);
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_FreeRxBuf --
+ * Free allocated Rx buffer
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static void
+Vxn_FreeRxBuf(rx_dma_buf_t *rxDesc)
+{
+ ASSERT(rxDesc);
+
+ if (rxDesc->mblk) {
+ freemsg(rxDesc->mblk);
+ } else {
+ Vxn_FreeDmaMem(&rxDesc->dmaDesc);
+ kmem_free(rxDesc, sizeof(rx_dma_buf_t));
+ }
+}
+
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_AllocRxBuf --
+ * Allocate Rx buffer
+ *
+ * Results:
+ * Pointer to Rx buffer descriptor - on success
+ * NULL - on failure
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static rx_dma_buf_t *
+Vxn_AllocRxBuf(vxn_softc_t *dp, int cansleep)
+{
+ rx_dma_buf_t *rxDesc;
+
+ rxDesc = (rx_dma_buf_t *)kmem_zalloc(sizeof(rx_dma_buf_t),
+ cansleep ? KM_SLEEP : KM_NOSLEEP);
+ if (!rxDesc) {
+ cmn_err(CE_WARN, "%s%d: Vxn_AllocRxBuf: kmem_zalloc failed",
+ dp->drvName, dp->unit);
+ return NULL;
+ }
+
+ rxDesc->softc = dp;
+
+ /*
+ * Alloc dma-able packet memory
+ */
+ if (Vxn_AllocDmaMem(dp, MAXPKTBUF, cansleep, &rxDesc->dmaDesc)
+ != SOLVMXNET_SUCCESS) {
+ kmem_free(rxDesc, sizeof(rx_dma_buf_t));
+ return NULL;
+ }
+
+ /*
+ * Fill in free callback; fired by freemsg()
+ */
+ rxDesc->freeCB.free_func = &Vxn_ReclaimRxBuf;
+ rxDesc->freeCB.free_arg = (caddr_t) rxDesc;
+
+ rxDesc->mblk = NULL;
+ return rxDesc;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_FreeInitBuffers --
+ * Free allocated Tx and Rx buffers
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static void
+Vxn_FreeInitBuffers(vxn_softc_t *dp)
+{
+ int i;
+
+ for (i=0; i<dp->vxnNumRxBufs; i++) {
+ if (dp->rxRingBuffPtr[i]) {
+ Vxn_FreeRxBuf(dp->rxRingBuffPtr[i]);
+ dp->rxRingBuffPtr[i] = NULL;
+ }
+ }
+
+ for (i=0; i<dp->vxnNumTxBufs; i++) {
+ if (TX_RINGBUF_MBLK(dp, i)) {
+ Vxn_FreeTxBuf(dp, i);
+ }
+ }
+
+ /*
+ * Rx pool must get freed last. Rx buffers above will
+ * show up on the pool when freemsg callback fires.
+ */
+ Vxn_FreeRxBufPool(dp);
+}
+
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_AllocRxBufPool --
+ * Allocate pool of rx buffers - 3 * configured Rx buffers
+ *
+ * Results:
+ * SOLVMXNET_SUCCESS/SOLVMXNET_FAILURE
+ *
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_AllocRxBufPool(vxn_softc_t *dp)
+{
+ int i;
+
+ dp->rxFreeBufList = NULL;
+
+ // Allow list to double in size if needed. Any additional buffers
+ // that are allocated on the fly will be freed back to main memory.
+ dp->rxMaxFreeBufs = dp->vxnNumRxBufs * 6;
+
+ for (i = 0; i < dp->vxnNumRxBufs * 3; i++) {
+ rx_dma_buf_t *rxDesc;
+
+ /*
+ * Alloc rx buffer
+ */
+ if (!(rxDesc = Vxn_AllocRxBuf(dp, TRUE))) {
+ cmn_err(CE_WARN, "%s%d: Vxn_AllocRxBufPool: failed to allocate memory",
+ dp->drvName, dp->unit);
+ dp->rxNumFreeBufs = i;
+ return SOLVMXNET_FAILURE;
+ }
+ /*
+ * Add to free list
+ */
+ rxDesc->next = dp->rxFreeBufList;
+ dp->rxFreeBufList = rxDesc;
+ }
+
+ dp->rxNumFreeBufs = i;
+ return SOLVMXNET_SUCCESS;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_FreeRxBufPool --
+ * Free rx buffers pool
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static void
+Vxn_FreeRxBufPool(vxn_softc_t *dp)
+{
+ while (dp->rxFreeBufList) {
+ rx_dma_buf_t *rxDesc = dp->rxFreeBufList;
+
+ /* unlink */
+ dp->rxFreeBufList = rxDesc->next;
+
+ ASSERT(rxDesc->mblk == NULL);
+ Vxn_FreeDmaMem(&rxDesc->dmaDesc);
+ kmem_free(rxDesc, sizeof(rx_dma_buf_t));
+ }
+ dp->rxNumFreeBufs = 0;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_AllocRxBufFromPool --
+ * Allocate Rx buffer from free pool
+ *
+ * Results:
+ * Pointer to Rx buffer descriptor - on success
+ * NULL - on failure
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static rx_dma_buf_t *
+Vxn_AllocRxBufFromPool(vxn_softc_t *dp)
+{
+ rx_dma_buf_t *rxDesc = NULL;
+
+ mutex_enter(&dp->rxlistlock);
+ if (dp->rxFreeBufList) {
+ rxDesc = dp->rxFreeBufList;
+ dp->rxFreeBufList = rxDesc->next;
+ ASSERT(dp->rxNumFreeBufs >= 1);
+ dp->rxNumFreeBufs--;
+ }
+ mutex_exit(&dp->rxlistlock);
+
+ if (!rxDesc) {
+ /*
+ * Try to allocate new descriptor from memory. Can't block here
+ * since we could be being called from interrupt context.
+ */
+ DPRINTF(5, (CE_NOTE, "%s%d: allocating rx buf from memory",
+ dp->drvName, dp->unit));
+ if (!(rxDesc = Vxn_AllocRxBuf(dp, FALSE))) {
+ cmn_err(CE_WARN,
+ "%s%d: Vxn_AllocRxBufFromPool : pool rx alloc failed",
+ dp->drvName, dp->unit);
+ return NULL;
+ }
+ }
+
+ /*
+ * Allocate new message block for this buffer
+ */
+ rxDesc->mblk = desballoc((uchar_t *)rxDesc->dmaDesc.buf + ETHERALIGN,
+ rxDesc->dmaDesc.bufLen - ETHERALIGN,
+ BPRI_MED, &rxDesc->freeCB);
+ if (!rxDesc->mblk) {
+ cmn_err(CE_WARN, "%s%d: Vxn_AllocRxBufFromPool : desballoc failed",
+ dp->drvName, dp->unit);
+
+ /* put back on free list */
+ Vxn_FreeRxBufToPool(rxDesc);
+ return NULL;
+ }
+
+ return rxDesc;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_FreeRxBufToPool --
+ * Return rx buffer to free pool
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static void
+Vxn_FreeRxBufToPool(rx_dma_buf_t *rxDesc)
+{
+ vxn_softc_t *dp = rxDesc->softc;
+
+ rxDesc->mblk = NULL;
+
+ /*
+ * Insert on free list, or free if the list is full
+ */
+ mutex_enter(&dp->rxlistlock);
+ if (dp->rxNumFreeBufs >= dp->rxMaxFreeBufs) {
+ DPRINTF(5, (CE_NOTE, "%s%d: freeing rx buf to memory",
+ dp->drvName, dp->unit));
+ Vxn_FreeRxBuf(rxDesc);
+ } else {
+ rxDesc->next = dp->rxFreeBufList;
+ dp->rxFreeBufList = rxDesc;
+ dp->rxNumFreeBufs++;
+ }
+ mutex_exit(&dp->rxlistlock);
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_AllocInitBuffers --
+ * Allocated Rx buffers and init ring entries
+ *
+ * Results:
+ * SOLVMXNET_SUCCESS - on success
+ * SOLVMXNET_FAILURE - on failure
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_AllocInitBuffers(vxn_softc_t *dp)
+{
+ Vmxnet2_DriverData *dd;
+ uint32_t i, offset;
+
+ dd = dp->driverData;
+ offset = sizeof(*dd);
+
+ /*
+ * Init shared structures
+ */
+ dd->rxRingLength = dp->vxnNumRxBufs;
+ dd->rxRingOffset = offset;
+ dp->rxRing = (Vmxnet2_RxRingEntry *)((uintptr_t)dd + offset);
+ offset += dp->vxnNumRxBufs * sizeof(Vmxnet2_RxRingEntry);
+
+ dd->rxRingLength2 = 1;
+ dd->rxRingOffset2 = offset;
+ offset += sizeof(Vmxnet2_RxRingEntry);
+
+ dd->txRingLength = dp->vxnNumTxBufs;
+ dd->txRingOffset = offset;
+ dp->txRing = (Vmxnet2_TxRingEntry *)((uintptr_t)dd + offset);
+ offset += dp->vxnNumTxBufs * sizeof(Vmxnet2_TxRingEntry);
+
+ /*
+ * Alloc Rx buffers pool
+ */
+ if ( Vxn_AllocRxBufPool(dp) != SOLVMXNET_SUCCESS) {
+ cmn_err(CE_WARN, "%s%d: Vxn_AllocInitBuffers: failed to alloc buf pool",
+ dp->drvName, dp->unit);
+ return SOLVMXNET_FAILURE;
+ }
+
+ /*
+ * Allocate receive buffers
+ */
+ for (i = 0; i < dp->vxnNumRxBufs; i++) {
+ rx_dma_buf_t *rxDesc;
+ Vmxnet2_RxRingEntry *rre = &dp->rxRing[i];
+
+ if (!(rxDesc = Vxn_AllocRxBufFromPool(dp))) {
+ cmn_err(CE_WARN, "%s%d: Vxn_AllocInitBuffers: "
+ "failed to alloc buf from pool", dp->drvName, dp->unit);
+ goto err;
+ }
+
+ /*
+ * Init ring entries
+ */
+ rre->paddr = rxDesc->dmaDesc.phyBuf + ETHERALIGN;
+ rre->bufferLength = MAXPKTBUF - ETHERALIGN;
+ rre->actualLength = 0;
+ dp->rxRingBuffPtr[i] = rxDesc;
+ rre->ownership = VMXNET2_OWNERSHIP_NIC;
+ }
+
+ dp->txDmaHdl = NULL;
+
+ /*
+ * Dummy recvRing2 tacked on to the end, with a single unusable entry
+ */
+ dp->rxRing[i].paddr = 0;
+ dp->rxRing[i].bufferLength = 0;
+ dp->rxRing[i].actualLength = 0;
+ dp->rxRingBuffPtr[i] = NULL;
+ dp->rxRing[i].ownership = VMXNET2_OWNERSHIP_DRIVER;
+
+ dd->rxDriverNext = 0;
+
+ /*
+ * Give xmit ring ownership to DRIVER
+ */
+ for (i = 0; i < dp->vxnNumTxBufs; i++) {
+ dp->txRing[i].ownership = VMXNET2_OWNERSHIP_DRIVER;
+ dp->txRingBuf[i].mblk = NULL;
+ dp->txRingBuf[i].dmaMem.buf = NULL;
+ dp->txRing[i].sg.sg[0].addrHi = 0;
+ }
+
+ dd->txDriverCur = dd->txDriverNext = 0;
+ dd->txStopped = FALSE;
+
+ return SOLVMXNET_SUCCESS;
+
+err:
+ for (i=0; i<dp->vxnNumRxBufs; i++) {
+ if (dp->rxRingBuffPtr[i]) {
+ Vxn_FreeRxBuf(dp->rxRingBuffPtr[i]);
+ dp->rxRingBuffPtr[i] = NULL;
+ }
+ }
+
+ Vxn_FreeRxBufPool(dp);
+ return SOLVMXNET_FAILURE;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_FreeDmaMem --
+ * Free allocated dma memory
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static void
+Vxn_FreeDmaMem(dma_buf_t *dma)
+{
+ ddi_dma_unbind_handle(dma->dmaHdl);
+ ddi_dma_mem_free(&dma->dataAccHdl);
+ ddi_dma_free_handle(&dma->dmaHdl);
+
+ dma->buf = NULL;
+ dma->phyBuf = NULL;
+ dma->bufLen = 0;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_AllocDmaMem --
+ * Allocate dma-able memory and fill passed in dma descriptor pointer
+ * if successful
+ *
+ * Results:
+ * SOLVMXNET_SUCCESS on success
+ * SOLVMXNET_FAILURE on failure
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_AllocDmaMem(vxn_softc_t *dp, int size, int cansleep, dma_buf_t *dma)
+{
+ /*
+ * Allocate handle
+ */
+ if (ddi_dma_alloc_handle(dp->dip, &vxn_dma_attrs,
+ cansleep ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT,
+ NULL, &dma->dmaHdl) != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "%s%d: Vxn_AllocDmaMem: failed to allocate handle",
+ dp->drvName, dp->unit);
+ return SOLVMXNET_FAILURE;
+ }
+
+ /*
+ * Allocate memory
+ */
+ if (ddi_dma_mem_alloc(dma->dmaHdl, size, &vxn_buf_attrs, DDI_DMA_CONSISTENT,
+ cansleep ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT, NULL,
+ &dma->buf, &dma->bufLen, &dma->dataAccHdl)
+ != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "%s%d: Vxn_AllocDmaMem: "
+ "ddi_dma_mem_alloc %d bytes failed",
+ dp->drvName, dp->unit, size);
+ ddi_dma_free_handle(&dma->dmaHdl);
+ return SOLVMXNET_FAILURE;
+ }
+
+ /*
+ * Mapin memory
+ */
+ if (ddi_dma_addr_bind_handle(dma->dmaHdl, NULL, dma->buf, dma->bufLen,
+ DDI_DMA_RDWR | DDI_DMA_STREAMING,
+ cansleep ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT,
+ NULL, &dma->cookie, &dma->cookieCount)
+ != DDI_DMA_MAPPED) {
+ cmn_err(CE_WARN, "%s%d: Vxn_AllocDmaMem: failed to bind handle",
+ dp->drvName, dp->unit);
+ ddi_dma_mem_free(&dma->dataAccHdl);
+ ddi_dma_free_handle(&dma->dmaHdl);
+ return SOLVMXNET_FAILURE;
+ }
+
+ if (dma->cookieCount != 1) {
+ cmn_err(CE_WARN, "%s%d: Vxn_AllocDmaMem: too many DMA cookies",
+ dp->drvName, dp->unit);
+ Vxn_FreeDmaMem(dma);
+ return SOLVMXNET_FAILURE;
+ }
+
+ /*
+ * Save physical address (for easy use)
+ */
+ dma->phyBuf = dma->cookie.dmac_address;
+
+ return SOLVMXNET_SUCCESS;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_FreeDriverData --
+ * Free driver data structures and Tx Rx buffers
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static void
+Vxn_FreeDriverData(vxn_softc_t *dp)
+{
+ Vxn_FreeInitBuffers(dp);
+ Vxn_FreeDmaMem(&dp->driverDataDmaMem);
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_AllocDriverData --
+ * Allocate driver data structures and Tx Rx buffers on init
+ *
+ * Results:
+ * SOLVMXNET_SUCCESS on success
+ * SOLVMXNET_FAILURE on failure
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_AllocDriverData(vxn_softc_t *dp)
+{
+ uint32_t r, driverDataSize;
+
+ /*
+ * Get configured receive buffers
+ */
+ OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_GET_NUM_RX_BUFFERS);
+ r = INL(dp, VMXNET_COMMAND_ADDR);
+ if (r == 0 || r > MAX_NUM_RECV_BUFFERS) {
+ r = DEFAULT_NUM_RECV_BUFFERS;
+ }
+ dp->vxnNumRxBufs = r;
+
+ /*
+ * Get configured transmit buffers
+ */
+ OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_GET_NUM_TX_BUFFERS);
+ r = INL(dp, VMXNET_COMMAND_ADDR);
+ if (r == 0 || r > MAX_NUM_XMIT_BUFFERS) {
+ r = DEFAULT_NUM_XMIT_BUFFERS;
+ }
+ dp->vxnNumTxBufs = r;
+
+ /*
+ * Calculate shared data size and allocate memory for it
+ */
+ driverDataSize =
+ sizeof(Vmxnet2_DriverData) +
+ /* numRecvBuffers + 1 for the dummy recvRing2 (used only by Windows) */
+ (dp->vxnNumRxBufs + 1) * sizeof(Vmxnet2_RxRingEntry) +
+ dp->vxnNumTxBufs * sizeof(Vmxnet2_TxRingEntry);
+
+ if (Vxn_AllocDmaMem(dp, driverDataSize, TRUE, &dp->driverDataDmaMem)
+ != SOLVMXNET_SUCCESS) {
+ return SOLVMXNET_FAILURE;
+ }
+
+ /*
+ * Clear memory (bzero isn't resolved by module loader for some reason)
+ */
+ ASSERT(dp->driverDataDmaMem.buf && dp->driverDataDmaMem.bufLen);
+ Vxn_Memset(dp->driverDataDmaMem.buf, 0, dp->driverDataDmaMem.bufLen);
+
+ dp->driverData = (Vmxnet2_DriverData *)dp->driverDataDmaMem.buf;
+ dp->driverDataPhy = (void *)(uintptr_t)dp->driverDataDmaMem.phyBuf;
+
+ /* So that the vmkernel can check it is compatible */
+ dp->driverData->magic = VMXNET2_MAGIC;
+ dp->driverData->length = driverDataSize;
+
+ /*
+ * Alloc rx/tx buffers, init ring, register with hardware etc.
+ */
+ if (Vxn_AllocInitBuffers(dp) != SOLVMXNET_SUCCESS) {
+ Vxn_FreeDmaMem(&dp->driverDataDmaMem);
+ return SOLVMXNET_FAILURE;
+ }
+
+ DPRINTF(3, (CE_CONT, "%s%d: numRxBufs=(%d*%"FMT64"d) numTxBufs=(%d*%"FMT64"d)"
+ " driverDataSize=%d driverDataPhy=0x%p\n",
+ dp->drvName, dp->unit,
+ dp->vxnNumRxBufs, (uint64_t)sizeof(Vmxnet2_RxRingEntry),
+ dp->vxnNumTxBufs, (uint64_t)sizeof(Vmxnet2_TxRingEntry),
+ driverDataSize, dp->driverDataPhy));
+
+ return SOLVMXNET_SUCCESS;
+}
+
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_Attach --
+ * Probe and attach driver to stack
+ *
+ * Results:
+ * DDI_SUCCESS
+ * DDI_FAILURE
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_Attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int i, ret, len, unit;
+ const char *drvName;
+ ddi_acc_handle_t confHdl;
+ uint16_t vid, did;
+ uint8_t revid;
+ struct pci_phys_spec *regs;
+ caddr_t vxnIOp;
+ ddi_acc_handle_t vxnIOHdl;
+ uint32_t vLow, vHigh;
+ gld_mac_info_t *macInfo;
+ vxn_softc_t *dp;
+ boolean_t morphed = FALSE;
+ uint_t regSpaceSize;
+ uint_t chip;
+ uint_t vxnIOSize;
+
+ if (cmd != DDI_ATTACH) {
+ return DDI_FAILURE;
+ }
+
+ unit = ddi_get_instance(dip);
+ drvName = ddi_driver_name(dip);
+
+ /*
+ * Check if chip is supported.
+ */
+ if (pci_config_setup(dip, &confHdl) != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "%s%d: pci_config_setup() failed", drvName, unit);
+ return DDI_FAILURE;
+ }
+
+ vid = pci_config_get16(confHdl, PCI_CONF_VENID);
+ did = pci_config_get16(confHdl, PCI_CONF_DEVID);
+ revid = pci_config_get8(confHdl, PCI_CONF_REVID);
+
+ if (vid == PCI_VENDOR_ID_VMWARE && did == PCI_DEVICE_ID_VMWARE_NET) {
+ /* Found vmxnet */
+ chip = VMXNET_CHIP;
+ }
+ else if (vid == PCI_VENDOR_ID_AMD && did == PCI_DEVICE_ID_AMD_VLANCE) {
+ /* Found vlance (maybe a vmxnet disguise) */
+ chip = LANCE_CHIP;
+ }
+ else {
+ /* Not Found */
+ DPRINTF(3, (CE_WARN, "%s: Vxn_Attach: wrong PCI venid/devid (0x%x, 0x%x)",
+ drvName, vid, did));
+ goto err;
+ }
+
+ DPRINTF(3, (CE_CONT, "%s%d: (vid: 0x%04x, did: 0x%04x, revid: 0x%02x)\n",
+ drvName, unit, vid, did, revid));
+
+ /*
+ * Get device properties
+ */
+ regs = NULL;
+ len = 0;
+ if (ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
+ "reg", (caddr_t)&regs, &len) != DDI_PROP_SUCCESS) {
+ cmn_err(CE_WARN, "%s%d: Vxn_Attach: failed to get reg property",
+ drvName, unit);
+ goto err;
+ }
+
+ ASSERT(regs != NULL && len > 0);
+
+ /*
+ * Search device properties for IO-space
+ */
+ for (i = 0; i <len / sizeof(struct pci_phys_spec); i++) {
+ if ((regs[i].pci_phys_hi & PCI_REG_ADDR_M) == PCI_ADDR_IO) {
+ regSpaceSize = regs[i].pci_size_low;
+ DPRINTF(5, (CE_CONT, "%s%d: Vxn_Attach: regSpaceSize=%d\n",
+ drvName, unit, regSpaceSize));
+ kmem_free(regs, len);
+ goto map_space_found;
+ }
+ }
+
+ cmn_err(CE_WARN, "%s%d: Vxn_Attach: failed to find IO space", drvName, unit);
+ kmem_free(regs, len);
+ goto err;
+
+map_space_found:
+
+ /*
+ * Ensure we can access registers through IO space.
+ */
+ ret = pci_config_get16(confHdl, PCI_CONF_COMM);
+ ret |= PCI_COMM_IO | PCI_COMM_ME;
+ pci_config_put16(confHdl, PCI_CONF_COMM, ret);
+
+ if (ddi_regs_map_setup(dip, i, (caddr_t *)&vxnIOp, 0, 0, &dev_attr,
+ &vxnIOHdl) != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "%s%d: Vxn_Attach: ddi_regs_map_setup failed",
+ drvName, unit);
+ goto err;
+ }
+
+ if (chip == VMXNET_CHIP) {
+ vxnIOSize = VMXNET_CHIP_IO_RESV_SIZE;
+ }
+ else {
+ /*
+ * Since this is a vlance adapter we can only use it if
+ * its I/0 space is big enough for the adapter to be
+ * capable of morphing. This is the first requirement
+ * for this adapter to potentially be morphable. The
+ * layout of a morphable LANCE adapter is
+ *
+ * I/O space:
+ *
+ * |------------------|
+ * | LANCE IO PORTS |
+ * |------------------|
+ * | MORPH PORT |
+ * |------------------|
+ * | VMXNET IO PORTS |
+ * |------------------|
+ *
+ * VLance has 8 ports of size 4 bytes, the morph port is 4 bytes, and
+ * Vmxnet has 10 ports of size 4 bytes.
+ *
+ * We shift up the ioaddr with the size of the LANCE I/O space since
+ * we want to access the vmxnet ports. We also shift the ioaddr up by
+ * the MORPH_PORT_SIZE so other port access can be independent of
+ * whether we are Vmxnet or a morphed VLance. This means that when
+ * we want to access the MORPH port we need to subtract the size
+ * from ioaddr to get to it.
+ */
+ vxnIOp += LANCE_CHIP_IO_RESV_SIZE + MORPH_PORT_SIZE;
+ vxnIOSize = LANCE_CHIP_IO_RESV_SIZE + MORPH_PORT_SIZE +
+ VMXNET_CHIP_IO_RESV_SIZE;
+ }
+
+ /*
+ * Do not attempt to morph non-morphable AMD PCnet
+ */
+ if (vxnIOSize > regSpaceSize) {
+ cmn_err(CE_WARN, "%s%d: Vxn_Attach: "
+ "vlance device is not supported by this driver", drvName, unit);
+ goto err_free_regs_map;
+ }
+
+ /*
+ * Morph, if we found a vlance adapter
+ */
+ if (chip == LANCE_CHIP) {
+ uint16_t magic;
+
+ /* Read morph port to verify that we can morph the adapter */
+ magic = ddi_get16(vxnIOHdl, (uint16_t *)(vxnIOp - MORPH_PORT_SIZE));
+ if (magic != LANCE_CHIP && magic != VMXNET_CHIP) {
+ cmn_err(CE_WARN, "%s%d: Vxn_Attach: Invalid magic, read: 0x%08X",
+ drvName, unit, magic);
+ goto err_free_regs_map;
+ }
+
+ /* Morph */
+ ddi_put16(vxnIOHdl, (uint16_t *)(vxnIOp - MORPH_PORT_SIZE), VMXNET_CHIP);
+ morphed = TRUE;
+
+ /* Verify that we morphed correctly */
+ magic = ddi_get16(vxnIOHdl, (uint16_t *)(vxnIOp - MORPH_PORT_SIZE));
+ if (magic != VMXNET_CHIP) {
+ cmn_err(CE_WARN, "%s%d: Vxn_Attach: Couldn't morph adapter."
+ " Invalid magic, read:: 0x%08X", drvName, unit, magic);
+ goto err_morph_back;
+ }
+ }
+
+ /*
+ * Check the version number of the device implementation
+ */
+ vLow = (uint32_t)ddi_get32(vxnIOHdl,
+ (uint32_t *)(vxnIOp+VMXNET_LOW_VERSION));
+ vHigh = (uint32_t)ddi_get32(vxnIOHdl,
+ (uint32_t *)(vxnIOp+VMXNET_HIGH_VERSION));
+
+ if ((vLow & 0xffff0000) != (VMXNET2_MAGIC & 0xffff0000) ||
+ ((VMXNET2_MAGIC < vLow) || (VMXNET2_MAGIC > vHigh))) {
+ cmn_err(CE_WARN, "%s%d: Vxn_Attach: driver version 0x%08X doesn't "
+ "match device 0x%08X:0x%08X",
+ drvName, unit, VMXNET2_MAGIC, vLow, vHigh);
+ goto err_version_mismatch;
+ }
+
+ /*
+ * Alloc soft state
+ */
+ macInfo = gld_mac_alloc(dip);
+ if (!macInfo) {
+ cmn_err(CE_WARN, "%s%d: Vxn_Attach: gld_mac_alloc failed",
+ drvName, unit);
+ goto err_gld_mac_alloc;
+ }
+
+ dp = (vxn_softc_t *) kmem_zalloc(sizeof(vxn_softc_t), KM_SLEEP);
+ ASSERT(dp);
+
+ /*
+ * Get interrupt cookie
+ */
+ if (ddi_get_iblock_cookie(dip, 0, &dp->iblockCookie) != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "%s%d: Vxn_Attach: ddi_get_iblock_cookie failed",
+ drvName, unit);
+ goto err_get_iblock_cookie;
+ }
+
+ strncpy(dp->drvName, drvName, SOLVMXNET_MAXNAME);
+ dp->unit = unit;
+ dp->dip = dip;
+ dp->macInfo = macInfo;
+ dp->confHdl = confHdl;
+ dp->vxnIOHdl = vxnIOHdl;
+ dp->vxnIOp = vxnIOp;
+ dp->morphed = morphed;
+ dp->nicActive = FALSE;
+ dp->txPending = 0;
+ dp->maxTxFrags = 1;
+
+ /*
+ * Initialize mutexes
+ */
+ mutex_init(&dp->intrlock, NULL, MUTEX_DRIVER, (void *)dp->iblockCookie);
+ mutex_init(&dp->xmitlock, NULL, MUTEX_DRIVER, (void *)dp->iblockCookie);
+ mutex_init(&dp->rxlistlock, NULL, MUTEX_DRIVER, (void *)dp->iblockCookie);
+
+ /*
+ * Allocate and initialize our private and shared data structures
+ */
+ if (Vxn_AllocDriverData(dp) != SOLVMXNET_SUCCESS) {
+ goto err_alloc_driverdata;
+ }
+
+ /*
+ * Read the MAC address from the device
+ */
+ for (i = 0; i < 6; i++) {
+ dp->devAddr.ether_addr_octet[i] =
+ (uint8_t)ddi_get8(vxnIOHdl, (uint8_t *)(vxnIOp + VMXNET_MAC_ADDR + i));
+ }
+ macInfo->gldm_vendor_addr = dp->devAddr.ether_addr_octet;
+ macInfo->gldm_broadcast_addr = etherbroadcastaddr.ether_addr_octet;
+
+ DPRINTF(3, (CE_CONT,
+ "MAC address: %02x:%02x:%02x:%02x:%02x:%02x\n",
+ dp->devAddr.ether_addr_octet[0],
+ dp->devAddr.ether_addr_octet[1],
+ dp->devAddr.ether_addr_octet[2],
+ dp->devAddr.ether_addr_octet[3],
+ dp->devAddr.ether_addr_octet[4],
+ dp->devAddr.ether_addr_octet[5]));
+
+ /*
+ * Configure GLD entry points
+ */
+ macInfo->gldm_devinfo = dip;
+ macInfo->gldm_private = (caddr_t)dp;
+ macInfo->gldm_cookie = dp->iblockCookie;
+ macInfo->gldm_reset = Vxn_Reset;
+ macInfo->gldm_start = Vxn_Start;
+ macInfo->gldm_stop = Vxn_Stop;
+ macInfo->gldm_set_mac_addr = Vxn_SetMacAddress;
+ macInfo->gldm_send = Vxn_Send;
+ macInfo->gldm_set_promiscuous = Vxn_SetPromiscuous;
+ macInfo->gldm_get_stats = Vxn_GetStats;
+ macInfo->gldm_ioctl = NULL;
+ macInfo->gldm_set_multicast= Vxn_SetMulticast;
+ macInfo->gldm_intr = Vxn_Interrupt;
+ macInfo->gldm_mctl = NULL;
+
+ macInfo->gldm_ident = (char *)ddi_driver_name(dip);
+ macInfo->gldm_type = DL_ETHER;
+ macInfo->gldm_minpkt = 0;
+ macInfo->gldm_maxpkt = ETHERMTU;
+ macInfo->gldm_addrlen = ETHERADDRL;
+ macInfo->gldm_saplen = -2;
+ macInfo->gldm_ppa = unit;
+
+ /*
+ * Register with GLD (Generic Lan Driver) framework
+ */
+ if (gld_register(dip,
+ (char *)ddi_driver_name(dip), macInfo) != DDI_SUCCESS) {
+ goto err_gld_register;
+ }
+
+ /*
+ * Add interrupt to system.
+ */
+ if (ddi_add_intr(dip, 0, NULL, NULL, gld_intr,
+ (caddr_t)macInfo) != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "%s%d: ddi_add_intr failed", drvName, unit);
+ goto err_ddi_add_intr;
+ }
+
+ /*
+ * Add to list of interfaces.
+ */
+ mutex_enter(&vxnListLock);
+ dp->next = &vxnList;
+ dp->prev = vxnList.prev;
+ vxnList.prev->next = dp;
+ vxnList.prev = dp;
+ mutex_exit(&vxnListLock);
+
+ /*
+ * Success
+ */
+ return DDI_SUCCESS;
+
+err_ddi_add_intr:
+ gld_unregister(macInfo);
+
+err_gld_register:
+ Vxn_FreeDriverData(dp);
+
+err_alloc_driverdata:
+ mutex_destroy(&dp->intrlock);
+ mutex_destroy(&dp->xmitlock);
+
+err_get_iblock_cookie:
+ kmem_free(dp, sizeof(*dp));
+ gld_mac_free(macInfo);
+
+err_gld_mac_alloc:
+err_version_mismatch:
+err_morph_back:
+ if (morphed) {
+ ddi_put16(vxnIOHdl, (uint16_t *)(vxnIOp - MORPH_PORT_SIZE), LANCE_CHIP);
+ }
+
+err_free_regs_map:
+ ddi_regs_map_free(&vxnIOHdl);
+
+err:
+ pci_config_teardown(&confHdl);
+ return DDI_FAILURE;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_Detach --
+ * Called on module unload
+ *
+ * Results:
+ * DDI_SUCCESS
+ * DDI_FAILURE
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_Detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ gld_mac_info_t *macInfo;
+ vxn_softc_t *dp;
+
+ macInfo = (gld_mac_info_t *)ddi_get_driver_private(dip);
+ dp = (vxn_softc_t *)macInfo->gldm_private;
+
+ if (cmd == DDI_DETACH) {
+ /*
+ * Tear down interrupt
+ */
+ ddi_remove_intr(dip, 0, macInfo->gldm_cookie);
+ gld_unregister(macInfo);
+
+ /*
+ * Quiesce hardware
+ */
+ Vxn_Stop(macInfo);
+
+ /*
+ * Free driver-data, tx/rx buffers etc
+ */
+ Vxn_FreeDriverData(dp);
+
+ /*
+ * Destroy locks
+ */
+ mutex_destroy(&dp->intrlock);
+ mutex_destroy(&dp->xmitlock);
+
+ /*
+ * Unmorph
+ */
+ if (dp->morphed) {
+ uint16_t magic;
+
+ /* Verify that we had morphed earlier */
+ magic = ddi_get16(dp->vxnIOHdl,
+ (uint16_t *)(dp->vxnIOp - MORPH_PORT_SIZE));
+ if (magic != VMXNET_CHIP) {
+ cmn_err(CE_WARN, "%s%d: Vxn_Detach: Adapter not morphed"
+ " magic=0x%08X", dp->drvName, dp->unit, magic);
+ }
+ else {
+ /* Unmorph */
+ ddi_put16(dp->vxnIOHdl,
+ (uint16_t *)(dp->vxnIOp - MORPH_PORT_SIZE), LANCE_CHIP);
+
+ /* Verify */
+ magic = ddi_get16(dp->vxnIOHdl,
+ (uint16_t *)(dp->vxnIOp - MORPH_PORT_SIZE));
+ if (magic != LANCE_CHIP) {
+ cmn_err(CE_WARN, "%s%d: Vxn_Detach: Unable to unmorph adapter"
+ " magic=0x%08X", dp->drvName, dp->unit, magic);
+ }
+ }
+ }
+
+ /*
+ * Release resister mappings
+ */
+ ddi_regs_map_free(&dp->vxnIOHdl);
+ pci_config_teardown(&dp->confHdl);
+
+ /*
+ * Remove from list of interfaces.
+ */
+ mutex_enter(&vxnListLock);
+ ASSERT(dp != &vxnList);
+ dp->prev->next = dp->next;
+ dp->next->prev = dp->prev;
+ mutex_exit(&vxnListLock);
+
+ /*
+ * Release memory
+ */
+ kmem_free(dp, sizeof(*dp));
+ gld_mac_free(macInfo);
+
+ return DDI_SUCCESS;
+ }
+ else {
+ return DDI_FAILURE;
+ }
+}
+
+static struct module_info vxnminfo = {
+ 0, /* mi_idnum */
+ "vmxnet", /* mi_idname */
+ 0, /* mi_minpsz */
+ ETHERMTU, /* mi_maxpsz */
+ QHIWATER, /* mi_hiwat */
+ 1, /* mi_lowat */
+};
+
+static struct qinit vxnrinit = {
+ NULL, /* qi_putp */
+ gld_rsrv, /* qi_srvp */
+ gld_open, /* qi_qopen */
+ gld_close, /* qi_qclose */
+ NULL, /* qi_qadmin */
+ &vxnminfo, /* qi_minfo */
+ NULL /* qi_mstat */
+};
+
+static struct qinit vxnwinit = {
+ gld_wput, /* qi_putp */
+ gld_wsrv, /* qi_srvp */
+ NULL, /* qi_qopen */
+ NULL, /* qi_qclose */
+ NULL, /* qi_qadmin */
+ &vxnminfo, /* qi_minfo */
+ NULL /* qi_mstat */
+};
+
+static struct streamtab vxn_info = {
+ &vxnrinit, /* st_rdinit */
+ &vxnwinit, /* st_wrinit */
+ NULL, /* st_muxrinit */
+ NULL /* st_muxwrinit */
+};
+
+static struct cb_ops cb_vxn_ops = {
+ nulldev, /* cb_open */
+ nulldev, /* cb_close */
+ nodev, /* cb_strategy */
+ nodev, /* cb_print */
+ nodev, /* cb_dump */
+ nodev, /* cb_read */
+ nodev, /* cb_write */
+ nodev, /* cb_ioctl */
+ nodev, /* cb_devmap */
+ nodev, /* cb_mmap */
+ nodev, /* cb_segmap */
+ nochpoll, /* cb_chpoll */
+ ddi_prop_op, /* cb_prop_op */
+ &vxn_info, /* cb_stream */
+ D_NEW|D_MP /* cb_flag */
+};
+
+static struct dev_ops vxn_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* devo_refcnt */
+ gld_getinfo, /* devo_getinfo */
+ nulldev, /* devo_identify */
+ nulldev, /* devo_probe */
+ Vxn_Attach, /* devo_attach */
+ Vxn_Detach, /* devo_detach */
+ nodev, /* devo_reset */
+ &cb_vxn_ops, /* devo_cb_ops */
+ NULL, /* devo_bus_ops */
+ ddi_power /* devo_power */
+};
+
+static struct modldrv modldrv = {
+ &mod_driverops,
+ ident,
+ &vxn_ops,
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, {&modldrv, NULL,}
+};
+
+
+/*
+ * Module load entry point
+ */
+int
+_init(void)
+{
+ int err;
+
+ DPRINTF(5, (CE_CONT, "vxn: _init:\n"));
+ /* Initialize interface list */
+ vxnList.next = vxnList.prev = &vxnList;
+ mutex_init(&vxnListLock, NULL, MUTEX_DRIVER, NULL);
+ if ((err = mod_install(&modlinkage)) != 0) {
+ mutex_destroy(&vxnListLock);
+ }
+ return err;
+}
+
+/*
+ * Module unload entry point
+ */
+int
+_fini(void)
+{
+ int err;
+
+ DPRINTF(5, (CE_CONT, "vxn: _fini:\n"));
+ if ((err = mod_remove(&modlinkage)) == 0) {
+ mutex_destroy(&vxnListLock);
+ }
+ return err;
+}
+
+/*
+ * Module info entry point
+ */
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
diff --git a/usr/src/uts/intel/io/vmxnet/vmxnet.conf b/usr/src/uts/intel/io/vmxnet/vmxnet.conf
new file mode 100644
index 0000000000..eb3b160412
--- /dev/null
+++ b/usr/src/uts/intel/io/vmxnet/vmxnet.conf
@@ -0,0 +1,24 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2012, Joyent, Inc. All rights reserved.
+# Use is subject to license terms.
+#
diff --git a/usr/src/uts/intel/io/vmxnet/vmxnet2_def.h b/usr/src/uts/intel/io/vmxnet/vmxnet2_def.h
new file mode 100644
index 0000000000..5ea437df72
--- /dev/null
+++ b/usr/src/uts/intel/io/vmxnet/vmxnet2_def.h
@@ -0,0 +1,436 @@
+/*********************************************************
+ * Copyright (C) 2004 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ *********************************************************/
+
+/*********************************************************
+ * The contents of this file are subject to the terms of the Common
+ * Development and Distribution License (the "License") version 1.0
+ * and no later version. You may not use this file except in
+ * compliance with the License.
+ *
+ * You can obtain a copy of the License at
+ * http://www.opensource.org/licenses/cddl1.php
+ *
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ *********************************************************/
+
+#ifndef _VMXNET2_DEF_H_
+#define _VMXNET2_DEF_H_
+
+#define INCLUDE_ALLOW_USERLEVEL
+
+#define INCLUDE_ALLOW_MODULE
+#define INCLUDE_ALLOW_VMK_MODULE
+#define INCLUDE_ALLOW_VMKERNEL
+#define INCLUDE_ALLOW_DISTRIBUTE
+#include "includeCheck.h"
+
+#include "net_sg.h"
+#include "vmxnet_def.h"
+
+
+/*
+ * Magic number that identifies this version of the vmxnet protocol.
+ */
+#define VMXNET2_MAGIC 0xbabe864f
+
+/* size of the rx ring */
+#define VMXNET2_MAX_NUM_RX_BUFFERS 128
+#define VMXNET2_DEFAULT_NUM_RX_BUFFERS 100
+
+
+/* size of the rx ring when enhanced vmxnet is used */
+#define ENHANCED_VMXNET2_MAX_NUM_RX_BUFFERS 512
+#define ENHANCED_VMXNET2_DEFAULT_NUM_RX_BUFFERS 150
+
+/* size of the 2nd rx ring */
+#define VMXNET2_MAX_NUM_RX_BUFFERS2 2048
+#define VMXNET2_DEFAULT_NUM_RX_BUFFERS2 512
+
+/* size of the tx ring */
+#define VMXNET2_MAX_NUM_TX_BUFFERS 128
+#define VMXNET2_DEFAULT_NUM_TX_BUFFERS 100
+
+/* size of the tx ring when tso/jf is used */
+#define VMXNET2_MAX_NUM_TX_BUFFERS_TSO 512
+#define VMXNET2_DEFAULT_NUM_TX_BUFFERS_TSO 256
+
+enum {
+ VMXNET2_OWNERSHIP_DRIVER,
+ VMXNET2_OWNERSHIP_DRIVER_PENDING,
+ VMXNET2_OWNERSHIP_NIC,
+ VMXNET2_OWNERSHIP_NIC_PENDING,
+ VMXNET2_OWNERSHIP_NIC_FRAG,
+ VMXNET2_OWNERSHIP_DRIVER_FRAG,
+};
+
+#define VMXNET2_SG_DEFAULT_LENGTH 6
+
+typedef struct Vmxnet2_SG_Array {
+ uint16 addrType;
+ uint16 length;
+ NetSG_Elem sg[VMXNET2_SG_DEFAULT_LENGTH];
+} Vmxnet2_SG_Array;
+
+typedef struct Vmxnet2_RxRingEntry {
+ uint64 paddr; /* Physical address of the packet data. */
+ uint32 bufferLength; /* The length of the data at paddr. */
+ uint32 actualLength; /* The actual length of the received data. */
+ uint16 ownership; /* Who owns the packet. */
+ uint16 flags; /* Flags as defined below. */
+ uint32 index; /*
+ * Currently:
+ *
+ * This is being used as an packet index to
+ * rx buffers.
+ *
+ * Originally:
+ *
+ * was void* driverData ("Driver specific data.")
+ * which was used for sk_buf**s in Linux and
+ * VmxnetRxBuff*s in Windows. It could not be
+ * here because the structure needs to be the
+ * same size between architectures, and it was
+ * not used on the device side, anyway. Look
+ * for its replacement in
+ * Vmxnet_Private.rxRingBuffPtr on Linux and
+ * VmxnetAdapter.rxRingBuffPtr on Windows.
+ */
+} Vmxnet2_RxRingEntry;
+
+/*
+ * Vmxnet2_RxRingEntry flags:
+ *
+ * VMXNET2_RX_HW_XSUM_OK The hardware verified the TCP/UDP checksum.
+ * VMXNET2_RX_WITH_FRAG More data is in the 2nd ring
+ * VMXNET2_RX_FRAG_EOP This is the last frag, the only valid flag for
+ * 2nd ring entry
+ *
+ */
+#define VMXNET2_RX_HW_XSUM_OK 0x01
+#define VMXNET2_RX_WITH_FRAG 0x02
+#define VMXNET2_RX_FRAG_EOP 0x04
+
+typedef struct Vmxnet2_TxRingEntry {
+ uint16 flags; /* Flags as defined below. */
+ uint16 ownership; /* Who owns this packet. */
+ uint32 extra; /*
+ * was void* driverData ("Driver specific data.")
+ * which was used for sk_buf*s in Linux and
+ * VmxnetTxInfo*s in Windows. It could not be
+ * here because the structure needs to be the
+ * same size between architectures, and it was
+ * not used on the device side, anyway. Look
+ * for its replacement in
+ * Vmxnet_Private.txRingBuffPtr on Linux and
+ * VmxnetAdapter.txRingBuffPtr on Windows.
+ */
+ uint32 tsoMss; /* TSO pkt MSS */
+ Vmxnet2_SG_Array sg; /* Packet data. */
+} Vmxnet2_TxRingEntry;
+
+/*
+ * Vmxnet2_TxRingEntry flags:
+ *
+ * VMXNET2_TX_CAN_KEEP The implementation can return the tx ring entry
+ * to the driver when it is ready as opposed to
+ * before the transmit call from the driver completes.
+ * VMXNET2_TX_RING_LOW The driver's transmit ring buffer is low on free
+ * slots.
+ * VMXNET2_TX_HW_XSUM The hardware should perform the TCP/UDP checksum
+ * VMXNET2_TX_TSO The hardware should do TCP segmentation.
+ * VMXNET2_TX_PINNED_BUFFER The driver used one of the preallocated vmkernel
+ * buffers *and* it has been pinned with Net_PinTxBuffers.
+ * VMXNET2_TX_MORE This is *not* the last tx entry for the pkt.
+ * All flags except VMXNET2_TX_MORE are ignored
+ * for the subsequent tx entries.
+ */
+#define VMXNET2_TX_CAN_KEEP 0x0001
+#define VMXNET2_TX_RING_LOW 0x0002
+#define VMXNET2_TX_HW_XSUM 0x0004
+#define VMXNET2_TX_TSO 0x0008
+#define VMXNET2_TX_PINNED_BUFFER 0x0010
+#define VMXNET2_TX_MORE 0x0020
+
+/*
+ * Structure used by implementations. This structure allows the inline
+ * functions below to be used.
+ */
+typedef struct Vmxnet2_RxRingInfo {
+ Vmxnet2_RxRingEntry *base; /* starting addr of the ring */
+ uint32 nicNext; /* next entry to use in the ring */
+ uint32 ringLength; /* # of entries in the ring */
+ PA startPA; /* PA of the starting addr of the ring */
+#ifdef VMX86_DEBUG
+ const char *name;
+#endif
+} Vmxnet2_RxRingInfo;
+
+typedef struct Vmxnet2_TxRingInfo {
+ Vmxnet2_TxRingEntry *base; /* starting addr of the ring */
+ uint32 nicNext; /* next entry to use in the ring */
+ uint32 ringLength; /* # of entries in the ring */
+ PA startPA; /* PA of the starting addr of the ring */
+#ifdef VMX86_DEBUG
+ const char *name;
+#endif
+} Vmxnet2_TxRingInfo;
+
+typedef struct Vmxnet2_ImplData {
+ Vmxnet2_RxRingInfo rxRing;
+ Vmxnet2_RxRingInfo rxRing2;
+ Vmxnet2_TxRingInfo txRing;
+
+ struct PhysMem_Token *ddPhysMemToken;
+} Vmxnet2_ImplData;
+
+/*
+ * Used internally for performance studies. By default this will be off so there
+ * should be no compatibilty or other interferences.
+ */
+
+/* #define ENABLE_VMXNET2_PROFILING */
+
+
+#ifdef ENABLE_VMXNET2_PROFILING
+typedef struct Vmxnet2_VmmStats {
+ uint64 vIntTSC; /* the time that virtual int was posted */
+ uint64 actionsCount; /* Number of actions received */
+ uint64 numWasteActions; /* Number of non-productive actions */
+} Vmxnet2_VmmStats;
+#endif
+
+typedef struct Vmxnet2_DriverStats {
+ uint32 transmits; /* # of times that the drivers transmit function */
+ /* is called. The driver could transmit more */
+ /* than one packet per call. */
+ uint32 pktsTransmitted; /* # of packets transmitted. */
+ uint32 noCopyTransmits; /* # of packets that are transmitted without */
+ /* copying any data. */
+ uint32 copyTransmits; /* # of packets that are transmittted by copying */
+ /* the data into a buffer. */
+ uint32 maxTxsPending; /* Max # of transmits outstanding. */
+ uint32 txStopped; /* # of times that transmits got stopped because */
+ /* the tx ring was full. */
+ uint32 txRingOverflow; /* # of times that transmits got deferred bc */
+ /* the tx ring was full. This must be >= */
+ /* txStopped since there will be one */
+ /* txStopped when the ring fills up and then */
+ /* one txsRingOverflow for each packet that */
+ /* that gets deferred until there is space. */
+ uint32 interrupts; /* # of times interrupted. */
+ uint32 pktsReceived; /* # of packets received. */
+ uint32 rxBuffersLow; /* # of times that the driver was low on */
+ /* receive buffers. */
+#ifdef ENABLE_VMXNET2_PROFILING
+ Vmxnet2_VmmStats vmmStats; /* vmm related stats for perf study */
+#endif
+} Vmxnet2_DriverStats;
+
+/*
+ * Shared data structure between the vm, the vmm, and the vmkernel.
+ * This structure was originally arranged to try to group common data
+ * on 32-byte cache lines, but bit rot and the fact that we no longer
+ * run on many CPUs with that cacheline size killed that optimization.
+ * vmxnet3 should target 128 byte sizes and alignments to optimize for
+ * the 64 byte cacheline pairs on P4.
+ */
+typedef struct Vmxnet2_DriverData {
+ /*
+ * Magic must be first.
+ */
+ Vmxnet_DDMagic magic;
+
+ /*
+ * Receive fields.
+ */
+ uint32 rxRingLength; /* Length of the receive ring. */
+ uint32 rxDriverNext; /* Index of the next packet that will */
+ /* be filled in by the impl */
+
+ uint32 rxRingLength2; /* Length of the 2nd receive ring. */
+ uint32 rxDriverNext2; /* Index of the next packet that will */
+ /* be filled in by the impl */
+
+ uint32 notUsed1; /* was "irq" */
+
+ /*
+ * Interface flags and multicast filter.
+ */
+ uint32 ifflags;
+ uint32 LADRF[VMXNET_MAX_LADRF];
+
+ /*
+ * Transmit fields
+ */
+ uint32 txDontClusterSize; /* All packets <= this will be transmitted */
+ /* immediately, regardless of clustering */
+ /* settings [was fill[1]] */
+ uint32 txRingLength; /* Length of the transmit ring. */
+ uint32 txDriverCur; /* Index of the next packet to be */
+ /* returned by the implementation.*/
+ uint32 txDriverNext; /* Index of the entry in the ring */
+ /* buffer to use for the next packet.*/
+ uint32 txStopped; /* The driver has stopped transmitting */
+ /* because its ring buffer is full.*/
+ uint32 txClusterLength; /* Maximum number of packets to */
+ /* put in the ring buffer before */
+ /* asking the implementation to */
+ /* transmit the packets in the buffer.*/
+ uint32 txNumDeferred; /* Number of packets that have been */
+ /* queued in the ring buffer since */
+ /* the last time the implementation */
+ /* was asked to transmit. */
+ uint32 notUsed3; /* This field is deprecated but still used */
+ /* as minXmitPhysLength on the escher branch. */
+ /* It cannot be used for other purposes */
+ /* until escher vms no longer are allowed */
+ /* to install this driver. */
+
+ uint32 totalRxBuffers; /* used by esx for max rx buffers */
+ uint64 rxBufferPhysStart; /* used by esx for pinng rx buffers */
+ /*
+ * Extra fields for future expansion.
+ */
+ uint32 extra[2];
+
+ uint16 maxFrags; /* # of frags the driver can handle */
+ uint16 featureCtl; /* for driver to enable some feature */
+
+ /*
+ * The following fields are used to save the nicNext indexes part
+ * of implData in the vmkernel when disconnecting the adapter, we
+ * need them when we reconnect. This mechanism is used for
+ * checkpointing as well.
+ */
+ uint32 savedRxNICNext;
+ uint32 savedRxNICNext2;
+ uint32 savedTxNICNext;
+
+ /*
+ * Fields used during initialization or debugging.
+ */
+ uint32 length;
+ uint32 rxRingOffset;
+ uint32 rxRingOffset2;
+ uint32 txRingOffset;
+ uint32 debugLevel;
+ uint32 txBufferPhysStart;
+ uint32 txBufferPhysLength;
+ uint32 txPktMaxSize;
+
+ /*
+ * Driver statistics.
+ */
+ Vmxnet2_DriverStats stats;
+} Vmxnet2_DriverData;
+
+/*
+ * Shared between VMM and Vmkernel part of vmxnet2 to optimize action posting
+ * VMM writes 1 (don't post) or 0 (okay to post) and vmk reads this.
+ */
+typedef struct VmxnetVMKShared {
+ uint32 dontPostActions;
+} VmxnetVMKShared;
+
+#if defined VMX86_VMX || defined VMKERNEL
+
+/*
+ * Inline functions used to assist the implementation of the vmxnet interface.
+ */
+
+/*
+ * Get the next empty packet out of the receive ring and move to
+ * the next packet.
+ */
+static INLINE Vmxnet2_RxRingEntry *
+Vmxnet2_GetNextRx(Vmxnet2_RxRingInfo *ri, uint16 ownership)
+{
+ Vmxnet2_RxRingEntry *rre = ri->base + ri->nicNext;
+ if (rre->ownership == ownership) {
+ VMXNET_INC(ri->nicNext, ri->ringLength);
+ } else {
+ rre = NULL;
+ }
+
+ return rre;
+}
+
+/*
+ * Return ownership of a packet in the receive ring to the driver.
+ */
+static INLINE void
+Vmxnet2_PutRx(Vmxnet2_RxRingEntry *rre, uint32 pktLength, uint16 ownership)
+{
+ rre->actualLength = pktLength;
+ COMPILER_MEM_BARRIER();
+ rre->ownership = ownership;
+}
+
+/*
+ * Get the next pending packet out of the transmit ring.
+ */
+static INLINE Vmxnet2_TxRingEntry *
+Vmxnet2_GetNextTx(Vmxnet2_TxRingInfo *ri)
+{
+ Vmxnet2_TxRingEntry *txre = ri->base + ri->nicNext;
+ if (txre->ownership == VMXNET2_OWNERSHIP_NIC) {
+ return txre;
+ } else {
+ return NULL;
+ }
+}
+
+/*
+ * Move to the next entry in the transmit ring.
+ */
+static INLINE unsigned int
+Vmxnet2_IncNextTx(Vmxnet2_TxRingInfo *ri)
+{
+ unsigned int prev = ri->nicNext;
+ Vmxnet2_TxRingEntry *txre = ri->base + ri->nicNext;
+
+ txre->ownership = VMXNET2_OWNERSHIP_NIC_PENDING;
+
+ VMXNET_INC(ri->nicNext, ri->ringLength);
+ return prev;
+}
+
+/*
+ * Get the indicated entry from transmit ring.
+ */
+static INLINE Vmxnet2_TxRingEntry *
+Vmxnet2_GetTxEntry(Vmxnet2_TxRingInfo *ri, unsigned int idx)
+{
+ return ri->base + idx;
+}
+
+/*
+ * Get the indicated entry from the given rx ring
+ */
+static INLINE Vmxnet2_RxRingEntry *
+Vmxnet2_GetRxEntry(Vmxnet2_RxRingInfo *ri, unsigned int idx)
+{
+ return ri->base + idx;
+}
+
+#endif /* defined VMX86_VMX || defined VMKERNEL */
+
+#endif
+
diff --git a/usr/src/uts/intel/io/vmxnet/vmxnet_def.h b/usr/src/uts/intel/io/vmxnet/vmxnet_def.h
new file mode 100644
index 0000000000..703466c995
--- /dev/null
+++ b/usr/src/uts/intel/io/vmxnet/vmxnet_def.h
@@ -0,0 +1,184 @@
+/*********************************************************
+ * Copyright (C) 1999 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ *********************************************************/
+
+/*********************************************************
+ * The contents of this file are subject to the terms of the Common
+ * Development and Distribution License (the "License") version 1.0
+ * and no later version. You may not use this file except in
+ * compliance with the License.
+ *
+ * You can obtain a copy of the License at
+ * http://www.opensource.org/licenses/cddl1.php
+ *
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ *********************************************************/
+
+#ifndef _VMXNET_DEF_H_
+#define _VMXNET_DEF_H_
+
+#define INCLUDE_ALLOW_USERLEVEL
+
+#define INCLUDE_ALLOW_MODULE
+#define INCLUDE_ALLOW_VMK_MODULE
+#define INCLUDE_ALLOW_VMKERNEL
+#define INCLUDE_ALLOW_DISTRIBUTE
+#include "includeCheck.h"
+
+#include "net_sg.h"
+#include "vmnet_def.h"
+
+
+/*
+ * Vmxnet I/O ports, used by both the vmxnet driver and
+ * the device emulation code.
+ */
+
+#define VMXNET_INIT_ADDR 0x00
+#define VMXNET_INIT_LENGTH 0x04
+#define VMXNET_TX_ADDR 0x08
+#define VMXNET_COMMAND_ADDR 0x0c
+#define VMXNET_MAC_ADDR 0x10
+#define VMXNET_LOW_VERSION 0x18
+#define VMXNET_HIGH_VERSION 0x1c
+#define VMXNET_STATUS_ADDR 0x20
+#define VMXNET_TOE_INIT_ADDR 0x24
+#define VMXNET_APROM_ADDR 0x28
+#define VMXNET_INT_ENABLE_ADDR 0x30
+#define VMXNET_WAKE_PKT_PATTERNS 0x34
+
+/*
+ * Vmxnet command register values.
+ */
+#define VMXNET_CMD_INTR_ACK 0x0001
+#define VMXNET_CMD_UPDATE_LADRF 0x0002
+#define VMXNET_CMD_UPDATE_IFF 0x0004
+#define VMXNET_CMD_UNUSED 1 0x0008
+#define VMXNET_CMD_UNUSED_2 0x0010
+#define VMXNET_CMD_INTR_DISABLE 0x0020
+#define VMXNET_CMD_INTR_ENABLE 0x0040
+#define VMXNET_CMD_UNUSED_3 0x0080
+#define VMXNET_CMD_CHECK_TX_DONE 0x0100
+#define VMXNET_CMD_GET_NUM_RX_BUFFERS 0x0200
+#define VMXNET_CMD_GET_NUM_TX_BUFFERS 0x0400
+#define VMXNET_CMD_PIN_TX_BUFFERS 0x0800
+#define VMXNET_CMD_GET_CAPABILITIES 0x1000
+#define VMXNET_CMD_GET_FEATURES 0x2000
+#define VMXNET_CMD_SET_POWER_FULL 0x4000
+#define VMXNET_CMD_SET_POWER_LOW 0x8000
+
+/*
+ * Vmxnet status register values.
+ */
+#define VMXNET_STATUS_CONNECTED 0x0001
+#define VMXNET_STATUS_ENABLED 0x0002
+#define VMXNET_STATUS_TX_PINNED 0x0004
+
+/*
+ * Values for the interface flags.
+ */
+#define VMXNET_IFF_PROMISC 0x01
+#define VMXNET_IFF_BROADCAST 0x02
+#define VMXNET_IFF_MULTICAST 0x04
+#define VMXNET_IFF_DIRECTED 0x08
+
+/*
+ * Length of the multicast address filter.
+ */
+#define VMXNET_MAX_LADRF 2
+
+/*
+ * Size of Vmxnet APROM.
+ */
+#define VMXNET_APROM_SIZE 6
+
+/*
+ * An invalid ring index.
+ */
+#define VMXNET_INVALID_RING_INDEX (-1)
+
+/*
+ * Features that are implemented by the driver. These are driver
+ * specific so not all features will be listed here. In addition not all
+ * drivers have to pay attention to these feature flags.
+ *
+ * VMXNET_FEATURE_ZERO_COPY_TX The driver won't do any copies as long as
+ * the packet length is >
+ * Vmxnet_DriverData.minTxPhysLength.
+ *
+ * VMXNET_FEATURE_TSO The driver will use the TSO capabilities
+ * of the underlying hardware if available
+ * and enabled.
+ *
+ * VMXNET_FEATURE_JUMBO_FRAME The driver can send/rcv jumbo frame
+ *
+ * VMXNET_FEATURE_LPD The backend can deliver large pkts
+ */
+#define VMXNET_FEATURE_ZERO_COPY_TX 0x01
+#define VMXNET_FEATURE_TSO 0x02
+#define VMXNET_FEATURE_JUMBO_FRAME 0x04
+#define VMXNET_FEATURE_LPD 0x08
+
+/*
+ * Define the set of capabilities required by each feature above
+ */
+#define VMXNET_FEATURE_ZERO_COPY_TX_CAPS VMXNET_CAP_SG
+#define VMXNET_FEATURE_TSO_CAPS VMXNET_CAP_TSO
+#define VMXNET_HIGHEST_FEATURE_BIT VMXNET_FEATURE_TSO
+
+#define VMXNET_INC(val, max) \
+ val++; \
+ if (UNLIKELY(val == max)) { \
+ val = 0; \
+ }
+
+/*
+ * code that just wants to switch on the different versions of the
+ * guest<->implementation protocol can cast driver data to this.
+ */
+typedef uint32 Vmxnet_DDMagic;
+
+/*
+ * Wake packet pattern commands sent through VMXNET_WAKE_PKT_PATTERNS port
+ */
+
+#define VMXNET_PM_OPCODE_START 3 /* args: cnt of wake packet patterns */
+#define VMXNET_PM_OPCODE_LEN 2 /* args: index of wake packet pattern */
+ /* number of pattern byte values */
+#define VMXNET_PM_OPCODE_DATA 1 /* args: index of wake packet pattern */
+ /* offset in pattern byte values list */
+ /* packet byte offset */
+ /* packet byte value */
+#define VMXNET_PM_OPCODE_END 0 /* args: <none> */
+
+typedef union Vmxnet_WakePktCmd {
+ uint32 pktData : 32;
+ struct {
+ unsigned cmd : 2; /* wake packet pattern cmd [from list above] */
+ unsigned cnt : 3; /* cnt wk pkt pttrns 1..MAX_NUM_FILTER_PTTRNS */
+ unsigned ind : 3; /* ind wk pkt pttrn 0..MAX_NUM_FILTER_PTTRNS-1 */
+ unsigned lenOff : 8; /* num pttrn byte vals 1..MAX_PKT_FILTER_SIZE */
+ /* OR offset in pattern byte values list */
+ /* 0..MAX_PKT_FILTER_SIZE-1 */
+ unsigned byteOff : 8; /* pkt byte offset 0..MAX_PKT_FILTER_SIZE-1 */
+ unsigned byteVal : 8; /* packet byte value 0..255 */
+ } pktPttrn;
+} Vmxnet_WakePktCmd;
+
+#endif /* _VMXNET_DEF_H_ */
diff --git a/usr/src/uts/intel/ip/ip.global-objs.debug64 b/usr/src/uts/intel/ip/ip.global-objs.debug64
index b05d399caf..3098f98265 100644
--- a/usr/src/uts/intel/ip/ip.global-objs.debug64
+++ b/usr/src/uts/intel/ip/ip.global-objs.debug64
@@ -105,7 +105,6 @@ ilb_sticky_expiry
ilb_sticky_timer_size
ilb_sticky_timeout
ill_no_arena
-ill_null
inet_dev_info
inet_devops
ip6_ftable_hash_size
diff --git a/usr/src/uts/intel/ip/ip.global-objs.obj64 b/usr/src/uts/intel/ip/ip.global-objs.obj64
index 7dd30502a0..f182d7198e 100644
--- a/usr/src/uts/intel/ip/ip.global-objs.obj64
+++ b/usr/src/uts/intel/ip/ip.global-objs.obj64
@@ -105,7 +105,6 @@ ilb_sticky_expiry
ilb_sticky_timer_size
ilb_sticky_timeout
ill_no_arena
-ill_null
inet_dev_info
inet_devops
ip6_ftable_hash_size
diff --git a/usr/src/uts/intel/ipf/Makefile b/usr/src/uts/intel/ipf/Makefile
index 046a6c223d..e18158a43d 100644
--- a/usr/src/uts/intel/ipf/Makefile
+++ b/usr/src/uts/intel/ipf/Makefile
@@ -58,7 +58,7 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
CPPFLAGS += -DIPFILTER_LKM -DIPFILTER_LOG -DIPFILTER_LOOKUP -DUSE_INET6
CPPFLAGS += -DSUNDDI -DSOLARIS2=$(RELEASE_MINOR) -DIRE_ILL_CN
-LDFLAGS += -dy -Ndrv/ip -Nmisc/md5 -Nmisc/neti -Nmisc/hook -Nmisc/kcf
+LDFLAGS += -dy -Ndrv/ip -Nmisc/md5 -Nmisc/neti -Nmisc/hook -Nmisc/kcf -Ndrv/vnd
INC_PATH += -I$(UTSBASE)/common/inet/ipf
diff --git a/usr/src/uts/intel/ipf/ipf.global-objs.debug64 b/usr/src/uts/intel/ipf/ipf.global-objs.debug64
index 203dd14416..0af6da41fa 100644
--- a/usr/src/uts/intel/ipf/ipf.global-objs.debug64
+++ b/usr/src/uts/intel/ipf/ipf.global-objs.debug64
@@ -22,11 +22,41 @@
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
+# Copyright 2013 Joyent, Inc. All rights reserved
+#
fr_availfuncs
fr_features
fr_objbytes
hdrsizes
+hook4_in
+hook4_in_gz
+hook4_loop_in
+hook4_loop_in_gz
+hook4_loop_out
+hook4_loop_out_gz
+hook4_nicevents
+hook4_nicevents_gz
+hook4_out
+hook4_out_gz
+hook4_vnd_in
+hook4_vnd_in_gz
+hook4_vnd_out
+hook4_vnd_out_gz
+hook6_in
+hook6_in_gz
+hook6_loop_in
+hook6_loop_in_gz
+hook6_loop_out
+hook6_loop_out_gz
+hook6_nicevents
+hook6_nicevents_gz
+hook6_out
+hook6_out_gz
+hook6_vnd_in
+hook6_vnd_in_gz
+hook6_vnd_out
+hook6_vnd_out_gz
icmpreplytype4
icmpreplytype6
icmptoicmp6types
@@ -37,10 +67,12 @@ ipf_cb_ops
ipf_dev_info
ipf_devfiles
ipf_kstat_tmp
+ipf_minor
ipf_ops
ipf_proxy_debug
ipf_stack_lock
ipf_stacks
+ipf_state
ipfilter_version
ipfncb
ipl_magic
diff --git a/usr/src/uts/intel/kdi/kdi_idt.c b/usr/src/uts/intel/kdi/kdi_idt.c
index 4bd6ca73e1..73d9628e62 100644
--- a/usr/src/uts/intel/kdi/kdi_idt.c
+++ b/usr/src/uts/intel/kdi/kdi_idt.c
@@ -217,7 +217,8 @@ kdi_idt_patch(caddr_t code, size_t sz)
continue; /* uses kernel's handler */
gd = &kdi_idt[i];
- patch = (uchar_t *)GATESEG_GETOFFSET(gd) + KDI_MSR_PATCHOFF;
+ patch = ((uchar_t *)(uintptr_t)GATESEG_GETOFFSET(gd)) +
+ KDI_MSR_PATCHOFF;
/*
* We can't ASSERT that there's a nop here, because this may be
diff --git a/usr/src/uts/intel/lx_afs/Makefile b/usr/src/uts/intel/lx_afs/Makefile
new file mode 100644
index 0000000000..657ce7f4f0
--- /dev/null
+++ b/usr/src/uts/intel/lx_afs/Makefile
@@ -0,0 +1,108 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# This makefile drives the production of the lxproc file system
+# kernel module.
+#
+# i86 architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+# Note that the name of the actual filesystem is lx_afs and
+# not lx_autofs. This is becase filesystem names are stupidly
+# limited to 8 characters.
+#
+MODULE = lx_afs
+OBJECTS = $(LX_AUTOFS_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(LX_AUTOFS_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_FS_DIR)/$(MODULE)
+
+INC_PATH += -I$(UTSBASE)/common/brand/lx
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Overrides.
+#
+CFLAGS += $(CCVERBOSE)
+LDFLAGS += -dy
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
+
+#
+# Include brand-specific rules
+#
+
+include $(UTSBASE)/intel/lx_afs/Makefile.rules
diff --git a/usr/src/uts/intel/lx_afs/Makefile.rules b/usr/src/uts/intel/lx_afs/Makefile.rules
new file mode 100644
index 0000000000..2793fedaa4
--- /dev/null
+++ b/usr/src/uts/intel/lx_afs/Makefile.rules
@@ -0,0 +1,40 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+#
+# Section 1a: C object build rules
+#
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/autofs/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+#
+# Section 1b: Lint `object' build rules.
+#
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/autofs/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/intel/lx_audio/Makefile b/usr/src/uts/intel/lx_audio/Makefile
new file mode 100644
index 0000000000..9341fc7def
--- /dev/null
+++ b/usr/src/uts/intel/lx_audio/Makefile
@@ -0,0 +1,100 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# uts/intel/lx_audio/Makefile
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# This makefile drives the production of the lx_audio driver
+#
+# intel architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = lx_audio
+OBJECTS = $(LX_AUDIO_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(LX_AUDIO_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/brand/lx/io
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY) $(SRC_CONFILE)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+CPPFLAGS += -I$(UTSBASE)/common/brand/lx
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
+LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
+LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/io/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/io/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/intel/lx_brand/Makefile b/usr/src/uts/intel/lx_brand/Makefile
new file mode 100644
index 0000000000..01ac92512c
--- /dev/null
+++ b/usr/src/uts/intel/lx_brand/Makefile
@@ -0,0 +1,108 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+# Copyright 2014 Joyent, Inc. All rights reserved.
+#
+# This makefile drives the production of the kernel component of
+# the lx brand
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Path to where brand common sources live
+#
+LX_CMN = $(SRC)/common/brand/lx
+
+#
+# Define the module and object file sets.
+#
+MODULE = lx_brand
+OBJECTS = $(LX_BRAND_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(LX_BRAND_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_BRAND_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+INC_PATH += -I$(UTSBASE)/common/brand/lx -I$(LX_CMN)
+AS_INC_PATH += -I$(UTSBASE)/i86pc/genassym/$(OBJS_DIR)
+
+#
+# lint pass one enforcement
+#
+CFLAGS += $(CCVERBOSE)
+
+LDFLAGS += -dy -Nexec/elfexec -Nfs/fifofs
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
+LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
+
+#
+# Include brand-specific rules
+#
+
+include $(UTSBASE)/intel/lx_brand/Makefile.rules
diff --git a/usr/src/uts/intel/lx_brand/Makefile.rules b/usr/src/uts/intel/lx_brand/Makefile.rules
new file mode 100644
index 0000000000..e78bcb1827
--- /dev/null
+++ b/usr/src/uts/intel/lx_brand/Makefile.rules
@@ -0,0 +1,85 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+# Copyright 2014 Joyent, Inc. All rights reserved.
+#
+#
+
+#
+# Section 1a: C object build rules
+#
+$(OBJS_DIR_OBJ64)/%.o: $(UTSBASE)/common/brand/lx/os/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR_DBG64)/%.o: $(UTSBASE)/common/brand/lx/os/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR_OBJ64)/%.o: $(UTSBASE)/common/brand/lx/syscall/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR_DBG64)/%.o: $(UTSBASE)/common/brand/lx/syscall/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR_OBJ64)/%.o: $(UTSBASE)/intel/brand/lx/%.s
+ $(COMPILE.s) -o $@ $<
+
+$(OBJS_DIR_OBJ64)/%.o: $(LX_CMN)/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR_DBG64)/%.o: $(UTSBASE)/intel/brand/lx/%.s
+ $(COMPILE.s) -o $@ $<
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/os/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/syscall/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR)/%.o: $(LX_CMN)/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/intel/brand/lx/%.s
+ $(COMPILE.s) -o $@ $<
+
+#
+# Section 1b: Lint `object' build rules.
+#
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/os/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/syscall/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
+$(LINTS_DIR)/%.ln: $(LX_CMN)/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/intel/brand/lx/%.s
+ @($(LHEAD) $(LINT.s) $< $(LTAIL))
diff --git a/usr/src/uts/intel/lx_netlink/Makefile b/usr/src/uts/intel/lx_netlink/Makefile
new file mode 100644
index 0000000000..9893b90d77
--- /dev/null
+++ b/usr/src/uts/intel/lx_netlink/Makefile
@@ -0,0 +1,77 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = lx_netlink
+OBJECTS = $(LX_NETLINK_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(LX_NETLINK_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_SOCK_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+CPPFLAGS += -I$(UTSBASE)/common/brand/lx
+LDFLAGS += -dy -Ndrv/ip -Nfs/sockfs
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/io/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/io/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/intel/lx_proc/Makefile b/usr/src/uts/intel/lx_proc/Makefile
new file mode 100644
index 0000000000..04873fcd72
--- /dev/null
+++ b/usr/src/uts/intel/lx_proc/Makefile
@@ -0,0 +1,115 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# uts/intel/lx_proc/Makefile
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# Copyright 2014 Joyent, Inc. All rights reserved.
+#
+# This makefile drives the production of the lxproc file system
+# kernel module.
+#
+# i86 architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Path to where brand common sources live
+#
+LX_CMN = $(SRC)/common/brand/lx
+
+#
+# Define the module and object file sets.
+#
+MODULE = lx_proc
+OBJECTS = $(LX_PROC_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(LX_PROC_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_FS_DIR)/$(MODULE)
+
+INC_PATH += -I$(UTSBASE)/common/brand/lx -I$(LX_CMN)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Overrides.
+#
+CFLAGS += $(CCVERBOSE)
+
+#
+# Depends on procfs and lx_brand
+#
+LDFLAGS += -dy -Nfs/procfs -Nbrand/lx_brand -Ndrv/inotify
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
+LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
+
+#
+# Include brand-specific rules
+#
+
+include $(UTSBASE)/intel/lx_proc/Makefile.rules
diff --git a/usr/src/uts/intel/lx_proc/Makefile.rules b/usr/src/uts/intel/lx_proc/Makefile.rules
new file mode 100644
index 0000000000..b8592d2fdd
--- /dev/null
+++ b/usr/src/uts/intel/lx_proc/Makefile.rules
@@ -0,0 +1,38 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+
+#
+# Section 1a: C object build rules
+#
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/procfs/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+#
+# Section 1b: Lint `object' build rules.
+#
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/procfs/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/intel/lx_ptm/Makefile b/usr/src/uts/intel/lx_ptm/Makefile
new file mode 100644
index 0000000000..dcead27da7
--- /dev/null
+++ b/usr/src/uts/intel/lx_ptm/Makefile
@@ -0,0 +1,91 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# uts/intel/lx_ptm/Makefile
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# This makefile drives the production of the lx_ptm driver
+#
+# intel architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = lx_ptm
+OBJECTS = $(LX_PTM_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(LX_PTM_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/brand/lx/io
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY) $(SRC_CONFILE)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+CPPFLAGS += -I$(UTSBASE)/common/brand/lx
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/io/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/io/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/intel/lx_systrace/Makefile b/usr/src/uts/intel/lx_systrace/Makefile
new file mode 100644
index 0000000000..20c4a6a3a3
--- /dev/null
+++ b/usr/src/uts/intel/lx_systrace/Makefile
@@ -0,0 +1,80 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+UTSBASE = ../..
+
+MODULE = lx_systrace
+OBJECTS = $(LX_SYSTRACE_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(LX_SYSTRACE_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_DRV_DIR)/$(MODULE)
+ROOTLINK = $(USR_DTRACE_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/brand/lx/dtrace
+
+include $(UTSBASE)/intel/Makefile.intel
+
+ALL_TARGET = $(BINARY) $(SRC_CONFILE)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE)
+
+CPPFLAGS += -I$(UTSBASE)/common/brand/lx
+
+LDFLAGS += -dy -Ndrv/dtrace -Nbrand/lx_brand
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS += -erroff=E_STATIC_UNUSED
+
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+$(ROOTLINK): $(USR_DTRACE_DIR) $(ROOTMODULE)
+ -$(RM) $@; ln $(ROOTMODULE) $@
+
+include $(UTSBASE)/intel/Makefile.targ
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/dtrace/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/dtrace/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/intel/lxprocfs/Makefile b/usr/src/uts/intel/lxprocfs/Makefile
new file mode 100644
index 0000000000..c6ffec0199
--- /dev/null
+++ b/usr/src/uts/intel/lxprocfs/Makefile
@@ -0,0 +1,88 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# uts/intel/lxprocfs/Makefile
+#
+# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# This makefile drives the production of the lxprocfs file system
+# kernel module.
+#
+# intel architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = lxprocfs
+OBJECTS = $(LXPROC_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(LXPROC_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_FS_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Depends on procfs
+#
+LDFLAGS += -dy -Nfs/procfs
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/nfp/Makefile b/usr/src/uts/intel/nfp/Makefile
new file mode 100644
index 0000000000..3acbc6a725
--- /dev/null
+++ b/usr/src/uts/intel/nfp/Makefile
@@ -0,0 +1,82 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014, Joyent, Inc
+#
+
+#
+# uts/intel/nfp/Makefile
+#
+# This makefile drives the production of the nfp
+# driver kernel module.
+#
+# intel architecture dependent
+#
+
+#
+# Paths to the base of the uts directory trees
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = nfp
+OBJECTS = $(NFP_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(NFP_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Driver-specific flags
+#
+CPPFLAGS += -DCH_KERNELVER=270
+LDFLAGS += -dy
+CERRWARN += -_gcc=-Wno-unused-variable
+CERRWARN += -_gcc=-Wno-unused-function
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/opteron_pcbe/Makefile b/usr/src/uts/intel/opteron_pcbe/Makefile
index aea8e89fbe..7431f50329 100644
--- a/usr/src/uts/intel/opteron_pcbe/Makefile
+++ b/usr/src/uts/intel/opteron_pcbe/Makefile
@@ -34,7 +34,7 @@ UTSBASE = ../..
MODULE = pcbe.AuthenticAMD
OBJECTS = $(OPTERON_PCBE_OBJS:%=$(OBJS_DIR)/%)
LINTS = $(OPTERON_PCBE_OBJS:%.o=$(LINTS_DIR)/%.ln)
-ROOTMODULE = $(USR_PCBE_DIR)/$(MODULE)
+ROOTMODULE = $(ROOT_PSM_PCBE_DIR)/$(MODULE)
#
# Include common rules.
diff --git a/usr/src/uts/intel/os/driver_aliases b/usr/src/uts/intel/os/driver_aliases
index 3116819932..1cea287121 100644
--- a/usr/src/uts/intel/os/driver_aliases
+++ b/usr/src/uts/intel/os/driver_aliases
@@ -1 +1,2 @@
asy "pci11c1,480"
+vmxnet "pci15ad,720"
diff --git a/usr/src/uts/intel/os/name_to_major b/usr/src/uts/intel/os/name_to_major
index c5ad4c9bf0..1fb86f9a50 100644
--- a/usr/src/uts/intel/os/name_to_major
+++ b/usr/src/uts/intel/os/name_to_major
@@ -2,3 +2,4 @@ md 85
devinfo 88
asy 106
did 239
+vmxnet 270
diff --git a/usr/src/uts/intel/p123_pcbe/Makefile b/usr/src/uts/intel/p123_pcbe/Makefile
index 71b7d9bcec..d591d52e62 100644
--- a/usr/src/uts/intel/p123_pcbe/Makefile
+++ b/usr/src/uts/intel/p123_pcbe/Makefile
@@ -35,8 +35,8 @@ UTSBASE = ../..
MODULE = pcbe.GenuineIntel.6
OBJECTS = $(P123_PCBE_OBJS:%=$(OBJS_DIR)/%)
LINTS = $(P123_PCBE_OBJS:%.o=$(LINTS_DIR)/%.ln)
-ROOTMODULE = $(USR_PCBE_DIR)/$(MODULE)
-ROOTLINK = $(USR_PCBE_DIR)/pcbe.GenuineIntel.5
+ROOTMODULE = $(ROOT_PSM_PCBE_DIR)/$(MODULE)
+ROOTLINK = $(ROOT_PSM_PCBE_DIR)/pcbe.GenuineIntel.5
#
# Include common rules.
diff --git a/usr/src/uts/intel/p4_pcbe/Makefile b/usr/src/uts/intel/p4_pcbe/Makefile
index 07689646c1..42dc040eee 100644
--- a/usr/src/uts/intel/p4_pcbe/Makefile
+++ b/usr/src/uts/intel/p4_pcbe/Makefile
@@ -35,7 +35,7 @@ UTSBASE = ../..
MODULE = pcbe.GenuineIntel.15
OBJECTS = $(P4_PCBE_OBJS:%=$(OBJS_DIR)/%)
LINTS = $(P4_PCBE_OBJS:%.o=$(LINTS_DIR)/%.ln)
-ROOTMODULE = $(USR_PCBE_DIR)/$(MODULE)
+ROOTMODULE = $(ROOT_PSM_PCBE_DIR)/$(MODULE)
#
# Include common rules.
diff --git a/usr/src/uts/intel/sngl_brand/Makefile b/usr/src/uts/intel/sngl_brand/Makefile
new file mode 100644
index 0000000000..cec2141e89
--- /dev/null
+++ b/usr/src/uts/intel/sngl_brand/Makefile
@@ -0,0 +1,98 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2012, Joyent, Inc. All rights reserved.
+#
+# This makefile drives the production of the kernel component of
+# the SNGL brand
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+SNGL_BASE = $(UTSBASE)/common/brand/sngl
+
+#
+# Define the module and object file sets.
+#
+MODULE = sngl_brand
+OBJECTS = $(SNGL_BRAND_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(SNGL_BRAND_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_BRAND_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+
+#
+# Update compiler variables.
+#
+INC_PATH += -I$(SNGL_BASE) -I$(OBJS_DIR)
+AS_INC_PATH += -I$(UTSBASE)/i86pc/genassym/$(OBJS_DIR)
+LDFLAGS += -dy -Nexec/elfexec
+
+#
+# Ugh, this is a gross hack. sngl_brand_asm.s uses lots of defines
+# to simplify variable access. All these defines work fine for amd64
+# compiles because when compiling for amd64 we use the GNU assembler,
+# gas. For 32-bit code we use the Sun assembler, as. Unfortunatly
+# as does not handle certian constructs that gas does. So rather than
+# make our code less readable, we'll just use gas to compile our 32-bit
+# code as well.
+#
+i386_AS = $(amd64_AS)
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/sys/acpi/acapps.h b/usr/src/uts/intel/sys/acpi/acapps.h
index 2d3f84186e..0382ca8547 100644
--- a/usr/src/uts/intel/sys/acpi/acapps.h
+++ b/usr/src/uts/intel/sys/acpi/acapps.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -52,7 +52,7 @@
/* Common info for tool signons */
#define ACPICA_NAME "Intel ACPI Component Architecture"
-#define ACPICA_COPYRIGHT "Copyright (c) 2000 - 2011 Intel Corporation"
+#define ACPICA_COPYRIGHT "Copyright (c) 2000 - 2012 Intel Corporation"
#if ACPI_MACHINE_WIDTH == 64
#define ACPI_WIDTH "-64"
@@ -81,6 +81,15 @@
Prefix, ACPICA_COPYRIGHT, \
Prefix
+/* Macros for usage messages */
+
+#define ACPI_USAGE_HEADER(Usage) \
+ printf ("Usage: %s\nOptions:\n", Usage);
+
+#define ACPI_OPTION(Name, Description) \
+ printf (" %-18s%s\n", Name, Description);
+
+
#define FILE_SUFFIX_DISASSEMBLY "dsl"
#define ACPI_TABLE_FILE_SUFFIX ".dat"
diff --git a/usr/src/uts/intel/sys/acpi/accommon.h b/usr/src/uts/intel/sys/acpi/accommon.h
index 58bb83ddb6..7e6d05aa8d 100644
--- a/usr/src/uts/intel/sys/acpi/accommon.h
+++ b/usr/src/uts/intel/sys/acpi/accommon.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/acpi/acconfig.h b/usr/src/uts/intel/sys/acpi/acconfig.h
index 7a2107aa76..7880516e47 100644
--- a/usr/src/uts/intel/sys/acpi/acconfig.h
+++ b/usr/src/uts/intel/sys/acpi/acconfig.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -86,6 +86,23 @@
*/
#define ACPI_CHECKSUM_ABORT FALSE
+/*
+ * Generate a version of ACPICA that only supports "reduced hardware"
+ * platforms (as defined in ACPI 5.0). Set to TRUE to generate a specialized
+ * version of ACPICA that ONLY supports the ACPI 5.0 "reduced hardware"
+ * model. In other words, no ACPI hardware is supported.
+ *
+ * If TRUE, this means no support for the following:
+ * PM Event and Control registers
+ * SCI interrupt (and handler)
+ * Fixed Events
+ * General Purpose Events (GPEs)
+ * Global Lock
+ * ACPI PM timer
+ * FACS table (Waking vectors and Global Lock)
+ */
+#define ACPI_REDUCED_HARDWARE FALSE
+
/******************************************************************************
*
@@ -95,7 +112,7 @@
/* Version of ACPI supported */
-#define ACPI_CA_SUPPORT_LEVEL 3
+#define ACPI_CA_SUPPORT_LEVEL 5
/* Maximum count for a semaphore object */
@@ -123,7 +140,11 @@
/* Maximum sleep allowed via Sleep() operator */
-#define ACPI_MAX_SLEEP 20000 /* Two seconds */
+#define ACPI_MAX_SLEEP 2000 /* 2000 millisec == two seconds */
+
+/* Address Range lists are per-SpaceId (Memory and I/O only) */
+
+#define ACPI_ADDRESS_RANGE_MAX 2
/******************************************************************************
@@ -183,9 +204,10 @@
#define ACPI_RSDP_CHECKSUM_LENGTH 20
#define ACPI_RSDP_XCHECKSUM_LENGTH 36
-/* SMBus and IPMI bidirectional buffer size */
+/* SMBus, GSBus and IPMI bidirectional buffer size */
#define ACPI_SMBUS_BUFFER_SIZE 34
+#define ACPI_GSBUS_BUFFER_SIZE 34
#define ACPI_IPMI_BUFFER_SIZE 66
/* _SxD and _SxW control methods */
diff --git a/usr/src/uts/intel/sys/acpi/acdebug.h b/usr/src/uts/intel/sys/acpi/acdebug.h
index 2c1fe73429..bcc505c8d5 100644
--- a/usr/src/uts/intel/sys/acpi/acdebug.h
+++ b/usr/src/uts/intel/sys/acpi/acdebug.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -107,6 +107,10 @@ AcpiDbDisplayTableInfo (
char *TableArg);
void
+AcpiDbDisplayTemplate (
+ char *BufferArg);
+
+void
AcpiDbUnloadAcpiTable (
char *TableArg,
char *InstanceArg);
@@ -133,18 +137,20 @@ void
AcpiDbDisplayResources (
char *ObjectArg);
+ACPI_HW_DEPENDENT_RETURN_VOID (
void
AcpiDbDisplayGpes (
- void);
+ void))
void
AcpiDbDisplayHandlers (
void);
+ACPI_HW_DEPENDENT_RETURN_VOID (
void
AcpiDbGenerateGpe (
char *GpeArg,
- char *BlockArg);
+ char *BlockArg))
/*
diff --git a/usr/src/uts/intel/sys/acpi/acdisasm.h b/usr/src/uts/intel/sys/acpi/acdisasm.h
index aaaac11d86..9ec12713b4 100644
--- a/usr/src/uts/intel/sys/acpi/acdisasm.h
+++ b/usr/src/uts/intel/sys/acpi/acdisasm.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -63,7 +63,7 @@
typedef const struct acpi_dmtable_info
{
UINT8 Opcode;
- UINT8 Offset;
+ UINT16 Offset;
char *Name;
UINT8 Flags;
@@ -80,57 +80,82 @@ typedef const struct acpi_dmtable_info
/*
* Values for Opcode above.
- * Note: 0-7 must not change, used as a flag shift value
+ * Note: 0-7 must not change, they are used as a flag shift value. Other
+ * than those, new values can be added wherever appropriate.
*/
-#define ACPI_DMT_FLAG0 0
-#define ACPI_DMT_FLAG1 1
-#define ACPI_DMT_FLAG2 2
-#define ACPI_DMT_FLAG3 3
-#define ACPI_DMT_FLAG4 4
-#define ACPI_DMT_FLAG5 5
-#define ACPI_DMT_FLAG6 6
-#define ACPI_DMT_FLAG7 7
-#define ACPI_DMT_FLAGS0 8
-#define ACPI_DMT_FLAGS2 9
-#define ACPI_DMT_UINT8 10
-#define ACPI_DMT_UINT16 11
-#define ACPI_DMT_UINT24 12
-#define ACPI_DMT_UINT32 13
-#define ACPI_DMT_UINT56 14
-#define ACPI_DMT_UINT64 15
-#define ACPI_DMT_STRING 16
-#define ACPI_DMT_NAME4 17
-#define ACPI_DMT_NAME6 18
-#define ACPI_DMT_NAME8 19
-#define ACPI_DMT_CHKSUM 20
-#define ACPI_DMT_SPACEID 21
-#define ACPI_DMT_GAS 22
-#define ACPI_DMT_ASF 23
-#define ACPI_DMT_DMAR 24
-#define ACPI_DMT_HEST 25
-#define ACPI_DMT_HESTNTFY 26
-#define ACPI_DMT_HESTNTYP 27
-#define ACPI_DMT_MADT 28
-#define ACPI_DMT_SRAT 29
-#define ACPI_DMT_EXIT 30
-#define ACPI_DMT_SIG 31
-#define ACPI_DMT_FADTPM 32
-#define ACPI_DMT_BUF16 33
-#define ACPI_DMT_IVRS 34
-#define ACPI_DMT_BUFFER 35
-#define ACPI_DMT_PCI_PATH 36
-#define ACPI_DMT_EINJACT 37
-#define ACPI_DMT_EINJINST 38
-#define ACPI_DMT_ERSTACT 39
-#define ACPI_DMT_ERSTINST 40
-#define ACPI_DMT_ACCWIDTH 41
-#define ACPI_DMT_UNICODE 42
-#define ACPI_DMT_UUID 43
-#define ACPI_DMT_DEVICE_PATH 44
-#define ACPI_DMT_LABEL 45
-#define ACPI_DMT_BUF7 46
-#define ACPI_DMT_BUF128 47
-#define ACPI_DMT_SLIC 48
+typedef enum
+{
+ /* Simple Data Types */
+
+ ACPI_DMT_FLAG0 = 0,
+ ACPI_DMT_FLAG1 = 1,
+ ACPI_DMT_FLAG2 = 2,
+ ACPI_DMT_FLAG3 = 3,
+ ACPI_DMT_FLAG4 = 4,
+ ACPI_DMT_FLAG5 = 5,
+ ACPI_DMT_FLAG6 = 6,
+ ACPI_DMT_FLAG7 = 7,
+ ACPI_DMT_FLAGS0,
+ ACPI_DMT_FLAGS1,
+ ACPI_DMT_FLAGS2,
+ ACPI_DMT_FLAGS4,
+ ACPI_DMT_UINT8,
+ ACPI_DMT_UINT16,
+ ACPI_DMT_UINT24,
+ ACPI_DMT_UINT32,
+ ACPI_DMT_UINT40,
+ ACPI_DMT_UINT48,
+ ACPI_DMT_UINT56,
+ ACPI_DMT_UINT64,
+ ACPI_DMT_BUF7,
+ ACPI_DMT_BUF16,
+ ACPI_DMT_BUF128,
+ ACPI_DMT_SIG,
+ ACPI_DMT_STRING,
+ ACPI_DMT_NAME4,
+ ACPI_DMT_NAME6,
+ ACPI_DMT_NAME8,
+
+ /* Types that are decoded to strings and miscellaneous */
+
+ ACPI_DMT_ACCWIDTH,
+ ACPI_DMT_CHKSUM,
+ ACPI_DMT_GAS,
+ ACPI_DMT_SPACEID,
+ ACPI_DMT_UNICODE,
+ ACPI_DMT_UUID,
+
+ /* Types used only for the Data Table Compiler */
+
+ ACPI_DMT_BUFFER,
+ ACPI_DMT_DEVICE_PATH,
+ ACPI_DMT_LABEL,
+ ACPI_DMT_PCI_PATH,
+
+ /* Types that are specific to particular ACPI tables */
+
+ ACPI_DMT_ASF,
+ ACPI_DMT_DMAR,
+ ACPI_DMT_EINJACT,
+ ACPI_DMT_EINJINST,
+ ACPI_DMT_ERSTACT,
+ ACPI_DMT_ERSTINST,
+ ACPI_DMT_FADTPM,
+ ACPI_DMT_HEST,
+ ACPI_DMT_HESTNTFY,
+ ACPI_DMT_HESTNTYP,
+ ACPI_DMT_IVRS,
+ ACPI_DMT_MADT,
+ ACPI_DMT_PMTT,
+ ACPI_DMT_SLIC,
+ ACPI_DMT_SRAT,
+
+ /* Special opcodes */
+
+ ACPI_DMT_EXTRA_TEXT,
+ ACPI_DMT_EXIT
+
+} ACPI_ENTRY_TYPES;
typedef
void (*ACPI_DMTABLE_HANDLER) (
@@ -175,6 +200,11 @@ ACPI_STATUS (*ASL_WALK_CALLBACK) (
#define ASL_WALK_CALLBACK_DEFINED
#endif
+typedef
+void (*ACPI_RESOURCE_HANDLER) (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level);
typedef struct acpi_resource_tag
{
@@ -202,6 +232,7 @@ extern ACPI_DMTABLE_INFO AcpiDmTableInfoAsf4[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoAsfHdr[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoBoot[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoBert[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoBgrt[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoCpep[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoCpep0[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoDbgp[];
@@ -212,6 +243,7 @@ extern ACPI_DMTABLE_INFO AcpiDmTableInfoDmar0[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoDmar1[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoDmar2[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoDmar3[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoDrtm[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoEcdt[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoEinj[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoEinj0[];
@@ -221,7 +253,13 @@ extern ACPI_DMTABLE_INFO AcpiDmTableInfoFacs[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoFadt1[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoFadt2[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoFadt3[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoFadt5[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoFpdt[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoFpdtHdr[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoFpdt0[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoFpdt1[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoGas[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoGtdt[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoHeader[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoHest[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoHest0[];
@@ -254,14 +292,34 @@ extern ACPI_DMTABLE_INFO AcpiDmTableInfoMadt7[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoMadt8[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoMadt9[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoMadt10[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoMadt11[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoMadt12[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoMadtHdr[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoMcfg[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoMcfg0[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoMchi[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoMpst[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoMpst0[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoMpst0A[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoMpst0B[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoMpst1[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoMpst2[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoMsct[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoMsct0[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoPmtt[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoPmtt0[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoPmtt1[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoPmtt1a[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoPmtt2[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoPmttHdr[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoPcct[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoPcct0[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoRsdp1[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoRsdp2[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoS3pt[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoS3ptHdr[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoS3pt0[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoS3pt1[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoSbst[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoSlicHdr[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoSlic0[];
@@ -354,6 +412,10 @@ AcpiDmDumpFadt (
ACPI_TABLE_HEADER *Table);
void
+AcpiDmDumpFpdt (
+ ACPI_TABLE_HEADER *Table);
+
+void
AcpiDmDumpHest (
ACPI_TABLE_HEADER *Table);
@@ -362,17 +424,29 @@ AcpiDmDumpIvrs (
ACPI_TABLE_HEADER *Table);
void
+AcpiDmDumpMadt (
+ ACPI_TABLE_HEADER *Table);
+
+void
AcpiDmDumpMcfg (
ACPI_TABLE_HEADER *Table);
void
-AcpiDmDumpMadt (
+AcpiDmDumpMpst (
ACPI_TABLE_HEADER *Table);
void
AcpiDmDumpMsct (
ACPI_TABLE_HEADER *Table);
+void
+AcpiDmDumpPcct (
+ ACPI_TABLE_HEADER *Table);
+
+void
+AcpiDmDumpPmtt (
+ ACPI_TABLE_HEADER *Table);
+
UINT32
AcpiDmDumpRsdp (
ACPI_TABLE_HEADER *Table);
@@ -381,6 +455,10 @@ void
AcpiDmDumpRsdt (
ACPI_TABLE_HEADER *Table);
+UINT32
+AcpiDmDumpS3pt (
+ ACPI_TABLE_HEADER *Table);
+
void
AcpiDmDumpSlic (
ACPI_TABLE_HEADER *Table);
@@ -671,6 +749,18 @@ AcpiDmVendorLargeDescriptor (
UINT32 Level);
void
+AcpiDmGpioDescriptor (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level);
+
+void
+AcpiDmSerialBusDescriptor (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level);
+
+void
AcpiDmVendorCommon (
char *Name,
UINT8 *ByteData,
@@ -694,6 +784,12 @@ AcpiDmDmaDescriptor (
UINT32 Level);
void
+AcpiDmFixedDmaDescriptor (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level);
+
+void
AcpiDmIoDescriptor (
AML_RESOURCE *Resource,
UINT32 Length,
diff --git a/usr/src/uts/intel/sys/acpi/acdispat.h b/usr/src/uts/intel/sys/acpi/acdispat.h
index ae8dd93154..0699ee77a1 100644
--- a/usr/src/uts/intel/sys/acpi/acdispat.h
+++ b/usr/src/uts/intel/sys/acpi/acdispat.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/acpi/acevents.h b/usr/src/uts/intel/sys/acpi/acevents.h
index 8681ed5f38..3b874f1075 100644
--- a/usr/src/uts/intel/sys/acpi/acevents.h
+++ b/usr/src/uts/intel/sys/acpi/acevents.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -85,13 +85,15 @@ ACPI_STATUS
AcpiEvInitGlobalLockHandler (
void);
+ACPI_HW_DEPENDENT_RETURN_OK (
ACPI_STATUS
AcpiEvAcquireGlobalLock(
- UINT16 Timeout);
+ UINT16 Timeout))
+ACPI_HW_DEPENDENT_RETURN_OK (
ACPI_STATUS
AcpiEvReleaseGlobalLock(
- void);
+ void))
ACPI_STATUS
AcpiEvRemoveGlobalLockHandler (
@@ -154,9 +156,10 @@ AcpiEvInitializeGpeBlock (
ACPI_GPE_BLOCK_INFO *GpeBlock,
void *Context);
+ACPI_HW_DEPENDENT_RETURN_OK (
ACPI_STATUS
AcpiEvDeleteGpeBlock (
- ACPI_GPE_BLOCK_INFO *GpeBlock);
+ ACPI_GPE_BLOCK_INFO *GpeBlock))
UINT32
AcpiEvGpeDispatch (
@@ -171,9 +174,10 @@ ACPI_STATUS
AcpiEvGpeInitialize (
void);
+ACPI_HW_DEPENDENT_RETURN_VOID (
void
AcpiEvUpdateGpes (
- ACPI_OWNER_ID TableOwnerId);
+ ACPI_OWNER_ID TableOwnerId))
ACPI_STATUS
AcpiEvMatchGpeMethod (
@@ -228,7 +232,8 @@ AcpiEvInitializeOpRegions (
ACPI_STATUS
AcpiEvAddressSpaceDispatch (
- ACPI_OPERAND_OBJECT *RegionObj,
+ ACPI_OPERAND_OBJECT *RegionObj,
+ ACPI_OPERAND_OBJECT *FieldObj,
UINT32 Function,
UINT32 RegionOffset,
UINT32 BitWidth,
@@ -334,9 +339,9 @@ UINT32
AcpiEvInitializeSCI (
UINT32 ProgramSCI);
+ACPI_HW_DEPENDENT_RETURN_VOID (
void
AcpiEvTerminate (
- void);
-
+ void))
#endif /* __ACEVENTS_H__ */
diff --git a/usr/src/uts/intel/sys/acpi/acexcep.h b/usr/src/uts/intel/sys/acpi/acexcep.h
index 7d426d0f26..10f5a113da 100644
--- a/usr/src/uts/intel/sys/acpi/acexcep.h
+++ b/usr/src/uts/intel/sys/acpi/acexcep.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -92,8 +92,9 @@
#define AE_SAME_HANDLER (ACPI_STATUS) (0x0019 | AE_CODE_ENVIRONMENTAL)
#define AE_NO_HANDLER (ACPI_STATUS) (0x001A | AE_CODE_ENVIRONMENTAL)
#define AE_OWNER_ID_LIMIT (ACPI_STATUS) (0x001B | AE_CODE_ENVIRONMENTAL)
+#define AE_NOT_CONFIGURED (ACPI_STATUS) (0x001C | AE_CODE_ENVIRONMENTAL)
-#define AE_CODE_ENV_MAX 0x001B
+#define AE_CODE_ENV_MAX 0x001C
/*
@@ -222,7 +223,8 @@ char const *AcpiGbl_ExceptionNames_Env[] =
"AE_ABORT_METHOD",
"AE_SAME_HANDLER",
"AE_NO_HANDLER",
- "AE_OWNER_ID_LIMIT"
+ "AE_OWNER_ID_LIMIT",
+ "AE_NOT_CONFIGURED"
};
char const *AcpiGbl_ExceptionNames_Pgm[] =
diff --git a/usr/src/uts/intel/sys/acpi/acglobal.h b/usr/src/uts/intel/sys/acpi/acglobal.h
index ebbae0f595..79e53d07e2 100644
--- a/usr/src/uts/intel/sys/acpi/acglobal.h
+++ b/usr/src/uts/intel/sys/acpi/acglobal.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -128,6 +128,12 @@ UINT8 ACPI_INIT_GLOBAL (AcpiGbl_CopyDsdtLocally, FALSE);
*/
UINT8 ACPI_INIT_GLOBAL (AcpiGbl_TruncateIoAddresses, FALSE);
+/*
+ * Disable runtime checking and repair of values returned by control methods.
+ * Use only if the repair is causing a problem on a particular machine.
+ */
+UINT8 ACPI_INIT_GLOBAL (AcpiGbl_DisableAutoRepair, FALSE);
+
/* AcpiGbl_FADT is a local copy of the FADT, converted to a common format. */
@@ -137,7 +143,18 @@ UINT32 AcpiGbl_TraceFlags;
ACPI_NAME AcpiGbl_TraceMethodName;
BOOLEAN AcpiGbl_SystemAwakeAndRunning;
-#endif
+/*
+ * ACPI 5.0 introduces the concept of a "reduced hardware platform", meaning
+ * that the ACPI hardware is no longer required. A flag in the FADT indicates
+ * a reduced HW machine, and that flag is duplicated here for convenience.
+ */
+BOOLEAN AcpiGbl_ReducedHardware;
+
+#endif /* DEFINE_ACPI_GLOBALS */
+
+/* Do not disassemble buffers to resource descriptors */
+
+ACPI_EXTERN UINT8 ACPI_INIT_GLOBAL (AcpiGbl_NoResourceDisassembly, FALSE);
/*****************************************************************************
*
@@ -150,8 +167,12 @@ BOOLEAN AcpiGbl_SystemAwakeAndRunning;
* found in the RSDT/XSDT.
*/
ACPI_EXTERN ACPI_TABLE_LIST AcpiGbl_RootTableList;
+
+#if (!ACPI_REDUCED_HARDWARE)
ACPI_EXTERN ACPI_TABLE_FACS *AcpiGbl_FACS;
+#endif /* !ACPI_REDUCED_HARDWARE */
+
/* These addresses are calculated from the FADT Event Block addresses */
ACPI_EXTERN ACPI_GENERIC_ADDRESS AcpiGbl_XPm1aStatus;
@@ -177,7 +198,7 @@ ACPI_EXTERN UINT8 AcpiGbl_IntegerNybbleWidth;
/*****************************************************************************
*
- * Mutual exlusion within ACPICA subsystem
+ * Mutual exclusion within ACPICA subsystem
*
****************************************************************************/
@@ -233,8 +254,7 @@ ACPI_EXTERN ACPI_CACHE_T *AcpiGbl_OperandCache;
/* Global handlers */
-ACPI_EXTERN ACPI_OBJECT_NOTIFY_HANDLER AcpiGbl_DeviceNotify;
-ACPI_EXTERN ACPI_OBJECT_NOTIFY_HANDLER AcpiGbl_SystemNotify;
+ACPI_EXTERN ACPI_GLOBAL_NOTIFY_HANDLER AcpiGbl_GlobalNotify[2];
ACPI_EXTERN ACPI_EXCEPTION_HANDLER AcpiGbl_ExceptionHandler;
ACPI_EXTERN ACPI_INIT_HANDLER AcpiGbl_InitHandler;
ACPI_EXTERN ACPI_TABLE_HANDLER AcpiGbl_TableHandler;
@@ -265,6 +285,7 @@ ACPI_EXTERN BOOLEAN AcpiGbl_AcpiHardwarePresent;
ACPI_EXTERN BOOLEAN AcpiGbl_EventsInitialized;
ACPI_EXTERN UINT8 AcpiGbl_OsiData;
ACPI_EXTERN ACPI_INTERFACE_INFO *AcpiGbl_SupportedInterfaces;
+ACPI_EXTERN ACPI_ADDRESS_RANGE *AcpiGbl_AddressRangeList[ACPI_ADDRESS_RANGE_MAX];
#ifndef DEFINE_ACPI_GLOBALS
@@ -362,6 +383,8 @@ ACPI_EXTERN UINT8 AcpiGbl_SleepTypeB;
*
****************************************************************************/
+#if (!ACPI_REDUCED_HARDWARE)
+
ACPI_EXTERN UINT8 AcpiGbl_AllGpesInitialized;
ACPI_EXTERN ACPI_GPE_XRUPT_INFO *AcpiGbl_GpeXruptListHead;
ACPI_EXTERN ACPI_GPE_BLOCK_INFO *AcpiGbl_GpeFadtBlocks[ACPI_MAX_GPE_BLOCKS];
@@ -370,6 +393,7 @@ ACPI_EXTERN void *AcpiGbl_GlobalEventHandlerContext;
ACPI_EXTERN ACPI_FIXED_EVENT_HANDLER AcpiGbl_FixedEventHandlers[ACPI_NUM_FIXED_EVENTS];
extern ACPI_FIXED_EVENT_INFO AcpiGbl_FixedEventInfo[ACPI_NUM_FIXED_EVENTS];
+#endif /* !ACPI_REDUCED_HARDWARE */
/*****************************************************************************
*
diff --git a/usr/src/uts/intel/sys/acpi/achware.h b/usr/src/uts/intel/sys/acpi/achware.h
index ae4a77a9f2..f607d1702a 100644
--- a/usr/src/uts/intel/sys/acpi/achware.h
+++ b/usr/src/uts/intel/sys/acpi/achware.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -110,6 +110,49 @@ AcpiHwClearAcpiStatus (
/*
+ * hwsleep - sleep/wake support (Legacy sleep registers)
+ */
+ACPI_STATUS
+AcpiHwLegacySleep (
+ UINT8 SleepState,
+ UINT8 Flags);
+
+ACPI_STATUS
+AcpiHwLegacyWakePrep (
+ UINT8 SleepState,
+ UINT8 Flags);
+
+ACPI_STATUS
+AcpiHwLegacyWake (
+ UINT8 SleepState,
+ UINT8 Flags);
+
+
+/*
+ * hwesleep - sleep/wake support (Extended FADT-V5 sleep registers)
+ */
+void
+AcpiHwExecuteSleepMethod (
+ char *MethodName,
+ UINT32 IntegerArgument);
+
+ACPI_STATUS
+AcpiHwExtendedSleep (
+ UINT8 SleepState,
+ UINT8 Flags);
+
+ACPI_STATUS
+AcpiHwExtendedWakePrep (
+ UINT8 SleepState,
+ UINT8 Flags);
+
+ACPI_STATUS
+AcpiHwExtendedWake (
+ UINT8 SleepState,
+ UINT8 Flags);
+
+
+/*
* hwvalid - Port I/O with validation
*/
ACPI_STATUS
@@ -188,22 +231,4 @@ AcpiHwDerivePciId (
ACPI_HANDLE PciRegion);
-/*
- * hwtimer - ACPI Timer prototypes
- */
-ACPI_STATUS
-AcpiGetTimerResolution (
- UINT32 *Resolution);
-
-ACPI_STATUS
-AcpiGetTimer (
- UINT32 *Ticks);
-
-ACPI_STATUS
-AcpiGetTimerDuration (
- UINT32 StartTicks,
- UINT32 EndTicks,
- UINT32 *TimeElapsed);
-
-
#endif /* __ACHWARE_H__ */
diff --git a/usr/src/uts/intel/sys/acpi/acinterp.h b/usr/src/uts/intel/sys/acpi/acinterp.h
index 8ac828c59c..45e9814f79 100644
--- a/usr/src/uts/intel/sys/acpi/acinterp.h
+++ b/usr/src/uts/intel/sys/acpi/acinterp.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -642,6 +642,10 @@ AcpiExIntegerToString (
char *Dest,
UINT64 Value);
+BOOLEAN
+AcpiIsValidSpaceId (
+ UINT8 SpaceId);
+
/*
* exregion - default OpRegion handlers
diff --git a/usr/src/uts/intel/sys/acpi/aclocal.h b/usr/src/uts/intel/sys/acpi/aclocal.h
index 2eb6fc21d4..68ea95b7c8 100644
--- a/usr/src/uts/intel/sys/acpi/aclocal.h
+++ b/usr/src/uts/intel/sys/acpi/aclocal.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -54,7 +54,7 @@ typedef UINT32 ACPI_MUTEX_HANDLE;
/* Total number of aml opcodes defined */
-#define AML_NUM_OPCODES 0x7F
+#define AML_NUM_OPCODES 0x81
/* Forward declarations */
@@ -213,7 +213,6 @@ typedef struct acpi_namespace_node
#define ANOBJ_IS_EXTERNAL 0x08 /* iASL only: This object created via External() */
#define ANOBJ_METHOD_NO_RETVAL 0x10 /* iASL only: Method has no return value */
#define ANOBJ_METHOD_SOME_NO_RETVAL 0x20 /* iASL only: Method has at least one return value */
-#define ANOBJ_IS_BIT_OFFSET 0x40 /* iASL only: Reference is a bit offset */
#define ANOBJ_IS_REFERENCED 0x80 /* iASL only: Object was referenced */
@@ -286,12 +285,16 @@ typedef struct acpi_create_field_info
ACPI_NAMESPACE_NODE *FieldNode;
ACPI_NAMESPACE_NODE *RegisterNode;
ACPI_NAMESPACE_NODE *DataRegisterNode;
+ ACPI_NAMESPACE_NODE *ConnectionNode;
+ UINT8 *ResourceBuffer;
UINT32 BankValue;
UINT32 FieldBitPosition;
UINT32 FieldBitLength;
+ UINT16 ResourceLength;
UINT8 FieldFlags;
UINT8 Attribute;
UINT8 FieldType;
+ UINT8 AccessLength;
} ACPI_CREATE_FIELD_INFO;
@@ -359,7 +362,8 @@ typedef struct acpi_name_info
/*
* Used for ACPI_PTYPE1_FIXED, ACPI_PTYPE1_VAR, ACPI_PTYPE2,
- * ACPI_PTYPE2_MIN, ACPI_PTYPE2_PKG_COUNT, ACPI_PTYPE2_COUNT
+ * ACPI_PTYPE2_MIN, ACPI_PTYPE2_PKG_COUNT, ACPI_PTYPE2_COUNT,
+ * ACPI_PTYPE2_FIX_VAR
*/
typedef struct acpi_package_info
{
@@ -411,6 +415,7 @@ typedef struct acpi_predefined_data
char *Pathname;
const ACPI_PREDEFINED_INFO *Predefined;
union acpi_operand_object *ParentPackage;
+ ACPI_NAMESPACE_NODE *Node;
UINT32 Flags;
UINT8 NodeFlags;
@@ -419,6 +424,7 @@ typedef struct acpi_predefined_data
/* Defines for Flags field above */
#define ACPI_OBJECT_REPAIRED 1
+#define ACPI_OBJECT_WRAPPED 2
/*
@@ -710,6 +716,15 @@ ACPI_STATUS (*ACPI_PARSE_UPWARDS) (
struct acpi_walk_state *WalkState);
+/* Global handlers for AML Notifies */
+
+typedef struct acpi_global_notify_handler
+{
+ ACPI_NOTIFY_HANDLER Handler;
+ void *Context;
+
+} ACPI_GLOBAL_NOTIFY_HANDLER;
+
/*
* Notify info - used to pass info to the deferred notify
* handler/dispatcher.
@@ -717,8 +732,10 @@ ACPI_STATUS (*ACPI_PARSE_UPWARDS) (
typedef struct acpi_notify_info
{
ACPI_STATE_COMMON
+ UINT8 HandlerListId;
ACPI_NAMESPACE_NODE *Node;
- union acpi_operand_object *HandlerObj;
+ union acpi_operand_object *HandlerListHead;
+ ACPI_GLOBAL_NOTIFY_HANDLER *Global;
} ACPI_NOTIFY_INFO;
@@ -750,6 +767,17 @@ typedef
ACPI_STATUS (*ACPI_EXECUTE_OP) (
struct acpi_walk_state *WalkState);
+/* Address Range info block */
+
+typedef struct acpi_address_range
+{
+ struct acpi_address_range *Next;
+ ACPI_NAMESPACE_NODE *RegionNode;
+ ACPI_PHYSICAL_ADDRESS StartAddress;
+ ACPI_PHYSICAL_ADDRESS EndAddress;
+
+} ACPI_ADDRESS_RANGE;
+
/*****************************************************************************
*
@@ -774,6 +802,17 @@ typedef struct acpi_opcode_info
} ACPI_OPCODE_INFO;
+/* Structure for Resource Tag information */
+
+typedef struct acpi_tag_info
+{
+ UINT32 BitOffset;
+ UINT32 BitLength;
+
+} ACPI_TAG_INFO;
+
+/* Value associated with the parse object */
+
typedef union acpi_parse_value
{
UINT64 Integer; /* Integer constant (Up to 64 bits) */
@@ -782,6 +821,7 @@ typedef union acpi_parse_value
UINT8 *Buffer; /* buffer or string */
char *Name; /* NULL terminated string */
union acpi_parse_object *Arg; /* arguments and contained ops */
+ ACPI_TAG_INFO Tag; /* Resource descriptor tag info */
} ACPI_PARSE_VALUE;
@@ -1112,7 +1152,7 @@ typedef struct acpi_port_info
#define ACPI_RESOURCE_NAME_END_DEPENDENT 0x38
#define ACPI_RESOURCE_NAME_IO 0x40
#define ACPI_RESOURCE_NAME_FIXED_IO 0x48
-#define ACPI_RESOURCE_NAME_RESERVED_S1 0x50
+#define ACPI_RESOURCE_NAME_FIXED_DMA 0x50
#define ACPI_RESOURCE_NAME_RESERVED_S2 0x58
#define ACPI_RESOURCE_NAME_RESERVED_S3 0x60
#define ACPI_RESOURCE_NAME_RESERVED_S4 0x68
@@ -1134,7 +1174,9 @@ typedef struct acpi_port_info
#define ACPI_RESOURCE_NAME_EXTENDED_IRQ 0x89
#define ACPI_RESOURCE_NAME_ADDRESS64 0x8A
#define ACPI_RESOURCE_NAME_EXTENDED_ADDRESS64 0x8B
-#define ACPI_RESOURCE_NAME_LARGE_MAX 0x8B
+#define ACPI_RESOURCE_NAME_GPIO 0x8C
+#define ACPI_RESOURCE_NAME_SERIAL_BUS 0x8E
+#define ACPI_RESOURCE_NAME_LARGE_MAX 0x8E
/*****************************************************************************
diff --git a/usr/src/uts/intel/sys/acpi/acmacros.h b/usr/src/uts/intel/sys/acpi/acmacros.h
index 6581d111c9..cdb9cb109e 100644
--- a/usr/src/uts/intel/sys/acpi/acmacros.h
+++ b/usr/src/uts/intel/sys/acpi/acmacros.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -428,7 +428,6 @@
#endif /* ACPI_SIMPLE_RETURN_MACROS */
-
/* Conditional execution */
#define ACPI_DEBUG_EXEC(a) a
@@ -477,6 +476,14 @@
#endif /* ACPI_DEBUG_OUTPUT */
+
+#if (!ACPI_REDUCED_HARDWARE)
+#define ACPI_HW_OPTIONAL_FUNCTION(addr) addr
+#else
+#define ACPI_HW_OPTIONAL_FUNCTION(addr) NULL
+#endif
+
+
/*
* Some code only gets executed when the debugger is built in.
* Note that this is entirely independent of whether the
diff --git a/usr/src/uts/intel/sys/acpi/acnames.h b/usr/src/uts/intel/sys/acpi/acnames.h
index e08302dd30..4c5fab696f 100644
--- a/usr/src/uts/intel/sys/acpi/acnames.h
+++ b/usr/src/uts/intel/sys/acpi/acnames.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -46,6 +46,7 @@
/* Method names - these methods can appear anywhere in the namespace */
+#define METHOD_NAME__SB_ "_SB_"
#define METHOD_NAME__HID "_HID"
#define METHOD_NAME__CID "_CID"
#define METHOD_NAME__UID "_UID"
@@ -58,16 +59,17 @@
#define METHOD_NAME__PRT "_PRT"
#define METHOD_NAME__CRS "_CRS"
#define METHOD_NAME__PRS "_PRS"
+#define METHOD_NAME__AEI "_AEI"
#define METHOD_NAME__PRW "_PRW"
#define METHOD_NAME__SRS "_SRS"
/* Method names - these methods must appear at the namespace root */
-#define METHOD_NAME__BFS "\\_BFS"
-#define METHOD_NAME__GTS "\\_GTS"
-#define METHOD_NAME__PTS "\\_PTS"
-#define METHOD_NAME__SST "\\_SI._SST"
-#define METHOD_NAME__WAK "\\_WAK"
+#define METHOD_PATHNAME__BFS "\\_BFS"
+#define METHOD_PATHNAME__GTS "\\_GTS"
+#define METHOD_PATHNAME__PTS "\\_PTS"
+#define METHOD_PATHNAME__SST "\\_SI._SST"
+#define METHOD_PATHNAME__WAK "\\_WAK"
/* Definitions of the predefined namespace names */
@@ -78,7 +80,6 @@
#define ACPI_PREFIX_LOWER (UINT32) 0x69706361 /* "acpi" */
#define ACPI_NS_ROOT_PATH "\\"
-#define ACPI_NS_SYSTEM_BUS "_SB_"
#endif /* __ACNAMES_H__ */
diff --git a/usr/src/uts/intel/sys/acpi/acnamesp.h b/usr/src/uts/intel/sys/acpi/acnamesp.h
index 77f65d0ae4..4a91108eae 100644
--- a/usr/src/uts/intel/sys/acpi/acnamesp.h
+++ b/usr/src/uts/intel/sys/acpi/acnamesp.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -368,8 +368,9 @@ AcpiNsRepairObject (
ACPI_OPERAND_OBJECT **ReturnObjectPtr);
ACPI_STATUS
-AcpiNsRepairPackageList (
+AcpiNsWrapWithPackage (
ACPI_PREDEFINED_DATA *Data,
+ ACPI_OPERAND_OBJECT *OriginalObject,
ACPI_OPERAND_OBJECT **ObjDescPtr);
ACPI_STATUS
diff --git a/usr/src/uts/intel/sys/acpi/acobject.h b/usr/src/uts/intel/sys/acpi/acobject.h
index 27a21e6858..f2f8a1cfb1 100644
--- a/usr/src/uts/intel/sys/acpi/acobject.h
+++ b/usr/src/uts/intel/sys/acpi/acobject.h
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -246,8 +246,7 @@ typedef struct acpi_object_method
* Common fields for objects that support ASL notifications
*/
#define ACPI_COMMON_NOTIFY_INFO \
- union acpi_operand_object *SystemNotify; /* Handler for system notifies */\
- union acpi_operand_object *DeviceNotify; /* Handler for driver notifies */\
+ union acpi_operand_object *NotifyList[2]; /* Handlers for system/device notifies */\
union acpi_operand_object *Handler; /* Handler for Address space */
@@ -320,6 +319,7 @@ typedef struct acpi_object_thermal_zone
UINT32 BaseByteOffset; /* Byte offset within containing object */\
UINT32 Value; /* Value to store into the Bank or Index register */\
UINT8 StartFieldBitOffset;/* Bit offset within first field datum (0-63) */\
+ UINT8 AccessLength; /* For serial regions/fields */
typedef struct acpi_object_field_common /* COMMON FIELD (for BUFFER, REGION, BANK, and INDEX fields) */
@@ -335,7 +335,9 @@ typedef struct acpi_object_region_field
{
ACPI_OBJECT_COMMON_HEADER
ACPI_COMMON_FIELD_INFO
+ UINT16 ResourceLength;
union acpi_operand_object *RegionObj; /* Containing OpRegion object */
+ UINT8 *ResourceBuffer; /* ResourceTemplate for serial regions/fields */
} ACPI_OBJECT_REGION_FIELD;
@@ -386,8 +388,10 @@ typedef struct acpi_object_notify_handler
{
ACPI_OBJECT_COMMON_HEADER
ACPI_NAMESPACE_NODE *Node; /* Parent device */
- ACPI_NOTIFY_HANDLER Handler;
+ UINT32 HandlerType; /* Type: Device/System/Both */
+ ACPI_NOTIFY_HANDLER Handler; /* Handler addess */
void *Context;
+ union acpi_operand_object *Next[2]; /* Device and System handler lists */
} ACPI_OBJECT_NOTIFY_HANDLER;
@@ -463,6 +467,7 @@ typedef struct acpi_object_extra
{
ACPI_OBJECT_COMMON_HEADER
ACPI_NAMESPACE_NODE *Method_REG; /* _REG method for this region (if any) */
+ ACPI_NAMESPACE_NODE *ScopeNode;
void *RegionContext; /* Region-specific data */
UINT8 *AmlStart;
UINT32 AmlLength;
diff --git a/usr/src/uts/intel/sys/acpi/acopcode.h b/usr/src/uts/intel/sys/acpi/acopcode.h
index d8e1c75a7b..e182a07e61 100644
--- a/usr/src/uts/intel/sys/acpi/acopcode.h
+++ b/usr/src/uts/intel/sys/acpi/acopcode.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -94,6 +94,7 @@
#define ARGP_CONCAT_OP ARGP_LIST3 (ARGP_TERMARG, ARGP_TERMARG, ARGP_TARGET)
#define ARGP_CONCAT_RES_OP ARGP_LIST3 (ARGP_TERMARG, ARGP_TERMARG, ARGP_TARGET)
#define ARGP_COND_REF_OF_OP ARGP_LIST2 (ARGP_SUPERNAME, ARGP_SUPERNAME)
+#define ARGP_CONNECTFIELD_OP ARGP_LIST1 (ARGP_NAMESTRING)
#define ARGP_CONTINUE_OP ARG_NONE
#define ARGP_COPY_OP ARGP_LIST2 (ARGP_TERMARG, ARGP_SIMPLENAME)
#define ARGP_CREATE_BIT_FIELD_OP ARGP_LIST3 (ARGP_TERMARG, ARGP_TERMARG, ARGP_NAME)
@@ -165,6 +166,7 @@
#define ARGP_RETURN_OP ARGP_LIST1 (ARGP_TERMARG)
#define ARGP_REVISION_OP ARG_NONE
#define ARGP_SCOPE_OP ARGP_LIST3 (ARGP_PKGLENGTH, ARGP_NAME, ARGP_TERMLIST)
+#define ARGP_SERIALFIELD_OP ARGP_LIST1 (ARGP_NAMESTRING)
#define ARGP_SHIFT_LEFT_OP ARGP_LIST3 (ARGP_TERMARG, ARGP_TERMARG, ARGP_TARGET)
#define ARGP_SHIFT_RIGHT_OP ARGP_LIST3 (ARGP_TERMARG, ARGP_TERMARG, ARGP_TARGET)
#define ARGP_SIGNAL_OP ARGP_LIST1 (ARGP_SUPERNAME)
@@ -225,6 +227,7 @@
#define ARGI_CONCAT_OP ARGI_LIST3 (ARGI_COMPUTEDATA,ARGI_COMPUTEDATA, ARGI_TARGETREF)
#define ARGI_CONCAT_RES_OP ARGI_LIST3 (ARGI_BUFFER, ARGI_BUFFER, ARGI_TARGETREF)
#define ARGI_COND_REF_OF_OP ARGI_LIST2 (ARGI_OBJECT_REF, ARGI_TARGETREF)
+#define ARGI_CONNECTFIELD_OP ARGI_INVALID_OPCODE
#define ARGI_CONTINUE_OP ARGI_INVALID_OPCODE
#define ARGI_COPY_OP ARGI_LIST2 (ARGI_ANYTYPE, ARGI_SIMPLE_TARGET)
#define ARGI_CREATE_BIT_FIELD_OP ARGI_LIST3 (ARGI_BUFFER, ARGI_INTEGER, ARGI_REFERENCE)
@@ -296,6 +299,7 @@
#define ARGI_RETURN_OP ARGI_INVALID_OPCODE
#define ARGI_REVISION_OP ARG_NONE
#define ARGI_SCOPE_OP ARGI_INVALID_OPCODE
+#define ARGI_SERIALFIELD_OP ARGI_INVALID_OPCODE
#define ARGI_SHIFT_LEFT_OP ARGI_LIST3 (ARGI_INTEGER, ARGI_INTEGER, ARGI_TARGETREF)
#define ARGI_SHIFT_RIGHT_OP ARGI_LIST3 (ARGI_INTEGER, ARGI_INTEGER, ARGI_TARGETREF)
#define ARGI_SIGNAL_OP ARGI_LIST1 (ARGI_EVENT)
diff --git a/usr/src/uts/intel/sys/acpi/acoutput.h b/usr/src/uts/intel/sys/acpi/acoutput.h
index c8007e21fb..a1d89d408c 100644
--- a/usr/src/uts/intel/sys/acpi/acoutput.h
+++ b/usr/src/uts/intel/sys/acpi/acoutput.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -72,6 +72,7 @@
#define ACPI_EXAMPLE 0x00004000
#define ACPI_DRIVER 0x00008000
#define DT_COMPILER 0x00010000
+#define ASL_PREPROCESSOR 0x00020000
#define ACPI_ALL_COMPONENTS 0x0001FFFF
#define ACPI_COMPONENT_DEFAULT (ACPI_ALL_COMPONENTS)
diff --git a/usr/src/uts/intel/sys/acpi/acparser.h b/usr/src/uts/intel/sys/acpi/acparser.h
index 83b3c4fc06..f46d3cbc2c 100644
--- a/usr/src/uts/intel/sys/acpi/acparser.h
+++ b/usr/src/uts/intel/sys/acpi/acparser.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/acpi/acpi.h b/usr/src/uts/intel/sys/acpi/acpi.h
index 0f4f607120..aa40219112 100644
--- a/usr/src/uts/intel/sys/acpi/acpi.h
+++ b/usr/src/uts/intel/sys/acpi/acpi.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/acpi/acpiosxf.h b/usr/src/uts/intel/sys/acpi/acpiosxf.h
index 85b0ff6108..b79d0e6ade 100644
--- a/usr/src/uts/intel/sys/acpi/acpiosxf.h
+++ b/usr/src/uts/intel/sys/acpi/acpiosxf.h
@@ -9,7 +9,7 @@
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -112,6 +112,12 @@ AcpiOsTableOverride (
ACPI_TABLE_HEADER *ExistingTable,
ACPI_TABLE_HEADER **NewTable);
+ACPI_STATUS
+AcpiOsPhysicalTableOverride (
+ ACPI_TABLE_HEADER *ExistingTable,
+ ACPI_PHYSICAL_ADDRESS *NewAddress,
+ UINT32 *NewTableLength);
+
/*
* Spinlock primitives
@@ -302,13 +308,13 @@ AcpiOsWritePort (
ACPI_STATUS
AcpiOsReadMemory (
ACPI_PHYSICAL_ADDRESS Address,
- UINT32 *Value,
+ UINT64 *Value,
UINT32 Width);
ACPI_STATUS
AcpiOsWriteMemory (
ACPI_PHYSICAL_ADDRESS Address,
- UINT32 Value,
+ UINT64 Value,
UINT32 Width);
diff --git a/usr/src/uts/intel/sys/acpi/acpixf.h b/usr/src/uts/intel/sys/acpi/acpixf.h
index 82b677e615..7478c78940 100644
--- a/usr/src/uts/intel/sys/acpi/acpixf.h
+++ b/usr/src/uts/intel/sys/acpi/acpixf.h
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -48,8 +48,9 @@
/* Current ACPICA subsystem version in YYYYMMDD format */
-#define ACPI_CA_VERSION 0x20110527
+#define ACPI_CA_VERSION 0x20120420
+#include "acconfig.h"
#include "actypes.h"
#include "actbl.h"
@@ -59,6 +60,7 @@
extern UINT32 AcpiCurrentGpeCount;
extern ACPI_TABLE_FADT AcpiGbl_FADT;
extern BOOLEAN AcpiGbl_SystemAwakeAndRunning;
+extern BOOLEAN AcpiGbl_ReducedHardware; /* ACPI 5.0 */
/* Runtime configuration of debug print levels */
@@ -76,6 +78,35 @@ extern UINT32 AcpiGbl_TraceFlags;
extern UINT8 AcpiGbl_EnableAmlDebugObject;
extern UINT8 AcpiGbl_CopyDsdtLocally;
extern UINT8 AcpiGbl_TruncateIoAddresses;
+extern UINT8 AcpiGbl_DisableAutoRepair;
+
+
+/*
+ * Hardware-reduced prototypes. All interfaces that use these macros will
+ * be configured out of the ACPICA build if the ACPI_REDUCED_HARDWARE flag
+ * is set to TRUE.
+ */
+#if (!ACPI_REDUCED_HARDWARE)
+#define ACPI_HW_DEPENDENT_RETURN_STATUS(Prototype) \
+ Prototype;
+
+#define ACPI_HW_DEPENDENT_RETURN_OK(Prototype) \
+ Prototype;
+
+#define ACPI_HW_DEPENDENT_RETURN_VOID(Prototype) \
+ Prototype;
+
+#else
+#define ACPI_HW_DEPENDENT_RETURN_STATUS(Prototype) \
+ static ACPI_INLINE Prototype {return(AE_NOT_CONFIGURED);}
+
+#define ACPI_HW_DEPENDENT_RETURN_OK(Prototype) \
+ static ACPI_INLINE Prototype {return(AE_OK);}
+
+#define ACPI_HW_DEPENDENT_RETURN_VOID(Prototype) \
+ static ACPI_INLINE Prototype {}
+
+#endif /* !ACPI_REDUCED_HARDWARE */
/*
@@ -107,13 +138,15 @@ AcpiTerminate (
/*
* Miscellaneous global interfaces
*/
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiEnable (
- void);
+ void))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiDisable (
- void);
+ void))
ACPI_STATUS
AcpiSubsystemStatus (
@@ -143,6 +176,13 @@ ACPI_STATUS
AcpiRemoveInterface (
ACPI_STRING InterfaceName);
+UINT32
+AcpiCheckAddressRange (
+ ACPI_ADR_SPACE_TYPE SpaceId,
+ ACPI_PHYSICAL_ADDRESS Address,
+ ACPI_SIZE Length,
+ BOOLEAN Warn);
+
/*
* ACPI Memory management
@@ -312,35 +352,40 @@ AcpiInstallInitializationHandler (
ACPI_INIT_HANDLER Handler,
UINT32 Function);
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiInstallGlobalEventHandler (
ACPI_GBL_EVENT_HANDLER Handler,
- void *Context);
+ void *Context))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiInstallFixedEventHandler (
UINT32 AcpiEvent,
ACPI_EVENT_HANDLER Handler,
- void *Context);
+ void *Context))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiRemoveFixedEventHandler (
UINT32 AcpiEvent,
- ACPI_EVENT_HANDLER Handler);
+ ACPI_EVENT_HANDLER Handler))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiInstallGpeHandler (
ACPI_HANDLE GpeDevice,
UINT32 GpeNumber,
UINT32 Type,
ACPI_GPE_HANDLER Address,
- void *Context);
+ void *Context))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiRemoveGpeHandler (
ACPI_HANDLE GpeDevice,
UINT32 GpeNumber,
- ACPI_GPE_HANDLER Address);
+ ACPI_GPE_HANDLER Address))
ACPI_STATUS
AcpiInstallNotifyHandler (
@@ -381,113 +426,148 @@ AcpiInstallInterfaceHandler (
/*
* Global Lock interfaces
*/
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiAcquireGlobalLock (
UINT16 Timeout,
- UINT32 *Handle);
+ UINT32 *Handle))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiReleaseGlobalLock (
- UINT32 Handle);
+ UINT32 Handle))
+
+
+/*
+ * Interfaces to AML mutex objects
+ */
+ACPI_STATUS
+AcpiAcquireMutex (
+ ACPI_HANDLE Handle,
+ ACPI_STRING Pathname,
+ UINT16 Timeout);
+
+ACPI_STATUS
+AcpiReleaseMutex (
+ ACPI_HANDLE Handle,
+ ACPI_STRING Pathname);
/*
* Fixed Event interfaces
*/
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiEnableEvent (
UINT32 Event,
- UINT32 Flags);
+ UINT32 Flags))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiDisableEvent (
UINT32 Event,
- UINT32 Flags);
+ UINT32 Flags))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiClearEvent (
- UINT32 Event);
+ UINT32 Event))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiGetEventStatus (
UINT32 Event,
- ACPI_EVENT_STATUS *EventStatus);
+ ACPI_EVENT_STATUS *EventStatus))
/*
* General Purpose Event (GPE) Interfaces
*/
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiUpdateAllGpes (
- void);
+ void))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiEnableGpe (
ACPI_HANDLE GpeDevice,
- UINT32 GpeNumber);
+ UINT32 GpeNumber))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiDisableGpe (
ACPI_HANDLE GpeDevice,
- UINT32 GpeNumber);
+ UINT32 GpeNumber))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiClearGpe (
ACPI_HANDLE GpeDevice,
- UINT32 GpeNumber);
+ UINT32 GpeNumber))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiSetGpe (
ACPI_HANDLE GpeDevice,
UINT32 GpeNumber,
- UINT8 Action);
+ UINT8 Action))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiFinishGpe (
ACPI_HANDLE GpeDevice,
- UINT32 GpeNumber);
+ UINT32 GpeNumber))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiSetupGpeForWake (
ACPI_HANDLE ParentDevice,
ACPI_HANDLE GpeDevice,
- UINT32 GpeNumber);
+ UINT32 GpeNumber))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiSetGpeWakeMask (
ACPI_HANDLE GpeDevice,
UINT32 GpeNumber,
- UINT8 Action);
+ UINT8 Action))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiGetGpeStatus (
ACPI_HANDLE GpeDevice,
UINT32 GpeNumber,
- ACPI_EVENT_STATUS *EventStatus);
+ ACPI_EVENT_STATUS *EventStatus))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiDisableAllGpes (
- void);
+ void))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiEnableAllRuntimeGpes (
- void);
+ void))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiGetGpeDevice (
UINT32 GpeIndex,
- ACPI_HANDLE *GpeDevice);
+ ACPI_HANDLE *GpeDevice))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiInstallGpeBlock (
ACPI_HANDLE GpeDevice,
ACPI_GENERIC_ADDRESS *GpeBlockAddress,
UINT32 RegisterCount,
- UINT32 InterruptNumber);
+ UINT32 InterruptNumber))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiRemoveGpeBlock (
- ACPI_HANDLE GpeDevice);
+ ACPI_HANDLE GpeDevice))
/*
@@ -516,6 +596,11 @@ AcpiGetPossibleResources (
ACPI_BUFFER *RetBuffer);
ACPI_STATUS
+AcpiGetEventResources (
+ ACPI_HANDLE DeviceHandle,
+ ACPI_BUFFER *RetBuffer);
+
+ACPI_STATUS
AcpiWalkResources (
ACPI_HANDLE Device,
char *Name,
@@ -537,6 +622,12 @@ AcpiResourceToAddress64 (
ACPI_RESOURCE *Resource,
ACPI_RESOURCE_ADDRESS64 *Out);
+ACPI_STATUS
+AcpiBufferToResource (
+ UINT8 *AmlBuffer,
+ UINT16 AmlBufferLength,
+ ACPI_RESOURCE **ResourcePtr);
+
/*
* Hardware (ACPI device) interfaces
@@ -555,16 +646,22 @@ AcpiWrite (
UINT64 Value,
ACPI_GENERIC_ADDRESS *Reg);
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiReadBitRegister (
UINT32 RegisterId,
- UINT32 *ReturnValue);
+ UINT32 *ReturnValue))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiWriteBitRegister (
UINT32 RegisterId,
- UINT32 Value);
+ UINT32 Value))
+
+/*
+ * Sleep/Wake interfaces
+ */
ACPI_STATUS
AcpiGetSleepTypeData (
UINT8 SleepState,
@@ -577,28 +674,58 @@ AcpiEnterSleepStatePrep (
ACPI_STATUS
AcpiEnterSleepState (
- UINT8 SleepState);
+ UINT8 SleepState,
+ UINT8 Flags);
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiEnterSleepStateS4bios (
- void);
+ void))
+
+ACPI_STATUS
+AcpiLeaveSleepStatePrep (
+ UINT8 SleepState,
+ UINT8 Flags);
ACPI_STATUS
AcpiLeaveSleepState (
- UINT8 SleepState)
- ;
+ UINT8 SleepState);
+
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiSetFirmwareWakingVector (
- UINT32 PhysicalAddress);
+ UINT32 PhysicalAddress))
#if ACPI_MACHINE_WIDTH == 64
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiSetFirmwareWakingVector64 (
- UINT64 PhysicalAddress);
+ UINT64 PhysicalAddress))
#endif
/*
+ * ACPI Timer interfaces
+ */
+ACPI_HW_DEPENDENT_RETURN_STATUS (
+ACPI_STATUS
+AcpiGetTimerResolution (
+ UINT32 *Resolution))
+
+ACPI_HW_DEPENDENT_RETURN_STATUS (
+ACPI_STATUS
+AcpiGetTimer (
+ UINT32 *Ticks))
+
+ACPI_HW_DEPENDENT_RETURN_STATUS (
+ACPI_STATUS
+AcpiGetTimerDuration (
+ UINT32 StartTicks,
+ UINT32 EndTicks,
+ UINT32 *TimeElapsed))
+
+
+/*
* Error/Warning output
*/
void ACPI_INTERNAL_VAR_XFACE
diff --git a/usr/src/uts/intel/sys/acpi/acpredef.h b/usr/src/uts/intel/sys/acpi/acpredef.h
index 0496767a86..c17a6d9145 100644
--- a/usr/src/uts/intel/sys/acpi/acpredef.h
+++ b/usr/src/uts/intel/sys/acpi/acpredef.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -94,6 +94,14 @@
* ACPI_PTYPE2_REV_FIXED: Revision at start, each subpackage is Fixed-length
* (Used for _ART, _FPS)
*
+ * ACPI_PTYPE2_FIX_VAR: Each subpackage consists of some fixed-length elements
+ * followed by an optional element
+ * object type
+ * count
+ * object type
+ * count = 0 (optional)
+ * (Used for _DLM)
+ *
*****************************************************************************/
enum AcpiReturnPackageTypes
@@ -106,7 +114,8 @@ enum AcpiReturnPackageTypes
ACPI_PTYPE2_PKG_COUNT = 6,
ACPI_PTYPE2_FIXED = 7,
ACPI_PTYPE2_MIN = 8,
- ACPI_PTYPE2_REV_FIXED = 9
+ ACPI_PTYPE2_REV_FIXED = 9,
+ ACPI_PTYPE2_FIX_VAR = 10
};
@@ -159,6 +168,7 @@ static const ACPI_PREDEFINED_INFO PredefinedNames[] =
{{"_AC8", 0, ACPI_RTYPE_INTEGER}},
{{"_AC9", 0, ACPI_RTYPE_INTEGER}},
{{"_ADR", 0, ACPI_RTYPE_INTEGER}},
+ {{"_AEI", 0, ACPI_RTYPE_BUFFER}},
{{"_AL0", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
{{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
@@ -231,6 +241,12 @@ static const ACPI_PREDEFINED_INFO PredefinedNames[] =
{{"_CID", 0, ACPI_RTYPE_INTEGER | ACPI_RTYPE_STRING | ACPI_RTYPE_PACKAGE}}, /* Variable-length (Ints/Strs) */
{{{ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER | ACPI_RTYPE_STRING, 0,0}, 0,0}},
+ {{"_CLS", 0, ACPI_RTYPE_PACKAGE}}, /* Fixed-length (3 Int) */
+ {{{ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 3,0}, 0,0}},
+
+ {{"_CPC", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Ints/Bufs) */
+ {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER | ACPI_RTYPE_BUFFER, 0,0}, 0,0}},
+
{{"_CRS", 0, ACPI_RTYPE_BUFFER}},
{{"_CRT", 0, ACPI_RTYPE_INTEGER}},
{{"_CSD", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (1 Int(n), n-1 Int) */
@@ -239,12 +255,20 @@ static const ACPI_PREDEFINED_INFO PredefinedNames[] =
{{"_CST", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (1 Int(n), n Pkg (1 Buf/3 Int) */
{{{ACPI_PTYPE2_PKG_COUNT,ACPI_RTYPE_BUFFER, 1, ACPI_RTYPE_INTEGER}, 3,0}},
+ {{"_CWS", 1, ACPI_RTYPE_INTEGER}},
{{"_DCK", 1, ACPI_RTYPE_INTEGER}},
{{"_DCS", 0, ACPI_RTYPE_INTEGER}},
{{"_DDC", 1, ACPI_RTYPE_INTEGER | ACPI_RTYPE_BUFFER}},
{{"_DDN", 0, ACPI_RTYPE_STRING}},
+ {{"_DEP", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
+ {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
+
{{"_DGS", 0, ACPI_RTYPE_INTEGER}},
{{"_DIS", 0, 0}},
+
+ {{"_DLM", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Pkgs) each (1 Ref, 0/1 Optional Buf/Ref) */
+ {{{ACPI_PTYPE2_FIX_VAR, ACPI_RTYPE_REFERENCE, 1, ACPI_RTYPE_REFERENCE | ACPI_RTYPE_BUFFER}, 0,0}},
+
{{"_DMA", 0, ACPI_RTYPE_BUFFER}},
{{"_DOD", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Ints) */
{{{ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER, 0,0}, 0,0}},
@@ -264,6 +288,8 @@ static const ACPI_PREDEFINED_INFO PredefinedNames[] =
{{"_EJ3", 1, 0}},
{{"_EJ4", 1, 0}},
{{"_EJD", 0, ACPI_RTYPE_STRING}},
+ {{"_ERR", 3, ACPI_RTYPE_INTEGER}}, /* Internal use only, used by ACPICA test suites */
+ {{"_EVT", 1, 0}},
{{"_FDE", 0, ACPI_RTYPE_BUFFER}},
{{"_FDI", 0, ACPI_RTYPE_PACKAGE}}, /* Fixed-length (16 Int) */
{{{ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 16,0}, 0,0}},
@@ -284,14 +310,17 @@ static const ACPI_PREDEFINED_INFO PredefinedNames[] =
{{"_GAI", 0, ACPI_RTYPE_INTEGER}},
+ {{"_GCP", 0, ACPI_RTYPE_INTEGER}},
{{"_GHL", 0, ACPI_RTYPE_INTEGER}},
{{"_GLK", 0, ACPI_RTYPE_INTEGER}},
{{"_GPD", 0, ACPI_RTYPE_INTEGER}},
{{"_GPE", 0, ACPI_RTYPE_INTEGER}}, /* _GPE method, not _GPE scope */
+ {{"_GRT", 0, ACPI_RTYPE_BUFFER}},
{{"_GSB", 0, ACPI_RTYPE_INTEGER}},
{{"_GTF", 0, ACPI_RTYPE_BUFFER}},
{{"_GTM", 0, ACPI_RTYPE_BUFFER}},
{{"_GTS", 1, 0}},
+ {{"_GWS", 1, ACPI_RTYPE_INTEGER}},
{{"_HID", 0, ACPI_RTYPE_INTEGER | ACPI_RTYPE_STRING}},
{{"_HOT", 0, ACPI_RTYPE_INTEGER}},
{{"_HPP", 0, ACPI_RTYPE_PACKAGE}}, /* Fixed-length (4 Int) */
@@ -306,6 +335,7 @@ static const ACPI_PREDEFINED_INFO PredefinedNames[] =
{{"_HPX", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Pkgs) each (var Ints) */
{{{ACPI_PTYPE2_MIN, ACPI_RTYPE_INTEGER, 5,0}, 0,0}},
+ {{"_HRV", 0, ACPI_RTYPE_INTEGER}},
{{"_IFT", 0, ACPI_RTYPE_INTEGER}}, /* See IPMI spec */
{{"_INI", 0, 0}},
{{"_IRC", 0, 0}},
@@ -363,6 +393,9 @@ static const ACPI_PREDEFINED_INFO PredefinedNames[] =
{{"_PR3", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
{{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
+ {{"_PRE", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
+ {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
+
{{"_PRL", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
{{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
@@ -393,6 +426,7 @@ static const ACPI_PREDEFINED_INFO PredefinedNames[] =
{{"_PSD", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Pkgs) each (5 Int) with count */
{{{ACPI_PTYPE2_COUNT, ACPI_RTYPE_INTEGER,0,0}, 0,0}},
+ {{"_PSE", 1, 0}},
{{"_PSL", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
{{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
@@ -459,6 +493,7 @@ static const ACPI_PREDEFINED_INFO PredefinedNames[] =
{{"_SLI", 0, ACPI_RTYPE_BUFFER}},
{{"_SPD", 1, ACPI_RTYPE_INTEGER}},
{{"_SRS", 1, 0}},
+ {{"_SRT", 1, ACPI_RTYPE_INTEGER}},
{{"_SRV", 0, ACPI_RTYPE_INTEGER}}, /* See IPMI spec */
{{"_SST", 1, 0}},
{{"_STA", 0, ACPI_RTYPE_INTEGER}},
@@ -466,6 +501,7 @@ static const ACPI_PREDEFINED_INFO PredefinedNames[] =
{{"_STP", 2, ACPI_RTYPE_INTEGER}},
{{"_STR", 0, ACPI_RTYPE_BUFFER}},
{{"_STV", 2, ACPI_RTYPE_INTEGER}},
+ {{"_SUB", 0, ACPI_RTYPE_STRING}},
{{"_SUN", 0, ACPI_RTYPE_INTEGER}},
{{"_SWS", 0, ACPI_RTYPE_INTEGER}},
{{"_TC1", 0, ACPI_RTYPE_INTEGER}},
diff --git a/usr/src/uts/intel/sys/acpi/acresrc.h b/usr/src/uts/intel/sys/acpi/acresrc.h
index 00c4bb2530..c879fdc4a3 100644
--- a/usr/src/uts/intel/sys/acpi/acresrc.h
+++ b/usr/src/uts/intel/sys/acpi/acresrc.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -75,28 +75,42 @@ typedef const struct acpi_rsconvert_info
/* Resource conversion opcodes */
-#define ACPI_RSC_INITGET 0
-#define ACPI_RSC_INITSET 1
-#define ACPI_RSC_FLAGINIT 2
-#define ACPI_RSC_1BITFLAG 3
-#define ACPI_RSC_2BITFLAG 4
-#define ACPI_RSC_COUNT 5
-#define ACPI_RSC_COUNT16 6
-#define ACPI_RSC_LENGTH 7
-#define ACPI_RSC_MOVE8 8
-#define ACPI_RSC_MOVE16 9
-#define ACPI_RSC_MOVE32 10
-#define ACPI_RSC_MOVE64 11
-#define ACPI_RSC_SET8 12
-#define ACPI_RSC_DATA8 13
-#define ACPI_RSC_ADDRESS 14
-#define ACPI_RSC_SOURCE 15
-#define ACPI_RSC_SOURCEX 16
-#define ACPI_RSC_BITMASK 17
-#define ACPI_RSC_BITMASK16 18
-#define ACPI_RSC_EXIT_NE 19
-#define ACPI_RSC_EXIT_LE 20
-#define ACPI_RSC_EXIT_EQ 21
+typedef enum
+{
+ ACPI_RSC_INITGET = 0,
+ ACPI_RSC_INITSET,
+ ACPI_RSC_FLAGINIT,
+ ACPI_RSC_1BITFLAG,
+ ACPI_RSC_2BITFLAG,
+ ACPI_RSC_3BITFLAG,
+ ACPI_RSC_ADDRESS,
+ ACPI_RSC_BITMASK,
+ ACPI_RSC_BITMASK16,
+ ACPI_RSC_COUNT,
+ ACPI_RSC_COUNT16,
+ ACPI_RSC_COUNT_GPIO_PIN,
+ ACPI_RSC_COUNT_GPIO_RES,
+ ACPI_RSC_COUNT_GPIO_VEN,
+ ACPI_RSC_COUNT_SERIAL_RES,
+ ACPI_RSC_COUNT_SERIAL_VEN,
+ ACPI_RSC_DATA8,
+ ACPI_RSC_EXIT_EQ,
+ ACPI_RSC_EXIT_LE,
+ ACPI_RSC_EXIT_NE,
+ ACPI_RSC_LENGTH,
+ ACPI_RSC_MOVE_GPIO_PIN,
+ ACPI_RSC_MOVE_GPIO_RES,
+ ACPI_RSC_MOVE_SERIAL_RES,
+ ACPI_RSC_MOVE_SERIAL_VEN,
+ ACPI_RSC_MOVE8,
+ ACPI_RSC_MOVE16,
+ ACPI_RSC_MOVE32,
+ ACPI_RSC_MOVE64,
+ ACPI_RSC_SET8,
+ ACPI_RSC_SOURCE,
+ ACPI_RSC_SOURCEX
+
+} ACPI_RSCONVERT_OPCODES;
/* Resource Conversion sub-opcodes */
@@ -109,6 +123,9 @@ typedef const struct acpi_rsconvert_info
#define AML_OFFSET(f) (UINT8) ACPI_OFFSET (AML_RESOURCE,f)
+/*
+ * Individual entry for the resource dump tables
+ */
typedef const struct acpi_rsdump_info
{
UINT8 Opcode;
@@ -120,20 +137,27 @@ typedef const struct acpi_rsdump_info
/* Values for the Opcode field above */
-#define ACPI_RSD_TITLE 0
-#define ACPI_RSD_LITERAL 1
-#define ACPI_RSD_STRING 2
-#define ACPI_RSD_UINT8 3
-#define ACPI_RSD_UINT16 4
-#define ACPI_RSD_UINT32 5
-#define ACPI_RSD_UINT64 6
-#define ACPI_RSD_1BITFLAG 7
-#define ACPI_RSD_2BITFLAG 8
-#define ACPI_RSD_SHORTLIST 9
-#define ACPI_RSD_LONGLIST 10
-#define ACPI_RSD_DWORDLIST 11
-#define ACPI_RSD_ADDRESS 12
-#define ACPI_RSD_SOURCE 13
+typedef enum
+{
+ ACPI_RSD_TITLE = 0,
+ ACPI_RSD_1BITFLAG,
+ ACPI_RSD_2BITFLAG,
+ ACPI_RSD_3BITFLAG,
+ ACPI_RSD_ADDRESS,
+ ACPI_RSD_DWORDLIST,
+ ACPI_RSD_LITERAL,
+ ACPI_RSD_LONGLIST,
+ ACPI_RSD_SHORTLIST,
+ ACPI_RSD_SHORTLISTX,
+ ACPI_RSD_SOURCE,
+ ACPI_RSD_STRING,
+ ACPI_RSD_UINT8,
+ ACPI_RSD_UINT16,
+ ACPI_RSD_UINT32,
+ ACPI_RSD_UINT64,
+ ACPI_RSD_WORDLIST
+
+} ACPI_RSDUMP_OPCODES;
/* restore default alignment */
@@ -143,13 +167,16 @@ typedef const struct acpi_rsdump_info
/* Resource tables indexed by internal resource type */
extern const UINT8 AcpiGbl_AmlResourceSizes[];
+extern const UINT8 AcpiGbl_AmlResourceSerialBusSizes[];
extern ACPI_RSCONVERT_INFO *AcpiGbl_SetResourceDispatch[];
/* Resource tables indexed by raw AML resource descriptor type */
extern const UINT8 AcpiGbl_ResourceStructSizes[];
+extern const UINT8 AcpiGbl_ResourceStructSerialBusSizes[];
extern ACPI_RSCONVERT_INFO *AcpiGbl_GetResourceDispatch[];
+extern ACPI_RSCONVERT_INFO *AcpiGbl_ConvertResourceSerialBusDispatch[];
typedef struct acpi_vendor_walk_info
{
@@ -208,6 +235,10 @@ AcpiRsSetSrsMethodData (
ACPI_NAMESPACE_NODE *Node,
ACPI_BUFFER *RetBuffer);
+ACPI_STATUS
+AcpiRsGetAeiMethodData (
+ ACPI_NAMESPACE_NODE *Node,
+ ACPI_BUFFER *RetBuffer);
/*
* rscalc
@@ -348,6 +379,11 @@ extern ACPI_RSCONVERT_INFO AcpiRsConvertAddress16[];
extern ACPI_RSCONVERT_INFO AcpiRsConvertExtIrq[];
extern ACPI_RSCONVERT_INFO AcpiRsConvertAddress64[];
extern ACPI_RSCONVERT_INFO AcpiRsConvertExtAddress64[];
+extern ACPI_RSCONVERT_INFO AcpiRsConvertGpio[];
+extern ACPI_RSCONVERT_INFO AcpiRsConvertFixedDma[];
+extern ACPI_RSCONVERT_INFO AcpiRsConvertI2cSerialBus[];
+extern ACPI_RSCONVERT_INFO AcpiRsConvertSpiSerialBus[];
+extern ACPI_RSCONVERT_INFO AcpiRsConvertUartSerialBus[];
/* These resources require separate get/set tables */
@@ -366,6 +402,7 @@ extern ACPI_RSCONVERT_INFO AcpiRsSetVendor[];
* rsinfo
*/
extern ACPI_RSDUMP_INFO *AcpiGbl_DumpResourceDispatch[];
+extern ACPI_RSDUMP_INFO *AcpiGbl_DumpSerialBusDispatch[];
/*
* rsdump
@@ -387,6 +424,12 @@ extern ACPI_RSDUMP_INFO AcpiRsDumpAddress64[];
extern ACPI_RSDUMP_INFO AcpiRsDumpExtAddress64[];
extern ACPI_RSDUMP_INFO AcpiRsDumpExtIrq[];
extern ACPI_RSDUMP_INFO AcpiRsDumpGenericReg[];
+extern ACPI_RSDUMP_INFO AcpiRsDumpGpio[];
+extern ACPI_RSDUMP_INFO AcpiRsDumpFixedDma[];
+extern ACPI_RSDUMP_INFO AcpiRsDumpCommonSerialBus[];
+extern ACPI_RSDUMP_INFO AcpiRsDumpI2cSerialBus[];
+extern ACPI_RSDUMP_INFO AcpiRsDumpSpiSerialBus[];
+extern ACPI_RSDUMP_INFO AcpiRsDumpUartSerialBus[];
#endif
#endif /* __ACRESRC_H__ */
diff --git a/usr/src/uts/intel/sys/acpi/acrestyp.h b/usr/src/uts/intel/sys/acpi/acrestyp.h
index 03e7e2ce08..b927c9c58e 100644
--- a/usr/src/uts/intel/sys/acpi/acrestyp.h
+++ b/usr/src/uts/intel/sys/acpi/acrestyp.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -62,11 +62,14 @@ typedef UINT32 ACPI_RSDESC_SIZE; /* Max Resource Descr
#define ACPI_WRITE_COMBINING_MEMORY (UINT8) 0x02
#define ACPI_PREFETCHABLE_MEMORY (UINT8) 0x03
+/*! [Begin] no source code translation */
/*
* IO Attributes
* The ISA IO ranges are: n000-n0FFh, n400-n4FFh, n800-n8FFh, nC00-nCFFh.
* The non-ISA IO ranges are: n100-n3FFh, n500-n7FFh, n900-nBFFh, nCD0-nFFFh.
*/
+/*! [End] no source code translation !*/
+
#define ACPI_NON_ISA_ONLY_RANGES (UINT8) 0x01
#define ACPI_ISA_ONLY_RANGES (UINT8) 0x02
#define ACPI_ENTIRE_RANGE (ACPI_NON_ISA_ONLY_RANGES | ACPI_ISA_ONLY_RANGES)
@@ -82,16 +85,26 @@ typedef UINT32 ACPI_RSDESC_SIZE; /* Max Resource Descr
#define ACPI_DECODE_16 (UINT8) 0x01 /* 16-bit IO address decode */
/*
- * IRQ Attributes
+ * Interrupt attributes - used in multiple descriptors
*/
+
+/* Triggering */
+
#define ACPI_LEVEL_SENSITIVE (UINT8) 0x00
#define ACPI_EDGE_SENSITIVE (UINT8) 0x01
+/* Polarity */
+
#define ACPI_ACTIVE_HIGH (UINT8) 0x00
#define ACPI_ACTIVE_LOW (UINT8) 0x01
+#define ACPI_ACTIVE_BOTH (UINT8) 0x02
+
+/* Sharing */
#define ACPI_EXCLUSIVE (UINT8) 0x00
#define ACPI_SHARED (UINT8) 0x01
+#define ACPI_EXCLUSIVE_AND_WAKE (UINT8) 0x02
+#define ACPI_SHARED_AND_WAKE (UINT8) 0x03
/*
* DMA Attributes
@@ -128,6 +141,8 @@ typedef UINT32 ACPI_RSDESC_SIZE; /* Max Resource Descr
#define ACPI_POS_DECODE (UINT8) 0x00
#define ACPI_SUB_DECODE (UINT8) 0x01
+/* Producer/Consumer */
+
#define ACPI_PRODUCER (UINT8) 0x00
#define ACPI_CONSUMER (UINT8) 0x01
@@ -167,7 +182,7 @@ typedef struct acpi_resource_irq
} ACPI_RESOURCE_IRQ;
-typedef struct ACPI_RESOURCE_DMA
+typedef struct acpi_resource_dma
{
UINT8 Type;
UINT8 BusMaster;
@@ -209,6 +224,24 @@ typedef struct acpi_resource_fixed_io
} ACPI_RESOURCE_FIXED_IO;
+typedef struct acpi_resource_fixed_dma
+{
+ UINT16 RequestLines;
+ UINT16 Channels;
+ UINT8 Width;
+
+} ACPI_RESOURCE_FIXED_DMA;
+
+/* Values for Width field above */
+
+#define ACPI_DMA_WIDTH8 0
+#define ACPI_DMA_WIDTH16 1
+#define ACPI_DMA_WIDTH32 2
+#define ACPI_DMA_WIDTH64 3
+#define ACPI_DMA_WIDTH128 4
+#define ACPI_DMA_WIDTH256 5
+
+
typedef struct acpi_resource_vendor
{
UINT16 ByteLength;
@@ -385,6 +418,184 @@ typedef struct acpi_resource_generic_register
} ACPI_RESOURCE_GENERIC_REGISTER;
+typedef struct acpi_resource_gpio
+{
+ UINT8 RevisionId;
+ UINT8 ConnectionType;
+ UINT8 ProducerConsumer; /* For values, see Producer/Consumer above */
+ UINT8 PinConfig;
+ UINT8 Sharable; /* For values, see Interrupt Attributes above */
+ UINT8 IoRestriction;
+ UINT8 Triggering; /* For values, see Interrupt Attributes above */
+ UINT8 Polarity; /* For values, see Interrupt Attributes above */
+ UINT16 DriveStrength;
+ UINT16 DebounceTimeout;
+ UINT16 PinTableLength;
+ UINT16 VendorLength;
+ ACPI_RESOURCE_SOURCE ResourceSource;
+ UINT16 *PinTable;
+ UINT8 *VendorData;
+
+} ACPI_RESOURCE_GPIO;
+
+/* Values for GPIO ConnectionType field above */
+
+#define ACPI_RESOURCE_GPIO_TYPE_INT 0
+#define ACPI_RESOURCE_GPIO_TYPE_IO 1
+
+/* Values for PinConfig field above */
+
+#define ACPI_PIN_CONFIG_DEFAULT 0
+#define ACPI_PIN_CONFIG_PULLUP 1
+#define ACPI_PIN_CONFIG_PULLDOWN 2
+#define ACPI_PIN_CONFIG_NOPULL 3
+
+/* Values for IoRestriction field above */
+
+#define ACPI_IO_RESTRICT_NONE 0
+#define ACPI_IO_RESTRICT_INPUT 1
+#define ACPI_IO_RESTRICT_OUTPUT 2
+#define ACPI_IO_RESTRICT_NONE_PRESERVE 3
+
+
+/* Common structure for I2C, SPI, and UART serial descriptors */
+
+#define ACPI_RESOURCE_SERIAL_COMMON \
+ UINT8 RevisionId; \
+ UINT8 Type; \
+ UINT8 ProducerConsumer; /* For values, see Producer/Consumer above */\
+ UINT8 SlaveMode; \
+ UINT8 TypeRevisionId; \
+ UINT16 TypeDataLength; \
+ UINT16 VendorLength; \
+ ACPI_RESOURCE_SOURCE ResourceSource; \
+ UINT8 *VendorData;
+
+typedef struct acpi_resource_common_serialbus
+{
+ ACPI_RESOURCE_SERIAL_COMMON
+
+} ACPI_RESOURCE_COMMON_SERIALBUS;
+
+/* Values for the Type field above */
+
+#define ACPI_RESOURCE_SERIAL_TYPE_I2C 1
+#define ACPI_RESOURCE_SERIAL_TYPE_SPI 2
+#define ACPI_RESOURCE_SERIAL_TYPE_UART 3
+
+/* Values for SlaveMode field above */
+
+#define ACPI_CONTROLLER_INITIATED 0
+#define ACPI_DEVICE_INITIATED 1
+
+
+typedef struct acpi_resource_i2c_serialbus
+{
+ ACPI_RESOURCE_SERIAL_COMMON
+ UINT8 AccessMode;
+ UINT16 SlaveAddress;
+ UINT32 ConnectionSpeed;
+
+} ACPI_RESOURCE_I2C_SERIALBUS;
+
+/* Values for AccessMode field above */
+
+#define ACPI_I2C_7BIT_MODE 0
+#define ACPI_I2C_10BIT_MODE 1
+
+
+typedef struct acpi_resource_spi_serialbus
+{
+ ACPI_RESOURCE_SERIAL_COMMON
+ UINT8 WireMode;
+ UINT8 DevicePolarity;
+ UINT8 DataBitLength;
+ UINT8 ClockPhase;
+ UINT8 ClockPolarity;
+ UINT16 DeviceSelection;
+ UINT32 ConnectionSpeed;
+
+} ACPI_RESOURCE_SPI_SERIALBUS;
+
+/* Values for WireMode field above */
+
+#define ACPI_SPI_4WIRE_MODE 0
+#define ACPI_SPI_3WIRE_MODE 1
+
+/* Values for DevicePolarity field above */
+
+#define ACPI_SPI_ACTIVE_LOW 0
+#define ACPI_SPI_ACTIVE_HIGH 1
+
+/* Values for ClockPhase field above */
+
+#define ACPI_SPI_FIRST_PHASE 0
+#define ACPI_SPI_SECOND_PHASE 1
+
+/* Values for ClockPolarity field above */
+
+#define ACPI_SPI_START_LOW 0
+#define ACPI_SPI_START_HIGH 1
+
+
+typedef struct acpi_resource_uart_serialbus
+{
+ ACPI_RESOURCE_SERIAL_COMMON
+ UINT8 Endian;
+ UINT8 DataBits;
+ UINT8 StopBits;
+ UINT8 FlowControl;
+ UINT8 Parity;
+ UINT8 LinesEnabled;
+ UINT16 RxFifoSize;
+ UINT16 TxFifoSize;
+ UINT32 DefaultBaudRate;
+
+} ACPI_RESOURCE_UART_SERIALBUS;
+
+/* Values for Endian field above */
+
+#define ACPI_UART_LITTLE_ENDIAN 0
+#define ACPI_UART_BIG_ENDIAN 1
+
+/* Values for DataBits field above */
+
+#define ACPI_UART_5_DATA_BITS 0
+#define ACPI_UART_6_DATA_BITS 1
+#define ACPI_UART_7_DATA_BITS 2
+#define ACPI_UART_8_DATA_BITS 3
+#define ACPI_UART_9_DATA_BITS 4
+
+/* Values for StopBits field above */
+
+#define ACPI_UART_NO_STOP_BITS 0
+#define ACPI_UART_1_STOP_BIT 1
+#define ACPI_UART_1P5_STOP_BITS 2
+#define ACPI_UART_2_STOP_BITS 3
+
+/* Values for FlowControl field above */
+
+#define ACPI_UART_FLOW_CONTROL_NONE 0
+#define ACPI_UART_FLOW_CONTROL_HW 1
+#define ACPI_UART_FLOW_CONTROL_XON_XOFF 2
+
+/* Values for Parity field above */
+
+#define ACPI_UART_PARITY_NONE 0
+#define ACPI_UART_PARITY_EVEN 1
+#define ACPI_UART_PARITY_ODD 2
+#define ACPI_UART_PARITY_MARK 3
+#define ACPI_UART_PARITY_SPACE 4
+
+/* Values for LinesEnabled bitfield above */
+
+#define ACPI_UART_CARRIER_DETECT (1<<2)
+#define ACPI_UART_RING_INDICATOR (1<<3)
+#define ACPI_UART_DATA_SET_READY (1<<4)
+#define ACPI_UART_DATA_TERMINAL_READY (1<<5)
+#define ACPI_UART_CLEAR_TO_SEND (1<<6)
+#define ACPI_UART_REQUEST_TO_SEND (1<<7)
+
/* ACPI_RESOURCE_TYPEs */
@@ -405,7 +616,10 @@ typedef struct acpi_resource_generic_register
#define ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64 14 /* ACPI 3.0 */
#define ACPI_RESOURCE_TYPE_EXTENDED_IRQ 15
#define ACPI_RESOURCE_TYPE_GENERIC_REGISTER 16
-#define ACPI_RESOURCE_TYPE_MAX 16
+#define ACPI_RESOURCE_TYPE_GPIO 17 /* ACPI 5.0 */
+#define ACPI_RESOURCE_TYPE_FIXED_DMA 18 /* ACPI 5.0 */
+#define ACPI_RESOURCE_TYPE_SERIAL_BUS 19 /* ACPI 5.0 */
+#define ACPI_RESOURCE_TYPE_MAX 19
/* Master union for resource descriptors */
@@ -416,6 +630,7 @@ typedef union acpi_resource_data
ACPI_RESOURCE_START_DEPENDENT StartDpf;
ACPI_RESOURCE_IO Io;
ACPI_RESOURCE_FIXED_IO FixedIo;
+ ACPI_RESOURCE_FIXED_DMA FixedDma;
ACPI_RESOURCE_VENDOR Vendor;
ACPI_RESOURCE_VENDOR_TYPED VendorTyped;
ACPI_RESOURCE_END_TAG EndTag;
@@ -428,6 +643,11 @@ typedef union acpi_resource_data
ACPI_RESOURCE_EXTENDED_ADDRESS64 ExtAddress64;
ACPI_RESOURCE_EXTENDED_IRQ ExtendedIrq;
ACPI_RESOURCE_GENERIC_REGISTER GenericReg;
+ ACPI_RESOURCE_GPIO Gpio;
+ ACPI_RESOURCE_I2C_SERIALBUS I2cSerialBus;
+ ACPI_RESOURCE_SPI_SERIALBUS SpiSerialBus;
+ ACPI_RESOURCE_UART_SERIALBUS UartSerialBus;
+ ACPI_RESOURCE_COMMON_SERIALBUS CommonSerialBus;
/* Common fields */
diff --git a/usr/src/uts/intel/sys/acpi/acstruct.h b/usr/src/uts/intel/sys/acpi/acstruct.h
index ff588f5234..b3927caa41 100644
--- a/usr/src/uts/intel/sys/acpi/acstruct.h
+++ b/usr/src/uts/intel/sys/acpi/acstruct.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/acpi/actables.h b/usr/src/uts/intel/sys/acpi/actables.h
index 6aad862bc9..26a7947dc9 100644
--- a/usr/src/uts/intel/sys/acpi/actables.h
+++ b/usr/src/uts/intel/sys/acpi/actables.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -84,6 +84,11 @@ ACPI_STATUS
AcpiTbVerifyTable (
ACPI_TABLE_DESC *TableDesc);
+ACPI_TABLE_HEADER *
+AcpiTbTableOverride (
+ ACPI_TABLE_HEADER *TableHeader,
+ ACPI_TABLE_DESC *TableDesc);
+
ACPI_STATUS
AcpiTbAddTable (
ACPI_TABLE_DESC *TableDesc,
diff --git a/usr/src/uts/intel/sys/acpi/actbl.h b/usr/src/uts/intel/sys/acpi/actbl.h
index e632291cd0..7da7378d14 100644
--- a/usr/src/uts/intel/sys/acpi/actbl.h
+++ b/usr/src/uts/intel/sys/acpi/actbl.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -298,6 +298,8 @@ typedef struct acpi_table_fadt
ACPI_GENERIC_ADDRESS XPmTimerBlock; /* 64-bit Extended Power Mgt Timer Ctrl Reg Blk address */
ACPI_GENERIC_ADDRESS XGpe0Block; /* 64-bit Extended General Purpose Event 0 Reg Blk address */
ACPI_GENERIC_ADDRESS XGpe1Block; /* 64-bit Extended General Purpose Event 1 Reg Blk address */
+ ACPI_GENERIC_ADDRESS SleepControl; /* 64-bit Sleep Control register */
+ ACPI_GENERIC_ADDRESS SleepStatus; /* 64-bit Sleep Status register */
} ACPI_TABLE_FADT;
@@ -309,6 +311,7 @@ typedef struct acpi_table_fadt
#define ACPI_FADT_NO_VGA (1<<2) /* 02: [V4] It is not safe to probe for VGA hardware */
#define ACPI_FADT_NO_MSI (1<<3) /* 03: [V4] Message Signaled Interrupts (MSI) must not be enabled */
#define ACPI_FADT_NO_ASPM (1<<4) /* 04: [V4] PCIe ASPM control must not be enabled */
+#define ACPI_FADT_NO_CMOS_RTC (1<<5) /* 05: [V5] No CMOS real-time clock present */
/* Masks for FADT flags */
@@ -332,6 +335,8 @@ typedef struct acpi_table_fadt
#define ACPI_FADT_REMOTE_POWER_ON (1<<17) /* 17: [V4] System is compatible with remote power on (ACPI 3.0) */
#define ACPI_FADT_APIC_CLUSTER (1<<18) /* 18: [V4] All local APICs must use cluster model (ACPI 3.0) */
#define ACPI_FADT_APIC_PHYSICAL (1<<19) /* 19: [V4] All local xAPICs must use physical dest mode (ACPI 3.0) */
+#define ACPI_FADT_HW_REDUCED (1<<20) /* 20: [V5] ACPI hardware is not implemented (ACPI 5.0) */
+#define ACPI_FADT_LOW_POWER_S0 (1<<21) /* 21: [V5] S0 power savings are equal or better than S3 (ACPI 5.0) */
/* Values for PreferredProfile (Prefered Power Management Profiles) */
@@ -344,9 +349,18 @@ enum AcpiPreferedPmProfiles
PM_WORKSTATION = 3,
PM_ENTERPRISE_SERVER = 4,
PM_SOHO_SERVER = 5,
- PM_APPLIANCE_PC = 6
+ PM_APPLIANCE_PC = 6,
+ PM_PERFORMANCE_SERVER = 7,
+ PM_TABLET = 8
};
+/* Values for SleepStatus and SleepControl registers (V5 FADT) */
+
+#define ACPI_X_WAKE_STATUS 0x80
+#define ACPI_X_SLEEP_TYPE_MASK 0x1C
+#define ACPI_X_SLEEP_TYPE_POSITION 0x02
+#define ACPI_X_SLEEP_ENABLE 0x20
+
/* Reset to default packing */
@@ -392,10 +406,11 @@ typedef struct acpi_table_desc
*/
#include "actbl1.h"
#include "actbl2.h"
+#include "actbl3.h"
/* Macros used to generate offsets to specific table fields */
-#define ACPI_FADT_OFFSET(f) (UINT8) ACPI_OFFSET (ACPI_TABLE_FADT, f)
+#define ACPI_FADT_OFFSET(f) (UINT16) ACPI_OFFSET (ACPI_TABLE_FADT, f)
/*
* Sizes of the various flavors of FADT. We need to look closely
@@ -405,12 +420,15 @@ typedef struct acpi_table_desc
* FADT is the bottom line as to what the version really is.
*
* For reference, the values below are as follows:
- * FADT V1 size: 0x74
- * FADT V2 size: 0x84
- * FADT V3+ size: 0xF4
+ * FADT V1 size: 0x074
+ * FADT V2 size: 0x084
+ * FADT V3 size: 0x0F4
+ * FADT V4 size: 0x0F4
+ * FADT V5 size: 0x10C
*/
#define ACPI_FADT_V1_SIZE (UINT32) (ACPI_FADT_OFFSET (Flags) + 4)
#define ACPI_FADT_V2_SIZE (UINT32) (ACPI_FADT_OFFSET (Reserved4[0]) + 3)
-#define ACPI_FADT_V3_SIZE (UINT32) (sizeof (ACPI_TABLE_FADT))
+#define ACPI_FADT_V3_SIZE (UINT32) (ACPI_FADT_OFFSET (SleepControl))
+#define ACPI_FADT_V5_SIZE (UINT32) (sizeof (ACPI_TABLE_FADT))
#endif /* __ACTBL_H__ */
diff --git a/usr/src/uts/intel/sys/acpi/actbl1.h b/usr/src/uts/intel/sys/acpi/actbl1.h
index ad6c82fc3c..824135d3dd 100644
--- a/usr/src/uts/intel/sys/acpi/actbl1.h
+++ b/usr/src/uts/intel/sys/acpi/actbl1.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -252,16 +252,17 @@ typedef struct acpi_einj_entry
enum AcpiEinjActions
{
- ACPI_EINJ_BEGIN_OPERATION = 0,
- ACPI_EINJ_GET_TRIGGER_TABLE = 1,
- ACPI_EINJ_SET_ERROR_TYPE = 2,
- ACPI_EINJ_GET_ERROR_TYPE = 3,
- ACPI_EINJ_END_OPERATION = 4,
- ACPI_EINJ_EXECUTE_OPERATION = 5,
- ACPI_EINJ_CHECK_BUSY_STATUS = 6,
- ACPI_EINJ_GET_COMMAND_STATUS = 7,
- ACPI_EINJ_ACTION_RESERVED = 8, /* 8 and greater are reserved */
- ACPI_EINJ_TRIGGER_ERROR = 0xFF /* Except for this value */
+ ACPI_EINJ_BEGIN_OPERATION = 0,
+ ACPI_EINJ_GET_TRIGGER_TABLE = 1,
+ ACPI_EINJ_SET_ERROR_TYPE = 2,
+ ACPI_EINJ_GET_ERROR_TYPE = 3,
+ ACPI_EINJ_END_OPERATION = 4,
+ ACPI_EINJ_EXECUTE_OPERATION = 5,
+ ACPI_EINJ_CHECK_BUSY_STATUS = 6,
+ ACPI_EINJ_GET_COMMAND_STATUS = 7,
+ ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS = 8,
+ ACPI_EINJ_ACTION_RESERVED = 9, /* 9 and greater are reserved */
+ ACPI_EINJ_TRIGGER_ERROR = 0xFF /* Except for this value */
};
/* Values for Instruction field above */
@@ -273,9 +274,33 @@ enum AcpiEinjInstructions
ACPI_EINJ_WRITE_REGISTER = 2,
ACPI_EINJ_WRITE_REGISTER_VALUE = 3,
ACPI_EINJ_NOOP = 4,
- ACPI_EINJ_INSTRUCTION_RESERVED = 5 /* 5 and greater are reserved */
+ ACPI_EINJ_FLUSH_CACHELINE = 5,
+ ACPI_EINJ_INSTRUCTION_RESERVED = 6 /* 6 and greater are reserved */
};
+typedef struct acpi_einj_error_type_with_addr
+{
+ UINT32 ErrorType;
+ UINT32 VendorStructOffset;
+ UINT32 Flags;
+ UINT32 ApicId;
+ UINT64 Address;
+ UINT64 Range;
+ UINT32 PcieId;
+
+} ACPI_EINJ_ERROR_TYPE_WITH_ADDR;
+
+typedef struct acpi_einj_vendor
+{
+ UINT32 Length;
+ UINT32 PcieId;
+ UINT16 VendorId;
+ UINT16 DeviceId;
+ UINT8 RevisionId;
+ UINT8 Reserved[3];
+
+} ACPI_EINJ_VENDOR;
+
/* EINJ Trigger Error Action Table */
@@ -313,6 +338,7 @@ enum AcpiEinjCommandStatus
#define ACPI_EINJ_PLATFORM_CORRECTABLE (1<<9)
#define ACPI_EINJ_PLATFORM_UNCORRECTABLE (1<<10)
#define ACPI_EINJ_PLATFORM_FATAL (1<<11)
+#define ACPI_EINJ_VENDOR_DEFINED (1<<31)
/*******************************************************************************
@@ -731,7 +757,9 @@ enum AcpiMadtType
ACPI_MADT_TYPE_INTERRUPT_SOURCE = 8,
ACPI_MADT_TYPE_LOCAL_X2APIC = 9,
ACPI_MADT_TYPE_LOCAL_X2APIC_NMI = 10,
- ACPI_MADT_TYPE_RESERVED = 11 /* 11 and greater are reserved */
+ ACPI_MADT_TYPE_GENERIC_INTERRUPT = 11,
+ ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR = 12,
+ ACPI_MADT_TYPE_RESERVED = 13 /* 13 and greater are reserved */
};
@@ -886,11 +914,42 @@ typedef struct acpi_madt_local_x2apic_nmi
} ACPI_MADT_LOCAL_X2APIC_NMI;
+/* 11: Generic Interrupt (ACPI 5.0) */
+
+typedef struct acpi_madt_generic_interrupt
+{
+ ACPI_SUBTABLE_HEADER Header;
+ UINT16 Reserved; /* Reserved - must be zero */
+ UINT32 GicId;
+ UINT32 Uid;
+ UINT32 Flags;
+ UINT32 ParkingVersion;
+ UINT32 PerformanceInterrupt;
+ UINT64 ParkedAddress;
+ UINT64 BaseAddress;
+
+} ACPI_MADT_GENERIC_INTERRUPT;
+
+
+/* 12: Generic Distributor (ACPI 5.0) */
+
+typedef struct acpi_madt_generic_distributor
+{
+ ACPI_SUBTABLE_HEADER Header;
+ UINT16 Reserved; /* Reserved - must be zero */
+ UINT32 GicId;
+ UINT64 BaseAddress;
+ UINT32 GlobalIrqBase;
+ UINT32 Reserved2; /* Reserved - must be zero */
+
+} ACPI_MADT_GENERIC_DISTRIBUTOR;
+
+
/*
* Common flags fields for MADT subtables
*/
-/* MADT Local APIC flags (LapicFlags) */
+/* MADT Local APIC flags (LapicFlags) and GIC flags */
#define ACPI_MADT_ENABLED (1) /* 00: Processor is usable if set */
diff --git a/usr/src/uts/intel/sys/acpi/actbl2.h b/usr/src/uts/intel/sys/acpi/actbl2.h
index e4f01e3f6b..70c6a09a3c 100644
--- a/usr/src/uts/intel/sys/acpi/actbl2.h
+++ b/usr/src/uts/intel/sys/acpi/actbl2.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/acpi/actbl3.h b/usr/src/uts/intel/sys/acpi/actbl3.h
new file mode 100644
index 0000000000..c5dd4184dd
--- /dev/null
+++ b/usr/src/uts/intel/sys/acpi/actbl3.h
@@ -0,0 +1,650 @@
+/******************************************************************************
+ *
+ * Name: actbl3.h - ACPI Table Definitions
+ *
+ *****************************************************************************/
+
+/*
+ * Copyright (C) 2000 - 2012, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ * of any contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#ifndef __ACTBL3_H__
+#define __ACTBL3_H__
+
+
+/*******************************************************************************
+ *
+ * Additional ACPI Tables (3)
+ *
+ * These tables are not consumed directly by the ACPICA subsystem, but are
+ * included here to support device drivers and the AML disassembler.
+ *
+ * The tables in this file are fully defined within the ACPI specification.
+ *
+ ******************************************************************************/
+
+
+/*
+ * Values for description table header signatures for tables defined in this
+ * file. Useful because they make it more difficult to inadvertently type in
+ * the wrong signature.
+ */
+#define ACPI_SIG_BGRT "BGRT" /* Boot Graphics Resource Table */
+#define ACPI_SIG_DRTM "DRTM" /* Dynamic Root of Trust for Measurement table */
+#define ACPI_SIG_FPDT "FPDT" /* Firmware Performance Data Table */
+#define ACPI_SIG_GTDT "GTDT" /* Generic Timer Description Table */
+#define ACPI_SIG_MPST "MPST" /* Memory Power State Table */
+#define ACPI_SIG_PCCT "PCCT" /* Platform Communications Channel Table */
+#define ACPI_SIG_PMTT "PMTT" /* Platform Memory Topology Table */
+#define ACPI_SIG_RASF "RASF" /* RAS Feature table */
+
+#define ACPI_SIG_S3PT "S3PT" /* S3 Performance (sub)Table */
+#define ACPI_SIG_PCCS "PCC" /* PCC Shared Memory Region */
+
+/* Reserved table signatures */
+
+#define ACPI_SIG_CSRT "CSRT" /* Core System Resources Table */
+#define ACPI_SIG_DBG2 "DBG2" /* Debug Port table 2 */
+#define ACPI_SIG_MATR "MATR" /* Memory Address Translation Table */
+#define ACPI_SIG_MSDM "MSDM" /* Microsoft Data Management Table */
+#define ACPI_SIG_WPBT "WPBT" /* Windows Platform Binary Table */
+
+/*
+ * All tables must be byte-packed to match the ACPI specification, since
+ * the tables are provided by the system BIOS.
+ */
+#pragma pack(1)
+
+/*
+ * Note about bitfields: The UINT8 type is used for bitfields in ACPI tables.
+ * This is the only type that is even remotely portable. Anything else is not
+ * portable, so do not use any other bitfield types.
+ */
+
+
+/*******************************************************************************
+ *
+ * BGRT - Boot Graphics Resource Table (ACPI 5.0)
+ * Version 1
+ *
+ ******************************************************************************/
+
+typedef struct acpi_table_bgrt
+{
+ ACPI_TABLE_HEADER Header; /* Common ACPI table header */
+ UINT16 Version;
+ UINT8 Status;
+ UINT8 ImageType;
+ UINT64 ImageAddress;
+ UINT32 ImageOffsetX;
+ UINT32 ImageOffsetY;
+
+} ACPI_TABLE_BGRT;
+
+
+/*******************************************************************************
+ *
+ * DRTM - Dynamic Root of Trust for Measurement table
+ *
+ ******************************************************************************/
+
+typedef struct acpi_table_drtm
+{
+ ACPI_TABLE_HEADER Header; /* Common ACPI table header */
+ UINT64 EntryBaseAddress;
+ UINT64 EntryLength;
+ UINT32 EntryAddress32;
+ UINT64 EntryAddress64;
+ UINT64 ExitAddress;
+ UINT64 LogAreaAddress;
+ UINT32 LogAreaLength;
+ UINT64 ArchDependentAddress;
+ UINT32 Flags;
+
+} ACPI_TABLE_DRTM;
+
+/* 1) Validated Tables List */
+
+typedef struct acpi_drtm_vtl_list
+{
+ UINT32 ValidatedTableListCount;
+
+} ACPI_DRTM_VTL_LIST;
+
+/* 2) Resources List */
+
+typedef struct acpi_drtm_resource_list
+{
+ UINT32 ResourceListCount;
+
+} ACPI_DRTM_RESOURCE_LIST;
+
+/* 3) Platform-specific Identifiers List */
+
+typedef struct acpi_drtm_id_list
+{
+ UINT32 IdListCount;
+
+} ACPI_DRTM_ID_LIST;
+
+
+/*******************************************************************************
+ *
+ * FPDT - Firmware Performance Data Table (ACPI 5.0)
+ * Version 1
+ *
+ ******************************************************************************/
+
+typedef struct acpi_table_fpdt
+{
+ ACPI_TABLE_HEADER Header; /* Common ACPI table header */
+
+} ACPI_TABLE_FPDT;
+
+
+/* FPDT subtable header */
+
+typedef struct acpi_fpdt_header
+{
+ UINT16 Type;
+ UINT8 Length;
+ UINT8 Revision;
+
+} ACPI_FPDT_HEADER;
+
+/* Values for Type field above */
+
+enum AcpiFpdtType
+{
+ ACPI_FPDT_TYPE_BOOT = 0,
+ ACPI_FPDT_TYPE_S3PERF = 1,
+};
+
+
+/*
+ * FPDT subtables
+ */
+
+/* 0: Firmware Basic Boot Performance Record */
+
+typedef struct acpi_fpdt_boot
+{
+ ACPI_FPDT_HEADER Header;
+ UINT8 Reserved[4];
+ UINT64 ResetEnd;
+ UINT64 LoadStart;
+ UINT64 StartupStart;
+ UINT64 ExitServicesEntry;
+ UINT64 ExitServicesExit;
+
+} ACPI_FPDT_BOOT;
+
+
+/* 1: S3 Performance Table Pointer Record */
+
+typedef struct acpi_fpdt_s3pt_ptr
+{
+ ACPI_FPDT_HEADER Header;
+ UINT8 Reserved[4];
+ UINT64 Address;
+
+} ACPI_FPDT_S3PT_PTR;
+
+
+/*
+ * S3PT - S3 Performance Table. This table is pointed to by the
+ * FPDT S3 Pointer Record above.
+ */
+typedef struct acpi_table_s3pt
+{
+ UINT8 Signature[4]; /* "S3PT" */
+ UINT32 Length;
+
+} ACPI_TABLE_S3PT;
+
+
+/*
+ * S3PT Subtables
+ */
+typedef struct acpi_s3pt_header
+{
+ UINT16 Type;
+ UINT8 Length;
+ UINT8 Revision;
+
+} ACPI_S3PT_HEADER;
+
+/* Values for Type field above */
+
+enum AcpiS3ptType
+{
+ ACPI_S3PT_TYPE_RESUME = 0,
+ ACPI_S3PT_TYPE_SUSPEND = 1,
+};
+
+typedef struct acpi_s3pt_resume
+{
+ ACPI_S3PT_HEADER Header;
+ UINT32 ResumeCount;
+ UINT64 FullResume;
+ UINT64 AverageResume;
+
+} ACPI_S3PT_RESUME;
+
+typedef struct acpi_s3pt_suspend
+{
+ ACPI_S3PT_HEADER Header;
+ UINT64 SuspendStart;
+ UINT64 SuspendEnd;
+
+} ACPI_S3PT_SUSPEND;
+
+
+/*******************************************************************************
+ *
+ * GTDT - Generic Timer Description Table (ACPI 5.0)
+ * Version 1
+ *
+ ******************************************************************************/
+
+typedef struct acpi_table_gtdt
+{
+ ACPI_TABLE_HEADER Header; /* Common ACPI table header */
+ UINT64 Address;
+ UINT32 Flags;
+ UINT32 SecurePl1Interrupt;
+ UINT32 SecurePl1Flags;
+ UINT32 NonSecurePl1Interrupt;
+ UINT32 NonSecurePl1Flags;
+ UINT32 VirtualTimerInterrupt;
+ UINT32 VirtualTimerFlags;
+ UINT32 NonSecurePl2Interrupt;
+ UINT32 NonSecurePl2Flags;
+
+} ACPI_TABLE_GTDT;
+
+/* Values for Flags field above */
+
+#define ACPI_GTDT_MAPPED_BLOCK_PRESENT 1
+
+/* Values for all "TimerFlags" fields above */
+
+#define ACPI_GTDT_INTERRUPT_MODE 1
+#define ACPI_GTDT_INTERRUPT_POLARITY 2
+
+
+/*******************************************************************************
+ *
+ * MPST - Memory Power State Table (ACPI 5.0)
+ * Version 1
+ *
+ ******************************************************************************/
+
+#define ACPI_MPST_CHANNEL_INFO \
+ UINT16 Reserved1; \
+ UINT8 ChannelId; \
+ UINT8 Reserved2; \
+ UINT16 PowerNodeCount;
+
+/* Main table */
+
+typedef struct acpi_table_mpst
+{
+ ACPI_TABLE_HEADER Header; /* Common ACPI table header */
+ ACPI_MPST_CHANNEL_INFO /* Platform Communication Channel */
+
+} ACPI_TABLE_MPST;
+
+
+/* Memory Platform Communication Channel Info */
+
+typedef struct acpi_mpst_channel
+{
+ ACPI_MPST_CHANNEL_INFO /* Platform Communication Channel */
+
+} ACPI_MPST_CHANNEL;
+
+
+/* Memory Power Node Structure */
+
+typedef struct acpi_mpst_power_node
+{
+ UINT8 Flags;
+ UINT8 Reserved1;
+ UINT16 NodeId;
+ UINT32 Length;
+ UINT64 RangeAddress;
+ UINT64 RangeLength;
+ UINT8 NumPowerStates;
+ UINT8 NumPhysicalComponents;
+ UINT16 Reserved2;
+
+} ACPI_MPST_POWER_NODE;
+
+/* Values for Flags field above */
+
+#define ACPI_MPST_ENABLED 1
+#define ACPI_MPST_POWER_MANAGED 2
+#define ACPI_MPST_HOT_PLUG_CAPABLE 4
+
+
+/* Memory Power State Structure (follows POWER_NODE above) */
+
+typedef struct acpi_mpst_power_state
+{
+ UINT8 PowerState;
+ UINT8 InfoIndex;
+
+} ACPI_MPST_POWER_STATE;
+
+
+/* Physical Component ID Structure (follows POWER_STATE above) */
+
+typedef struct acpi_mpst_component
+{
+ UINT16 ComponentId;
+
+} ACPI_MPST_COMPONENT;
+
+
+/* Memory Power State Characteristics Structure (follows all POWER_NODEs) */
+
+typedef struct acpi_mpst_data_hdr
+{
+ UINT16 CharacteristicsCount;
+
+} ACPI_MPST_DATA_HDR;
+
+typedef struct acpi_mpst_power_data
+{
+ UINT8 Revision;
+ UINT8 Flags;
+ UINT16 Reserved1;
+ UINT32 AveragePower;
+ UINT32 PowerSaving;
+ UINT64 ExitLatency;
+ UINT64 Reserved2;
+
+} ACPI_MPST_POWER_DATA;
+
+/* Values for Flags field above */
+
+#define ACPI_MPST_PRESERVE 1
+#define ACPI_MPST_AUTOENTRY 2
+#define ACPI_MPST_AUTOEXIT 4
+
+
+/* Shared Memory Region (not part of an ACPI table) */
+
+typedef struct acpi_mpst_shared
+{
+ UINT32 Signature;
+ UINT16 PccCommand;
+ UINT16 PccStatus;
+ UINT16 CommandRegister;
+ UINT16 StatusRegister;
+ UINT16 PowerStateId;
+ UINT16 PowerNodeId;
+ UINT64 EnergyConsumed;
+ UINT64 AveragePower;
+
+} ACPI_MPST_SHARED;
+
+
+/*******************************************************************************
+ *
+ * PCCT - Platform Communications Channel Table (ACPI 5.0)
+ * Version 1
+ *
+ ******************************************************************************/
+
+typedef struct acpi_table_pcct
+{
+ ACPI_TABLE_HEADER Header; /* Common ACPI table header */
+ UINT32 Flags;
+ UINT32 Latency;
+ UINT32 Reserved;
+
+} ACPI_TABLE_PCCT;
+
+/* Values for Flags field above */
+
+#define ACPI_PCCT_DOORBELL 1
+
+/*
+ * PCCT subtables
+ */
+
+/* 0: Generic Communications Subspace */
+
+typedef struct acpi_pcct_subspace
+{
+ ACPI_SUBTABLE_HEADER Header;
+ UINT8 Reserved[6];
+ UINT64 BaseAddress;
+ UINT64 Length;
+ ACPI_GENERIC_ADDRESS DoorbellRegister;
+ UINT64 PreserveMask;
+ UINT64 WriteMask;
+
+} ACPI_PCCT_SUBSPACE;
+
+
+/*
+ * PCC memory structures (not part of the ACPI table)
+ */
+
+/* Shared Memory Region */
+
+typedef struct acpi_pcct_shared_memory
+{
+ UINT32 Signature;
+ UINT16 Command;
+ UINT16 Status;
+
+} ACPI_PCCT_SHARED_MEMORY;
+
+
+/*******************************************************************************
+ *
+ * PMTT - Platform Memory Topology Table (ACPI 5.0)
+ * Version 1
+ *
+ ******************************************************************************/
+
+typedef struct acpi_table_pmtt
+{
+ ACPI_TABLE_HEADER Header; /* Common ACPI table header */
+ UINT32 Reserved;
+
+} ACPI_TABLE_PMTT;
+
+
+/* Common header for PMTT subtables that follow main table */
+
+typedef struct acpi_pmtt_header
+{
+ UINT8 Type;
+ UINT8 Reserved1;
+ UINT16 Length;
+ UINT16 Flags;
+ UINT16 Reserved2;
+
+} ACPI_PMTT_HEADER;
+
+/* Values for Type field above */
+
+#define ACPI_PMTT_TYPE_SOCKET 0
+#define ACPI_PMTT_TYPE_CONTROLLER 1
+#define ACPI_PMTT_TYPE_DIMM 2
+#define ACPI_PMTT_TYPE_RESERVED 3 /* 0x03-0xFF are reserved */
+
+/* Values for Flags field above */
+
+#define ACPI_PMTT_TOP_LEVEL 0x0001
+#define ACPI_PMTT_PHYSICAL 0x0002
+#define ACPI_PMTT_MEMORY_TYPE 0x000C
+
+
+/*
+ * PMTT subtables, correspond to Type in acpi_pmtt_header
+ */
+
+
+/* 0: Socket Structure */
+
+typedef struct acpi_pmtt_socket
+{
+ ACPI_PMTT_HEADER Header;
+ UINT16 SocketId;
+ UINT16 Reserved;
+
+} ACPI_PMTT_SOCKET;
+
+
+/* 1: Memory Controller subtable */
+
+typedef struct acpi_pmtt_controller
+{
+ ACPI_PMTT_HEADER Header;
+ UINT32 ReadLatency;
+ UINT32 WriteLatency;
+ UINT32 ReadBandwidth;
+ UINT32 WriteBandwidth;
+ UINT16 AccessWidth;
+ UINT16 Alignment;
+ UINT16 Reserved;
+ UINT16 DomainCount;
+
+} ACPI_PMTT_CONTROLLER;
+
+/* 1a: Proximity Domain substructure */
+
+typedef struct acpi_pmtt_domain
+{
+ UINT32 ProximityDomain;
+
+} ACPI_PMTT_DOMAIN;
+
+
+/* 2: Physical Component Identifier (DIMM) */
+
+typedef struct acpi_pmtt_physical_component
+{
+ ACPI_PMTT_HEADER Header;
+ UINT16 ComponentId;
+ UINT16 Reserved;
+ UINT32 MemorySize;
+ UINT32 BiosHandle;
+
+} ACPI_PMTT_PHYSICAL_COMPONENT;
+
+
+/*******************************************************************************
+ *
+ * RASF - RAS Feature Table (ACPI 5.0)
+ * Version 1
+ *
+ ******************************************************************************/
+
+typedef struct acpi_table_rasf
+{
+ ACPI_TABLE_HEADER Header; /* Common ACPI table header */
+ UINT8 ChannelId[12];
+
+} ACPI_TABLE_RASF;
+
+/* RASF Platform Communication Channel Shared Memory Region */
+
+typedef struct acpi_rasf_shared_memory
+{
+ UINT32 Signature;
+ UINT16 Command;
+ UINT16 Status;
+ UINT64 RequestedAddress;
+ UINT64 RequestedLength;
+ UINT64 ActualAddress;
+ UINT64 ActualLength;
+ UINT16 Flags;
+ UINT8 Speed;
+
+} ACPI_RASF_SHARED_MEMORY;
+
+/* Masks for Flags and Speed fields above */
+
+#define ACPI_RASF_SCRUBBER_RUNNING 1
+#define ACPI_RASF_SPEED (7<<1)
+
+/* Channel Commands */
+
+enum AcpiRasfCommands
+{
+ ACPI_RASF_GET_RAS_CAPABILITIES = 1,
+ ACPI_RASF_GET_PATROL_PARAMETERS = 2,
+ ACPI_RASF_START_PATROL_SCRUBBER = 3,
+ ACPI_RASF_STOP_PATROL_SCRUBBER = 4
+};
+
+/* Channel Command flags */
+
+#define ACPI_RASF_GENERATE_SCI (1<<15)
+
+/* Status values */
+
+enum AcpiRasfStatus
+{
+ ACPI_RASF_SUCCESS = 0,
+ ACPI_RASF_NOT_VALID = 1,
+ ACPI_RASF_NOT_SUPPORTED = 2,
+ ACPI_RASF_BUSY = 3,
+ ACPI_RASF_FAILED = 4,
+ ACPI_RASF_ABORTED = 5,
+ ACPI_RASF_INVALID_DATA = 6
+};
+
+/* Status flags */
+
+#define ACPI_RASF_COMMAND_COMPLETE (1)
+#define ACPI_RASF_SCI_DOORBELL (1<<1)
+#define ACPI_RASF_ERROR (1<<2)
+#define ACPI_RASF_STATUS (0x1F<<3)
+
+
+/* Reset to default packing */
+
+#pragma pack()
+
+#endif /* __ACTBL3_H__ */
diff --git a/usr/src/uts/intel/sys/acpi/actypes.h b/usr/src/uts/intel/sys/acpi/actypes.h
index 095e589607..4a5188544e 100644
--- a/usr/src/uts/intel/sys/acpi/actypes.h
+++ b/usr/src/uts/intel/sys/acpi/actypes.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -520,6 +520,13 @@ typedef UINT64 ACPI_INTEGER;
#define ACPI_SLEEP_TYPE_INVALID 0xFF
/*
+ * Sleep/Wake flags
+ */
+#define ACPI_NO_OPTIONAL_METHODS 0x00 /* Do not execute any optional methods */
+#define ACPI_EXECUTE_GTS 0x01 /* For enter sleep interface */
+#define ACPI_EXECUTE_BFS 0x02 /* For leave sleep prep interface */
+
+/*
* Standard notify values
*/
#define ACPI_NOTIFY_BUS_CHECK (UINT8) 0x00
@@ -534,8 +541,9 @@ typedef UINT64 ACPI_INTEGER;
#define ACPI_NOTIFY_DEVICE_PLD_CHECK (UINT8) 0x09
#define ACPI_NOTIFY_RESERVED (UINT8) 0x0A
#define ACPI_NOTIFY_LOCALITY_UPDATE (UINT8) 0x0B
+#define ACPI_NOTIFY_SHUTDOWN_REQUEST (UINT8) 0x0C
-#define ACPI_NOTIFY_MAX 0x0B
+#define ACPI_NOTIFY_MAX 0x0C
/*
* Types associated with ACPI names and objects. The first group of
@@ -700,8 +708,13 @@ typedef UINT32 ACPI_EVENT_STATUS;
#define ACPI_DEVICE_NOTIFY 0x2
#define ACPI_ALL_NOTIFY (ACPI_SYSTEM_NOTIFY | ACPI_DEVICE_NOTIFY)
#define ACPI_MAX_NOTIFY_HANDLER_TYPE 0x3
+#define ACPI_NUM_NOTIFY_TYPES 2
+
+#define ACPI_MAX_SYS_NOTIFY 0x7F
+#define ACPI_MAX_DEVICE_SPECIFIC_NOTIFY 0xBF
-#define ACPI_MAX_SYS_NOTIFY 0x7f
+#define ACPI_SYSTEM_HANDLER_LIST 0 /* Used as index, must be SYSTEM_NOTIFY -1 */
+#define ACPI_DEVICE_HANDLER_LIST 1 /* Used as index, must be DEVICE_NOTIFY -1 */
/* Address Space (Operation Region) Types */
@@ -716,8 +729,10 @@ typedef UINT8 ACPI_ADR_SPACE_TYPE;
#define ACPI_ADR_SPACE_CMOS (ACPI_ADR_SPACE_TYPE) 5
#define ACPI_ADR_SPACE_PCI_BAR_TARGET (ACPI_ADR_SPACE_TYPE) 6
#define ACPI_ADR_SPACE_IPMI (ACPI_ADR_SPACE_TYPE) 7
+#define ACPI_ADR_SPACE_GPIO (ACPI_ADR_SPACE_TYPE) 8
+#define ACPI_ADR_SPACE_GSBUS (ACPI_ADR_SPACE_TYPE) 9
-#define ACPI_NUM_PREDEFINED_REGIONS 8
+#define ACPI_NUM_PREDEFINED_REGIONS 10
/*
* Special Address Spaces
@@ -790,6 +805,20 @@ typedef UINT8 ACPI_ADR_SPACE_TYPE;
#define ACPI_DISABLE_EVENT 0
+/* Sleep function dispatch */
+
+typedef ACPI_STATUS (*ACPI_SLEEP_FUNCTION) (
+ UINT8 SleepState,
+ UINT8 Flags);
+
+typedef struct acpi_sleep_functions
+{
+ ACPI_SLEEP_FUNCTION LegacyFunction;
+ ACPI_SLEEP_FUNCTION ExtendedFunction;
+
+} ACPI_SLEEP_FUNCTIONS;
+
+
/*
* External ACPI object definition
*/
@@ -1030,6 +1059,17 @@ ACPI_STATUS (*ACPI_ADR_SPACE_HANDLER) (
#define ACPI_DEFAULT_HANDLER NULL
+/* Special Context data for GenericSerialBus/GeneralPurposeIo (ACPI 5.0) */
+
+typedef struct acpi_connection_info
+{
+ UINT8 *Connection;
+ UINT16 Length;
+ UINT8 AccessLength;
+
+} ACPI_CONNECTION_INFO;
+
+
typedef
ACPI_STATUS (*ACPI_ADR_SPACE_SETUP) (
ACPI_HANDLE RegionHandle,
diff --git a/usr/src/uts/intel/sys/acpi/acutils.h b/usr/src/uts/intel/sys/acpi/acutils.h
index 489f195eb9..e9c9b95dda 100644
--- a/usr/src/uts/intel/sys/acpi/acutils.h
+++ b/usr/src/uts/intel/sys/acpi/acutils.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -46,6 +46,7 @@
extern const UINT8 AcpiGbl_ResourceAmlSizes[];
+extern const UINT8 AcpiGbl_ResourceAmlSerialBusSizes[];
/* Strings used by the disassembler and debugger resource dump routines */
@@ -69,6 +70,22 @@ extern const char *AcpiGbl_SizDecode[];
extern const char *AcpiGbl_TrsDecode[];
extern const char *AcpiGbl_TtpDecode[];
extern const char *AcpiGbl_TypDecode[];
+extern const char *AcpiGbl_PpcDecode[];
+extern const char *AcpiGbl_IorDecode[];
+extern const char *AcpiGbl_DtsDecode[];
+extern const char *AcpiGbl_CtDecode[];
+extern const char *AcpiGbl_SbtDecode[];
+extern const char *AcpiGbl_AmDecode[];
+extern const char *AcpiGbl_SmDecode[];
+extern const char *AcpiGbl_WmDecode[];
+extern const char *AcpiGbl_CphDecode[];
+extern const char *AcpiGbl_CpoDecode[];
+extern const char *AcpiGbl_DpDecode[];
+extern const char *AcpiGbl_EdDecode[];
+extern const char *AcpiGbl_BpbDecode[];
+extern const char *AcpiGbl_SbDecode[];
+extern const char *AcpiGbl_FcDecode[];
+extern const char *AcpiGbl_PtDecode[];
#endif
/* Types for Resource descriptor entries */
@@ -113,7 +130,6 @@ typedef struct acpi_pkg_info
#define DB_DWORD_DISPLAY 4
#define DB_QWORD_DISPLAY 8
-
/*
* utglobal - Global data structures and procedures
*/
@@ -722,6 +738,11 @@ void
AcpiUtStrlwr (
char *SrcString);
+int
+AcpiUtStricmp (
+ char *String1,
+ char *String2);
+
void
AcpiUtPrintString (
char *String,
@@ -897,6 +918,31 @@ AcpiUtCreateList (
#endif /* ACPI_DBG_TRACK_ALLOCATIONS */
+/*
+ * utaddress - address range check
+ */
+ACPI_STATUS
+AcpiUtAddAddressRange (
+ ACPI_ADR_SPACE_TYPE SpaceId,
+ ACPI_PHYSICAL_ADDRESS Address,
+ UINT32 Length,
+ ACPI_NAMESPACE_NODE *RegionNode);
+
+void
+AcpiUtRemoveAddressRange (
+ ACPI_ADR_SPACE_TYPE SpaceId,
+ ACPI_NAMESPACE_NODE *RegionNode);
+
+UINT32
+AcpiUtCheckAddressRange (
+ ACPI_ADR_SPACE_TYPE SpaceId,
+ ACPI_PHYSICAL_ADDRESS Address,
+ UINT32 Length,
+ BOOLEAN Warn);
+
+void
+AcpiUtDeleteAddressLists (
+ void);
/*
* utxferror - various error/warning output functions
diff --git a/usr/src/uts/intel/sys/acpi/amlcode.h b/usr/src/uts/intel/sys/acpi/amlcode.h
index f6e0de3b35..f778bb0780 100644
--- a/usr/src/uts/intel/sys/acpi/amlcode.h
+++ b/usr/src/uts/intel/sys/acpi/amlcode.h
@@ -7,7 +7,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -191,6 +191,15 @@
/*
+ * Opcodes for "Field" operators
+ */
+#define AML_FIELD_OFFSET_OP (UINT8) 0x00
+#define AML_FIELD_ACCESS_OP (UINT8) 0x01
+#define AML_FIELD_CONNECTION_OP (UINT8) 0x02 /* ACPI 5.0 */
+#define AML_FIELD_EXT_ACCESS_OP (UINT8) 0x03 /* ACPI 5.0 */
+
+
+/*
* Internal opcodes
* Use only "Unknown" AML opcodes, don't attempt to use
* any valid ACPI ASCII values (A-Z, 0-9, '-')
@@ -204,7 +213,8 @@
#define AML_INT_METHODCALL_OP (UINT16) 0x0035
#define AML_INT_RETURN_VALUE_OP (UINT16) 0x0036
#define AML_INT_EVAL_SUBTREE_OP (UINT16) 0x0037
-
+#define AML_INT_CONNECTION_OP (UINT16) 0x0038
+#define AML_INT_EXTACCESSFIELD_OP (UINT16) 0x0039
#define ARG_NONE 0x0
@@ -478,13 +488,16 @@ typedef enum
*/
typedef enum
{
- AML_FIELD_ATTRIB_SMB_QUICK = 0x02,
- AML_FIELD_ATTRIB_SMB_SEND_RCV = 0x04,
- AML_FIELD_ATTRIB_SMB_BYTE = 0x06,
- AML_FIELD_ATTRIB_SMB_WORD = 0x08,
- AML_FIELD_ATTRIB_SMB_BLOCK = 0x0A,
- AML_FIELD_ATTRIB_SMB_WORD_CALL = 0x0C,
- AML_FIELD_ATTRIB_SMB_BLOCK_CALL = 0x0D
+ AML_FIELD_ATTRIB_QUICK = 0x02,
+ AML_FIELD_ATTRIB_SEND_RCV = 0x04,
+ AML_FIELD_ATTRIB_BYTE = 0x06,
+ AML_FIELD_ATTRIB_WORD = 0x08,
+ AML_FIELD_ATTRIB_BLOCK = 0x0A,
+ AML_FIELD_ATTRIB_MULTIBYTE = 0x0B,
+ AML_FIELD_ATTRIB_WORD_CALL = 0x0C,
+ AML_FIELD_ATTRIB_BLOCK_CALL = 0x0D,
+ AML_FIELD_ATTRIB_RAW_BYTES = 0x0E,
+ AML_FIELD_ATTRIB_RAW_PROCESS = 0x0F
} AML_ACCESS_ATTRIBUTE;
diff --git a/usr/src/uts/intel/sys/acpi/amlresrc.h b/usr/src/uts/intel/sys/acpi/amlresrc.h
index 1375d96cf8..6d85b8a2d0 100644
--- a/usr/src/uts/intel/sys/acpi/amlresrc.h
+++ b/usr/src/uts/intel/sys/acpi/amlresrc.h
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -59,29 +59,48 @@
#define ACPI_RESTAG_TYPESPECIFICATTRIBUTES "_ATT"
#define ACPI_RESTAG_BASEADDRESS "_BAS"
#define ACPI_RESTAG_BUSMASTER "_BM_" /* Master(1), Slave(0) */
+#define ACPI_RESTAG_DEBOUNCETIME "_DBT"
#define ACPI_RESTAG_DECODE "_DEC"
+#define ACPI_RESTAG_DEVICEPOLARITY "_DPL"
#define ACPI_RESTAG_DMA "_DMA"
#define ACPI_RESTAG_DMATYPE "_TYP" /* Compatible(0), A(1), B(2), F(3) */
+#define ACPI_RESTAG_DRIVESTRENGTH "_DRS"
+#define ACPI_RESTAG_ENDIANNESS "_END"
+#define ACPI_RESTAG_FLOWCONTROL "_FLC"
#define ACPI_RESTAG_GRANULARITY "_GRA"
#define ACPI_RESTAG_INTERRUPT "_INT"
#define ACPI_RESTAG_INTERRUPTLEVEL "_LL_" /* ActiveLo(1), ActiveHi(0) */
#define ACPI_RESTAG_INTERRUPTSHARE "_SHR" /* Shareable(1), NoShare(0) */
#define ACPI_RESTAG_INTERRUPTTYPE "_HE_" /* Edge(1), Level(0) */
+#define ACPI_RESTAG_IORESTRICTION "_IOR"
#define ACPI_RESTAG_LENGTH "_LEN"
+#define ACPI_RESTAG_LINE "_LIN"
#define ACPI_RESTAG_MEMATTRIBUTES "_MTP" /* Memory(0), Reserved(1), ACPI(2), NVS(3) */
#define ACPI_RESTAG_MEMTYPE "_MEM" /* NonCache(0), Cacheable(1) Cache+combine(2), Cache+prefetch(3) */
#define ACPI_RESTAG_MAXADDR "_MAX"
#define ACPI_RESTAG_MINADDR "_MIN"
#define ACPI_RESTAG_MAXTYPE "_MAF"
#define ACPI_RESTAG_MINTYPE "_MIF"
+#define ACPI_RESTAG_MODE "_MOD"
+#define ACPI_RESTAG_PARITY "_PAR"
+#define ACPI_RESTAG_PHASE "_PHA"
+#define ACPI_RESTAG_PIN "_PIN"
+#define ACPI_RESTAG_PINCONFIG "_PPI"
+#define ACPI_RESTAG_POLARITY "_POL"
#define ACPI_RESTAG_REGISTERBITOFFSET "_RBO"
#define ACPI_RESTAG_REGISTERBITWIDTH "_RBW"
#define ACPI_RESTAG_RANGETYPE "_RNG"
#define ACPI_RESTAG_READWRITETYPE "_RW_" /* ReadOnly(0), Writeable (1) */
+#define ACPI_RESTAG_LENGTH_RX "_RXL"
+#define ACPI_RESTAG_LENGTH_TX "_TXL"
+#define ACPI_RESTAG_SLAVEMODE "_SLV"
+#define ACPI_RESTAG_SPEED "_SPE"
+#define ACPI_RESTAG_STOPBITS "_STB"
#define ACPI_RESTAG_TRANSLATION "_TRA"
#define ACPI_RESTAG_TRANSTYPE "_TRS" /* Sparse(1), Dense(0) */
#define ACPI_RESTAG_TYPE "_TTP" /* Translation(1), Static (0) */
#define ACPI_RESTAG_XFERTYPE "_SIZ" /* 8(0), 8And16(1), 16(2) */
+#define ACPI_RESTAG_VENDORDATA "_VEN"
/* Default sizes for "small" resource descriptors */
@@ -92,6 +111,7 @@
#define ASL_RDESC_END_DEPEND_SIZE 0x00
#define ASL_RDESC_IO_SIZE 0x07
#define ASL_RDESC_FIXED_IO_SIZE 0x03
+#define ASL_RDESC_FIXED_DMA_SIZE 0x05
#define ASL_RDESC_END_TAG_SIZE 0x01
@@ -214,6 +234,16 @@ typedef struct aml_resource_end_tag
} AML_RESOURCE_END_TAG;
+typedef struct aml_resource_fixed_dma
+{
+ AML_RESOURCE_SMALL_HEADER_COMMON
+ UINT16 RequestLines;
+ UINT16 Channels;
+ UINT8 Width;
+
+} AML_RESOURCE_FIXED_DMA;
+
+
/*
* LARGE descriptors
*/
@@ -368,6 +398,130 @@ typedef struct aml_resource_generic_register
} AML_RESOURCE_GENERIC_REGISTER;
+
+/* Common descriptor for GpioInt and GpioIo (ACPI 5.0) */
+
+typedef struct aml_resource_gpio
+{
+ AML_RESOURCE_LARGE_HEADER_COMMON
+ UINT8 RevisionId;
+ UINT8 ConnectionType;
+ UINT16 Flags;
+ UINT16 IntFlags;
+ UINT8 PinConfig;
+ UINT16 DriveStrength;
+ UINT16 DebounceTimeout;
+ UINT16 PinTableOffset;
+ UINT8 ResSourceIndex;
+ UINT16 ResSourceOffset;
+ UINT16 VendorOffset;
+ UINT16 VendorLength;
+ /*
+ * Optional fields follow immediately:
+ * 1) PIN list (Words)
+ * 2) Resource Source String
+ * 3) Vendor Data bytes
+ */
+
+} AML_RESOURCE_GPIO;
+
+#define AML_RESOURCE_GPIO_REVISION 1 /* ACPI 5.0 */
+
+/* Values for ConnectionType above */
+
+#define AML_RESOURCE_GPIO_TYPE_INT 0
+#define AML_RESOURCE_GPIO_TYPE_IO 1
+#define AML_RESOURCE_MAX_GPIOTYPE 1
+
+
+/* Common preamble for all serial descriptors (ACPI 5.0) */
+
+#define AML_RESOURCE_SERIAL_COMMON \
+ UINT8 RevisionId; \
+ UINT8 ResSourceIndex; \
+ UINT8 Type; \
+ UINT8 Flags; \
+ UINT16 TypeSpecificFlags; \
+ UINT8 TypeRevisionId; \
+ UINT16 TypeDataLength; \
+
+/* Values for the type field above */
+
+#define AML_RESOURCE_I2C_SERIALBUSTYPE 1
+#define AML_RESOURCE_SPI_SERIALBUSTYPE 2
+#define AML_RESOURCE_UART_SERIALBUSTYPE 3
+#define AML_RESOURCE_MAX_SERIALBUSTYPE 3
+#define AML_RESOURCE_VENDOR_SERIALBUSTYPE 192 /* Vendor defined is 0xC0-0xFF (NOT SUPPORTED) */
+
+typedef struct aml_resource_common_serialbus
+{
+ AML_RESOURCE_LARGE_HEADER_COMMON
+ AML_RESOURCE_SERIAL_COMMON
+
+} AML_RESOURCE_COMMON_SERIALBUS;
+
+typedef struct aml_resource_i2c_serialbus
+{
+ AML_RESOURCE_LARGE_HEADER_COMMON
+ AML_RESOURCE_SERIAL_COMMON
+ UINT32 ConnectionSpeed;
+ UINT16 SlaveAddress;
+ /*
+ * Optional fields follow immediately:
+ * 1) Vendor Data bytes
+ * 2) Resource Source String
+ */
+
+} AML_RESOURCE_I2C_SERIALBUS;
+
+#define AML_RESOURCE_I2C_REVISION 1 /* ACPI 5.0 */
+#define AML_RESOURCE_I2C_TYPE_REVISION 1 /* ACPI 5.0 */
+#define AML_RESOURCE_I2C_MIN_DATA_LEN 6
+
+typedef struct aml_resource_spi_serialbus
+{
+ AML_RESOURCE_LARGE_HEADER_COMMON
+ AML_RESOURCE_SERIAL_COMMON
+ UINT32 ConnectionSpeed;
+ UINT8 DataBitLength;
+ UINT8 ClockPhase;
+ UINT8 ClockPolarity;
+ UINT16 DeviceSelection;
+ /*
+ * Optional fields follow immediately:
+ * 1) Vendor Data bytes
+ * 2) Resource Source String
+ */
+
+} AML_RESOURCE_SPI_SERIALBUS;
+
+#define AML_RESOURCE_SPI_REVISION 1 /* ACPI 5.0 */
+#define AML_RESOURCE_SPI_TYPE_REVISION 1 /* ACPI 5.0 */
+#define AML_RESOURCE_SPI_MIN_DATA_LEN 9
+
+
+typedef struct aml_resource_uart_serialbus
+{
+ AML_RESOURCE_LARGE_HEADER_COMMON
+ AML_RESOURCE_SERIAL_COMMON
+ UINT32 DefaultBaudRate;
+ UINT16 RxFifoSize;
+ UINT16 TxFifoSize;
+ UINT8 Parity;
+ UINT8 LinesEnabled;
+ /*
+ * Optional fields follow immediately:
+ * 1) Vendor Data bytes
+ * 2) Resource Source String
+ */
+
+} AML_RESOURCE_UART_SERIALBUS;
+
+#define AML_RESOURCE_UART_REVISION 1 /* ACPI 5.0 */
+#define AML_RESOURCE_UART_TYPE_REVISION 1 /* ACPI 5.0 */
+#define AML_RESOURCE_UART_MIN_DATA_LEN 10
+
+
/* restore default alignment */
#pragma pack()
@@ -390,6 +544,7 @@ typedef union aml_resource
AML_RESOURCE_END_DEPENDENT EndDpf;
AML_RESOURCE_IO Io;
AML_RESOURCE_FIXED_IO FixedIo;
+ AML_RESOURCE_FIXED_DMA FixedDma;
AML_RESOURCE_VENDOR_SMALL VendorSmall;
AML_RESOURCE_END_TAG EndTag;
@@ -405,6 +560,11 @@ typedef union aml_resource
AML_RESOURCE_ADDRESS64 Address64;
AML_RESOURCE_EXTENDED_ADDRESS64 ExtAddress64;
AML_RESOURCE_EXTENDED_IRQ ExtendedIrq;
+ AML_RESOURCE_GPIO Gpio;
+ AML_RESOURCE_I2C_SERIALBUS I2cSerialBus;
+ AML_RESOURCE_SPI_SERIALBUS SpiSerialBus;
+ AML_RESOURCE_UART_SERIALBUS UartSerialBus;
+ AML_RESOURCE_COMMON_SERIALBUS CommonSerialBus;
/* Utility overlays */
diff --git a/usr/src/uts/intel/sys/acpi/platform/accygwin.h b/usr/src/uts/intel/sys/acpi/platform/accygwin.h
index 10ae3e83b7..831f313987 100644
--- a/usr/src/uts/intel/sys/acpi/platform/accygwin.h
+++ b/usr/src/uts/intel/sys/acpi/platform/accygwin.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/acpi/platform/acefi.h b/usr/src/uts/intel/sys/acpi/platform/acefi.h
index 9249ae8d24..bab6159381 100644
--- a/usr/src/uts/intel/sys/acpi/platform/acefi.h
+++ b/usr/src/uts/intel/sys/acpi/platform/acefi.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/acpi/platform/acenv.h b/usr/src/uts/intel/sys/acpi/platform/acenv.h
index 7f893c13ff..19ca03baac 100644
--- a/usr/src/uts/intel/sys/acpi/platform/acenv.h
+++ b/usr/src/uts/intel/sys/acpi/platform/acenv.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -79,7 +79,7 @@
#define ACPI_SINGLE_THREADED
#endif
-/* AcpiExec and AcpiBin configuration */
+/* AcpiExec configuration. Multithreaded with full AML debugger */
#ifdef ACPI_EXEC_APP
#define ACPI_APPLICATION
@@ -88,7 +88,27 @@
#define ACPI_DBG_TRACK_ALLOCATIONS
#endif
-#ifdef ACPI_BIN_APP
+/* AcpiNames configuration. Single threaded with debugger output enabled. */
+
+#ifdef ACPI_NAMES_APP
+#define ACPI_DEBUGGER
+#define ACPI_APPLICATION
+#define ACPI_SINGLE_THREADED
+#endif
+
+/*
+ * AcpiBin/AcpiHelp/AcpiSrc configuration. All single threaded, with
+ * no debug output.
+ */
+#if (defined ACPI_BIN_APP) || \
+ (defined ACPI_SRC_APP) || \
+ (defined ACPI_XTRACT_APP)
+#define ACPI_APPLICATION
+#define ACPI_SINGLE_THREADED
+#endif
+
+#ifdef ACPI_HELP_APP
+#define ACPI_DEBUG_OUTPUT
#define ACPI_APPLICATION
#define ACPI_SINGLE_THREADED
#endif
diff --git a/usr/src/uts/intel/sys/acpi/platform/acfreebsd.h b/usr/src/uts/intel/sys/acpi/platform/acfreebsd.h
index ad106ecf5e..98f972f373 100644
--- a/usr/src/uts/intel/sys/acpi/platform/acfreebsd.h
+++ b/usr/src/uts/intel/sys/acpi/platform/acfreebsd.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/acpi/platform/acgcc.h b/usr/src/uts/intel/sys/acpi/platform/acgcc.h
index 4b197ff63a..d2eafff395 100644
--- a/usr/src/uts/intel/sys/acpi/platform/acgcc.h
+++ b/usr/src/uts/intel/sys/acpi/platform/acgcc.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/acpi/platform/acintel.h b/usr/src/uts/intel/sys/acpi/platform/acintel.h
index cf7e0bc224..46d238bedb 100644
--- a/usr/src/uts/intel/sys/acpi/platform/acintel.h
+++ b/usr/src/uts/intel/sys/acpi/platform/acintel.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/acpi/platform/aclinux.h b/usr/src/uts/intel/sys/acpi/platform/aclinux.h
index 3fc3d97cb8..edfff150ed 100644
--- a/usr/src/uts/intel/sys/acpi/platform/aclinux.h
+++ b/usr/src/uts/intel/sys/acpi/platform/aclinux.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -87,17 +87,18 @@
/* Host-dependent types and defines for user-space ACPICA */
#define ACPI_FLUSH_CPU_CACHE()
-#define ACPI_CAST_PTHREAD_T(pthread) ((ACPI_THREAD_ID) (pthread))
#if defined(__ia64__) || defined(__x86_64__)
#define ACPI_MACHINE_WIDTH 64
#define COMPILER_DEPENDENT_INT64 long
#define COMPILER_DEPENDENT_UINT64 unsigned long
+#define ACPI_CAST_PTHREAD_T(pthread) ((ACPI_THREAD_ID) (pthread))
#else
#define ACPI_MACHINE_WIDTH 32
#define COMPILER_DEPENDENT_INT64 long long
#define COMPILER_DEPENDENT_UINT64 unsigned long long
#define ACPI_USE_NATIVE_DIVIDE
+#define ACPI_CAST_PTHREAD_T(pthread) ((ACPI_THREAD_ID) (UINT32) (void *) (pthread))
#endif
#ifndef __cdecl
diff --git a/usr/src/uts/intel/sys/acpi/platform/acmsvc.h b/usr/src/uts/intel/sys/acpi/platform/acmsvc.h
index 8590d95fd0..b73a62caf1 100644
--- a/usr/src/uts/intel/sys/acpi/platform/acmsvc.h
+++ b/usr/src/uts/intel/sys/acpi/platform/acmsvc.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -44,6 +44,28 @@
#ifndef __ACMSVC_H__
#define __ACMSVC_H__
+
+/*
+ * Map low I/O functions for MS. This allows us to disable MS language
+ * extensions for maximum portability.
+ */
+#define open _open
+#define read _read
+#define write _write
+#define close _close
+#define stat _stat
+#define fstat _fstat
+#define mkdir _mkdir
+#define strlwr _strlwr
+#define O_RDONLY _O_RDONLY
+#define O_BINARY _O_BINARY
+#define O_CREAT _O_CREAT
+#define O_WRONLY _O_WRONLY
+#define O_TRUNC _O_TRUNC
+#define S_IREAD _S_IREAD
+#define S_IWRITE _S_IWRITE
+#define S_IFDIR _S_IFDIR
+
/* Eliminate warnings for "old" (non-secure) versions of clib functions */
#ifndef _CRT_SECURE_NO_WARNINGS
diff --git a/usr/src/uts/intel/sys/acpi/platform/acnetbsd.h b/usr/src/uts/intel/sys/acpi/platform/acnetbsd.h
index 7c7a63ec94..23b0e8c4c3 100644
--- a/usr/src/uts/intel/sys/acpi/platform/acnetbsd.h
+++ b/usr/src/uts/intel/sys/acpi/platform/acnetbsd.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/acpi/platform/acos2.h b/usr/src/uts/intel/sys/acpi/platform/acos2.h
index f01d697e31..0704c50161 100644
--- a/usr/src/uts/intel/sys/acpi/platform/acos2.h
+++ b/usr/src/uts/intel/sys/acpi/platform/acos2.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/acpi/platform/acwin.h b/usr/src/uts/intel/sys/acpi/platform/acwin.h
index 1355827ccb..c7d4bcd386 100644
--- a/usr/src/uts/intel/sys/acpi/platform/acwin.h
+++ b/usr/src/uts/intel/sys/acpi/platform/acwin.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/acpi/platform/acwin64.h b/usr/src/uts/intel/sys/acpi/platform/acwin64.h
index b4d184294c..022baf050e 100644
--- a/usr/src/uts/intel/sys/acpi/platform/acwin64.h
+++ b/usr/src/uts/intel/sys/acpi/platform/acwin64.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/bootinfo.h b/usr/src/uts/intel/sys/bootinfo.h
index 08ff9c3ca0..3adce64fc4 100644
--- a/usr/src/uts/intel/sys/bootinfo.h
+++ b/usr/src/uts/intel/sys/bootinfo.h
@@ -32,6 +32,13 @@ extern "C" {
#endif
/*
+ * This is used by bootfs and dboot. It should be at least as large as the
+ * number of modules that bootloaders (e.g., grub) can support. This figure
+ * has been chosen to match grub's value exactly.
+ */
+#define MAX_BOOT_MODULES 99
+
+/*
* The 32-bit kernel loader code needs to build several structures that the
* kernel is expecting. They will contain native sized pointers for the
* target kernel.
@@ -51,6 +58,12 @@ typedef void *native_ptr_t;
#endif
+typedef enum boot_module_type {
+ BMT_ROOTFS,
+ BMT_FILE,
+ BMT_HASH
+} boot_module_type_t;
+
struct boot_memlist {
uint64_t addr;
uint64_t size;
@@ -62,9 +75,11 @@ struct boot_memlist {
* The kernel needs to know how to find its modules.
*/
struct boot_modules {
- native_ptr_t bm_addr;
- uint32_t bm_size;
- uint32_t bm_padding;
+ native_ptr_t bm_addr;
+ native_ptr_t bm_name;
+ native_ptr_t bm_hash;
+ uint32_t bm_size;
+ boot_module_type_t bm_type;
};
/*
diff --git a/usr/src/uts/intel/sys/bootvfs.h b/usr/src/uts/intel/sys/bootvfs.h
index 63696395da..b1cdabaccc 100644
--- a/usr/src/uts/intel/sys/bootvfs.h
+++ b/usr/src/uts/intel/sys/bootvfs.h
@@ -69,6 +69,18 @@ struct boot_fs_ops {
extern struct boot_fs_ops *bfs_ops;
+#ifdef _KERNEL
+
+extern int BRD_MOUNTROOT(struct boot_fs_ops *, char *);
+extern int BRD_UNMOUNTROOT(struct boot_fs_ops *);
+extern int BRD_OPEN(struct boot_fs_ops *, char *, int);
+extern int BRD_CLOSE(struct boot_fs_ops *, int);
+extern ssize_t BRD_READ(struct boot_fs_ops *, int, caddr_t, size_t);
+extern off_t BRD_SEEK(struct boot_fs_ops *, int, off_t, int);
+extern int BRD_FSTAT(struct boot_fs_ops *, int, struct bootstat *);
+
+#else
+
#define BRD_MOUNTROOT(ops, str) ((ops)->fsw_mountroot)(str)
#define BRD_UNMOUNTROOT(ops) ((ops)->fsw_unmountroot)()
#define BRD_OPEN(ops, file, flag) ((ops)->fsw_open)(file, flag)
@@ -77,6 +89,11 @@ extern struct boot_fs_ops *bfs_ops;
#define BRD_SEEK(ops, fd, addr, w) ((ops)->fsw_lseek)(fd, addr, w)
#define BRD_FSTAT(ops, fd, stp) ((ops)->fsw_fstat)(fd, stp)
+#endif
+
+#define SYSTEM_BOOT_PATH "/system/boot"
+#define BFD_F_SYSTEM_BOOT 0x40000000
+
#ifdef _BOOT
extern int mountroot(char *str);
diff --git a/usr/src/uts/intel/sys/machbrand.h b/usr/src/uts/intel/sys/machbrand.h
index 3f9ebdb6b7..c1d045515c 100644
--- a/usr/src/uts/intel/sys/machbrand.h
+++ b/usr/src/uts/intel/sys/machbrand.h
@@ -35,17 +35,20 @@ extern "C" {
struct brand_mach_ops {
void (*b_sysenter)(void);
+ void (*b_int80)(void);
void (*b_int91)(void);
void (*b_syscall)(void);
void (*b_syscall32)(void);
+ greg_t (*b_fixsegreg)(greg_t, model_t);
};
#endif /* _ASM */
#define BRAND_CB_SYSENTER 0
-#define BRAND_CB_INT91 1
-#define BRAND_CB_SYSCALL 2
-#define BRAND_CB_SYSCALL32 3
+#define BRAND_CB_INT80 1
+#define BRAND_CB_INT91 2
+#define BRAND_CB_SYSCALL 3
+#define BRAND_CB_SYSCALL32 4
#ifdef __cplusplus
}
diff --git a/usr/src/uts/intel/sys/segments.h b/usr/src/uts/intel/sys/segments.h
index c4b194fcd8..8a6e398eec 100644
--- a/usr/src/uts/intel/sys/segments.h
+++ b/usr/src/uts/intel/sys/segments.h
@@ -683,6 +683,8 @@ extern void _start(), cmnint();
extern void achktrap(), mcetrap();
extern void xmtrap();
extern void fasttrap();
+extern void sys_int80();
+extern void brand_sys_int80();
extern void dtrace_ret();
#if !defined(__amd64)
diff --git a/usr/src/uts/intel/sys/x86_archext.h b/usr/src/uts/intel/sys/x86_archext.h
index fa3485d69b..d61147295a 100644
--- a/usr/src/uts/intel/sys/x86_archext.h
+++ b/usr/src/uts/intel/sys/x86_archext.h
@@ -98,7 +98,7 @@ extern "C" {
#define CPUID_INTC_ECX_SSE3 0x00000001 /* Yet more SSE extensions */
#define CPUID_INTC_ECX_PCLMULQDQ 0x00000002 /* PCLMULQDQ insn */
- /* 0x00000004 - reserved */
+#define CPUID_INTC_ECX_DTES64 0x00000004 /* 64-bit DS area */
#define CPUID_INTC_ECX_MON 0x00000008 /* MONITOR/MWAIT */
#define CPUID_INTC_ECX_DSCPL 0x00000010 /* CPL-qualified debug store */
#define CPUID_INTC_ECX_VMX 0x00000020 /* Hardware VM extensions */
@@ -108,18 +108,19 @@ extern "C" {
#define CPUID_INTC_ECX_SSSE3 0x00000200 /* Supplemental SSE3 insns */
#define CPUID_INTC_ECX_CID 0x00000400 /* L1 context ID */
/* 0x00000800 - reserved */
- /* 0x00001000 - reserved */
+#define CPUID_INTC_ECX_FMA 0x00001000 /* Fused Multiply Add */
#define CPUID_INTC_ECX_CX16 0x00002000 /* cmpxchg16 */
#define CPUID_INTC_ECX_ETPRD 0x00004000 /* extended task pri messages */
- /* 0x00008000 - reserved */
+#define CPUID_INTC_ECX_PDCM 0x00008000 /* Perf/Debug Capability MSR */
/* 0x00010000 - reserved */
- /* 0x00020000 - reserved */
+#define CPUID_INTC_ECX_PCID 0x00020000 /* process-context ids */
#define CPUID_INTC_ECX_DCA 0x00040000 /* direct cache access */
#define CPUID_INTC_ECX_SSE4_1 0x00080000 /* SSE4.1 insns */
#define CPUID_INTC_ECX_SSE4_2 0x00100000 /* SSE4.2 insns */
#define CPUID_INTC_ECX_X2APIC 0x00200000 /* x2APIC */
#define CPUID_INTC_ECX_MOVBE 0x00400000 /* MOVBE insn */
#define CPUID_INTC_ECX_POPCNT 0x00800000 /* POPCNT insn */
+#define CPUID_INTC_ECX_TSCDL 0x01000000 /* Deadline TSC */
#define CPUID_INTC_ECX_AES 0x02000000 /* AES insns */
#define CPUID_INTC_ECX_XSAVE 0x04000000 /* XSAVE/XRESTOR insns */
#define CPUID_INTC_ECX_OSXSAVE 0x08000000 /* OS supports XSAVE insns */
@@ -131,10 +132,10 @@ extern "C" {
#define FMT_CPUID_INTC_ECX \
"\20" \
"\37rdrand\36f16c\35avx\34osxsav\33xsave" \
- "\32aes" \
+ "\32aes\31tscdl" \
"\30popcnt\27movbe\26x2apic\25sse4.2\24sse4.1\23dca" \
- "\20\17etprd\16cx16\13cid\12ssse3\11tm2" \
- "\10est\7smx\6vmx\5dscpl\4mon\2pclmulqdq\1sse3"
+ "\20\18pdcm\17etprd\16cx16\13cid\12ssse3\11tm2" \
+ "\10est\7smx\6vmx\5dscpl\4mon\3dtes64\2pclmulqdq\1sse3"
/*
* cpuid instruction feature flags in %edx (extended function 0x80000001)
@@ -192,16 +193,25 @@ extern "C" {
#define CPUID_AMD_ECX_3DNP 0x00000100 /* AMD: 3DNowPrefectch */
#define CPUID_AMD_ECX_OSVW 0x00000200 /* AMD: OSVW */
#define CPUID_AMD_ECX_IBS 0x00000400 /* AMD: IBS */
-#define CPUID_AMD_ECX_SSE5 0x00000800 /* AMD: SSE5 */
+#define CPUID_AMD_ECX_SSE5 0x00000800 /* AMD: Extended AVX */
#define CPUID_AMD_ECX_SKINIT 0x00001000 /* AMD: SKINIT */
#define CPUID_AMD_ECX_WDT 0x00002000 /* AMD: WDT */
+ /* 0x00004000 - reserved */
+#define CPUID_AMD_ECX_LWP 0x00008000 /* AMD: Lightweight profiling */
+#define CPUID_AMD_ECX_FMA4 0x00010000 /* AMD: 4-operand FMA support */
+ /* 0x00020000 - reserved */
+ /* 0x00040000 - reserved */
+#define CPUID_AMD_ECX_NIDMSR 0x00080000 /* AMD: Node ID MSR */
+ /* 0x00100000 - reserved */
+#define CPUID_AMD_ECX_TBM 0x00200000 /* AMD: trailing bit manips. */
#define CPUID_AMD_ECX_TOPOEXT 0x00400000 /* AMD: Topology Extensions */
+
#define FMT_CPUID_AMD_ECX \
"\20" \
- "\22topoext" \
+ "\23topoext\22tbm\20nimdsr\17fma4\16lwp" \
"\14wdt\13skinit\12sse5\11ibs\10osvw\93dnp\8mas" \
- "\7sse4a\6lzcnt\5cr8d\3svm\2lcmplgcy\1ahf64"
+ "\7sse4a\6lzcnt\5cr8d\4eas\3svm\2lcmplgcy\1ahf64"
/*
* Intel now seems to have claimed part of the "extended" function
diff --git a/usr/src/uts/intel/udmf/Makefile b/usr/src/uts/intel/udmf/Makefile
new file mode 100644
index 0000000000..1e9f0e5400
--- /dev/null
+++ b/usr/src/uts/intel/udmf/Makefile
@@ -0,0 +1,74 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = udmf
+OBJECTS = $(UDMF_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(UDMF_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+CPPFLAGS += -I$(UTSBASE)/common/io/usbgem
+CPPFLAGS += -DVERSION=\"2.0.0\"
+CPPFLAGS += -DUSBGEM_CONFIG_GLDv3
+LDFLAGS += -dy -N misc/mac -N drv/ip -N misc/usba -N misc/usbgem
+
+CERRWARN += -_gcc=-Wno-unused-value
+CERRWARN += -_gcc=-Wno-unused-function
+CERRWARN += -_gcc=-Wno-unused-variable
+CERRWARN += -_gcc=-Wno-unused-label
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/upf/Makefile b/usr/src/uts/intel/upf/Makefile
new file mode 100644
index 0000000000..5698051c51
--- /dev/null
+++ b/usr/src/uts/intel/upf/Makefile
@@ -0,0 +1,74 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = upf
+OBJECTS = $(UPF_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(UPF_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+CPPFLAGS += -I$(UTSBASE)/common/io/usbgem
+CPPFLAGS += -DVERSION=\"2.0.1\"
+CPPFLAGS += -DUSBGEM_CONFIG_GLDv3
+LDFLAGS += -dy -N misc/mac -N drv/ip -N misc/usba -N misc/usbgem
+
+CERRWARN += -_gcc=-Wno-type-limits
+CERRWARN += -_gcc=-Wno-unused-function
+CERRWARN += -_gcc=-Wno-unused-variable
+CERRWARN += -_gcc=-Wno-unused-label
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/urf/Makefile b/usr/src/uts/intel/urf/Makefile
new file mode 100644
index 0000000000..947cdac9e5
--- /dev/null
+++ b/usr/src/uts/intel/urf/Makefile
@@ -0,0 +1,74 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = urf
+OBJECTS = $(URF_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(URF_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+CPPFLAGS += -I$(UTSBASE)/common/io/usbgem
+CPPFLAGS += -DVERSION=\"2.0.0\"
+CPPFLAGS += -DUSBGEM_CONFIG_GLDv3
+CERRWARN += -_gcc=-Wno-uninitialized
+LDFLAGS += -dy -N misc/mac -N drv/ip -N misc/usba -N misc/usbgem
+
+CERRWARN += -_gcc=-Wno-unused-function
+CERRWARN += -_gcc=-Wno-unused-variable
+CERRWARN += -_gcc=-Wno-unused-label
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/usbgem/Makefile b/usr/src/uts/intel/usbgem/Makefile
new file mode 100644
index 0000000000..4465fd6c92
--- /dev/null
+++ b/usr/src/uts/intel/usbgem/Makefile
@@ -0,0 +1,84 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = usbgem
+OBJECTS = $(USBGEM_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(USBGEM_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# The USBGEM has support for various different features. We use
+# these pre-processor macros to define the set we care about.
+#
+CPPFLAGS += \
+ -DUSBGEM_CONFIG_GLDv3 \
+ -DMODULE \
+ -DGEN_CONFIG_FMA \
+ -DUSBGEM_CONFIG_MAC_PROP \
+ -DVERSION=\"1.6\"
+
+CERRWARN += -_gcc=-Wno-uninitialized
+CERRWARN += -_gcc=-Wno-parentheses
+CERRWARN += -_gcc=-Wno-switch
+CERRWARN += -_gcc=-Wno-unused-variable
+CERRWARN += -_gcc=-Wno-unused-label
+CERRWARN += -_gcc=-Wno-unused-function
+
+LDFLAGS += -dy -N misc/mac -N drv/ip -N misc/usba
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/vmxnet/Makefile b/usr/src/uts/intel/vmxnet/Makefile
new file mode 100644
index 0000000000..4f3ebcf5af
--- /dev/null
+++ b/usr/src/uts/intel/vmxnet/Makefile
@@ -0,0 +1,93 @@
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2012, Joyent, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# VMware Ethernet Adapter b module
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+UTSCLOSED = ../../../../closed/uts
+
+#
+# Define the module and object file sets.
+#
+MODULE = vmxnet
+#
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/intel/io/vmxnet
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# The list of object files is defined here, rather than in Makefile.files,
+# because the "$(CLOSED_BUILD)" macro has not been defined at the time
+# Makefile.files is processed.
+#
+VMXNET_OBJS += vmxnet.o
+
+OBJECTS = $(VMXNET_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(VMXNET_OBJS:%.o=$(LINTS_DIR)/%.ln)
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY) $(SRC_CONFFILE)
+LINT_TARGET = $(LINT_MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+CPPFLAGS += -I$(UTSBASE)/i86pc
+LDFLAGS += -dy -N misc/gld
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+#
+# Intentionally don't build lint libraries to minimize divergence with
+# the upstream source.
+#
+lint:
+modlintlib:
+clean.lint:
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include ../Makefile.targ
diff --git a/usr/src/uts/intel/vnd/Makefile b/usr/src/uts/intel/vnd/Makefile
new file mode 100644
index 0000000000..f66f89062c
--- /dev/null
+++ b/usr/src/uts/intel/vnd/Makefile
@@ -0,0 +1,56 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+UTSBASE = ../..
+
+MODULE = vnd
+OBJECTS = $(VND_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(VND_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+
+include $(UTSBASE)/intel/Makefile.intel
+
+ALL_TARGET = $(BINARY) $(SRC_CONFILE)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+CONF_SRCDIR = $(UTSBASE)/common/io/vnd
+
+LDFLAGS += -dy -Nmisc/neti -Nmisch/hook -Nfs/dev -Nmisc/gsqueue
+
+#
+# We use <sys/ctype.h> which causes gcc to think that all of its inline
+# functions are defined and unused.
+#
+CERRWARN += -_gcc=-Wno-unused-function
+
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/sparc/Makefile.sparc b/usr/src/uts/sparc/Makefile.sparc
index 5a1639a692..b989364998 100644
--- a/usr/src/uts/sparc/Makefile.sparc
+++ b/usr/src/uts/sparc/Makefile.sparc
@@ -21,6 +21,7 @@
# Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2013 Andrew Stormont. All rights reserved.
+# Copyright (c) 2014, Joyent, Inc. All rights reserved.
#
@@ -236,6 +237,7 @@ DRV_KMODS += nulldriver
DRV_KMODS += bridge trill
DRV_KMODS += bpf
DRV_KMODS += dca
+DRV_KMODS += inotify
#
# Hardware Drivers in common space
diff --git a/usr/src/uts/sparc/inotify/Makefile b/usr/src/uts/sparc/inotify/Makefile
new file mode 100644
index 0000000000..ce2b956955
--- /dev/null
+++ b/usr/src/uts/sparc/inotify/Makefile
@@ -0,0 +1,70 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = inotify
+OBJECTS = $(INOTIFY_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(INOTIFY_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/io
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/sparc/Makefile.sparc
+
+LINTTAGS += -erroff=E_STRUCT_DERIVED_FROM_FLEX_MBR
+CERRWARN += -_gcc=-Wno-parentheses
+LDFLAGS += -dy -Nfs/specfs
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY) $(SRC_CONFILE)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/sparc/Makefile.targ
diff --git a/usr/src/uts/sparc/ip/ip.global-objs.debug64 b/usr/src/uts/sparc/ip/ip.global-objs.debug64
index b05d399caf..3098f98265 100644
--- a/usr/src/uts/sparc/ip/ip.global-objs.debug64
+++ b/usr/src/uts/sparc/ip/ip.global-objs.debug64
@@ -105,7 +105,6 @@ ilb_sticky_expiry
ilb_sticky_timer_size
ilb_sticky_timeout
ill_no_arena
-ill_null
inet_dev_info
inet_devops
ip6_ftable_hash_size
diff --git a/usr/src/uts/sparc/ip/ip.global-objs.obj64 b/usr/src/uts/sparc/ip/ip.global-objs.obj64
index 7dd30502a0..f182d7198e 100644
--- a/usr/src/uts/sparc/ip/ip.global-objs.obj64
+++ b/usr/src/uts/sparc/ip/ip.global-objs.obj64
@@ -105,7 +105,6 @@ ilb_sticky_expiry
ilb_sticky_timer_size
ilb_sticky_timeout
ill_no_arena
-ill_null
inet_dev_info
inet_devops
ip6_ftable_hash_size
diff --git a/usr/src/uts/sparc/ipf/ipf.global-objs.debug64 b/usr/src/uts/sparc/ipf/ipf.global-objs.debug64
index 61566e8b65..26a0777190 100644
--- a/usr/src/uts/sparc/ipf/ipf.global-objs.debug64
+++ b/usr/src/uts/sparc/ipf/ipf.global-objs.debug64
@@ -23,6 +23,10 @@
# Use is subject to license terms.
#
+hook4_vnd_in
+hook4_vnd_out
+hook6_vnd_in
+hook6_vnd_out
fr_availfuncs
fr_features
fr_objbytes
diff --git a/usr/src/uts/sun4/brand/common/brand_solaris.s b/usr/src/uts/sun4/brand/common/brand_solaris.s
index 889218bc5f..9097273036 100644
--- a/usr/src/uts/sun4/brand/common/brand_solaris.s
+++ b/usr/src/uts/sun4/brand/common/brand_solaris.s
@@ -236,7 +236,7 @@ _emulation_check:
#endif /* sun4v */
ldn [%g2 + CPU_THREAD], %g3; /* get thread ptr */
ldn [%g3 + T_PROCP], %g4; /* get proc ptr */
- ldn [%g4 + P_BRAND_DATA], %g5; /* get brand data ptr */
+ ldn [%g4 + __P_BRAND_DATA], %g5; /* get brand data ptr */
ldn [%g5 + SPD_HANDLER], %g5; /* get userland brnd hdlr ptr */
brz %g5, _exit; /* has it been set? */
nop;
diff --git a/usr/src/uts/sun4/ml/offsets.in b/usr/src/uts/sun4/ml/offsets.in
index 4f6d19ba01..de214274ee 100644
--- a/usr/src/uts/sun4/ml/offsets.in
+++ b/usr/src/uts/sun4/ml/offsets.in
@@ -109,7 +109,7 @@ proc PROCSIZE
p_utraps
p_agenttp
p_brand
- p_brand_data
+ __p_brand_data
\#define P_UTRAP4 (UT_ILLTRAP_INSTRUCTION * CPTRSIZE)
\#define P_UTRAP7 (UT_FP_DISABLED * CPTRSIZE)
diff --git a/usr/src/uts/sun4u/io/pci/pcisch.c b/usr/src/uts/sun4u/io/pci/pcisch.c
index 27916bcc74..2a2bd05970 100644
--- a/usr/src/uts/sun4u/io/pci/pcisch.c
+++ b/usr/src/uts/sun4u/io/pci/pcisch.c
@@ -2973,11 +2973,12 @@ iommu_tlb_scrub(iommu_t *iommu_p, int scrub)
"\tContext=%lx %sWritable %sStreamable\n"
"\tPCI Page Size=%sk Address in page %lx\n",
ddi_driver_name(dip), ddi_get_instance(dip), errstat, i,
- (tag & TLBTAG_CONTEXT_BITS) >> TLBTAG_CONTEXT_SHIFT,
+ (uint64_t)(tag & TLBTAG_CONTEXT_BITS) >>
+ TLBTAG_CONTEXT_SHIFT,
(tag & TLBTAG_WRITABLE_BIT) ? "" : neg,
(tag & TLBTAG_STREAM_BIT) ? "" : neg,
(tag & TLBTAG_PGSIZE_BIT) ? "64" : "8",
- (tag & TLBTAG_PCIVPN_BITS) << 13);
+ (uint64_t)(tag & TLBTAG_PCIVPN_BITS) << 13);
cmn_err(CE_CONT, "Memory: %sValid %sCacheable Page Frame=%lx\n",
(data & TLBDATA_VALID_BIT) ? "" : neg,
(data & TLBDATA_CACHE_BIT) ? "" : neg, pfn);
diff --git a/usr/src/uts/sun4v/io/vcc.c b/usr/src/uts/sun4v/io/vcc.c
index feeaf03e8f..85f722e467 100644
--- a/usr/src/uts/sun4v/io/vcc.c
+++ b/usr/src/uts/sun4v/io/vcc.c
@@ -24,6 +24,9 @@
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
#include <sys/types.h>
#include <sys/file.h>
@@ -2456,7 +2459,7 @@ vcc_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
*reventsp |= (events & POLLIN);
}
- if (((*reventsp) == 0) && (!anyyet)) {
+ if ((((*reventsp) == 0) && (!anyyet)) || (events & POLLET)) {
*phpp = &vport->poll;
if (events & POLLIN) {
mutex_enter(&vport->lock);